Commits (3)
......@@ -5,7 +5,12 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
[Unreleased]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.9...master
[Unreleased]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.10...master
## [v0.8.10] - 2022-02-23
[v0.8.10]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.9...v0.8.10
### Fixed
- `--antismash-sideload` flag of `gecco run` causing the command to crash.
## [v0.8.9] - 2022-02-22
[v0.8.9]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.8...v0.8.9
......
<?xml version='1.0' encoding='utf-8'?>
<tool id="gecco" name="GECCO" version="0.8.5" python_template_version="3.5">
<tool id="gecco" name="GECCO" version="0.8.9" python_template_version="3.5">
<description>is a fast and scalable method for identifying putative novel Biosynthetic Gene Clusters (BGCs) in genomic and metagenomic data using Conditional Random Fields (CRFs).</description>
<requirements>
<requirement type="package" version="0.8.5">gecco</requirement>
<requirement type="package" version="0.8.9">gecco</requirement>
</requirements>
<version_command>gecco --version</version_command>
<command detect_errors="aggressive"><![CDATA[
......@@ -18,7 +18,11 @@
--format $input.ext
--genome input_tempfile.$file_extension
--postproc $postproc
--edge-distance $edge_distance
--force-clusters-tsv
#if $mask
--mask
#end if
#if $cds:
--cds $cds
#end if
......@@ -38,12 +42,14 @@
]]></command>
<inputs>
<!-- User-facing parameters of the GECCO wrapper. Parameters that only carry
     an "argument" attribute are referenced elsewhere in this file by the
     flag name without its leading dashes and with inner dashes written as
     underscores (for example $edge_distance in the command template and
     "antismash_sideload" in the tests). -->
<!-- Sequence to analyse; the accepted datatypes are GenBank, FASTA and EMBL. -->
<param name="input" type="data" format="genbank,fasta,embl" label="Sequence file in GenBank, EMBL or FASTA format"/>
<!-- Off by default; the command template adds the mask flag only when this is enabled. -->
<param argument="--mask" type="boolean" checked="false" label="Enable masking of regions with unknown nucleotides when finding ORFs"/>
<!-- Optional with no preset value; the command template passes it only when a value is set. -->
<param argument="--cds" type="integer" min="0" value="" optional="true" label="Minimum number of genes required for a cluster"/>
<!-- Optional probability restricted to the range 0..1, with no preset value. -->
<param argument="--threshold" type="float" min="0" max="1" value="" optional="true" label="Probability threshold for cluster detection"/>
<!-- Always forwarded to the command line; "gecco" is the preselected choice. -->
<param argument="--postproc" type="select" label="Post-processing method for gene cluster validation">
<option value="antismash">antiSMASH</option>
<option value="gecco" selected="true">GECCO</option>
</param>
<!-- Always forwarded to the command line; defaults to 10, and the tests in
     this file override it with 0. -->
<param argument="--edge-distance" type="integer" min="0" value="10" label="Number of genes from the contig edges to filter out"/>
<!-- Off by default; the test that enables it expects an additional
     "sideload" output (sideload.json). -->
<param argument="--antismash-sideload" type="boolean" checked="false" label="Generate an antiSMASH v6 sideload JSON file"/>
</inputs>
<outputs>
......@@ -61,6 +67,12 @@
<param name="input" value="BGC0001866.fna"/>
<output name="features" file="features.tsv"/>
<output name="clusters" file="clusters.tsv"/>
</test>
<test>
<param name="input" value="BGC0001866.fna"/>
<param name="edge_distance" value="0"/>
<output name="features" file="features.tsv"/>
<output name="clusters" file="clusters.tsv"/>
<output_collection name="records" type="list">
<element name="BGC0001866.1_cluster_1" file="BGC0001866.1_cluster_1.gbk" ftype="genbank" compare="diff" lines_diff="4"/>
</output_collection>
......@@ -68,6 +80,7 @@
<test>
<param name="input" value="BGC0001866.fna"/>
<param name="antismash_sideload" value="True"/>
<param name="edge_distance" value="0"/>
<output name="features" file="features.tsv"/>
<output name="clusters" file="clusters.tsv"/>
<output name="sideload" file="sideload.json"/>
......
......@@ -10,4 +10,4 @@ See Also:
__author__ = "Martin Larralde"
__license__ = "GPLv3"
__version__ = "0.8.9"
__version__ = "0.8.10"
......@@ -307,7 +307,6 @@ class Run(Annotate): # noqa: D101
"saccharide_probability": f"{cluster.type_probabilities.get(ProductType.Saccharide, 0.0):.3f}",
"terpene_probability": f"{cluster.type_probabilities.get(ProductType.Terpene, 0.0):.3f}",
"nrp_probability": f"{cluster.type_probabilities.get(ProductType.NRP, 0.0):.3f}",
"other_probability": f"{cluster.type_probabilities.get(ProductType.Other, 0.0):.3f}",
}
})
# write the JSON file to the output folder
......
......@@ -15,7 +15,7 @@ REFERENCE 1
JOURNAL bioRxiv (2021.05.03.442509)
REMARK doi:10.1101/2021.05.03.442509
COMMENT ##GECCO-Data-START##
version :: GECCO v0.8.5
version :: GECCO v0.8.9
creation_date :: 2021-11-21T16:33:58.470847
biosyn_class :: Polyketide
alkaloid_probability :: 0.0
......@@ -24,7 +24,6 @@ COMMENT ##GECCO-Data-START##
saccharide_probability :: 0.0
terpene_probability :: 0.0
nrp_probability :: 0.14
other_probability :: 0.0
##GECCO-Data-END##
FEATURES Location/Qualifiers
CDS complement(1..1143)
......
......@@ -31,6 +31,6 @@
},
"description": "Biosynthetic Gene Cluster prediction with Conditional Random Fields.",
"name": "GECCO",
"version": "0.8.5"
"version": "0.8.9"
}
}
\ No newline at end of file
}