Commits (9)
# Build the source distribution on every push and pull request, and publish
# it to PyPI when the pushed ref is a `v*` tag.
name: Package

on:
  - push
  - pull_request

jobs:
  # Build the sdist and hand it to the `upload` job as the `dist` artifact.
  sdist:
    runs-on: ubuntu-latest
    name: Build source distribution
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
        with:
          submodules: true
      - name: Set up Python 3.9
        uses: actions/setup-python@v2
        with:
          # Quoted so the version is never read as a float.
          python-version: "3.9"
      - name: List project dependencies
        run: python setup.py list_requirements
      - name: Install project dependencies
        run: python -m pip install -r requirements.txt
      - name: Build source distribution
        run: python setup.py sdist
      - name: Store built source distribution
        uses: actions/upload-artifact@v2
        with:
          name: dist
          path: dist/*

  # Download the `dist` artifact and publish it; the publish step itself is
  # skipped unless the ref is a `v*` tag.
  upload:
    environment: PyPI
    runs-on: ubuntu-latest
    name: Upload
    needs:
      - sdist
    steps:
      - name: Checkout code
        uses: actions/checkout@v2
      - name: Set up Python 3.9
        uses: actions/setup-python@v2
        with:
          python-version: "3.9"
      - name: Download built distributions
        uses: actions/download-artifact@v2
        with:
          name: dist
          path: dist
      - name: Publish distributions to PyPI
        if: startsWith(github.ref, 'refs/tags/v')
        # Track the maintained release branch instead of `master`.
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          user: __token__
          password: ${{ secrets.PYPI_API_TOKEN }}
          skip-existing: false
name: Changelog
name: Release
on:
push:
......@@ -6,6 +6,39 @@ on:
- v*.*.*
jobs:
# Job: rebuild the HMM data files and attach gzip-compressed copies of the
# Pfam and Tigrfam HMMs to the GitHub release.
attach:
runs-on: ubuntu-latest
name: Attach HMM artifacts to GitHub releases page
steps:
- name: Checkout code
uses: actions/checkout@v2
with:
submodules: true
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9
- name: List project dependencies
run: python setup.py list_requirements
- name: Install project dependencies
run: python -m pip install -r requirements.txt
# The compression steps below read the HMM files from build/lib/gecco/hmmer/.
- name: Build new HMM artifacts
run: python setup.py build_data -f -r
# `gzip -c` writes to stdout, so the built files are left untouched.
- name: Compress Pfam HMM
run: gzip -c build/lib/gecco/hmmer/Pfam.h3m > Pfam.h3m.gz
- name: Compress Tigrfam HMM
run: gzip -c build/lib/gecco/hmmer/Tigrfam.h3m > Tigrfam.h3m.gz
# Only runs when the ref that triggered the workflow is a tag.
- name: Upload HMM
uses: softprops/action-gh-release@v1
if: startsWith(github.ref, 'refs/tags/')
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
with:
files: |
Pfam.h3m.gz
Tigrfam.h3m.gz
chandler:
environment: GitHub Releases
runs-on: ubuntu-latest
......
......@@ -5,7 +5,16 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
[Unreleased]: https://git.embl.de/grp-zeller/GECCO/compare/v0.6.2...master
## [v0.6.2] - 2021-05-04
[v0.6.2]: https://git.embl.de/grp-zeller/GECCO/compare/v0.6.1...v0.6.2
### Fixed
- `gecco cv loto` crashing because of outdated code.
### Changed
- Logging-style prompt will only display if GECCO is running with `-vv` flag.
### Added
- GECCO bioRxiv paper reference to `Cluster.to_seq_record` output record.
## [v0.6.1] - 2021-03-15
[v0.6.1]: https://git.embl.de/grp-zeller/GECCO/compare/v0.6.0...v0.6.1
......
......@@ -16,6 +16,7 @@ in genomic and metagenomic data using Conditional Random Fields (CRFs).
[![Mirror](https://img.shields.io/badge/mirror-EMBL-009f4d?style=flat-square&maxAge=2678400)](https://git.embl.de/grp-zeller/GECCO/)
[![Changelog](https://img.shields.io/badge/keep%20a-changelog-8A0707.svg?maxAge=2678400&style=flat-square)](https://github.com/zellerlab/GECCO/blob/master/CHANGELOG.md)
[![Issues](https://img.shields.io/github/issues/zellerlab/GECCO.svg?style=flat-square&maxAge=600)](https://github.com/zellerlab/GECCO/issues)
[![Preprint](https://img.shields.io/badge/preprint-bioRxiv-darkblue?style=flat-square&maxAge=2678400)](https://www.biorxiv.org/content/10.1101/2021.05.03.442509v1)
## 🔧 Installing GECCO
......@@ -26,7 +27,7 @@ PyPI, the Python Package Index.
Use `pip` to install GECCO on your machine:
```console
$ pip install https://github.com/zellerlab/GECCO/archive/master.zip
$ pip install gecco-tool
```
This will install GECCO, its dependencies, and the data needed to run
......@@ -64,6 +65,15 @@ Additional parameters of interest are:
<!-- ## 📖 Documentation -->
## 🔖 Reference
GECCO can be cited using the following preprint:
> **Accurate de novo identification of biosynthetic gene clusters with GECCO**.
> Laura M Carroll, Martin Larralde, Jonas Simon Fleck, Ruby Ponnudurai, Alessio Milanese, Elisa Cappio Barazzone, Georg Zeller.
> bioRxiv 2021.05.03.442509; [doi:10.1101/2021.05.03.442509](https://doi.org/10.1101/2021.05.03.442509)
## 💭 Feedback
### ⚠️ Issue Tracker
......
......@@ -13,8 +13,5 @@ Domain Annotation
.. autoclass:: DomainAnnotator(object)
:members:
.. autoclass:: HMMER(DomainAnnotator)
:members:
.. autoclass:: PyHMMER(DomainAnnotator)
:members:
......@@ -51,7 +51,6 @@ Domain Annotation
.. autosummary::
:nosignatures:
HMMER
PyHMMER
BGC Detection
......
......@@ -4,7 +4,7 @@ GECCO
*Biosynthetic Gene Cluster prediction with Conditional Random Fields.*
|GitLabCI| |Coverage| |License| |Source| |Mirror| |Issues|
|GitLabCI| |Coverage| |License| |Source| |Issues| |Preprint|
.. |GitLabCI| image:: https://img.shields.io/gitlab/pipeline/grp-zeller/GECCO/master?gitlab_url=https%3A%2F%2Fgit.embl.de&logo=gitlab&style=flat-square&maxAge=600
:target: https://git.embl.de/grp-zeller/GECCO/-/pipelines
......@@ -24,6 +24,9 @@ GECCO
.. |Issues| image:: https://img.shields.io/github/issues/zellerlab/GECCO.svg?logo=github&style=flat-square&maxAge=600
:target: https://github.com/zellerlab/GECCO/issues
.. |Preprint| image:: https://img.shields.io/badge/preprint-bioRxiv-darkblue?style=flat-square&maxAge=2678400&logo=arxiv
:target: https://www.biorxiv.org/content/10.1101/2021.05.03.442509v1
Overview
--------
......@@ -82,6 +85,16 @@ the input file):
containing the cluster sequence annotated with its member proteins and domains.
Reference
---------
GECCO can be cited using the following preprint:
**Accurate de novo identification of biosynthetic gene clusters with GECCO**.
Laura M Carroll, Martin Larralde, Jonas Simon Fleck, Ruby Ponnudurai, Alessio Milanese, Elisa Cappio Barazzone, Georg Zeller.
bioRxiv 2021.05.03.442509; `doi:10.1101/2021.05.03.442509 <https://doi.org/10.1101/2021.05.03.442509>`_
Feedback
--------
......
......@@ -2,8 +2,10 @@
"""
import abc
import contextlib
import functools
import importlib
import locale
import operator
import typing
from multiprocessing.pool import Pool
......@@ -119,3 +121,15 @@ class OrderedPoolWrapper:
results = self.inner.map(wrapped_func, wrapped_it)
results.sort(key=operator.itemgetter(0))
return list(map(operator.itemgetter(1), results))
@contextlib.contextmanager
def patch_locale(name: str):
    """Temporarily switch the ``LC_TIME`` locale to ``name``.

    The ``LC_TIME`` setting in effect on entry is recorded and restored
    when the ``with`` block exits, whether it exits normally or through
    an exception.

    Arguments:
        name (str): The locale name to pass to `locale.setlocale` for the
            duration of the block (e.g. ``"C"``).

    """
    category = locale.LC_TIME
    # calling `setlocale` without a value only queries the current setting
    previous = locale.setlocale(category)
    try:
        locale.setlocale(category, name)
        yield
    finally:
        locale.setlocale(category, previous)
......@@ -144,39 +144,67 @@ class Command(metaclass=abc.ABCMeta):
def error(self, message, *args, level=0):
    """Print an error message to the console.

    Nothing is printed unless ``self.quiet <= 2`` and ``level`` does not
    exceed ``self.verbose``. The message is printed exactly once: with a
    short ``x`` marker when ``self.verbose <= 1``, or in log style
    (``self._logprefix()`` followed by a ``FAIL`` tag) otherwise.

    Arguments:
        message: The first element of the message to print.
        *args: Additional elements passed through to ``console.print``.
        level (int): The minimum verbosity at which to show the message.

    """
    if self.quiet <= 2 and level <= self.verbose:
        if self.verbose <= 1:
            # compact prompt: no log prefix unless running very verbose
            self.console.print("[bold red]x[/]", message, *args)
        else:
            self.console.print(
                *self._logprefix(),
                "[bold red]FAIL[/]",
                message,
                *args,
            )
def info(self, verb, *args, level=1):
    """Print an informational message to the console.

    Nothing is printed unless ``self.quiet == 0`` and ``level`` does not
    exceed ``self.verbose``. The message is printed exactly once: with a
    short ``i`` marker when ``self.verbose <= 1``, or in log style
    (``self._logprefix()`` followed by an ``INFO`` tag) otherwise.

    Arguments:
        verb: The first element of the message to print.
        *args: Additional elements passed through to ``console.print``.
        level (int): The minimum verbosity at which to show the message.

    """
    if self.quiet == 0 and level <= self.verbose:
        if self.verbose <= 1:
            # compact prompt: no log prefix unless running very verbose
            self.console.print("[bold blue]i[/]", verb, *args)
        else:
            self.console.print(
                *self._logprefix(),
                "[bold blue]INFO[/]",
                verb,
                *args,
            )
def success(self, verb, *args, level=1):
    """Print a success message to the console.

    Nothing is printed unless ``self.quiet == 0`` and ``level`` does not
    exceed ``self.verbose``. The message is printed exactly once: with a
    check mark when ``self.verbose <= 1``, or in log style
    (``self._logprefix()`` followed by an ``OK`` tag) otherwise.

    Arguments:
        verb: The first element of the message to print.
        *args: Additional elements passed through to ``console.print``.
        level (int): The minimum verbosity at which to show the message.

    """
    if self.quiet == 0 and level <= self.verbose:
        if self.verbose <= 1:
            # compact prompt: no log prefix unless running very verbose
            self.console.print("[green]:heavy_check_mark:[/]", verb, *args)
        else:
            self.console.print(
                *self._logprefix(),
                "[bold green] OK[/]",
                verb,
                *args,
            )
def warn(self, verb, *args, level=0):
    """Print a warning message to the console.

    Nothing is printed unless ``self.quiet <= 1`` and ``level`` does not
    exceed ``self.verbose``. The message is printed exactly once: with a
    short ``!`` marker when ``self.verbose <= 1``, or in log style
    (``self._logprefix()`` followed by a ``WARN`` tag) otherwise.

    Arguments:
        verb: The first element of the message to print.
        *args: Additional elements passed through to ``console.print``.
        level (int): The minimum verbosity at which to show the message.

    """
    if self.quiet <= 1 and level <= self.verbose:
        if self.verbose <= 1:
            # compact prompt: no log prefix unless running very verbose
            self.console.print("[bold yellow]![/]", verb, *args)
        else:
            self.console.print(
                *self._logprefix(),
                "[bold yellow]WARN[/]",
                verb,
                *args,
            )
def _logprefix(self):
return [
......
......@@ -109,6 +109,7 @@ class Cv(Command): # noqa: D101
hint="positive or null integer"
)
self.features = self._check_flag("--features")
self.clusters = self._check_flag("--clusters")
self.loto = self.args["loto"]
self.output = self.args["--output"]
except InvalidArgument:
......@@ -149,13 +150,14 @@ class Cv(Command): # noqa: D101
return seqs
def _loto_splits(self, seqs):
self.logger.info("Loading the clusters table")
self.info("Loading", "the clusters table")
with open(self.clusters) as in_:
table = ClusterTable.load(in_)
index = { row.sequence_id: row.type for row in table }
if len(index) != len(table):
raise ValueError("Training data contains several clusters per sequence")
self.info("Grouping", "sequences by cluster types")
groups = []
for cluster in seqs:
ty = next((index[g.source.id] for g in cluster if g.source.id in index), None)
......
......@@ -18,12 +18,12 @@ from typing import Dict, Iterable, List, Mapping, Optional, Sequence, TextIO, Na
import numpy
from Bio.Seq import Seq
from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation
from Bio.SeqFeature import SeqFeature, FeatureLocation, CompoundLocation, Reference
from Bio.SeqRecord import SeqRecord
from . import __version__
from ._base import Dumpable
from ._meta import requires
from ._meta import requires, patch_locale
__all__ = [
......@@ -343,6 +343,9 @@ class Cluster:
*misc_feature*.
"""
# store time of record creation
now = datetime.datetime.now()
# NB(@althonos): we use inclusive 1-based ranges in the data model
# but slicing expects 0-based ranges with exclusive ends
bgc = self.source[self.start - 1 : self.end]
......@@ -352,13 +355,30 @@ class Cluster:
bgc.annotations = self.source.annotations.copy()
bgc.annotations["topology"] = "linear"
bgc.annotations["molecule_type"] = "DNA"
#bgc.annotations.setdefault("comment", []).append(f"Detected with GECCO v{__version__}")
with patch_locale("C"):
bgc.annotations['date'] = now.strftime("%d-%b-%Y").upper()
# add GECCO preprint as a reference
ref = Reference()
ref.title = "Accurate de novo identification of biosynthetic gene clusters with GECCO"
ref.journal = "bioRxiv (2021.05.03.442509)"
ref.comment = "doi:10.1101/2021.05.03.442509"
ref.authors = ", ".join([
"Laura M Carroll",
"Martin Larralde",
"Jonas Simon Fleck",
"Ruby Ponnudurai",
"Alessio Milanese",
"Elisa Cappio Barazzone",
"Georg Zeller"
])
bgc.annotations.setdefault("references", []).append(ref)
# add GECCO-specific annotations as a structured comment
structured_comment = bgc.annotations.setdefault("structured_comment", OrderedDict())
structured_comment['GECCO-Data'] = {
"version": f"GECCO v{__version__}",
"creation_date": datetime.datetime.now().isoformat(),
"creation_date": now.isoformat(),
"biosyn_class": ",".join(ty.name for ty in self.type.unpack()),
"alkaloid_probability": self.type_probabilities.get(ProductType.Alkaloid, 0.0),
"polyketide_probability": self.type_probabilities.get(ProductType.Polyketide, 0.0),
......
[metadata]
name = gecco
name = gecco-tool
version = file: gecco/_version.txt
author = Martin Larralde
author-email = martin.larralde@embl.de
......@@ -43,7 +43,8 @@ install_requires =
biopython ~=1.78
dataclasses ~=0.8 ; python_version < '3.7'
docopt ~=0.6.2
numpy ~=1.16
numpy ~=1.16,<1.20 ; python_version < '3.7'
numpy ~=1.16 ; python_version >= '3.7'
psutil ~=5.8
pyhmmer ~=0.3.0
pyrodigal ~=0.4.2
......