......@@ -67,6 +67,7 @@ jobs:
runs-on: macos-latest
env:
OS: OSX
if: "!startsWith(github.ref, 'refs/tags/')"
strategy:
matrix:
include:
......
......@@ -5,7 +5,15 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
[Unreleased]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.1...master
[Unreleased]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.2...master
## [v0.8.2] - 2021-07-31
[v0.8.2]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.1...v0.8.2
### Fixed
- `gecco run` crashing on Python 3.6 because the `contextlib.nullcontext` class is missing there (it was only added in Python 3.7).
### Changed
- `gecco run` and `gecco annotate` will not try to count the number of profiles when given an external HMM file with the `--hmm` flag.
- `PyHMMER.run` now reports the *p-value* of each domain in addition to the *e-value* as a `/note` qualifier.
## [v0.8.1] - 2021-07-29
[v0.8.1]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.0...v0.8.1
......
......@@ -10,4 +10,4 @@ See Also:
__author__ = "Martin Larralde"
__license__ = "GPLv3"
__version__ = "0.8.1"
__version__ = "0.8.2"
......@@ -92,7 +92,6 @@ class Annotate(Command): # noqa: D101
raise CommandExit(1)
def _custom_hmms(self):
import pyhmmer
from ...hmmer import HMM
for path in self.hmm:
......@@ -102,17 +101,12 @@ class Annotate(Command): # noqa: D101
base, _ = os.path.splitext(base)
file = gzip.GzipFile(fileobj=file)
base, _ = os.path.splitext(base)
self.info("Counting", "profiles in HMM file", repr(path), level=1)
with file:
with pyhmmer.plan7.HMMFile(file) as hmm_file:
size = sum(1 for _ in hmm_file)
self.success("Found", size, "profiles in HMM file", repr(path), level=1)
yield HMM(
id=base,
version="?",
url="?",
path=path,
size=size,
size=1,
relabel_with=r"s/([^\.]*)(\..*)?/\1/"
)
......
......@@ -142,25 +142,22 @@ class Run(Annotate): # noqa: D101
break
def _load_model_domains(self) -> typing.Set[str]:
    """Load the set of domain names used as features by the model.

    The names are read from a ``domains.tsv`` file: the copy bundled in
    the ``gecco.types`` package when ``self.model`` is `None`, otherwise
    the one located inside the ``self.model`` directory. Every line is
    stripped of surrounding whitespace, and empty lines are discarded.

    Returns:
        The set of non-empty, stripped lines found in the domains file.

    Raises:
        CommandExit: When the domains file cannot be found. The exit
            code is the ``errno`` of the underlying `FileNotFoundError`.

    """
    try:
        if self.model is None:
            self.info("Loading", "feature list from internal model", level=2)
            domains_file = importlib_resources.open_text("gecco.types", "domains.tsv")
        else:
            self.info("Loading", "feature list from", repr(self.model), level=2)
            domains_file = open(os.path.join(self.model, "domains.tsv"))
        # Both branches yield an open text handle, so a single `with`
        # closes it without needing `contextlib.nullcontext`, which does
        # not exist on Python 3.6.
        with domains_file as f:
            domains = set(filter(None, map(str.strip, f)))
    except FileNotFoundError as err:
        # Only a user-supplied model gets a message here; a missing
        # bundled file is reported through the exit code alone.
        if self.model is not None:
            self.error("Could not find domains list :", repr(self.model))
        # The exception is bound to `err`; the former `e.errno` raised a
        # `NameError` instead of exiting with the intended code.
        raise CommandExit(err.errno) from err
    else:
        self.success("Found", len(domains), "selected features", level=2)
        return domains
def _predict_probabilities(self, genes):
from ...crf import ClusterCRF
......
......@@ -155,10 +155,14 @@ class PyHMMER(DomainAnnotator):
# extract qualifiers
qualifiers: Dict[str, List[str]] = {
"inference": ["protein motif"],
"note": ["e-value: {}".format(domain.i_evalue)],
"db_xref": ["{}:{}".format(self.hmm.id.upper(), accession)],
"function": [] if entry is None else [entry.name]
"note": [
"e-value: {}".format(domain.i_evalue),
"p-value: {}".format(domain.pvalue),
],
}
if entry is not None:
qualifiers["function"] = [entry.name]
if entry is not None and entry.integrated is not None:
qualifiers["db_xref"].append("InterPro:{}".format(entry.integrated))
......