Commits (3)
......@@ -5,7 +5,13 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
## [Unreleased]
[Unreleased]: https://git.embl.de/grp-zeller/GECCO/compare/v0.9.1-alpha1...master
[Unreleased]: https://git.embl.de/grp-zeller/GECCO/compare/v0.9.1-alpha2...master
## [v0.9.1-alpha2] - 2022-03-23
[v0.9.1-alpha2]: https://git.embl.de/grp-zeller/GECCO/compare/v0.9.1-alpha1...v0.9.1-alpha2
### Fixed
- `TypeClassifier.trained` not being able to read unknown types from type tables.
## [v0.9.1-alpha1] - 2022-03-20
[v0.9.1-alpha1]: https://git.embl.de/grp-zeller/GECCO/compare/v0.8.10...v0.9.1-alpha1
......
......@@ -2,7 +2,7 @@
See Also:
*Accurate de novo identification of biosynthetic gene clusters with GECCO*
Laura M. Carroll, Martin Larralde, Jonas Simon Fleck, Ruby Ponnudurai,
Laura M. Carroll, Martin Larralde, Jonas Simon Fleck, Ruby Ponnudurai,
Alessio Milanese, Elisa Cappio, Georg Zeller. bioRxiv 2021.05.03.442509
`doi:10.1101/2021.05.03.442509 <https://doi.org/10.1101/2021.05.03.442509>`_
......@@ -10,4 +10,4 @@ See Also:
__author__ = "Martin Larralde"
__license__ = "GPLv3"
__version__ = "0.9.1-alpha1"
__version__ = "0.9.1-alpha2"
......@@ -6,6 +6,7 @@ import functools
import operator
import os
import typing
import warnings
from typing import Callable, Dict, List, Iterable, Optional, Sequence, Tuple, Union
import numpy
......@@ -79,12 +80,18 @@ class TypeClassifier(object):
with doms_file as doms_src:
domains = [ line.strip() for line in doms_src ]
with typs_file as typs_src:
types = [
ProductType.pack(ProductType.__members__[ty] for ty in raw.split(";"))
if raw.strip()
else ProductType.Unknown
for raw in (line.split("\t")[1].strip() for line in typs_src)
]
types = []
for line in typs_src:
unpacked = set()
for ty in line.split("\t")[1].strip().split(";"):
if ty in ProductType.__members__:
unpacked.add(ProductType.__members__[ty])
elif not ty:
unpacked.add(ProductType.Unknown)
else:
warnings.warn(f"Unknown type in types table: {ty!r}")
unpacked.add(ProductType.Unknown)
types.append(ProductType.pack(unpacked))
classifier = cls(random_state=0)
types_bin = classifier.binarizer.transform(types)
......
......@@ -32,7 +32,6 @@ class TestRun(TestCommand, unittest.TestCase):
def tearDown(self):
shutil.rmtree(self.tmpdir)
@unittest.expectedFailure
def test_fasta_genome(self):
sequence = os.path.join(self.folder, "data", "BGC0001866.fna")
source = Bio.SeqIO.read(sequence, "fasta")
......