Commit 5fa7942d authored by Martin Larralde
Browse files

Run `black` on the Python source files

parent d9e938ad
......@@ -54,4 +54,6 @@ if __doc__ is not None:
of the library on
`Read The Docs <https://pyrodigal.readthedocs.io/en/v{}/>`_.
""".format(__version__)
""".format(
__version__
)
import threading
import typing
from typing import FrozenSet, Iterable, Iterator, List, Dict, Optional, TextIO, Tuple, Union
from typing import (
FrozenSet,
Iterable,
Iterator,
List,
Dict,
Optional,
TextIO,
Tuple,
Union,
)
# --- Globals ----------------------------------------------------------------
_TARGET_CPU : str
_AVX2_RUNTIME_SUPPORT : bool
_NEON_RUNTIME_SUPPORT : bool
_SSE2_RUNTIME_SUPPORT : bool
_AVX2_BUILD_SUPPORT : bool
_NEON_BUILD_SUPPORT : bool
_SSE2_BUILD_SUPPORT : bool
TRANSLATION_TABLES : FrozenSet[int]
METAGENOMIC_BINS : Tuple[MetagenomicBin]
_TARGET_CPU: str
_AVX2_RUNTIME_SUPPORT: bool
_NEON_RUNTIME_SUPPORT: bool
_SSE2_RUNTIME_SUPPORT: bool
_AVX2_BUILD_SUPPORT: bool
_NEON_BUILD_SUPPORT: bool
_SSE2_BUILD_SUPPORT: bool
TRANSLATION_TABLES: FrozenSet[int]
METAGENOMIC_BINS: Tuple[MetagenomicBin]
# --- Sequence mask ----------------------------------------------------------
......@@ -59,7 +68,7 @@ class Sequence(typing.Sized):
start: int,
training_info: TrainingInfo,
strand: int = 1,
exact: bool = True
exact: bool = True,
) -> int: ...
# --- Connection Scorer ------------------------------------------------------
......@@ -68,7 +77,9 @@ class ConnectionScorer:
def __init__(self, backend: str = "detect") -> None: ...
def index(self, nodes: Nodes) -> None: ...
def compute_skippable(self, min: int, i: int) -> None: ...
def score_connections(self, nodes: Nodes, min: int, i: int, tinf: TrainingInfo, final: bool = False) -> None: ...
def score_connections(
self, nodes: Nodes, min: int, i: int, tinf: TrainingInfo, final: bool = False
) -> None: ...
# --- Nodes ------------------------------------------------------------------
......@@ -176,7 +187,9 @@ class Gene:
def score(self) -> float: ...
def confidence(self) -> float: ...
def sequence(self) -> str: ...
def translate(self, translation_table: Optional[int] = None, unknown_residue: str = "X") -> str: ...
def translate(
self, translation_table: Optional[int] = None, unknown_residue: str = "X"
) -> str: ...
class Genes(typing.Sequence[Gene]):
def __bool__(self) -> int: ...
......@@ -189,17 +202,26 @@ class Genes(typing.Sequence[Gene]):
def __setstate__(self, state: Dict[str, object]) -> None: ...
def clear(self) -> None: ...
def write_gff(self, file: TextIO, prefix: str = "gene_") -> int: ...
def write_genes(self, file: TextIO, prefix: str ="gene_", width: typing.Optional[int] = 70) -> int: ...
def write_translations(self, file: TextIO, prefix: str = "gene_", width: typing.Optional[int] = 60, translation_table: typing.Optional[int] = None) -> int: ...
def write_genes(
self, file: TextIO, prefix: str = "gene_", width: typing.Optional[int] = 70
) -> int: ...
def write_translations(
self,
file: TextIO,
prefix: str = "gene_",
width: typing.Optional[int] = 60,
translation_table: typing.Optional[int] = None,
) -> int: ...
def write_scores(self, file: TextIO, header: bool = True) -> int: ...
# --- Training Info ----------------------------------------------------------
class TrainingInfo:
@classmethod
def load(cls, fp: typing.BinaryIO) -> TrainingInfo: ...
def __init__(self, gc: float, start_weight: float = 4.35, translation_table: int = 11) -> None: ...
def __init__(
self, gc: float, start_weight: float = 4.35, translation_table: int = 11
) -> None: ...
def __repr__(self) -> str: ...
def __getstate__(self) -> Dict[str, object]: ...
def __setstate__(self, state: Dict[str, object]) -> None: ...
......@@ -230,7 +252,6 @@ class TrainingInfo:
def start_weight(self, st_wt: float) -> None: ...
def dump(self, fp: typing.BinaryIO) -> None: ...
# --- Metagenomic Bins -------------------------------------------------------
class MetagenomicBin:
......@@ -241,7 +262,6 @@ class MetagenomicBin:
@property
def description(self) -> str: ...
# --- Pyrodigal --------------------------------------------------------------
class OrfFinder:
......@@ -275,10 +295,7 @@ class OrfFinder:
def min_edge_gene(self) -> int: ...
@property
def max_overlap(self) -> int: ...
def find_genes(
self,
sequence: Union[Sequence, str, bytes, bytearray]
) -> Genes: ...
def find_genes(self, sequence: Union[Sequence, str, bytes, bytearray]) -> Genes: ...
def train(
self,
sequence: Union[Sequence, str, bytes, bytearray],
......
......@@ -12,24 +12,96 @@ from . import __name__, __author__, __version__
from ._pyrodigal import TRANSLATION_TABLES, OrfFinder, TrainingInfo
from .tests.fasta import parse
def argument_parser():
parser = argparse.ArgumentParser(prog=__name__, add_help=False)
parser.add_argument("-a", required=False, metavar="trans_file", help="Write protein translations to the selected file.")
parser.add_argument("-c", required=False, action="store_true", help="Closed ends. Do not allow genes to run off edges.", default=False)
parser.add_argument("-d", required=False, metavar="nuc_file", help="Write nucleotide sequences of genes to the selected file.")
parser.add_argument("-f", required=False, metavar="output_type", help="Select output format.", choices={"gff"}, default="gff")
parser.add_argument("-g", required=False, metavar="tr_table", type=int, choices=TRANSLATION_TABLES, help="Specify a translation table to use.", default=11)
parser.add_argument("-i", metavar="input_file", required=True, help="Specify FASTA input file.")
parser.add_argument("-m", action="store_true", help="Treat runs of N as masked sequence; don't build genes across them.", default=False)
parser.add_argument("-n", action="store_true", help="Bypass Shine-Dalgarno trainer and force a full motif scan.", default=False)
parser.add_argument("-o", metavar="output_file", required=False, help="Specify output file.")
parser.add_argument("-p", required=False, metavar="mode", help="Select procedure.", choices={"single", "meta"}, default="single")
parser.add_argument("-s", required=False, metavar="start_file", help="Write all potential genes (with scores) to the selected file.")
parser.add_argument("-t", required=False, metavar="training_file", help="Write a training file (if none exists); otherwise, read and use the specified training file.")
parser.add_argument("-h", "--help", action="help", help="Show this help message and exit.")
parser.add_argument("-V", "--version", help="Show version number and exit.", action="version", version="{} v{}".format(__name__, __version__))
parser.add_argument(
"-a",
required=False,
metavar="trans_file",
help="Write protein translations to the selected file.",
)
parser.add_argument(
"-c",
required=False,
action="store_true",
help="Closed ends. Do not allow genes to run off edges.",
default=False,
)
parser.add_argument(
"-d",
required=False,
metavar="nuc_file",
help="Write nucleotide sequences of genes to the selected file.",
)
parser.add_argument(
"-f",
required=False,
metavar="output_type",
help="Select output format.",
choices={"gff"},
default="gff",
)
parser.add_argument(
"-g",
required=False,
metavar="tr_table",
type=int,
choices=TRANSLATION_TABLES,
help="Specify a translation table to use.",
default=11,
)
parser.add_argument(
"-i", metavar="input_file", required=True, help="Specify FASTA input file."
)
parser.add_argument(
"-m",
action="store_true",
help="Treat runs of N as masked sequence; don't build genes across them.",
default=False,
)
parser.add_argument(
"-n",
action="store_true",
help="Bypass Shine-Dalgarno trainer and force a full motif scan.",
default=False,
)
parser.add_argument(
"-o", metavar="output_file", required=False, help="Specify output file."
)
parser.add_argument(
"-p",
required=False,
metavar="mode",
help="Select procedure.",
choices={"single", "meta"},
default="single",
)
parser.add_argument(
"-s",
required=False,
metavar="start_file",
help="Write all potential genes (with scores) to the selected file.",
)
parser.add_argument(
"-t",
required=False,
metavar="training_file",
help="Write a training file (if none exists); otherwise, read and use the specified training file.",
)
parser.add_argument(
"-h", "--help", action="help", help="Show this help message and exit."
)
parser.add_argument(
"-V",
"--version",
help="Show version number and exit.",
action="version",
version="{} v{}".format(__name__, __version__),
)
return parser
def main(argv=None, stdout=sys.stdout, stderr=sys.stderr):
parser = argument_parser()
args = parser.parse_args(argv)
......@@ -39,13 +111,20 @@ def main(argv=None, stdout=sys.stdout, stderr=sys.stderr):
# open output files if required
nuc_file = None if args.d is None else ctx.enter_context(open(args.d, "w"))
prot_file = None if args.a is None else ctx.enter_context(open(args.a, "w"))
scores_file = None if args.s is None else ctx.enter_context(open(args.s, "w"))
out_file = stdout if args.o is None else ctx.enter_context(open(args.o, "w"))
scores_file = (
None if args.s is None else ctx.enter_context(open(args.s, "w"))
)
out_file = (
stdout if args.o is None else ctx.enter_context(open(args.o, "w"))
)
# load training info
if args.t is not None:
if args.p == "meta":
print("Error: cannot specify metagenomic sequence with a training file.", file=stderr)
print(
"Error: cannot specify metagenomic sequence with a training file.",
file=stderr,
)
return 1
elif os.path.exists(args.t):
with open(args.t, "rb") as f:
......@@ -67,7 +146,9 @@ def main(argv=None, stdout=sys.stdout, stderr=sys.stderr):
for i, seq in enumerate(parse(args.i)):
# train if not in meta mode and encountering the first sequence
if args.p == "single" and i == 0:
training_info = pyrodigal.train(seq.seq, force_nonsd=args.n, translation_table=args.g)
training_info = pyrodigal.train(
seq.seq, force_nonsd=args.n, translation_table=args.g
)
if args.t is not None and not os.path.exists(args.t):
with open(args.t, "wb") as f:
training_info.dump(f)
......
......@@ -9,6 +9,7 @@ from . import (
test_training_info,
)
def load_tests(loader, suite, pattern):
suite.addTests(loader.loadTestsFromModule(test_connection_scorer))
suite.addTests(loader.loadTestsFromModule(test_gene))
......
......@@ -11,7 +11,6 @@ from .fasta import parse
class TestConnectionScorer(unittest.TestCase):
def assertNodeEqual(self, n1, n2):
self.assertEqual(n1.index, n2.index)
self.assertEqual(n1.strand, n2.strand)
......@@ -33,8 +32,12 @@ class TestConnectionScorer(unittest.TestCase):
cls.record = next(parse(f))
@unittest.skipUnless(_pyrodigal._TARGET_CPU == "x86", "requires x86 CPU")
@unittest.skipUnless(_pyrodigal._SSE2_BUILD_SUPPORT, "requires extension compiled with SSE2 support")
@unittest.skipUnless(_pyrodigal._SSE2_RUNTIME_SUPPORT, "requires machine with SSE2 support")
@unittest.skipUnless(
_pyrodigal._SSE2_BUILD_SUPPORT, "requires extension compiled with SSE2 support"
)
@unittest.skipUnless(
_pyrodigal._SSE2_RUNTIME_SUPPORT, "requires machine with SSE2 support"
)
def test_score_connections_sse(self):
# setup
seq = Sequence.from_string(self.record.seq)
......@@ -65,8 +68,12 @@ class TestConnectionScorer(unittest.TestCase):
self.assertNodeEqual(n1, n2)
@unittest.skipUnless(_pyrodigal._TARGET_CPU == "x86", "requires x86 CPU")
@unittest.skipUnless(_pyrodigal._AVX2_BUILD_SUPPORT, "requires extension compiled with AVX2 support")
@unittest.skipUnless(_pyrodigal._AVX2_RUNTIME_SUPPORT, "requires machine with AVX2 support")
@unittest.skipUnless(
_pyrodigal._AVX2_BUILD_SUPPORT, "requires extension compiled with AVX2 support"
)
@unittest.skipUnless(
_pyrodigal._AVX2_RUNTIME_SUPPORT, "requires machine with AVX2 support"
)
def test_score_connections_avx(self):
# setup
seq = Sequence.from_string(self.record.seq)
......@@ -96,9 +103,15 @@ class TestConnectionScorer(unittest.TestCase):
for n1, n2 in zip(nodes_avx, nodes_none):
self.assertNodeEqual(n1, n2)
@unittest.skipUnless(_pyrodigal._TARGET_CPU in ("arm", "aarch64"), "requires ARM CPU")
@unittest.skipUnless(_pyrodigal._NEON_BUILD_SUPPORT, "requires extension compiled with NEON support")
@unittest.skipUnless(_pyrodigal._NEON_RUNTIME_SUPPORT, "requires machine with NEON support")
@unittest.skipUnless(
_pyrodigal._TARGET_CPU in ("arm", "aarch64"), "requires ARM CPU"
)
@unittest.skipUnless(
_pyrodigal._NEON_BUILD_SUPPORT, "requires extension compiled with NEON support"
)
@unittest.skipUnless(
_pyrodigal._NEON_RUNTIME_SUPPORT, "requires machine with NEON support"
)
def test_score_connections_neon(self):
# setup
seq = Sequence.from_string(self.record.seq)
......
......@@ -10,7 +10,6 @@ from .fasta import parse
class TestGene(unittest.TestCase):
@classmethod
def find_genes(cls, seq):
p = OrfFinder(meta=True)
......
......@@ -13,7 +13,6 @@ from .fasta import parse
class TestGenes(unittest.TestCase):
@classmethod
def setUpClass(cls):
data = os.path.realpath(os.path.join(__file__, "..", "data"))
......@@ -32,14 +31,14 @@ class TestGenes(unittest.TestCase):
with self.assertRaises(IndexError):
self.genes[length]
with self.assertRaises(IndexError):
self.genes[-length-1]
self.genes[-length - 1]
def test_iter(self):
for i, gene in zip(range(len(self.genes)), self.genes):
self.assertEqual(gene._gene_data, self.genes[i]._gene_data)
def test_reversed(self):
for i, gene in zip(range(1, len(self.genes)+1), reversed(self.genes)):
for i, gene in zip(range(1, len(self.genes) + 1), reversed(self.genes)):
self.assertEqual(gene._gene_data, self.genes[-i]._gene_data)
def test_bool(self):
......@@ -51,7 +50,7 @@ class TestGenes(unittest.TestCase):
for gene in self.genes:
self.assertIn(gene.translation_table, valid)
@unittest.skipIf(sys.implementation.name != 'cpython', 'can panic with PyPy')
@unittest.skipIf(sys.implementation.name != "cpython", "can panic with PyPy")
def test_collection_abc_subclass(self):
self.assertIsInstance(self.genes, collections.abc.Sequence)
self.assertIsInstance(self.genes, collections.abc.Sized)
......@@ -66,18 +65,14 @@ class TestGenes(unittest.TestCase):
actual = [
line.strip()
for line in buffer.getvalue().splitlines()
if not line.startswith("#")
and line.strip()
if not line.startswith("#") and line.strip()
]
data = os.path.realpath(os.path.join(__file__, "..", "data"))
tsv = os.path.join(data, "SRR492066.meta.tsv")
with open(tsv) as f:
expected = [
line.strip()
for line in f
if not line.startswith("#")
and line.strip()
line.strip() for line in f if not line.startswith("#") and line.strip()
]
r1 = csv.reader(actual, dialect="excel-tab")
......
......@@ -2,8 +2,8 @@ import unittest
from .. import Mask
class TestMask(unittest.TestCase):
class TestMask(unittest.TestCase):
def test_repr(self):
mask = Mask(1, 2)
self.assertEqual(repr(mask), "<pyrodigal._pyrodigal.Mask begin=1 end=2>")
......
......@@ -12,7 +12,6 @@ from .fasta import parse
class TestNodes(unittest.TestCase):
def assertNodeEqual(self, n1, n2):
self.assertEqual(n1.index, n2.index, "indices differ")
self.assertEqual(n1.strand, n2.strand, "strands differ")
......@@ -67,10 +66,10 @@ class TestNodes(unittest.TestCase):
nodes1 = Nodes()
nodes1.extract(seq, translation_table=tt)
nodes2 = pickle.loads(pickle.dumps(nodes1))
self.assertEqual(len(nodes1), len(nodes2), "lengths differ")
self.assertEqual(len(nodes1), len(nodes2), "lengths differ")
for n1, n2 in zip(nodes1, nodes2):
self.assertNodeEqual(n1, n2)
def test_pickle_empty(self):
nodes1 = Nodes()
nodes2 = pickle.loads(pickle.dumps(nodes1))
......
......@@ -11,7 +11,6 @@ from .utils import load_record, load_proteins, load_genes
class _OrfFinderTestCase(object):
def assertGeneEqual(self, gene1, gene2):
self.assertEqual(gene1.begin, gene2.begin)
self.assertEqual(gene1.end, gene2.end)
......@@ -100,7 +99,6 @@ class _OrfFinderTestCase(object):
class _TestMode(_OrfFinderTestCase):
def test_find_genes_KK037166(self):
record = load_record("KK037166")
proteins = load_proteins("KK037166", self.mode)
......@@ -127,12 +125,12 @@ class _TestMode(_OrfFinderTestCase):
preds = self.find_genes(self.get_sequence(record))
self.assertGenesEqual(preds, genes)
self.assertPredictionsEqual(preds, proteins)
class _TestBin(object):
@classmethod
def get_sequence(cls, r):
return r.seq.encode('ascii')
return r.seq.encode("ascii")
class _TestTxt(object):
......@@ -143,6 +141,7 @@ class _TestTxt(object):
class _TestSingle(object):
mode = "single"
@classmethod
def find_genes(cls, seq):
p = OrfFinder(meta=False)
......@@ -154,6 +153,7 @@ class _TestSingle(object):
class _TestMeta(object):
mode = "meta"
@classmethod
def find_genes(cls, seq):
p = OrfFinder(meta=True)
......@@ -177,7 +177,6 @@ class TestSingleBin(_TestSingle, _TestBin, _TestMode, unittest.TestCase):
class TestOrfFinder(_OrfFinderTestCase, unittest.TestCase):
def test_invalid_overlap(self):
self.assertRaises(ValueError, OrfFinder, min_gene=10, max_overlap=100)
self.assertRaises(ValueError, OrfFinder, max_overlap=-1)
......@@ -187,7 +186,6 @@ class TestOrfFinder(_OrfFinderTestCase, unittest.TestCase):
class TestMeta(_OrfFinderTestCase, unittest.TestCase):
def test_train(self):
record = load_record("SRR492066")
p = OrfFinder(meta=True)
......@@ -255,7 +253,9 @@ class TestMeta(_OrfFinderTestCase, unittest.TestCase):
record = load_record("KK037166")
genes = load_genes("KK037166", "meta+mask")
orf_finder = OrfFinder(meta=True, min_gene=30, min_edge_gene=20, max_overlap=20, mask=True)
orf_finder = OrfFinder(
meta=True, min_gene=30, min_edge_gene=20, max_overlap=20, mask=True
)
preds = orf_finder.find_genes(record.seq)
self.assertGreaterEqual(len(preds), len(genes))
......@@ -290,10 +290,9 @@ class TestMeta(_OrfFinderTestCase, unittest.TestCase):
self.assertEqual(genes[1].start_type, "ATG")
self.assertEqual(genes[1].begin, 426)
self.assertEqual(genes[1].end, 590)
class TestSingle(_OrfFinderTestCase, unittest.TestCase):
class TestSingle(_OrfFinderTestCase, unittest.TestCase):
def test_train_info(self):
record = load_record("SRR492066")
p = OrfFinder(meta=False)
......@@ -325,7 +324,7 @@ class TestSingle(_OrfFinderTestCase, unittest.TestCase):
warnings.simplefilter("ignore")
p.train(str(record.seq))
genes = p.find_genes(str(record.seq))
del p # normally should not deallocate training info since it's RC
del p # normally should not deallocate training info since it's RC
self.assertEqual(genes[0].translate(), str(proteins[0].seq))
def test_short_sequences(self):
......@@ -357,7 +356,7 @@ class TestSingle(_OrfFinderTestCase, unittest.TestCase):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
p1.train(str(record.seq[:20000]))
# pickle/unpickle the OrfFinder
# pickle/unpickle the OrfFinder
p2 = pickle.loads(pickle.dumps(p1))
# make sure the same genes are found
g1 = p1.find_genes(record.seq)
......@@ -365,7 +364,7 @@ class TestSingle(_OrfFinderTestCase, unittest.TestCase):
# make sure genes are the same
self.assertEqual(len(g1), len(g2))
for gene1, gene2 in zip(g1, g2):
self.assertGeneEqual(gene1, gene2)
self.assertGeneEqual(gene1, gene2)
def test_training_info_pickle(self):
record = load_record("SRR492066")
......@@ -374,7 +373,7 @@ class TestSingle(_OrfFinderTestCase, unittest.TestCase):
with warnings.catch_warnings():
warnings.simplefilter("ignore")
p1.train(str(record.seq[:20000]))
# pickle/unpickle the TrainingInfo
# pickle/unpickle the TrainingInfo
ti = pickle.loads(pickle.dumps(p1.training_info))
p2 = OrfFinder(meta=False, training_info=ti, min_gene=60)
# make sure the same genes are found
......
......@@ -6,7 +6,6 @@ from .._pyrodigal import METAGENOMIC_BINS
class TestSequence(unittest.TestCase):
def test_pickle(self):
s1 = Sequence.from_string("ATGCNNNNNNNNNNATGCNNNNNNNNTGC", mask=True)
s2 = pickle.loads(pickle.dumps(s1))
......
......@@ -14,7 +14,6 @@ from .utils import load_record
class TestTrainingInfo(unittest.TestCase):
def assertTrainingInfoEqual(self, t1, t2):
self.assertEqual(t1.translation_table, t2.translation_table)
self.assertEqual(t1.gc, t2.gc)
......
......@@ -10,12 +10,14 @@ def load_record(name):
with gzip.open(fna, "rt") as f:
return next(parse(f))
def load_proteins(name, mode):
data = os.path.realpath(os.path.join(__file__, "..", "data"))
faa = os.path.join(data, "{name}.{mode}.faa.gz".format(name=name, mode=mode))
with gzip.open(faa, "rt") as f:
return list(parse(f))
def load_genes(name, mode):
data = os.path.realpath(os.path.join(__file__, "..", "data"))
fna = os.path.join(data, "{name}.{mode}.fna.gz".format(name=name, mode=mode))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment