Commit f9640791 authored by Martin Larralde
Browse files

Test that the generic connection filter produces the same score as the Prodigal code

parent f3f6e717
......@@ -12,6 +12,19 @@ from .fasta import parse
class TestConnectionScorer(unittest.TestCase):
def assertNodeEqual(self, n1, n2):
    """Assert that two nodes carry the same attribute values.

    The ``index``, ``strand``, ``type``, ``edge``, ``gc_bias`` and
    ``gc_cont`` attributes must compare equal, while the ``score``,
    ``cscore``, ``rscore``, ``sscore`` and ``tscore`` attributes must
    agree to 4 decimal places.

    Arguments:
        n1: The first node to compare.
        n2: The second node to compare.

    Raises:
        AssertionError: On the first mismatching attribute; the failure
            message names the attribute that differs.

    """
    # attributes compared with strict equality, in the order they are checked
    exact_attributes = ("index", "strand", "type", "edge", "gc_bias", "gc_cont")
    # score attributes, compared with a tolerance of 4 decimal places
    score_attributes = ("score", "cscore", "rscore", "sscore", "tscore")

    for name in exact_attributes:
        self.assertEqual(
            getattr(n1, name),
            getattr(n2, name),
            msg="node attribute %r differs" % name,
        )
    for name in score_attributes:
        self.assertAlmostEqual(
            getattr(n1, name),
            getattr(n2, name),
            places=4,
            msg="node attribute %r differs" % name,
        )
@classmethod
def setUpClass(cls):
data = os.path.realpath(os.path.join(__file__, "..", "data"))
......@@ -27,29 +40,29 @@ class TestConnectionScorer(unittest.TestCase):
seq = Sequence.from_string(self.record.seq)
tinf = METAGENOMIC_BINS[0].training_info
scorer_sse = ConnectionScorer(backend="sse")
scorer_generic = ConnectionScorer(backend=None)
scorer_none = ConnectionScorer(backend=None)
# add nodes from the sequence
nodes = Nodes()
nodes.extract(seq, translation_table=tinf.translation_table)
nodes.sort()
# index nodes for the scorers
scorer_sse.index(nodes)
scorer_generic.index(nodes)
scorer_none.index(nodes)
# use copies to compute both scores
nodes_sse = nodes.copy()
nodes_generic = nodes.copy()
nodes_none = nodes.copy()
for i in range(500, len(nodes)):
# compute boundary
j = 0 if i < 500 else i - 500
# score connections without fast-indexing skippable nodes
scorer_generic.compute_skippable(j, i)
scorer_generic.score_connections(nodes_generic, j, i, tinf, final=True)
scorer_none.compute_skippable(j, i)
scorer_none.score_connections(nodes_none, j, i, tinf, final=True)
# compute skippable nodes with the SSE filter and score connections
scorer_sse.compute_skippable(j, i)
scorer_sse.score_connections(nodes_sse, j, i, tinf, final=True)
# check that both methods scored the same
for n_sse, n_generic in zip(nodes_sse, nodes_generic):
self.assertEqual(n_sse.score, n_generic.score)
for n1, n2 in zip(nodes_sse, nodes_none):
self.assertNodeEqual(n1, n2)
@unittest.skipUnless(_pyrodigal._TARGET_CPU == "x86", "requires x86 CPU")
@unittest.skipUnless(_pyrodigal._AVX2_BUILD_SUPPORT, "requires extension compiled with AVX2 support")
......@@ -59,29 +72,29 @@ class TestConnectionScorer(unittest.TestCase):
seq = Sequence.from_string(self.record.seq)
tinf = METAGENOMIC_BINS[0].training_info
scorer_avx = ConnectionScorer(backend="avx")
scorer_generic = ConnectionScorer(backend=None)
scorer_none = ConnectionScorer(backend=None)
# add nodes from the sequence
nodes = Nodes()
nodes.extract(seq, translation_table=tinf.translation_table)
nodes.sort()
# index nodes for the scorers
scorer_avx.index(nodes)
scorer_generic.index(nodes)
scorer_none.index(nodes)
# use copies to compute both scores
nodes_avx = nodes.copy()
nodes_generic = nodes.copy()
nodes_none = nodes.copy()
for i in range(500, len(nodes)):
# compute boundary
j = 0 if i < 500 else i - 500
# score connections without fast-indexing skippable nodes
scorer_generic.compute_skippable(j, i)
scorer_generic.score_connections(nodes_generic, j, i, tinf, final=True)
scorer_none.compute_skippable(j, i)
scorer_none.score_connections(nodes_none, j, i, tinf, final=True)
# compute skippable nodes with the AVX filter and score connections
scorer_avx.compute_skippable(j, i)
scorer_avx.score_connections(nodes_avx, j, i, tinf, final=True)
# check that both methods scored the same
for n_avx, n_generic in zip(nodes_avx, nodes_generic):
self.assertEqual(n_avx.score, n_generic.score)
for n1, n2 in zip(nodes_avx, nodes_none):
self.assertNodeEqual(n1, n2)
@unittest.skipUnless(_pyrodigal._TARGET_CPU in ("arm", "aarch64"), "requires ARM CPU")
@unittest.skipUnless(_pyrodigal._NEON_BUILD_SUPPORT, "requires extension compiled with NEON support")
......@@ -91,26 +104,55 @@ class TestConnectionScorer(unittest.TestCase):
seq = Sequence.from_string(self.record.seq)
tinf = METAGENOMIC_BINS[0].training_info
scorer_avx = ConnectionScorer(backend="neon")
scorer_generic = ConnectionScorer(backend=None)
scorer_none = ConnectionScorer(backend=None)
# add nodes from the sequence
nodes = Nodes()
nodes.extract(seq, translation_table=tinf.translation_table)
nodes.sort()
# index nodes for the scorers
scorer_avx.index(nodes)
scorer_generic.index(nodes)
scorer_none.index(nodes)
# use copies to compute both scores
nodes_avx = nodes.copy()
nodes_generic = nodes.copy()
nodes_none = nodes.copy()
for i in range(500, len(nodes)):
# compute boundary
j = 0 if i < 500 else i - 500
# score connections without fast-indexing skippable nodes
scorer_generic.compute_skippable(j, i)
scorer_generic.score_connections(nodes_generic, j, i, tinf, final=True)
scorer_none.compute_skippable(j, i)
scorer_none.score_connections(nodes_none, j, i, tinf, final=True)
# compute skippable nodes with the NEON filter and score connections
scorer_avx.compute_skippable(j, i)
scorer_avx.score_connections(nodes_avx, j, i, tinf, final=True)
# check that both methods scored the same
for n_avx, n_generic in zip(nodes_avx, nodes_generic):
self.assertEqual(n_avx.score, n_generic.score)
for n1, n2 in zip(nodes_avx, nodes_none):
self.assertNodeEqual(n1, n2)
def test_score_connections_generic(self):
    """Check the ``generic`` backend against the ``backend=None`` scorer.

    Both scorers are indexed on the same sorted nodes and then score
    connections on their own copy of the nodes over a trailing window of
    500 nodes; every resulting pair of nodes must be equal according to
    `assertNodeEqual`.

    """
    window = 500

    # prepare the sequence, the training info and the two scorers
    sequence = Sequence.from_string(self.record.seq)
    training_info = METAGENOMIC_BINS[0].training_info
    generic_scorer = ConnectionScorer(backend="generic")
    plain_scorer = ConnectionScorer(backend=None)

    # extract and sort the nodes of the sequence
    all_nodes = Nodes()
    all_nodes.extract(sequence, translation_table=training_info.translation_table)
    all_nodes.sort()

    # let each scorer index the nodes
    generic_scorer.index(all_nodes)
    plain_scorer.index(all_nodes)

    # each scorer works on its own copy of the nodes
    generic_nodes = all_nodes.copy()
    plain_nodes = all_nodes.copy()

    for end in range(window, len(all_nodes)):
        # lower boundary of the window, clamped at the first node
        start = max(0, end - window)
        # score connections without fast-indexing skippable nodes
        plain_scorer.compute_skippable(start, end)
        plain_scorer.score_connections(
            plain_nodes, start, end, training_info, final=True
        )
        # compute skippable nodes with generic filter and score connections
        generic_scorer.compute_skippable(start, end)
        generic_scorer.score_connections(
            generic_nodes, start, end, training_info, final=True
        )

    # both approaches must have produced identical nodes
    for generic_node, plain_node in zip(generic_nodes, plain_nodes):
        self.assertNodeEqual(generic_node, plain_node)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment