diff --git a/README.md b/README.md index 279c34e517aaeb9157e1d0e97dff8ac6eb422b98..468eee81a354cea78b9b0839ab659c0e2ec48c82 100644 --- a/README.md +++ b/README.md @@ -45,7 +45,9 @@ affect the `name` attribute of the hits returned for a query.* ### 🔬 [Biopython](https://github.com/biopython/biopython) Biopython does not let us access to the sequence directly, so we need to -convert it to bytes first with the `encode` method. +convert it to bytes first with the `bytes` builtin function. For older +versions of Biopython (earlier than 1.79), use `record.seq.encode()` +instead of `bytes(record.seq)`. ```python import pyfastani @@ -55,12 +57,12 @@ m = pyfastani.Mapper() # add a single draft genome to the mapper, and index it ref = list(Bio.SeqIO.parse("vendor/FastANI/data/Shigella_flexneri_2a_01.fna", "fasta")) -m.add_draft("Shigella_flexneri_2a_01", (record.seq.encode() for record in ref)) +m.add_draft("Shigella_flexneri_2a_01", (bytes(record.seq) for record in ref)) m.index() # read the query and query the mapper query = Bio.SeqIO.read("vendor/FastANI/data/Escherichia_coli_str_K12_MG1655.fna", "fasta") -hits = m.query_sequence(query.seq.encode()) +hits = m.query_sequence(bytes(query.seq)) for hit in hits: print("Escherichia_coli_str_K12_MG1655", hit.name, hit.identity, hit.matches, hit.fragments) diff --git a/pyfastani/tests/test_mapper.py b/pyfastani/tests/test_mapper.py index 4b444d479e503595cda4c1b23f3a422d920ec5cd..d7a305c9e415a1a8f4944491f2369b1a6c3548d2 100644 --- a/pyfastani/tests/test_mapper.py +++ b/pyfastani/tests/test_mapper.py @@ -57,15 +57,16 @@ class TestMapperSkbio(_TestMapper, unittest.TestCase): return sequence.values.view('B') try: - from Bio import SeqIO + import Bio.SeqIO except ImportError: - SeqIO = None + Bio = None -@unittest.skipUnless(SeqIO, "Biopython is required for this test suite") +@unittest.skipUnless(Bio, "Biopython is required for this test suite") class TestMapperBiopython(_TestMapper, unittest.TestCase): def _load_fasta(self, path): 
- return list(SeqIO.parse(path, "fasta")) + return list(Bio.SeqIO.parse(path, "fasta")) - def _get_sequence(self, sequence): - return sequence.seq.encode() + def _get_sequence(self, record): + version = tuple(map(int, Bio.__version__.split(".")[:2])) + return record.seq.encode() if version < (1, 79) else bytes(record.seq)