diff --git a/README.md b/README.md index f6ec1b4273e59593f49488c637506ca8d19f45f7..ee0e60beedfbb337948728c34f6680a4a25c2608 100644 --- a/README.md +++ b/README.md @@ -77,19 +77,21 @@ instead of `bytes(record.seq)`. import pyfastani import Bio.SeqIO -m = pyfastani.Mapper() +sketch = pyfastani.Sketch() # add a single draft genome to the mapper, and index it ref = list(Bio.SeqIO.parse("vendor/FastANI/data/Shigella_flexneri_2a_01.fna", "fasta")) -m.add_draft("Shigella_flexneri_2a_01", (bytes(record.seq) for record in ref)) -m.index() +sketch.add_draft("S. flexneri", (bytes(record.seq) for record in ref)) + +# index the sketch and get a mapper +mapper = sketch.index() # read the query and query the mapper query = Bio.SeqIO.read("vendor/FastANI/data/Escherichia_coli_str_K12_MG1655.fna", "fasta") -hits = m.query_sequence(bytes(query.seq)) +hits = mapper.query_sequence(bytes(query.seq)) for hit in hits: - print("Escherichia_coli_str_K12_MG1655", hit.name, hit.identity, hit.matches, hit.fragments) + print("E. coli", hit.name, hit.identity, hit.matches, hit.fragments) ``` ### 🧪 [Scikit-bio](https://github.com/biocore/scikit-bio) diff --git a/pyfastani/_fastani.pyx b/pyfastani/_fastani.pyx index 27575b1cf6623b17785d3fa211eef041fdfd50fc..94c173ddf0656c7b3fb0c6b50135fef08b0ea108 100644 --- a/pyfastani/_fastani.pyx +++ b/pyfastani/_fastani.pyx @@ -363,11 +363,10 @@ cdef class Sketch: cdef Mapper mapper = Mapper.__new__(Mapper) mapper._param = self._param # copy params mapper._sk = self._sk - mapper._names = self._names + mapper._names = self._names.copy() mapper._lengths.swap(self._lengths) # reset the current sketch self._sk = new Sketch_t(self._param) - self._names = [] self.clear() # return the new mapper return mapper @@ -517,13 +516,18 @@ cdef class Mapper: contigs (iterable or `str` or `bytes`): The genome to query the mapper with. - Note: + Returns: + `list` of `~pyfastani.Hit`: The hits found for the query. 
+ + Hint: Sequence must be larger than the window size, the k-mer size, and the fragment length to be mapped, otherwise an empty list of hits will be returned. - Returns: - `list` of `~pyfastani.Hit`: The hits found for the query. + Note: + This method is reentrant and releases the GIL when hashing + the blocks, allowing the mapper to be queried in parallel for + several individual genomes. """ # delegate to C code @@ -538,13 +542,18 @@ cdef class Mapper: sequence (`str` or `bytes`): The genome to query the mapper with. - Note: + Returns: + `list` of `~pyfastani.Hit`: The hits found for the query. + + Hint: Sequence must be larger than the window size, the k-mer size, and the fragment length to be mapped, otherwise an empty list of hits will be returned. - Returns: - `list` of `~pyfastani.Hit`: The hits found for the query. + Note: + This method is reentrant and releases the GIL when hashing + the blocks, allowing the mapper to be queried in parallel for + several individual genomes. """ # delegate to C code