From c7cc98b96407b16d62bb50f5927bc08cf663333b Mon Sep 17 00:00:00 2001
From: Martin Larralde <martin.larralde@embl.de>
Date: Wed, 9 Oct 2024 02:23:21 +0200
Subject: [PATCH] Implement `__len__` for `HMMPressedFile` using the SSI index

---
 pyhmmer/plan7.pxd                        |  1 +
 pyhmmer/plan7.pyi                        |  1 +
 pyhmmer/plan7.pyx                        |  7 +++++++
 pyhmmer/tests/test_plan7/test_hmmfile.py | 10 ++++++++++
 4 files changed, 19 insertions(+)

diff --git a/pyhmmer/plan7.pxd b/pyhmmer/plan7.pxd
index 2b2e5977..f0200666 100644
--- a/pyhmmer/plan7.pxd
+++ b/pyhmmer/plan7.pxd
@@ -173,6 +173,7 @@ cdef class HMMPressedFile:
     cdef P7_HMMFILE* _hfp
     cdef Alphabet _alphabet
     cdef HMMFile _hmmfile
+    cdef size_t _position
 
     cpdef void close(self) except *
     cpdef void rewind(self) except *
diff --git a/pyhmmer/plan7.pyi b/pyhmmer/plan7.pyi
index 4393079d..e6c67a08 100644
--- a/pyhmmer/plan7.pyi
+++ b/pyhmmer/plan7.pyi
@@ -453,6 +453,7 @@ class HMMPressedFile(typing.Iterator[OptimizedProfile]):
     ) -> bool: ...
     def __iter__(self) -> HMMPressedFile: ...
     def __next__(self) -> OptimizedProfile: ...
+    def __len__(self) -> int: ...
     @property
     def closed(self) -> bool: ...
     @property
diff --git a/pyhmmer/plan7.pyx b/pyhmmer/plan7.pyx
index 2f7e1fc1..9137461c 100644
--- a/pyhmmer/plan7.pyx
+++ b/pyhmmer/plan7.pyx
@@ -3750,6 +3750,7 @@ cdef class HMMPressedFile:
         self._alphabet = None
         self._hmmfile = None
         self._hfp = NULL
+        self._position = 0
 
     def __init__(self, object file):
         """__init__(self, file)\n--\n
@@ -3790,6 +3791,10 @@ cdef class HMMPressedFile:
     def __exit__(self, exc_type, exc_value, traceback):
         self.close()
 
+    def __len__(self):
+        assert self._hfp.ssi != NULL
+        return self._hfp.ssi.nprimary - self._position
+
     # --- Properties ---------------------------------------------------------
 
     @property
@@ -3822,6 +3827,7 @@ cdef class HMMPressedFile:
         """Rewind the file back to the beginning.
         """
         self._hmmfile.rewind()
+        self._position = 0
 
     cpdef OptimizedProfile read(self):
         """Read the next optimized profile from the file.
@@ -3856,6 +3862,7 @@ cdef class HMMPressedFile:
 
         if status == libeasel.eslOK:
             om.alphabet = self._alphabet
+            self._position += 1
             return om
         elif status == libeasel.eslEOF:
             return None
diff --git a/pyhmmer/tests/test_plan7/test_hmmfile.py b/pyhmmer/tests/test_plan7/test_hmmfile.py
index 900aa247..a71fea8f 100644
--- a/pyhmmer/tests/test_plan7/test_hmmfile.py
+++ b/pyhmmer/tests/test_plan7/test_hmmfile.py
@@ -93,6 +93,16 @@ class _TestHMMPath:
         with self.open_hmm(path) as f:
             self.check_hmmfile(f.optimized_profiles())
 
+    def test_optimized_profiles_length(self):
+        path = os.path.join(self.hmms_folder, "db", "{}.hmm".format(self.ID))
+        with self.open_hmm(path) as f:
+            profiles = f.optimized_profiles()
+            self.assertEqual(len(profiles), len(self.NAMES))
+            profiles.read()
+            self.assertEqual(len(profiles), len(self.NAMES) - 1)
+            profiles.rewind()
+            self.assertEqual(len(profiles), len(self.NAMES))
+
     def test_rewind(self):
         path = os.path.join(self.hmms_folder, "txt", "{}.hmm".format(self.ID))
         with self.open_hmm(path) as f:
-- 
GitLab