From c7cc98b96407b16d62bb50f5927bc08cf663333b Mon Sep 17 00:00:00 2001
From: Martin Larralde <martin.larralde@embl.de>
Date: Wed, 9 Oct 2024 02:23:21 +0200
Subject: [PATCH] Implement `__len__` for `HMMPressedFile` using the SSI index

---
 pyhmmer/plan7.pxd                        |  1 +
 pyhmmer/plan7.pyi                        |  1 +
 pyhmmer/plan7.pyx                        |  7 +++++++
 pyhmmer/tests/test_plan7/test_hmmfile.py | 10 ++++++++++
 4 files changed, 19 insertions(+)

diff --git a/pyhmmer/plan7.pxd b/pyhmmer/plan7.pxd
index 2b2e5977..f0200666 100644
--- a/pyhmmer/plan7.pxd
+++ b/pyhmmer/plan7.pxd
@@ -173,6 +173,7 @@ cdef class HMMPressedFile:
     cdef P7_HMMFILE* _hfp
     cdef Alphabet    _alphabet
     cdef HMMFile     _hmmfile
+    cdef size_t      _position
 
     cpdef void close(self) except *
     cpdef void rewind(self) except *
diff --git a/pyhmmer/plan7.pyi b/pyhmmer/plan7.pyi
index 4393079d..e6c67a08 100644
--- a/pyhmmer/plan7.pyi
+++ b/pyhmmer/plan7.pyi
@@ -453,6 +453,7 @@ class HMMPressedFile(typing.Iterator[OptimizedProfile]):
     ) -> bool: ...
     def __iter__(self) -> HMMPressedFile: ...
     def __next__(self) -> OptimizedProfile: ...
+    def __len__(self) -> int: ...
     @property
     def closed(self) -> bool: ...
     @property
diff --git a/pyhmmer/plan7.pyx b/pyhmmer/plan7.pyx
index 2f7e1fc1..9137461c 100644
--- a/pyhmmer/plan7.pyx
+++ b/pyhmmer/plan7.pyx
@@ -3750,6 +3750,7 @@ cdef class HMMPressedFile:
         self._alphabet = None
         self._hmmfile = None
         self._hfp = NULL
+        self._position = 0
 
     def __init__(self, object file):
         """__init__(self, file)\n--\n
@@ -3790,6 +3791,30 @@ cdef class HMMPressedFile:
     def __exit__(self, exc_type, exc_value, traceback):
         self.close()
 
+    def __len__(self):
+        """Return the number of optimized profiles left to read.
+
+        The total number of profiles is taken from the primary key count
+        of the SSI index attached to the pressed file. The number of
+        profiles already consumed by `read` is subtracted from it, so the
+        result shrinks by one after each successful read and is restored
+        by `rewind`.
+
+        Raises:
+            ValueError: When no file is open (the HMMER handle is NULL).
+            RuntimeError: When the open file has no SSI index attached.
+
+        """
+        # Dereferencing a NULL handle would crash the interpreter, so
+        # fail with a regular Python exception instead.
+        if self._hfp == NULL:
+            raise ValueError("I/O operation on closed file.")
+        # Explicit check rather than `assert`, which is compiled out
+        # when assertions are disabled.
+        if self._hfp.ssi == NULL:
+            raise RuntimeError("pressed HMM file has no SSI index")
+        return self._hfp.ssi.nprimary - self._position
+
     # --- Properties ---------------------------------------------------------
 
     @property
@@ -3822,6 +3827,7 @@ cdef class HMMPressedFile:
         """Rewind the file back to the beginning.
         """
         self._hmmfile.rewind()
+        self._position = 0
 
     cpdef OptimizedProfile read(self):
         """Read the next optimized profile from the file.
@@ -3856,6 +3862,7 @@ cdef class HMMPressedFile:
 
         if status == libeasel.eslOK:
             om.alphabet = self._alphabet
+            self._position += 1
             return om
         elif status == libeasel.eslEOF:
             return None
diff --git a/pyhmmer/tests/test_plan7/test_hmmfile.py b/pyhmmer/tests/test_plan7/test_hmmfile.py
index 900aa247..a71fea8f 100644
--- a/pyhmmer/tests/test_plan7/test_hmmfile.py
+++ b/pyhmmer/tests/test_plan7/test_hmmfile.py
@@ -93,6 +93,29 @@ class _TestHMMPath:
         with self.open_hmm(path) as f:
             self.check_hmmfile(f.optimized_profiles())
 
+    def test_optimized_profiles_length(self):
+        """Check `len` tracks the number of profiles left in a pressed file."""
+        path = os.path.join(self.hmms_folder, "db", "{}.hmm".format(self.ID))
+        total = len(self.NAMES)
+        with self.open_hmm(path) as f:
+            profiles = f.optimized_profiles()
+            # a fresh file reports every profile of the database
+            self.assertEqual(len(profiles), total)
+            # each successful read consumes exactly one profile
+            profiles.read()
+            self.assertEqual(len(profiles), total - 1)
+            # rewinding restores the full length
+            profiles.rewind()
+            self.assertEqual(len(profiles), total)
+            # draining the file brings the length down to zero...
+            for remaining in range(total, 0, -1):
+                self.assertEqual(len(profiles), remaining)
+                self.assertIsNotNone(profiles.read())
+            self.assertEqual(len(profiles), 0)
+            # ...and reading past the end must not make the count wrap
+            self.assertIsNone(profiles.read())
+            self.assertEqual(len(profiles), 0)
+
     def test_rewind(self):
         path = os.path.join(self.hmms_folder, "txt", "{}.hmm".format(self.ID))
         with self.open_hmm(path) as f:
-- 
GitLab