From 8b41a59a7d451dbaf23124b3992477a2d7d244a7 Mon Sep 17 00:00:00 2001
From: Martin Larralde <martin.larralde@embl.de>
Date: Thu, 6 Jun 2024 15:09:39 +0200
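Subject: [PATCH] Make `HMMFile` and `HMMPressedFile` raise `AlphabetMismatch`
 on files with mixed alphabets

When `p7_hmmfile_Read` or `p7_oprofile_ReadMSV` returns `eslEINCOMPAT`,
raise `AlphabetMismatch(self._alphabet)` instead of a `ValueError` with a
hand-formatted message, and document the new exception in both docstrings.
Also reword the `esl_msa_Digitize` error in `TextMSA` so that it reports
`alphabet.type` rather than formatting the `alphabet` object itself.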

---
 pyhmmer/easel.pyx |  2 +-
 pyhmmer/plan7.pyx | 13 +++++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/pyhmmer/easel.pyx b/pyhmmer/easel.pyx
index ee243d4f..5f87047a 100644
--- a/pyhmmer/easel.pyx
+++ b/pyhmmer/easel.pyx
@@ -3716,7 +3716,7 @@ cdef class TextMSA(MSA):
             return new
         elif status == libeasel.eslEINVAL:
             err_msg = errbuf.decode("utf-8", "replace")
-            raise ValueError(f"Cannot digitize MSA with alphabet {alphabet}: {err_msg}")
+            raise ValueError(f"Cannot digitize MSA with {alphabet.type} alphabet: {err_msg}")
         else:
             raise UnexpectedError(status, "esl_msa_Digitize")
 
diff --git a/pyhmmer/plan7.pyx b/pyhmmer/plan7.pyx
index b5cf9316..6f8b873e 100644
--- a/pyhmmer/plan7.pyx
+++ b/pyhmmer/plan7.pyx
@@ -3610,6 +3610,9 @@ cdef class HMMFile:
                 file, or when the file could not be parsed.
             `~pyhmmer.errors.AllocationError`: When memory for the HMM could
                 not be allocated successfully.
+            `~pyhmmer.errors.AlphabetMismatch`: When the file contains HMMs
+                in different alphabets, or in an alphabet that is different
+                from the alphabet used to initialize the `HMMFile`.
 
         .. versionadded:: 0.4.11
 
@@ -3638,8 +3641,7 @@ cdef class HMMFile:
         elif status == libeasel.eslEFORMAT:
             raise ValueError("Invalid format in file: {}".format(self._hfp.errbuf.decode("utf-8", "replace")))
         elif status == libeasel.eslEINCOMPAT:
-            alphabet = libeasel.alphabet.esl_abc_DecodeType(self._alphabet.type)
-            raise ValueError("HMM is not in the expected {} alphabet".format(alphabet))
+            raise AlphabetMismatch(self._alphabet)
         else:
             _reraise_error()
             raise UnexpectedError(status, "p7_hmmfile_Read")
@@ -3818,6 +3820,10 @@ cdef class HMMPressedFile:
                 closed file, or when the file could not be parsed.
             `~pyhmmer.errors.AllocationError`: When memory for the
                 `OptimizedProfile` could not be allocated successfully.
+            `~pyhmmer.errors.AlphabetMismatch`: When the file contains
+                optimized profiles in different alphabets, or in an alphabet
+                that is different from the alphabet used to initialize the
+                `HMMFile`.
 
         .. versionadded:: 0.4.11
 
@@ -3845,8 +3851,7 @@ cdef class HMMPressedFile:
         elif status == libeasel.eslEFORMAT:
             raise ValueError("Invalid format in file: {}".format(self._hfp.errbuf.decode("utf-8", "replace")))
         elif status == libeasel.eslEINCOMPAT:
-            alphabet = libeasel.alphabet.esl_abc_DecodeType(self._alphabet.type)
-            raise ValueError("HMM is not in the expected {} alphabet".format(alphabet))
+            raise AlphabetMismatch(self._alphabet)
         else:
             _reraise_error()
             raise UnexpectedError(status, "p7_oprofile_ReadMSV")
-- 
GitLab