From f4f81a18c5f6c05558fe539261f42e43c5202125 Mon Sep 17 00:00:00 2001
From: Martin Larralde <martin.larralde@embl.de>
Date: Tue, 8 Oct 2024 20:59:11 +0200
Subject: [PATCH] Fix detection of alphabet from arbitrary queries in `phmmer`
 and `hmmscan`

---
 pyhmmer/hmmer.py | 31 ++++++++++++++++++++-----------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/pyhmmer/hmmer.py b/pyhmmer/hmmer.py
index 05c511c0..fa1e1fe9 100644
--- a/pyhmmer/hmmer.py
+++ b/pyhmmer/hmmer.py
@@ -995,14 +995,13 @@ def phmmer(
         alphabet = alphabet or sequences.alphabet
         targets = sequences
     else:
-        alphabet = alphabet or Alphabet.amino()
-        targets = DigitalSequenceBlock(alphabet, sequences)
-
-    if builder is None:
-        builder = Builder(
-            alphabet, 
-            seed=options.get("seed", 42)
-        )
+        sequences = peekable(sequences)
+        try:
+            alphabet = alphabet or sequences.peek().alphabet or Alphabet.amino()
+            targets = DigitalSequenceBlock(alphabet, sequences)
+        except StopIteration:
+            alphabet = alphabet or Alphabet.amino()
+            targets = DigitalSequenceBlock(alphabet)
 
     if "alphabet" not in options:
         options["alphabet"] = alphabet
@@ -1170,8 +1169,13 @@ def jackhmmer(
         alphabet = alphabet or sequences.alphabet
         targets = sequences
     else:
-        alphabet = alphabet or Alphabet.amino()
-        targets = DigitalSequenceBlock(alphabet, sequences)
+        sequences = peekable(sequences)
+        try:
+            alphabet = alphabet or sequences.peek().alphabet or Alphabet.amino()
+            targets = DigitalSequenceBlock(alphabet, sequences)
+        except StopIteration:
+            alphabet = alphabet or Alphabet.amino()
+            targets = DigitalSequenceBlock(alphabet)
 
     if builder is None:
         builder = Builder(
@@ -1513,7 +1517,12 @@ def hmmscan(
     if not isinstance(queries, collections.abc.Iterable):
         queries = (queries,)
     if isinstance(profiles, HMMPressedFile):
-        alphabet = alphabet or Alphabet.amino() # FIXME: try to detect alphabet?
+        opt = profiles.read()
+        profiles.rewind()
+        if opt is not None:
+            alphabet = alphabet or opt.alphabet
+        else:
+            alphabet = Alphabet.amino()
         targets = profiles
     elif isinstance(profiles, OptimizedProfileBlock):
         alphabet = alphabet or profiles.alphabet
-- 
GitLab