From f4f81a18c5f6c05558fe539261f42e43c5202125 Mon Sep 17 00:00:00 2001 From: Martin Larralde <martin.larralde@embl.de> Date: Tue, 8 Oct 2024 20:59:11 +0200 Subject: [PATCH] Fix detection of alphabet from arbitrary queries in `phmmer` and `hmmscan` --- pyhmmer/hmmer.py | 31 ++++++++++++++++++++----------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/pyhmmer/hmmer.py b/pyhmmer/hmmer.py index 05c511c0..fa1e1fe9 100644 --- a/pyhmmer/hmmer.py +++ b/pyhmmer/hmmer.py @@ -995,14 +995,13 @@ def phmmer( alphabet = alphabet or sequences.alphabet targets = sequences else: - alphabet = alphabet or Alphabet.amino() - targets = DigitalSequenceBlock(alphabet, sequences) - - if builder is None: - builder = Builder( - alphabet, - seed=options.get("seed", 42) - ) + sequences = peekable(sequences) + try: + alphabet = alphabet or sequences.peek().alphabet or Alphabet.amino() + targets = DigitalSequenceBlock(alphabet, sequences) + except StopIteration: + alphabet = alphabet or Alphabet.amino() + targets = DigitalSequenceBlock(alphabet) if "alphabet" not in options: options["alphabet"] = alphabet @@ -1170,8 +1169,13 @@ def jackhmmer( alphabet = alphabet or sequences.alphabet targets = sequences else: - alphabet = alphabet or Alphabet.amino() - targets = DigitalSequenceBlock(alphabet, sequences) + sequences = peekable(sequences) + try: + alphabet = alphabet or sequences.peek().alphabet or Alphabet.amino() + targets = DigitalSequenceBlock(alphabet, sequences) + except StopIteration: + alphabet = alphabet or Alphabet.amino() + targets = DigitalSequenceBlock(alphabet) if builder is None: builder = Builder( @@ -1513,7 +1517,12 @@ def hmmscan( if not isinstance(queries, collections.abc.Iterable): queries = (queries,) if isinstance(profiles, HMMPressedFile): - alphabet = alphabet or Alphabet.amino() # FIXME: try to detect alphabet? + opt = profiles.read() + profiles.rewind() + if opt is not None: + alphabet = alphabet or opt.alphabet + else: + alphabet = Alphabet.amino() targets = profiles elif isinstance(profiles, OptimizedProfileBlock): alphabet = alphabet or profiles.alphabet -- GitLab