Commit f9a395ca authored by Martin Larralde
Browse files

Record name and accession of query in `TopHits` objects

parent c54273dc
......@@ -305,6 +305,8 @@ cdef class TopHits:
# computed and thresholding can be done correctly.
cdef P7_PIPELINE _pli
cdef P7_TOPHITS* _th
cdef bytes _qname
cdef bytes _qacc
cpdef dict __getstate__(self)
cpdef object __setstate__(self, dict state)
......@@ -313,10 +315,10 @@ cdef class TopHits:
cdef int _sort_by_key(self) nogil except 1
cdef int _sort_by_seqidx(self) nogil except 1
cpdef void sort(self, str by=*) except *
cpdef bint is_sorted(self, str by=*) except *
cpdef int compare_ranking(self, KeyHash) except -1
cpdef TopHits copy(self)
cpdef int compare_ranking(self, KeyHash) except -1
cpdef bint is_sorted(self, str by=*) except *
cpdef void sort(self, str by=*) except *
cpdef MSA to_msa(self, Alphabet alphabet, list sequences=?, list traces=?, bint trim=*, bint digitize=?, bint all_consensus_cols=?)
......
......@@ -509,6 +509,10 @@ class Pipeline(object):
bit_cutoffs: typing.Optional[BIT_CUTOFFS] = None,
) -> None: ...
@property
# Name of the query as raw bytes, or `None` when there is none.
# NOTE(review): the surrounding hunk context (a `Z` property with a setter,
# under the `Pipeline` hunk header) suggests this stub may sit in `Pipeline`,
# while the implementation in this commit is a `TopHits` property — confirm
# the stub is declared on the intended class.
def query_name(self) -> typing.Optional[bytes]: ...
@property
# Accession of the query as raw bytes, or `None` when there is none.
def query_accession(self) -> typing.Optional[bytes]: ...
@property
def Z(self) -> typing.Optional[float]: ...
@Z.setter
def Z(self, Z: typing.Optional[float]) -> None: ...
......
......@@ -13,7 +13,7 @@ See Also:
# --- C imports --------------------------------------------------------------
cimport cython
from cpython.bytes cimport PyBytes_FromStringAndSize
from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_FromString
from cpython.list cimport PyList_New, PyList_SET_ITEM
from cpython.ref cimport PyObject
from cpython.exc cimport PyErr_Clear
......@@ -4821,6 +4821,11 @@ cdef class Pipeline:
hits._sort_by_key()
hits._threshold(self)
# record the query name and accession
hits._qname = PyBytes_FromString(om.name)
if om.acc != NULL:
hits._qacc = PyBytes_FromString(om.acc)
# return the hits
return hits
......@@ -5060,6 +5065,12 @@ cdef class Pipeline:
hmm.alphabet
)
# record the query name and accession
if query._sq.name != NULL:
hits._qname = PyBytes_FromString(query._sq.name)
if query._sq.acc != NULL:
hits._qacc = PyBytes_FromString(query._sq.acc)
# threshold hits
hits._sort_by_key()
hits._threshold(self)
......@@ -6283,6 +6294,8 @@ cdef class TopHits:
def __cinit__(self):
    # Start from a well-defined empty state: no hit storage allocated yet,
    # and no query name or accession recorded.
    self._th = NULL
    self._qname = None
    self._qacc = None
    # Zero the embedded pipeline struct so that none of its fields is left
    # uninitialized.
    memset(&self._pli, 0, sizeof(P7_PIPELINE))
def __init__(self):
......@@ -6346,6 +6359,8 @@ cdef class TopHits:
hits.append(offset)
return {
"qname": self._qname,
"qacc": self._qacc,
"unsrt": unsrt,
"hit": hits,
"Nalloc": self._th.Nalloc,
......@@ -6410,6 +6425,10 @@ cdef class TopHits:
cdef size_t offset
cdef VectorU8 hit_state
# record query name and accession
self._qname = state["qname"]
self._qacc = state["qacc"]
# deallocate current data if needed
if self._th != NULL:
libhmmer.p7_tophits.p7_tophits_Destroy(self._th)
......@@ -6501,6 +6520,24 @@ cdef class TopHits:
# --- Properties ---------------------------------------------------------
@property
def query_name(self):
    """`bytes` or `None`: The name of the query, if any.

    This is `None` when no query name has been recorded on this object.

    .. versionadded:: 0.6.1

    """
    return self._qname
@property
def query_accession(self):
    """`bytes` or `None`: The accession of the query, if any.

    This is `None` when no query accession has been recorded on this
    object.

    .. versionadded:: 0.6.1

    """
    return self._qacc
@property
def Z(self):
"""`float`: The effective number of targets searched.
......@@ -6735,9 +6772,15 @@ cdef class TopHits:
cdef P7_DOMAIN* dom_copy
cdef TopHits copy = TopHits.__new__(TopHits)
# record query name and accession
copy._qname = self._qname
copy._qacc = self._qacc
# WARN(@althonos): there is no way to do this in the HMMER codebase
# so this is a manual implementation; make sure
# that it stays consistent if P7_TOPHITS changes!
# TODO(@althonos): Replace with `p7_tophits_Clone` as implemented
# in EddyRivasLab/hmmer#273 when formally released.
with nogil:
# copy pipeline configuration
memcpy(&copy._pli, &self._pli, sizeof(P7_PIPELINE))
......@@ -7054,6 +7097,14 @@ cdef class TopHits:
elif self._pli.domZ_setby != p7_zsetby_e.p7_ZSETBY_NTARGETS and self._pli.domZ != other._pli.domZ:
raise ValueError("Trying to merge `TopHits` obtained from pipelines manually configured to different `domZ` values.")
# copy query name and accession if merging into an empty `TopHits`,
# otherwise check that names/accessions are consistent
if merged._th.N == 0:
merged._qname = other._qname
merged._qacc = other._qacc
elif merged._qname != other._qname or merged._qacc != other._qacc:
raise ValueError("Trying to merge `TopHits` obtained from different queries")
# copy hits (`p7_tophits_Merge` effectively destroys the old storage
# but because of Python references we cannot be sure that the data is
# not referenced anywhere else)
......
......@@ -75,6 +75,8 @@ class TestTopHits(unittest.TestCase):
self.assertDomainEqual(d1, d2)
def assertHitsEqual(self, hits1, hits2):
self.assertEqual(hits1.query_name, hits2.query_name)
self.assertEqual(hits1.query_accession, hits2.query_accession)
self.assertEqual(len(hits1), len(hits2))
for h1, h2 in zip(hits1, hits2):
self.assertHitEqual(h1, h2)
......@@ -247,3 +249,9 @@ class TestTopHits(unittest.TestCase):
def test_pickle(self):
    # Round-trip the hits through pickle and check that the restored object
    # compares equal to the original according to `assertHitsEqual`.
    pickled = pickle.loads(pickle.dumps(self.hits))
    self.assertHitsEqual(pickled, self.hits)
def test_query_name(self):
    # The hits must report the name of the HMM fixture as their query name.
    # NOTE(review): presumably `self.hits` was obtained by searching with
    # `self.hmm` — confirm against the fixture setup.
    self.assertEqual(self.hits.query_name, self.hmm.name)
def test_query_accession(self):
    # The hits must report the accession of the HMM fixture as their query
    # accession.
    # NOTE(review): presumably `self.hits` was obtained by searching with
    # `self.hmm` — confirm against the fixture setup.
    self.assertEqual(self.hits.query_accession, self.hmm.accession)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment