Commit c2cdbe6d authored by Martin Larralde
Browse files

Add `TopHits.write` method to write hits to a file in tabular format

parent f9a395ca
......@@ -62,8 +62,8 @@ cdef extern from "hmmer.h" nogil:
int p7_tophits_Alignment(const P7_TOPHITS *th, const ESL_ALPHABET *abc,
ESL_SQ **inc_sqarr, P7_TRACE **inc_trarr, int inc_n, int optflags,
ESL_MSA **ret_msa)
int p7_tophits_TabularTargets(FILE *ofp, char *qname, char *qacc, P7_TOPHITS *th, P7_PIPELINE *pli, int show_header)
int p7_tophits_TabularDomains(FILE *ofp, char *qname, char *qacc, P7_TOPHITS *th, P7_PIPELINE *pli, int show_header)
int p7_tophits_TabularTargets(FILE *ofp, char *qname, char *qacc, P7_TOPHITS *th, P7_PIPELINE *pli, int show_header) except *
int p7_tophits_TabularDomains(FILE *ofp, char *qname, char *qacc, P7_TOPHITS *th, P7_PIPELINE *pli, int show_header) except *
# `except *` makes Cython check for a pending Python exception after the call,
# matching the TabularTargets / TabularDomains declarations.
int p7_tophits_TabularXfam(FILE *ofp, char *qname, char *qacc, P7_TOPHITS *th, P7_PIPELINE *pli) except *
int p7_tophits_TabularTail(FILE *ofp, const char *progname, p7_pipemodes_e pipemode,
const char *qfile, const char *tfile, const ESL_GETOPTS *go)
......
......@@ -320,6 +320,7 @@ cdef class TopHits:
cpdef bint is_sorted(self, str by=*) except *
cpdef void sort(self, str by=*) except *
cpdef MSA to_msa(self, Alphabet alphabet, list sequences=?, list traces=?, bint trim=*, bint digitize=?, bint all_consensus_cols=?)
cpdef void write(self, object fh, str format=*, bint header=*) except *
cdef class Trace:
......
......@@ -31,6 +31,7 @@ WEIGHTING = Literal["pb", "gsc", "blosum", "none", "given"]
EFFECTIVE = Literal["entropy", "exp", "clust", "none"]
PRIOR_SCHEME = Literal["laplace", "alphabet"]
STRAND = Literal["watson", "crick"]
# Format names accepted by `TopHits.write`; they must match the strings the
# implementation compares against ("targets", "domains", "pfam").
HITS_FORMAT = Literal["targets", "domains", "pfam"]
class Alignment(collections.abc.Sized):
domain: Domain
......@@ -509,10 +510,6 @@ class Pipeline(object):
bit_cutoffs: typing.Optional[BIT_CUTOFFS] = None,
) -> None: ...
@property
def query_name(self) -> typing.Optional[bytes]: ...
@property
def query_accession(self) -> typing.Optional[bytes]: ...
@property
def Z(self) -> typing.Optional[float]: ...
@Z.setter
def Z(self, Z: typing.Optional[float]) -> None: ...
......@@ -721,6 +718,10 @@ class TopHits(typing.Sequence[Hit]):
def __getstate__(self) -> typing.Dict[str, object]: ...
def __setstate__(self, state: typing.Dict[str, object]) -> None: ...
@property
def query_name(self) -> typing.Optional[bytes]: ...
@property
def query_accession(self) -> typing.Optional[bytes]: ...
@property
def Z(self) -> float: ...
@property
def domZ(self) -> float: ...
......@@ -775,6 +776,12 @@ class TopHits(typing.Sequence[Hit]):
digitize: bool = False,
all_consensus_cols: bool = False,
) -> MSA: ...
# Write the hits in tabular format to `fh`, a file handle opened in binary
# mode. `format` selects the table layout and `header` toggles the table
# header line.
def write(
self,
fh: typing.BinaryIO,
format: HITS_FORMAT = "targets",
header: bool = True,
) -> None: ...
class Trace(object):
def __init__(self, posteriors: bool = False) -> None: ...
......
......@@ -7038,6 +7038,83 @@ cdef class TopHits:
free(sqarr)
free(trarr)
cpdef void write(self, object fh, str format="targets", bint header=True) except *:
    """write(self, fh, format="targets", header=True)\n--

    Dump the hits as a table into a file-like object.

    Arguments:
        fh (`io.IOBase`): The Python file handle to write to, which
            must have been opened in binary mode.
        format (`str`): The name of the tabular format to produce,
            one of ``targets``, ``domains`` or ``pfam``.
        header (`bool`): Set to `False` to leave out the table
            header. Has no effect with the ``pfam`` format.

    Raises:
        `ValueError`: When ``format`` is not one of the supported
            format names.
        `UnexpectedError`: When the underlying HMMER function returns
            a status other than ``eslOK``.

    Hint:
        The supported formats are the following:

        ``targets``
          Per-target hits in a table, equivalent to the output of
          the ``--tblout`` flag of ``hmmsearch`` or ``hmmscan``.

        ``domains``
          Per-domain hits in a table, equivalent to the output of
          the ``--domtblout`` flag of ``hmmsearch`` or ``hmmscan``.

        ``pfam``
          A table suitable for Pfam, where per-sequence and
          per-domain hits are merged in a single file, with fewer
          fields and sorted by score.

    .. versionadded:: 0.6.1

    """
    cdef FILE* ofp
    cdef str   funcname
    cdef int   code
    cdef char* qname = b"-"
    cdef char* qacc  = b"-"

    # fall back to "-" when the query has no name or no accession
    if self._qname is not None:
        qname = <char*> self._qname
    if self._qacc is not None:
        qacc = <char*> self._qacc

    # wrap the Python file object into a C stream for the HMMER writers
    ofp = fopen_obj(fh, mode="w")
    try:
        if format == "targets":
            funcname = "p7_tophits_TabularTargets"
            code = libhmmer.p7_tophits.p7_tophits_TabularTargets(
                ofp, qname, qacc, self._th, &self._pli, header
            )
        elif format == "domains":
            funcname = "p7_tophits_TabularDomains"
            code = libhmmer.p7_tophits.p7_tophits_TabularDomains(
                ofp, qname, qacc, self._th, &self._pli, header
            )
        elif format == "pfam":
            # the Xfam writer takes no `show_header` argument
            funcname = "p7_tophits_TabularXfam"
            code = libhmmer.p7_tophits.p7_tophits_TabularXfam(
                ofp, qname, qacc, self._th, &self._pli
            )
        else:
            raise ValueError("Invalid hits tabular format: {!r}".format(format))
        if code != libeasel.eslOK:
            raise UnexpectedError(code, funcname)
    finally:
        # always release the C stream, even if writing failed
        fclose(ofp)
def merge(self, *others):
"""merge(self, *others)\n--
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment