Commit 6af67fc7 authored by Martin Larralde's avatar Martin Larralde
Browse files

Rewrite `auto_correlation` and `hydrophobicity` using vectorized code

parent 6c4d5a78
......@@ -344,12 +344,12 @@ class Peptide(typing.Sequence[str]):
mu = statistics.mean(table.values())
sigma = statistics.stdev(table.values())
table = {k: (v - mu) / sigma for k, v in table.items()}
# compute using Cruciani formula
s = 0.0
for aa1, aa2 in zip(self.sequence, self.sequence[lag:]):
s += table.get(aa1, 0.0) * table.get(aa2, 0.0)
# return correlation
return s / len(self.sequence)
# build the lookup table
lut = array([table.get(aa, 0.0) for aa in self._CODE1])
# compute correlation using Cruciani formula
v1 = take(lut, self.encoded[:-lag])
v2 = take(lut, self.encoded[lag:])
return sum(v1 * v2) / len(self.sequence)
def cross_covariance(
self,
......@@ -903,7 +903,8 @@ class Peptide(typing.Sequence[str]):
table = tables.HYDROPHOBICITY.get(scale)
if table is None:
raise ValueError(f"Invalid hydrophobicity scale: {scale!r}")
return sum(table.get(aa, 0.0) for aa in self.sequence) / len(self.sequence)
lut = array([table.get(aa, 0.0) for aa in self._CODE1], dtype="d")
return sumtake(lut, self.encoded) / len(self.sequence)
def instability_index(self) -> float:
"""Compute the instability index of a protein sequence.
......
......@@ -11,6 +11,16 @@ class array(array.array):
def __new__(cls, values, dtype='f'):
return super().__new__(cls, dtype, values)
def __mul__(self, other):
if not isinstance(other, array):
return NotImplemented
if len(self) != len(other):
raise ValueError("Cannot pairwise multiply arrays of different lengths")
return array(
[x1 * x2 for x1, x2 in zip(self, other)],
dtype=self.typecode
)
def take(a, indices):
"""Take elements from an array.
......
......@@ -53,17 +53,12 @@ def load_tests(loader, tests, ignore):
if sys.argv[0].endswith("green"):
return tests
# doctests require `numpy` to run, which may not be available because
# it is a pain to get to work out-of-the-box on OSX
# if numpy is None:
# return tests
# recursively traverse all library submodules and load tests from them
packages = [None, peptides]
for pkg in iter(packages.pop, None):
# import the base module and add it to the tests
globs = dict(numpy=numpy, peptides=peptides, **pkg.__dict__)
globs = dict(peptides=peptides, **pkg.__dict__)
tests.addTests(
doctest.DocTestSuite(
pkg,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment