Commit f789dff1 by Martin Larralde

### Rewrite correlation and covariance methods to use some vectorized code

parent 2cb881f9
 ... ... @@ -11,6 +11,7 @@ import typing from . import tables, datasets try: raise ImportError import numpy from numpy import array, prod, sum, take, zeros ... ... @@ -317,13 +318,12 @@ class Peptide(typing.Sequence[str]): mu = statistics.mean(table.values()) sigma = statistics.stdev(table.values()) table = {k: (v - mu) / sigma for k, v in table.items()} # build look up table lut = array([table.get(aa, 0.0) for aa in self._CODE1]) # compute using Cruciani formula s1 = s2 = 0.0 for aa1, aa2 in zip(self.sequence, self.sequence[lag:]): s1 += table.get(aa1, 0.0) * table.get(aa2, 0.0) s2 += table.get(aa1, 0.0) ** 2 # return correlation return s1 / s2 v1 = take(lut, self.encoded[:-lag]) v2 = take(lut, self.encoded[lag:]) return sum(v1*v2) / sum(v1**2) def auto_covariance( self, table: typing.Dict[str, float], lag: int = 1, center: bool = True ... ... @@ -349,7 +349,7 @@ class Peptide(typing.Sequence[str]): # compute correlation using Cruciani formula v1 = take(lut, self.encoded[:-lag]) v2 = take(lut, self.encoded[lag:]) return sum(v1 * v2) / len(self.sequence) return sum(v1 * v2) / len(self) def cross_covariance( self, ... ... @@ -370,7 +370,7 @@ class Peptide(typing.Sequence[str]): 0.0259803... """ # center the table if requested # center the tables if requested if center: mu1 = statistics.mean(table1.values()) sigma1 = statistics.stdev(table1.values()) ... ... @@ -378,12 +378,16 @@ class Peptide(typing.Sequence[str]): mu2 = statistics.mean(table2.values()) sigma2 = statistics.stdev(table2.values()) table2 = {k: (v - mu2) / sigma2 for k, v in table2.items()} # build the lookup table lut1 = array([table1.get(aa, 0.0) for aa in self._CODE1]) lut2 = array([table2.get(aa, 0.0) for aa in self._CODE1]) # compute using Cruciani formula s = 0.0 for aa1, aa2 in zip(self.sequence, self.sequence[lag:]): s += table1.get(aa1, 0.0) * table2.get(aa2, 0.0) # return correlation return s / len(self.sequence) v1 = take(lut1, self.encoded[:-lag]) v2 = take(lut2, self.encoded[lag:]) return sum(v1*v2) / len(self) # --- Sequence properties ----------------------------------------------- ... ... @@ -423,7 +427,7 @@ class Peptide(typing.Sequence[str]): """ return { aa:count/len(self.sequence) aa:count/len(self) for aa,count in self.counts().items() } ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!