Commit 4a0a18d4 by Martin Larralde

### Rewrite correlation and covariance methods to use some vectorized code

parent 724c30f3
 ... @@ -11,6 +11,7 @@ import typing ... @@ -11,6 +11,7 @@ import typing from . import tables, datasets from . import tables, datasets try: try: raise ImportError import numpy import numpy from numpy import array, prod, sum, take, zeros from numpy import array, prod, sum, take, zeros ... @@ -317,13 +318,12 @@ class Peptide(typing.Sequence[str]): ... @@ -317,13 +318,12 @@ class Peptide(typing.Sequence[str]): mu = statistics.mean(table.values()) mu = statistics.mean(table.values()) sigma = statistics.stdev(table.values()) sigma = statistics.stdev(table.values()) table = {k: (v - mu) / sigma for k, v in table.items()} table = {k: (v - mu) / sigma for k, v in table.items()} # build look up table lut = array([table.get(aa, 0.0) for aa in self._CODE1]) # compute using Cruciani formula # compute using Cruciani formula s1 = s2 = 0.0 v1 = take(lut, self.encoded[:-lag]) for aa1, aa2 in zip(self.sequence, self.sequence[lag:]): v2 = take(lut, self.encoded[lag:]) s1 += table.get(aa1, 0.0) * table.get(aa2, 0.0) return sum(v1*v2) / sum(v1**2) s2 += table.get(aa1, 0.0) ** 2 # return correlation return s1 / s2 def auto_covariance( def auto_covariance( self, table: typing.Dict[str, float], lag: int = 1, center: bool = True self, table: typing.Dict[str, float], lag: int = 1, center: bool = True ... @@ -349,7 +349,7 @@ class Peptide(typing.Sequence[str]): ... @@ -349,7 +349,7 @@ class Peptide(typing.Sequence[str]): # compute correlation using Cruciani formula # compute correlation using Cruciani formula v1 = take(lut, self.encoded[:-lag]) v1 = take(lut, self.encoded[:-lag]) v2 = take(lut, self.encoded[lag:]) v2 = take(lut, self.encoded[lag:]) return sum(v1 * v2) / len(self.sequence) return sum(v1 * v2) / len(self) def cross_covariance( def cross_covariance( self, self, ... @@ -370,7 +370,7 @@ class Peptide(typing.Sequence[str]): ... @@ -370,7 +370,7 @@ class Peptide(typing.Sequence[str]): 0.0259803... 0.0259803... """ """ # center the table if requested # center the tables if requested if center: if center: mu1 = statistics.mean(table1.values()) mu1 = statistics.mean(table1.values()) sigma1 = statistics.stdev(table1.values()) sigma1 = statistics.stdev(table1.values()) ... @@ -378,12 +378,16 @@ class Peptide(typing.Sequence[str]): ... @@ -378,12 +378,16 @@ class Peptide(typing.Sequence[str]): mu2 = statistics.mean(table2.values()) mu2 = statistics.mean(table2.values()) sigma2 = statistics.stdev(table2.values()) sigma2 = statistics.stdev(table2.values()) table2 = {k: (v - mu2) / sigma2 for k, v in table2.items()} table2 = {k: (v - mu2) / sigma2 for k, v in table2.items()} # build the lookup table lut1 = array([table1.get(aa, 0.0) for aa in self._CODE1]) lut2 = array([table2.get(aa, 0.0) for aa in self._CODE1]) # compute using Cruciani formula # compute using Cruciani formula s = 0.0 v1 = take(lut1, self.encoded[:-lag]) for aa1, aa2 in zip(self.sequence, self.sequence[lag:]): v2 = take(lut2, self.encoded[lag:]) s += table1.get(aa1, 0.0) * table2.get(aa2, 0.0) return sum(v1*v2) / len(self) # return correlation return s / len(self.sequence) # --- Sequence properties ----------------------------------------------- # --- Sequence properties ----------------------------------------------- ... @@ -423,7 +427,7 @@ class Peptide(typing.Sequence[str]): ... @@ -423,7 +427,7 @@ class Peptide(typing.Sequence[str]): """ """ return { return { aa:count/len(self.sequence) aa:count/len(self) for aa,count in self.counts().items() for aa,count in self.counts().items() } } ... ...
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!