Commit 1b0e1aac authored by Martin Larralde's avatar Martin Larralde
Browse files

Add vectors described in Sneath (1966)

parent 86728a7c
......@@ -79,8 +79,9 @@ A non-exhaustive list of available features:
- `FASGAI vectors <https://doi.org/10.1111/j.1747-0285.2008.00641.x>`_
- `Kidera factors <https://doi.org/10.1007/BF01025492>`_
- `MS-WHIM scores <https://doi.org/10.1021/ci980211b>`_
- `PCP descriptors <https://doi.org/10.1007/s00894-001-0058-5>`_.
- `PCP descriptors <https://doi.org/10.1007/s00894-001-0058-5>`_
- `ProtFP descriptors <https://doi.org/10.1186/1758-2946-5-41>`_
- `Sneath vectors <https://doi.org/10.1016/0022-5193(66)90112-3>`_
- `ST-scales <https://doi.org/10.1007/s00726-009-0287-y>`_
- `T-scales <https://doi.org/10.1016/j.molstruc.2006.07.004>`_
- `VHSE-scales <https://doi.org/10.1002/bip.20296>`_
......
......@@ -93,6 +93,13 @@ class ProtFPDescriptors(typing.NamedTuple):
protfp8: float
class SneathVectors(typing.NamedTuple):
    """The four averaged Sneath vectors computed for a peptide.

    Returned by `Peptide.sneath_vectors`. Per that method's documentation,
    *SV1* appears to represent mainly aliphatic properties, *SV2* may model
    the number of reactive groups, *SV3* the aromatic properties, and *SV4*
    has no certain interpretation.
    """

    # Fields are declared in vector order (SV1..SV4), which is also the
    # order of the tuple elements.
    sv1: float
    sv2: float
    sv3: float
    sv4: float
class STScales(typing.NamedTuple):
st1: float
st2: float
......@@ -1601,6 +1608,48 @@ class Peptide(typing.Sequence[str]):
)
return ProtFPDescriptors(*out)
def sneath_vectors(self) -> SneathVectors:
    """Compute the Sneath vectors for a protein sequence.

    These vectors were obtained in Sneath (1966) by running PCA on the
    `ϕ coefficient <https://en.wikipedia.org/wiki/Phi_coefficient>`_
    to explain the dissimilarity between the 20 natural amino acids
    based on binary state encoding of 134 physical and chemical
    properties (such as presence/absence of a —CH₃ group, step-wise
    optical rotation, etc.).

    Returns:
        `peptides.SneathVectors`: The computed average of Sneath vectors
        of all the amino acids in the peptide. *SV1* appears to
        represent mainly aliphatic properties, *SV2* may model the
        number of reactive groups, *SV3* the aromatic properties, but
        *SV4* has no certain interpretation.

    Note:
        Residues missing from a lookup table contribute ``0`` to the sum
        for that vector but still count towards the sequence length used
        as the divisor.

    Example:
        >>> peptide = Peptide("QWGRRCCGWGPGRRYCVRWC")
        >>> for i, fp in enumerate(peptide.sneath_vectors()):
        ...     print(f"SV{i+1:<3} {fp: .4f}")
        SV1    0.1962
        SV2    0.0466
        SV3    0.0405
        SV4    0.0277

    References:
        - Sneath, P. H. A.
          *Relations between Chemical Structure and Biological Activity
          in Peptides*.
          Journal of Theoretical Biology. Nov 1966;12(2):157–95.
          doi:10.1016/0022-5193(66)90112-3. PMID:4291386.

    """
    # The divisor is the same for every vector, so compute it once.
    # NOTE(review): an empty sequence makes the division below raise
    # ZeroDivisionError -- confirm whether callers guard against that.
    length = len(self.sequence)
    out = array.array("d")
    # Tables are looked up by their 1-based key ("SV1", "SV2", ...) rather
    # than by iterating the mapping, so that the output order always
    # matches the field order of `SneathVectors`.
    for index in range(1, len(tables.SNEATH) + 1):
        scale = tables.SNEATH[f"SV{index}"]
        out.append(sum(scale.get(aa, 0) for aa in self.sequence) / length)
    return SneathVectors(*out)
def st_scales(self) -> STScales:
"""Compute the ST-scales of a protein sequence.
......@@ -1775,7 +1824,9 @@ class Peptide(typing.Sequence[str]):
"F": fasgai_vectors,
"KF": kidera_factors,
"MSWHIM": ms_whim_scores,
"E": pcp_descriptors,
"ProtFP": protfp_descriptors,
"SV": sneath_vectors,
"ST": st_scales,
"T": t_scales,
"VHSE": vhse_scales,
......
A,0.239
R,0.211
N,0.249
D,0.171
C,0.220
Q,0.187
E,0.260
G,0.160
H,0.205
I,0.273
L,0.281
K,0.228
M,0.253
F,0.234
P,0.165
S,0.236
T,0.213
W,0.183
Y,0.193
V,0.255
A,0.330
R,-0.176
N,-0.233
D,-0.371
C,0.074
Q,-0.409
E,-0.254
G,0.370
H,-0.078
I,0.149
L,0.129
K,-0.075
M,-0.092
F,-0.011
P,0.370
S,0.022
T,0.136
W,-0.011
Y,-0.138
V,0.245
A,-0.110
R,0.079
N,-0.136
D,-0.285
C,-0.184
Q,-0.246
E,-0.067
G,-0.073
H,0.320
I,0.001
L,-0.008
K,0.049
M,-0.041
F,0.438
P,-0.016
S,-0.153
T,-0.208
W,0.493
Y,0.381
V,-0.155
A,-0.062
R,-0.167
N,0.166
D,-0.079
C,0.380
Q,-0.184
E,-0.025
G,-0.017
H,0.056
I,-0.309
L,-0.264
K,-0.371
M,0.077
F,0.074
P,-0.036
S,0.470
T,0.348
W,0.050
Y,0.220
V,-0.212
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment