Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Martin Larralde
peptides.py
Commits
6c4d5a78
Commit
6c4d5a78
authored
Oct 25, 2021
by
Martin Larralde
Browse files
Rewrite some `Peptide` methods to use `numpy` or pseudovectorization with `array.array`
parent
51b0632e
Changes
2
Hide whitespace changes
Inline
Side-by-side
peptides/__init__.py
View file @
6c4d5a78
...
...
@@ -8,15 +8,24 @@ import random
import
statistics
import
typing
from
.
import
tables
,
datasets
try
:
from
numpy
import
prod
,
sum
import
numpy
from
numpy
import
array
,
prod
,
sum
,
take
,
zeros
def
sumtake
(
a
,
indices
):
return
numpy
.
sum
(
numpy
.
take
(
a
,
indices
))
except
ImportError
:
from
._npcompat
import
prod
numpy
=
None
from
._npcompat
import
array
,
prod
,
take
,
zeros
from
.
import
tables
,
datasets
def
sumtake
(
a
,
indices
):
return
sum
(
map
(
a
.
__getitem__
,
indices
))
__all__
=
[
"Peptide"
,
"tables"
,
"datasets"
]
__all__
=
[
"Peptide"
,
"tables"
,
"datasets"
]
__version__
=
"0.1.0"
__author__
=
"Martin Larralde <martin.larralde@embl.de>"
__license__
=
"GPLv3"
...
...
@@ -239,7 +248,13 @@ class Peptide(typing.Sequence[str]):
in some methods, but not all of them.
"""
# store the sequence in text format
self
.
sequence
:
str
=
sequence
# store an encoded version of the sequence as an array of indices
encoder
=
{
aa
:
i
for
i
,
aa
in
enumerate
(
self
.
_CODE1
)}
self
.
encoded
:
array
=
zeros
(
len
(
sequence
),
dtype
=
'B'
)
for
i
,
aa
in
enumerate
(
sequence
):
self
.
encoded
[
i
]
=
encoder
.
get
(
aa
,
encoder
[
"X"
])
def
__len__
(
self
)
->
int
:
return
len
(
self
.
sequence
)
...
...
@@ -479,8 +494,8 @@ class Peptide(typing.Sequence[str]):
:doi:`10.1046/j.1365-2796.2003.01228.x`. :pmid:`12930229`.
"""
scale
=
tables
.
BOMAN
[
"Boman"
]
return
-
sum
(
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
sequence
)
/
len
(
self
.
sequence
)
lut
=
array
([
tables
.
BOMAN
[
"Boman"
]
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
return
-
sum
take
(
lut
,
self
.
encoded
)
/
len
(
self
.
sequence
)
def
charge
(
self
,
pH
:
float
=
7
,
pKscale
:
str
=
"Lehninger"
)
->
float
:
"""Compute the theoretical net charge of a peptide sequence.
...
...
@@ -1002,9 +1017,9 @@ class Peptide(typing.Sequence[str]):
Example:
>>> peptide = Peptide("EGVNDNECEGFFSAR")
>>> peptide.mass_shift(aa_shift="silac_13c")
6.020129
6.020129
...
>>> peptide.mass_shift(aa_shift=dict(R=10.00827))
10.00827
10.00827
...
References:
- Ong, S-E., I. Kratchmarova, and M. Mann.
...
...
@@ -1040,9 +1055,8 @@ class Peptide(typing.Sequence[str]):
f
"Expected str or dict, found
{
aa_shift
.
__class__
.
__name__
}
"
)
s
=
scale
.
get
(
"nTer"
,
0.0
)
+
scale
.
get
(
"cTer"
,
0.0
)
s
+=
sum
(
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
sequence
)
return
s
lut
=
array
([
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
return
sumtake
(
lut
,
self
.
encoded
)
+
scale
.
get
(
"nTer"
,
0.0
)
+
scale
.
get
(
"cTer"
,
0.0
)
def
molecular_weight
(
self
,
...
...
@@ -1088,8 +1102,10 @@ class Peptide(typing.Sequence[str]):
if
scale
is
None
:
raise
ValueError
(
f
"Invalid average weight scale:
{
average
!r}
"
)
# build a look-up table
lut
=
array
([
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
# sum the weight of each amino acid
mass
=
sum
(
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
sequence
)
mass
=
sum
take
(
lut
,
self
.
encoded
)
# add weight of water molecules
mass
+=
scale
[
"H2O"
]
# add mass shift for labeled proteins
...
...
@@ -1165,9 +1181,9 @@ class Peptide(typing.Sequence[str]):
if
scale
not
in
tables
.
HYDROPHOBICITY
:
raise
ValueError
(
f
"Invalid hydrophobicity scale:
{
scale
!r}
"
)
profile
=
array
.
array
(
"d"
)
for
i
in
range
(
len
(
self
.
sequence
)
-
window
+
1
):
profile
.
append
(
self
[
i
:
i
+
window
].
hydrophobicity
(
scale
=
scale
)
)
profile
=
zeros
(
len
(
self
.
sequence
)
-
window
+
1
)
for
i
in
range
(
len
(
profile
)
):
profile
[
i
]
=
self
[
i
:
i
+
window
].
hydrophobicity
(
scale
=
scale
)
return
profile
...
...
@@ -1196,9 +1212,9 @@ class Peptide(typing.Sequence[str]):
maximal hydrophobic moment instead of building a profile.
"""
profile
=
array
.
array
(
"d"
)
for
i
in
range
(
len
(
self
.
sequence
)
-
window
+
1
):
profile
.
append
(
profile
=
zeros
(
len
(
self
.
sequence
)
-
window
+
1
)
for
i
in
range
(
len
(
profile
)
):
profile
[
i
]
=
(
self
[
i
:
i
+
window
].
hydrophobic_moment
(
window
=
window
-
1
,
angle
=
angle
)
)
...
...
@@ -1508,10 +1524,13 @@ class Peptide(typing.Sequence[str]):
:doi:`10.1089/cmb.2008.0173`. :pmid:`19432540`.
"""
out
=
array
.
array
(
"d"
)
out
=
[]
for
i
in
range
(
len
(
tables
.
BLOSUM
)):
# build a look-up table
scale
=
tables
.
BLOSUM
[
f
"BLOSUM
{
i
+
1
}
"
]
out
.
append
(
sum
(
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
sequence
)
/
len
(
self
.
sequence
))
lut
=
array
([
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
# average the weight of each amino acid
out
.
append
(
sumtake
(
lut
,
self
.
encoded
)
/
len
(
self
))
return
BLOSUMIndices
(
*
out
)
def
cruciani_properties
(
self
)
->
CrucianiProperties
:
...
...
@@ -1545,10 +1564,13 @@ class Peptide(typing.Sequence[str]):
:doi:`10.1002/cem.856`.
"""
out
=
array
.
array
(
"d"
)
out
=
[]
for
i
in
range
(
len
(
tables
.
CRUCIANI
)):
# build a look-up table
scale
=
tables
.
CRUCIANI
[
f
"PP
{
i
+
1
}
"
]
out
.
append
(
sum
(
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
sequence
)
/
len
(
self
.
sequence
))
lut
=
array
([
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
# average the weight of each amino acid
out
.
append
(
sumtake
(
lut
,
self
.
encoded
)
/
len
(
self
))
return
CrucianiProperties
(
*
out
)
def
fasgai_vectors
(
self
)
->
FasgaiVectors
:
...
...
@@ -1585,10 +1607,13 @@ class Peptide(typing.Sequence[str]):
:doi:`10.1111/j.1747-0285.2008.00641.x`. :pmid:`18318694`.
"""
out
=
array
.
array
(
"d"
)
out
=
[]
for
i
in
range
(
len
(
tables
.
FASGAI
)):
# build a look-up table
scale
=
tables
.
FASGAI
[
f
"F
{
i
+
1
}
"
]
out
.
append
(
sum
(
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
sequence
)
/
len
(
self
.
sequence
))
lut
=
array
([
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
# average the weight of each amino acid
out
.
append
(
sumtake
(
lut
,
self
.
encoded
)
/
len
(
self
))
return
FasgaiVectors
(
*
out
)
def
kidera_factors
(
self
)
->
KideraFactors
:
...
...
@@ -1630,12 +1655,13 @@ class Peptide(typing.Sequence[str]):
:doi:`10.1007/BF01025492`.
"""
out
=
array
.
array
(
"d"
)
out
=
[]
for
i
in
range
(
len
(
tables
.
KIDERA
)):
# build a look-up table
scale
=
tables
.
KIDERA
[
f
"KF
{
i
+
1
}
"
]
o
ut
.
append
(
sum
(
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
sequence
)
/
len
(
self
.
sequence
)
)
l
ut
=
array
([
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
# average the weight of each amino acid
out
.
append
(
sumtake
(
lut
,
self
.
encoded
)
/
len
(
self
)
)
return
KideraFactors
(
*
out
)
def
ms_whim_scores
(
self
)
->
MSWHIMScores
:
...
...
@@ -1676,12 +1702,13 @@ class Peptide(typing.Sequence[str]):
:doi:`10.1021/ci980211b`.
"""
out
=
array
.
array
(
"d"
)
out
=
[]
for
i
in
range
(
len
(
tables
.
MSWHIM
)):
# build a look-up table
scale
=
tables
.
MSWHIM
[
f
"MSWHIM
{
i
+
1
}
"
]
o
ut
.
append
(
sum
(
scale
.
get
(
aa
,
0
)
for
aa
in
self
.
sequence
)
/
len
(
self
.
sequence
)
)
l
ut
=
array
([
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
# average the weight of each amino acid
out
.
append
(
sumtake
(
lut
,
self
.
encoded
)
/
len
(
self
)
)
return
MSWHIMScores
(
*
out
)
def
pcp_descriptors
(
self
)
->
PCPDescriptors
:
...
...
@@ -1697,12 +1724,12 @@ class Peptide(typing.Sequence[str]):
Example:
>>> peptide = Peptide("QWGRRCCGWGPGRRYCVRWC")
>>> for i, pcp in enumerate(peptide.pcp_descriptors()):
... print(f"E{i+1:<3} {pcp: .
4
f}")
E1 0.0109
E2 0.0381
E3 0.1250
E4 0.0409
E5 -0.1059
... print(f"E{i+1:<3} {pcp: .
5
f}")
E1 0.0109
0
E2 0.0381
0
E3 0.1250
5
E4 0.0409
5
E5 -0.1059
5
References:
- Mathura, V. S., and W. Braun.
...
...
@@ -1718,12 +1745,13 @@ class Peptide(typing.Sequence[str]):
:doi:`10.2174/092986609788923220`. :pmid:`19689427`.
"""
out
=
array
.
array
(
"d"
)
out
=
[]
for
i
in
range
(
len
(
tables
.
PCP_DESCRIPTORS
)):
# build a look-up table
scale
=
tables
.
PCP_DESCRIPTORS
[
f
"E
{
i
+
1
}
"
]
o
ut
.
append
(
sum
(
scale
.
get
(
aa
,
0
)
for
aa
in
self
.
sequence
)
/
len
(
self
.
sequence
)
)
l
ut
=
array
([
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
# average the weight of each amino acid
out
.
append
(
sumtake
(
lut
,
self
.
encoded
)
/
len
(
self
)
)
return
PCPDescriptors
(
*
out
)
def
physical_descriptors
(
self
)
->
PhysicalDescriptors
:
...
...
@@ -1765,12 +1793,13 @@ class Peptide(typing.Sequence[str]):
:doi:`10.1002/chem.201103811`. :pmid:`22434591`.
"""
out
=
array
.
array
(
"d"
)
out
=
[]
for
i
in
range
(
len
(
tables
.
PHYSICAL_DESCRIPTORS
)):
# build a look-up table
scale
=
tables
.
PHYSICAL_DESCRIPTORS
[
f
"PD
{
i
+
1
}
"
]
o
ut
.
append
(
sum
(
scale
.
get
(
aa
,
0
)
for
aa
in
self
.
sequence
)
/
len
(
self
.
sequence
)
)
l
ut
=
array
([
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
# average the weight of each amino acid
out
.
append
(
sumtake
(
lut
,
self
.
encoded
)
/
len
(
self
)
)
return
PhysicalDescriptors
(
*
out
)
def
protfp_descriptors
(
self
)
->
ProtFPDescriptors
:
...
...
@@ -1814,12 +1843,13 @@ class Peptide(typing.Sequence[str]):
:doi:`10.1186/1758-2946-5-42`. :pmid:`24059743`.
"""
out
=
array
.
array
(
"d"
)
out
=
[]
for
i
in
range
(
len
(
tables
.
PROTFP
)):
# build a look-up table
scale
=
tables
.
PROTFP
[
f
"ProtFP
{
i
+
1
}
"
]
o
ut
.
append
(
sum
(
scale
.
get
(
aa
,
0
)
for
aa
in
self
.
sequence
)
/
len
(
self
.
sequence
)
)
l
ut
=
array
([
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
# average the weight of each amino acid
out
.
append
(
sumtake
(
lut
,
self
.
encoded
)
/
len
(
self
)
)
return
ProtFPDescriptors
(
*
out
)
def
sneath_vectors
(
self
)
->
SneathVectors
:
...
...
@@ -1846,7 +1876,7 @@ class Peptide(typing.Sequence[str]):
SV1 0.1962
SV2 0.0466
SV3 0.0405
SV4 0.027
7
SV4 0.027
8
References:
- Sneath, P. H. A.
...
...
@@ -1856,12 +1886,13 @@ class Peptide(typing.Sequence[str]):
:doi:`10.1016/0022-5193(66)90112-3`. :pmid:`4291386`.
"""
out
=
array
.
array
(
"d"
)
out
=
[]
for
i
in
range
(
len
(
tables
.
SNEATH
)):
# build a look-up table
scale
=
tables
.
SNEATH
[
f
"SV
{
i
+
1
}
"
]
o
ut
.
append
(
sum
(
scale
.
get
(
aa
,
0
)
for
aa
in
self
.
sequence
)
/
len
(
self
.
sequence
)
)
l
ut
=
array
([
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
# average the weight of each amino acid
out
.
append
(
sumtake
(
lut
,
self
.
encoded
)
/
len
(
self
)
)
return
SneathVectors
(
*
out
)
def
st_scales
(
self
)
->
STScales
:
...
...
@@ -1897,12 +1928,13 @@ class Peptide(typing.Sequence[str]):
:doi:`10.1007/s00726-009-0287-y`. :pmid:`19373543`.
"""
out
=
array
.
array
(
"d"
)
out
=
[]
for
i
in
range
(
len
(
tables
.
ST_SCALES
)):
# build a look-up table
scale
=
tables
.
ST_SCALES
[
f
"ST
{
i
+
1
}
"
]
o
ut
.
append
(
sum
(
scale
.
get
(
aa
,
0
)
for
aa
in
self
.
sequence
)
/
len
(
self
.
sequence
)
)
l
ut
=
array
([
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
# average the weight of each amino acid
out
.
append
(
sumtake
(
lut
,
self
.
encoded
)
/
len
(
self
)
)
return
STScales
(
*
out
)
def
t_scales
(
self
)
->
TScales
:
...
...
@@ -1935,12 +1967,13 @@ class Peptide(typing.Sequence[str]):
:doi:`10.1016/j.molstruc.2006.07.004`.
"""
out
=
array
.
array
(
"d"
)
out
=
[]
for
i
in
range
(
len
(
tables
.
T_SCALES
)):
# build a look-up table
scale
=
tables
.
T_SCALES
[
f
"T
{
i
+
1
}
"
]
o
ut
.
append
(
sum
(
scale
.
get
(
aa
,
0
)
for
aa
in
self
.
sequence
)
/
len
(
self
.
sequence
)
)
l
ut
=
array
([
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
# average the weight of each amino acid
out
.
append
(
sumtake
(
lut
,
self
.
encoded
)
/
len
(
self
)
)
return
TScales
(
*
out
)
def
vhse_scales
(
self
)
->
VHSEScales
:
...
...
@@ -1980,12 +2013,13 @@ class Peptide(typing.Sequence[str]):
:doi:`10.1002/bip.20296`. :pmid:`15895431`.
"""
out
=
array
.
array
(
"d"
)
out
=
[]
for
i
in
range
(
len
(
tables
.
VHSE
)):
# build a look-up table
scale
=
tables
.
VHSE
[
f
"VHSE
{
i
+
1
}
"
]
o
ut
.
append
(
sum
(
scale
.
get
(
aa
,
0
)
for
aa
in
self
.
sequence
)
/
len
(
self
.
sequence
)
)
l
ut
=
array
([
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
# average the weight of each amino acid
out
.
append
(
sumtake
(
lut
,
self
.
encoded
)
/
len
(
self
)
)
return
VHSEScales
(
*
out
)
def
z_scales
(
self
)
->
ZScales
:
...
...
@@ -1999,7 +2033,7 @@ class Peptide(typing.Sequence[str]):
Example:
>>> peptide = Peptide("QWGRRCCGWGPGRRYCVRWC")
>>> for i, z in enumerate(peptide.z_scales()):
... print(f"Z{i+1:<3} {
z
: .4f}")
... print(f"Z{i+1:<3} {
0.0+round(z,5)
: .4f}")
Z1 0.5520
Z2 0.0985
Z3 0.0000
...
...
@@ -2024,12 +2058,13 @@ class Peptide(typing.Sequence[str]):
:doi:`10.1021/jm9700575`. :pmid:`9651153`.
"""
out
=
array
.
array
(
"d"
)
out
=
[]
for
i
in
range
(
len
(
tables
.
Z_SCALES
)):
# build a look-up table
scale
=
tables
.
Z_SCALES
[
f
"Z
{
i
+
1
}
"
]
o
ut
.
append
(
sum
(
scale
.
get
(
aa
,
0
)
for
aa
in
self
.
sequence
)
/
len
(
self
.
sequence
)
)
l
ut
=
array
([
scale
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
# average the weight of each amino acid
out
.
append
(
sumtake
(
lut
,
self
.
encoded
)
/
len
(
self
)
)
return
ZScales
(
*
out
)
__DESCRIPTORS
=
{
...
...
peptides/_npcompat.py
View file @
6c4d5a78
"""Compatibility layer to use `array.array` when `numpy` is not available.
"""
import
array
import
functools
import
operator
class
array
(
array
.
array
):
def
__new__
(
cls
,
values
,
dtype
=
'f'
):
return
super
().
__new__
(
cls
,
dtype
,
values
)
def
take
(
a
,
indices
):
"""Take elements from an array.
"""
return
array
([
a
[
i
]
for
i
in
indices
])
def
prod
(
a
):
"""Return the product of an iterable of numbers
"""Return the product of an iterable of numbers
.
"""
return
functools
.
reduce
(
operator
.
prod
,
a
)
def
zeros
(
shape
,
dtype
=
'f'
):
return
array
([
0
for
_
in
range
(
shape
)],
dtype
=
dtype
)
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment