Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Martin Larralde
peptides.py
Commits
bb116938
Commit
bb116938
authored
Oct 25, 2021
by
Martin Larralde
Browse files
Rewrite `auto_correlation` and `hydrophobicity` using vectorized code
parent
08df9ca4
Changes
3
Hide whitespace changes
Inline
Side-by-side
peptides/__init__.py
View file @
bb116938
...
...
@@ -344,12 +344,12 @@ class Peptide(typing.Sequence[str]):
mu
=
statistics
.
mean
(
table
.
values
())
sigma
=
statistics
.
stdev
(
table
.
values
())
table
=
{
k
:
(
v
-
mu
)
/
sigma
for
k
,
v
in
table
.
items
()}
#
compute using Cruciani formula
s
=
0.0
for
aa1
,
aa2
in
zip
(
self
.
sequence
,
self
.
sequence
[
lag
:]):
s
+=
table
.
get
(
aa1
,
0.0
)
*
table
.
get
(
aa2
,
0.0
)
# return correlation
return
s
/
len
(
self
.
sequence
)
#
build the lookup table
lut
=
array
([
table
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
])
# compute correlation using Cruciani formula
v1
=
take
(
lut
,
self
.
encoded
[:
-
lag
]
)
v2
=
take
(
lut
,
self
.
encoded
[
lag
:])
return
s
um
(
v1
*
v2
)
/
len
(
self
.
sequence
)
def
cross_covariance
(
self
,
...
...
@@ -903,7 +903,8 @@ class Peptide(typing.Sequence[str]):
table
=
tables
.
HYDROPHOBICITY
.
get
(
scale
)
if
table
is
None
:
raise
ValueError
(
f
"Invalid hydrophobicity scale:
{
scale
!
r
}
"
)
return
sum
(
table
.
get
(
aa
,
0.0
)
for
aa
in
self
.
sequence
)
/
len
(
self
.
sequence
)
lut
=
array
([
table
.
get
(
aa
,
0.0
)
for
aa
in
self
.
_CODE1
],
dtype
=
"d"
)
return
sumtake
(
lut
,
self
.
encoded
)
/
len
(
self
.
sequence
)
def
instability_index
(
self
)
->
float
:
"""Compute the instability index of a protein sequence.
...
...
peptides/_npcompat.py
View file @
bb116938
...
...
@@ -11,6 +11,16 @@ class array(array.array):
def
__new__
(
cls
,
values
,
dtype
=
'f'
):
return
super
().
__new__
(
cls
,
dtype
,
values
)
def
__mul__
(
self
,
other
):
if
not
isinstance
(
other
,
array
):
return
NotImplemented
if
len
(
self
)
!=
len
(
other
):
raise
ValueError
(
"Cannot pairwise multiply arrays of different lengths"
)
return
array
(
[
x1
*
x2
for
x1
,
x2
in
zip
(
self
,
other
)],
dtype
=
self
.
typecode
)
def
take
(
a
,
indices
):
"""Take elements from an array.
...
...
tests/test_doctest.py
View file @
bb116938
...
...
@@ -53,17 +53,12 @@ def load_tests(loader, tests, ignore):
if
sys
.
argv
[
0
].
endswith
(
"green"
):
return
tests
# doctests require `numpy` to run, which may not be available because
# it is a pain to get to work out-of-the-box on OSX
# if numpy is None:
# return tests
# recursively traverse all library submodules and load tests from them
packages
=
[
None
,
peptides
]
for
pkg
in
iter
(
packages
.
pop
,
None
):
# import the base module and add it to the tests
globs
=
dict
(
numpy
=
numpy
,
peptides
=
peptides
,
**
pkg
.
__dict__
)
globs
=
dict
(
peptides
=
peptides
,
**
pkg
.
__dict__
)
tests
.
addTests
(
doctest
.
DocTestSuite
(
pkg
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment