Commit a0f5563a authored by Martin Larralde
Browse files

Add class method to generate a random `Peptide` with fixed AA frequencies

parent 9c6f8e11
import array
import math
import random
import statistics
import typing
......@@ -114,6 +115,63 @@ class ZScales(typing.NamedTuple):
class Peptide(object):
@classmethod
def sample(
    cls,
    length: int,
    frequencies: str = "SwissProt2021",
) -> "Peptide":
    """Generate a peptide with the given amino-acid frequencies.

    This method is useful for testing, but using amino-acid frequencies
    to generate a peptide is not a biologically accurate method; instead,
    consider sampling based on dipeptide frequencies in a particular
    organism, or using k-mer shuffling.

    Arguments:
        length (`int`): The desired length for the generated peptide.
            Must not be negative.
        frequencies (`str`): The name of the amino-acid frequency table
            to use: either *KingJukes* to use the amino-acid frequencies
            for vertebrate organisms reported in King & Jukes (1969),
            or *SwissProt2021* to use the amino-acid frequencies in all
            the proteins from the January 2021 release of SwissProt.

    Returns:
        `~peptides.Peptide`: A new peptide. The first amino-acid will
        always be a Methionine for biological accuracy. An empty
        peptide is returned when ``length`` is zero.

    Raises:
        ValueError: When ``frequencies`` is not the name of a known
            frequency table, or when ``length`` is negative.

    Note:
        Residues after the first one are drawn by inverse-transform
        sampling on the cumulative frequencies of the table. When a
        random draw exceeds the total of the table (i.e. the
        frequencies sum to less than one), the unknown residue ``X``
        is emitted.

    References:
        - King, J. L., and T. H. Jukes.
          *Non-Darwinian Evolution*.
          Science. May 1969;164(3881):788–98.
          doi:10.1126/science.164.3881.788. PMID:5767777.
        - The UniProt Consortium.
          *UniProt: The Universal Protein Knowledgebase in 2021*.
          Nucleic Acids Research. Jan 2021;49(D1):D480–89.
          doi:10.1093/nar/gkaa1100. PMID:33237286.

    """
    # Function-scope imports keep this method self-contained without
    # touching the module-level import block.
    from bisect import bisect_left
    from itertools import accumulate

    table = tables.AA_FREQUENCIES.get(frequencies)
    if table is None:
        raise ValueError(f"Invalid amino acid frequencies: {frequencies!r}")
    # A negative length used to fall through and silently produce "M".
    if length < 0:
        raise ValueError(f"Invalid peptide length: {length!r}")
    if length == 0:
        return cls("")

    # Build the cumulative distribution once, in table order.
    symbols = list(table.keys())
    thresholds = list(accumulate(table.values()))

    # The first residue is always a Methionine; only the remaining
    # `length - 1` positions are sampled.
    residues = ["M"]
    for _ in range(1, length):
        # First threshold that is >= the draw, i.e. the first residue
        # whose cumulative frequency satisfies `x <= v`.
        index = bisect_left(thresholds, random.random())
        # Draws past the last threshold fall back to the unknown residue.
        residues.append(symbols[index] if index < len(symbols) else "X")
    return cls("".join(residues))
# --- Magic methods ------------------------------------------------------
def __init__(self, sequence: str) -> None:
......@@ -248,6 +306,8 @@ class Peptide(object):
# return correlation
return s / len(self.sequence)
# --- Sequence properties -----------------------------------------------
# --- Physico-chemical properties ----------------------------------------
def aliphatic_index(self) -> float:
......
A,0.074
R,0.042
N,0.044
D,0.058
C,0.033
E,0.058
Q,0.037
G,0.074
H,0.029
I,0.038
L,0.076
K,0.072
M,0.018
F,0.040
P,0.050
S,0.081
T,0.062
W,0.013
Y,0.033
V,0.068
A,0.0825
R,0.0553
N,0.0406
D,0.0546
C,0.0138
Q,0.0393
E,0.0672
G,0.0707
H,0.0227
I,0.0591
L,0.0965
K,0.0580
M,0.0241
F,0.0386
P,0.0473
S,0.0664
T,0.0535
W,0.0110
Y,0.0292
V,0.0686
X,0.0001
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment