Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
MorF
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Arendt Group
MorF
Commits
c63c7b63
Commit
c63c7b63
authored
3 years ago
by
Nikolaos Papadopoulos
Browse files
Options
Downloads
Patches
Plain Diff
sequence alignments vs UniRef30, PDB
parent
26bcef93
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
align.sh
+68
-0
68 additions, 0 deletions
align.sh
create_index.sh
+8
-8
8 additions, 8 deletions
create_index.sh
spongilla_pdb70.sh
+9
-0
9 additions, 0 deletions
spongilla_pdb70.sh
spongilla_uniref100.sh
+10
-0
10 additions, 0 deletions
spongilla_uniref100.sh
with
95 additions
and
8 deletions
align.sh
0 → 100755
+
68
−
0
View file @
c63c7b63
#!/bin/bash -ex
#SBATCH -J align
#SBATCH -t 10:00:00
#SBATCH -c 128
#SBATCH --mem=200G
#SBATCH -o /g/arendt/npapadop/cluster/align_%j.out
#SBATCH -e /g/arendt/npapadop/cluster/align_%j.err
# a heavily modified version of the colabfold_search.sh script from https://github.com/sokrypton/ColabFold/
# briefly:
# - we replace the MMseqs2 binary with the version in the module system
# - replace variables with static files and locations
# - replace DB2; initially BFD/Mgnify, now PDB70
# - remove DB3; initially ColabfoldDB
# - keep query database (search against UniRef30), since we are interested in the sequence alignments
module load MMseqs2
export
OMP_NUM_THREADS
=
128
# MMSEQS="$1"
QUERY
=
"/g/arendt/data/spongilla_singlecell_dataset/spongilla_lacustris_Trinity.fasta.transdecoder_70AA_mediumheader.pep"
DBBASE
=
"/scratch/npapadop/database"
BASE
=
"/scratch/npapadop/result"
DB1
=
"uniref30_2103_db"
DB2
=
"PDB"
# DB3="$7"
FILTER
=
"1"
THREADS
=
"128"
SENSITIVITY
=
8
EXPAND_EVAL
=
inf
ALIGN_EVAL
=
10
DIFF
=
3000
QSC
=
-20
.0
MAX_ACCEPT
=
1000000
if
[
"
${
FILTER
}
"
=
"1"
]
;
then
# 0.1 was not used in benchmarks due to POSIX shell bug in line above
# EXPAND_EVAL=0.1
ALIGN_EVAL
=
10
QSC
=
0.8
MAX_ACCEPT
=
100000
fi
export
MMSEQS_CALL_DEPTH
=
1
SEARCH_PARAM
=
"--num-iterations 3 --db-load-mode 2 -a -s
${
SENSITIVITY
}
-e 0.1 --max-seqs 10000"
FILTER_PARAM
=
"--filter-msa
${
FILTER
}
--filter-min-enable 1000 --diff
${
DIFF
}
--qid 0.0,0.2,0.4,0.6,0.8,1.0 --qsc 0 --max-seq-id 0.95"
EXPAND_PARAM
=
"--expansion-mode 0 -e
${
EXPAND_EVAL
}
--expand-filter-clusters
${
FILTER
}
--max-seq-id 0.95"
mkdir
-p
"
${
BASE
}
"
mmseqs createdb
"
${
QUERY
}
"
"
${
BASE
}
/qdb"
mmseqs search
"
${
BASE
}
/qdb"
"
${
DBBASE
}
/
${
DB1
}
"
"
${
BASE
}
/res"
"
${
BASE
}
/tmp"
$SEARCH_PARAM
mmseqs expandaln
"
${
BASE
}
/qdb"
"
${
DBBASE
}
/
${
DB1
}
.idx"
"
${
BASE
}
/res"
"
${
DBBASE
}
/
${
DB1
}
.idx"
"
${
BASE
}
/res_exp"
--db-load-mode
2
--threads
${
THREADS
}
${
EXPAND_PARAM
}
mmseqs mvdb
"
${
BASE
}
/tmp/latest/profile_1"
"
${
BASE
}
/prof_res"
mmseqs lndb
"
${
BASE
}
/qdb_h"
"
${
BASE
}
/prof_res_h"
mmseqs align
"
${
BASE
}
/prof_res"
"
${
DBBASE
}
/
${
DB1
}
.idx"
"
${
BASE
}
/res_exp"
"
${
BASE
}
/res_exp_realign"
--db-load-mode
2
--threads
${
THREADS
}
-e
${
ALIGN_EVAL
}
--max-accept
${
MAX_ACCEPT
}
--alt-ali
10
-a
mmseqs filterresult
"
${
BASE
}
/qdb"
"
${
DBBASE
}
/
${
DB1
}
.idx"
"
${
BASE
}
/res_exp_realign"
"
${
BASE
}
/res_exp_realign_filter"
--db-load-mode
2
--threads
${
THREADS
}
--qid
0
--qsc
$QSC
--diff
0
--max-seq-id
1.0
--filter-min-enable
100
mmseqs result2msa
"
${
BASE
}
/qdb"
"
${
DBBASE
}
/
${
DB1
}
.idx"
"
${
BASE
}
/res_exp_realign_filter"
"
${
BASE
}
/uniref.a3m"
--msa-format-mode
6
--db-load-mode
2
--threads
${
THREADS
}
${
FILTER_PARAM
}
mmseqs rmdb
"
${
BASE
}
/res_exp_realign"
mmseqs rmdb
"
${
BASE
}
/res_exp"
mmseqs rmdb
"
${
BASE
}
/res"
mmseqs rmdb
"
${
BASE
}
/res_exp_realign_filter"
# align profile database against PDB
mmseqs search
"
${
BASE
}
/prof_res"
"
${
DBBASE
}
/
${
DB2
}
"
"
${
BASE
}
/res_pdb"
"
${
BASE
}
/tmp"
--db-load-mode
2
--threads
${
THREADS
}
-s
7.5
-a
-e
0.1
mmseqs convertalis
"
${
BASE
}
/prof_res"
"
${
DBBASE
}
/
${
DB2
}
.idx"
"
${
BASE
}
/res_pdb"
"
${
BASE
}
/
${
DB2
}
.m8"
--threads
${
THREADS
}
--format-output
query,target,fident,alnlen,mismatch,gapopen,qstart,qend,tstart,tend,evalue,bits,cigar
--db-load-mode
2
mmseqs rmdb
"
${
BASE
}
/res_pdb"
mmseqs rmdb
"
${
BASE
}
/qdb"
mmseqs rmdb
"
${
BASE
}
/qdb_h"
mmseqs rmdb
"
${
BASE
}
/res"
rm
-f
--
"
${
BASE
}
/prof_res"
*
rm
-rf
--
"
${
BASE
}
/tmp"
This diff is collapsed.
Click to expand it.
create_index.sh
+
8
−
8
View file @
c63c7b63
#!/bin/bash -ex
#SBATCH -J create_index
#SBATCH -t 0
2
:00:00
#SBATCH -c
12
#SBATCH --mem=
5
0G
#SBATCH -t 0
8
:00:00
#SBATCH -c
64
#SBATCH --mem=
20
0G
#SBATCH -o /g/arendt/npapadop/cluster/create_index.out
#SBATCH -e /g/arendt/npapadop/cluster/create_index.err
OUTPUT_DIR
=
"
$1
"
DATABASE
=
"
$2
"
cd
${
OUTPUT_DIR
}
cd
/scratch/npapadop/database
module load MMseqs2
mmseqs createindex
${
DATABASE
}
tmp
--threads
12
--remove-tmp-files
1
# mmseqs tsv2exprofiledb "uniref30_2103" "uniref30_2103_db"
# mmseqs createindex "uniref30_2103_db" tmp1 --remove-tmp-files 1 --threads 64
mmseqs databases PDB PDB tmp
mmseqs createindex PDB tmp3
--remove-tmp-files
1
--threads
64
module unload MMseqs2
\ No newline at end of file
This diff is collapsed.
Click to expand it.
spongilla_pdb70.sh
0 → 100755
+
9
−
0
View file @
c63c7b63
#!/bin/bash -ex
SCRIPT
=
"/g/arendt/npapadop/repos/spongfold/align.sh"
QUERYDB
=
"/scratch/npapadop/Spongilla_lacustris_70AAcutoffTransDecoder/spongilla"
TARGETDB
=
"/scratch/npapadop/PDB70/PDB70"
RESULTDB
=
"/scratch/npapadop/spongilla_pdb70/result"
TMP
=
"/scratch/npapadop/spongilla_pdb70/tmp"
sbatch
${
SCRIPT
}
${
QUERYDB
}
${
TARGETDB
}
${
RESULTDB
}
${
TMP
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
spongilla_uniref100.sh
0 → 100755
+
10
−
0
View file @
c63c7b63
#!/bin/bash -ex
SCRIPT
=
"/g/arendt/npapadop/repos/spongfold/align.sh"
QUERY
=
"/g/arendt/data/spongilla_singlecell_dataset/spongilla_lacustris_Trinity.fasta.transdecoder_70AA_mediumheader.pep"
DBBASE
=
"/scratch/npapadop"
BASE
=
"/scratch/npapadop"
DB1
=
"Uniref100/Uniref100"
FILTER
=
"1"
sbatch
${
SCRIPT
}
${
QUERY
}
${
DBBASE
}
${
BASE
}
${
DB1
}
${
FILTER
}
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment