Commit 9fe1f52b authored by Robin Erich Muench's avatar Robin Erich Muench
Browse files

Merge branch 'master' into 'master'

Update script to make SNP caller commands: add command line options to pass to caller & mpileup.

See merge request !5
parent 68937bca
......@@ -24,6 +24,16 @@ SPLITS=""
DB_ANNO=""
DEBUG=0
#Set defaults for...
#minimum coverage
PARAM_c=4
#SNV calling threshold
PARAM_t=4
#minimum calling fraction
PARAM_p=0.01
#maximum depth per position (for samtools mpileup)
PARAM_d=20000
display_usage() {
echo >&2 ""
echo >&2 " Usage: $(basename $0) project_dir/ all_samples ref_db [options]"
......@@ -36,7 +46,12 @@ display_usage() {
echo >&2 ""
echo >&2 " Optional:"
echo >&2 " -a db_ann FILE = database annotation."
echo >&2 " -l splits/ DIR = genome splits DIR for job parallelization (pre-processing)."
echo >&2 " -l splits/ DIR = genome splits DIR for job parallelization (pre-processing)."
echo >&2 " -c INT = minimum coverage per position to call a SNV (default: 4)."
echo >&2 " -t INT = minimum number of non-reference reads per position (across samples) to call a SNV (default: 4)."
echo >&2 " -p NUM[0,1] = minimum fraction of non-reference reads per position (across samples) to call a SNV (default: 0.01)."
echo >&2 " -d INT = maximum depth per position, as passed to samtools mpileup (default: 20.000)."
echo >&2 " -D 1/0 = run in debug mode (y/n)."
echo >&2 ""
echo >&2 "Note: Alternatively, use the '-l splits' option to call SNPs for specific species, contig regions (BED) or single positions (contig_id pos). Unlisted contigs/pos are skipped."
echo >&2 ""
......@@ -65,6 +80,20 @@ db_missing() {
exit 1
}
param_non_integer() {
echo >&2 ""
echo >&2 "ERROR: $1 is not an integer."
display_usage
exit 1
}
error_fraction_range() {
echo >&2 ""
echo >&2 "ERROR: value for minimum SNV fraction ('$1') is not in range [0,1]."
display_usage
exit 1
}
make_dir() {
echo >&2 ""
echo >&2 "WARNING '$1' is not an initialized metaSNP project. Subdirectory '$(basename $2)' (output DIR) is missing."
......@@ -80,7 +109,7 @@ make_dir() {
# Parse args using getopt (instead of getopts) to allow arguments before options
ARGA=$@
ARGS=$(getopt -o l:a:d:h -n "$0" -- "$@")
ARGS=$(getopt -o l:a:D:c:t:p:d:h -n "$0" -- "$@")
# reorganize arguments as returned by getopt
eval set -- "$ARGS"
......@@ -103,7 +132,7 @@ while true; do
DB_ANNO="-g $DB_ANNO"
shift
;;
-d)
-D)
shift
DEBUG="$1"
shift
......@@ -113,7 +142,35 @@ while true; do
display_usage
exit 1
;;
-c)
shift
PARAM_c="$1"
#Check if parameter is an integer (or zero)
[[ $PARAM_c =~ ^[0-9]+$ ]] || param_non_integer "-c $PARAM_c"
shift
;;
-t)
shift
PARAM_t="$1"
#Check if parameter is an integer (or zero)
[[ $PARAM_t =~ ^[0-9]+$ ]] || param_non_integer "-t $PARAM_t"
shift
;;
-p)
shift
PARAM_p="$1"
#Check if parameter is in range [0,1]
if [ $(echo " $PARAM_p < 0" | bc) -eq 1 ]; then error_fraction_range "$PARAM_p"; fi
if [ $(echo " $PARAM_p > 1" | bc) -eq 1 ]; then error_fraction_range "$PARAM_p"; fi
shift
;;
-d)
shift
PARAM_d="$1"
#Check if parameter is an integer (or zero)
[[ $PARAM_d =~ ^[0-9]+$ ]] || param_non_integer "-d $PARAM_d"
shift
;;
--)
shift
break
......@@ -135,6 +192,11 @@ done
[ -f "$REF_DB" ] || db_missing "$REF_DB"
REF_DB="-f $REF_DB"
#Set calling thresholds to appropriate formats
FLAG_c="-c $PARAM_c"
FLAG_t="-t $PARAM_t"
FLAG_p="-p $PARAM_p"
# SNP-CALLER + I/O FILES
snpCaller="$DIR/src/snpCaller/snpCall"
indiv_out=$PROJECT_DIR"/snpCaller/indiv_called"
......@@ -151,12 +213,12 @@ if [ "$SPLITS" ]
# Job parallelization (one command-line per split)
for split in $best_splt;
do
echo "samtools mpileup -l $split $REF_DB -B -b $SAMPLES | $snpCaller $REF_DB $DB_ANNO -i $indiv_out.$(echo $split | rev | cut -f1 -d/ | rev) > $called_SP.${split##*/}"
echo "samtools mpileup -l $split -d $PARAM_d $REF_DB -B -b $SAMPLES | $snpCaller $REF_DB $DB_ANNO -i $indiv_out.$(echo $split | rev | cut -f1 -d/ | rev) -c $PARAM_c -t $PARAM_t -p $PARAM_p > $called_SP.${split##*/}"
done
else
# Single batch jobs
echo "samtools mpileup $REF_DB -B -b $SAMPLES | $snpCaller $REF_DB $DB_ANNO -i $indiv_out > $called_SP"
echo "samtools mpileup -d $PARAM_d $REF_DB -B -b $SAMPLES | $snpCaller $REF_DB $DB_ANNO -i $indiv_out -c $PARAM_c -t $PARAM_t -p $PARAM_p > $called_SP"
fi
exit
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment