Commit 33ae21c3 authored by Robin Erich Muench
Browse files

basedir and readlink fix

parent 796a453b
......@@ -7,8 +7,7 @@ import argparse
import subprocess
import multiprocessing
basedir = path.dirname(__file__)
basedir = os.path.dirname(os.path.abspath(__file__))
def exit_worker(signum, frame):
raise RuntimeError("Keyboard Interrupt")
......
#!/bin/bash
#########################################
# metaSNP Step II: `Genome Splitting` #
# metaSNP Step II: `Database Indexing` #
#########################################
#
# Helper script
......@@ -15,17 +15,10 @@ set -e
# Variables
wd=`pwd`
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
DIR="$( dirname $( readlink -f "${BASH_SOURCE[0]}" ) )"
arg1="$1"
arg2="$2"
arg3="$3"
PROJECT_DIR=$arg1
GENOME_DEF=$arg2
NUM_SPLITS=10
SPLIT_LIMIT=100
COV_FILES=$PROJECT_DIR"/cov"
NUM_SPLITS="10"
date=$(date +%Y-%m-%d)
#Usage Messages
......@@ -40,7 +33,7 @@ display_usage() {
echo >&2 " genome_def FILE = Contig ranges in BED format. (Fields: Contig_id, contigStart, contigEnd)"
echo >&2 ""
echo >&2 " Optional:"
echo >&2 " nr_splits INT = INT for job parallelization (range: 1-100) [10]"
echo >&2 " -n INT = split the workload into INT jobs for SNP call parallelization [10]"
echo >&2 ""
echo >&2 "Note: Expecting 'metaSNP_COV' to be completed!"
echo >&2 ""
......@@ -53,6 +46,13 @@ required_parameter() {
exit 1
}
# Abort because an option that must be an integer received something else.
#   $1 = label of the offending parameter (shown in parentheses in the message)
#   $2 = the rejected value
# Writes a blank line and the error message to stderr, prints the usage text
# via display_usage, then terminates the script with exit status 1.
param_non_integer() {
    {
        echo ""
        echo "ERROR: '$2' is not an integer. Parameter ($1)."
    } >&2
    display_usage
    exit 1
}
no_such_file() {
echo >&2 ""
echo >&2 "ERROR: '$1' no such file or directory"
......@@ -102,7 +102,7 @@ make_dir() {
# getopt to use -h flag
ARGS=$(getopt -o h -n "$0" -- "$@")
ARGS=$(getopt -o hn: -n "$0" -- "$@")
# reorganize arguments as returned by getopt
eval set -- "$ARGS"
......@@ -115,6 +115,13 @@ while true; do
display_usage
exit 1
;;
-n)
shift
NUM_SPLITS="$1"
[[ $NUM_SPLITS =~ ^[0-9]+$ ]] || param_non_integer "-n INT" "$NUM_SPLITS"
shift
;;
--)
shift
break
......@@ -126,10 +133,11 @@ done
# Required parameters:
[ -z "$arg1" ] && required_parameter "project_dir"
[ -z "$arg2" ] && required_parameter "genome_def"
[ -n "$arg3" ] && NUM_SPLITS=$arg3
# Nr_Splits
[ "$NUM_SPLITS" -gt $SPLIT_LIMIT ] && split_limit $NUM_SPLITS
# Read links
PROJECT_DIR="$(readlink -f $arg1)"
GENOME_DEF="$(readlink -f $arg2)"
COV_FILES="$PROJECT_DIR/cov"
# Required files and directories
[ -d "$PROJECT_DIR" ] || no_such_file "$PROJECT_DIR"
......@@ -139,14 +147,16 @@ done
# Required output DIR
[ -d "$PROJECT_DIR/bestsplits/" ] || make_dir "$PROJECT_DIR" "$PROJECT_DIR/bestsplits/"
# Get project name
# Get project name
#PROJECT_NAME=$(echo $PROJECT_DIR | awk -F"/" '{print $(NF-1)}')
PROJECT_NAME=$(basename $PROJECT_DIR)
# Get a summary for the coverage computation (Step I) from the .cov and .detail files
# List of coverage files for all samples
##
# Generate a summary for the coverage computation (Step I) from the .cov and .detail files
cd $COV_FILES
# List of coverage files for all samples
[ "$(ls *.cov)" ] || rerun_cov
allFile=$(ls *.cov)
......@@ -157,8 +167,8 @@ do
done
echo -e "\nCoverage summary here: $PROJECT_DIR"
echo " Average vertical genome coverage: '$PROJECT_DIR$PROJECT_NAME.all_cov.tab'"
echo " Horizontal genome coverage (1X): '$PROJECT_DIR$PROJECT_NAME.all_perc.tab'"
echo " Average vertical genome coverage: '$PROJECT_DIR/$PROJECT_NAME.all_cov.tab'"
echo " Horizontal genome coverage (1X): '$PROJECT_DIR/$PROJECT_NAME.all_perc.tab'"
ls *.summary | xargs awk -f $DIR/src/collapse_AvgCov.awk > ../$PROJECT_NAME.all_cov.tab
ls *.summary | xargs awk -f $DIR/src/collapse_PercCov.awk > ../$PROJECT_NAME.all_perc.tab
......@@ -184,9 +194,4 @@ echo -e >&2 "\nGenome Splitting:"
# Usage: createOptimumSplit.py <all_cov.tab> <all_perc.tab> <geneDefinitions> <INT_NrSplits> <.outfile>
python $DIR/src/createOptimumSplit.py $cov $perc $genDef $nr $outFile
# Remove second Row - CAUTION: change in filtering.py as well
#awk '!(NR == 2)' $PROJECT_DIR$PROJECT_NAME.all_cov.tab > temp
#mv temp $PROJECT_DIR$PROJECT_NAME.all_cov.tab
#awk '!(NR == 2)' $PROJECT_DIR$PROJECT_NAME.all_perc.tab > temp
#mv temp $PROJECT_DIR$PROJECT_NAME.all_perc.tab
exit
\ No newline at end of file
exit
......@@ -9,7 +9,7 @@ import subprocess
from glob import glob
import multiprocessing
basedir = path.dirname(__file__)
basedir = os.path.dirname(os.path.abspath(__file__))
def exit_worker(signum, frame):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment