Commit 796a453b authored by Luis Pedro Coelho
Browse files

ENH Add --threads argument to SNP step

This now enables multithreading the SNP step.
parent 1cffce6c
......@@ -7,10 +7,18 @@ from os import path
import argparse
import subprocess
from glob import glob
import multiprocessing
basedir = path.dirname(__file__)
def exit_worker(signum, frame):
    """Signal handler that turns a received signal into a RuntimeError.

    The two parameters follow the standard signal-handler signature
    (signal number and current stack frame); neither is inspected.

    Raises:
        RuntimeError: always, with the message "Keyboard Interrupt".
    """
    raise RuntimeError("Keyboard Interrupt")
def init_worker():
    """Install `exit_worker` as the SIGINT handler for the current process.

    Passed as the initializer to `multiprocessing.Pool`, so it runs once in
    each worker process when that process starts.
    """
    # Imported locally, as in the original, so the module-level import
    # block is left untouched.
    import signal

    signal.signal(signal.SIGINT, exit_worker)
def execute_snp_call(args, snpCaller, ifile, ofile, split):
db_ann_args = []
if args.db_ann:
......@@ -30,7 +38,6 @@ def execute_snp_call(args, snpCaller, ifile, ofile, split):
if args.print_commands:
print(" ".join(samtools_cmd + ['|'] + snpcaller_cmd + ['>', ofile]))
else:
with open(ofile, 'wt') as ofile:
samtools_call = subprocess.Popen(samtools_cmd, stdout=subprocess.PIPE)
snpcaller_call = subprocess.Popen(snpcaller_cmd, stdin=samtools_call.stdout, stdout=ofile)
......@@ -55,6 +62,8 @@ def snp_call():
help='Instead of executing the commands, simply print them out')
parser.add_argument('--splits', default=None, action='store',
help='Directory where split files are found')
parser.add_argument('--threads', metavar='INT', default=1, type=int,
help='Number of jobs to run simmultaneously')
args = parser.parse_args()
if not path.isdir(args.project_dir):
stderr.write("Cannot find project directory '{}'".format(args.project_dir))
......@@ -87,14 +96,22 @@ SOLUTION: run getRefDB.sh or set up a custom database before running metaSNP cal
# Note: Different phred score scales might be disregarded.
# Note: If samtools > v0.1.18 is used -Q 20 filtering is highly recommended.
threads = (args.threads if not args.print_commands else 1)
p = multiprocessing.Pool(threads, init_worker)
results = []
if args.splits:
splits = glob('{}/best_split_*'.format(args.splits))
for split in splits:
execute_snp_call(args,
snpCaller,
'{}.{}'.format(indiv_out, path.basename(split)),
'{}.{}'.format(called_SNP, path.basename(split)),
split)
results.append(p.apply_async(execute_snp_call,
(args,
snpCaller,
'{}.{}'.format(indiv_out, path.basename(split)),
'{}.{}'.format(called_SNP, path.basename(split)),
split)))
p.close()
p.join()
for r in results:
r.wait()
else:
execute_snp_call(args, snpCaller, indiv_out, called_SNP, None)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment