From 1c7e1b7e33528afa4174d9e6e6337570324398c4 Mon Sep 17 00:00:00 2001 From: Tobias Marschall <tobias.marschall@0ohm.net> Date: Thu, 1 Nov 2018 14:02:48 +0100 Subject: [PATCH] Change rule filter_calls to retain all calls that overlap with any merged call --- Snakefile | 5 +++-- utils/apply_filter.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 2 deletions(-) create mode 100755 utils/apply_filter.py diff --git a/Snakefile b/Snakefile index bdbcbe7..b501014 100644 --- a/Snakefile +++ b/Snakefile @@ -617,11 +617,12 @@ rule mosaiClassifier_make_call: rule filter_calls: input: - calls = "sv_calls/{sample}/{window}_fixed_norm.{bpdens}/simpleCalls_llr{llr}_poppriors{pop_priors}_haplotags{use_haplotags}_gtcutoff{gtcutoff}_regfactor{regfactor}_filterFALSE.txt" + inputcalls = "sv_calls/{sample}/{window}_fixed_norm.{bpdens}/simpleCalls_llr{llr}_poppriors{pop_priors}_haplotags{use_haplotags}_gtcutoff{gtcutoff}_regfactor{regfactor}_filterFALSE.txt", + mergedcalls = "postprocessing/merge/{sample}/{window}_fixed_norm.{bpdens}/simpleCalls_llr{llr}_poppriors{pop_priors}_haplotags{use_haplotags}_gtcutoff{gtcutoff}_regfactor{regfactor}.txt", output: calls = "sv_calls/{sample}/{window}_fixed_norm.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+_scedist[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}_filterTRUE.txt" shell: - 'utils/filter_MosaiCatcher_calls.pl {input.calls} | awk \'BEGIN {{OFS="\\t"}} (NR==1) || ($16=="PASS") {{$16=""; print}}\' > {output.calls}' + 'utils/apply_filter.py {input.inputcalls} {input.mergedcalls} > {output.calls}' rule mosaiClassifier_calc_probs: diff --git a/utils/apply_filter.py b/utils/apply_filter.py new file mode 100755 index 0000000..5e58aa4 --- /dev/null +++ b/utils/apply_filter.py @@ -0,0 +1,32 @@ +#!/usr/bin/env python + +""" +Retains calls that overlap with a call after filtering and merging. +That is, output all calls in the input sets that are present in the +provided merged set. +""" + +import sys +from argparse import ArgumentParser +import pandas as pd + +def main(): + parser = ArgumentParser(prog='apply_filter.py', description=__doc__) + + parser.add_argument('inputcalls', metavar='INPUTCALLS', help='Input call set') + parser.add_argument('mergedcalls', metavar='MERGEDCALLS', help='Merged calls used to decide which input calls to retain') + + args = parser.parse_args() + + input_calls = pd.read_csv(args.inputcalls, sep='\t') + merged_calls = pd.read_csv(args.mergedcalls, sep=', ', engine='python') + + input_calls['selected'] = False + for chrom, start, end in merged_calls[['chrom','start','end']].values: + input_calls.loc[(input_calls.chrom==chrom) & (input_calls.start<end) & (input_calls.end>start), 'selected'] = True + + output_calls = input_calls[input_calls.selected].drop(columns=['selected']) + output_calls.to_csv(sys.stdout, sep='\t', index=False) + +if __name__ == '__main__': + main() -- GitLab