Skip to content
Snippets Groups Projects
Commit 1c7e1b7e authored by Tobias Marschall's avatar Tobias Marschall
Browse files

Change rule filter_calls to retain all calls that overlap with any merged call

parent d8d94142
No related branches found
No related tags found
No related merge requests found
......@@ -617,11 +617,12 @@ rule mosaiClassifier_make_call:
rule filter_calls:
input:
calls = "sv_calls/{sample}/{window}_fixed_norm.{bpdens}/simpleCalls_llr{llr}_poppriors{pop_priors}_haplotags{use_haplotags}_gtcutoff{gtcutoff}_regfactor{regfactor}_filterFALSE.txt"
inputcalls = "sv_calls/{sample}/{window}_fixed_norm.{bpdens}/simpleCalls_llr{llr}_poppriors{pop_priors}_haplotags{use_haplotags}_gtcutoff{gtcutoff}_regfactor{regfactor}_filterFALSE.txt",
mergedcalls = "postprocessing/merge/{sample}/{window}_fixed_norm.{bpdens}/simpleCalls_llr{llr}_poppriors{pop_priors}_haplotags{use_haplotags}_gtcutoff{gtcutoff}_regfactor{regfactor}.txt",
output:
calls = "sv_calls/{sample}/{window}_fixed_norm.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+_scedist[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}_filterTRUE.txt"
shell:
'utils/filter_MosaiCatcher_calls.pl {input.calls} | awk \'BEGIN {{OFS="\\t"}} (NR==1) || ($16=="PASS") {{$16=""; print}}\' > {output.calls}'
'utils/apply_filter.py {input.inputcalls} {input.mergedcalls} > {output.calls}'
rule mosaiClassifier_calc_probs:
......
#!/usr/bin/env python
"""
Retains calls that overlap with a call after filtering and merging.
That is, output all calls in the input sets that are present in the
provided merged set.
"""
import sys
from argparse import ArgumentParser
import pandas as pd
def main():
parser = ArgumentParser(prog='apply_filter.py', description=__doc__)
parser.add_argument('inputcalls', metavar='INPUTCALLS', help='Input call set')
parser.add_argument('mergedcalls', metavar='MERGEDCALLS', help='Merged calls used to decide which input calls to retain')
args = parser.parse_args()
input_calls = pd.read_csv(args.inputcalls, sep='\t')
merged_calls = pd.read_csv(args.mergedcalls, sep=', ', engine='python')
input_calls['selected'] = False
for chrom, start, end in merged_calls[['chrom','start','end']].values:
input_calls.loc[(input_calls.chrom==chrom) & (input_calls.start<end) & (input_calls.end>start), 'selected'] = True
output_calls = input_calls[input_calls.selected].drop(columns=['selected'])
output_calls.to_csv(sys.stdout, sep='\t', index=False)
if __name__ == '__main__':
main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment