From 268d8d55b953c983a6ea974d261c56227f55f1d1 Mon Sep 17 00:00:00 2001 From: Tobias Marschall <tobias.marschall@0ohm.net> Date: Sat, 29 Sep 2018 11:13:13 +0200 Subject: [PATCH] Add AF to SV call tables --- Snakefile | 2 +- utils/filter_MosaiCatcher_calls.pl | 6 +++--- utils/mosaiClassifier/makeSVcalls.R | 11 ++++++++--- 3 files changed, 12 insertions(+), 7 deletions(-) diff --git a/Snakefile b/Snakefile index 10934a9..d4579d5 100644 --- a/Snakefile +++ b/Snakefile @@ -591,7 +591,7 @@ rule filter_calls: output: calls = "sv_calls/{sample}/{window}_fixed_norm.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}_filterTRUE.txt" shell: - 'utils/filter_MosaiCatcher_calls.pl {input.calls} | awk \'BEGIN {{OFS="\\t"}} $15!="FAIL" {{$15=""; print}}\' > {output.calls}' + 'utils/filter_MosaiCatcher_calls.pl {input.calls} | awk \'BEGIN {{OFS="\\t"}} $16!="FAIL" {{$16=""; print}}\' > {output.calls}' rule mosaiClassifier_calc_probs: diff --git a/utils/filter_MosaiCatcher_calls.pl b/utils/filter_MosaiCatcher_calls.pl index bd2a95f..e7e0957 100755 --- a/utils/filter_MosaiCatcher_calls.pl +++ b/utils/filter_MosaiCatcher_calls.pl @@ -3,7 +3,7 @@ use strict; my $min_N_inv = 3; -#Use input file with format "chrom start end sample cell class scalar num_bins sv_call_name sv_call_haplotype sv_call_name_2nd sv_call_haplotype_2nd llr_to_ref llr_to_2nd" +#Use input file with format "chrom start end sample cell class scalar num_bins sv_call_name sv_call_haplotype sv_call_name_2nd sv_call_haplotype_2nd llr_to_ref llr_to_2nd af" # # e.g. # @@ -27,7 +27,7 @@ open FH, "$input_file" or die; while (<FH>) { chomp; next if ($_ =~ /^chrom/); #ignore first line - my ($chrom, $start, $end, $sample, $cell, $strand_state_class, $scalar, $num_bins, $sv_call_name, $sv_call_haplotype, $sv_call_name_2nd, $sv_call_haplotype_2nd, $llr_to_ref, $llr_to_2nd) = split (/\t/, $_); #split by TAB + my ($chrom, $start, $end, $sample, $cell, $strand_state_class, $scalar, $num_bins, $sv_call_name, $sv_call_haplotype, $sv_call_name_2nd, $sv_call_haplotype_2nd, $llr_to_ref, $llr_to_2nd, $af) = split (/\t/, $_); #split by TAB push (@{$STARTs{$chrom}}, $start); push (@{$ENDs{$chrom}}, $end); push (@{$SV_TYPEs{$chrom}}, $sv_call_name); @@ -74,7 +74,7 @@ foreach my $chrom (sort keys %STARTs) { #--- output file with filters -print "chrom\tstart\tend\tsample\tcell\tstrand_state_class\tscalar\tnum_bins\tsv_call_name\tsv_call_haplotype\tsv_call_name_2nd\tsv_call_haplotype_2nd\tllr_to_ref\tllr_to_2nd\tpass/fail\n"; +print "chrom\tstart\tend\tsample\tcell\tstrand_state_class\tscalar\tnum_bins\tsv_call_name\tsv_call_haplotype\tsv_call_name_2nd\tsv_call_haplotype_2nd\tllr_to_ref\tllr_to_2nd\taf\tpass/fail\n"; foreach my $chrom (sort keys %STARTs) { for (my $i=0; $i<@{$STARTs{$chrom}}; $i++) { $FILTER{$chrom}{$STARTs{$chrom}[$i]}{$ENDs{$chrom}[$i]}= 'PASS' unless (exists($FILTER{$chrom}{$STARTs{$chrom}[$i]}{$ENDs{$chrom}[$i]})); #initialize diff --git a/utils/mosaiClassifier/makeSVcalls.R b/utils/mosaiClassifier/makeSVcalls.R index af32890..e374b82 100644 --- a/utils/mosaiClassifier/makeSVcalls.R +++ b/utils/mosaiClassifier/makeSVcalls.R @@ -77,11 +77,16 @@ makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE, use.hap by = .(chrom, start, end, sample, cell)] probs <- probs[rank == 1] + # compute the number of cells (used below to compute AFs) + ncells = length(unique(probs[,cell])) - # Clean up table - probs <- probs[, .(chrom, start, end, sample, cell, class, scalar, num_bins, sv_call_name, sv_call_haplotype, sv_call_name_2nd, sv_call_haplotype_2nd, llr_to_ref, llr_to_2nd)] + # select calls to retain and clean up table by removing unnecessary columns + probs <- probs[sv_call_name != "ref_hom" & llr_to_ref > llr_thr, .(chrom, start, end, sample, cell, class, scalar, num_bins, sv_call_name, sv_call_haplotype, sv_call_name_2nd, sv_call_haplotype_2nd, llr_to_ref, llr_to_2nd)] - return(probs[sv_call_name != "ref_hom" & llr_to_ref > llr_thr]) + # compute allele frequencies + probs[, af := .N / ncells , by = .(chrom, start, end, sample, sv_call_name)] + + return(probs) } forceBiallelic <- function(probs, penalize_factor = 0.1) -- GitLab