Skip to content
Snippets Groups Projects
Commit 268d8d55 authored by Tobias Marschall's avatar Tobias Marschall
Browse files

Add AF to SV call tables

parent ba94e52d
No related branches found
No related tags found
No related merge requests found
......@@ -591,7 +591,7 @@ rule filter_calls:
output:
calls = "sv_calls/{sample}/{window}_fixed_norm.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}_filterTRUE.txt"
shell:
'utils/filter_MosaiCatcher_calls.pl {input.calls} | awk \'BEGIN {{OFS="\\t"}} $15!="FAIL" {{$15=""; print}}\' > {output.calls}'
'utils/filter_MosaiCatcher_calls.pl {input.calls} | awk \'BEGIN {{OFS="\\t"}} $16!="FAIL" {{$16=""; print}}\' > {output.calls}'
rule mosaiClassifier_calc_probs:
......
......@@ -3,7 +3,7 @@ use strict;
my $min_N_inv = 3;
#Use input file with format "chrom start end sample cell class scalar num_bins sv_call_name sv_call_haplotype sv_call_name_2nd sv_call_haplotype_2nd llr_to_ref llr_to_2nd"
#Use input file with format "chrom start end sample cell class scalar num_bins sv_call_name sv_call_haplotype sv_call_name_2nd sv_call_haplotype_2nd llr_to_ref llr_to_2nd af"
#
# e.g.
#
......@@ -27,7 +27,7 @@ open FH, "$input_file" or die;
while (<FH>) {
chomp;
next if ($_ =~ /^chrom/); #ignore first line
my ($chrom, $start, $end, $sample, $cell, $strand_state_class, $scalar, $num_bins, $sv_call_name, $sv_call_haplotype, $sv_call_name_2nd, $sv_call_haplotype_2nd, $llr_to_ref, $llr_to_2nd) = split (/\t/, $_); #split by TAB
my ($chrom, $start, $end, $sample, $cell, $strand_state_class, $scalar, $num_bins, $sv_call_name, $sv_call_haplotype, $sv_call_name_2nd, $sv_call_haplotype_2nd, $llr_to_ref, $llr_to_2nd, $af) = split (/\t/, $_); #split by TAB
push (@{$STARTs{$chrom}}, $start);
push (@{$ENDs{$chrom}}, $end);
push (@{$SV_TYPEs{$chrom}}, $sv_call_name);
......@@ -74,7 +74,7 @@ foreach my $chrom (sort keys %STARTs) {
#--- output file with filters
print "chrom\tstart\tend\tsample\tcell\tstrand_state_class\tscalar\tnum_bins\tsv_call_name\tsv_call_haplotype\tsv_call_name_2nd\tsv_call_haplotype_2nd\tllr_to_ref\tllr_to_2nd\tpass/fail\n";
print "chrom\tstart\tend\tsample\tcell\tstrand_state_class\tscalar\tnum_bins\tsv_call_name\tsv_call_haplotype\tsv_call_name_2nd\tsv_call_haplotype_2nd\tllr_to_ref\tllr_to_2nd\taf\tpass/fail\n";
foreach my $chrom (sort keys %STARTs) {
for (my $i=0; $i<@{$STARTs{$chrom}}; $i++) {
$FILTER{$chrom}{$STARTs{$chrom}[$i]}{$ENDs{$chrom}[$i]}= 'PASS' unless (exists($FILTER{$chrom}{$STARTs{$chrom}[$i]}{$ENDs{$chrom}[$i]})); #initialize
......
......@@ -77,11 +77,16 @@ makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE, use.hap
by = .(chrom, start, end, sample, cell)]
probs <- probs[rank == 1]
# compute the number of cells (used below to compute AFs)
ncells = length(unique(probs[,cell]))
# Clean up table
probs <- probs[, .(chrom, start, end, sample, cell, class, scalar, num_bins, sv_call_name, sv_call_haplotype, sv_call_name_2nd, sv_call_haplotype_2nd, llr_to_ref, llr_to_2nd)]
# select calls to retain and clean up table by removing unnecessary columns
probs <- probs[sv_call_name != "ref_hom" & llr_to_ref > llr_thr, .(chrom, start, end, sample, cell, class, scalar, num_bins, sv_call_name, sv_call_haplotype, sv_call_name_2nd, sv_call_haplotype_2nd, llr_to_ref, llr_to_2nd)]
return(probs[sv_call_name != "ref_hom" & llr_to_ref > llr_thr])
# compute allele frequencies
probs[, af := .N / ncells , by = .(chrom, start, end, sample, sv_call_name)]
return(probs)
}
forceBiallelic <- function(probs, penalize_factor = 0.1)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment