From 8a5f43eff7155ee0d9f28df0745f6d88aa09d3d7 Mon Sep 17 00:00:00 2001
From: Maryam Ghareghani <mgharegh@d3compute01.mpi-inf.mpg.de>
Date: Fri, 3 Aug 2018 15:52:07 +0200
Subject: [PATCH] added a cutoff on the fraction of blacklisted bins for SV
 calling

---
 Snakefile                              | 8 +++++---
 utils/mosaiClassifier/makeSVcalls.R    | 6 +++++-
 utils/mosaiClassifier_call.snakemake.R | 4 +++-
 3 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/Snakefile b/Snakefile
index 4aa4907..e04e1f9 100644
--- a/Snakefile
+++ b/Snakefile
@@ -557,11 +557,13 @@ rule plot_heatmap:
 
 rule mosaiClassifier_make_call:
     input:
-        probs = 'haplotag/table/{sample}/haplotag-likelihoods.{windows}.{bpdens}.Rdata'
+        probs = 'haplotag/table/{sample}/haplotag-likelihoods.{window}_fixed_norm.{bpdens}.Rdata'
     output:
-        "sv_calls/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}.txt"
+        "sv_calls/{sample}/{window}_fixed_norm.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}.txt"
+    params:
+        minFrac_used_bins = 0.8
     log:
-        "log/mosaiClassifier_make_call/{sample}/{windows}.{bpdens}.llr{llr}.poppriors{pop_priors}.haplotags{use_haplotags}.gtcutoff{gtcutoff}.regfactor{regfactor}.log"
+        "log/mosaiClassifier_make_call/{sample}/{window}_fixed_norm.{bpdens}.llr{llr}.poppriors{pop_priors}.haplotags{use_haplotags}.gtcutoff{gtcutoff}.regfactor{regfactor}.log"
     script:
         "utils/mosaiClassifier_call.snakemake.R"
 
diff --git a/utils/mosaiClassifier/makeSVcalls.R b/utils/mosaiClassifier/makeSVcalls.R
index 3fb4288..af32890 100644
--- a/utils/mosaiClassifier/makeSVcalls.R
+++ b/utils/mosaiClassifier/makeSVcalls.R
@@ -9,7 +9,7 @@ source("utils/mosaiClassifier/mosaiClassifier.R")
 #' @author Sascha Meiers
 #' @export
 #'
-makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE, use.haplotags = FALSE, genotype.cutoff = 0.0) {
+makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE, use.haplotags = FALSE, genotype.cutoff = 0.0, bin.size, minFrac.used.bins = 0.5) {
 
   assert_that(is.data.table(probs),
               "sample" %in% colnames(probs),
@@ -23,6 +23,10 @@ makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE, use.hap
 
   # make sure post-processing was done beforehands
   assert_that("nb_hap_pp" %in% colnames(probs)) %>% invisible
+
+  # kick out the segments with a large fraction of blacklisted bins
+  probs <- probs[num_bins*bin.size/(end-start) >= minFrac.used.bins]
+
   setkey(probs, chrom, start, end, sample, cell)
 
   if (use.pop.priors) {
diff --git a/utils/mosaiClassifier_call.snakemake.R b/utils/mosaiClassifier_call.snakemake.R
index 9566303..361ae59 100644
--- a/utils/mosaiClassifier_call.snakemake.R
+++ b/utils/mosaiClassifier_call.snakemake.R
@@ -11,8 +11,10 @@ use.pop.priors       = eval(parse(text=snakemake@wildcards[["pop_priors"]]))
 use.haplotags        = eval(parse(text=snakemake@wildcards[["use_haplotags"]]))
 regularizationFactor = 10^(-as.numeric(snakemake@wildcards[["regfactor"]]))
 genotype.cutoff      = as.numeric(snakemake@wildcards[["gtcutoff"]])
+minFrac.used.bins    = as.numeric(snakemake@params[["minFrac_used_bins"]])
+bin.size     	     = as.numeric(snakemake@wildcards[["window"]])
 
 probs <- mosaiClassifierPostProcessing(probs, regularizationFactor = regularizationFactor)
-tab <- makeSVCallSimple(probs, llr_thr = llr, use.pop.priors = use.pop.priors, use.haplotags = use.haplotags, genotype.cutoff = genotype.cutoff)
+tab <- makeSVCallSimple(probs, llr_thr = llr, use.pop.priors = use.pop.priors, use.haplotags = use.haplotags, genotype.cutoff = genotype.cutoff, bin.size, minFrac.used.bins = minFrac.used.bins)
 
 write.table(tab, file = snakemake@output[[1]], sep = "\t", quote=F, row.names = F, col.names = T)
-- 
GitLab