From 8a5f43eff7155ee0d9f28df0745f6d88aa09d3d7 Mon Sep 17 00:00:00 2001 From: Maryam Ghareghani <mgharegh@d3compute01.mpi-inf.mpg.de> Date: Fri, 3 Aug 2018 15:52:07 +0200 Subject: [PATCH] added a cutoff on the fraction of blacklisted bins for SV calling --- Snakefile | 8 +++++--- utils/mosaiClassifier/makeSVcalls.R | 6 +++++- utils/mosaiClassifier_call.snakemake.R | 4 +++- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/Snakefile b/Snakefile index 4aa4907..e04e1f9 100644 --- a/Snakefile +++ b/Snakefile @@ -557,11 +557,13 @@ rule plot_heatmap: rule mosaiClassifier_make_call: input: - probs = 'haplotag/table/{sample}/haplotag-likelihoods.{windows}.{bpdens}.Rdata' + probs = 'haplotag/table/{sample}/haplotag-likelihoods.{window}_fixed_norm.{bpdens}.Rdata' output: - "sv_calls/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}.txt" + "sv_calls/{sample}/{window}_fixed_norm.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}.txt" + params: + minFrac_used_bins = 0.8 log: - "log/mosaiClassifier_make_call/{sample}/{windows}.{bpdens}.llr{llr}.poppriors{pop_priors}.haplotags{use_haplotags}.gtcutoff{gtcutoff}.regfactor{regfactor}.log" + "log/mosaiClassifier_make_call/{sample}/{window}_fixed_norm.{bpdens}.llr{llr}.poppriors{pop_priors}.haplotags{use_haplotags}.gtcutoff{gtcutoff}.regfactor{regfactor}.log" script: "utils/mosaiClassifier_call.snakemake.R" diff --git a/utils/mosaiClassifier/makeSVcalls.R b/utils/mosaiClassifier/makeSVcalls.R index 3fb4288..af32890 100644 --- a/utils/mosaiClassifier/makeSVcalls.R +++ b/utils/mosaiClassifier/makeSVcalls.R @@ -9,7 +9,7 @@ source("utils/mosaiClassifier/mosaiClassifier.R") #' @author Sascha Meiers #' @export #' -makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE, use.haplotags = FALSE, genotype.cutoff = 0.0) { +makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE, use.haplotags = FALSE, genotype.cutoff = 0.0, bin.size, minFrac.used.bins = 0.5) { assert_that(is.data.table(probs), "sample" %in% colnames(probs), @@ -23,6 +23,10 @@ makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE, use.hap # make sure post-processing was done beforehands assert_that("nb_hap_pp" %in% colnames(probs)) %>% invisible + + # kick out the segments with a large fraction of blacklisted bins + probs <- probs[num_bins*bin.size/(end-start) >= minFrac.used.bins] + setkey(probs, chrom, start, end, sample, cell) if (use.pop.priors) { diff --git a/utils/mosaiClassifier_call.snakemake.R b/utils/mosaiClassifier_call.snakemake.R index 9566303..361ae59 100644 --- a/utils/mosaiClassifier_call.snakemake.R +++ b/utils/mosaiClassifier_call.snakemake.R @@ -11,8 +11,10 @@ use.pop.priors = eval(parse(text=snakemake@wildcards[["pop_priors"]])) use.haplotags = eval(parse(text=snakemake@wildcards[["use_haplotags"]])) regularizationFactor = 10^(-as.numeric(snakemake@wildcards[["regfactor"]])) genotype.cutoff = as.numeric(snakemake@wildcards[["gtcutoff"]]) +minFrac.used.bins = as.numeric(snakemake@params[["minFrac_used_bins"]]) +bin.size = as.numeric(snakemake@wildcards[["window"]]) probs <- mosaiClassifierPostProcessing(probs, regularizationFactor = regularizationFactor) -tab <- makeSVCallSimple(probs, llr_thr = llr, use.pop.priors = use.pop.priors, use.haplotags = use.haplotags, genotype.cutoff = genotype.cutoff) +tab <- makeSVCallSimple(probs, llr_thr = llr, use.pop.priors = use.pop.priors, use.haplotags = use.haplotags, genotype.cutoff = genotype.cutoff, bin.size, minFrac.used.bins = minFrac.used.bins) write.table(tab, file = snakemake@output[[1]], sep = "\t", quote=F, row.names = F, col.names = T) -- GitLab