diff --git a/Snakefile b/Snakefile index 4aa490763cb3351fdb6d6006faf1f48bc8a3196f..e04e1f9590cfa1d25445865e02c3c35e7acc0dfd 100644 --- a/Snakefile +++ b/Snakefile @@ -557,11 +557,13 @@ rule plot_heatmap: rule mosaiClassifier_make_call: input: - probs = 'haplotag/table/{sample}/haplotag-likelihoods.{windows}.{bpdens}.Rdata' + probs = 'haplotag/table/{sample}/haplotag-likelihoods.{window}_fixed_norm.{bpdens}.Rdata' output: - "sv_calls/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}.txt" + "sv_calls/{sample}/{window}_fixed_norm.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}.txt" + params: + minFrac_used_bins = 0.8 log: - "log/mosaiClassifier_make_call/{sample}/{windows}.{bpdens}.llr{llr}.poppriors{pop_priors}.haplotags{use_haplotags}.gtcutoff{gtcutoff}.regfactor{regfactor}.log" + "log/mosaiClassifier_make_call/{sample}/{window}_fixed_norm.{bpdens}.llr{llr}.poppriors{pop_priors}.haplotags{use_haplotags}.gtcutoff{gtcutoff}.regfactor{regfactor}.log" script: "utils/mosaiClassifier_call.snakemake.R" diff --git a/utils/mosaiClassifier/makeSVcalls.R b/utils/mosaiClassifier/makeSVcalls.R index 3fb4288139a9b21fa61d8c9ed2277692ec03e705..af32890620465aa0da1460f96b56c1509483b572 100644 --- a/utils/mosaiClassifier/makeSVcalls.R +++ b/utils/mosaiClassifier/makeSVcalls.R @@ -9,7 +9,7 @@ source("utils/mosaiClassifier/mosaiClassifier.R") #' @author Sascha Meiers #' @export #' -makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE, use.haplotags = FALSE, genotype.cutoff = 0.0) { +makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE, use.haplotags = FALSE, genotype.cutoff = 0.0, bin.size, minFrac.used.bins = 0.5) { assert_that(is.data.table(probs), "sample" %in% colnames(probs), @@ -23,6 +23,10 @@ makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE, use.hap # make sure post-processing was done beforehands assert_that("nb_hap_pp" %in% colnames(probs)) %>% invisible + + # kick out the segments with a large fraction of blacklisted bins + probs <- probs[num_bins*bin.size/(end-start) >= minFrac.used.bins] + setkey(probs, chrom, start, end, sample, cell) if (use.pop.priors) { diff --git a/utils/mosaiClassifier_call.snakemake.R b/utils/mosaiClassifier_call.snakemake.R index 9566303d6f16f1a61d588b15cff91a3c44e18f75..361ae5940897b9fea14b9a30e701dbd4cee33d87 100644 --- a/utils/mosaiClassifier_call.snakemake.R +++ b/utils/mosaiClassifier_call.snakemake.R @@ -11,8 +11,10 @@ use.pop.priors = eval(parse(text=snakemake@wildcards[["pop_priors"]])) use.haplotags = eval(parse(text=snakemake@wildcards[["use_haplotags"]])) regularizationFactor = 10^(-as.numeric(snakemake@wildcards[["regfactor"]])) genotype.cutoff = as.numeric(snakemake@wildcards[["gtcutoff"]]) +minFrac.used.bins = as.numeric(snakemake@params[["minFrac_used_bins"]]) +bin.size = as.numeric(snakemake@wildcards[["window"]]) probs <- mosaiClassifierPostProcessing(probs, regularizationFactor = regularizationFactor) -tab <- makeSVCallSimple(probs, llr_thr = llr, use.pop.priors = use.pop.priors, use.haplotags = use.haplotags, genotype.cutoff = genotype.cutoff) +tab <- makeSVCallSimple(probs, llr_thr = llr, use.pop.priors = use.pop.priors, use.haplotags = use.haplotags, genotype.cutoff = genotype.cutoff, bin.size, minFrac.used.bins = minFrac.used.bins) write.table(tab, file = snakemake@output[[1]], sep = "\t", quote=F, row.names = F, col.names = T)