diff --git a/Snakefile b/Snakefile index 9ca01cacd37be6bbc0ff41c978ce92ab67794eed..16203ee8033c74af06d7e17e8f23fa93c0d59636 100644 --- a/Snakefile +++ b/Snakefile @@ -30,7 +30,17 @@ import os.path # * calculate a segmentation into potential SVs using Mosaicatcher -METHODS = ["simpleCalls_llr1_poppriorsFALSE_regfactor10", "simpleCalls_llr4_poppriorsFALSE_regfactor10", "simpleCalls_llr1_poppriorsTRUE_regfactor6", "simpleCalls_llr4_poppriorsTRUE_regfactor6", "simpleCalls_llr1_poppriorsTRUE_regfactor10", "simpleCalls_llr4_poppriorsTRUE_regfactor10", "biAllelic_llr1", "biAllelic_llr4"] +METHODS = [ + "simpleCalls_llr1_poppriorsFALSE_haplotagsFALSE_regfactor10", + "simpleCalls_llr4_poppriorsFALSE_haplotagsFALSE_regfactor10", + "simpleCalls_llr1_poppriorsTRUE_haplotagsFALSE_regfactor6", + "simpleCalls_llr4_poppriorsTRUE_haplotagsFALSE_regfactor6", + "simpleCalls_llr1_poppriorsTRUE_haplotagsFALSE_regfactor10", + "simpleCalls_llr4_poppriorsTRUE_haplotagsFALSE_regfactor10", + "simpleCalls_llr4_poppriorsTRUE_haplotagsTRUE_regfactor6", + "biAllelic_llr1", + "biAllelic_llr4" +] singularity: "docker://smei/mosaicatcher-pipeline:v0.1" @@ -251,7 +261,7 @@ rule plot_SV_calls: calls={input.calls} \ {input.counts} \ {wildcards.chrom} \ - {output} 2>&1 > {log} + {output} > {log} 2>&1 """ rule plot_SV_calls_simulated: @@ -475,11 +485,11 @@ rule plot_heatmap: rule mosaiClassifier_make_call: input: - probs = "sv_probabilities/{sample}/{windows}.{bpdens}/probabilities.Rdata" + probs = 'haplotag/table/{sample}/haplotag-likelihoods.{windows}.{bpdens}.data' output: - "sv_calls/{sample}/{windows}.{bpdens}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_regfactor{regfactor,[0-9]+}.txt" + "sv_calls/{sample}/{windows}.{bpdens}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_regfactor{regfactor,[0-9]+}.txt" log: - "log/mosaiClassifier_make_call/{sample}/{windows}.{bpdens}.llr{llr}.poppriors{pop_priors}.regfactor{regfactor}.log" + "log/mosaiClassifier_make_call/{sample}/{windows}.{bpdens}.llr{llr}.poppriors{pop_priors}.haplotags{use_haplotags}.regfactor{regfactor}.log" script: "utils/mosaiClassifier_call.snakemake.R" diff --git a/utils/mosaiClassifier/makeSVcalls.R b/utils/mosaiClassifier/makeSVcalls.R index 2024919e41285e7c7b1a51543a519a7e36d65c65..e33649559ddff5dc59a28f76bac9333ad5aefc3b 100644 --- a/utils/mosaiClassifier/makeSVcalls.R +++ b/utils/mosaiClassifier/makeSVcalls.R @@ -9,7 +9,7 @@ source("utils/mosaiClassifier/mosaiClassifier.R") #' @author Sascha Meiers #' @export #' -makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE) { +makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE, use.haplotags = FALSE) { assert_that(is.data.table(probs), "sample" %in% colnames(probs), @@ -25,6 +25,12 @@ makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE) { assert_that("nb_hap_pp" %in% colnames(probs)) %>% invisible setkey(probs, chrom, start, end, sample, cell) + if (use.haplotags) { + assert_that("haplotag.prob" %in% colnames(probs)) %>% invisible + probs[!is.na(haplotag.prob), nb_hap_ll := nb_hap_ll*haplotag.prob ] + probs[!is.na(haplotag.prob), nb_hap_pp := nb_hap_pp*haplotag.prob ] + } + if (use.pop.priors) { message('Applying population priors') probs[, pop.prior := sum(nb_hap_ll) , by = .(chrom, start, end, sample, haplotype)] @@ -38,7 +44,7 @@ makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE) { probs[, ref_hom_pp := .SD[haplo_name == "ref_hom", nb_hap_pp], by = .(chrom, start, end, sample, cell)] - + # order the different haplotype states based on their posterior prob. (nb_hap_pp) # and keep only the two most likely states probs[, diff --git a/utils/mosaiClassifier_call.snakemake.R b/utils/mosaiClassifier_call.snakemake.R index 41f048cc086fbcbe2650a5d45228538d677ff7d5..b373ddd8118e58f11c2c9b97ec04744a9f97960d 100644 --- a/utils/mosaiClassifier_call.snakemake.R +++ b/utils/mosaiClassifier_call.snakemake.R @@ -8,9 +8,10 @@ source("utils/mosaiClassifier/makeSVcalls.R") probs = readRDS(snakemake@input[["probs"]]) llr = as.numeric(snakemake@wildcards[["llr"]]) use.pop.priors = eval(parse(text=snakemake@wildcards[["pop_priors"]])) +use.haplotags = eval(parse(text=snakemake@wildcards[["use_haplotags"]])) regularizationFactor = 10^(-as.numeric(snakemake@wildcards[["regfactor"]])) probs <- mosaiClassifierPostProcessing(probs, regularizationFactor = regularizationFactor) -tab <- makeSVCallSimple(probs, llr_thr = llr, use.pop.priors = use.pop.priors) +tab <- makeSVCallSimple(probs, llr_thr = llr, use.pop.priors = use.pop.priors, use.haplotags = use.haplotags) write.table(tab, file = snakemake@output[[1]], sep = "\t", quote=F, row.names = F, col.names = T)