Skip to content
Snippets Groups Projects
Commit 38c4cadb authored by Tobias Marschall's avatar Tobias Marschall
Browse files

Add means to narrow down the number of considered genotypes (a bit less...

Add means to narrow down the number of considered genotypes (a bit less drastic than forcing biallelic); apply haplotag probabilities only after that
parent 0e5d0921
No related branches found
No related tags found
No related merge requests found
......@@ -31,15 +31,15 @@ import os.path
METHODS = [
"simpleCalls_llr1_poppriorsFALSE_haplotagsFALSE_regfactor10",
"simpleCalls_llr4_poppriorsFALSE_haplotagsFALSE_regfactor10",
"simpleCalls_llr1_poppriorsTRUE_haplotagsFALSE_regfactor6",
"simpleCalls_llr4_poppriorsTRUE_haplotagsFALSE_regfactor6",
"simpleCalls_llr1_poppriorsTRUE_haplotagsFALSE_regfactor10",
"simpleCalls_llr4_poppriorsTRUE_haplotagsFALSE_regfactor10",
"simpleCalls_llr4_poppriorsTRUE_haplotagsTRUE_regfactor6",
"biAllelic_llr1",
"biAllelic_llr4"
"simpleCalls_llr1_poppriorsFALSE_haplotagsFALSE_gtcutoff0_regfactor10",
"simpleCalls_llr4_poppriorsFALSE_haplotagsFALSE_gtcutoff0_regfactor10",
"simpleCalls_llr4_poppriorsFALSE_haplotagsFALSE_gtcutoff0_regfactor6",
"simpleCalls_llr4_poppriorsTRUE_haplotagsFALSE_gtcutoff0_regfactor6",
"simpleCalls_llr4_poppriorsTRUE_haplotagsFALSE_gtcutoff0.05_regfactor6",
"simpleCalls_llr4_poppriorsTRUE_haplotagsFALSE_gtcutoff0.2_regfactor6",
"simpleCalls_llr4_poppriorsTRUE_haplotagsTRUE_gtcutoff0_regfactor6",
"simpleCalls_llr4_poppriorsTRUE_haplotagsTRUE_gtcutoff0.05_regfactor6",
"simpleCalls_llr4_poppriorsTRUE_haplotagsTRUE_gtcutoff0.2_regfactor6",
]
......@@ -487,9 +487,9 @@ rule mosaiClassifier_make_call:
input:
probs = 'haplotag/table/{sample}/haplotag-likelihoods.{windows}.{bpdens}.data'
output:
"sv_calls/{sample}/{windows}.{bpdens}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_regfactor{regfactor,[0-9]+}.txt"
"sv_calls/{sample}/{windows}.{bpdens}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}.txt"
log:
"log/mosaiClassifier_make_call/{sample}/{windows}.{bpdens}.llr{llr}.poppriors{pop_priors}.haplotags{use_haplotags}.regfactor{regfactor}.log"
"log/mosaiClassifier_make_call/{sample}/{windows}.{bpdens}.llr{llr}.poppriors{pop_priors}.haplotags{use_haplotags}.gtcutoff{gtcutoff}.regfactor{regfactor}.log"
script:
"utils/mosaiClassifier_call.snakemake.R"
......
......@@ -9,7 +9,7 @@ source("utils/mosaiClassifier/mosaiClassifier.R")
#' @author Sascha Meiers
#' @export
#'
makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE, use.haplotags = FALSE) {
makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE, use.haplotags = FALSE, genotype.cutoff = 0.0) {
assert_that(is.data.table(probs),
"sample" %in% colnames(probs),
......@@ -25,12 +25,6 @@ makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE, use.hap
assert_that("nb_hap_pp" %in% colnames(probs)) %>% invisible
setkey(probs, chrom, start, end, sample, cell)
if (use.haplotags) {
assert_that("haplotag.prob" %in% colnames(probs)) %>% invisible
probs[!is.na(haplotag.prob), nb_hap_ll := nb_hap_ll*haplotag.prob ]
probs[!is.na(haplotag.prob), nb_hap_pp := nb_hap_pp*haplotag.prob ]
}
if (use.pop.priors) {
message('Applying population priors')
probs[, pop.prior := sum(nb_hap_ll) , by = .(chrom, start, end, sample, haplotype)]
......@@ -40,6 +34,18 @@ makeSVCallSimple <- function(probs, llr_thr = 1, use.pop.priors = FALSE, use.hap
message('Skipping population priors')
}
if (genotype.cutoff > 0.0) {
probs[, nb_hap_pp_norm := nb_hap_pp/sum(nb_hap_pp) , by = .(chrom, start, end, sample, cell)]
probs[, nb_hap_pp_norm_pop := sum(nb_hap_pp_norm) , by = .(chrom, start, end, sample, haplotype)]
probs[, nb_hap_pp_norm_pop := nb_hap_pp_norm_pop/sum(nb_hap_pp_norm_pop) , by = .(chrom, start, end, sample, cell)]
probs[, nb_hap_pp := ifelse(nb_hap_pp_norm_pop>genotype.cutoff, nb_hap_pp, 0.0)]
}
if (use.haplotags) {
assert_that("haplotag.prob" %in% colnames(probs)) %>% invisible
probs[!is.na(haplotag.prob), nb_hap_pp := nb_hap_pp*haplotag.prob ]
}
# annotate the ref_hom posterior probability per segment / cell
probs[,
ref_hom_pp := .SD[haplo_name == "ref_hom", nb_hap_pp],
......
......@@ -10,8 +10,9 @@ llr = as.numeric(snakemake@wildcards[["llr"]])
use.pop.priors = eval(parse(text=snakemake@wildcards[["pop_priors"]]))
use.haplotags = eval(parse(text=snakemake@wildcards[["use_haplotags"]]))
regularizationFactor = 10^(-as.numeric(snakemake@wildcards[["regfactor"]]))
genotype.cutoff = as.numeric(snakemake@wildcards[["gtcutoff"]])
probs <- mosaiClassifierPostProcessing(probs, regularizationFactor = regularizationFactor)
tab <- makeSVCallSimple(probs, llr_thr = llr, use.pop.priors = use.pop.priors, use.haplotags = use.haplotags)
tab <- makeSVCallSimple(probs, llr_thr = llr, use.pop.priors = use.pop.priors, use.haplotags = use.haplotags, genotype.cutoff = genotype.cutoff)
write.table(tab, file = snakemake@output[[1]], sep = "\t", quote=F, row.names = F, col.names = T)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment