From 70ddcf131687bbf9bd7be896e9f1768d43793033 Mon Sep 17 00:00:00 2001 From: maryamghr <maryam@lap-13-67.mmci.uni-saarland.de> Date: Tue, 17 Jul 2018 17:33:45 +0200 Subject: [PATCH] added scripts and rules for computing haplotagged read counts probabilities --- Snakefile | 10 +++++ utils/haplotagProbs.R | 77 +++++++++++++++++++++++++++++++++ utils/haplotagProbs.snakemake.R | 8 ++++ 3 files changed, 95 insertions(+) create mode 100644 utils/haplotagProbs.R create mode 100644 utils/haplotagProbs.snakemake.R diff --git a/Snakefile b/Snakefile index 9f5afef..8116d8e 100644 --- a/Snakefile +++ b/Snakefile @@ -711,6 +711,16 @@ rule create_haplotag_table: script: "utils/haplotagTable.snakemake.R" +rule create_haplotag_likelihoods: + input: + haplotag_table='haplotag/table/{sample}/haplotag-counts.{windows}.{bpdens}.tsv' + sv_probs_table = 'sv_probabilities/{sample}/{windows}.{bpdens}/probabilities.Rdata' + output: 'haplotag/table/{sample}/haplotag-likelihoods.{windows}.{bpdens}.data' + log: + "log/create_haplotag_likelihoods/{sample}.log" + script: + "utils/haplotagProbs.snakemake.R" + ################################################################################ # Call SNVs # diff --git a/utils/haplotagProbs.R b/utils/haplotagProbs.R new file mode 100644 index 0000000..09cfe4c --- /dev/null +++ b/utils/haplotagProbs.R @@ -0,0 +1,77 @@ +suppressMessages(library(mc2d)) +suppressMessages(library(dplyr)) +suppressMessages(library(data.table)) +suppressMessages(library(assertthat)) +source("utils/mosaiClassifier/getDispParAndSegType.R") + +# defining multinomial parameters based on haplosegType and alpha +getMultinomialParams <- function(haploSegType, alpha) +{ + CNs <- NULL + if (haploSegType=="?"){ + CNs <- rep(-1, 4) + } + + else{ + CNs <- as.numeric(unlist(strsplit(haploSegType, ""))) + CNs[CNs==0]=alpha + CNs <- CNs/sum(CNs) + } + + return(list(multi.p.W.h1=CNs[1], + multi.p.C.h1=CNs[2], + multi.p.W.h2=CNs[3], + multi.p.C.h2=CNs[4])) +} + +# adding an extra column for multinomial probabilities +addHaploCountProbs <- function(probs, haploCounts, alpha) +{ + assert_that(is.data.table(probs), + "sample" %in% colnames(probs), + "cell" %in% colnames(probs), + "chrom" %in% colnames(probs), + "start" %in% colnames(probs), + "end" %in% colnames(probs), + "nb_p" %in% colnames(probs), + "expected" %in% colnames(probs), + "W" %in% colnames(probs), + "C" %in% colnames(probs), + "class" %in% colnames(probs), + "haplotype" %in% colnames(probs)) %>% invisible + + # sorting probs + setkey(probs, sample, chrom, start, end, cell, haplotype) + + # adding segTypes and haplo segTypes + probs[, `:=`(segtype=paste0(getSegType(class[1], haplotype[1]), collapse = ''), + haploSegType=ifelse(class=="?", "?", paste0(c(getSegType(substr(class[1],1,1), substr(haplotype[1],1,2)), + getSegType(substr(class[1],2,2), substr(haplotype[1],3,4))), collapse = ''))), + by = .(class, haplotype)] + + # merging probs and haplotype counts + probs <- merge(probs, + haploCounts[,.(chrom, start, end, cell, watson.H1, crick.H1, watson.H2, crick.H2)], + by=c("chrom", "start", "end", "cell"), + allow.cartesian = T, + all.x = T) + + # define multinomial probabilities for each of the four different outcomes based on haplo segtype + probs[, c("multi.p.W.h1", "multi.p.C.h1", "multi.p.W.h2", "multi.p.C.h2"):=getMultinomialParams(haploSegType[1], alpha), + by=haploSegType] + + # add a column for multinomial probabilities of haplotagged read counts + probs.new <- probs[!is.na(watson.H1) & !is.na(multi.p.W.h1)] + + probs.new[, multi.prob:=dmultinomial(x=as.matrix(probs.new[,.(watson.H1, crick.H1, watson.H2, crick.H2)]), + prob=as.matrix(probs.new[,.(multi.p.W.h1, multi.p.C.h1, multi.p.W.h2, multi.p.C.h2)]))] + + # merge probs.new and probs + probs <- merge(probs, + probs.new[, .(chrom, start, end, cell, sample, haplotype, multi.prob)], + by=c("chrom", "start", "end", "cell", "sample", "haplotype"), + all.x = T, + allow.cartesian = T) + + return(probs) +} \ No newline at end of file diff --git a/utils/haplotagProbs.snakemake.R b/utils/haplotagProbs.snakemake.R new file mode 100644 index 0000000..14e14df --- /dev/null +++ b/utils/haplotagProbs.snakemake.R @@ -0,0 +1,8 @@ +sink(snakemake@log[[1]]) + +haplotagCounts <- snakemake@input[[haplotag_table]] +probs <- snakemake@input[[sv_probs_table]] + +probs <- addHaploCountProbs(probs, haploCounts, alpha) + +save(probs, file=snakemake@output[[1]]) \ No newline at end of file -- GitLab