Skip to content
Snippets Groups Projects
Commit 70ddcf13 authored by maryamghr's avatar maryamghr
Browse files

added scripts and rules for computing haplotagged read counts probabilities

parent 8df6b0cb
No related branches found
No related tags found
No related merge requests found
......@@ -711,6 +711,16 @@ rule create_haplotag_table:
script:
"utils/haplotagTable.snakemake.R"
rule create_haplotag_likelihoods:
input:
haplotag_table='haplotag/table/{sample}/haplotag-counts.{windows}.{bpdens}.tsv'
sv_probs_table = 'sv_probabilities/{sample}/{windows}.{bpdens}/probabilities.Rdata'
output: 'haplotag/table/{sample}/haplotag-likelihoods.{windows}.{bpdens}.data'
log:
"log/create_haplotag_likelihoods/{sample}.log"
script:
"utils/haplotagProbs.snakemake.R"
################################################################################
# Call SNVs #
......
suppressMessages(library(mc2d))
suppressMessages(library(dplyr))
suppressMessages(library(data.table))
suppressMessages(library(assertthat))
source("utils/mosaiClassifier/getDispParAndSegType.R")
# defining multinomial parameters based on haplosegType and alpha
getMultinomialParams <- function(haploSegType, alpha)
{
CNs <- NULL
if (haploSegType=="?"){
CNs <- rep(-1, 4)
}
else{
CNs <- as.numeric(unlist(strsplit(haploSegType, "")))
CNs[CNs==0]=alpha
CNs <- CNs/sum(CNs)
}
return(list(multi.p.W.h1=CNs[1],
multi.p.C.h1=CNs[2],
multi.p.W.h2=CNs[3],
multi.p.C.h2=CNs[4]))
}
# adding an extra column for multinomial probabilities
addHaploCountProbs <- function(probs, haploCounts, alpha)
{
assert_that(is.data.table(probs),
"sample" %in% colnames(probs),
"cell" %in% colnames(probs),
"chrom" %in% colnames(probs),
"start" %in% colnames(probs),
"end" %in% colnames(probs),
"nb_p" %in% colnames(probs),
"expected" %in% colnames(probs),
"W" %in% colnames(probs),
"C" %in% colnames(probs),
"class" %in% colnames(probs),
"haplotype" %in% colnames(probs)) %>% invisible
# sorting probs
setkey(probs, sample, chrom, start, end, cell, haplotype)
# adding segTypes and haplo segTypes
probs[, `:=`(segtype=paste0(getSegType(class[1], haplotype[1]), collapse = ''),
haploSegType=ifelse(class=="?", "?", paste0(c(getSegType(substr(class[1],1,1), substr(haplotype[1],1,2)),
getSegType(substr(class[1],2,2), substr(haplotype[1],3,4))), collapse = ''))),
by = .(class, haplotype)]
# merging probs and haplotype counts
probs <- merge(probs,
haploCounts[,.(chrom, start, end, cell, watson.H1, crick.H1, watson.H2, crick.H2)],
by=c("chrom", "start", "end", "cell"),
allow.cartesian = T,
all.x = T)
# define multinomial probabilities for each of the four different outcomes based on haplo segtype
probs[, c("multi.p.W.h1", "multi.p.C.h1", "multi.p.W.h2", "multi.p.C.h2"):=getMultinomialParams(haploSegType[1], alpha),
by=haploSegType]
# add a column for multinomial probabilities of haplotagged read counts
probs.new <- probs[!is.na(watson.H1) & !is.na(multi.p.W.h1)]
probs.new[, multi.prob:=dmultinomial(x=as.matrix(probs.new[,.(watson.H1, crick.H1, watson.H2, crick.H2)]),
prob=as.matrix(probs.new[,.(multi.p.W.h1, multi.p.C.h1, multi.p.W.h2, multi.p.C.h2)]))]
# merge probs.new and probs
probs <- merge(probs,
probs.new[, .(chrom, start, end, cell, sample, haplotype, multi.prob)],
by=c("chrom", "start", "end", "cell", "sample", "haplotype"),
all.x = T,
allow.cartesian = T)
return(probs)
}
\ No newline at end of file
sink(snakemake@log[[1]])
haplotagCounts <- snakemake@input[[haplotag_table]]
probs <- snakemake@input[[sv_probs_table]]
probs <- addHaploCountProbs(probs, haploCounts, alpha)
save(probs, file=snakemake@output[[1]])
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment