Skip to content
Snippets Groups Projects
Commit 15bce258 authored by Sascha Meiers's avatar Sascha Meiers
Browse files

Merge branch 'mosaiClassifier' of...

Merge branch 'mosaiClassifier' of https://github.com/friendsofstrandseq/pipeline into mosaiClassifier
parents ba49dbac 5a4cce4a
No related branches found
No related tags found
No related merge requests found
......@@ -7,10 +7,11 @@
get_hap_name <- function(hap.code)
{
hap.codes <- c("1010", "0010", "1000", "0000", "0110", "1001", "0101", "2010", "1020", "2020")
hap.codes <- c("1010", "0010", "1000", "0000", "0110", "1001", "0101", "2010", "1020", "2020", "1110", "1011")
hap.names <- c("ref_hom", "del_h1", "del_h2", "del_hom", # ref and del
"inv_h1", "inv_h2", "inv_hom", # inv
"dup_h1", "dup_h2", "dup_hom") # dup
"dup_h1", "dup_h2", "dup_hom", # dup
"idup_h1", "idup_h2") # inv-dup
hap.idx <- match(hap.code, hap.codes)
......
......@@ -191,7 +191,7 @@ mosaiClassifierCalcProbs <- function(probs, maximumCN=4, haplotypeMode=F, alpha=
###########
# reshuffling the columns
probs <- probs[,.(sample, cell, chrom, start, end, start, end, class, nb_p, expected,
probs <- probs[,.(sample, cell, chrom, start, end, class, nb_p, expected,
W, C, scalar, haplotype, Wcn, Ccn, haplo_name, geno_name)]
# computing dispersion parameters seperately for each segment and W and C counts ("Wcn" and "Ccn")
......@@ -246,6 +246,10 @@ mosaiClassifierPostProcessing <- function(probs, haplotypeMode=F, regularization
message(paste("the number of segments with 0 prob for all haplotypes = ",
segs_max_hap_nb_probs[max_nb_hap_ll==0, .N]))
# set a uniform prob on sce segs and the segs_max_hap_nb_probs=0
probs[segs_max_hap_nb_probs$max_nb_hap_ll==0,
`:=`(nb_hap_ll = 1.0, nb_geno_ll = 1.0)]
# add prior probs to the table
probs[,prior:=100L]
probs[haplo_name=="ref_hom",prior:=200L]
......@@ -254,12 +258,6 @@ mosaiClassifierPostProcessing <- function(probs, haplotypeMode=F, regularization
# compute the posteriori probs (add new columns)
probs[,nb_hap_pp:=.(nb_hap_ll*prior)][,nb_gt_pp:=.(nb_gt_ll*prior)]
# set a uniform prob on sce segs and the segs_max_hap_nb_probs=0
probs[segs_max_hap_nb_probs$max_nb_hap_ll==0,nb_hap_pp:=1L]
probs[class=="?", nb_hap_pp:=1L]
probs[segs_max_hap_nb_probs$max_nb_hap_ll==0,nb_gt_pp:=1L]
probs[class=="?", nb_gt_pp:=1L]
# normalizing nb_hap_pp and nb_gt_pp to 1 per sample, cell, and segment
probs[, nb_hap_pp := nb_hap_pp/sum(nb_hap_pp), by=.(sample, cell, chrom, start, end)]
probs[, nb_gt_pp := nb_gt_pp/sum(nb_gt_pp), by=.(sample, cell, chrom, start, end)]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment