From c6ab4003903f0fa67bde49e796246dbd1efa26c8 Mon Sep 17 00:00:00 2001
From: Sascha Meiers <meiers@embl.de>
Date: Mon, 4 Jun 2018 15:10:32 +0200
Subject: [PATCH] Include log-likelihood-ratio filter and normalization

---
 Snake.config.json                                |  7 +++----
 Snakefile                                        | 12 ++++++------
 utils/mosaiClassifier.snakemake.R                |  3 ++-
 utils/mosaiClassifier/mosaiClassifier.R          |  1 +
 utils/mosaiClassifier_call.snakemake.R           |  4 ++--
 utils/mosaiClassifier_call_biallelic.snakemake.R |  4 ++--
 6 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/Snake.config.json b/Snake.config.json
index 6c41edd..73afc04 100644
--- a/Snake.config.json
+++ b/Snake.config.json
@@ -19,10 +19,9 @@
     "R_reference"   : "BSgenome.Hsapiens.UCSC.hg38",
 
     "bp_density"    : {
-        "few"       : 0.1,
-        "medium"    : 0.25,
-        "more"      : 0.33,
-        "many"      : 0.5
+        "few"       : 0.2,
+        "medium"    : 0.4,
+        "many"      : 0.6
     },
 
     "paired_end"    : true,
diff --git a/Snakefile b/Snakefile
index ef51fa3..805bf3c 100644
--- a/Snakefile
+++ b/Snakefile
@@ -18,7 +18,7 @@ import os.path
 # * calculate a segmentation into potential SVs using Mosaicatcher
 
 
-METHODS = ["simpleCalls", "biAllelic"]
+METHODS = ["simpleCalls_llr1", "simpleCalls_llr4", "biAllelic_llr1", "biAllelic_llr4"]
 
 rule all:
     input:
@@ -27,7 +27,7 @@ rule all:
                sample = SAMPLE,
                chrom = config["chromosomes"],
                window = [50000, 100000],
-               bpdens = ["few","medium","more","many"],
+               bpdens = ["few","medium","many"],
                method = METHODS)
 
 
@@ -349,9 +349,9 @@ rule mosaiClassifier_make_call:
     input:
         probs = "sv_probabilities/{sample}/{windows}.{bpdens}/probabilities.Rdata"
     output:
-        "sv_calls/{sample}/{windows}.{bpdens}/simpleCalls.txt"
+        "sv_calls/{sample}/{windows}.{bpdens}/simpleCalls_llr{llr}.txt"
     log:
-        "log/{sample}/mosaiClassifier_make_call.{windows}.{bpdens}.txt"
+        "log/{sample}/mosaiClassifier_make_call.{windows}.{bpdens}.{llr}.txt"
     script:
         "utils/mosaiClassifier_call.snakemake.R"
 
@@ -379,9 +379,9 @@ rule mosaiClassifier_make_call_biallelic:
     input:
         probs = "sv_probabilities/{sample}/{windows}.{bpdens}/probabilities.Rdata"
     output:
-        "sv_calls/{sample}/{windows}.{bpdens}/biAllelic.txt"
+        "sv_calls/{sample}/{windows}.{bpdens}/biAllelic_llr{llr}.txt"
     log:
-        "log/{sample}/mosaiClassifier_make_call_biallelic.{windows}.{bpdens}.txt"
+        "log/{sample}/mosaiClassifier_make_call_biallelic.{windows}.{bpdens}.{llr}.txt"
     script:
         "utils/mosaiClassifier_call_biallelic.snakemake.R"
 
diff --git a/utils/mosaiClassifier.snakemake.R b/utils/mosaiClassifier.snakemake.R
index 8471cbb..60a716c 100644
--- a/utils/mosaiClassifier.snakemake.R
+++ b/utils/mosaiClassifier.snakemake.R
@@ -12,7 +12,8 @@ segs   = fread(snakemake@input[["bp"]])
 
 
 # is there a normalization file given?
-if ("norm" %in% snakemake@input) {
+if ("norm" %in% names(snakemake@input)) {
+  message("[MosaiClassifier] Read normalization from ", snakemake@input[["norm"]])
   normalization = fread(snakemake@input[["norm"]])
 } else {
   normalization = NULL
diff --git a/utils/mosaiClassifier/mosaiClassifier.R b/utils/mosaiClassifier/mosaiClassifier.R
index d03ef9f..547cb83 100644
--- a/utils/mosaiClassifier/mosaiClassifier.R
+++ b/utils/mosaiClassifier/mosaiClassifier.R
@@ -127,6 +127,7 @@ mosaiClassifierPrepare <- function(counts, info, strand, segs, normVector = NULL
 
   # Add normalization factors to the expected counts ("scalar")
   if (!is.null(normVector)) {
+    message("[MosaiClassifier] Normalize coverage expectation")
     addNormalizationScalar(probs, counts, normVector)
   } else {
     probs[, scalar := 1.0]
diff --git a/utils/mosaiClassifier_call.snakemake.R b/utils/mosaiClassifier_call.snakemake.R
index 19c3a35..1cbd445 100644
--- a/utils/mosaiClassifier_call.snakemake.R
+++ b/utils/mosaiClassifier_call.snakemake.R
@@ -3,9 +3,9 @@ library(data.table)
 source("utils/mosaiClassifier/makeSVcalls.R")
 
 probs = readRDS(snakemake@input[["probs"]])
-llr   = as.numeric(snakemake@params[["llr"]])
+llr   = as.numeric(snakemake@wildcards[["llr"]])
 
 probs <- mosaiClassifierPostProcessing(probs)
-tab <- makeSVCallSimple(probs)
+tab <- makeSVCallSimple(probs, llr_thr = llr)
 
 write.table(tab, file = snakemake@output[[1]], sep = "\t", quote=F, row.names = F, col.names = T)
diff --git a/utils/mosaiClassifier_call_biallelic.snakemake.R b/utils/mosaiClassifier_call_biallelic.snakemake.R
index da22838..fa42cd8 100644
--- a/utils/mosaiClassifier_call_biallelic.snakemake.R
+++ b/utils/mosaiClassifier_call_biallelic.snakemake.R
@@ -3,10 +3,10 @@ library(data.table)
 source("utils/mosaiClassifier/makeSVcalls.R")
 
 probs = readRDS(snakemake@input[["probs"]])
-llr   = as.numeric(snakemake@params[["llr"]])
+llr   = as.numeric(snakemake@wildcards[["llr"]])
 
 probs <- mosaiClassifierPostProcessing(probs)
 probs <- forceBiallelic(probs)
-tab <- makeSVCallSimple(probs)
+tab <- makeSVCallSimple(probs, llr_thr = llr)
 
 write.table(tab, file = snakemake@output[[1]], sep = "\t", quote=F, row.names = F, col.names = T)
-- 
GitLab