From c6ab4003903f0fa67bde49e796246dbd1efa26c8 Mon Sep 17 00:00:00 2001 From: Sascha Meiers <meiers@embl.de> Date: Mon, 4 Jun 2018 15:10:32 +0200 Subject: [PATCH] Include log-likelihood-ratio filter and normalization --- Snake.config.json | 7 +++---- Snakefile | 12 ++++++------ utils/mosaiClassifier.snakemake.R | 3 ++- utils/mosaiClassifier/mosaiClassifier.R | 1 + utils/mosaiClassifier_call.snakemake.R | 4 ++-- utils/mosaiClassifier_call_biallelic.snakemake.R | 4 ++-- 6 files changed, 16 insertions(+), 15 deletions(-) diff --git a/Snake.config.json b/Snake.config.json index 6c41edd..73afc04 100644 --- a/Snake.config.json +++ b/Snake.config.json @@ -19,10 +19,9 @@ "R_reference" : "BSgenome.Hsapiens.UCSC.hg38", "bp_density" : { - "few" : 0.1, - "medium" : 0.25, - "more" : 0.33, - "many" : 0.5 + "few" : 0.2, + "medium" : 0.4, + "many" : 0.6 }, "paired_end" : true, diff --git a/Snakefile b/Snakefile index ef51fa3..805bf3c 100644 --- a/Snakefile +++ b/Snakefile @@ -18,7 +18,7 @@ import os.path # * calculate a segmentation into potential SVs using Mosaicatcher -METHODS = ["simpleCalls", "biAllelic"] +METHODS = ["simpleCalls_llr1", "simpleCalls_llr4", "biAllelic_llr1", "biAllelic_llr4"] rule all: input: @@ -27,7 +27,7 @@ rule all: sample = SAMPLE, chrom = config["chromosomes"], window = [50000, 100000], - bpdens = ["few","medium","more","many"], + bpdens = ["few","medium","many"], method = METHODS) @@ -349,9 +349,9 @@ rule mosaiClassifier_make_call: input: probs = "sv_probabilities/{sample}/{windows}.{bpdens}/probabilities.Rdata" output: - "sv_calls/{sample}/{windows}.{bpdens}/simpleCalls.txt" + "sv_calls/{sample}/{windows}.{bpdens}/simpleCalls_llr{llr}.txt" log: - "log/{sample}/mosaiClassifier_make_call.{windows}.{bpdens}.txt" + "log/{sample}/mosaiClassifier_make_call.{windows}.{bpdens}.{llr}.txt" script: "utils/mosaiClassifier_call.snakemake.R" @@ -379,9 +379,9 @@ rule mosaiClassifier_make_call_biallelic: input: probs = "sv_probabilities/{sample}/{windows}.{bpdens}/probabilities.Rdata" output: - "sv_calls/{sample}/{windows}.{bpdens}/biAllelic.txt" + "sv_calls/{sample}/{windows}.{bpdens}/biAllelic_llr{llr}.txt" log: - "log/{sample}/mosaiClassifier_make_call_biallelic.{windows}.{bpdens}.txt" + "log/{sample}/mosaiClassifier_make_call_biallelic.{windows}.{bpdens}.{llr}.txt" script: "utils/mosaiClassifier_call_biallelic.snakemake.R" diff --git a/utils/mosaiClassifier.snakemake.R b/utils/mosaiClassifier.snakemake.R index 8471cbb..60a716c 100644 --- a/utils/mosaiClassifier.snakemake.R +++ b/utils/mosaiClassifier.snakemake.R @@ -12,7 +12,8 @@ segs = fread(snakemake@input[["bp"]]) # is there a normalization file given? -if ("norm" %in% snakemake@input) { +if ("norm" %in% names(snakemake@input)) { + message("[MosaiClassifier] Read normalization from ", snakemake@input[["norm"]]) normalization = fread(snakemake@input[["norm"]]) } else { normalization = NULL diff --git a/utils/mosaiClassifier/mosaiClassifier.R b/utils/mosaiClassifier/mosaiClassifier.R index d03ef9f..547cb83 100644 --- a/utils/mosaiClassifier/mosaiClassifier.R +++ b/utils/mosaiClassifier/mosaiClassifier.R @@ -127,6 +127,7 @@ mosaiClassifierPrepare <- function(counts, info, strand, segs, normVector = NULL # Add normalization factors to the expected counts ("scalar") if (!is.null(normVector)) { + message("[MosaiClassifier] Normalize coverage expectation") addNormalizationScalar(probs, counts, normVector) } else { probs[, scalar := 1.0] diff --git a/utils/mosaiClassifier_call.snakemake.R b/utils/mosaiClassifier_call.snakemake.R index 19c3a35..1cbd445 100644 --- a/utils/mosaiClassifier_call.snakemake.R +++ b/utils/mosaiClassifier_call.snakemake.R @@ -3,9 +3,9 @@ library(data.table) source("utils/mosaiClassifier/makeSVcalls.R") probs = readRDS(snakemake@input[["probs"]]) -llr = as.numeric(snakemake@params[["llr"]]) +llr = as.numeric(snakemake@wildcards[["llr"]]) probs <- mosaiClassifierPostProcessing(probs) -tab <- makeSVCallSimple(probs) +tab <- makeSVCallSimple(probs, llr_thr = llr) write.table(tab, file = snakemake@output[[1]], sep = "\t", quote=F, row.names = F, col.names = T) diff --git a/utils/mosaiClassifier_call_biallelic.snakemake.R b/utils/mosaiClassifier_call_biallelic.snakemake.R index da22838..fa42cd8 100644 --- a/utils/mosaiClassifier_call_biallelic.snakemake.R +++ b/utils/mosaiClassifier_call_biallelic.snakemake.R @@ -3,10 +3,10 @@ library(data.table) source("utils/mosaiClassifier/makeSVcalls.R") probs = readRDS(snakemake@input[["probs"]]) -llr = as.numeric(snakemake@params[["llr"]]) +llr = as.numeric(snakemake@wildcards[["llr"]]) probs <- mosaiClassifierPostProcessing(probs) probs <- forceBiallelic(probs) -tab <- makeSVCallSimple(probs) +tab <- makeSVCallSimple(probs, llr_thr = llr) write.table(tab, file = snakemake@output[[1]], sep = "\t", quote=F, row.names = F, col.names = T) -- GitLab