diff --git a/Snake.config.json b/Snake.config.json index 3c93ebd46045f416ef346825b8e60f424cc4d801..509c5d20bc2137124ad5c8b034892a6489ca5e9c 100644 --- a/Snake.config.json +++ b/Snake.config.json @@ -4,6 +4,7 @@ "mosaicatcher" : "/g/korbel/meiers/tools/mosaicatcher/mosaicatcher/build/mosaicatcher", "plot_script" : "/g/korbel/meiers/tools/mosaicatcher/mosaicatcher/R/qc.R", + "sv_plot_script": "/g/korbel/meiers/tools/mosaicatcher/mosaicatcher/R/svs.R", "sce_script" : "/g/korbel/meiers/tools/mosaicatcher/strandsequtils/sce_utils/SCE.R", "samtools" : "module load SAMtools && samtools", "class_script" : "finalCall_cmd.R", @@ -16,7 +17,6 @@ "100000" : "utils/variable_bins.GRCh38.100kb.bed" }, - "bp_density" : { "few" : 0.1, "medium" : 0.33, diff --git a/Snakefile b/Snakefile index b755fc629346d4af574fd43cb790dfd391d60539..2e03dbf21d12bdf6a7faf75779124701cc79b335 100644 --- a/Snakefile +++ b/Snakefile @@ -16,9 +16,9 @@ rule all: expand("segmentation2/" + config["sample"] + ".{window}_fixed.{bpdens}.txt", window = [50000, 100000, 200000, 500000], bpdens = ["few","many"]), expand("segmentation2/" + config["sample"] + ".{window}_variable.{bpdens}.txt", window = [50000, 100000], bpdens = ["few","many"]), "strand_states/" + config["sample"] + ".final.txt", - expand("sv_probabilities/" + config["sample"] + ".{window}_fixed.{bpdens}/allSegCellProbs.table", + expand("sv_calls/" + config["sample"] + ".{window}_fixed.{bpdens}.SV_probs.pdf", window = [50000, 100000, 200000, 500000], bpdens = ["few","many"]), - expand("sv_probabilities/" + config["sample"] + ".{window}_variable.{bpdens}/allSegCellProbs.table", + expand("sv_calls/" + config["sample"] + ".{window}_variable.{bpdens}.SV_probs.pdf", window = [50000, 100000], bpdens = ["few","many"]) @@ -40,7 +40,19 @@ rule plot_mosaic_counts: {params.plot_command} {input.counts} {input.info} {output} """ - +rule plot_SV_calls: + input: + counts = "counts/" + config["sample"] + ".{windows}.txt.gz", + probs = "sv_probabilities/" + config["sample"] + ".{windows}.{bpdens}/probabilities.txt" + output: + "sv_calls/" + config["sample"] + ".{windows}.{bpdens}.SV_probs.pdf" + params: + plot_command = "Rscript " + config["sv_plot_script"] + shell: + """ + {params.plot_command} {input.counts} {input.probs} {output} + touch {output} + """ ################################################################################ @@ -137,10 +149,11 @@ rule run_sv_classification: out1 = "sv_probabilities/" + config["sample"] + ".{windows}.{bpdens}/allSegCellProbs.table" params: class_dir = config["class_dir"], - class_command = "Rscript " + config["class_dir"] + "/" + config["class_script"] + class_command = "Rscript " + config["class_dir"] + "/" + config["class_script"], + windowsize = lambda wc: wc.windows.split("_")[0] shell: """ - set -x + set -x # set haplotypeInfo if phasing info is available {params.class_command} \ Rdirectory={params.class_dir} \ @@ -150,10 +163,19 @@ rule run_sv_classification: stateFile={input.states} \ K=22 \ maximumCN=4 \ + bin.size={params.windowsize} \ haplotypeInfo \ outputDir={output.outdir} """ +rule convert_SVprob_output: + input: + probs = "sv_probabilities/" + config["sample"] + ".{windows}.{bpdens}/allSegCellProbs.table", + info = "counts/" + config["sample"] + ".{windows}.info" + output: + "sv_probabilities/" + config["sample"] + ".{windows}.{bpdens}/probabilities.txt" + script: + "utils/helper.convert_svprob_output.R" ################################################################################ diff --git a/utils/helper.convert_svprob_output.R b/utils/helper.convert_svprob_output.R new file mode 100644 index 0000000000000000000000000000000000000000..9ca10f2387f4053e578dbeae7c3a653f45ab4643 --- /dev/null +++ b/utils/helper.convert_svprob_output.R @@ -0,0 +1,22 @@ +library(data.table) +library(assertthat) + +f_maryam = snakemake@input[["probs"]] +f_info = snakemake@input[["info"]] +sample = snakemake@params[["sample_name"]] + +d = fread(f_maryam, header = T) +f = fread(f_info) + +# Map cell number to cell name +assert_that(max(d$cells) <= nrow(f)) +d$cell = f$cell[d$cells] +d$sample = sample + +d <- d[, .(chrom = chr, start, end, sample, cell, type = types, w = Wcount, c = Ccount, + p_cn0 = CN0, p_cn1 = CN1, p_cn2 = CN2, p_cn3 = CN3, p_cn4 = CN4, + p_ref = `1010`, p_del_hom = `0000`, p_del_h1 = `0010`, p_del_h2 = `1000`, + p_inv_hom = `0101`, p_inv_h1 = `0110`, p_inv_h2 = `1001`, + p_dup_hom = `2020`, p_dup_h1 = `2010`, p_dup_h2 = `1020`, + p_idup_h1 = `1110`, p_idup_h2 = `1011`)] +write.table(d, file = snakemake@output[[1]], quote=F, col.names = T, row.names = F, sep = "\")