diff --git a/Snake.config.json b/Snake.config.json index d6e157c8476989d6a76dfdf508d04000dcc2b0e2..d3b4ced744e7d5a1256e97bb6328bbc6e13859c6 100644 --- a/Snake.config.json +++ b/Snake.config.json @@ -4,6 +4,7 @@ "reference" : "/g/korbel/shared/datasets/refgenomes/human/hg19_chr1_22XYM.fa", "mosaicatcher" : "/g/korbel/meiers/tools/mosaicatcher/mosaicatcher/build/mosaicatcher", "plot_script" : "/g/korbel/meiers/tools/mosaicatcher/mosaicatcher/R/qc.R", + "plot_segments" : "/g/korbel/meiers/tools/mosaicatcher/mosaicatcher/R/plot_segments.R", "sv_plot_script": "/g/korbel/meiers/tools/mosaicatcher/mosaicatcher/R/svs.R", "sce_script" : "/g/korbel/meiers/tools/mosaicatcher/strandsequtils/sce_utils/SCE.R", "samtools" : "samtools", diff --git a/Snakefile b/Snakefile index 7a54ee92fd59ea8470f0bd6590a1db7a30644087..ea25c76ac2f31494381694a28133754c48a3df82 100644 --- a/Snakefile +++ b/Snakefile @@ -29,7 +29,9 @@ rule all: expand("sv_calls/{sample}/{window}_fixed.{bpdens}.SV_probs.{chrom}.pdf", sample = SAMPLE, chrom = config['chromosomes'], window = [50000, 100000, 200000, 500000], bpdens = ["few","medium","many"]), #expand("sv_calls/{sample}/{window}_variable.{bpdens}.SV_probs.chr1.pdf", sample = SAMPLE, - # window = [50000, 100000], bpdens = ["few","medium","many"]) + # window = [50000, 100000], bpdens = ["few","medium","many"]), + expand("segmentation/{sample}/{window}_fixed/{chrom}.pdf", sample = SAMPLE, + window = [50000, 100000], chrom = config['chromosomes'][0]) # Specifically run this only for one chrom because it is super slow @@ -276,6 +278,23 @@ rule segmentation: {input} > {log} 2>&1 """ +rule plot_segmentation: + input: + counts = "counts/{sample}/{file_name}.txt.gz", + segments = "segmentation/{sample}/{file_name}.txt" + output: + "segmentation/{sample}/{file_name}/{chrom}.pdf" + log: + "log/{sample}/plot_segmentation.{file_name}.{chrom}.txt" + params: + command = config["plot_segments"] + shell: + """ + Rscript {params.command} {input.counts} {input.segments} {wildcards.chrom} {output} > {log} 2>&1 + """ + + + # Pick a few segmentations and prepare the input files for SV classification rule prepare_segments: input: @@ -313,7 +332,8 @@ rule run_sv_classification: bp = "segmentation2/{sample}/{windows}.{bpdens}.txt" output: outdir = "sv_probabilities/{sample}/{windows}.{bpdens}/", - out1 = "sv_probabilities/{sample}/{windows}.{bpdens}/allSegCellProbs.table" + out1 = "sv_probabilities/{sample}/{windows}.{bpdens}/allSegCellProbs.table", + bamNames = "sv_probabilities/{sample}/{windows}.{bpdens}/bamNames.txt" log: "log/{sample}/run_sv_classification.{windows}.{bpdens}.txt" params: @@ -336,8 +356,9 @@ rule run_sv_classification: rule convert_SVprob_output: input: - probs = "sv_probabilities/{sample}/{windows}.{bpdens}/allSegCellProbs.table", - info = "counts/{sample}/{windows}.info" + probs = "sv_probabilities/{sample}/{windows}.{bpdens}/allSegCellProbs.table", + info = "counts/{sample}/{windows}.info", + bamNames = "sv_probabilities/{sample}/{windows}.{bpdens}/bamNames.txt" output: "sv_probabilities/{sample}/{windows}.{bpdens}/probabilities.txt" params: diff --git a/utils/helper.convert_svprob_output.R b/utils/helper.convert_svprob_output.R index adf40284ab1fb2bf1ba196a90310449860f7cf67..c4a7d16e67afeafdb107f5b52dca55fa15c0acb7 100644 --- a/utils/helper.convert_svprob_output.R +++ b/utils/helper.convert_svprob_output.R @@ -8,15 +8,26 @@ f_info = snakemake@input[["info"]] f_info sample = snakemake@params[["sample_name"]] sample +f_bamNames = snakemake@input[["bamNames"]] +f_bamNames d = fread(f_maryam, header = T) +print("Maryam's data:") d f = fread(f_info) +print("Info data:") f +b = fread(f_bamNames) +b = b[order(cell_id),] +print("bamNames data:") +b # Map cell number to cell name assert_that(max(d$cells) <= nrow(f)) -d$cell = f$cell[d$cells] +assert_that(max(d$cells) <= max(b$cell_id)) +assert_that(all(1:nrow(b) == b$cell_id)) # Make sure that bamNames are sorted and exactly match numbers 1...n + +d$cell = b[d$cells,]$cell_name d$sample = sample d