From e6c009a1ab92689977be66bd0eb721739f1bbff6 Mon Sep 17 00:00:00 2001 From: Tobias Marschall <tobias.marschall@0ohm.net> Date: Sun, 7 Oct 2018 09:48:13 +0200 Subject: [PATCH] Switch from v0.3 to latest git version of C++ code, requires a workaround to fix the bps column of segmentation output. --- Snake.config.json | 4 ++-- Snakefile | 24 ++++++++++++++++++++++-- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/Snake.config.json b/Snake.config.json index 5091f09..83dfd24 100644 --- a/Snake.config.json +++ b/Snake.config.json @@ -2,8 +2,8 @@ "chromosomes" : ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22","chrX"], "reference" : "/MMCI/TM/scratch/ref/GRCh38_no_alt_analysis_set/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna", - "mosaicatcher" : "/MMCI/TM/scratch/strandseq/mosaicatcher-builds/v0.3/build/mosaic", - "plot_script" : "/MMCI/TM/scratch/strandseq/mosaicatcher-builds/v0.3/R/qc.R", + "mosaicatcher" : "/MMCI/TM/scratch/strandseq/mosaicatcher-builds/git/build/mosaic", + "plot_script" : "/MMCI/TM/scratch/strandseq/mosaicatcher-builds/git/R/qc.R", "samtools" : "samtools", "bcftools" : "bcftools", diff --git a/Snakefile b/Snakefile index 80b43c5..3f83e4e 100644 --- a/Snakefile +++ b/Snakefile @@ -490,7 +490,7 @@ rule segmentation: input: "counts/{sample}/{window}_{file_name}.txt.gz" output: - "segmentation/{sample}/{window,\d+}_{file_name}.txt" + "segmentation/{sample}/{window,\d+}_{file_name}.txt.fixme" log: "log/segmentation/{sample}/{window}_{file_name}.log" params: @@ -506,6 +506,16 @@ rule segmentation: {input} > {log} 2>&1 """ +# TODO: This is a workaround because latest versions of "mosaic segment" don't compute the "bps" +# TODO: column properly. Remove once fixed in the C++ code. 
+rule fix_segmentation:
+    input:
+        "segmentation/{sample}/{window}_{file_name}.txt.fixme"
+    output:
+        "segmentation/{sample}/{window,\d+}_{file_name}.txt"
+    shell:
+        'awk \'BEGIN {{OFS="\\t"}} {{if ($1=="{wildcards.sample}") $12=int(($14-1)/{wildcards.window}); print}}\' {input} > {output}'
+
 # Pick a few segmentations and prepare the input files for SV classification
 rule prepare_segments:
     input:
@@ -523,7 +533,7 @@ rule segment_one_cell:
     input:
         "counts-per-cell/{sample}/{cell}/{window}_{file_name}.txt.gz"
     output:
-        "segmentation-per-cell/{sample}/{cell}/{window,\d+}_{file_name}.txt"
+        "segmentation-per-cell/{sample}/{cell}/{window,\d+}_{file_name}.txt.fixme"
     log:
         "log/segmentation-per-cell/{sample}/{cell}/{window}_{file_name}.log"
     params:
@@ -539,6 +549,16 @@ rule segment_one_cell:
         {input} > {log} 2>&1
         """
 
+# TODO: Workaround: latest versions of "mosaic segment" don't compute the "bps" column properly, so on rows
+# TODO: whose $1 is the sample, $12 is reset to int(($14-1)/{window}). Remove once fixed in the C++ code.
+rule fix_segmentation_one_cell:
+    input:
+        "segmentation-per-cell/{sample}/{cell}/{window}_{file_name}.txt.fixme"
+    output:
+        "segmentation-per-cell/{sample}/{cell}/{window,\d+}_{file_name}.txt"
+    shell:
+        'awk \'BEGIN {{OFS="\\t"}} {{if ($1=="{wildcards.sample}") $12=int(($14-1)/{wildcards.window}); print}}\' {input} > {output}'
+
 rule segmentation_selection:
     input:
         counts="counts/{sample}/{window}_{file_name}.txt.gz",
-- 
GitLab