Point the config at the "git" build of mosaicatcher and add Snakemake rules that repair the
"bps" column written by "mosaic segment" (segmentation and segmentation-per-cell outputs).
The repair divides by the rule's {window} wildcard instead of a fixed 100000, so it stays
correct for every configured bin width (identical result for window=100000).
NOTE(review): indentation and blank context lines were reconstructed from a whitespace-collapsed
copy of this patch; verify them against the target files before applying.

diff --git a/Snake.config.json b/Snake.config.json
index 5091f0958b4ebbca54fb17759044039df0b39d6b..83dfd2430445877f3555b636ca9ca69d3f993f8f 100644
--- a/Snake.config.json
+++ b/Snake.config.json
@@ -2,8 +2,8 @@
     "chromosomes" : ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22","chrX"],
     "reference" : "/MMCI/TM/scratch/ref/GRCh38_no_alt_analysis_set/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna",
 
-    "mosaicatcher" : "/MMCI/TM/scratch/strandseq/mosaicatcher-builds/v0.3/build/mosaic",
-    "plot_script" : "/MMCI/TM/scratch/strandseq/mosaicatcher-builds/v0.3/R/qc.R",
+    "mosaicatcher" : "/MMCI/TM/scratch/strandseq/mosaicatcher-builds/git/build/mosaic",
+    "plot_script" : "/MMCI/TM/scratch/strandseq/mosaicatcher-builds/git/R/qc.R",
 
     "samtools" : "samtools",
     "bcftools" : "bcftools",
diff --git a/Snakefile b/Snakefile
index 80b43c522d434184f7f223216bf895ad3dd976cb..3f83e4eeb6be8d72183e13e1c296a3a41d8992d9 100644
--- a/Snakefile
+++ b/Snakefile
@@ -490,7 +490,7 @@ rule segmentation:
     input:
         "counts/{sample}/{window}_{file_name}.txt.gz"
     output:
-        "segmentation/{sample}/{window,\d+}_{file_name}.txt"
+        "segmentation/{sample}/{window,\d+}_{file_name}.txt.fixme"
     log:
         "log/segmentation/{sample}/{window}_{file_name}.log"
     params:
@@ -506,6 +506,16 @@ rule segmentation:
         {input} > {log} 2>&1
         """
 
+# TODO: Workaround: recent "mosaic segment" builds write a wrong "bps" column, so column 12 is
+# TODO: recomputed as int((column 14 - 1) / window). Remove once fixed in the C++ code.
+rule fix_segmentation:
+    input:
+        "segmentation/{sample}/{window}_{file_name}.txt.fixme"
+    output:
+        "segmentation/{sample}/{window,\d+}_{file_name}.txt"
+    shell:
+        'awk \'BEGIN {{OFS="\\t"}} {{if ($1=="{wildcards.sample}") $12=int(($14-1)/{wildcards.window}); print}}\' {input} > {output}'
+
 # Pick a few segmentations and prepare the input files for SV classification
 rule prepare_segments:
     input:
@@ -523,7 +533,7 @@ rule segment_one_cell:
     input:
         "counts-per-cell/{sample}/{cell}/{window}_{file_name}.txt.gz"
     output:
-        "segmentation-per-cell/{sample}/{cell}/{window,\d+}_{file_name}.txt"
+        "segmentation-per-cell/{sample}/{cell}/{window,\d+}_{file_name}.txt.fixme"
     log:
         "log/segmentation-per-cell/{sample}/{cell}/{window}_{file_name}.log"
     params:
@@ -539,6 +549,16 @@ rule segment_one_cell:
         {input} > {log} 2>&1
         """
 
+# TODO: Workaround: recent "mosaic segment" builds write a wrong "bps" column, so column 12 is
+# TODO: recomputed as int((column 14 - 1) / window). Remove once fixed in the C++ code.
+rule fix_segmentation_one_cell:
+    input:
+        "segmentation-per-cell/{sample}/{cell}/{window}_{file_name}.txt.fixme"
+    output:
+        "segmentation-per-cell/{sample}/{cell}/{window,\d+}_{file_name}.txt"
+    shell:
+        'awk \'BEGIN {{OFS="\\t"}} {{if ($1=="{wildcards.sample}") $12=int(($14-1)/{wildcards.window}); print}}\' {input} > {output}'
+
 rule segmentation_selection:
     input:
         counts="counts/{sample}/{window}_{file_name}.txt.gz",