diff --git a/Snake.config.json b/Snake.config.json index eb71bc747f9f4375fd33ecfbbbfe8e7d584826d6..04054ebadbf7333c5bad241bafa20be0801e63af 100644 --- a/Snake.config.json +++ b/Snake.config.json @@ -1,6 +1,6 @@ { "sample" : "D2Rfb", - + "chromosomes" : ["chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22"], "reference" : "/g/korbel/shared/datasets/refgenomes/human/hg19_chr1_22XYM.fa", "mosaicatcher" : "/g/korbel/meiers/tools/mosaicatcher/mosaicatcher/build/mosaicatcher", diff --git a/Snakefile b/Snakefile index da487a083d397723c2928ea8cd4d6eb4df3fb419..71603edf382b12922b492f31b2b415d6f2b35619 100644 --- a/Snakefile +++ b/Snakefile @@ -64,25 +64,53 @@ rule plot_SV_calls: # Read counting # ################################################################################ +rule generate_exclude_file_1: + output: + temp("log/exclude_file.temp") + input: + bam = expand("bam/{bam}.bam", bam = BAM[0]), + params: + samtools = config["samtools"] + shell: + """ + {params.samtools} view -H {input.bam} | awk '/^@SQ/ {{print substr($2,4)}}' > {output} + """ + +rule generate_exclude_file_2: + output: + "log/exclude_file" + input: + "log/exclude_file.temp" + params: + chroms = config["chromosomes"] + run: + with open(input[0]) as f: + with open(output[0],"w") as out: + for line in f: + if line.strip() not in params.chroms: + print(line.strip(), file = out) + + + rule mosaic_count_fixed: input: bam = expand("bam/{bam}.bam", bam = BAM), - bai = expand("bam/{bam}.bam.bai", bam = BAM) + bai = expand("bam/{bam}.bam.bai", bam = BAM), + excl = "log/exclude_file" output: counts = "counts/" + config["sample"] + ".{window}_fixed.txt.gz", info = "counts/" + config["sample"] + ".{window}_fixed.info" log: "log/mosaic_count_fixed.{window}.txt" params: - mc_command = config["mosaicatcher"], - mc_exclfile = config["exclude_file"] + mc_command = config["mosaicatcher"] shell: """ {params.mc_command} count \ --verbose \ -o {output.counts} \ -i {output.info} \ - -x {params.mc_exclfile} \ + -x {input.excl} \ -w {wildcards.window} \ {input.bam} \ > {log} 2>&1 @@ -93,7 +121,8 @@ rule mosaic_count_variable: input: bam = expand("bam/{bam}.bam", bam = BAM), bai = expand("bam/{bam}.bam.bai", bam = BAM), - bed = lambda wc: config["variable_bins"][str(wc.window)] + bed = lambda wc: config["variable_bins"][str(wc.window)], + excl = "log/exclude_file" output: counts = "counts/" + config["sample"] + ".{window}_variable.txt.gz", info = "counts/" + config["sample"] + ".{window}_variable.info" @@ -103,6 +132,7 @@ rule mosaic_count_variable: mc_command = config["mosaicatcher"] shell: """ + echo "NOTE: Exclude file not used in variable-width bins" {params.mc_command} count \ --verbose \ -o {output.counts} \