From bcd95dd88a4f0aa7857a02fe41d50c84edc7ea37 Mon Sep 17 00:00:00 2001 From: Sascha Meiers <meiers@embl.de> Date: Thu, 16 Nov 2017 08:14:58 +0100 Subject: [PATCH] Integrated Strandphaser into Snakemake pipeline. However, Strandphaser still throws an error --- Snake.config.json | 4 +- Snakefile | 101 ++++++++++++++++-- Snakefile_StrandPhaseR | 74 ------------- install_StrandPhaseR.R | 4 - .../StrandPhaseR.config | 0 .../StrandPhaseR_pipeline.R | 0 utils/install_strandphaser.R | 4 + 7 files changed, 99 insertions(+), 88 deletions(-) delete mode 100644 Snakefile_StrandPhaseR delete mode 100644 install_StrandPhaseR.R rename StrandPhaseR.config => utils/StrandPhaseR.config (100%) rename StrandPhaseR_pipeline.R => utils/StrandPhaseR_pipeline.R (100%) create mode 100644 utils/install_strandphaser.R diff --git a/Snake.config.json b/Snake.config.json index 509c5d2..636c429 100644 --- a/Snake.config.json +++ b/Snake.config.json @@ -2,11 +2,13 @@ "sample" : "D2Rfb", + "reference" : "/g/korbel/shared/datasets/refgenomes/human/hg19_chr1_22XYM.fa", "mosaicatcher" : "/g/korbel/meiers/tools/mosaicatcher/mosaicatcher/build/mosaicatcher", "plot_script" : "/g/korbel/meiers/tools/mosaicatcher/mosaicatcher/R/qc.R", "sv_plot_script": "/g/korbel/meiers/tools/mosaicatcher/mosaicatcher/R/svs.R", "sce_script" : "/g/korbel/meiers/tools/mosaicatcher/strandsequtils/sce_utils/SCE.R", - "samtools" : "module load SAMtools && samtools", + "samtools" : "samtools", + "bcftools" : "bcftools", "class_script" : "finalCall_cmd.R", "class_dir" : "/g/korbel/meiers/tools/mosaicatcher/strandsequtils/maryamCode", diff --git a/Snakefile b/Snakefile index 2e03dbf..74de1a9 100644 --- a/Snakefile +++ b/Snakefile @@ -16,9 +16,9 @@ rule all: expand("segmentation2/" + config["sample"] + ".{window}_fixed.{bpdens}.txt", window = [50000, 100000, 200000, 500000], bpdens = ["few","many"]), expand("segmentation2/" + config["sample"] + ".{window}_variable.{bpdens}.txt", window = [50000, 100000], bpdens = ["few","many"]), "strand_states/" + config["sample"] + ".final.txt", - expand("sv_calls/" + config["sample"] + ".{window}_fixed.{bpdens}.SV_probs.pdf", + expand("sv_calls/" + config["sample"] + ".{window}_fixed.{bpdens}.SV_probs.pdf", window = [50000, 100000, 200000, 500000], bpdens = ["few","many"]), - expand("sv_calls/" + config["sample"] + ".{window}_variable.{bpdens}.SV_probs.pdf", + expand("sv_calls/" + config["sample"] + ".{window}_variable.{bpdens}.SV_probs.pdf", window = [50000, 100000], bpdens = ["few","many"]) @@ -33,7 +33,7 @@ rule plot_mosaic_counts: info = "counts/" + config["sample"] + ".{file_name}.info" output: "plots/" + config["sample"] + ".{file_name}.pdf" - params: + params: plot_command = "Rscript " + config["plot_script"] shell: """ @@ -194,8 +194,7 @@ rule determine_initial_strand_states: {params.sce_command} {input} {output} """ - -# Strandphaser needs a different input format which contains the path names to +# Strandphaser needs a different input format which contains the path names to # the bam files. This rule extracts this information and prepares an input file. rule convert_strandphaser_input: input: @@ -206,15 +205,41 @@ rule convert_strandphaser_input: script: "utils/helper.convert_strandphaser_input.R" +rule install_StrandPhaseR: + output: + "utils/R-packages/StrandPhaseR/R/StrandPhaseR" + log: + "log/strandphaser-install.log" + shell: + """ + Rscript utils/install_strandphaser.R > {log} 2>&1 + """ -### Dummy rule - this will be replaced by strand-phaser rule run_strandphaser: - input: - "phased_haps.txt" + input: + mergedbam = "snv_calls/merged.bam", + wcregions = "strand_states/" + config["sample"] + ".strandphaser_input.txt", + snppositions = "snv_calls/" + config["sample"] + ".vcf", + configfile = "utils/StrandPhaseR.config", + strandphaser = "utils/R-packages/StrandPhaseR/R/StrandPhaseR", + bamfolder = "bam" output: "strand_states/" + config["sample"] + ".strandphaser_output.txt" + log: + "log/phased_haps.txt.log" shell: - "cp {input} {output}" + """ + Rscript utils/StrandPhaseR_pipeline.R \ + {input.bamfolder} \ + log/StrandPhaseR_analysis \ + {input.configfile} \ + {input.wcregions} \ + {input.snppositions} \ + $(pwd)/utils/R-packages/ \ + > {log} 2>&1 + touch {output} + """ + rule convert_strandphaser_output: input: @@ -225,3 +250,61 @@ rule convert_strandphaser_output: "strand_states/" + config["sample"] + ".final.txt" script: "utils/helper.convert_strandphaser_output.R" + + +################################################################################ +# Call SNVs # +################################################################################ + +rule mergeBams: + input: + expand("bam/{bam}.bam", bam=BAM) + output: + "snv_calls/merged.bam" + shell: + config["samtools"] + " merge {output} {input}" + +rule indexMergedBam: + input: + "snv_calls/merged.bam" + output: + "snv_calls/merged.bam.bai" + shell: + config["samtools"] + " index {input}" + + +rule call_SNVs_bcftools_chrom: + input: + fa = config["reference"], + chrom = "chroms/{chrom}", + bam = "snv_calls/merged.bam", + bai = "snv_calls/merged.bam.bai" + output: + "snv_calls/D2Rfb.{chrom}.vcf" + params: + samtools = config["samtools"], + bcftools = config["bcftools"] + shell: + """ + {params.samtools} mpileup -r {wildcards.chrom} -g -f {input.fa} {input.bam} \ + | {params.bcftools} call -mv - > {output} + """ + +# Write one file per chromosome that should be analysed. +rule prepare_chromosomes: + input: + "strand_states/" + config["sample"] + ".txt" + output: + dynamic("chroms/{chrom}") + shell: + """ + tail -n+2 {input} | cut -f1 | sort | uniq | awk '{{print "chroms/" $1}}' | xargs touch + """ + +rule merge_SNV_calls: + input: + dynamic("snv_calls/" + config["sample"] + ".{chrom}.vcf") + output: + expand("snv_calls/" + config["sample"] + ".vcf") + shell: + config["bcftools"] + " concat -O v -o {output} {input}" diff --git a/Snakefile_StrandPhaseR b/Snakefile_StrandPhaseR deleted file mode 100644 index ba9f2bc..0000000 --- a/Snakefile_StrandPhaseR +++ /dev/null @@ -1,74 +0,0 @@ -### StrandPhaseR pipeline ### - -BAM, = glob_wildcards("bam/{bam}.bam") #already part of of main pipeline -ref = '/local/data/david/test_pipeline/hg19.fa' #this should be included in JSON file - -rule all: - input: - "StrandPhaseR_analysis/Phased/phased_haps.txt" - -rule indexBams: - input: - "bam/{bam}.bam" - output: - "bam/{bam}.bam.bai" - shell: - "samtools index {input}" - - -rule mergeBams: - input: - expand("bam/{bam}.bam", bam=BAM) - output: - "bam/mergedBam/merged.bam" - shell: - "samtools merge {output} {input}" - -rule indexMergedBam: - input: - "bam/mergedBam/merged.bam" - output: - "bam/mergedBam/merged.bam.bai" - shell: - "samtools index {input}" - -rule CallSNVs_bcftools: - input: - fa={ref}, - #bam=expand("bam/{bam}.bam", bam=BAM), #perhaps later we can decide to skip merging and submit all BAMs for SNV calling - #bai=expand("bam/{bam}.bam.bai", bam=BAM) - bam="bam/mergedBam/merged.bam", - bai="bam/mergedBam/merged.bam.bai" - - output: - "bam/SNVcalls/input.vcf" - - shell: - "samtools mpileup -g -f {input.fa} {input.bam} | " - "bcftools call -mv - > {output}" - -rule install_StrandPhaseR: - output: - 'R-packages/StrandPhaseR/R/StrandPhaseR' - log: - 'strandphaser-install.log' - shell: - 'Rscript install_StrandPhaseR.R > {log} 2>&1' - - -rule run_SSphasing_pipeline: - input: - mergedbam='bam/mergedBam/merged.bam', - wcregions='D2Rfb.strand_WCregions.txt', - snppositions='bam/SNVcalls/input.vcf', - configfile="StrandPhaseR.config", - strandphaser='R-packages/StrandPhaseR/R/StrandPhaseR', - bamfolder='bam' - output: - 'StrandPhaseR_analysis/Phased/phased_haps.txt' - log: - 'StrandPhaseR_analysis/Phased/phased_haps.txt.log' - run: - cwd = os.getcwd() - shell('Rscript StrandPhaseR_pipeline.R {input.bamfolder} StrandPhaseR_analysis {input.configfile} {input.wcregions} {input.snppositions} {cwd}/R-packages/ > {log} 2>&1') - diff --git a/install_StrandPhaseR.R b/install_StrandPhaseR.R deleted file mode 100644 index 5d77225..0000000 --- a/install_StrandPhaseR.R +++ /dev/null @@ -1,4 +0,0 @@ -#!/usr/bin/env Rscript -Sys.setenv(Renv='PWD') -library(devtools) -withr::with_libpaths(new = "R-packages", install_git("git://github.com/daewoooo/StrandPhaseR.git", branch = "master")) diff --git a/StrandPhaseR.config b/utils/StrandPhaseR.config similarity index 100% rename from StrandPhaseR.config rename to utils/StrandPhaseR.config diff --git a/StrandPhaseR_pipeline.R b/utils/StrandPhaseR_pipeline.R similarity index 100% rename from StrandPhaseR_pipeline.R rename to utils/StrandPhaseR_pipeline.R diff --git a/utils/install_strandphaser.R b/utils/install_strandphaser.R new file mode 100644 index 0000000..efa39d9 --- /dev/null +++ b/utils/install_strandphaser.R @@ -0,0 +1,4 @@ +#!/usr/bin/env Rscript +Sys.setenv(Renv='PWD') +library(devtools) +withr::with_libpaths(new = "utils/R-packages", install_git("git://github.com/daewoooo/StrandPhaseR.git", branch = "master"), "prefix") -- GitLab