From bcd95dd88a4f0aa7857a02fe41d50c84edc7ea37 Mon Sep 17 00:00:00 2001
From: Sascha Meiers <meiers@embl.de>
Date: Thu, 16 Nov 2017 08:14:58 +0100
Subject: [PATCH] Integrated Strandphaser into Snakemake pipeline. However,
 Strandphaser still throws an error

---
 Snake.config.json                             |   4 +-
 Snakefile                                     | 101 ++++++++++++++++--
 Snakefile_StrandPhaseR                        |  74 -------------
 install_StrandPhaseR.R                        |   4 -
 .../StrandPhaseR.config                       |   0
 .../StrandPhaseR_pipeline.R                   |   0
 utils/install_strandphaser.R                  |   4 +
 7 files changed, 99 insertions(+), 88 deletions(-)
 delete mode 100644 Snakefile_StrandPhaseR
 delete mode 100644 install_StrandPhaseR.R
 rename StrandPhaseR.config => utils/StrandPhaseR.config (100%)
 rename StrandPhaseR_pipeline.R => utils/StrandPhaseR_pipeline.R (100%)
 create mode 100644 utils/install_strandphaser.R

diff --git a/Snake.config.json b/Snake.config.json
index 509c5d2..636c429 100644
--- a/Snake.config.json
+++ b/Snake.config.json
@@ -2,11 +2,13 @@
     "sample"        : "D2Rfb",
 
 
+    "reference"     : "/g/korbel/shared/datasets/refgenomes/human/hg19_chr1_22XYM.fa",
     "mosaicatcher"  : "/g/korbel/meiers/tools/mosaicatcher/mosaicatcher/build/mosaicatcher",
     "plot_script"   : "/g/korbel/meiers/tools/mosaicatcher/mosaicatcher/R/qc.R",
     "sv_plot_script": "/g/korbel/meiers/tools/mosaicatcher/mosaicatcher/R/svs.R",
     "sce_script"    : "/g/korbel/meiers/tools/mosaicatcher/strandsequtils/sce_utils/SCE.R",
-    "samtools"      : "module load SAMtools && samtools",
+    "samtools"      : "samtools",
+    "bcftools"      : "bcftools",
     "class_script"  : "finalCall_cmd.R",
     "class_dir"     : "/g/korbel/meiers/tools/mosaicatcher/strandsequtils/maryamCode",
 
diff --git a/Snakefile b/Snakefile
index 2e03dbf..74de1a9 100644
--- a/Snakefile
+++ b/Snakefile
@@ -16,9 +16,9 @@ rule all:
         expand("segmentation2/" + config["sample"] + ".{window}_fixed.{bpdens}.txt", window = [50000, 100000, 200000, 500000], bpdens = ["few","many"]),
         expand("segmentation2/" + config["sample"] + ".{window}_variable.{bpdens}.txt", window = [50000, 100000], bpdens = ["few","many"]),
         "strand_states/" + config["sample"] + ".final.txt",
-        expand("sv_calls/" + config["sample"] + ".{window}_fixed.{bpdens}.SV_probs.pdf", 
+        expand("sv_calls/" + config["sample"] + ".{window}_fixed.{bpdens}.SV_probs.pdf",
                window = [50000, 100000, 200000, 500000], bpdens = ["few","many"]),
-        expand("sv_calls/" + config["sample"] + ".{window}_variable.{bpdens}.SV_probs.pdf", 
+        expand("sv_calls/" + config["sample"] + ".{window}_variable.{bpdens}.SV_probs.pdf",
                window = [50000, 100000], bpdens = ["few","many"])
 
 
@@ -33,7 +33,7 @@ rule plot_mosaic_counts:
         info   = "counts/" + config["sample"] + ".{file_name}.info"
     output:
         "plots/" + config["sample"] + ".{file_name}.pdf"
-    params: 
+    params:
         plot_command = "Rscript " + config["plot_script"]
     shell:
         """
@@ -194,8 +194,7 @@ rule determine_initial_strand_states:
         {params.sce_command} {input} {output}
         """
 
-
-# Strandphaser needs a different input format which contains the path names to 
+# Strandphaser needs a different input format which contains the path names to
 # the bam files. This rule extracts this information and prepares an input file.
 rule convert_strandphaser_input:
     input:
@@ -206,15 +205,41 @@ rule convert_strandphaser_input:
     script:
         "utils/helper.convert_strandphaser_input.R"
 
+rule install_StrandPhaseR:
+    output:
+        "utils/R-packages/StrandPhaseR/R/StrandPhaseR"
+    log:
+        "log/strandphaser-install.log"
+    shell:
+        """
+        Rscript  utils/install_strandphaser.R > {log} 2>&1
+        """
 
-### Dummy rule - this will be replaced by strand-phaser
 rule run_strandphaser:
-    input: 
-        "phased_haps.txt"
+    input:
+        mergedbam    = "snv_calls/merged.bam",
+        wcregions    = "strand_states/" + config["sample"] + ".strandphaser_input.txt",
+        snppositions = "snv_calls/" + config["sample"] + ".vcf",
+        configfile   = "utils/StrandPhaseR.config",
+        strandphaser = "utils/R-packages/StrandPhaseR/R/StrandPhaseR",
+        bamfolder    = "bam"
     output:
         "strand_states/" + config["sample"] + ".strandphaser_output.txt"
+    log:
+        "log/phased_haps.txt.log"
     shell:
-        "cp {input} {output}"
+        """
+        Rscript utils/StrandPhaseR_pipeline.R \
+                {input.bamfolder} \
+                log/StrandPhaseR_analysis \
+                {input.configfile} \
+                {input.wcregions} \
+                {input.snppositions} \
+                $(pwd)/utils/R-packages/ \
+                > {log} 2>&1
+        touch {output}
+        """
+
 
 rule convert_strandphaser_output:
     input:
@@ -225,3 +250,61 @@ rule convert_strandphaser_output:
         "strand_states/" + config["sample"] + ".final.txt"
     script:
         "utils/helper.convert_strandphaser_output.R"
+
+
+################################################################################
+# Call SNVs                                                                    #
+################################################################################
+
+rule mergeBams:
+    input:
+        expand("bam/{bam}.bam", bam=BAM)
+    output:
+        "snv_calls/merged.bam"
+    shell:
+        config["samtools"] + " merge {output} {input}"
+
+rule indexMergedBam:
+    input:
+        "snv_calls/merged.bam"
+    output:
+        "snv_calls/merged.bam.bai"
+    shell:
+        config["samtools"] + " index {input}"
+
+
+rule call_SNVs_bcftools_chrom:
+    input:
+        fa    = config["reference"],
+        chrom = "chroms/{chrom}",
+        bam   = "snv_calls/merged.bam",
+        bai   = "snv_calls/merged.bam.bai"
+    output:
+        "snv_calls/D2Rfb.{chrom}.vcf"
+    params:
+        samtools = config["samtools"],
+        bcftools = config["bcftools"]
+    shell:
+        """
+        {params.samtools} mpileup -r {wildcards.chrom} -g -f {input.fa} {input.bam} \
+        | {params.bcftools} call -mv - > {output}
+        """
+
+# Write one file per chromosome that should be analysed.
+rule prepare_chromosomes:
+    input:
+        "strand_states/" + config["sample"] + ".txt"
+    output:
+        dynamic("chroms/{chrom}")
+    shell:
+        """
+        tail -n+2 {input} | cut -f1 | sort | uniq | awk '{{print "chroms/" $1}}' | xargs touch
+        """
+
+rule merge_SNV_calls:
+    input:
+        dynamic("snv_calls/" + config["sample"] + ".{chrom}.vcf")
+    output:
+        expand("snv_calls/" + config["sample"] + ".vcf")
+    shell:
+        config["bcftools"] + " concat -O v -o {output} {input}"
diff --git a/Snakefile_StrandPhaseR b/Snakefile_StrandPhaseR
deleted file mode 100644
index ba9f2bc..0000000
--- a/Snakefile_StrandPhaseR
+++ /dev/null
@@ -1,74 +0,0 @@
-### StrandPhaseR pipeline ###
- 
-BAM, = glob_wildcards("bam/{bam}.bam") #already part of of main pipeline 
-ref = '/local/data/david/test_pipeline/hg19.fa' #this should be included in JSON file 
-
-rule all:
-    input:	
-        "StrandPhaseR_analysis/Phased/phased_haps.txt"
-
-rule indexBams:
-    input:
-        "bam/{bam}.bam"
-    output:
-        "bam/{bam}.bam.bai"
-    shell:
-        "samtools index {input}"
-
-
-rule mergeBams:
-    input:
-        expand("bam/{bam}.bam", bam=BAM)
-    output:
-        "bam/mergedBam/merged.bam"	
-    shell:
-        "samtools merge {output} {input}" 
-
-rule indexMergedBam:
-    input:
-	"bam/mergedBam/merged.bam"
-    output:
-	"bam/mergedBam/merged.bam.bai"
-    shell:
-        "samtools index {input}"		
-
-rule CallSNVs_bcftools:
-    input:
-        fa={ref},
-        #bam=expand("bam/{bam}.bam", bam=BAM), #perhaps later we can decide to skip merging and submit all BAMs for SNV calling 
-        #bai=expand("bam/{bam}.bam.bai", bam=BAM)
-	bam="bam/mergedBam/merged.bam",	
-	bai="bam/mergedBam/merged.bam.bai"	
-
-    output:
-        "bam/SNVcalls/input.vcf"
-	
-    shell:		
-        "samtools mpileup -g -f {input.fa} {input.bam} | "
-        "bcftools call -mv - > {output}"
-
-rule install_StrandPhaseR:
-    output:
-        'R-packages/StrandPhaseR/R/StrandPhaseR'
-    log:
-        'strandphaser-install.log'
-    shell:
-        'Rscript install_StrandPhaseR.R > {log} 2>&1'
-
-
-rule run_SSphasing_pipeline:
-    input: 
-        mergedbam='bam/mergedBam/merged.bam',
-        wcregions='D2Rfb.strand_WCregions.txt',
- 	snppositions='bam/SNVcalls/input.vcf',
-	configfile="StrandPhaseR.config",
-        strandphaser='R-packages/StrandPhaseR/R/StrandPhaseR',
-        bamfolder='bam'
-    output:
-        'StrandPhaseR_analysis/Phased/phased_haps.txt' 
-    log:
-        'StrandPhaseR_analysis/Phased/phased_haps.txt.log'
-    run: 
-        cwd = os.getcwd()
-        shell('Rscript StrandPhaseR_pipeline.R {input.bamfolder} StrandPhaseR_analysis {input.configfile} {input.wcregions} {input.snppositions} {cwd}/R-packages/ > {log} 2>&1')
-
diff --git a/install_StrandPhaseR.R b/install_StrandPhaseR.R
deleted file mode 100644
index 5d77225..0000000
--- a/install_StrandPhaseR.R
+++ /dev/null
@@ -1,4 +0,0 @@
-#!/usr/bin/env Rscript
-Sys.setenv(Renv='PWD')
-library(devtools)
-withr::with_libpaths(new = "R-packages", install_git("git://github.com/daewoooo/StrandPhaseR.git", branch = "master"))
diff --git a/StrandPhaseR.config b/utils/StrandPhaseR.config
similarity index 100%
rename from StrandPhaseR.config
rename to utils/StrandPhaseR.config
diff --git a/StrandPhaseR_pipeline.R b/utils/StrandPhaseR_pipeline.R
similarity index 100%
rename from StrandPhaseR_pipeline.R
rename to utils/StrandPhaseR_pipeline.R
diff --git a/utils/install_strandphaser.R b/utils/install_strandphaser.R
new file mode 100644
index 0000000..efa39d9
--- /dev/null
+++ b/utils/install_strandphaser.R
@@ -0,0 +1,4 @@
+#!/usr/bin/env Rscript
+Sys.setenv(Renv='PWD')
+library(devtools)
+withr::with_libpaths(new = "utils/R-packages", install_git("git://github.com/daewoooo/StrandPhaseR.git", branch = "master"), "prefix")
-- 
GitLab