From 044fd3d7a4620dee274d0bb0c2ba110f5d5eabc4 Mon Sep 17 00:00:00 2001
From: daewoooo <daewoooo@gmail.com>
Date: Tue, 14 Nov 2017 10:11:47 +0100
Subject: [PATCH] Added StrandPhaseR pipeline

---
 Snakefile_StrandPhaseR  | 74 +++++++++++++++++++++++++++++++++++++++++
 StrandPhaseR.config     | 19 +++++++++++
 StrandPhaseR_pipeline.R | 11 ++++++
 install_StrandPhaseR.R  |  4 +++
 4 files changed, 108 insertions(+)
 create mode 100644 Snakefile_StrandPhaseR
 create mode 100644 StrandPhaseR.config
 create mode 100644 StrandPhaseR_pipeline.R
 create mode 100644 install_StrandPhaseR.R

diff --git a/Snakefile_StrandPhaseR b/Snakefile_StrandPhaseR
new file mode 100644
index 0000000..ba9f2bc
--- /dev/null
+++ b/Snakefile_StrandPhaseR
@@ -0,0 +1,74 @@
+### StrandPhaseR pipeline ###
+ 
+BAM, = glob_wildcards("bam/{bam}.bam") #already part of of main pipeline 
+ref = '/local/data/david/test_pipeline/hg19.fa' #this should be included in JSON file 
+
+rule all:
+    input:	
+        "StrandPhaseR_analysis/Phased/phased_haps.txt"
+
+rule indexBams:
+    input:
+        "bam/{bam}.bam"
+    output:
+        "bam/{bam}.bam.bai"
+    shell:
+        "samtools index {input}"
+
+
+rule mergeBams:
+    input:
+        expand("bam/{bam}.bam", bam=BAM)
+    output:
+        "bam/mergedBam/merged.bam"	
+    shell:
+        "samtools merge {output} {input}" 
+
+rule indexMergedBam:
+    input:
+	"bam/mergedBam/merged.bam"
+    output:
+	"bam/mergedBam/merged.bam.bai"
+    shell:
+        "samtools index {input}"		
+
+rule CallSNVs_bcftools:
+    input:
+        fa={ref},
+        #bam=expand("bam/{bam}.bam", bam=BAM), #perhaps later we can decide to skip merging and submit all BAMs for SNV calling 
+        #bai=expand("bam/{bam}.bam.bai", bam=BAM)
+	bam="bam/mergedBam/merged.bam",	
+	bai="bam/mergedBam/merged.bam.bai"	
+
+    output:
+        "bam/SNVcalls/input.vcf"
+	
+    shell:		
+        "samtools mpileup -g -f {input.fa} {input.bam} | "
+        "bcftools call -mv - > {output}"
+
+rule install_StrandPhaseR:
+    output:
+        'R-packages/StrandPhaseR/R/StrandPhaseR'
+    log:
+        'strandphaser-install.log'
+    shell:
+        'Rscript install_StrandPhaseR.R > {log} 2>&1'
+
+
+rule run_SSphasing_pipeline:
+    input: 
+        mergedbam='bam/mergedBam/merged.bam',
+        wcregions='D2Rfb.strand_WCregions.txt',
+ 	snppositions='bam/SNVcalls/input.vcf',
+	configfile="StrandPhaseR.config",
+        strandphaser='R-packages/StrandPhaseR/R/StrandPhaseR',
+        bamfolder='bam'
+    output:
+        'StrandPhaseR_analysis/Phased/phased_haps.txt' 
+    log:
+        'StrandPhaseR_analysis/Phased/phased_haps.txt.log'
+    run: 
+        cwd = os.getcwd()
+        shell('Rscript StrandPhaseR_pipeline.R {input.bamfolder} StrandPhaseR_analysis {input.configfile} {input.wcregions} {input.snppositions} {cwd}/R-packages/ > {log} 2>&1')
+
diff --git a/StrandPhaseR.config b/StrandPhaseR.config
new file mode 100644
index 0000000..356d3a5
--- /dev/null
+++ b/StrandPhaseR.config
@@ -0,0 +1,19 @@
+#============== StrandPhaseR configuration file ===============#
+
+[General]
+numCPU  =  1 
+chromosomes  =  c('chr22','chr21') 
+pairedEndReads  =  TRUE 
+min.mapq  =  10 
+
+[StrandPhaseR]
+positions  = NULL 
+WCregions  =  NULL 
+min.baseq  =  20 
+num.iterations  =  2 
+translateBases  =  TRUE 
+fillMissAllele  =  NULL 
+splitPhasedReads  =  TRUE 
+callBreaks  =  FALSE 
+exportVCF  =  'D2Rfb' 
+bsGenome  =  'BSgenome.Hsapiens.UCSC.hg19' 
diff --git a/StrandPhaseR_pipeline.R b/StrandPhaseR_pipeline.R
new file mode 100644
index 0000000..92960ff
--- /dev/null
+++ b/StrandPhaseR_pipeline.R
@@ -0,0 +1,11 @@
+#!/usr/bin/Rscript
+
+args=commandArgs(TRUE)
+
+#add user defined path to load needed libraries
+.libPaths( c( .libPaths(), args[6]) )
+
+suppressPackageStartupMessages(library(StrandPhaseR))
+suppressPackageStartupMessages(library(BSgenome.Hsapiens.UCSC.hg19))
+
+strandPhaseR(inputfolder=args[1], outputfolder=args[2], configfile = args[3],  WCregions = args[4] , positions=args[5])
diff --git a/install_StrandPhaseR.R b/install_StrandPhaseR.R
new file mode 100644
index 0000000..5d77225
--- /dev/null
+++ b/install_StrandPhaseR.R
@@ -0,0 +1,4 @@
+#!/usr/bin/env Rscript
+Sys.setenv(Renv='PWD')
+library(devtools)
+withr::with_libpaths(new = "R-packages", install_git("git://github.com/daewoooo/StrandPhaseR.git", branch = "master"))
-- 
GitLab