From 044fd3d7a4620dee274d0bb0c2ba110f5d5eabc4 Mon Sep 17 00:00:00 2001 From: daewoooo <daewoooo@gmail.com> Date: Tue, 14 Nov 2017 10:11:47 +0100 Subject: [PATCH] Added StrandPhaseR pipeline --- Snakefile_StrandPhaseR | 74 +++++++++++++++++++++++++++++++++++++++++ StrandPhaseR.config | 19 +++++++++++ StrandPhaseR_pipeline.R | 11 ++++++ install_StrandPhaseR.R | 4 +++ 4 files changed, 108 insertions(+) create mode 100644 Snakefile_StrandPhaseR create mode 100644 StrandPhaseR.config create mode 100644 StrandPhaseR_pipeline.R create mode 100644 install_StrandPhaseR.R diff --git a/Snakefile_StrandPhaseR b/Snakefile_StrandPhaseR new file mode 100644 index 0000000..ba9f2bc --- /dev/null +++ b/Snakefile_StrandPhaseR @@ -0,0 +1,74 @@ +### StrandPhaseR pipeline ### + +BAM, = glob_wildcards("bam/{bam}.bam") #already part of of main pipeline +ref = '/local/data/david/test_pipeline/hg19.fa' #this should be included in JSON file + +rule all: + input: + "StrandPhaseR_analysis/Phased/phased_haps.txt" + +rule indexBams: + input: + "bam/{bam}.bam" + output: + "bam/{bam}.bam.bai" + shell: + "samtools index {input}" + + +rule mergeBams: + input: + expand("bam/{bam}.bam", bam=BAM) + output: + "bam/mergedBam/merged.bam" + shell: + "samtools merge {output} {input}" + +rule indexMergedBam: + input: + "bam/mergedBam/merged.bam" + output: + "bam/mergedBam/merged.bam.bai" + shell: + "samtools index {input}" + +rule CallSNVs_bcftools: + input: + fa={ref}, + #bam=expand("bam/{bam}.bam", bam=BAM), #perhaps later we can decide to skip merging and submit all BAMs for SNV calling + #bai=expand("bam/{bam}.bam.bai", bam=BAM) + bam="bam/mergedBam/merged.bam", + bai="bam/mergedBam/merged.bam.bai" + + output: + "bam/SNVcalls/input.vcf" + + shell: + "samtools mpileup -g -f {input.fa} {input.bam} | " + "bcftools call -mv - > {output}" + +rule install_StrandPhaseR: + output: + 'R-packages/StrandPhaseR/R/StrandPhaseR' + log: + 'strandphaser-install.log' + shell: + 'Rscript install_StrandPhaseR.R > {log} 2>&1' + + +rule run_SSphasing_pipeline: + input: + mergedbam='bam/mergedBam/merged.bam', + wcregions='D2Rfb.strand_WCregions.txt', + snppositions='bam/SNVcalls/input.vcf', + configfile="StrandPhaseR.config", + strandphaser='R-packages/StrandPhaseR/R/StrandPhaseR', + bamfolder='bam' + output: + 'StrandPhaseR_analysis/Phased/phased_haps.txt' + log: + 'StrandPhaseR_analysis/Phased/phased_haps.txt.log' + run: + cwd = os.getcwd() + shell('Rscript StrandPhaseR_pipeline.R {input.bamfolder} StrandPhaseR_analysis {input.configfile} {input.wcregions} {input.snppositions} {cwd}/R-packages/ > {log} 2>&1') + diff --git a/StrandPhaseR.config b/StrandPhaseR.config new file mode 100644 index 0000000..356d3a5 --- /dev/null +++ b/StrandPhaseR.config @@ -0,0 +1,19 @@ +#============== StrandPhaseR configuration file ===============# + +[General] +numCPU = 1 +chromosomes = c('chr22','chr21') +pairedEndReads = TRUE +min.mapq = 10 + +[StrandPhaseR] +positions = NULL +WCregions = NULL +min.baseq = 20 +num.iterations = 2 +translateBases = TRUE +fillMissAllele = NULL +splitPhasedReads = TRUE +callBreaks = FALSE +exportVCF = 'D2Rfb' +bsGenome = 'BSgenome.Hsapiens.UCSC.hg19' diff --git a/StrandPhaseR_pipeline.R b/StrandPhaseR_pipeline.R new file mode 100644 index 0000000..92960ff --- /dev/null +++ b/StrandPhaseR_pipeline.R @@ -0,0 +1,11 @@ +#!/usr/bin/Rscript + +args=commandArgs(TRUE) + +#add user defined path to load needed libraries +.libPaths( c( .libPaths(), args[6]) ) + +suppressPackageStartupMessages(library(StrandPhaseR)) +suppressPackageStartupMessages(library(BSgenome.Hsapiens.UCSC.hg19)) + +strandPhaseR(inputfolder=args[1], outputfolder=args[2], configfile = args[3], WCregions = args[4] , positions=args[5]) diff --git a/install_StrandPhaseR.R b/install_StrandPhaseR.R new file mode 100644 index 0000000..5d77225 --- /dev/null +++ b/install_StrandPhaseR.R @@ -0,0 +1,4 @@ +#!/usr/bin/env Rscript +Sys.setenv(Renv='PWD') +library(devtools) +withr::with_libpaths(new = "R-packages", install_git("git://github.com/daewoooo/StrandPhaseR.git", branch = "master")) -- GitLab