Skip to content
Snippets Groups Projects
Commit 044fd3d7 authored by daewoooo's avatar daewoooo
Browse files

Added StrandPhaseR pipeline

parent 47125871
No related branches found
No related tags found
No related merge requests found
### StrandPhaseR pipeline ###
BAM, = glob_wildcards("bam/{bam}.bam") #already part of of main pipeline
ref = '/local/data/david/test_pipeline/hg19.fa' #this should be included in JSON file
rule all:
input:
"StrandPhaseR_analysis/Phased/phased_haps.txt"
rule indexBams:
input:
"bam/{bam}.bam"
output:
"bam/{bam}.bam.bai"
shell:
"samtools index {input}"
rule mergeBams:
input:
expand("bam/{bam}.bam", bam=BAM)
output:
"bam/mergedBam/merged.bam"
shell:
"samtools merge {output} {input}"
rule indexMergedBam:
input:
"bam/mergedBam/merged.bam"
output:
"bam/mergedBam/merged.bam.bai"
shell:
"samtools index {input}"
rule CallSNVs_bcftools:
input:
fa={ref},
#bam=expand("bam/{bam}.bam", bam=BAM), #perhaps later we can decide to skip merging and submit all BAMs for SNV calling
#bai=expand("bam/{bam}.bam.bai", bam=BAM)
bam="bam/mergedBam/merged.bam",
bai="bam/mergedBam/merged.bam.bai"
output:
"bam/SNVcalls/input.vcf"
shell:
"samtools mpileup -g -f {input.fa} {input.bam} | "
"bcftools call -mv - > {output}"
rule install_StrandPhaseR:
output:
'R-packages/StrandPhaseR/R/StrandPhaseR'
log:
'strandphaser-install.log'
shell:
'Rscript install_StrandPhaseR.R > {log} 2>&1'
rule run_SSphasing_pipeline:
input:
mergedbam='bam/mergedBam/merged.bam',
wcregions='D2Rfb.strand_WCregions.txt',
snppositions='bam/SNVcalls/input.vcf',
configfile="StrandPhaseR.config",
strandphaser='R-packages/StrandPhaseR/R/StrandPhaseR',
bamfolder='bam'
output:
'StrandPhaseR_analysis/Phased/phased_haps.txt'
log:
'StrandPhaseR_analysis/Phased/phased_haps.txt.log'
run:
cwd = os.getcwd()
shell('Rscript StrandPhaseR_pipeline.R {input.bamfolder} StrandPhaseR_analysis {input.configfile} {input.wcregions} {input.snppositions} {cwd}/R-packages/ > {log} 2>&1')
#============== StrandPhaseR configuration file ===============#
[General]
numCPU = 1
chromosomes = c('chr22','chr21')
pairedEndReads = TRUE
min.mapq = 10
[StrandPhaseR]
positions = NULL
WCregions = NULL
min.baseq = 20
num.iterations = 2
translateBases = TRUE
fillMissAllele = NULL
splitPhasedReads = TRUE
callBreaks = FALSE
exportVCF = 'D2Rfb'
bsGenome = 'BSgenome.Hsapiens.UCSC.hg19'
#!/usr/bin/Rscript
args=commandArgs(TRUE)
#add user defined path to load needed libraries
.libPaths( c( .libPaths(), args[6]) )
suppressPackageStartupMessages(library(StrandPhaseR))
suppressPackageStartupMessages(library(BSgenome.Hsapiens.UCSC.hg19))
strandPhaseR(inputfolder=args[1], outputfolder=args[2], configfile = args[3], WCregions = args[4] , positions=args[5])
#!/usr/bin/env Rscript
Sys.setenv(Renv='PWD')
library(devtools)
withr::with_libpaths(new = "R-packages", install_git("git://github.com/daewoooo/StrandPhaseR.git", branch = "master"))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment