Skip to content
Snippets Groups Projects
Commit 75da0fbe authored by Sascha Meiers's avatar Sascha Meiers
Browse files

Prepared the pipeline to include phased strand-states. The phasing pipeline...

Prepared the pipeline to include phased strand-states. The phasing pipeline will be added later on. ALso, cleaned up the Snakefile.
parent 1e876952
No related branches found
No related tags found
No related merge requests found
......@@ -15,7 +15,13 @@ rule all:
expand("plots/" + config["sample"] + ".{window}_variable.pdf", window = [50000, 100000]),
expand("segmentation2/" + config["sample"] + ".{window}_fixed.{bpdens}.txt", window = [50000, 100000, 200000, 500000], bpdens = ["few","many"]),
expand("segmentation2/" + config["sample"] + ".{window}_variable.{bpdens}.txt", window = [50000, 100000], bpdens = ["few","many"]),
"strand_states/" + config["sample"] + ".txt"
"strand_states/" + config["sample"] + ".final.txt"
################################################################################
# Plots #
################################################################################
rule plot_mosaic_counts:
input:
......@@ -30,6 +36,13 @@ rule plot_mosaic_counts:
{params.plot_command} {input.counts} {input.info} {output}
"""
################################################################################
# Read counting #
################################################################################
rule mosaic_count_fixed:
input:
bam = expand("bam/{bam}.bam", bam = BAM),
......@@ -73,19 +86,6 @@ rule mosaic_count_variable:
rule determine_strand_states:
input:
"counts/" + config["sample"] + "_500000_fixed.txt.gz"
output:
"strand_states/" + config["sample"] + ".txt"
params:
sce_command = "Rscript " + config["sce_script"]
shell:
"""
{params.sce_command} {input} {output}
"""
################################################################################
......@@ -124,7 +124,7 @@ rule run_sv_classification:
input:
counts = "counts/" + config["sample"] + ".{windows}.txt.gz",
info = "counts/" + config["sample"] + ".{windows}.info",
states = "strand_states/" + config["sample"] + ".txt",
states = "strand_states/" + config["sample"] + ".final.txt",
bp = "segmentation2/" + config["sample"] + ".{windows}.{bpdens}.txt"
output:
outdir = "sv_probabilities/{file_name}.{bpdens}/",
......@@ -134,6 +134,7 @@ rule run_sv_classification:
class_command = "Rscript " + config["class_script"]
shell:
"""
# set haplotype
cd {params.class_dir}
{params.class_command} \
binRCfile={input.counts} \
......@@ -142,8 +143,52 @@ rule run_sv_classification:
stateFile={input.states} \
K=22 \
maxCN= 4 \
haplotypeInfo \
outputDir={output.outdir}
"""
################################################################################
# Strand states & phasing #
################################################################################
rule determine_initial_strand_states:
input:
"counts/" + config["sample"] + "_500000_fixed.txt.gz"
output:
"strand_states/" + config["sample"] + ".txt"
params:
sce_command = "Rscript " + config["sce_script"]
shell:
"""
{params.sce_command} {input} {output}
"""
# Strandphaser needs a different input format which contains the path names to
# the bam files. This rule extracts this information and prepares an input file.
rule convert_strandphaser_input:
input:
states = "strand_states/" + config["sample"] + ".txt",
info = "counts/" + config["sample"] + "_500000_fixed.info"
output:
"strand_states/" + config["sample"] + ".strandphaser_input.txt"
script:
"utils/helper.convert_strandphaser_input.R"
# Dummy rule - this will be replaced by strand-phaser
rule run_strandphaser:
output:
"strand_states/" + config["sample"] + ".strandphaser_output.txt"
rule convert_strandphaser_output:
input:
phased_states = "strand_states/" + config["sample"] + ".strandphaser_output.txt",
initial_states = "strand_states/" + config["sample"] + ".txt",
info = "counts/" + config["sample"] + "_500000_fixed.info"
output:
"strand_states/" + config["sample"] + ".final.txt"
script:
"utils/helper.convert_strandphaser_output.R"
library(data.table);
d = fread(snakemake@input[["states"]])
e = fread(snakemake@input[["info"]])
e$bam = basename(e$bam);
f = merge(d, e, by = c("sample","cell"))[class == "WC", .(chrom,start,end,bam)]
write.table(f, file=snakemake@output[[1]], quote=F, row.names=F, col.names=F, sep="\t")
\ No newline at end of file
library(data.table);
library(assertthat)
d = fread(snakemake@input[["phased_states"]])
e = fread(snakemake@input[["info"]])
g = fread(snakemake@intput[["initial_states"]])
d$bam = basename(d$bam);
e$bam = e$cell
e$cell = NULL
e$sample = NULL
f = merge(d, e, by = "bam")[, .(chrom,start,end,sample,cell,class)]
# Note that there is still a bug in Venla's strand state detection.
g = merge(g,f, by = c("chrom","start","end","sample","cell"), all.x = T)
# Overwrite with David's phased strand state if available!
g = g[, class := ifelse(!is.na(class.y), class.y, class.x)][]
g$class.x = NULL
g$class.y = NULL
g = g[, .(chrom, start, end, sample, cell, class)]
write.table(g, file=snakemake@output[[1]], quote=F, row.names=F, col.names=F, sep="\t")
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment