Skip to content
Snippets Groups Projects
Commit 4d7da438 authored by Tobias Marschall's avatar Tobias Marschall
Browse files

Evaluate different parameter settings, including different modes to select a segmentation.

parent 5b1b34ec
No related branches found
No related tags found
No related merge requests found
......@@ -44,6 +44,13 @@
"simulation_cell_count" : 100,
"simulation_alpha" : 0.02,
"genome_size" : 3e9
"genome_size" : 3e9,
"ground_truth_clonal" : {
"RPE-BM510" : "/MMCI/TM/scratch/strandseq/input-data/ground_truth/RPE-BM510_manual/clonal-events.tsv"
},
"ground_truth_single_cell" : {
"RPE-BM510" : "/MMCI/TM/scratch/strandseq/input-data/ground_truth/RPE-BM510_manual/single-cell-events.tsv"
}
}
......@@ -43,6 +43,9 @@ METHODS = [
"simpleCalls_llr4_poppriorsTRUE_haplotagsTRUE_gtcutoff0.05_regfactor6",
]
BPDENS = [
"selected_j{}_s{}".format(joint, single) for joint in [0.5,0.1,0.01] for single in [1.0,0.5,0.1]
]
singularity: "docker://smei/mosaicatcher-pipeline:v0.1"
......@@ -68,19 +71,24 @@ rule all:
sample = SAMPLE,
chrom = config["chromosomes"],
window = [100000],
bpdens = ["selected"],
bpdens = BPDENS,
method = METHODS),
expand("ploidy/{sample}/ploidy.{chrom}.txt", sample = SAMPLES, chrom = config["chromosomes"]),
expand("sv_calls/{sample}/{window}_fixed_norm.{bpdens}/plots/sv_consistency/{method}.consistency-barplot-{plottype}.pdf",
sample = SAMPLES,
window = [100000],
bpdens = ["selected"],
bpdens = BPDENS,
method = METHODS,
plottype = ["byaf","bypos"]),
expand("halo/{sample}/{window}_{suffix}.json.gz",
sample = SAMPLES,
window = [100000],
suffix = ["fixed", "fixed_norm"])
suffix = ["fixed", "fixed_norm"]),
expand("stats/{sample}/{window}_fixed_norm.{bpdens}/{method}.tsv",
sample = SAMPLES,
window = [100000],
bpdens = BPDENS,
method = METHODS),
################################################################################
......@@ -249,7 +257,7 @@ rule plot_SV_calls:
strand = "strand_states/{sample}/final.txt",
segments = "segmentation2/{sample}/{windows}.{bpdens}.txt"
output:
"sv_calls/{sample}/{windows}.{bpdens}/plots/sv_calls/{method}.{chrom}.pdf"
"sv_calls/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/plots/sv_calls/{method}.{chrom}.pdf"
log:
"log/plot_SV_calls/{sample}/{windows}.{bpdens}.{method}.{chrom}.log"
params:
......@@ -273,7 +281,7 @@ rule plot_SV_calls_simulated:
segments = "segmentation2/simulation{seed}-{window}/{window}_fixed.{bpdens}.txt",
truth = "simulation/variants/genome{seed}-{window}.txt"
output:
"sv_calls/simulation{seed}-{window}/{window}_fixed.{bpdens}/plots/sv_calls/{method}.{chrom}.pdf"
"sv_calls/simulation{seed}-{window}/{window}_fixed.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/plots/sv_calls/{method}.{chrom}.pdf"
log:
"log/plot_SV_calls_simulated/simulation{seed}-{window}/{window}_fixed.{bpdens}.{method}.{chrom}.log"
params:
......@@ -295,8 +303,8 @@ rule plot_SV_consistency_barplot:
input:
sv_calls = "sv_calls/{sample}/{windows}.{bpdens}/{method}.txt",
output:
barplot_bypos = "sv_calls/{sample}/{windows}.{bpdens}/plots/sv_consistency/{method}.consistency-barplot-bypos.pdf",
barplot_byaf = "sv_calls/{sample}/{windows}.{bpdens}/plots/sv_consistency/{method}.consistency-barplot-byaf.pdf",
barplot_bypos = "sv_calls/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/plots/sv_consistency/{method}.consistency-barplot-bypos.pdf",
barplot_byaf = "sv_calls/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/plots/sv_consistency/{method}.consistency-barplot-byaf.pdf",
log:
"log/plot_SV_consistency/{sample}/{windows}.{bpdens}.{method}.log"
script:
......@@ -502,15 +510,15 @@ rule segmentation_selection:
singleseg=lambda wc: ["segmentation-per-cell/{}/{}/{}_{}.txt".format(wc.sample, cell, wc.window, wc.file_name) for cell in CELL_PER_SAMPLE[wc.sample]],
info="counts/{sample}/{window}_{file_name}.info",
output:
jointseg="segmentation2/{sample}/{window,[0-9]+}_{file_name}.selected.txt",
strand_states="strand_states/{sample}/{window,[0-9]+}_{file_name}.intitial_strand_state",
jointseg="segmentation2/{sample}/{window,[0-9]+}_{file_name}.selected_j{min_diff_jointseg}_s{min_diff_singleseg}.txt",
strand_states="strand_states/{sample}/{window,[0-9]+}_{file_name}.selected_j{min_diff_jointseg}_s{min_diff_singleseg}.intitial_strand_state",
log:
"log/segmentation_selection/{sample}/{window}_{file_name}.log"
"log/segmentation_selection/{sample}/{window}_{file_name}.selected_j{min_diff_jointseg}_s{min_diff_singleseg}.log"
params:
cellnames = lambda wc: ",".join(cell for cell in CELL_PER_SAMPLE[wc.sample]),
sce_min_distance = 500000,
shell:
"./utils/detect_strand_states.py --sce_min_distance {params.sce_min_distance} --output_jointseg {output.jointseg} --output_strand_states {output.strand_states} --samplename {wildcards.sample} --cellnames {params.cellnames} {input.info} {input.counts} {input.jointseg} {input.singleseg} > {log} 2>&1"
"./utils/detect_strand_states.py --sce_min_distance {params.sce_min_distance} --min_diff_jointseg {wildcards.min_diff_jointseg} --min_diff_singleseg {wildcards.min_diff_singleseg} --output_jointseg {output.jointseg} --output_strand_states {output.strand_states} --samplename {wildcards.sample} --cellnames {params.cellnames} {input.info} {input.counts} {input.jointseg} {input.singleseg} > {log} 2>&1"
################################################################################
......@@ -525,7 +533,7 @@ rule plot_heatmap:
info = "counts/{sample}/{windows}.info",
bamNames = "sv_probabilities/{sample}/{windows}.{bpdens}/bamNames.txt"
output:
"sv_probabilities/{sample}/{windows}.{bpdens}/final_plots/heatmapPlots.pdf"
"sv_probabilities/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/final_plots/heatmapPlots.pdf"
params:
r_package_path = "utils/R-packages2"
log:
......@@ -542,7 +550,7 @@ rule mosaiClassifier_make_call:
input:
probs = 'haplotag/table/{sample}/haplotag-likelihoods.{windows}.{bpdens}.Rdata'
output:
"sv_calls/{sample}/{windows}.{bpdens}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}.txt"
"sv_calls/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}.txt"
log:
"log/mosaiClassifier_make_call/{sample}/{windows}.{bpdens}.llr{llr}.poppriors{pop_priors}.haplotags{use_haplotags}.gtcutoff{gtcutoff}.regfactor{regfactor}.log"
script:
......@@ -555,7 +563,7 @@ rule mosaiClassifier_calc_probs:
states = "strand_states/{sample}/final.txt",
bp = "segmentation2/{sample}/{windows}.{bpdens}.txt"
output:
output = "sv_probabilities/{sample}/{windows}.{bpdens}/probabilities.Rdata"
output = "sv_probabilities/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/probabilities.Rdata"
log:
"log/mosaiClassifier_calc_probs/{sample}/{windows}.{bpdens}.log"
script:
......@@ -565,7 +573,7 @@ rule mosaiClassifier_make_call_biallelic:
input:
probs = "sv_probabilities/{sample}/{windows}.{bpdens}/probabilities.Rdata"
output:
"sv_calls/{sample}/{windows}.{bpdens}/biAllelic_llr{llr}.txt"
"sv_calls/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/biAllelic_llr{llr}.txt"
log:
"log/mosaiClassifier_make_call_biallelic/{sample}/{windows}.{bpdens}.{llr}.log"
script:
......@@ -595,12 +603,12 @@ rule mosaiClassifier_make_call_biallelic:
rule determine_initial_strand_states:
input:
"strand_states/{sample}/100000_fixed_norm.intitial_strand_state"
"strand_states/{sample}/100000_fixed_norm.selected_j0.5_s1.0.intitial_strand_state"
output:
"strand_states/{sample}/intitial_strand_state"
shell:
"""
cd strand_states/{wildcards.sample} && ln -s 100000_fixed_norm.intitial_strand_state intitial_strand_state && cd ../..
cd strand_states/{wildcards.sample} && ln -s 100000_fixed_norm.selected_j0.5_s1.0.intitial_strand_state intitial_strand_state && cd ../..
"""
# Strandphaser needs a different input format which contains the path names to
......@@ -772,7 +780,7 @@ rule create_haplotag_segment_bed:
input:
segments="segmentation2/{sample}/{size}{what}.{bpdens}.txt",
output:
bed="haplotag/bed/{sample}/{size,[0-9]+}{what}.{bpdens}.bed",
bed="haplotag/bed/{sample}/{size,[0-9]+}{what}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}.bed",
shell:
"awk 'BEGIN {{s={wildcards.size};OFS=\"\\t\"}} $2!=c {{prev=0}} NR>1 {{print $2,prev*s+1,($3+1)*s; prev=$3+1; c=$2}}' {input.segments} > {output.bed}"
......@@ -782,7 +790,7 @@ rule create_haplotag_table:
bai='haplotag/bam/{sample}/{cell}.bam.bai',
bed = "haplotag/bed/{sample}/{windows}.{bpdens}.bed"
output:
tsv='haplotag/table/{sample}/by-cell/haplotag-counts.{cell}.{windows}.{bpdens}.tsv'
tsv='haplotag/table/{sample}/by-cell/haplotag-counts.{cell}.{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}.tsv'
log:
"log/create_haplotag_table/{sample}.{cell}.{windows}.{bpdens}.log"
script:
......@@ -792,7 +800,7 @@ rule merge_haplotag_tables:
input:
tsvs=lambda wc: ['haplotag/table/{}/by-cell/haplotag-counts.{}.{}.{}.tsv'.format(wc.sample,cell,wc.windows,wc.bpdens) for cell in BAM_PER_SAMPLE[wc.sample]],
output:
tsv='haplotag/table/{sample}/full/haplotag-counts.{windows}.{bpdens}.tsv'
tsv='haplotag/table/{sample}/full/haplotag-counts.{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}.tsv'
shell:
'(head -n1 {input.tsvs[0]} && tail -q -n +2 {input.tsvs}) > {output.tsv}'
......@@ -801,7 +809,7 @@ rule create_haplotag_likelihoods:
input:
haplotag_table='haplotag/table/{sample}/full/haplotag-counts.{windows}.{bpdens}.tsv',
sv_probs_table = 'sv_probabilities/{sample}/{windows}.{bpdens}/probabilities.Rdata',
output: 'haplotag/table/{sample}/haplotag-likelihoods.{windows}.{bpdens}.Rdata'
output: 'haplotag/table/{sample}/haplotag-likelihoods.{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}.Rdata'
log:
"log/create_haplotag_likelihoods/{sample}.{windows}.{bpdens}.log"
script:
......@@ -903,3 +911,36 @@ rule split_external_snv_calls:
> {log} 2>&1
"""
################################################################################
# Summary statistics on sv calls #
################################################################################
rule summary_statistics:
input:
segmentation = 'segmentation2/{sample}/{windows}.{bpdens}.txt',
strandstates = 'strand_states/{sample}/{windows}.{bpdens}.intitial_strand_state',
sv_calls = 'sv_calls/{sample}/{windows}.{bpdens}/{method}.txt',
output:
tsv = 'stats/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/{method}.tsv',
log:
'log/summary_statistics/{sample}/{windows}.{bpdens}/{method}.log'
run:
p = []
try:
f = config["ground_truth_clonal"][wildcards.sample]
if len(f) > 0:
p.append('--true-events-clonal')
p.append(f)
except KeyError:
pass
try:
f = config["ground_truth_single_cell"][wildcards.sample]
if len(f) > 0:
p.append('--true-events-single-cell')
p.append(f)
except KeyError:
pass
additional_params = ' '.join(p)
shell('utils/callset_summary_stats.py --segmentation {input.segmentation} --strandstates {input.strandstates} {additional_params} {input.sv_calls} > {output.tsv}')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment