From 38405c4d19a5128c70eff24cc5e65d7e2414c304 Mon Sep 17 00:00:00 2001 From: Tobias Marschall <tobias.marschall@0ohm.net> Date: Thu, 11 Oct 2018 08:33:40 +0200 Subject: [PATCH] Add group labels to SV plots --- Snakefile | 21 ++++++++++++++++----- utils/plot-sv-calls.R | 31 +++++++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/Snakefile b/Snakefile index 3f83e4e..e85e334 100644 --- a/Snakefile +++ b/Snakefile @@ -278,15 +278,16 @@ ruleorder: plot_SV_calls_simulated > plot_SV_calls rule plot_SV_calls: input: counts = "counts/{sample}/{windows}.txt.gz", - calls = "sv_calls/{sample}/{windows}.{bpdens}/{method}.txt", - complex = "sv_calls/{sample}/{windows}.{bpdens}/{method}.complex.tsv", + calls = "sv_calls/{sample}/{windows}.{bpdens}/{method}_filter{filter}.txt", + complex = "sv_calls/{sample}/{windows}.{bpdens}/{method}_filter{filter}.complex.tsv", strand = "strand_states/{sample}/{windows}.{bpdens}/final.txt", segments = "segmentation2/{sample}/{windows}.{bpdens}.txt", scsegments = "segmentation-singlecell/{sample}/{windows}.{bpdens}.txt", + grouptrack = "postprocessing/group-table/{sample}/{windows}.{bpdens}/{method}.tsv", output: - "sv_calls/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/plots/sv_calls/{method}.{chrom}.pdf" + "sv_calls/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/plots/sv_calls/{method}_filter{filter,(TRUE|FALSE)}.{chrom}.pdf" log: - "log/plot_SV_calls/{sample}/{windows}.{bpdens}.{method}.{chrom}.log" + "log/plot_SV_calls/{sample}/{windows}.{bpdens}.{method}_filter{filter}.{chrom}.log" shell: """ Rscript utils/plot-sv-calls.R \ @@ -294,6 +295,7 @@ rule plot_SV_calls: singlecellsegments={input.scsegments} \ strand={input.strand} \ complex={input.complex} \ + groups={input.grouptrack} \ calls={input.calls} \ {input.counts} \ {wildcards.chrom} \ @@ -669,13 +671,22 @@ rule postprocessing_filter: rule postprocessing_merge: input: - calls = "postprocessing/filter/{sample}/{window}_fixed_norm.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}.txt" + calls = "postprocessing/filter/{sample}/{window}_fixed_norm.{bpdens}/simpleCalls_llr{llr}_poppriors{pop_priors}_haplotags{use_haplotags}_gtcutoff{gtcutoff}_regfactor{regfactor}.txt" output: calls = "postprocessing/merge/{sample}/{window}_fixed_norm.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}.txt" shell: 'utils/group_nearby_calls_of_same_AF_and_generate_output_table.pl {input.calls} > {output.calls}' +rule postprocessing_sv_group_table: + input: + calls = "postprocessing/merge/{sample}/{window}_fixed_norm.{bpdens}/simpleCalls_llr{llr}_poppriors{pop_priors}_haplotags{use_haplotags}_gtcutoff{gtcutoff}_regfactor{regfactor}.txt" + output: + grouptrack = "postprocessing/group-table/{sample}/{window}_fixed_norm.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}.tsv" + shell: + 'utils/create-sv-group-track.py {input.calls} > {output.grouptrack}' + + ################################################################################ # Strand states & phasing # ################################################################################ diff --git a/utils/plot-sv-calls.R b/utils/plot-sv-calls.R index 60d29b2..b35d1d3 100644 --- a/utils/plot-sv-calls.R +++ b/utils/plot-sv-calls.R @@ -9,7 +9,7 @@ suppressMessages(library(ggplot2)) library(scales) %>% invisible library(assertthat) %>% invisible library(stringr) %>% invisible - +library(RColorBrewer) %>% invisible ################################################################################ # Settings # @@ -76,6 +76,7 @@ print_usage_and_stop = function(msg = NULL) { message(" truth=<file> Mark the `true`` SVs provided in a table ") message(" strand=<file> Mark the strand states which calls are based on ") message(" complex=<file> Mark complex regions given in file ") + message(" groups=<file> Table with SV call grouping ") message(" no-none Do not hightlight black-listed (i.e. None) bins ") message(" ") message("Generates one plot per chromosome listing all cells below another, separated ") @@ -112,7 +113,7 @@ cells_per_page = 8 show_none = T if (length(args)>3) { - if (!all(grepl("^(strand|calls|segments|per-page|truth|no-none|complex|singlecellsegments)=?", args[1:(length(args)-3)]))) { + if (!all(grepl("^(strand|calls|segments|per-page|truth|no-none|complex|singlecellsegments|groups)=?", args[1:(length(args)-3)]))) { print_usage_and_stop("[Error]: Options must be one of `calls`, `segments`, `per-page`, or `truth`") } for (op in args[1:(length(args)-3)]) { if (grepl("^segments=", op)) f_segments = str_sub(op, 10) @@ -124,6 +125,7 @@ if (length(args)>3) { } if (grepl("^strand=", op)) f_strand = str_sub(op, 8) if (grepl("^complex=", op)) f_complex = str_sub(op, 9) + if (grepl("^groups=", op)) f_groups = str_sub(op, 8) if (grepl("^singlecellsegments=", op)) f_scsegments = str_sub(op, 20) if (grepl("^no-none$", op)) show_none = F } @@ -249,6 +251,19 @@ if (!is.null(f_complex)) { } +### Check SV groups file +if (!is.null(f_groups)) { + message(" * Reading SV group file from ", f_groups, "...") + groups = fread(f_groups) + assert_that("chrom" %in% colnames(groups), + "start" %in% colnames(groups), + "end" %in% colnames(groups), + "group_id" %in% colnames(groups)) %>% invisible + groups[, group_id := paste("SV group", group_id)] + groups = groups[chrom == CHROM] + message(" --> Found ", nrow(groups), " SV groups in chromosome ", CHROM) +} + ### Check single cell segmentation file if (!is.null(f_scsegments)) { message(" * Reading per-cell segmentation regions state file from ", f_scsegments, "...") @@ -345,6 +360,18 @@ while (i <= n_cells) { } } + # Add bars for SV group, if available + if (!is.null(f_groups)) { + message(" * Adding SV groups") + if (nrow(groups) > 0) { + plt <- plt + + geom_rect(data = groups, + aes(xmin = start, xmax = end, ymin = .85*y_lim, ymax = Inf, fill = group_id)) + # Add colors for SV classes + manual_colors = c(manual_colors, setNames(colorRampPalette(brewer.pal(12,"Set2"))(nrow(groups)),groups$group_id)) + } + } + # Add bars for complex states, if available if (!is.null(f_complex)) { message(" * Adding complex intervals") -- GitLab