From 38405c4d19a5128c70eff24cc5e65d7e2414c304 Mon Sep 17 00:00:00 2001
From: Tobias Marschall <tobias.marschall@0ohm.net>
Date: Thu, 11 Oct 2018 08:33:40 +0200
Subject: [PATCH] Add group labels to SV plots

---
 Snakefile             | 21 ++++++++++++++++-----
 utils/plot-sv-calls.R | 31 +++++++++++++++++++++++++++++--
 2 files changed, 45 insertions(+), 7 deletions(-)

diff --git a/Snakefile b/Snakefile
index 3f83e4e..e85e334 100644
--- a/Snakefile
+++ b/Snakefile
@@ -278,15 +278,16 @@ ruleorder: plot_SV_calls_simulated > plot_SV_calls
 rule plot_SV_calls:
     input:
         counts = "counts/{sample}/{windows}.txt.gz",
-        calls  = "sv_calls/{sample}/{windows}.{bpdens}/{method}.txt",
-        complex = "sv_calls/{sample}/{windows}.{bpdens}/{method}.complex.tsv",
+        calls  = "sv_calls/{sample}/{windows}.{bpdens}/{method}_filter{filter}.txt",
+        complex = "sv_calls/{sample}/{windows}.{bpdens}/{method}_filter{filter}.complex.tsv",
         strand = "strand_states/{sample}/{windows}.{bpdens}/final.txt",
         segments = "segmentation2/{sample}/{windows}.{bpdens}.txt",
         scsegments = "segmentation-singlecell/{sample}/{windows}.{bpdens}.txt",
+        grouptrack = "postprocessing/group-table/{sample}/{windows}.{bpdens}/{method}.tsv",
     output:
-        "sv_calls/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/plots/sv_calls/{method}.{chrom}.pdf"
+        "sv_calls/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/plots/sv_calls/{method}_filter{filter,(TRUE|FALSE)}.{chrom}.pdf"
     log:
-        "log/plot_SV_calls/{sample}/{windows}.{bpdens}.{method}.{chrom}.log"
+        "log/plot_SV_calls/{sample}/{windows}.{bpdens}.{method}_filter{filter}.{chrom}.log"
     shell:
         """
         Rscript utils/plot-sv-calls.R \
@@ -294,6 +295,7 @@ rule plot_SV_calls:
             singlecellsegments={input.scsegments} \
             strand={input.strand} \
             complex={input.complex} \
+            groups={input.grouptrack} \
             calls={input.calls} \
             {input.counts} \
             {wildcards.chrom} \
@@ -669,13 +671,22 @@ rule postprocessing_filter:
 
 rule postprocessing_merge:
     input: 
-        calls = "postprocessing/filter/{sample}/{window}_fixed_norm.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}.txt"
+        calls = "postprocessing/filter/{sample}/{window}_fixed_norm.{bpdens}/simpleCalls_llr{llr}_poppriors{pop_priors}_haplotags{use_haplotags}_gtcutoff{gtcutoff}_regfactor{regfactor}.txt"
     output: 
         calls = "postprocessing/merge/{sample}/{window}_fixed_norm.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}.txt"
     shell:
         'utils/group_nearby_calls_of_same_AF_and_generate_output_table.pl {input.calls}  > {output.calls}'
 
 
+rule postprocessing_sv_group_table:
+    input: 
+        calls = "postprocessing/merge/{sample}/{window}_fixed_norm.{bpdens}/simpleCalls_llr{llr}_poppriors{pop_priors}_haplotags{use_haplotags}_gtcutoff{gtcutoff}_regfactor{regfactor}.txt"
+    output: 
+        grouptrack = "postprocessing/group-table/{sample}/{window}_fixed_norm.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/simpleCalls_llr{llr}_poppriors{pop_priors,(TRUE|FALSE)}_haplotags{use_haplotags,(TRUE|FALSE)}_gtcutoff{gtcutoff,[0-9\\.]+}_regfactor{regfactor,[0-9]+}.tsv"
+    shell:
+        'utils/create-sv-group-track.py {input.calls}  > {output.grouptrack}'
+
+
 ################################################################################
 # Strand states & phasing                                                      #
 ################################################################################
diff --git a/utils/plot-sv-calls.R b/utils/plot-sv-calls.R
index 60d29b2..b35d1d3 100644
--- a/utils/plot-sv-calls.R
+++ b/utils/plot-sv-calls.R
@@ -9,7 +9,7 @@ suppressMessages(library(ggplot2))
 library(scales) %>% invisible
 library(assertthat) %>% invisible
 library(stringr) %>% invisible
-
+library(RColorBrewer) %>% invisible
 
 ################################################################################
 # Settings                                                                     #
@@ -76,6 +76,7 @@ print_usage_and_stop = function(msg = NULL) {
   message("    truth=<file>              Mark the `true`` SVs provided in a table          ")
   message("    strand=<file>             Mark the strand states which calls are based on   ")
   message("    complex=<file>            Mark complex regions given in file                ")
+  message("    groups=<file>             Table with SV call grouping                       ")
   message("    no-none                   Do not hightlight black-listed (i.e. None) bins   ")
   message("                                                                                ")
   message("Generates one plot per chromosome listing all cells below another, separated    ")
@@ -112,7 +113,7 @@ cells_per_page = 8
 show_none  = T
 
 if (length(args)>3) {
-  if (!all(grepl("^(strand|calls|segments|per-page|truth|no-none|complex|singlecellsegments)=?", args[1:(length(args)-3)]))) {
+  if (!all(grepl("^(strand|calls|segments|per-page|truth|no-none|complex|singlecellsegments|groups)=?", args[1:(length(args)-3)]))) {
     print_usage_and_stop("[Error]: Options must be one of `calls`, `segments`, `per-page`, or `truth`") }
   for (op in args[1:(length(args)-3)]) {
     if (grepl("^segments=", op)) f_segments = str_sub(op, 10)
@@ -124,6 +125,7 @@ if (length(args)>3) {
     }
     if (grepl("^strand=", op))   f_strand = str_sub(op, 8)
     if (grepl("^complex=", op))   f_complex = str_sub(op, 9)
+    if (grepl("^groups=", op))   f_groups = str_sub(op, 8)
     if (grepl("^singlecellsegments=", op))   f_scsegments = str_sub(op, 20)
     if (grepl("^no-none$", op)) show_none = F
   }
@@ -249,6 +251,19 @@ if (!is.null(f_complex)) {
 
 }
 
+### Check SV groups file
+if (!is.null(f_groups)) {
+  message(" * Reading SV group file from ", f_groups, "...")
+  groups = fread(f_groups)
+  assert_that("chrom"   %in% colnames(groups),
+              "start"   %in% colnames(groups),
+              "end"     %in% colnames(groups),
+              "group_id"     %in% colnames(groups)) %>% invisible
+  groups[, group_id := paste("SV group", group_id)]
+  groups = groups[chrom == CHROM]
+  message("   --> Found ", nrow(groups), " SV groups in chromosome ", CHROM)
+}
+
 ### Check single cell segmentation file
 if (!is.null(f_scsegments)) {
   message(" * Reading per-cell segmentation regions state file from ", f_scsegments, "...")
@@ -345,6 +360,18 @@ while (i <= n_cells) {
       }
     }
 
+    # Add bars for SV group, if available
+    if (!is.null(f_groups)) {
+      message("   * Adding SV groups")
+      if (nrow(groups) > 0) {
+        plt <- plt +
+          geom_rect(data = groups,
+                    aes(xmin = start, xmax = end, ymin = .85*y_lim, ymax = Inf, fill = group_id))
+          # Add colors for SV classes
+          manual_colors = c(manual_colors, setNames(colorRampPalette(brewer.pal(12,"Set2"))(nrow(groups)),groups$group_id))
+      }
+    }
+
     # Add bars for complex states, if available
     if (!is.null(f_complex)) {
       message("   * Adding complex intervals")
-- 
GitLab