From eb297e8d5863e56a74bfd64d7ff10251593b44be Mon Sep 17 00:00:00 2001
From: Tobias Marschall <tobias.marschall@0ohm.net>
Date: Wed, 19 Sep 2018 10:43:19 +0200
Subject: [PATCH] Output and plot single cell segmentation

---
 Snakefile                     |  7 ++++--
 utils/detect_strand_states.py | 15 +++++++++++++
 utils/plot-sv-calls.R         | 42 +++++++++++++++++++++++++++--------
 3 files changed, 53 insertions(+), 11 deletions(-)

diff --git a/Snakefile b/Snakefile
index d5f3ede..1367fe9 100644
--- a/Snakefile
+++ b/Snakefile
@@ -252,7 +252,8 @@ rule plot_SV_calls:
         calls  = "sv_calls/{sample}/{windows}.{bpdens}/{method}.txt",
         complex = "sv_calls/{sample}/{windows}.{bpdens}/{method}.complex.tsv",
         strand = "strand_states/{sample}/{windows}.{bpdens}/final.txt",
-        segments = "segmentation2/{sample}/{windows}.{bpdens}.txt"
+        segments = "segmentation2/{sample}/{windows}.{bpdens}.txt",
+        scsegments = "segmentation-singlecell/{sample}/{windows}.{bpdens}.txt",
     output:
         "sv_calls/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/plots/sv_calls/{method}.{chrom}.pdf"
     log:
@@ -261,6 +262,7 @@ rule plot_SV_calls:
         """
         Rscript utils/plot-sv-calls.R \
             segments={input.segments} \
+            singlecellsegments={input.scsegments} \
             strand={input.strand} \
             complex={input.complex} \
             calls={input.calls} \
@@ -505,6 +507,7 @@ rule segmentation_selection:
         info="counts/{sample}/{window}_{file_name}.info",
     output:
         jointseg="segmentation2/{sample}/{window,[0-9]+}_{file_name}.selected_j{min_diff_jointseg}_s{min_diff_singleseg}.txt",
+        singleseg="segmentation-singlecell/{sample}/{window,[0-9]+}_{file_name}.selected_j{min_diff_jointseg}_s{min_diff_singleseg}.txt",
         strand_states="strand_states/{sample}/{window,[0-9]+}_{file_name}.selected_j{min_diff_jointseg}_s{min_diff_singleseg}/intitial_strand_state",
     log:
         "log/segmentation_selection/{sample}/{window}_{file_name}.selected_j{min_diff_jointseg}_s{min_diff_singleseg}.log"
@@ -512,7 +515,7 @@ rule segmentation_selection:
         cellnames = lambda wc: ",".join(cell for cell in CELL_PER_SAMPLE[wc.sample]),
         sce_min_distance = 500000,
     shell:
-        "./utils/detect_strand_states.py --sce_min_distance {params.sce_min_distance} --min_diff_jointseg {wildcards.min_diff_jointseg} --min_diff_singleseg {wildcards.min_diff_singleseg} --output_jointseg {output.jointseg} --output_strand_states {output.strand_states} --samplename {wildcards.sample} --cellnames {params.cellnames} {input.info} {input.counts} {input.jointseg} {input.singleseg} > {log} 2>&1"
+        "./utils/detect_strand_states.py --sce_min_distance {params.sce_min_distance} --min_diff_jointseg {wildcards.min_diff_jointseg} --min_diff_singleseg {wildcards.min_diff_singleseg} --output_jointseg {output.jointseg} --output_singleseg {output.singleseg} --output_strand_states {output.strand_states} --samplename {wildcards.sample} --cellnames {params.cellnames} {input.info} {input.counts} {input.jointseg} {input.singleseg} > {log} 2>&1"
 
 
 ################################################################################
diff --git a/utils/detect_strand_states.py b/utils/detect_strand_states.py
index 6a6d12f..59761f9 100755
--- a/utils/detect_strand_states.py
+++ b/utils/detect_strand_states.py
@@ -228,6 +228,8 @@ def main():
 		help='Minimum gain in mismatch distance needed to add an additional SCE.')
 	parser.add_argument('--output_jointseg', default=None,
 		help='Filename to output selected joint segmentation to.')
+	parser.add_argument('--output_singleseg', default=None,
+		help='Filename to output selected single cell segmentations to.')
 	parser.add_argument('--output_strand_states', default=None,
 		help='Filename to output strand states to.')
 	parser.add_argument('--min_diff_jointseg', default=0.5, type=float, 
@@ -271,14 +273,24 @@ def main():
 		output_strand_states_file = open(args.output_strand_states, 'w')
 		print('sample','cell','chrom','start','end','class', sep='\t', file=output_strand_states_file)
 
+	output_single_cell_seg_file = None
+	if args.output_singleseg != None:
+		output_single_cell_seg_file = open(args.output_singleseg, 'w')
+		print('sample','cell','chrom','position', sep='\t', file=output_single_cell_seg_file)
+
 	for filename, cell in zip(args.singleseg, cell_names):
 		print('='*100, filename, file=sys.stderr)
 		print('Processing', filename, file=sys.stderr)
 		singleseg = Segmentation(filename)
 		singleseg.select_k(min_diff = args.min_diff_singleseg)
+
 		for chromosome in singleseg.chromosomes:
 			print(' -- chromosome', chromosome, file=sys.stderr)
 			breaks = singleseg.get_selected_segmentation(chromosome)
+			if output_single_cell_seg_file is not None:
+				for position in breaks:
+					print(args.samplename, cell, chromosome, position, sep='\t', file=output_single_cell_seg_file)
+
 			w_counts, c_counts = count_table.get_counts(cell, chromosome, breaks)
 			w, c = 0, 0
 			strand_state_list = []
@@ -382,6 +394,9 @@ def main():
 	if output_strand_states_file is not None:
 		output_strand_states_file.close()
 
+	if output_single_cell_seg_file is not None:
+		output_single_cell_seg_file.close()
+
 
 if __name__ == '__main__':
 	main()
diff --git a/utils/plot-sv-calls.R b/utils/plot-sv-calls.R
index e8e2641..1e2955c 100644
--- a/utils/plot-sv-calls.R
+++ b/utils/plot-sv-calls.R
@@ -69,13 +69,14 @@ print_usage_and_stop = function(msg = NULL) {
   message("    Rscript chrom.R [OPTIONS] <count-file> <chrom> <out.pdf>                    ")
   message("                                                                                ")
   message("OPTIONS (no spaces around `=`):                                                 ")
-  message("    per-page=<int>        Number of cells to be printed per page                ")
-  message("    segments=<file>       Show the segmentation in the plots                    ")
-  message("    calls=<file>          Highlight SV calls provided in a table                ")
-  message("    truth=<file>          Mark the `true`` SVs provided in a table              ")
-  message("    strand=<file>         Mark the strand states which calls are based on       ")
-  message("    complex=<file>        Mark complex regions given in file                    ")
-  message("    no-none               Do not hightlight black-listed (i.e. None) bins       ")
+  message("    per-page=<int>            Number of cells to be printed per page            ")
+  message("    segments=<file>           Show the segmentation in the plots                ")
+  message("    singlecellsegments=<file> Show per-cell  segmentation in the plots          ")
+  message("    calls=<file>              Highlight SV calls provided in a table            ")
+  message("    truth=<file>              Mark the `true`` SVs provided in a table          ")
+  message("    strand=<file>             Mark the strand states which calls are based on   ")
+  message("    complex=<file>            Mark complex regions given in file                ")
+  message("    no-none                   Do not hightlight black-listed (i.e. None) bins   ")
   message("                                                                                ")
   message("Generates one plot per chromosome listing all cells below another, separated    ")
   message("into pages. If an SV probability file is provided (2), segments are colored     ")
@@ -111,7 +112,7 @@ cells_per_page = 8
 show_none  = T
 
 if (length(args)>3) {
-  if (!all(grepl("^(strand|calls|segments|per-page|truth|no-none|complex)=?", args[1:(length(args)-3)]))) {
+  if (!all(grepl("^(strand|calls|segments|per-page|truth|no-none|complex|singlecellsegments)=?", args[1:(length(args)-3)]))) {
     print_usage_and_stop("[Error]: Options must be one of `calls`, `segments`, `per-page`, or `truth`") }
   for (op in args[1:(length(args)-3)]) {
     if (grepl("^segments=", op)) f_segments = str_sub(op, 10)
@@ -123,6 +124,7 @@ if (length(args)>3) {
     }
     if (grepl("^strand=", op))   f_strand = str_sub(op, 8)
     if (grepl("^complex=", op))   f_complex = str_sub(op, 9)
+    if (grepl("^singlecellsegments=", op))   f_scsegments = str_sub(op, 20)
     if (grepl("^no-none$", op)) show_none = F
   }
 }
@@ -234,7 +236,7 @@ if (!is.null(f_strand)) {
   strand = strand[chrom == CHROM]
 }
 
-### Check complex regionss file
+### Check complex regions file
 if (!is.null(f_complex)) {
   message(" * Reading complex regions state file from ", f_complex, "...")
   complex = fread(f_complex)
@@ -247,6 +249,18 @@ if (!is.null(f_complex)) {
 
 }
 
+### Check single cell segmentation file
+if (!is.null(f_scsegments)) {
+  message(" * Reading per-cell segmentation regions state file from ", f_scsegments, "...")
+  scsegments = fread(f_scsegments)
+  assert_that("sample"   %in% colnames(scsegments),
+              "cell"     %in% colnames(scsegments),
+              "chrom"    %in% colnames(scsegments),
+              "position" %in% colnames(scsegments)) %>% invisible
+  scsegments[, sample_cell := paste(sample, "-", cell)]
+
+  scsegments = scsegments[chrom == CHROM]
+}
 
 
 ################################################################################
@@ -305,6 +319,16 @@ while (i <= n_cells) {
       }
     }
 
+    # Add lines for single cell segmentation, if available
+    if (!is.null(f_scsegments)) {
+      local_scsegments = scsegments[CELLS, on = .(sample_cell), nomatch = 0]
+      if (nrow(local_scsegments) > 0) {
+        plt <- plt +
+          geom_segment(data = local_scsegments,
+                    aes(x = position, xend = position, y = -Inf, yend = -.8*y_lim), color = 'blue')
+      }
+    }
+
     # Add bars for strand states, if available
     if (!is.null(f_strand)) {
       local_strand = strand[CELLS, on = .(sample_cell), nomatch = 0]
-- 
GitLab