diff --git a/Snakefile b/Snakefile index d5f3edee09f923c15d94dd78855e6a4a17f51423..1367fe9aa879eddeb88e35d02bddec778ad92f3f 100644 --- a/Snakefile +++ b/Snakefile @@ -252,7 +252,8 @@ rule plot_SV_calls: calls = "sv_calls/{sample}/{windows}.{bpdens}/{method}.txt", complex = "sv_calls/{sample}/{windows}.{bpdens}/{method}.complex.tsv", strand = "strand_states/{sample}/{windows}.{bpdens}/final.txt", - segments = "segmentation2/{sample}/{windows}.{bpdens}.txt" + segments = "segmentation2/{sample}/{windows}.{bpdens}.txt", + scsegments = "segmentation-singlecell/{sample}/{windows}.{bpdens}.txt", output: "sv_calls/{sample}/{windows}.{bpdens,selected_j[0-9\\.]+_s[0-9\\.]+}/plots/sv_calls/{method}.{chrom}.pdf" log: @@ -261,6 +262,7 @@ rule plot_SV_calls: """ Rscript utils/plot-sv-calls.R \ segments={input.segments} \ + singlecellsegments={input.scsegments} \ strand={input.strand} \ complex={input.complex} \ calls={input.calls} \ @@ -505,6 +507,7 @@ rule segmentation_selection: info="counts/{sample}/{window}_{file_name}.info", output: jointseg="segmentation2/{sample}/{window,[0-9]+}_{file_name}.selected_j{min_diff_jointseg}_s{min_diff_singleseg}.txt", + singleseg="segmentation-singlecell/{sample}/{window,[0-9]+}_{file_name}.selected_j{min_diff_jointseg}_s{min_diff_singleseg}.txt", strand_states="strand_states/{sample}/{window,[0-9]+}_{file_name}.selected_j{min_diff_jointseg}_s{min_diff_singleseg}/intitial_strand_state", log: "log/segmentation_selection/{sample}/{window}_{file_name}.selected_j{min_diff_jointseg}_s{min_diff_singleseg}.log" @@ -512,7 +515,7 @@ rule segmentation_selection: cellnames = lambda wc: ",".join(cell for cell in CELL_PER_SAMPLE[wc.sample]), sce_min_distance = 500000, shell: - "./utils/detect_strand_states.py --sce_min_distance {params.sce_min_distance} --min_diff_jointseg {wildcards.min_diff_jointseg} --min_diff_singleseg {wildcards.min_diff_singleseg} --output_jointseg {output.jointseg} --output_strand_states {output.strand_states} --samplename {wildcards.sample} --cellnames {params.cellnames} {input.info} {input.counts} {input.jointseg} {input.singleseg} > {log} 2>&1" + "./utils/detect_strand_states.py --sce_min_distance {params.sce_min_distance} --min_diff_jointseg {wildcards.min_diff_jointseg} --min_diff_singleseg {wildcards.min_diff_singleseg} --output_jointseg {output.jointseg} --output_singleseg {output.singleseg} --output_strand_states {output.strand_states} --samplename {wildcards.sample} --cellnames {params.cellnames} {input.info} {input.counts} {input.jointseg} {input.singleseg} > {log} 2>&1" ################################################################################ diff --git a/utils/detect_strand_states.py b/utils/detect_strand_states.py index 6a6d12ff485fd553dcc0aaa7e7ac7c3f16a3ba65..59761f99237762e4bc1c5efad80c33d5e622f90b 100755 --- a/utils/detect_strand_states.py +++ b/utils/detect_strand_states.py @@ -228,6 +228,8 @@ def main(): help='Minimum gain in mismatch distance needed to add an additional SCE.') parser.add_argument('--output_jointseg', default=None, help='Filename to output selected joint segmentation to.') + parser.add_argument('--output_singleseg', default=None, + help='Filename to output selected single cell segmentations to.') parser.add_argument('--output_strand_states', default=None, help='Filename to output strand states to.') parser.add_argument('--min_diff_jointseg', default=0.5, type=float, @@ -271,14 +273,24 @@ def main(): output_strand_states_file = open(args.output_strand_states, 'w') print('sample','cell','chrom','start','end','class', sep='\t', file=output_strand_states_file) + output_single_cell_seg_file = None + if args.output_singleseg != None: + output_single_cell_seg_file = open(args.output_singleseg, 'w') + print('sample','cell','chrom','position', sep='\t', file=output_single_cell_seg_file) + for filename, cell in zip(args.singleseg, cell_names): print('='*100, filename, file=sys.stderr) print('Processing', filename, file=sys.stderr) singleseg = Segmentation(filename) singleseg.select_k(min_diff = args.min_diff_singleseg) + for chromosome in singleseg.chromosomes: print(' -- chromosome', chromosome, file=sys.stderr) breaks = singleseg.get_selected_segmentation(chromosome) + if output_single_cell_seg_file is not None: + for position in breaks: + print(args.samplename, cell, chromosome, position, sep='\t', file=output_single_cell_seg_file) + w_counts, c_counts = count_table.get_counts(cell, chromosome, breaks) w, c = 0, 0 strand_state_list = [] @@ -382,6 +394,9 @@ def main(): if output_strand_states_file is not None: output_strand_states_file.close() + if output_single_cell_seg_file is not None: + output_single_cell_seg_file.close() + if __name__ == '__main__': main() diff --git a/utils/plot-sv-calls.R b/utils/plot-sv-calls.R index e8e2641c46e5bd635d4e0e54f6f3576895c48f92..1e2955c835313af0d14d46b4e36accf6c2c9739b 100644 --- a/utils/plot-sv-calls.R +++ b/utils/plot-sv-calls.R @@ -69,13 +69,14 @@ print_usage_and_stop = function(msg = NULL) { message(" Rscript chrom.R [OPTIONS] <count-file> <chrom> <out.pdf> ") message(" ") message("OPTIONS (no spaces around `=`): ") - message(" per-page=<int> Number of cells to be printed per page ") - message(" segments=<file> Show the segmentation in the plots ") - message(" calls=<file> Highlight SV calls provided in a table ") - message(" truth=<file> Mark the `true`` SVs provided in a table ") - message(" strand=<file> Mark the strand states which calls are based on ") - message(" complex=<file> Mark complex regions given in file ") - message(" no-none Do not hightlight black-listed (i.e. None) bins ") + message(" per-page=<int> Number of cells to be printed per page ") + message(" segments=<file> Show the segmentation in the plots ") + message(" singlecellsegments=<file> Show per-cell segmentation in the plots ") + message(" calls=<file> Highlight SV calls provided in a table ") + message(" truth=<file> Mark the `true`` SVs provided in a table ") + message(" strand=<file> Mark the strand states which calls are based on ") + message(" complex=<file> Mark complex regions given in file ") + message(" no-none Do not hightlight black-listed (i.e. None) bins ") message(" ") message("Generates one plot per chromosome listing all cells below another, separated ") message("into pages. If an SV probability file is provided (2), segments are colored ") @@ -111,7 +112,7 @@ cells_per_page = 8 show_none = T if (length(args)>3) { - if (!all(grepl("^(strand|calls|segments|per-page|truth|no-none|complex)=?", args[1:(length(args)-3)]))) { + if (!all(grepl("^(strand|calls|segments|per-page|truth|no-none|complex|singlecellsegments)=?", args[1:(length(args)-3)]))) { print_usage_and_stop("[Error]: Options must be one of `calls`, `segments`, `per-page`, or `truth`") } for (op in args[1:(length(args)-3)]) { if (grepl("^segments=", op)) f_segments = str_sub(op, 10) @@ -123,6 +124,7 @@ if (length(args)>3) { } if (grepl("^strand=", op)) f_strand = str_sub(op, 8) if (grepl("^complex=", op)) f_complex = str_sub(op, 9) + if (grepl("^singlecellsegments=", op)) f_scsegments = str_sub(op, 20) if (grepl("^no-none$", op)) show_none = F } } @@ -234,7 +236,7 @@ if (!is.null(f_strand)) { strand = strand[chrom == CHROM] } -### Check complex regionss file +### Check complex regions file if (!is.null(f_complex)) { message(" * Reading complex regions state file from ", f_complex, "...") complex = fread(f_complex) @@ -247,6 +249,18 @@ if (!is.null(f_complex)) { } +### Check single cell segmentation file +if (!is.null(f_scsegments)) { + message(" * Reading per-cell segmentation regions state file from ", f_scsegments, "...") + scsegments = fread(f_scsegments) + assert_that("sample" %in% colnames(scsegments), + "cell" %in% colnames(scsegments), + "chrom" %in% colnames(scsegments), + "position" %in% colnames(scsegments)) %>% invisible + scsegments[, sample_cell := paste(sample, "-", cell)] + + scsegments = scsegments[chrom == CHROM] +} ################################################################################ @@ -305,6 +319,16 @@ while (i <= n_cells) { } } + # Add lines for single cell segmentation, if available + if (!is.null(f_scsegments)) { + local_scsegments = scsegments[CELLS, on = .(sample_cell), nomatch = 0] + if (nrow(local_scsegments) > 0) { + plt <- plt + + geom_segment(data = local_scsegments, + aes(x = position, xend = position, y = -Inf, yend = -.8*y_lim), color = 'blue') + } + } + # Add bars for strand states, if available if (!is.null(f_strand)) { local_strand = strand[CELLS, on = .(sample_cell), nomatch = 0]