From fd43409f8ccdf65c80f254267a9069e6a3978ce0 Mon Sep 17 00:00:00 2001 From: Tobias Marschall <tobias.marschall@0ohm.net> Date: Thu, 11 Oct 2018 08:42:33 +0200 Subject: [PATCH] Some code formatting and cleanup --- utils/plot-clustering.R | 96 +++++++++++++++++++---------------------- 1 file changed, 45 insertions(+), 51 deletions(-) diff --git a/utils/plot-clustering.R b/utils/plot-clustering.R index c055b2b..329d292 100644 --- a/utils/plot-clustering.R +++ b/utils/plot-clustering.R @@ -1,37 +1,32 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, chromosome.outputfile) { -# setwd("/Users/jeong/Documents/Strand_Seq/TALL_analysis/sv_calls_new_1008") ##This folder need to be changed ( /path/to/sv_calls ) genome_bins <- read.table(bin.bed.filename, sep = '\t', header=F, comment.char = "") list_directory <- dir("./", full.names=TRUE) -# files <- list.files(paste0(list_directory[m], '/100000_fixed_norm.selected_j0.1_s0.5'), pattern=".txt$", full.names=TRUE) -# plotname<-paste0("Mosaic_plot_", strsplit(list_directory[m], './/')[[1]][2], "_position.pdf") pdf(position.outputfile, width = 11, height = 10 ) -# filename<-strsplit(files[k], './/')[[1]][2] data1<-read.table(inputfile, sep = '\t', header=T, comment.char = "") - data1$color <- 0 ash12rainbow <- c("#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411") sv_call_name <- c("del_h1", "del_h2", "del_hom", "dup_h1", "dup_h2", "dup_hom", "inv_h1", "inv_h2", "inv_hom", "idup_h1", "idup_h2", "complex") + for (j in 1:length(sv_call_name)){ - tmp <- which(data1[,9]==sv_call_name[j]) - data1[tmp,15] <- j + tmp <- which(data1[,9]==sv_call_name[j]) + data1[tmp,15] <- j } - - data1_pos <- data1[,1:3] data1_pos_uniq <- unique(data1_pos) data1_pos_uniq_sort <- data1_pos_uniq[data1_pos_uniq$chrom=="chr1",] chrom <- c("chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22", "chrX") for (i in 2:length(chrom)){ - data1_pos_uniq_sort<-rbind(data1_pos_uniq_sort, data1_pos_uniq[data1_pos_uniq$chrom==chrom[i],]) + data1_pos_uniq_sort<-rbind(data1_pos_uniq_sort, data1_pos_uniq[data1_pos_uniq$chrom==chrom[i],]) } + data1_pos_uniq_sort$posind <- c(1:nrow(data1_pos_uniq_sort)) data1_cell <- data1[,5] @@ -42,11 +37,12 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch result_sv <- matrix(0, nrow(data1_cell_uniq_sort), nrow(data1_pos_uniq)) for (i in 1:nrow(data1)){ - pos_ind <- which(data1_pos_uniq_sort[,1]==data1[i,1] & data1_pos_uniq_sort[,2]==data1[i,2] & data1_pos_uniq_sort[,3]==data1[i,3]) - cell_ind <- which(data1_cell_uniq_sort[,1]==data1[i,5]) - result[cell_ind, pos_ind] <- data1[i,13] - result_sv[cell_ind, pos_ind] <- data1[i,15] + pos_ind <- which(data1_pos_uniq_sort[,1]==data1[i,1] & data1_pos_uniq_sort[,2]==data1[i,2] & data1_pos_uniq_sort[,3]==data1[i,3]) + cell_ind <- which(data1_cell_uniq_sort[,1]==data1[i,5]) + result[cell_ind, pos_ind] <- data1[i,13] + result_sv[cell_ind, pos_ind] <- data1[i,15] } + rownames(result) <- data1_cell_uniq_sort colnames(result) <- data1_pos_uniq_sort$posind result[result==Inf] <- max(result[result!=Inf]) @@ -72,6 +68,7 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch library(pheatmap) library(gplots) library(ComplexHeatmap) + breaksList = seq(4, 30, by = 0.1) breaksList=append(breaksList, max(result)) breaksList=append(breaksList, -1, 0) @@ -86,40 +83,40 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch res<-pheatmap(result, border_color = NA, show_rownames=T, show_colnames=F, cluster_cols=F, cluster_rows=T, clustering_method="ward.D", scale="none", col=mycol, cex=0.7, main=inputfile, annotation_col = col_annotation, annotation_colors = anno_colors, breaks = breaksList, annotation_legend = FALSE) - #pheatmap(result_sv[res$tree_row$order,], border_color = NA, show_rownames=T, show_colnames=F, cluster_cols=F, cluster_rows=F, scale="none", cex=0.7, col=c("white", "#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411"), main=filename, annotation_col = col_annotation, annotation_colors = anno_colors, annotation_legend = FALSE, legend=TRUE, legend_labels <- c("none", "del_h1", "del_h2", "del_hom", "dup_h1", "dup_h2", "dup_hom", "inv_h1", "inv_h2", "inv_hom", "idup_h1", "idup_h2", "complex")) chr_name <- matrix("chr", ncol(result), 1) for (i in 1:ncol(result)){ - chr_name[i,1] <- paste0(data1_pos_uniq_sort[i,1], '_', data1_pos_uniq_sort[i,2], '_', data1_pos_uniq_sort[i,3]) + chr_name[i,1] <- paste0(data1_pos_uniq_sort[i,1], '_', data1_pos_uniq_sort[i,2], '_', data1_pos_uniq_sort[i,3]) } + colnames(result_sv) <- chr_name colors = structure(c("white", "#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411"), names = c(0:12)) sv_list = c("none", "del_h1", "del_h2", "del_hom", "dup_h1", "dup_h2", "dup_hom", "inv_h1", "inv_h2", "inv_hom", "idup_h1", "idup_h2", "complex") sv_tmp <- matrix(0, 13, 1) - for (i in 1:13){sv_tmp[i,1] <- sum(result_sv==(i-1)) } + for (i in 1:13){ + sv_tmp[i,1] <- sum(result_sv==(i-1)) + } sv_list_sub <- sv_list[sv_tmp>0] mat <- as.data.frame(result_sv[res$tree_row$order,]) ha_column = HeatmapAnnotation(df = data.frame(type1 = data1_pos_uniq_sort$color), - col = list(type1 = c("magenta" = "magenta", "purple" = "purple"))) - #ht1 = Heatmap(mat, name = "", col=c("white", "#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411"), heatmap_legend_param = list(labels = c("none", "del_h1", "del_h2", "del_hom", "dup_h1", "dup_h2", "dup_hom", "inv_h1", "inv_h2", "inv_hom", "idup_h1", "idup_h2", "complex")),cluster_rows = FALSE, cluster_columns = FALSE, column_title = filename, row_names_gp = gpar(fontsize = 5), column_names_gp = gpar(fontsize = 3), column_title_gp = gpar(fontsize = 7, fontface = "bold"), top_annotation = ha_column) - ht1 = Heatmap(mat, name = "", col=colors, heatmap_legend_param = list(labels = sv_list_sub),cluster_rows = FALSE, cluster_columns = FALSE, column_title = inputfile, row_names_gp = gpar(fontsize = 5), column_names_gp = gpar(fontsize = 1), column_title_gp = gpar(fontsize = 7, fontface = "bold"), top_annotation = ha_column) + col = list(type1 = c("magenta" = "magenta", "purple" = "purple"))) + ht1 = Heatmap(mat, name = "", col=colors, heatmap_legend_param = list(labels = sv_list_sub), cluster_rows = FALSE, cluster_columns = FALSE, column_title = inputfile, row_names_gp = gpar(fontsize = 5), column_names_gp = gpar(fontsize = 1), column_title_gp = gpar(fontsize = 7, fontface = "bold"), top_annotation = ha_column) draw(ht1, show_annotation_legend = FALSE) - dev.off() + dev.off() pdf(chromosome.outputfile, width = 11, height = 5 ) - data1<-read.table(inputfile, sep = '\t', header=T, comment.char = "") data1$color <- 0 ash12rainbow <- c("#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411") sv_call_name <- c("del_h1", "del_h2", "del_hom", "dup_h1", "dup_h2", "dup_hom", "inv_h1", "inv_h2", "inv_hom", "idup_h1", "idup_h2", "complex") for (j in 1:length(sv_call_name)){ - tmp <- which(data1[,9]==sv_call_name[j]) - data1[tmp,15] <- j + tmp <- which(data1[,9]==sv_call_name[j]) + data1[tmp,15] <- j } colors = structure(c("white", "#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411"), names = c(0:12)) @@ -130,7 +127,7 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch chrom <- c("chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22", "chrX") for (i in 2:length(chrom)){ - data1_pos_uniq_sort<-rbind(data1_pos_uniq_sort, data1_pos_uniq[data1_pos_uniq$chrom==chrom[i],]) + data1_pos_uniq_sort<-rbind(data1_pos_uniq_sort, data1_pos_uniq[data1_pos_uniq$chrom==chrom[i],]) } data1_pos_uniq_sort$posind <- c(1:nrow(data1_pos_uniq_sort)) @@ -142,10 +139,10 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch result_sv <- matrix(0, nrow(data1_cell_uniq_sort), nrow(data1_pos_uniq)) for (i in 1:nrow(data1)){ - pos_ind <- which(data1_pos_uniq_sort[,1]==data1[i,1] & data1_pos_uniq_sort[,2]==data1[i,2] & data1_pos_uniq_sort[,3]==data1[i,3]) - cell_ind <- which(data1_cell_uniq_sort[,1]==data1[i,5]) - result[cell_ind, pos_ind] <- data1[i,13] - result_sv[cell_ind, pos_ind] <- data1[i,15] + pos_ind <- which(data1_pos_uniq_sort[,1]==data1[i,1] & data1_pos_uniq_sort[,2]==data1[i,2] & data1_pos_uniq_sort[,3]==data1[i,3]) + cell_ind <- which(data1_cell_uniq_sort[,1]==data1[i,5]) + result[cell_ind, pos_ind] <- data1[i,13] + result_sv[cell_ind, pos_ind] <- data1[i,15] } rownames(result) <- data1_cell_uniq_sort colnames(result) <- data1_pos_uniq_sort$posind @@ -153,10 +150,6 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch rownames(result_sv) <- data1_cell_uniq_sort colnames(result_sv) <- data1_pos_uniq_sort$posind - - - library(pheatmap) - library(gplots) breaksList = seq(4, 30, by = 0.1) breaksList=append(breaksList, max(result)) breaksList=append(breaksList, -1, 0) @@ -164,28 +157,26 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch res<-pheatmap(result, border_color = NA, show_rownames=T, show_colnames=F, cluster_cols=F, cluster_rows=T, clustering_method="ward.D", scale="none", col=mycol, cex=0.5, main=inputfile) - - ##Assign sv calls to the genome-wide bins (200kb) par("mar") par(mar=c(0.5,0.5,0.5,0.5)) genome_bins_sort <- genome_bins[genome_bins[,1]==chrom[1],] for (i in 2:length(chrom)){ - genome_bins_sort <- rbind(genome_bins_sort, genome_bins[genome_bins[,1]==chrom[i],]) + genome_bins_sort <- rbind(genome_bins_sort, genome_bins[genome_bins[,1]==chrom[i],]) } result_sv_bins <- matrix(0, nrow(genome_bins_sort), nrow(result_sv)) for (i in 1:ncol(result_sv)){ - num_bins1 <- which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,2] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,2]) - num_bins2 <- which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,3] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,3]) - if (sum(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,3] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,3])==0){ - num_bins2 <- max(which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]))) - } - - num_bins <- c(num_bins1:num_bins2) - for (j in 1:length(num_bins)){result_sv_bins[num_bins[j],] <- t(as.matrix(result_sv[,i]))} - cat(paste0(i, ' ')) + num_bins1 <- which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,2] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,2]) + num_bins2 <- which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,3] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,3]) + if (sum(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,3] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,3])==0){ + num_bins2 <- max(which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]))) + } + + num_bins <- c(num_bins1:num_bins2) + for (j in 1:length(num_bins)){result_sv_bins[num_bins[j],] <- t(as.matrix(result_sv[,i]))} + cat(paste0(i, ' ')) } widths <- matrix(0, length(chrom), 1) @@ -196,13 +187,16 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch mat2 <- result_sv_bins[,res$tree_row$order] mat2 <- mat2[,ncol(mat2):1] dnull <- matrix(0, nrow(mat2), ncol(mat2)) - #par(mfrow=c(1, 23)) + l <- layout(matrix(seq(1,23),1,23,byrow=TRUE), widths=widths) - for (i in 1:length(chrom)){ - d = mat2[gos2$chrom==chrom[i],] - #if (sum(d)==1){image(seq_len(1), seq_len(length(d)), as.matrix(t(d)), zlim=c(0, 12), col=colors, xlab="", ylab="", axes=FALSE, main=chrom[i], cex.main=0.8);box() } - if (sum(d)==0){image(seq_len(nrow(dnull)), seq_len(ncol(dnull)), dnull, zlim=c(0, 12), col="white", xlab="", ylab="", axes=FALSE, main=chrom[i], cex.main=0.8);box() } - if (sum(d)>1){image(seq_len(nrow(d)), seq_len(ncol(d)), d, zlim=c(0, 12), col=colors, xlab="", ylab="", axes=FALSE, main=chrom[i], cex.main=0.8);box() } + for (i in 1:length(chrom)){ + d = mat2[gos2$chrom==chrom[i],] + if (sum(d)==0){ + image(seq_len(nrow(dnull)), seq_len(ncol(dnull)), dnull, zlim=c(0, 12), col="white", xlab="", ylab="", axes=FALSE, main=chrom[i], cex.main=0.8);box() + } + if (sum(d)>1){ + image(seq_len(nrow(d)), seq_len(ncol(d)), d, zlim=c(0, 12), col=colors, xlab="", ylab="", axes=FALSE, main=chrom[i], cex.main=0.8);box() + } } dev.off() -- GitLab