Some code formatting and cleanup

fd43409f · Tobias Marschall · b29fae87 · fd43409f
Commit fd43409f authored 6 years ago by Tobias Marschall
--- a/utils/plot-clustering.R
+++ b/utils/plot-clustering.R

 plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, chromosome.outputfile) {

-# 	setwd("/Users/jeong/Documents/Strand_Seq/TALL_analysis/sv_calls_new_1008") ##This folder need to be changed ( /path/to/sv_calls )
 	genome_bins <- read.table(bin.bed.filename, sep = '\t', header=F, comment.char = "")
 	list_directory <- dir("./", full.names=TRUE)

-#	files <- list.files(paste0(list_directory[m], '/100000_fixed_norm.selected_j0.1_s0.5'), pattern=".txt$", full.names=TRUE)
-#	plotname<-paste0("Mosaic_plot_", strsplit(list_directory[m], './/')[[1]][2], "_position.pdf")
 	pdf(position.outputfile, width = 11, height = 10 )


-# 	filename<-strsplit(files[k], './/')[[1]][2]
 	data1<-read.table(inputfile, sep = '\t', header=T, comment.char = "")

-
 	data1$color <- 0
 	ash12rainbow <- c("#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411")
 	sv_call_name <- c("del_h1",  "del_h2",  "del_hom", "dup_h1",  "dup_h2",  "dup_hom", "inv_h1",  "inv_h2",  "inv_hom", "idup_h1", "idup_h2", "complex")
+
 	for (j in 1:length(sv_call_name)){
-	tmp <- which(data1[,9]==sv_call_name[j])
-	data1[tmp,15] <- j
+		tmp <- which(data1[,9]==sv_call_name[j])
+		data1[tmp,15] <- j
 	}

-
-
 	data1_pos <- data1[,1:3]
 	data1_pos_uniq <- unique(data1_pos)
 	data1_pos_uniq_sort <- data1_pos_uniq[data1_pos_uniq$chrom=="chr1",]
 	chrom <- c("chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22", "chrX")

 	for (i in 2:length(chrom)){
-	data1_pos_uniq_sort<-rbind(data1_pos_uniq_sort, data1_pos_uniq[data1_pos_uniq$chrom==chrom[i],])
+		data1_pos_uniq_sort<-rbind(data1_pos_uniq_sort, data1_pos_uniq[data1_pos_uniq$chrom==chrom[i],])
 	}
+
 	data1_pos_uniq_sort$posind <- c(1:nrow(data1_pos_uniq_sort))

 	data1_cell <- data1[,5]
@@ -42,11 +37,12 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch
 	result_sv <- matrix(0, nrow(data1_cell_uniq_sort), nrow(data1_pos_uniq))

 	for (i in 1:nrow(data1)){
-	pos_ind <- which(data1_pos_uniq_sort[,1]==data1[i,1] & data1_pos_uniq_sort[,2]==data1[i,2] & data1_pos_uniq_sort[,3]==data1[i,3])
-	cell_ind <- which(data1_cell_uniq_sort[,1]==data1[i,5])
-	result[cell_ind, pos_ind] <- data1[i,13]
-	result_sv[cell_ind, pos_ind] <- data1[i,15]
+		pos_ind <- which(data1_pos_uniq_sort[,1]==data1[i,1] & data1_pos_uniq_sort[,2]==data1[i,2] & data1_pos_uniq_sort[,3]==data1[i,3])
+		cell_ind <- which(data1_cell_uniq_sort[,1]==data1[i,5])
+		result[cell_ind, pos_ind] <- data1[i,13]
+		result_sv[cell_ind, pos_ind] <- data1[i,15]
 	}
+
 	rownames(result) <- data1_cell_uniq_sort
 	colnames(result) <- data1_pos_uniq_sort$posind
 	result[result==Inf] <- max(result[result!=Inf])
@@ -72,6 +68,7 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch
 	library(pheatmap)
 	library(gplots)
 	library(ComplexHeatmap)
+
 	breaksList = seq(4, 30, by = 0.1)
 	breaksList=append(breaksList, max(result))
 	breaksList=append(breaksList, -1, 0)
@@ -86,40 +83,40 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch


 	res<-pheatmap(result, border_color = NA, show_rownames=T, show_colnames=F, cluster_cols=F, cluster_rows=T, clustering_method="ward.D", scale="none", col=mycol, cex=0.7, main=inputfile, annotation_col = col_annotation, annotation_colors = anno_colors, breaks = breaksList, annotation_legend = FALSE)
-	#pheatmap(result_sv[res$tree_row$order,], border_color = NA, show_rownames=T, show_colnames=F, cluster_cols=F, cluster_rows=F, scale="none", cex=0.7, col=c("white", "#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411"), main=filename, annotation_col = col_annotation, annotation_colors = anno_colors, annotation_legend = FALSE, legend=TRUE, legend_labels <- c("none", "del_h1",  "del_h2",  "del_hom", "dup_h1",  "dup_h2",  "dup_hom", "inv_h1",  "inv_h2",  "inv_hom", "idup_h1", "idup_h2", "complex"))

 	chr_name <- matrix("chr", ncol(result), 1)
 	for (i in 1:ncol(result)){
-	chr_name[i,1] <- paste0(data1_pos_uniq_sort[i,1], '_', data1_pos_uniq_sort[i,2], '_', data1_pos_uniq_sort[i,3])
+		chr_name[i,1] <- paste0(data1_pos_uniq_sort[i,1], '_', data1_pos_uniq_sort[i,2], '_', data1_pos_uniq_sort[i,3])
 	}
+
 	colnames(result_sv) <- chr_name
 	colors = structure(c("white", "#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411"), names = c(0:12))

 	sv_list = c("none", "del_h1",  "del_h2",  "del_hom", "dup_h1",  "dup_h2",  "dup_hom", "inv_h1",  "inv_h2",  "inv_hom", "idup_h1", "idup_h2", "complex")
 	sv_tmp <- matrix(0, 13, 1)
-	for (i in 1:13){sv_tmp[i,1] <- sum(result_sv==(i-1)) }
+	for (i in 1:13){
+		sv_tmp[i,1] <- sum(result_sv==(i-1))
+	}
 	sv_list_sub <- sv_list[sv_tmp>0]

 	mat <- as.data.frame(result_sv[res$tree_row$order,])
 	ha_column = HeatmapAnnotation(df = data.frame(type1 = data1_pos_uniq_sort$color),
-		col = list(type1 = c("magenta" =  "magenta", "purple" = "purple")))
-	#ht1 = Heatmap(mat, name = "", col=c("white", "#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411"), heatmap_legend_param = list(labels = c("none", "del_h1",  "del_h2",  "del_hom", "dup_h1",  "dup_h2",  "dup_hom", "inv_h1",  "inv_h2",  "inv_hom", "idup_h1", "idup_h2", "complex")),cluster_rows = FALSE, cluster_columns = FALSE, column_title = filename, row_names_gp = gpar(fontsize = 5), column_names_gp = gpar(fontsize = 3), column_title_gp = gpar(fontsize = 7, fontface = "bold"), top_annotation = ha_column)
-	ht1 = Heatmap(mat, name = "", col=colors, heatmap_legend_param = list(labels = sv_list_sub),cluster_rows = FALSE, cluster_columns = FALSE, column_title = inputfile, row_names_gp = gpar(fontsize = 5), column_names_gp = gpar(fontsize = 1), column_title_gp = gpar(fontsize = 7, fontface = "bold"), top_annotation = ha_column)
+	col = list(type1 = c("magenta" =  "magenta", "purple" = "purple")))
+	ht1 = Heatmap(mat, name = "", col=colors, heatmap_legend_param = list(labels = sv_list_sub), cluster_rows = FALSE, cluster_columns = FALSE, column_title = inputfile, row_names_gp = gpar(fontsize = 5), column_names_gp = gpar(fontsize = 1), column_title_gp = gpar(fontsize = 7, fontface = "bold"), top_annotation = ha_column)
 	draw(ht1, show_annotation_legend = FALSE)

-	dev.off()	
+	dev.off()

 	pdf(chromosome.outputfile, width = 11, height = 5 )

-
 	data1<-read.table(inputfile, sep = '\t', header=T, comment.char = "")

 	data1$color <- 0
 	ash12rainbow <- c("#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411")
 	sv_call_name <- c("del_h1",  "del_h2",  "del_hom", "dup_h1",  "dup_h2",  "dup_hom", "inv_h1",  "inv_h2",  "inv_hom", "idup_h1", "idup_h2", "complex")
 	for (j in 1:length(sv_call_name)){
-	tmp <- which(data1[,9]==sv_call_name[j])
-	data1[tmp,15] <- j
+		tmp <- which(data1[,9]==sv_call_name[j])
+		data1[tmp,15] <- j
 	}
 	colors = structure(c("white", "#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411"), names = c(0:12))

@@ -130,7 +127,7 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch
 	chrom <- c("chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22", "chrX")

 	for (i in 2:length(chrom)){
-	data1_pos_uniq_sort<-rbind(data1_pos_uniq_sort, data1_pos_uniq[data1_pos_uniq$chrom==chrom[i],])
+		data1_pos_uniq_sort<-rbind(data1_pos_uniq_sort, data1_pos_uniq[data1_pos_uniq$chrom==chrom[i],])
 	}
 	data1_pos_uniq_sort$posind <- c(1:nrow(data1_pos_uniq_sort))

@@ -142,10 +139,10 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch
 	result_sv <- matrix(0, nrow(data1_cell_uniq_sort), nrow(data1_pos_uniq))

 	for (i in 1:nrow(data1)){
-	pos_ind <- which(data1_pos_uniq_sort[,1]==data1[i,1] & data1_pos_uniq_sort[,2]==data1[i,2] & data1_pos_uniq_sort[,3]==data1[i,3])
-	cell_ind <- which(data1_cell_uniq_sort[,1]==data1[i,5])
-	result[cell_ind, pos_ind] <- data1[i,13]
-	result_sv[cell_ind, pos_ind] <- data1[i,15]
+		pos_ind <- which(data1_pos_uniq_sort[,1]==data1[i,1] & data1_pos_uniq_sort[,2]==data1[i,2] & data1_pos_uniq_sort[,3]==data1[i,3])
+		cell_ind <- which(data1_cell_uniq_sort[,1]==data1[i,5])
+		result[cell_ind, pos_ind] <- data1[i,13]
+		result_sv[cell_ind, pos_ind] <- data1[i,15]
 	}
 	rownames(result) <- data1_cell_uniq_sort
 	colnames(result) <- data1_pos_uniq_sort$posind
@@ -153,10 +150,6 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch
 	rownames(result_sv) <- data1_cell_uniq_sort
 	colnames(result_sv) <- data1_pos_uniq_sort$posind

-
-
-	library(pheatmap)
-	library(gplots)
 	breaksList = seq(4, 30, by = 0.1)
 	breaksList=append(breaksList, max(result))
 	breaksList=append(breaksList, -1, 0)
@@ -164,28 +157,26 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch

 	res<-pheatmap(result, border_color = NA, show_rownames=T, show_colnames=F, cluster_cols=F, cluster_rows=T, clustering_method="ward.D", scale="none", col=mycol, cex=0.5, main=inputfile)

-
-
 	##Assign sv calls to the genome-wide bins (200kb)
 	par("mar")
 	par(mar=c(0.5,0.5,0.5,0.5))

 	genome_bins_sort <- genome_bins[genome_bins[,1]==chrom[1],]
 	for (i in 2:length(chrom)){
-	genome_bins_sort <- rbind(genome_bins_sort, genome_bins[genome_bins[,1]==chrom[i],])
+		genome_bins_sort <- rbind(genome_bins_sort, genome_bins[genome_bins[,1]==chrom[i],])
 	}

 	result_sv_bins <- matrix(0, nrow(genome_bins_sort), nrow(result_sv))
 	for (i in 1:ncol(result_sv)){
-	num_bins1 <- which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,2] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,2])
-	num_bins2 <- which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,3] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,3])
-	if (sum(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,3] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,3])==0){
-	num_bins2 <- max(which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1])))
-	}
-
-	num_bins <- c(num_bins1:num_bins2)
-	for (j in 1:length(num_bins)){result_sv_bins[num_bins[j],] <- t(as.matrix(result_sv[,i]))}
-	cat(paste0(i, ' '))
+		num_bins1 <- which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,2] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,2])
+		num_bins2 <- which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,3] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,3])
+		if (sum(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,3] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,3])==0){
+			num_bins2 <- max(which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1])))
+		}
+
+		num_bins <- c(num_bins1:num_bins2)
+		for (j in 1:length(num_bins)){result_sv_bins[num_bins[j],] <- t(as.matrix(result_sv[,i]))}
+		cat(paste0(i, ' '))
 	}

 	widths <- matrix(0, length(chrom), 1)
@@ -196,13 +187,16 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch
 	mat2 <- result_sv_bins[,res$tree_row$order]
 	mat2 <- mat2[,ncol(mat2):1]
 	dnull <- matrix(0, nrow(mat2), ncol(mat2))
-	#par(mfrow=c(1, 23))  
+
 	l <- layout(matrix(seq(1,23),1,23,byrow=TRUE), widths=widths)      
-	for (i in 1:length(chrom)){        
-	d = mat2[gos2$chrom==chrom[i],]  
-	#if (sum(d)==1){image(seq_len(1), seq_len(length(d)), as.matrix(t(d)), zlim=c(0, 12), col=colors, xlab="", ylab="", axes=FALSE, main=chrom[i], cex.main=0.8);box()   }
-	if (sum(d)==0){image(seq_len(nrow(dnull)), seq_len(ncol(dnull)), dnull, zlim=c(0, 12), col="white", xlab="", ylab="", axes=FALSE, main=chrom[i], cex.main=0.8);box()   }
-	if (sum(d)>1){image(seq_len(nrow(d)), seq_len(ncol(d)), d, zlim=c(0, 12), col=colors, xlab="", ylab="", axes=FALSE, main=chrom[i], cex.main=0.8);box()   }
+	for (i in 1:length(chrom)){
+		d = mat2[gos2$chrom==chrom[i],]
+		if (sum(d)==0){
+			image(seq_len(nrow(dnull)), seq_len(ncol(dnull)), dnull, zlim=c(0, 12), col="white", xlab="", ylab="", axes=FALSE, main=chrom[i], cex.main=0.8);box()
+		}
+		if (sum(d)>1){
+			image(seq_len(nrow(d)), seq_len(ncol(d)), d, zlim=c(0, 12), col=colors, xlab="", ylab="", axes=FALSE, main=chrom[i], cex.main=0.8);box()
+		}
 	}     

 	dev.off()