Skip to content
Snippets Groups Projects
Commit fd43409f authored by Tobias Marschall's avatar Tobias Marschall
Browse files

Some code formatting and cleanup

parent b29fae87
No related branches found
No related tags found
No related merge requests found
plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, chromosome.outputfile) {
# setwd("/Users/jeong/Documents/Strand_Seq/TALL_analysis/sv_calls_new_1008") ##This folder need to be changed ( /path/to/sv_calls )
genome_bins <- read.table(bin.bed.filename, sep = '\t', header=F, comment.char = "")
list_directory <- dir("./", full.names=TRUE)
# files <- list.files(paste0(list_directory[m], '/100000_fixed_norm.selected_j0.1_s0.5'), pattern=".txt$", full.names=TRUE)
# plotname<-paste0("Mosaic_plot_", strsplit(list_directory[m], './/')[[1]][2], "_position.pdf")
pdf(position.outputfile, width = 11, height = 10 )
# filename<-strsplit(files[k], './/')[[1]][2]
data1<-read.table(inputfile, sep = '\t', header=T, comment.char = "")
data1$color <- 0
ash12rainbow <- c("#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411")
sv_call_name <- c("del_h1", "del_h2", "del_hom", "dup_h1", "dup_h2", "dup_hom", "inv_h1", "inv_h2", "inv_hom", "idup_h1", "idup_h2", "complex")
for (j in 1:length(sv_call_name)){
tmp <- which(data1[,9]==sv_call_name[j])
data1[tmp,15] <- j
tmp <- which(data1[,9]==sv_call_name[j])
data1[tmp,15] <- j
}
data1_pos <- data1[,1:3]
data1_pos_uniq <- unique(data1_pos)
data1_pos_uniq_sort <- data1_pos_uniq[data1_pos_uniq$chrom=="chr1",]
chrom <- c("chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22", "chrX")
for (i in 2:length(chrom)){
data1_pos_uniq_sort<-rbind(data1_pos_uniq_sort, data1_pos_uniq[data1_pos_uniq$chrom==chrom[i],])
data1_pos_uniq_sort<-rbind(data1_pos_uniq_sort, data1_pos_uniq[data1_pos_uniq$chrom==chrom[i],])
}
data1_pos_uniq_sort$posind <- c(1:nrow(data1_pos_uniq_sort))
data1_cell <- data1[,5]
......@@ -42,11 +37,12 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch
result_sv <- matrix(0, nrow(data1_cell_uniq_sort), nrow(data1_pos_uniq))
for (i in 1:nrow(data1)){
pos_ind <- which(data1_pos_uniq_sort[,1]==data1[i,1] & data1_pos_uniq_sort[,2]==data1[i,2] & data1_pos_uniq_sort[,3]==data1[i,3])
cell_ind <- which(data1_cell_uniq_sort[,1]==data1[i,5])
result[cell_ind, pos_ind] <- data1[i,13]
result_sv[cell_ind, pos_ind] <- data1[i,15]
pos_ind <- which(data1_pos_uniq_sort[,1]==data1[i,1] & data1_pos_uniq_sort[,2]==data1[i,2] & data1_pos_uniq_sort[,3]==data1[i,3])
cell_ind <- which(data1_cell_uniq_sort[,1]==data1[i,5])
result[cell_ind, pos_ind] <- data1[i,13]
result_sv[cell_ind, pos_ind] <- data1[i,15]
}
rownames(result) <- data1_cell_uniq_sort
colnames(result) <- data1_pos_uniq_sort$posind
result[result==Inf] <- max(result[result!=Inf])
......@@ -72,6 +68,7 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch
library(pheatmap)
library(gplots)
library(ComplexHeatmap)
breaksList = seq(4, 30, by = 0.1)
breaksList=append(breaksList, max(result))
breaksList=append(breaksList, -1, 0)
......@@ -86,40 +83,40 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch
res<-pheatmap(result, border_color = NA, show_rownames=T, show_colnames=F, cluster_cols=F, cluster_rows=T, clustering_method="ward.D", scale="none", col=mycol, cex=0.7, main=inputfile, annotation_col = col_annotation, annotation_colors = anno_colors, breaks = breaksList, annotation_legend = FALSE)
#pheatmap(result_sv[res$tree_row$order,], border_color = NA, show_rownames=T, show_colnames=F, cluster_cols=F, cluster_rows=F, scale="none", cex=0.7, col=c("white", "#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411"), main=filename, annotation_col = col_annotation, annotation_colors = anno_colors, annotation_legend = FALSE, legend=TRUE, legend_labels <- c("none", "del_h1", "del_h2", "del_hom", "dup_h1", "dup_h2", "dup_hom", "inv_h1", "inv_h2", "inv_hom", "idup_h1", "idup_h2", "complex"))
chr_name <- matrix("chr", ncol(result), 1)
for (i in 1:ncol(result)){
chr_name[i,1] <- paste0(data1_pos_uniq_sort[i,1], '_', data1_pos_uniq_sort[i,2], '_', data1_pos_uniq_sort[i,3])
chr_name[i,1] <- paste0(data1_pos_uniq_sort[i,1], '_', data1_pos_uniq_sort[i,2], '_', data1_pos_uniq_sort[i,3])
}
colnames(result_sv) <- chr_name
colors = structure(c("white", "#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411"), names = c(0:12))
sv_list = c("none", "del_h1", "del_h2", "del_hom", "dup_h1", "dup_h2", "dup_hom", "inv_h1", "inv_h2", "inv_hom", "idup_h1", "idup_h2", "complex")
sv_tmp <- matrix(0, 13, 1)
for (i in 1:13){sv_tmp[i,1] <- sum(result_sv==(i-1)) }
for (i in 1:13){
sv_tmp[i,1] <- sum(result_sv==(i-1))
}
sv_list_sub <- sv_list[sv_tmp>0]
mat <- as.data.frame(result_sv[res$tree_row$order,])
ha_column = HeatmapAnnotation(df = data.frame(type1 = data1_pos_uniq_sort$color),
col = list(type1 = c("magenta" = "magenta", "purple" = "purple")))
#ht1 = Heatmap(mat, name = "", col=c("white", "#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411"), heatmap_legend_param = list(labels = c("none", "del_h1", "del_h2", "del_hom", "dup_h1", "dup_h2", "dup_hom", "inv_h1", "inv_h2", "inv_hom", "idup_h1", "idup_h2", "complex")),cluster_rows = FALSE, cluster_columns = FALSE, column_title = filename, row_names_gp = gpar(fontsize = 5), column_names_gp = gpar(fontsize = 3), column_title_gp = gpar(fontsize = 7, fontface = "bold"), top_annotation = ha_column)
ht1 = Heatmap(mat, name = "", col=colors, heatmap_legend_param = list(labels = sv_list_sub),cluster_rows = FALSE, cluster_columns = FALSE, column_title = inputfile, row_names_gp = gpar(fontsize = 5), column_names_gp = gpar(fontsize = 1), column_title_gp = gpar(fontsize = 7, fontface = "bold"), top_annotation = ha_column)
col = list(type1 = c("magenta" = "magenta", "purple" = "purple")))
ht1 = Heatmap(mat, name = "", col=colors, heatmap_legend_param = list(labels = sv_list_sub), cluster_rows = FALSE, cluster_columns = FALSE, column_title = inputfile, row_names_gp = gpar(fontsize = 5), column_names_gp = gpar(fontsize = 1), column_title_gp = gpar(fontsize = 7, fontface = "bold"), top_annotation = ha_column)
draw(ht1, show_annotation_legend = FALSE)
dev.off()
dev.off()
pdf(chromosome.outputfile, width = 11, height = 5 )
data1<-read.table(inputfile, sep = '\t', header=T, comment.char = "")
data1$color <- 0
ash12rainbow <- c("#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411")
sv_call_name <- c("del_h1", "del_h2", "del_hom", "dup_h1", "dup_h2", "dup_hom", "inv_h1", "inv_h2", "inv_hom", "idup_h1", "idup_h2", "complex")
for (j in 1:length(sv_call_name)){
tmp <- which(data1[,9]==sv_call_name[j])
data1[tmp,15] <- j
tmp <- which(data1[,9]==sv_call_name[j])
data1[tmp,15] <- j
}
colors = structure(c("white", "#77AADD", "#4477AA", "#114477", "#CC99BB", "#AA4488", "#771155", "#DDDD77", "#AAAA44", "#777711", "#DDAA77", "#AA7744", "#774411"), names = c(0:12))
......@@ -130,7 +127,7 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch
chrom <- c("chr1", "chr2", "chr3", "chr4", "chr5", "chr6", "chr7", "chr8", "chr9", "chr10", "chr11", "chr12", "chr13", "chr14", "chr15", "chr16", "chr17", "chr18", "chr19", "chr20", "chr21", "chr22", "chrX")
for (i in 2:length(chrom)){
data1_pos_uniq_sort<-rbind(data1_pos_uniq_sort, data1_pos_uniq[data1_pos_uniq$chrom==chrom[i],])
data1_pos_uniq_sort<-rbind(data1_pos_uniq_sort, data1_pos_uniq[data1_pos_uniq$chrom==chrom[i],])
}
data1_pos_uniq_sort$posind <- c(1:nrow(data1_pos_uniq_sort))
......@@ -142,10 +139,10 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch
result_sv <- matrix(0, nrow(data1_cell_uniq_sort), nrow(data1_pos_uniq))
for (i in 1:nrow(data1)){
pos_ind <- which(data1_pos_uniq_sort[,1]==data1[i,1] & data1_pos_uniq_sort[,2]==data1[i,2] & data1_pos_uniq_sort[,3]==data1[i,3])
cell_ind <- which(data1_cell_uniq_sort[,1]==data1[i,5])
result[cell_ind, pos_ind] <- data1[i,13]
result_sv[cell_ind, pos_ind] <- data1[i,15]
pos_ind <- which(data1_pos_uniq_sort[,1]==data1[i,1] & data1_pos_uniq_sort[,2]==data1[i,2] & data1_pos_uniq_sort[,3]==data1[i,3])
cell_ind <- which(data1_cell_uniq_sort[,1]==data1[i,5])
result[cell_ind, pos_ind] <- data1[i,13]
result_sv[cell_ind, pos_ind] <- data1[i,15]
}
rownames(result) <- data1_cell_uniq_sort
colnames(result) <- data1_pos_uniq_sort$posind
......@@ -153,10 +150,6 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch
rownames(result_sv) <- data1_cell_uniq_sort
colnames(result_sv) <- data1_pos_uniq_sort$posind
library(pheatmap)
library(gplots)
breaksList = seq(4, 30, by = 0.1)
breaksList=append(breaksList, max(result))
breaksList=append(breaksList, -1, 0)
......@@ -164,28 +157,26 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch
res<-pheatmap(result, border_color = NA, show_rownames=T, show_colnames=F, cluster_cols=F, cluster_rows=T, clustering_method="ward.D", scale="none", col=mycol, cex=0.5, main=inputfile)
##Assign sv calls to the genome-wide bins (200kb)
par("mar")
par(mar=c(0.5,0.5,0.5,0.5))
genome_bins_sort <- genome_bins[genome_bins[,1]==chrom[1],]
for (i in 2:length(chrom)){
genome_bins_sort <- rbind(genome_bins_sort, genome_bins[genome_bins[,1]==chrom[i],])
genome_bins_sort <- rbind(genome_bins_sort, genome_bins[genome_bins[,1]==chrom[i],])
}
result_sv_bins <- matrix(0, nrow(genome_bins_sort), nrow(result_sv))
for (i in 1:ncol(result_sv)){
num_bins1 <- which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,2] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,2])
num_bins2 <- which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,3] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,3])
if (sum(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,3] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,3])==0){
num_bins2 <- max(which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1])))
}
num_bins <- c(num_bins1:num_bins2)
for (j in 1:length(num_bins)){result_sv_bins[num_bins[j],] <- t(as.matrix(result_sv[,i]))}
cat(paste0(i, ' '))
num_bins1 <- which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,2] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,2])
num_bins2 <- which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,3] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,3])
if (sum(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1]) & genome_bins_sort[,2]<=data1_pos_uniq_sort[i,3] & genome_bins_sort[,3]>data1_pos_uniq_sort[i,3])==0){
num_bins2 <- max(which(genome_bins_sort[,1]==as.character(data1_pos_uniq_sort[i,1])))
}
num_bins <- c(num_bins1:num_bins2)
for (j in 1:length(num_bins)){result_sv_bins[num_bins[j],] <- t(as.matrix(result_sv[,i]))}
cat(paste0(i, ' '))
}
widths <- matrix(0, length(chrom), 1)
......@@ -196,13 +187,16 @@ plot.clustering <- function(inputfile, bin.bed.filename, position.outputfile, ch
mat2 <- result_sv_bins[,res$tree_row$order]
mat2 <- mat2[,ncol(mat2):1]
dnull <- matrix(0, nrow(mat2), ncol(mat2))
#par(mfrow=c(1, 23))
l <- layout(matrix(seq(1,23),1,23,byrow=TRUE), widths=widths)
for (i in 1:length(chrom)){
d = mat2[gos2$chrom==chrom[i],]
#if (sum(d)==1){image(seq_len(1), seq_len(length(d)), as.matrix(t(d)), zlim=c(0, 12), col=colors, xlab="", ylab="", axes=FALSE, main=chrom[i], cex.main=0.8);box() }
if (sum(d)==0){image(seq_len(nrow(dnull)), seq_len(ncol(dnull)), dnull, zlim=c(0, 12), col="white", xlab="", ylab="", axes=FALSE, main=chrom[i], cex.main=0.8);box() }
if (sum(d)>1){image(seq_len(nrow(d)), seq_len(ncol(d)), d, zlim=c(0, 12), col=colors, xlab="", ylab="", axes=FALSE, main=chrom[i], cex.main=0.8);box() }
for (i in 1:length(chrom)){
d = mat2[gos2$chrom==chrom[i],]
if (sum(d)==0){
image(seq_len(nrow(dnull)), seq_len(ncol(dnull)), dnull, zlim=c(0, 12), col="white", xlab="", ylab="", axes=FALSE, main=chrom[i], cex.main=0.8);box()
}
if (sum(d)>1){
image(seq_len(nrow(d)), seq_len(ncol(d)), d, zlim=c(0, 12), col=colors, xlab="", ylab="", axes=FALSE, main=chrom[i], cex.main=0.8);box()
}
}
dev.off()
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment