Commit 99d19446 authored by Bernd Klaus's avatar Bernd Klaus

finished data cleaning and heatmap / clustering section

parent 1d4b4371
......@@ -329,7 +329,8 @@ compute_pca <- function(data_mat, ntop = 500, ...){
return(list(pca = data.frame(PC1 = PCA$x[,1], PC2 = PCA$x[,2],
PC3 = PCA$x[,3], PC4 = PCA$x[,4], ...),
perc_var = percentVar))
perc_var = percentVar,
selected_vars = select))
}
......@@ -376,22 +377,48 @@ pca_plot_2 <- ggplot(pca$pca, aes(x = PC2, y = PC3,
pca_plot_2
## ----heatmap, fig.height = 5, eval=TRUE----------------------------------
dists <- as.matrix(get_dist(t(counts_norm_vst_crc), method = "pearson"))
diag(dists) <- NA
create_dist_mat <- function(data, anno, method = "pearson"){
dists <- as.matrix(get_dist(t(data), method = method))
diag(dists) <- NA
rownames(dists) <- anno
colnames(dists) <- anno
return(dists)
}
if(!("cl_anno" %in% colnames(col_data_crc))){
col_data_crc <- unite(col_data_crc, cl_anno, patient, tissue, remove = FALSE)
}
rownames(dists) <- col_data_crc$cl_anno
colnames(dists) <- col_data_crc$cl_anno
hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
pheatmap(dists, trace="none", col = rev(hmcol))
pheatmap(create_dist_mat(counts_norm_vst_crc,
anno = col_data_crc$cl_anno,
method = "euclidian"),
trace="none", col = rev(hmcol))
## ----heatmap_pc_regression-----------------------------------------------
## ----pc_regression, warning=FALSE----------------------------------------
lm_pc1 <- lm(t(counts_norm_vst_crc) ~ pca$pca$PC1)
gene_means <- tidy(lm_pc1) %>%
filter(term == "(Intercept)") %>%
select(response, estimate)
cleaned_data <- t(residuals(lm_pc1)) + gene_means$estimate
# sanity check whether gene wise means are equal
all.equal(rowMeans(cleaned_data), rowMeans(counts_norm_vst_crc))
## ----heatmapCleanedData--------------------------------------------------
hmcol <- colorRampPalette(brewer.pal(9, "RdPu"))(255)
pheatmap(create_dist_mat(cleaned_data,
anno = col_data_crc$cl_anno,
method = "euclidean"),
trace="none", col = rev(hmcol))
## ----session_info, cache = FALSE-----------------------------------------
......
......@@ -818,7 +818,8 @@ compute_pca <- function(data_mat, ntop = 500, ...){
return(list(pca = data.frame(PC1 = PCA$x[,1], PC2 = PCA$x[,2],
PC3 = PCA$x[,3], PC4 = PCA$x[,4], ...),
perc_var = percentVar))
perc_var = percentVar,
selected_vars = select))
}
......@@ -935,9 +936,12 @@ if(!("cl_anno" %in% colnames(col_data_crc))){
hmcol <- colorRampPalette(brewer.pal(9, "GnBu"))(100)
pheatmap(create_dist_mat(counts_norm_vst_crc, anno = col_data_crc$cl_anno),
pheatmap(create_dist_mat(counts_norm_vst_crc,
anno = col_data_crc$cl_anno,
method = "euclidian"),
trace="none", col = rev(hmcol))
```
The PCA plot already indicated that there is not clear--cut clustering present,
this is confirmed by the heatmap. We predominantly see a clustering by patient.
As the first PC mainly captures patient to
......@@ -949,7 +953,7 @@ patient variation, we now "regress out" PC1 and perform the clustering again.
In order to do this, we build multiple regression models, each of them
regresses the first PC against the gene expression data. We then take
the residuals and add the estimated intercept again to preserve the means
of the gene expression.
of the gene expression. [cf. Leek et. al. 2010]
```{r pc_regression, warning=FALSE}
......@@ -970,15 +974,17 @@ data should be used with caution for downstream analysis. It is always best to
incorporate a batch directly into statistical modelling. See Jaffe and Langaas]
```{r heatmapCleanedData}
hmcol <- colorRampPalette(brewer.pal(9, "YlOrRd"))(256)
hmcol <- colorRampPalette(brewer.pal(9, "RdPu"))(255)
pheatmap(create_dist_mat(cleaned_data,
anno = col_data_crc$cl_anno,
method = "kendall"),
method = "euclidean"),
trace="none", col = rev(hmcol))
```
As already seen in the plot of PC2 vs PC3 in the exercise, we can
now see some clustering by tissue: the normal tissues are separated
from the tumor / metastasis tissues.
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment