Commit d58138d6 authored by Florian Huber's avatar Florian Huber

Clustering script added.

parent 3a216f2e
## ---- message = FALSE----------------------------------------------------
library(tidyverse)
library(psych)
library(factoextra)
library(plotly)
library(ComplexHeatmap)
library(viridis)
library(circlize)
## ------------------------------------------------------------------------
input_data <- readRDS("./data/pca_input_data.rds")
(input_data <- as_tibble(input_data))
## ------------------------------------------------------------------------
no_cells_per_well <-
group_by(input_data, well) %>%
dplyr::summarize(no_cells = sum(count))
no_cells_per_well
(data_with_sums <- left_join(input_data, no_cells_per_well))
data_for_PCA <- mutate(data_with_sums, perc = count / no_cells,
z_score = logit(perc))
data_for_PCA
## ------------------------------------------------------------------------
data_for_PCA_wide <- data_for_PCA %>%
dplyr::select(class, well, z_score) %>%
spread(key = class, value = z_score)
data_for_PCA_wide
## ----PCA-----------------------------------------------------------------
(PCA <- prcomp(data_for_PCA_wide[, -1], center = TRUE, scale. = TRUE))
summary(PCA)
genes <-
group_by(input_data, well) %>%
dplyr::summarize(gene = unique(Gene.Symbol))
genes <- ifelse(is.na(genes$gene), "empty", genes$gene)
(dataGG = tibble(PC1 = PCA$x[,1], PC2 = PCA$x[,2], PC3 = PCA$x[,3],
PC4 = PCA$x[,4], genes))
pl <- (ggplot(dataGG, aes(x = PC1, y = PC2, color = genes))
+ geom_text(aes(label = genes), size = I(2))
+ coord_fixed(ratio = (PCA$sdev^2)[2] / (PCA$sdev^2)[1])
+ ggtitle("Principal components plot")
)
pl
## ----var_imp-------------------------------------------------------------
(loadings <- PCA$rotation[, 1:2])
(loadings_gg <-
as.data.frame(loadings) %>%
rownames_to_column(var = "class") %>%
dplyr::select(class, PC1, PC2))
(loadings_gg <- gather(loadings_gg, key = "comp", value = "loading", PC1:PC2))
ggplot(loadings_gg, aes(x = class, y = loading, fill = class)) +
facet_wrap( ~ comp) +
geom_bar(stat = "identity", position = "identity") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_fill_brewer(type = "qual", palette = "Set3")
## ----co_circle-----------------------------------------------------------
fviz_pca_var(PCA, col.circle = "black", title = "Correlation Circle for the PCA") + coord_equal()
## ------------------------------------------------------------------------
# prepare the data set, don't go through all the steps
cluster_input_long <-
data_for_PCA %>%
select(class, Well, Gene.Symbol, Group, count, no_cells, perc) %>%
filter(Group == "target") %>%
group_by(Gene.Symbol, class) %>%
slice(1) %>%
ungroup()
cluster_input_long
cluster_input_wide <-
select(cluster_input_long, class, Gene.Symbol, perc) %>%
spread(key = class, value = perc)
cluster_input_wide
cluster_input_wide_m <- as.matrix(cluster_input_wide[, -1])
rownames(cluster_input_wide_m) <- cluster_input_wide$Gene.Symbol
cluster_input_wide_m
## ------------------------------------------------------------------------
(m0 <- rbind(c(0, 0), c(0, 1), c(1, 1)))
dist(m0)
## ------------------------------------------------------------------------
m <- cluster_input_wide_m
d <- dist(m) # alternatively: run as.dist() on a matrix of precomputed distances
d
h <- hclust(d)
h
plot(h)
## ------------------------------------------------------------------------
# k to specify number of clusters
(clusts <- cutree(h, k = 2))
cutree(h, h = 0.4)
# tibble(gene = names(clusts), cluster = clusts) %>% group_by(cluster) %>% nest()
## ------------------------------------------------------------------------
Heatmap(m,
cluster_rows = FALSE,
cluster_columns = FALSE,
column_names_side = "top",
row_names_side = "left")
quantile(m)
my_cols <- plasma(20)
Heatmap(m,
col = colorRamp2(
c(seq(from = 0, to = 0.1, length.out = 20)), my_cols),
cluster_rows = FALSE,
cluster_columns = FALSE,
column_names_side = "top",
row_names_side = "left")
Heatmap(m,
col = colorRamp2(
c(seq(from = 0, to = 0.1, length.out = 20)), my_cols),
column_names_side = "top",
row_names_side = "left")
Heatmap(m,
col = colorRamp2(
c(seq(from = 0, to = 0.1, length.out = 20)), my_cols),
column_names_side = "top",
row_names_side = "left",
clustering_distance_columns = "spearman",
clustering_distance_rows = "spearman")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment