Tutorial_HTM_2016.Rmd

title: "Visual Exploration of High--Throughput--Microscopy Data"
author: "Bernd Klaus, Andrzej Oles, Mike Smith"
date: "`r doc_date()`"
output:
    BiocStyle::html_document:
        toc: true
        toc_float: true
        highlight: tango
        code_folding: hide
    BiocStyle::pdf_document2:
        toc: true
        highlight: tango
library(knitr)
# Session-wide printing defaults: 3 significant digits, 80 character lines.
options(
  digits = 3,
  width = 80
)

# Defaults applied to every knitr chunk: code and output are shown, the code
# layout is left untouched, figures are written as 6 x 3.5 inch PNGs at 300 dpi,
# printed output is prefixed with two spaces and chunk results are cached.
opts_chunk$set(
  echo = TRUE,
  tidy = FALSE,
  include = TRUE,
  dev = 'png',
  fig.width = 6,
  fig.height = 3.5,
  comment = '  ',
  dpi = 300,
  cache = TRUE
)
# cellh5 is installed from its GitHub repository (aoles/cellh5-R). Install it
# only when it is missing, so that re-running the document neither needs
# network access nor replaces an already installed version mid-analysis.
if (!requireNamespace("cellh5", quietly = TRUE)) {
  devtools::install_github("aoles/cellh5-R")
}
library(rmarkdown)
library(tidyverse)
library(openxlsx)
library(cellh5)
library(psych)
library(stringr)
library(splots)
# Directory holding the plate annotation and the CellH5 result file.
data_path <- "~/p12_data"

# Plate annotation, read from an Excel sheet; the first rows are printed.
plate_map <- file.path(data_path, "plate_mapping.xlsx") %>%
  read.xlsx(xlsxFile = .)
head(plate_map)

# Open the CellH5 file and retrieve the positions of its plate(s).
path <- file.path(data_path, "_all_positions.ch5")
c5f <- CellH5(path)
c5_pos <- c5f %>%
  C5Plates() %>%
  C5Positions(c5f, .)

# Example call for a single position (the first one).
# NOTE(review): `as = "name"` presumably returns class names rather than
# numeric labels -- confirm against the cellh5 documentation.
predictions <- C5Predictions(
  c5f,
  c5_pos[[1]],
  mask = "primary__primary3",
  as = "name"
)

# Remove position WB08_P1 so it is not processed further.
c5_pos[["WB08_P1"]] <- NULL


# Count, for every position, how many objects fall into each predicted class
# and collect the counts in a class x position matrix.
#
# The per-position tables are aligned on the union of all observed classes
# before they are combined; a class that does not occur at a position gets a
# count of 0. A plain sapply() silently returns a list instead of a matrix as
# soon as the tables differ in length, which breaks the reshaping that follows.
raw_data <- local({
  class_counts <- lapply(c5_pos, function(pos) {
    table(C5Predictions(c5f, pos, mask = "primary__primary3", as = "name"))
  })

  # Classes in order of first appearance, so the row order is unchanged
  # whenever all positions report the same classes.
  classes <- unique(unlist(lapply(class_counts, names)))

  vapply(
    class_counts,
    function(tab) {
      counts <- as.vector(tab)[match(classes, names(tab))]
      counts[is.na(counts)] <- 0L
      setNames(counts, classes)
    },
    FUN.VALUE = integer(length(classes))
  )
})

# Store the counts on disk so they can be reloaded without going back to the
# CellH5 file.
save(raw_data, file = "raw_data.RData")

# Reload the stored class counts (restores `raw_data`).
load("raw_data.RData")

# Reshape the class x well matrix into long format: one row per class/well
# combination. Every column except `class` holds the counts of one well, so
# all of them are gathered instead of a hard-coded range of well names that
# would silently leave out any well outside that range.
tidy_raw_data <- as.data.frame(raw_data) %>%
  rownames_to_column(var = "class") %>%
  gather(key = "well", value = "count", -class)

# Translate the position names into the notation used by the plate map,
# e.g. "WA01_P1" becomes "A01_01".
tidy_raw_data$well <- str_replace(
  tidy_raw_data$well,
  "^W([A-H][0-9]{2})_P1",
  "\\1_01"
)

# Attach the plate annotation to every class/well row; the `Position` column
# of the plate map holds the well identifier.
input_data <- left_join(tidy_raw_data, plate_map, by = c("well" = "Position"))

# Total count over all classes for each well.
no_cells_per_well <- input_data %>%
  group_by(well) %>%
  summarize(no_cells = sum(count))

# Add the per-well totals to each row. The join key is given explicitly so the
# result does not depend on which column names the two tables happen to share.
data_with_sums <- left_join(input_data, no_cells_per_well, by = "well")

# size_factors <- no_cells_per_well$no_cells /  geometric.mean(no_cells_per_well$no_cells)

# Per well, express each class count as a fraction of the well total and
# logit-transform it (kept in the column `z_score`); then reshape to one row
# per well with one column per class.
data_for_PCA <- data_with_sums %>%
  mutate(
    perc = count / no_cells,
    z_score = logit(perc)
  ) %>%
  select(class, well, z_score) %>%
  spread(key = class, value = z_score)

# Principal component analysis on the centred and scaled class columns; the
# first column (`well`) is only an identifier and is left out.
PCA <- prcomp(data_for_PCA[, -1], center = TRUE, scale. = TRUE)


# Gene symbol of each well; wells without an annotated gene get the label
# "empty".
genes <- with(
  summarize(group_by(input_data, well), gene = unique(Gene.Symbol)),
  ifelse(is.na(gene), "empty", gene)
)

# Scores on the first four principal components plus the gene label per well.
dataGG <- data.frame(
  PC1 = PCA$x[, 1],
  PC2 = PCA$x[, 2],
  PC3 = PCA$x[, 3],
  PC4 = PCA$x[, 4],
  genes
)

# PC1 against PC2 with every well drawn as its gene label, coloured by gene.
qplot(
  PC1, PC2,
  data = dataGG,
  color = genes,
  geom = "text",
  label = genes,
  asp = 1,
  main = "PC1 vs PC2, top variable genes",
  size = I(6)
)

# Plate layout: rows A-H and zero-padded columns 01-12.
dat_rows <- LETTERS[1:8]
dat_cols <- sprintf("%02d", 1:12)

# Identifiers of all wells ("A01_01", "B01_01", ...): every row letter is
# combined with every column number, running through the rows first.
wells <- data.frame(
  well = paste0(
    rep(dat_rows, times = length(dat_cols)),
    rep(dat_cols, each = length(dat_rows)),
    "_01"
  )
)
# Add the wells that have no measurements so that the table covers every well
# listed in `wells`, then order the rows by well name. The join key is stated
# explicitly instead of relying on the columns the two tables have in common.
full_data <- data_for_PCA %>%
  full_join(wells, by = "well") %>%
  arrange(well)

# Plate view (12 columns x 8 rows) of the Apoptosis class. logistic() maps the
# logit-transformed values back to fractions; the colour scale spans 0 to 0.4.
plotScreen(
  list(logistic(full_data$Apoptosis)),
  ncol = 1,
  nx = 12,
  ny = 8,
  main = "Apoptosis percentages",
  do.names = FALSE,
  legend.label = "percentage of apoptotic cells ",
  zrange = c(0, .4)
)

# Print the R version, platform and package versions of this session.
sessionInfo()