Commit b4165823 authored by Jakob Wirbel's avatar Jakob Wirbel
Browse files

split association plot and calculation functions, add function for volcano.

parent 695ac2a6
#!/usr/bin/Rscript
### SIAMCAT - Statistical Inference of Associations between
### Microbial Communities And host phenoTypes R flavor EMBL
### Heidelberg 2012-2018 GNU GPL 3.0
#' @title Visualize associations between features and classes
#'
#' @description This function visualizes different measures of association
#' between features and the label, computed previously with
#' the \link{check.associations} function
#'
#' @usage association.plot(siamcat, fn.plot=NULL, color.scheme = "RdYlBu",
#' sort.by = "fc", max.show = 50, plot.type = "quantile.box",
#' panels = c("fc", "auroc"), prompt=TRUE, verbose = 1)
#'
#' @param siamcat object of class \link{siamcat-class}
#'
#' @param fn.plot string, filename for the pdf-plot. If \code{fn.plot} is
#' \code{NULL}, the plot will be produced in the active graphics device.
#'
#' @param color.scheme valid R color scheme or vector of valid R colors (must
#' be of the same length as the number of classes), defaults to \code{'RdYlBu'}
#'
#' @param sort.by string, sort features by p-value (\code{"p.val"}), by fold
#' change (\code{"fc"}) or by prevalence shift (\code{"pr.shift"}),
#' defaults to \code{"fc"}
#'
#' @param max.show integer, how many associated features should be shown,
#' defaults to \code{50}
#'
#' @param plot.type string, specify how the abundance should be plotted, must
#' be one of these: \code{c("bean", "box", "quantile.box", "quantile.rect")},
#' defaults to \code{"quantile.box"}
#'
#' @param panels vector, name of the panels to be plotted next to the
#' abundances, possible entries are \code{c("fc", "auroc", "prevalence")},
#' defaults to \code{c("fc", "auroc")}
#'
#' @param prompt boolean, turn on/off prompting user input when not plotting
#' into a pdf-file, defaults to TRUE
#'
#' @param verbose integer, control output: \code{0} for no output at all,
#' \code{1} for only information about progress and success, \code{2} for
#' normal level of information and \code{3} for full debug information,
#' defaults to \code{1}
#'
#' @return Does not return anything, but instead produces association plot
#'
#' @keywords SIAMCAT association.plot
#'
#' @details This function visualizes the results of the computations carried
#' out in the \link{check.associations} function. It produces a plot of the
#' top \code{max.show} associated features at a user-specified significance
#' level \code{alpha}.
#'
#' For binary classification problems, the plot will show the distribution of
#' the log10-transformed abundances for both classes, a P-value from the
#' significance test, and user-selected panels for the effect size (AU-ROC,
#' prevalence shift, or generalized fold change). For regression problems,
#' the plot will show the Spearman correlation, the significance, and the
#' linear model effect size.
#'
#' @export
#'
#' @encoding UTF-8
#'
#' @examples
#' # Example data
#' data(siamcat_example)
#'
#' # Simple example
#' association.plot(siamcat_example, fn.plot = "./assoc_plot.pdf")
#'
#' # Plot associations as box plot
#' association.plot(siamcat_example,
#' fn.plot = "./assoc_plot_box.pdf",
#' plot.type = "box"
#' )
#'
#' # Additionally, sort by p-value instead of by fold change
#' association.plot(siamcat_example,
#' fn.plot = "./assoc_plot_fc.pdf",
#' plot.type = "box", sort.by = "p.val"
#' )
#'
#' # Custom colors
#' association.plot(siamcat_example,
#' fn.plot = "./assoc_plot_blue_yellow.pdf",
#' plot.type = "box", color.scheme = c("cornflowerblue", "#ffc125")
#' )
association.plot <- function(siamcat, fn.plot = NULL, color.scheme = "RdYlBu",
sort.by = "fc", max.show = 50, plot.type = "quantile.box",
panels = c("fc", "auroc"), prompt = TRUE, verbose = 1) {
if (verbose > 1) message("+ starting association.plot")
s.time <- proc.time()[3]
association.results <- associations(siamcat, verbose = 0)
if (is.null(association.results)) {
stop("SIAMCAT objects does not contain association results yet!")
}
# check label
label <- label(siamcat)
if (label$type == "CONTINUOUS") {
create.regr.assoc.plot(
siamcat, fn.plot, color.scheme, sort.by,
max.show, prompt, s.time, verbose
)
} else {
create.binary.assoc.plot(
siamcat, fn.plot, color.scheme, sort.by,
max.show, plot.type, panels, prompt, s.time, verbose
)
}
}
#' @keywords internal
create.regr.assoc.plot <- function(siamcat, fn.plot, color.scheme, sort.by,
max.show, prompt, s.time, verbose) {
assoc.param <- assoc_param(siamcat)
association.results <- associations(siamcat, verbose = 0)
label <- label(siamcat)
# get features
if (assoc.param$feature.type == "original") {
feat <- get.orig_feat.matrix(siamcat)
} else if (assoc.param$feature.type == "filtered") {
if (is.null(filt_feat(siamcat, verbose = 0))) {
stop("Features have not yet been filtered, exiting...\n")
}
feat <- get.filt_feat.matrix(siamcat)
} else if (assoc.param$feature.type == "normalized") {
if (is.null(norm_feat(siamcat, verbose = 0))) {
stop("Features have not yet been normalized, exiting...\n")
}
feat <- get.norm_feat.matrix(siamcat)
}
if ((any(colSums(feat) > 1.01) | any(feat < -0.01)) &
assoc.param$feature.type != "normalized") {
stop("This function expects compositional data. Exiting...")
}
meta <- meta(siamcat)
# check fn.plot
if (is.null(fn.plot)) {
if (verbose > 0) {
message(paste0(
"### WARNING: Not plotting to a pdf-file.\n",
"### The plot is optimized for landscape DIN-A4 (or similar) ",
"layout.\n### Please make sure that your plotting region is",
" large enough!!!\n### Use at your own risk..."
))
}
if (prompt == TRUE) {
continue <- askYesNo(
"Are you sure that you want to continue?",
default = TRUE,
prompts = getOption(
"askYesNo",
gettext(c("Yes", "No", "Cancel"))
)
)
} else {
continue <- TRUE
}
if (!continue || is.na(continue)) {
opt <- options(show.error.messages = FALSE)
on.exit(options(opt))
stop("Exiting...")
}
par.old <- par(no.readonly = TRUE)
}
# either give n_classes colors or color palette
col <- check.color.scheme.volcano(color.scheme)
########################################################################
# extract relevant info for plotting
temp <- get.plotting.idx(association.results,
alpha = assoc.param$alpha,
sort.by = sort.by, max.show = max.show,
verbose = verbose
)
if (!is.null(temp)) {
effect.size <- association.results[temp$idx, , drop = FALSE]
truncated <- temp$truncated
log.n0 <- assoc.param$log.n0
########################################################################
### generate plots with significant associations between
## features and labels
# make plot matrix dependent on panels parameters
if (verbose > 2) {
message("+++ preparing plotting layout")
}
layout.mat <- cbind(2, 1, 3)
widths <- c(0.5, 0.1, 0.4)
if (!is.null(fn.plot)) {
pdf(fn.plot, paper = "special", height = 8.27, width = 11.69)
# format:A4 landscape
}
layout(mat = layout.mat, widths = widths)
########################################################################
# PANEL 2: P-VALUES
# print p-values in second panel of the plot
associations.pvals.plot(
p.vals = effect.size$p.adj,
alpha = assoc.param$alpha,
mult.corr = assoc.param$mult.corr,
verbose = verbose
)
########################################################################
# PANEL 1: DATA
# prepare margins
associations.margins.plot(
species_names = row.names(effect.size),
verbose = verbose
)
bcol <- ifelse(effect.size$fc > 0, col[2], col[1])
associations.sp.plot(
effect.size$spearman, bcol,
rownames(effect.size), verbose
)
associations.fcs.plot(
effect.size$fc, bcol,
type = "CONTINUOUS", verbose)
# close pdf device
if (!is.null(fn.plot)) {
tmp <- dev.off()
} else {
par(par.old)
}
e.time <- proc.time()[3]
if (verbose > 1) {
message(paste(
"+ finished association.plot in",
formatC(e.time - s.time, digits = 3), "s"
))
}
if (verbose == 1 & !is.null(fn.plot)) {
message(paste(
"Plotted associations between features and label",
"successfully to:", fn.plot
))
}
}
}
#' @keywords internal
create.binary.assoc.plot <- function(siamcat, fn.plot, color.scheme,
sort.by, max.show, plot.type, panels, prompt, s.time, verbose) {
assoc.param <- assoc_param(siamcat)
association.results <- associations(siamcat, verbose = 0)
label <- label(siamcat)
# check panel and plot.type parameter
if (!all(panels %in% c("fc", "auroc", "prevalence"))) {
stop("Unknown panel-type selected!")
}
panels <- unique(panels)
if (length(panels) > 3) {
warning(
"Plot layout is not suited for more than 3 panels.",
"Continuing with first three panels."
)
panels <- panels[seq_len(3)]
}
if ((!plot.type %in% c("bean", "box", "quantile.box", "quantile.rect")) ||
length(plot.type) != 1) {
warning(
"Plot type has not been specified properly! Continue with",
" quantile.box."
)
plot.type <- "quantile.box"
}
# get features
if (assoc.param$feature.type == "original") {
feat <- get.orig_feat.matrix(siamcat)
} else if (assoc.param$feature.type == "filtered") {
if (is.null(filt_feat(siamcat, verbose = 0))) {
stop("Features have not yet been filtered, exiting...\n")
}
feat <- get.filt_feat.matrix(siamcat)
} else if (assoc.param$feature.type == "normalized") {
if (is.null(norm_feat(siamcat, verbose = 0))) {
stop("Features have not yet been normalized, exiting...\n")
}
feat <- get.norm_feat.matrix(siamcat)
}
if ((any(colSums(feat) > 1.01) | any(feat < -0.01)) &
assoc.param$feature.type != "normalized") {
stop("This function expects compositional data. Exiting...")
}
meta <- meta(siamcat)
# check fn.plot
if (is.null(fn.plot)) {
if (verbose > 0) {
message(paste0(
"### WARNING: Not plotting to a pdf-file.\n",
"### The plot is optimized for landscape DIN-A4 (or similar) ",
"layout.\n### Please make sure that your plotting region is",
" large enough!!!\n### Use at your own risk..."
))
}
if (prompt == TRUE) {
continue <- askYesNo(
"Are you sure that you want to continue?",
default = TRUE,
prompts = getOption(
"askYesNo",
gettext(c("Yes", "No", "Cancel"))
)
)
} else {
continue <- TRUE
}
if (!continue || is.na(continue)) {
opt <- options(show.error.messages = FALSE)
on.exit(options(opt))
stop("Exiting...")
}
par.old <- par(no.readonly = TRUE)
}
# either give n_classes colors or color palette
col <- check.color.scheme(color.scheme, label)
########################################################################
# extract relevant info for plotting
temp <- get.plotting.idx(association.results,
alpha = assoc.param$alpha,
sort.by = sort.by, max.show = max.show,
verbose = verbose
)
if (!is.null(temp)) {
effect.size <- association.results[temp$idx, , drop = FALSE]
truncated <- temp$truncated
log.n0 <- assoc.param$log.n0
feat.red <- feat[temp$idx, , drop = FALSE]
if (assoc.param$feature.type == "normalized") {
feat.plot <- feat.red
} else {
feat.red.log <- log10(feat.red + log.n0)
feat.plot <- feat.red.log
}
########################################################################
### generate plots with significant associations between
## features and labels
# make plot matrix dependent on panels parameters
if (verbose > 2) {
message("+++ preparing plotting layout")
}
if (length(panels) == 3) {
layout.mat <- cbind(2, 1, t(seq(3, length.out = length(panels))))
widths <- c(0.5, 0.1, rep(0.4 / 3, length(panels)))
} else {
layout.mat <- cbind(2, 1, t(seq(3, length.out = length(panels))))
widths <- c(0.5, 0.1, rep(0.2, length(panels)))
}
if (!is.null(fn.plot)) {
pdf(fn.plot, paper = "special", height = 8.27, width = 11.69)
# format:A4 landscape
}
layout(mat = layout.mat, widths = widths)
########################################################################
# PANEL 2: P-VALUES
# print p-values in second panel of the plot
associations.pvals.plot(
p.vals = effect.size$p.adj,
alpha = assoc.param$alpha, mult.corr = assoc.param$mult.corr,
verbose = verbose
)
########################################################################
# PANEL 1: DATA
# prepare margins
associations.margins.plot(
species_names = row.names(feat.red),
verbose = verbose
)
if (verbose > 2) {
message("+++ plotting results")
}
if (plot.type == "bean") {
associations.bean.plot(feat.plot, label,
col = col,
take.log = ifelse(assoc.param$feature.type == "normalized",
FALSE, TRUE
), verbose = verbose
)
} else if (plot.type == "box") {
associations.box.plot(feat.plot, label,
col = col,
take.log = ifelse(assoc.param$feature.type == "normalized",
FALSE, TRUE
), verbose = verbose
)
} else if (plot.type == "quantile.box") {
associations.quantile.box.plot(feat.plot, label,
col = col,
take.log = ifelse(assoc.param$feature.type == "normalized",
FALSE, TRUE
), verbose = verbose
)
} else if (plot.type == "quantile.rect") {
associations.quantile.rect.plot(feat.plot, label,
col = col,
take.log = ifelse(assoc.param$feature.type == "normalized",
FALSE, TRUE
), verbose = verbose
)
}
# plot title
xlab <- ifelse(assoc.param$feature.type == "normalized",
"Normalized abundance", "Abundance (log10-scale)"
)
if (!truncated) {
title(main = "Differentially abundant features", xlab = xlab)
} else {
title(main = paste(
"Differentially abundant features\nshowing top",
max.show, "features"
), xlab = xlab)
}
########################################################################
# OTHER PANELS
bcol <- ifelse(effect.size$fc > 0, col[2], col[1])
for (p in panels) {
if (p == "fc") {
associations.fcs.plot(
fc.all = effect.size$fc,
binary.cols = bcol, verbose = verbose
)
} else if (p == "prevalence") {
associations.pr.shift.plot(
pr.shifts = effect.size[, c("pr.n", "pr.p")], col = col,
verbose = verbose
)
} else if (p == "auroc") {
associations.aucs.plot(
aucs = effect.size[, c("auc", "auc.ci.l", "auc.ci.h")],
binary.cols = bcol, verbose = verbose
)
}
}
# close pdf device
if (!is.null(fn.plot)) {
tmp <- dev.off()
} else {
par(par.old)
}
e.time <- proc.time()[3]
if (verbose > 1) {
message(paste(
"+ finished association.plot in",
formatC(e.time - s.time, digits = 3), "s"
))
}
if (verbose == 1 & !is.null(fn.plot)) {
message(paste(
"Plotted associations between features and label",
"successfully to:", fn.plot
))
}
}
}
# ##############################################################################
### AUC
#' @keywords internal
associations.aucs.plot <- function(aucs, binary.cols, verbose = 1) {
if (verbose > 2) {
message("+ starting associations.aucs.plot")
}
# set margins
par(mar = c(5.1, 0, 4.1, 1.6))
# plot background
plot(NULL,
xlab = "", ylab = "", xaxs = "i", yaxs = "i", axes = FALSE,
xlim = c(0, 1), ylim = c(0.5, nrow(aucs) + 0.5), type = "n"
)
ticks <- seq(0, 1.0, length.out = 5)
tick.labels <- formatC(ticks, digits = 2)
# plot gridlines
for (v in ticks) {
abline(v = v, lty = 3, col = "lightgrey")
}
# make thicker line at .5
abline(v = .5, lty = 1, col = "lightgrey")
# plot single feature aucs
for (i in seq_len(nrow(aucs))) {
segments(
x0 = aucs[i, 2], x1 = aucs[i, 3], y0 = i, col = "lightgrey",
lwd = 1.5
)
points(aucs[i, 1], i, pch = 18, col = binary.cols[i])
points(aucs[i, 1], i, pch = 5, col = "black", cex = 0.9)
}
# Title and axis label
axis(side = 1, at = ticks, labels = tick.labels, cex.axis = 0.7)
title(main = "Feature AUCs", xlab = "AU-ROC")
if (verbose > 2) {
message("+ finished associations.aucs.plot")
}
}
# ##############################################################################
### FC
#' @keywords internal
associations.fcs.plot <-
function(fc.all, binary.cols, verbose = 1, type = "BINARY") {
if (verbose > 2) {
message("+ starting associations.fcs.plot")
}
# margins
par(mar = c(5.1, 0, 4.1, 1.6))
# get minimum and maximum fcs
mx <- max(ceiling(abs(
range(fc.all, na.rm = TRUE, finite = TRUE)
)))
mn <- -mx
# plot background
plot(NULL,
xlab = "", ylab = "", xaxs = "i", yaxs = "i", axes = FALSE,
xlim = c(mn, mx), ylim = c(0.2, length(fc.all) + 0.2), type = "n"
)
grid(NULL, NA, lty = 3, col = "lightgrey")
# plot bars
barplot(fc.all,
horiz = TRUE, width = 0.6, space = 2 / 3,
col = binary.cols, axes = FALSE, add = TRUE, names.arg = FALSE
)
# gridlines and axes labels
ticks <- seq(from = mn, to = mx, length.out = 5)
tick.labels <- formatC(ticks, digits = 2)
axis(side = 1, at = ticks, labels = tick.labels, cex.axis = 0.7)
if (type == "BINARY") {
title(main = "Fold change", xlab = "Generalized fold change")
} else if (type == "CONTINUOUS") {
title(main = "Effect size", xlab = "Linear model estimate")
}
if (verbose > 2) {
message("+ finished associations.fcs.plot")
}
}
# ##############################################################################
### PREVALENCE
#' @keywords internal
associations.pr.shift.plot <-
function(pr.shifts, col, verbose = 1) {
if (verbose > 2) {
message("+ starting associations.pr.shift.plot")
}
# margins
par(mar = c(5.1, 0, 4.1, 1.6))
# plot background
plot(
NULL, xlab = "", ylab = "", xaxs = "i", yaxs = "i", axes = FALSE,
xlim = c(0, 1), ylim = c(0.2, nrow(pr.shifts) + 0.2), type = "n"
)
# gridlines and axes labels
ticks <- seq(from = 0, to = 1, length.out = 5)
for (v in ticks) {
abline(v = v, lty = 3, col = "lightgrey")
}
tick.labels <- formatC(ticks * 100, digits = 3)
axis(side = 1, at = ticks, labels = tick.labels, cex.axis = 0.7)
# plot bars
row.names(pr.shifts) <- NULL