Commit 70495558 authored by Jakob Wirbel's avatar Jakob Wirbel
Browse files

add regression-type label, add warning for old mlr version.

parent b4165823
......@@ -8,11 +8,8 @@
#' @description This function creates a label object from metadata
#' or an atomic vector
#'
#' @usage create.label(label, case,
#' meta=NULL, control=NULL,
#' p.lab = NULL, n.lab = NULL,
#' remove.meta.column=FALSE,
#' verbose=1)
#' @usage create.label(label, case, meta=NULL, control=NULL,
#' p.lab = NULL, n.lab = NULL, remove.meta.column=FALSE, verbose=1)
#'
#' @param label named vector to create the label or the name of the metadata
#' column that will be used to create the label
......@@ -63,6 +60,8 @@
#'
#' @export
#'
#' @encoding UTF-8
#'
#' @return return either \itemize{
#' \item a list to be used in a SIMCAT object \strong{OR}
#' \item a list with entries \code{meta} and \code{label},
......@@ -73,11 +72,10 @@
#' data('meta_crc_zeller')
#'
#' label <- create.label(label='Group', case='CRC', meta=meta.crc.zeller)
create.label <- function(label, case, meta=NULL, control = NULL,
create.label <- function(label, case=NULL, meta=NULL, control=NULL,
p.lab=NULL, n.lab=NULL, remove.meta.column=FALSE, verbose=1) {
if (verbose > 1)
message("+ starting create.label")
s.time <- proc.time()[3]
#if metadata has been supplied and the label is of length 1
......@@ -85,11 +83,9 @@ create.label <- function(label, case, meta=NULL, control = NULL,
if (!label %in% colnames(meta))
stop("ERROR: Column ", label, " not found in the metadata\n")
if (is(meta,'sample_data')){
label.vec <- vapply(meta[, label], as.character,
FUN.VALUE = character(nrow(meta)))
label.vec <- as.matrix(as.data.frame(meta[,label]))[,1]
} else if (is.data.frame(meta)){
label.vec <- vapply(meta[, label], as.character,
FUN.VALUE = character(1))
label.vec <- meta[, label]
} else {
stop(paste0('Please provide the metadata either as a data.frame',
' or a sample_data object!'))
......@@ -118,9 +114,22 @@ create.label <- function(label, case, meta=NULL, control = NULL,
label.vec <- label.vec[!is.na(label.vec)]
}
# find out if binary or regression!
n.groups <- length(unique(label.vec))
if (is.character(label.vec) |
is.factor(label.vec) |
length(n.groups) == 2 | !is.null(case)){
if (!is.character(label.vec)){
x <- names(label.vec)
label.vec <- as.character(label.vec)
names(label.vec) <- x
}
# get different groups
groups <- unique(label.vec)
if (is.null(case)) stop("Case information is needed for binary label")
### checking case
if(!all(case %in% groups)){
stop("The chosen label does not contain values: ",
......@@ -135,7 +144,7 @@ create.label <- function(label, case, meta=NULL, control = NULL,
}else{
control <- setdiff(groups, case)
}
}else{
} else {
if(!control%in%groups){
stop("The chose label does not contain value:",control,
"\nInstead, contains: ", paste(groups, collapse=','))
......@@ -159,7 +168,8 @@ create.label <- function(label, case, meta=NULL, control = NULL,
n.lab <- ifelse(is.null(n.lab), gsub("[_.-]", ".", control), n.lab)
p.lab <- ifelse(is.null(p.lab),
ifelse(length(case) > 1, 'Case', gsub("[_.-]", ".", case)), p.lab)
ifelse(length(case) > 1, 'Case',
gsub("[_.-]", ".", case)), p.lab)
info <- c(-1, 1)
names(info) <- c(n.lab, p.lab)
......@@ -176,6 +186,16 @@ create.label <- function(label, case, meta=NULL, control = NULL,
label.new$type <- "BINARY"
label.new <- label.new
} else if (is.double(label.vec)){
if (!is.null(case) | !is.null(control)){
warning(paste0("Case and control parameters will be ignored for",
" continuous labels!"))
}
label.new <- list(label=label.vec, info=range(label.vec),
type='CONTINUOUS')
}
e.time <- proc.time()[3]
if (verbose > 0)
......
......@@ -16,10 +16,7 @@ check.label <- function(object){
errors <- c(errors, msg)
}
# check that label is binary or test
if (object$type != 'BINARY' & object$type != 'TEST'){
msg <- 'Label object is neither binary nor a test label!'
errors <- c(errors, msg)
}
if (object$type == 'BINARY'){
# check that info and label match up
if (!all(sort(unique(object$label)) == object$info)){
msg <- 'label info does not match to label entries!'
......@@ -30,6 +27,11 @@ check.label <- function(object){
msg <- 'Label info does not contain group names!'
errors <- c(errors, msg)
}
}
if (!object$type %in% c('BINARY', 'TEST', 'CONTINUOUS')){
msg <- 'Label object is neither binary, regression, nor a test label!'
errors <- c(errors, msg)
}
if (length(errors) == 0) NULL else errors
}
......@@ -87,21 +89,16 @@ check.assoc <- function(object){
errors <- c(errors, msg)
}
# check that assoc.param contains all entries
if (!all(names(object$assoc.param) == c('detect.lim', 'pr.cutoff',
'probs.fc', 'mult.corr', 'alpha', 'feature.type', 'paired'))){
if (!all(names(object$assoc.param) == c('formula', 'alpha', 'mult.corr',
'log.n0', 'pr.cutoff', 'test', 'feature.type', 'paired', 'probs.fc'))){
msg <- 'Association testing parameters do not contain all entries!'
errors <- c(errors, msg)
}
# check that all entries are valid and in the expected ranges
if (!all(vapply(object$assoc.param, class,
FUN.VALUE=character(1)) == c('numeric', 'numeric', 'numeric',
'character', 'numeric', 'character', 'logical'))){
msg<-'Association testing parameters do not contain the expected classes!'
errors <- c(errors, msg)
}
# detect.lim
if (object$assoc.param$detect.lim > 1 | object$assoc.param$detect.lim < 0){
msg<-'Detection limit (pseudocount) is not valid (not between 1 and 0)!'
if (object$assoc.param$log.n0 > 1 | object$assoc.param$log.n0 < 0){
msg<-paste0("Parameter 'log.n0' (pseudocount) is not valid (not ",
"between 1 and 0)!")
errors <- c(errors, msg)
}
# pr.cutoff
......@@ -135,11 +132,6 @@ check.assoc <- function(object){
errors <- c(errors, msg)
}
# check that assoc.results contains all that it should
if (!all(colnames(object$assoc.results)==c("fc", "p.val", "auc",
"auc.ci.l", "auc.ci.h", "pr.shift", "pr.n", "pr.p", "bcol", "p.adj"))){
msg <- 'Association results do not contain all needed entries!'
errors <- c(errors, msg)
}
if (nrow(object$assoc.results) < 1){
msg <- 'Association results are empty!'
errors <- c(errors, msg)
......@@ -296,6 +288,14 @@ check.data.split <- function(object){
# check model list for validity
#'@keywords internal
check.model.list <- function(object){
if (is(object$models[[1]], "WrappedModel")){
message("Legacy warning:\n",
"This SIAMCAT object seems to have been constructed with ",
"version 1.x, based on 'mlr'.\nYour current SIAMCAT version ",
"has been upgraded to use 'mlr3' internally.\nPlease consider ",
"re-training your SIAMCAT object or downgrading your SIAMCAT ",
"version in order to continue.")
}
errors <- character()
if (!all(c('model.type', 'feature.type', 'models') %in% names(object))){
msg <- 'Model list does not contain all needed entries!'
......@@ -308,12 +308,6 @@ check.model.list <- function(object){
' Should be length 1!')
errors <- c(errors, msg)
}
# check that all models in the list are mlr models
if (any(vapply(object$models, class,
FUN.VALUE = character(1)) != 'WrappedModel')){
msg <- 'Models are supposed to be mlr-WrappedModels!'
errors <- c(errors, msg)
}
# check feature type
if (!object$feature.type %in% c('original', 'filtered', 'normalized')){
msg <- paste0('Feature type ', object$feature.type,
......@@ -336,6 +330,7 @@ check.model.list <- function(object){
#'@keywords internal
check.eval.data <- function(object){
errors <- character()
if ('auroc' %in% names(object)){
# check that all entries are there
if (!all(c('roc', 'auroc', 'prc', 'auprc', 'ev') %in% names(object))){
msg <- 'Not all needed entries are given!'
......@@ -349,7 +344,8 @@ check.eval.data <- function(object){
# check prc
if (!is.list(object$prc) |
!all(names(object$prc) ==c('recall', 'precision')) |
length(unique(vapply(object$prc, length, FUN.VALUE=numeric(1))))!=1){
length(unique(vapply(object$prc, length,
FUN.VALUE=numeric(1))))!=1){
msg <- paste0('No valid entry for prc ',
'(missing entries or no list with entries of equal length)!')
errors <- c(errors, msg)
......@@ -361,20 +357,22 @@ check.eval.data <- function(object){
errors <- c(errors, msg)
}
# check that the lenghts of each entry in ev are the same
if (length(unique(vapply(object$ev, length, FUN.VALUE = numeric(1)))) != 1){
if (length(unique(vapply(object$ev, length,
FUN.VALUE = numeric(1)))) != 1){
msg <- 'No concordance for the entries in ev (unequal length)!'
errors <- c(errors, msg)
}
# check concordance between ev and prc
if (length(object$prc$recall) != length(object$ev$thresholds)){
msg <- 'No concordance for the entries in ev and prc (unequal length)!'
msg <- 'No concordance for the entries in ev and prc!'
errors <- c(errors, msg)
}
# for the case that there are multiple repeats
if (!is.null(object$roc.all)){
# check if all entries are there
if (!all(c('roc.all', 'auroc.all', 'prc.all', 'auprc.all', 'ev.all')
if (!all(c('roc.all', 'auroc.all', 'prc.all',
'auprc.all', 'ev.all')
%in% names(object))){
msg <- 'Not all needed entries are given!'
errors <- c(errors, msg)
......@@ -389,11 +387,13 @@ check.eval.data <- function(object){
if (length(unique(
vapply(object[grep('.all', names(object))], length,
FUN.VALUE = integer(1))))!=1){
msg<-'entries for individual repeats do not have concordant length!'
msg<- paste0('entries for individual repeats do not have ",
"concordant length!')
errors <- c(errors, msg)
}
### MORE CHECKS FOR EVAL_DATA WITH MULTIPLE REPEATS?
}
}
if (length(errors) == 0) NULL else errors
}
......
......@@ -777,6 +777,15 @@ setMethod("weight_matrix", "siamcat", function(siamcat, verbose=1) {
}
return(NULL)
}
if (is(temp[[1]], "WrappedModel")){
stop("Legacy warning:\n",
"This SIAMCAT object seems to have been constructed with ",
"version 1.x, based on 'mlr'.\nYour current SIAMCAT version ",
"has been upgraded to use 'mlr3' internally.\nPlease consider ",
"re-training your SIAMCAT object or downgrading your SIAMCAT ",
"version in order to continue.")
}
feat.type <- feature_type(siamcat)
if (feat.type == 'original'){
......@@ -791,8 +800,9 @@ setMethod("weight_matrix", "siamcat", function(siamcat, verbose=1) {
ncol=length(temp), dimnames=list(rownames(feat),
paste0('Model_', seq_along(temp))))
for (i in seq_along(temp)){
m.idx <- match(temp[[i]]$features, make.names(rownames(weight.mat)))
weight.mat[m.idx, i] <- temp[[i]]$feat.weights
m.idx <- match(names(temp[[i]]$features),
make.names(rownames(weight.mat)))
weight.mat[m.idx, i] <- temp[[i]]$features
}
weight.mat[is.na(weight.mat)] <- 0
......
......@@ -237,6 +237,7 @@ setReplaceMethod("data_split", c("siamcat", "list"), function(x, value) {
#'
#' @examples
#' data(siamcat_example)
#' siamcat_example <- train.model(siamcat_example, method='lasso')
#' model_list(siamcat_example) <- model_list(siamcat_example)
setGeneric("model_list<-", function(x, value)
standardGeneric("model_list<-"))
......
......@@ -96,6 +96,11 @@ siamcat <- function(..., feat=NULL, label=NULL, meta=NULL, phyloseq=NULL,
} else {
case <- NULL
}
if ('control' %in% names(other.args)){
control <- other.args$control
} else {
control <- NULL
}
# Remove names from arglist. Will replace them based on their class
names(other.args) <- NULL
......@@ -155,7 +160,7 @@ siamcat <- function(..., feat=NULL, label=NULL, meta=NULL, phyloseq=NULL,
arglistphyloseq))
# label object
temp <- validate.label(label, feat, meta, case, verbose)
temp <- validate.label(label, feat, meta, case, control, verbose)
label <- temp$label
if (!is.null(temp$meta)){
sample_data(other.args$phyloseq) <- temp$meta
......@@ -271,6 +276,9 @@ validate.features <- function(feat){
stop(paste0('SIAMCAT expects numerical features!.\n',
'Please check your feature matrix! Exiting...'))
}
if (length(unique(rownames(feat))) != nrow(feat)){
stop(paste0("Features need unique identifiers!"))
}
feat <- otu_table(feat, taxa_are_rows=TRUE)
return(feat)
} else if (is.data.frame(feat)){
......@@ -283,6 +291,9 @@ validate.features <- function(feat){
stop(paste0('SIAMCAT expects numerical features!.\n',
'Please check your feature data.frame! Exiting...'))
}
if (length(unique(rownames(feat))) != nrow(feat)){
stop(paste0("Features need unique identifiers!"))
}
feat <- otu_table(feat, taxa_are_rows=TRUE)
return(feat)
}
......@@ -290,7 +301,7 @@ validate.features <- function(feat){
# check label object
#' @keywords internal
validate.label <- function(label, feat, meta, case, verbose){
validate.label <- function(label, feat, meta, case, control, verbose){
# if NA, return simple label object which contains only one class
if (is.null(label)){
warning(paste0('No label information given! Generating SIAMCAT object ',
......@@ -303,14 +314,13 @@ validate.label <- function(label, feat, meta, case, verbose){
label <- label
} else if (is.character(label) & length(label) == 1){
if(is.null(meta)) stop('Metadata needed to generate label! Exiting...')
if(is.null(case)) stop('Case information needed! Exiting...')
temp <- create.label(meta=meta, label=label, case=case,
temp <- create.label(meta=meta, label=label, case=case, control=control,
verbose=verbose, remove.meta.column=TRUE)
label <- temp$label
meta <- temp$meta
} else if (is.atomic(label)) {
if(is.null(case)) stop('Case information needed! Exiting...')
label <- create.label(label=label, case=case, verbose=verbose)
label <- create.label(label=label, case=case, control=control,
verbose=verbose)
} else {
stop(paste0('Cannot interpret the label object!\nPlease ',
'provide either a label object, a column in your metadata, or a
......
......@@ -80,6 +80,17 @@ filter.label <- function(siamcat, ids, verbose = 1) {
setMethod("show", "siamcat", function(object) {
cat("siamcat-class object", fill = TRUE)
# if it is a SIAMCAT.v1 model, print a warning
if (is(models(object, verbose=0)[[1]], "WrappedModel")){
message("Legacy warning:\n",
"This SIAMCAT object seems to have been constructed with ",
"version 1.x, based on 'mlr'.\nYour current SIAMCAT version ",
"has been upgraded to use 'mlr3' internally.\nPlease consider ",
"re-training your SIAMCAT object or downgrading your SIAMCAT ",
"version in order to continue.")
stop("Exiting...")
}
# Label object
if (!is.null(label(object)))
label <- label(object)
......@@ -88,7 +99,7 @@ setMethod("show", "siamcat", function(object) {
n <- length(label$label)
cat(paste("label() Label object: ",
"Test label for", n, "samples"), fill=TRUE)
} else {
} else if (type == 'BINARY'){
p.lab <- names(which(label$info == max(label$info)))
n.lab <- setdiff(names(label$info), p.lab)
p.n <- length(which(label$label == max(label$info)))
......@@ -96,6 +107,12 @@ setMethod("show", "siamcat", function(object) {
cat(paste("label() Label object: ",
n.n, n.lab, "and", p.n, p.lab, "samples", sep = " "),
fill = TRUE)
} else if (type=='CONTINUOUS'){
cat(paste("label() Label object: ",
length(label$label), "samples, ranging from",
sprintf(fmt='%.2f', label$info[1]), "to",
sprintf(fmt='%.2f', label$info[2]), sep = " "),
fill = TRUE)
}
# filtered features
......@@ -166,9 +183,15 @@ setMethod("show", "siamcat", function(object) {
# evaluation data
if (!is.null(eval_data(object, verbose=0))) {
cat(paste( "eval_data() Evaluation data: Average AUC:",
round(eval_data(object)$auroc, 3), sep = " "),
fill = TRUE)
if ('auroc' %in% names(eval_data(object))){
cat(paste( "eval_data() Evaluation data: ",
"Average AUC:", round(eval_data(object)$auroc, 3),
sep = " "), fill = TRUE)
} else if ('r2' %in% names(eval_data(object))){
cat(paste("eval_data() Evaluation data: ",
"Average R-squared:", round(eval_data(object)$r2, 3),
sep = " "), fill = TRUE)
}
}
# print otu_table (always there).
......
......@@ -4,23 +4,32 @@
### Heidelberg 2012-2018 GNU GPL 3.0
#' @title Validate samples in labels, features, and metadata
#' @description This function checks if labels are available for all samples in
#' features. Additionally validates metadata, if available.
#'
#' @description This function checks if labels are available for all samples
#' in features. Additionally validates metadata, if available.
#'
#' @param siamcat an object of class \link{siamcat-class}
#'
#' @param verbose integer, control output: \code{0} for no output at all,
#' \code{1} for only information about progress and success, \code{2} for
#' normal level of information and \code{3} for full debug information,
#' defaults to \code{1}
#'
#' @keywords SIAMCAT validate.data
#'
#' @export
#'
#' @encoding UTF-8
#'
#' @details This function validates the data by checking that labels are
#' available for all samples in the feature matrix. Furthermore,
#' the number of samples per class is checked to ensure a minimum
#' number. If metadata is available, the overlap between labels and
#' metadata is checked as well.
#' available for all samples in the feature matrix. Furthermore, the number
#' of samples per class is checked to ensure a minimum number. If metadata
#' is available, the overlap between labels and metadata is checked as well.
#'
#' This function is run when a \link{siamcat-class} object is created.
#'
#' @return an object of class \link{siamcat-class}
#'
#' @examples
#' data(siamcat_example)
#'
......@@ -61,6 +70,7 @@ validate.data <- function(siamcat, verbose = 1) {
if (verbose > 2) {
message("+++ checking overlap between labels and features")
}
s.intersect <- intersect(names(label$label), colnames(feat))
# check and re-order features
s.removed <- ncol(feat) - length(s.intersect)
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment