Commit 70495558 authored by Jakob Wirbel's avatar Jakob Wirbel
Browse files

add regression-type label, add warning for old mlr version.

parent b4165823
...@@ -8,11 +8,8 @@ ...@@ -8,11 +8,8 @@
#' @description This function creates a label object from metadata #' @description This function creates a label object from metadata
#' or an atomic vector #' or an atomic vector
#' #'
#' @usage create.label(label, case, #' @usage create.label(label, case, meta=NULL, control=NULL,
#' meta=NULL, control=NULL, #' p.lab = NULL, n.lab = NULL, remove.meta.column=FALSE, verbose=1)
#' p.lab = NULL, n.lab = NULL,
#' remove.meta.column=FALSE,
#' verbose=1)
#' #'
#' @param label named vector to create the label or the name of the metadata #' @param label named vector to create the label or the name of the metadata
#' column that will be used to create the label #' column that will be used to create the label
...@@ -22,8 +19,8 @@ ...@@ -22,8 +19,8 @@
#' variable has multiple values, all the other values will be used a negative #' variable has multiple values, all the other values will be used a negative
#' label (testing one vs rest). #' label (testing one vs rest).
#' #'
#' @param meta metadata dataframe object or an object of class #' @param meta metadata dataframe object or an object of class
#' \link[phyloseq]{sample_data-class} #' \link[phyloseq]{sample_data-class}
#' #'
#' @param control name of a label or vector with names that will be used as a #' @param control name of a label or vector with names that will be used as a
#' negative label. All values that are nor equal to case and control will be #' negative label. All values that are nor equal to case and control will be
...@@ -41,14 +38,14 @@ ...@@ -41,14 +38,14 @@
#' values. #' values.
#' #'
#' @param remove.meta.column boolean indicating if the label column in the #' @param remove.meta.column boolean indicating if the label column in the
#' metadata should be retained. Please note that if this is set to #' metadata should be retained. Please note that if this is set to
#' \code{TRUE}, the function will return a list as result. Defaults to #' \code{TRUE}, the function will return a list as result. Defaults to
#' \code{FALSE} #' \code{FALSE}
#' #'
#' @param verbose integer, control output: \code{0} for no output at all, #' @param verbose integer, control output: \code{0} for no output at all,
#' \code{1} for only information about progress and success, \code{2} for #' \code{1} for only information about progress and success, \code{2} for
#' normal level of information and \code{3} for full debug information, #' normal level of information and \code{3} for full debug information,
#' defaults to \code{1} #' defaults to \code{1}
#' #'
#' @keywords create.label #' @keywords create.label
#' #'
...@@ -62,6 +59,8 @@ ...@@ -62,6 +59,8 @@
#' contains the label information. #' contains the label information.
#' #'
#' @export #' @export
#'
#' @encoding UTF-8
#' #'
#' @return return either \itemize{ #' @return return either \itemize{
#' \item a list to be used in a SIMCAT object \strong{OR} #' \item a list to be used in a SIMCAT object \strong{OR}
...@@ -73,11 +72,10 @@ ...@@ -73,11 +72,10 @@
#' data('meta_crc_zeller') #' data('meta_crc_zeller')
#' #'
#' label <- create.label(label='Group', case='CRC', meta=meta.crc.zeller) #' label <- create.label(label='Group', case='CRC', meta=meta.crc.zeller)
create.label <- function(label, case, meta=NULL, control = NULL, create.label <- function(label, case=NULL, meta=NULL, control=NULL,
p.lab=NULL, n.lab=NULL, remove.meta.column=FALSE, verbose=1) { p.lab=NULL, n.lab=NULL, remove.meta.column=FALSE, verbose=1) {
if (verbose > 1) if (verbose > 1)
message("+ starting create.label") message("+ starting create.label")
s.time <- proc.time()[3] s.time <- proc.time()[3]
#if metadata has been supplied and the label is of length 1 #if metadata has been supplied and the label is of length 1
...@@ -85,11 +83,9 @@ create.label <- function(label, case, meta=NULL, control = NULL, ...@@ -85,11 +83,9 @@ create.label <- function(label, case, meta=NULL, control = NULL,
if (!label %in% colnames(meta)) if (!label %in% colnames(meta))
stop("ERROR: Column ", label, " not found in the metadata\n") stop("ERROR: Column ", label, " not found in the metadata\n")
if (is(meta,'sample_data')){ if (is(meta,'sample_data')){
label.vec <- vapply(meta[, label], as.character, label.vec <- as.matrix(as.data.frame(meta[,label]))[,1]
FUN.VALUE = character(nrow(meta)))
} else if (is.data.frame(meta)){ } else if (is.data.frame(meta)){
label.vec <- vapply(meta[, label], as.character, label.vec <- meta[, label]
FUN.VALUE = character(1))
} else { } else {
stop(paste0('Please provide the metadata either as a data.frame', stop(paste0('Please provide the metadata either as a data.frame',
' or a sample_data object!')) ' or a sample_data object!'))
...@@ -118,64 +114,88 @@ create.label <- function(label, case, meta=NULL, control = NULL, ...@@ -118,64 +114,88 @@ create.label <- function(label, case, meta=NULL, control = NULL,
label.vec <- label.vec[!is.na(label.vec)] label.vec <- label.vec[!is.na(label.vec)]
} }
# get different groups # find out if binary or regression!
groups <- unique(label.vec) n.groups <- length(unique(label.vec))
### checking case if (is.character(label.vec) |
if(!all(case %in% groups)){ is.factor(label.vec) |
stop("The chosen label does not contain values: ", length(n.groups) == 2 | !is.null(case)){
paste(case,collapse=","),"\nInstead, contains: ",
paste(groups, collapse=','))
}
### checking control if (!is.character(label.vec)){
if (is.null(control)) { x <- names(label.vec)
if((length(groups)-length(case))>1){ label.vec <- as.character(label.vec)
control <- "rest" names(label.vec) <- x
}else{
control <- setdiff(groups, case)
} }
}else{
if(!control%in%groups){ # get different groups
stop("The chose label does not contain value:",control, groups <- unique(label.vec)
"\nInstead, contains: ", paste(groups, collapse=',')) if (is.null(case)) stop("Case information is needed for binary label")
### checking case
if(!all(case %in% groups)){
stop("The chosen label does not contain values: ",
paste(case,collapse=","),"\nInstead, contains: ",
paste(groups, collapse=','))
} }
### dropping unused values
if(any(!groups%in%c(case, control))){ ### checking control
label.vec <- label.vec[which(label.vec%in%c(case, control))] if (is.null(control)) {
warning("Dropping values: ", if((length(groups)-length(case))>1){
paste(groups[which(!groups%in%c(case, control))], control <- "rest"
collapse=', '), '\n') }else{
control <- setdiff(groups, case)
}
} else {
if(!control%in%groups){
stop("The chose label does not contain value:",control,
"\nInstead, contains: ", paste(groups, collapse=','))
}
### dropping unused values
if(any(!groups%in%c(case, control))){
label.vec <- label.vec[which(label.vec%in%c(case, control))]
warning("Dropping values: ",
paste(groups[which(!groups%in%c(case, control))],
collapse=', '), '\n')
}
} }
}
# message status # message status
if (verbose > 0) if (verbose > 0)
message("Label used as case:\n ",paste(case,collapse=","), message("Label used as case:\n ",paste(case,collapse=","),
"\nLabel used as control:\n ",paste(control,collapse=",")) "\nLabel used as control:\n ",paste(control,collapse=","))
# create new label.object # create new label.object
label.new <- list(label = rep(-1, length(label.vec))) label.new <- list(label = rep(-1, length(label.vec)))
n.lab <- ifelse(is.null(n.lab), gsub("[_.-]", ".", control), n.lab) n.lab <- ifelse(is.null(n.lab), gsub("[_.-]", ".", control), n.lab)
p.lab <- ifelse(is.null(p.lab), p.lab <- ifelse(is.null(p.lab),
ifelse(length(case) > 1, 'Case', gsub("[_.-]", ".", case)), p.lab) ifelse(length(case) > 1, 'Case',
gsub("[_.-]", ".", case)), p.lab)
info <- c(-1, 1) info <- c(-1, 1)
names(info) <- c(n.lab, p.lab) names(info) <- c(n.lab, p.lab)
names(label.new$label) <- names(label.vec) names(label.new$label) <- names(label.vec)
if (length(case) > 1){ if (length(case) > 1){
label.new$label[which(label.vec %in% case)] <- 1 label.new$label[which(label.vec %in% case)] <- 1
} else { } else {
label.new$label[which(label.vec == case)] <- 1 label.new$label[which(label.vec == case)] <- 1
} }
label.new$info <- info
label.new$type <- "BINARY"
label.new$info <- info label.new <- label.new
label.new$type <- "BINARY" } else if (is.double(label.vec)){
if (!is.null(case) | !is.null(control)){
warning(paste0("Case and control parameters will be ignored for",
" continuous labels!"))
}
label.new <- label.new label.new <- list(label=label.vec, info=range(label.vec),
type='CONTINUOUS')
}
e.time <- proc.time()[3] e.time <- proc.time()[3]
if (verbose > 0) if (verbose > 0)
......
...@@ -16,18 +16,20 @@ check.label <- function(object){ ...@@ -16,18 +16,20 @@ check.label <- function(object){
errors <- c(errors, msg) errors <- c(errors, msg)
} }
# check that label is binary or test # check that label is binary or test
if (object$type != 'BINARY' & object$type != 'TEST'){ if (object$type == 'BINARY'){
msg <- 'Label object is neither binary nor a test label!' # check that info and label match up
errors <- c(errors, msg) if (!all(sort(unique(object$label)) == object$info)){
} msg <- 'label info does not match to label entries!'
# check that info and label match up errors <- c(errors, msg)
if (!all(sort(unique(object$label)) == object$info)){ }
msg <- 'label info does not match to label entries!' # check that info has names
errors <- c(errors, msg) if (is.null(names(object$info))){
msg <- 'Label info does not contain group names!'
errors <- c(errors, msg)
}
} }
# check that info has names if (!object$type %in% c('BINARY', 'TEST', 'CONTINUOUS')){
if (is.null(names(object$info))){ msg <- 'Label object is neither binary, regression, nor a test label!'
msg <- 'Label info does not contain group names!'
errors <- c(errors, msg) errors <- c(errors, msg)
} }
if (length(errors) == 0) NULL else errors if (length(errors) == 0) NULL else errors
...@@ -87,21 +89,16 @@ check.assoc <- function(object){ ...@@ -87,21 +89,16 @@ check.assoc <- function(object){
errors <- c(errors, msg) errors <- c(errors, msg)
} }
# check that assoc.param contains all entries # check that assoc.param contains all entries
if (!all(names(object$assoc.param) == c('detect.lim', 'pr.cutoff', if (!all(names(object$assoc.param) == c('formula', 'alpha', 'mult.corr',
'probs.fc', 'mult.corr', 'alpha', 'feature.type', 'paired'))){ 'log.n0', 'pr.cutoff', 'test', 'feature.type', 'paired', 'probs.fc'))){
msg <- 'Association testing parameters do not contain all entries!' msg <- 'Association testing parameters do not contain all entries!'
errors <- c(errors, msg) errors <- c(errors, msg)
} }
# check that all entries are valid and in the expected ranges # check that all entries are valid and in the expected ranges
if (!all(vapply(object$assoc.param, class,
FUN.VALUE=character(1)) == c('numeric', 'numeric', 'numeric',
'character', 'numeric', 'character', 'logical'))){
msg<-'Association testing parameters do not contain the expected classes!'
errors <- c(errors, msg)
}
# detect.lim # detect.lim
if (object$assoc.param$detect.lim > 1 | object$assoc.param$detect.lim < 0){ if (object$assoc.param$log.n0 > 1 | object$assoc.param$log.n0 < 0){
msg<-'Detection limit (pseudocount) is not valid (not between 1 and 0)!' msg<-paste0("Parameter 'log.n0' (pseudocount) is not valid (not ",
"between 1 and 0)!")
errors <- c(errors, msg) errors <- c(errors, msg)
} }
# pr.cutoff # pr.cutoff
...@@ -135,11 +132,6 @@ check.assoc <- function(object){ ...@@ -135,11 +132,6 @@ check.assoc <- function(object){
errors <- c(errors, msg) errors <- c(errors, msg)
} }
# check that assoc.results contains all that it should # check that assoc.results contains all that it should
if (!all(colnames(object$assoc.results)==c("fc", "p.val", "auc",
"auc.ci.l", "auc.ci.h", "pr.shift", "pr.n", "pr.p", "bcol", "p.adj"))){
msg <- 'Association results do not contain all needed entries!'
errors <- c(errors, msg)
}
if (nrow(object$assoc.results) < 1){ if (nrow(object$assoc.results) < 1){
msg <- 'Association results are empty!' msg <- 'Association results are empty!'
errors <- c(errors, msg) errors <- c(errors, msg)
...@@ -296,6 +288,14 @@ check.data.split <- function(object){ ...@@ -296,6 +288,14 @@ check.data.split <- function(object){
# check model list for validity # check model list for validity
#'@keywords internal #'@keywords internal
check.model.list <- function(object){ check.model.list <- function(object){
if (is(object$models[[1]], "WrappedModel")){
message("Legacy warning:\n",
"This SIAMCAT object seems to have been constructed with ",
"version 1.x, based on 'mlr'.\nYour current SIAMCAT version ",
"has been upgraded to use 'mlr3' internally.\nPlease consider ",
"re-training your SIAMCAT object or downgrading your SIAMCAT ",
"version in order to continue.")
}
errors <- character() errors <- character()
if (!all(c('model.type', 'feature.type', 'models') %in% names(object))){ if (!all(c('model.type', 'feature.type', 'models') %in% names(object))){
msg <- 'Model list does not contain all needed entries!' msg <- 'Model list does not contain all needed entries!'
...@@ -308,12 +308,6 @@ check.model.list <- function(object){ ...@@ -308,12 +308,6 @@ check.model.list <- function(object){
' Should be length 1!') ' Should be length 1!')
errors <- c(errors, msg) errors <- c(errors, msg)
} }
# check that all models in the list are mlr models
if (any(vapply(object$models, class,
FUN.VALUE = character(1)) != 'WrappedModel')){
msg <- 'Models are supposed to be mlr-WrappedModels!'
errors <- c(errors, msg)
}
# check feature type # check feature type
if (!object$feature.type %in% c('original', 'filtered', 'normalized')){ if (!object$feature.type %in% c('original', 'filtered', 'normalized')){
msg <- paste0('Feature type ', object$feature.type, msg <- paste0('Feature type ', object$feature.type,
...@@ -336,63 +330,69 @@ check.model.list <- function(object){ ...@@ -336,63 +330,69 @@ check.model.list <- function(object){
#'@keywords internal #'@keywords internal
check.eval.data <- function(object){ check.eval.data <- function(object){
errors <- character() errors <- character()
# check that all entries are there if ('auroc' %in% names(object)){
if (!all(c('roc', 'auroc', 'prc', 'auprc', 'ev') %in% names(object))){ # check that all entries are there
msg <- 'Not all needed entries are given!' if (!all(c('roc', 'auroc', 'prc', 'auprc', 'ev') %in% names(object))){
errors <- c(errors, msg) msg <- 'Not all needed entries are given!'
} errors <- c(errors, msg)
# check roc
if (!is(object$roc,'roc')){
msg <- 'Entry for roc is not an object of class roc (from pROC)!'
errors <- c(errors, msg)
}
# check prc
if (!is.list(object$prc) |
!all(names(object$prc) ==c('recall', 'precision')) |
length(unique(vapply(object$prc, length, FUN.VALUE=numeric(1))))!=1){
msg <- paste0('No valid entry for prc ',
'(missing entries or no list with entries of equal length)!')
errors <- c(errors, msg)
}
# check ev
if (!is.list(object$ev) |
!all(names(object$ev) == c("tp", "tn", "fp", "fn", "thresholds"))){
msg <- 'Not a valid entry for ev (missing entries or no list)!'
errors <- c(errors, msg)
}
# check that the lenghts of each entry in ev are the same
if (length(unique(vapply(object$ev, length, FUN.VALUE = numeric(1)))) != 1){
msg <- 'No concordance for the entries in ev (unequal length)!'
errors <- c(errors, msg)
}
# check concordance between ev and prc
if (length(object$prc$recall) != length(object$ev$thresholds)){
msg <- 'No concordance for the entries in ev and prc (unequal length)!'
errors <- c(errors, msg)
}
# for the case that there are multiple repeats
if (!is.null(object$roc.all)){
# check if all entries are there
if (!all(c('roc.all', 'auroc.all', 'prc.all', 'auprc.all', 'ev.all')
%in% names(object))){
msg <- 'Not all needed entries are given!'
errors <- c(errors, msg)
} }
# test roc.all # check roc
if (!all(vapply(object$roc.all, class, if (!is(object$roc,'roc')){
FUN.VALUE = character(1)) == 'roc')){ msg <- 'Entry for roc is not an object of class roc (from pROC)!'
msg <- 'roc.all entries are not objects of class roc!' errors <- c(errors, msg)
}
# check prc
if (!is.list(object$prc) |
!all(names(object$prc) ==c('recall', 'precision')) |
length(unique(vapply(object$prc, length,
FUN.VALUE=numeric(1))))!=1){
msg <- paste0('No valid entry for prc ',
'(missing entries or no list with entries of equal length)!')
errors <- c(errors, msg)
}
# check ev
if (!is.list(object$ev) |
!all(names(object$ev) == c("tp", "tn", "fp", "fn", "thresholds"))){
msg <- 'Not a valid entry for ev (missing entries or no list)!'
errors <- c(errors, msg)
}
# check that the lenghts of each entry in ev are the same
if (length(unique(vapply(object$ev, length,
FUN.VALUE = numeric(1)))) != 1){
msg <- 'No concordance for the entries in ev (unequal length)!'
errors <- c(errors, msg)
}
# check concordance between ev and prc
if (length(object$prc$recall) != length(object$ev$thresholds)){
msg <- 'No concordance for the entries in ev and prc!'
errors <- c(errors, msg)
}
# for the case that there are multiple repeats
if (!is.null(object$roc.all)){
# check if all entries are there
if (!all(c('roc.all', 'auroc.all', 'prc.all',
'auprc.all', 'ev.all')
%in% names(object))){
msg <- 'Not all needed entries are given!'
errors <- c(errors, msg)
}
# test roc.all
if (!all(vapply(object$roc.all, class,
FUN.VALUE = character(1)) == 'roc')){
msg <- 'roc.all entries are not objects of class roc!'
errors <- c(errors, msg)
}
# test lenght concordance
if (length(unique(
vapply(object[grep('.all', names(object))], length,
FUN.VALUE = integer(1))))!=1){
msg<- paste0('entries for individual repeats do not have ",
"concordant length!')
errors <- c(errors, msg) errors <- c(errors, msg)
} }
# test lenght concordance ### MORE CHECKS FOR EVAL_DATA WITH MULTIPLE REPEATS?
if (length(unique(
vapply(object[grep('.all', names(object))], length,
FUN.VALUE = integer(1))))!=1){
msg<-'entries for individual repeats do not have concordant length!'
errors <- c(errors, msg)
} }
### MORE CHECKS FOR EVAL_DATA WITH MULTIPLE REPEATS?
} }
if (length(errors) == 0) NULL else errors if (length(errors) == 0) NULL else errors
} }
......
...@@ -777,6 +777,15 @@ setMethod("weight_matrix", "siamcat", function(siamcat, verbose=1) { ...@@ -777,6 +777,15 @@ setMethod("weight_matrix", "siamcat", function(siamcat, verbose=1) {
} }
return(NULL) return(NULL)
} }
if (is(temp[[1]], "WrappedModel")){
stop("Legacy warning:\n",
"This SIAMCAT object seems to have been constructed with ",
"version 1.x, based on 'mlr'.\nYour current SIAMCAT version ",
"has been upgraded to use 'mlr3' internally.\nPlease consider ",
"re-training your SIAMCAT object or downgrading your SIAMCAT ",
"version in order to continue.")
}
feat.type <- feature_type(siamcat) feat.type <- feature_type(siamcat)
if (feat.type == 'original'){ if (feat.type == 'original'){
...@@ -791,8 +800,9 @@ setMethod("weight_matrix", "siamcat", function(siamcat, verbose=1) { ...@@ -791,8 +800,9 @@ setMethod("weight_matrix", "siamcat", function(siamcat, verbose=1) {
ncol=length(temp), dimnames=list(rownames(feat), ncol=length(temp), dimnames=list(rownames(feat),
paste0('Model_', seq_along(temp)))) paste0('Model_', seq_along(temp))))
for (i in seq_along(temp)){ for (i in seq_along(temp)){
m.idx <- match(temp[[i]]$features, make.names(rownames(weight.mat))) m.idx <- match(names(temp[[i]]$features),
weight.mat[m.idx, i] <- temp[[i]]$feat.weights make.names(rownames(weight.mat)))
weight.mat[m.idx, i] <- temp[[i]]$features
} }
weight.mat[is.na(weight.mat)] <- 0 weight.mat[is.na(weight.mat)] <- 0
......
...@@ -237,6 +237,7 @@ setReplaceMethod("data_split", c("siamcat", "list"), function(x, value) { ...@@ -237,6 +237,7 @@ setReplaceMethod("data_split", c("siamcat", "list"), function(x, value) {
#' #'
#' @examples #' @examples
#' data(siamcat_example) #' data(siamcat_example)
#' siamcat_example <- train.model(siamcat_example, method='lasso')
#' model_list(siamcat_example) <- model_list(siamcat_example) #' model_list(siamcat_example) <- model_list(siamcat_example)
setGeneric("model_list<-", function(x, value) setGeneric("model_list<-", function(x, value)
standardGeneric("model_list<-")) standardGeneric("model_list<-"))
......
...@@ -81,7 +81,7 @@ ...@@ -81,7 +81,7 @@
#' case='CRC') #' case='CRC')
siamcat <- function(..., feat=NULL, label=NULL, meta=NULL, phyloseq=NULL, siamcat <- function(..., feat=NULL, label=NULL, meta=NULL, phyloseq=NULL,
validate=TRUE, verbose=3) { validate=TRUE, verbose=3) {
if (is.null(phyloseq) && is.null(feat)){ if (is.null(phyloseq) && is.null(feat)){
stop(paste0('SIAMCAT needs either a feature matrix or a phyloseq', stop(paste0('SIAMCAT needs either a feature matrix or a phyloseq',
' object!!! Exiting...')) ' object!!! Exiting...'))
...@@ -96,6 +96,11 @@ siamcat <- function(..., feat=NULL, label=NULL, meta=NULL, phyloseq=NULL, ...@@ -96,6 +96,11 @@ siamcat <- function(..., feat=NULL, label=NULL, meta=NULL, phyloseq=NULL,
} else { } else {
case <- NULL case <- NULL
} }
if ('control' %in% names(other.args)){
control <- other.args$control
} else {
control <- NULL
}
# Remove names from arglist. Will replace them based on their class # Remove names from arglist. Will replace them based on their class
names(other.args) <- NULL names(other.args) <- NULL
...@@ -139,7 +144,7 @@ siamcat <- function(..., feat=NULL, label=NULL, meta=NULL, phyloseq=NULL, ...@@ -139,7 +144,7 @@ siamcat <- function(..., feat=NULL, label=NULL, meta=NULL, phyloseq=NULL,
# validate features and metadata # validate features and metadata
feat <- validate.features(feat)