Commit 7d6f1886 by Konrad Zych

removing unused parameters

parent 53491c01
Pipeline #3045 failed in 3 minutes 28 seconds
@@ -14,7 +14,7 @@
 ##### function to train a LASSO model for a single given C
 #' @export
 train.plm <- function(data, method = c("lasso", "enet", "ridge", "lasso_ll", "ridge_ll", "randomForest"),
-                      measure=list("acc"), min.nonzero.coeff=5){
+                      measure=list("acc"), min.nonzero.coeff=5, param.set=NULL){
   #model <- list(original.model=NULL, feat.weights=NULL)
   ## 1) Define the task
@@ -23,9 +23,8 @@ train.plm <- function(data, method = c("lasso", "enet", "ridge", "lasso_ll", "ri
   ## 2) Define the learner
   ## Choose a specific algorithm (e.g. linear discriminant analysis)
-  cl <- "classif.cvglmnet" ### the most common learner defined here to remove redundancy
-  paramSet <- NULL
   cost <- 10^seq(-2,3,length=6+5+10)
+  cl <- "classif.cvglmnet" ### the most common learner defined here to remove redundancy
+  parameters <- get.parameters.from.param.set(param.set=param.set, method=method, data=data)
   if(method == "lasso"){
     lrn <- makeLearner(cl, predict.type="prob", 'nlambda'=100, 'alpha'=1)
@@ -33,23 +32,20 @@ train.plm <- function(data, method = c("lasso", "enet", "ridge", "lasso_ll", "ri
     lrn <- makeLearner(cl, predict.type="prob", 'nlambda'=100, 'alpha'=0)
   } else if(method == "enet"){
     lrn <- makeLearner(cl, predict.type="prob", 'nlambda'=10)
-    paramSet <- makeParamSet(makeNumericParam('alpha', lower=0, upper=1))
   } else if(method == "lasso_ll"){
     cl <- "classif.LiblineaRL1LogReg"
     class.weights <- c(5, 1)
     names(class.weights) <- c(label$negative.lab, label$positive.lab)
     lrn <- makeLearner(cl, predict.type="prob", epsilon=1e-8, wi=class.weights)
-    paramSet <- makeParamSet(makeDiscreteParam("cost", values=cost))
   } else if(method == "ridge_ll"){
     cl <- "classif.LiblineaRL2LogReg"
     lrn <- makeLearner(cl, predict.type="prob", epsilon=1e-8, type=0)
-    paramSet <- makeParamSet(makeDiscreteParam("cost", values=cost))
+    parameters <- makeParamSet(makeDiscreteParam("cost", values=cost))
   } else if(method == "randomForest"){
     sqrt.mdim <- sqrt(nrow(data))
     cl <- "classif.randomForest"
     lrn <- makeLearner(cl, predict.type = "prob", fix.factors.prediction = TRUE)
-    paramSet <- makeParamSet(makeNumericParam('ntree', lower=100, upper=1000),
-                             makeDiscreteParam('mtry', values=c(round(sqrt.mdim/2), round(sqrt.mdim), round(sqrt.mdim*2))))
   } else {
     stop(method, " is not a valid method, currently supported: lasso, enet, ridge, lasso_ll, ridge_ll, randomForest.\n")
   }
@@ -57,11 +53,11 @@ train.plm <- function(data, method = c("lasso", "enet", "ridge", "lasso_ll", "ri
   ## 3) Fit the model
   ## Train the learner on the task using a random subset of the data as training set
-  if(!all(is.null(paramSet))){
+  if(!all(is.null(parameters))){
     hyperPars <- tuneParams(learner = lrn,
                             task = task,
                             resampling = makeResampleDesc('CV', iters=5L, stratify=TRUE),
-                            par.set = paramSet,
+                            par.set = parameters,
                             control = makeTuneControlGrid(resolution = 10L),
                             measures = measure)
     print(hyperPars)
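
For context, the block above is a plain mlr grid search. A minimal standalone sketch of the same pattern, assuming mlr (with its bundled sonar.task) and glmnet are installed; the learner and grid here are illustrative, not SIAMCAT defaults:

library(mlr)

# Grid-search the elastic-net mixing parameter alpha with 5-fold stratified CV,
# mirroring the tuneParams() call in train.plm above
lrn <- makeLearner("classif.cvglmnet", predict.type = "prob")
ps  <- makeParamSet(makeNumericParam("alpha", lower = 0, upper = 1))
res <- tuneParams(learner = lrn, task = sonar.task,
                  resampling = makeResampleDesc("CV", iters = 5L, stratify = TRUE),
                  par.set = ps,
                  control = makeTuneControlGrid(resolution = 10L),
                  measures = list(acc))
print(res)  # best alpha and its cross-validated accuracy
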
@@ -198,3 +194,28 @@ get.optimal.lambda.for.glmnet <- function(trained.model, training.task, perf.mea
   }
   return(opt.lambda)
 }
+# 'data' is passed in so the default mtry grid can be derived from the training data
+get.parameters.from.param.set <- function(param.set, method, data){
+  cost <- 10^seq(-2,3,length=6+5+10)
+  ntree <- c(100,1000)
+  sqrt.mdim <- sqrt(nrow(data))
+  mtry <- c(round(sqrt.mdim/2), round(sqrt.mdim), round(sqrt.mdim*2))
+  alpha <- c(0,1)
+  parameters <- NULL
+  if(method == "lasso_ll"){
+    if(!all(is.null(param.set))){
+      if("cost" %in% names(param.set)) cost <- param.set$cost
+    }
+    parameters <- makeParamSet(makeDiscreteParam("cost", values=cost))
+  } else if(method == "randomForest"){
+    if(!all(is.null(param.set))){
+      if("ntree" %in% names(param.set)) ntree <- param.set$ntree
+      if("mtry" %in% names(param.set)) mtry <- param.set$mtry
+    }
+    parameters <- makeParamSet(makeNumericParam('ntree', lower=ntree[1], upper=ntree[2]),
+                               makeDiscreteParam('mtry', values=mtry))
+  } else if(method == "enet"){
+    parameters <- makeParamSet(makeNumericParam('alpha', lower=alpha[1], upper=alpha[2]))
+  }
+  # lasso/ridge need no tuning grid; ridge_ll gets its grid inline in train.plm, so NULL falls through
+  return(parameters)
+}
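
A quick sketch of how the new helper behaves, with a hypothetical param.set override for the random forest grid; the toy data frame only serves to size the default mtry values and is not part of the commit:

library(mlr)

# Toy stand-in for the training data used to derive the default mtry grid
toy.data <- as.data.frame(matrix(rnorm(200), nrow = 20))

# No overrides: defaults apply (ntree in [100, 1000], mtry derived from sqrt(nrow))
get.parameters.from.param.set(param.set = NULL, method = "randomForest", data = toy.data)

# Override the ntree bounds and the mtry grid via param.set
get.parameters.from.param.set(param.set = list(ntree = c(200, 500), mtry = c(3, 6, 12)),
                              method = "randomForest", data = toy.data)
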
@@ -20,6 +20,7 @@
 #' @param stratify boolean, should the folds in the internal cross-validation be stratified?
 #' @param modsel.crit list, specifies the model selection criterion during internal cross-validation, may contain these: \code{c("auc", "f1", "acc", "pr")}
 #' @param min.nonzero.coeff integer number of minimum nonzero coefficients that should be present in the model (only for \code{"lasso"}, \code{"ridge"}, and \code{"enet"})
+#' @param param.set a list of extra parameters for the mlr run; may contain \code{cost} for lasso_ll and ridge_ll, \code{alpha} for enet, and \code{ntree}, \code{mtry} for randomForest
 #' @export
 #' @keywords SIAMCAT plm.trainer
 #' @return list containing \itemize{
@@ -30,7 +31,7 @@
 # TODO add details section for this function
 train.model <- function(feat, label, method = c("lasso", "enet", "ridge", "lasso_ll", "ridge_ll", "randomForest"),
                         data.split=NULL, stratify = TRUE,
-                        modsel.crit=list("auc"), min.nonzero.coeff = 1){
+                        modsel.crit=list("auc"), min.nonzero.coeff = 1, param.set=NULL){
   # TODO 1: modsel.criterion should be implemented
   # check modsel.crit
   if (!all(modsel.crit %in% c("auc", "f1", "acc", "pr", "auprc"))){
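
A hypothetical call showing how param.set is meant to reach the internal cross-validation; feat and label stand for the feature matrix and label object prepared elsewhere in the SIAMCAT workflow:

# Custom cost grid for the LiblineaR L1-regularised logistic regression
models.list <- train.model(feat = feat, label = label, method = "lasso_ll",
                           stratify = TRUE, modsel.crit = list("auc"),
                           min.nonzero.coeff = 1,
                           param.set = list(cost = 10^seq(-2, 2, length = 9)))
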
@@ -91,7 +92,7 @@ train.model <- function(feat, label, method = c("lasso", "enet", "ridge", "lass
     data$label <- train.label
     ### internal cross-validation for model selection
-    model <- train.plm(data=data, method = method, measure=measure, min.nonzero.coeff=min.nonzero.coeff)
+    model <- train.plm(data=data, method = method, measure=measure, min.nonzero.coeff=min.nonzero.coeff, param.set=param.set)
     if(!all(model$feat.weights == 0)){
       models.list[[r]] <- model
     }else{
@@ -34,10 +34,12 @@ DEBUG.CHECKS <- FALSE # performs additional checks (asserting tha
   make_option('--stratify', type='logical', default=TRUE, help='Should cross-validation for model selection be stratified
               such that an approx. equal proportion of positive examples
               are contained in each subset (only for binary labels)?'),
-  make_option('--sel_criterion', type='character', default='auc', help='Evaluation criterion for model selection (options: \'acc\',
+  make_option('--sel_criterion', type='character', default='auc', help='Evaluation criterion for model selection (options: \'acc\',
               \'auc\', \'auprc\', \'f1\')'),
-  make_option('--min_nonzero_coeff', type='integer', default=1, help='Minimum number of non-zero coefficients required for a model
-              to be considered in model selection')
+  make_option('--min_nonzero_coeff', type='integer', default=1, help='Minimum number of non-zero coefficients required for a model
+              to be considered in model selection'),
+  make_option('--param_set', type='character', default=NULL, help='a list of extra parameters for the mlr run; may contain: cost for lasso_ll and ridge_ll,
+              alpha for enet, and ntree, mtry for randomForest')
 )
 opt <- parse_args(OptionParser(option_list=option_list))
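
One caveat: optparse hands --param_set over as a single character string, while train.model expects a list. The commit does not include a converter; a hypothetical one (expecting 'name=v1,v2;name=...' input with numeric values, not part of this commit) might look like:

# e.g. --param_set 'cost=0.01,0.1,1,10;ntree=100,1000'
parse.param.set <- function(s){
  if (is.null(s)) return(NULL)
  pairs <- strsplit(strsplit(s, ';')[[1]], '=')
  vals  <- lapply(pairs, function(p) as.numeric(strsplit(p[2], ',')[[1]]))
  names(vals) <- vapply(pairs, `[[`, character(1), 1)
  vals
}
param.set <- parse.param.set(opt$param_set)  # would replace the raw opt$param_set below
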
@@ -54,6 +56,7 @@ cat('mlr_models_list =', opt$mlr_models_list, '\n')
 cat('stratify =', opt$stratify, '\n')
 cat('sel_criterion =', opt$sel_criterion, '\n')
 cat('min_nonzero_coeff =', opt$min_nonzero_coeff, '\n')
+cat('param_set =', opt$param_set, '\n')
 cat('\n')
@@ -82,7 +85,8 @@ models.list <- train.model(feat = feat,
                            data.split=opt$train_sets,
                            stratify = opt$stratify,
                            modsel.crit = opt$sel_criterion,
-                           min.nonzero.coeff = opt$min_nonzero_coeff)
+                           min.nonzero.coeff = opt$min_nonzero_coeff,
+                           param.set = opt$param_set)
 save(models.list, file=opt$mlr_models_list)