...
 
...@@ -67,42 +67,36 @@ create.label.from.metadata <- function(meta, column, case, control = NULL, ...@@ -67,42 +67,36 @@ create.label.from.metadata <- function(meta, column, case, control = NULL,
labels <- unique(metaColumn) labels <- unique(metaColumn)
if (length(labels) == 2){ ### checking case
if (verbose > 0) message("Column ", column, " contains binary label\n") if(!all(case%in%labels)){
if(!case%in%labels){ stop("Column ", column, " does not contain values:",
stop("Column ", column, " does not contain value:",case,"\n") paste(case,collapse=","),"\n")
} }
if (is.null(control)) {
control <- setdiff(unique(labels), case)
} else {
if(!control%in%labels){
stop("Column ", column, " does not contain value:",control,"\n")
}
}
}else if(length(labels) > 2){ ### checking control
if(!case%in%labels){ if (is.null(control)) {
stop("Column ", column, " does not contain value:",case,"\n") if((length(labels)-length(case))>1){
}
if (is.null(control)) {
control <- "rest" control <- "rest"
} else { }else{
if(!control%in%labels){ control <- setdiff(labels, case)
stop("Column ", column, " does not contain value:",control,"\n") }
} }else{
if(any(!labels%in%c(case, control))){ if(!control%in%labels){
metaColumn <- metaColumn[which(metaColumn%in%c(case, control))] stop("Column ", column, " does not contain value:",control,"\n")
warning("Dropping values: ", }
labels[which(!labels%in%c(case, control))]) ### dropping unused values
} if(any(!labels%in%c(case, control))){
metaColumn <- metaColumn[which(metaColumn%in%c(case, control))]
warning("Dropping values: ",
labels[which(!labels%in%c(case, control))])
} }
} }
if (verbose > 0) if (verbose > 0)
message("Label used as case:\n ",case, message("Label used as case:\n ",paste(case,collapse=","),
"\nLabel used as control:\n ", "\nLabel used as control:\n ",paste(control,collapse=","))
paste(labels[which(labels!=case)], collapse = ","))
label <- label <-
list( list(
label = rep(-1, length(metaColumn)), label = rep(-1, length(metaColumn)),
...@@ -110,14 +104,24 @@ create.label.from.metadata <- function(meta, column, case, control = NULL, ...@@ -110,14 +104,24 @@ create.label.from.metadata <- function(meta, column, case, control = NULL,
negative.lab = (-1) negative.lab = (-1)
) )
label$n.lab <- gsub("[_.-]", " ", control) label$n.lab <- gsub("[_.-]", " ", control)
label$p.lab <- gsub("[_.-]", " ", case) if(length(case)>1){
label$p.lab <- "Case"
}else{
label$p.lab <- gsub("[_.-]", " ", case)
}
class.descr <- c(-1, 1) class.descr <- c(-1, 1)
names(class.descr) <- c(label$n.lab, label$p.lab) names(class.descr) <- c(label$n.lab, label$p.lab)
names(label$label) <- names(metaColumn) names(label$label) <- names(metaColumn)
label$header <- label$header <-
paste0("#BINARY:1=", label$p.lab, ";-1=", label$n.lab) paste0("#BINARY:1=", label$p.lab, ";-1=", label$n.lab)
label$label[which(metaColumn == case)] <- 1 if(length(case)>1){
label$label[which(metaColumn%in%case)] <- 1
}else{
label$label[which(metaColumn == case)] <- 1
}
label$n.idx <- label$label == label$negative.lab label$n.idx <- label$label == label$negative.lab
label$p.idx <- label$label == label$positive.lab label$p.idx <- label$label == label$positive.lab
......
...@@ -29,7 +29,7 @@ ...@@ -29,7 +29,7 @@
#' # simple working example #' # simple working example
#' siamcat_evaluated <- summarize.features(siamcat_example) #' siamcat_evaluated <- summarize.features(siamcat_example)
#' #'
summarize.features <- function(siamcat, level = "__g", summarize.features <- function(siamcat, level = "g__",
keep_original_names = TRUE, verbose=1){ keep_original_names = TRUE, verbose=1){
if (verbose > 1) if (verbose > 1)
...@@ -38,26 +38,29 @@ summarize.features <- function(siamcat, level = "__g", ...@@ -38,26 +38,29 @@ summarize.features <- function(siamcat, level = "__g",
s.time <- proc.time()[3] s.time <- proc.time()[3]
if (verbose > 2) message("+++ sumamrizing on a level: ",level,"\n") if (verbose > 2) message("+++ sumamrizing on a level: ",level,"\n")
feat <- features(siamcat) feat <- get.features.matrix(siamcat)
rowSplitted <- strsplit(rownames(feat@.Data),"\\.") rowSplitted <- strsplit(rownames(feat),"\\.")
geniuses <- NULL genera <- NULL
origNames <- NULL origNames <- NULL
for(row in rowSplitted){ for(row in rowSplitted){
print(row)
levelPos <- grep(level,row) levelPos <- grep(level,row)
geniuses <- c(geniuses,row[levelPos]) if(length(levelPos)>0){
origNames<- paste(row[1:levelPos],sep="\\.") genera <- c(genera,row[levelPos])
origNames<- paste(row[1:levelPos],sep="\\.")
}
} }
geniusesSplitted <- strsplit(geniuses,level) generaSplitted <- strsplit(genera,level)
geniusesSpVec <- do.call(rbind,geniusesSplitted)[,2] generaSpVec <- do.call(rbind,generaSplitted)[,2]
geniusesSpVecMap <- cbind(geniusesSpVec,origNames) generaSpVecMap <- cbind(generaSpVec,origNames)
geniusesSpVecMap <- geniusesSpVecMap[!duplicated(geniusesSpVecMap[,1]),] generaSpVecMap <- generaSpVecMap[!duplicated(generaSpVecMap[,1]),]
summarized <- NULL summarized <- NULL
for(genus in seq_along(1:nrow(geniusesSpVecMap))){ for(genus in seq_along(1:nrow(generaSpVecMap))){
curRows <- feat@.Data[which(geniusesSpVec==genus),] curRows <- feat[which(generaSpVec==genus),]
if(!is.null(dim(curRows))){ if(!is.null(dim(curRows))){
...@@ -74,10 +77,10 @@ summarize.features <- function(siamcat, level = "__g", ...@@ -74,10 +77,10 @@ summarize.features <- function(siamcat, level = "__g",
if (verbose > 2) message("+++ summarized features table contains: ", if (verbose > 2) message("+++ summarized features table contains: ",
nrow(summarized)," features\n") nrow(summarized)," features\n")
rownames(summarized) <- unique(geniusesSpVec) rownames(summarized) <- unique(generaSpVec)
if(keep_original_names){ if(keep_original_names){
rownames(summarized) <- geniusesSpVecMap[,2] rownames(summarized) <- generaSpVecMap[,2]
} }
colnames(summarized) <- colnames(feat) colnames(summarized) <- colnames(feat)
......