Commit 3d0ef60f authored by Jean-Karim Heriche's avatar Jean-Karim Heriche

Implemented xgboost feature importance.

parent caf05f3b
......@@ -25,7 +25,7 @@ ui_dimensionality_reduction <- function(id) {
uiOutput(ns("dimReductionParams"))),
actionButton(ns("dimReductionButton"), "Apply", icon("check"),
style="color: #fff; background-color: #3C8DBC; border-color: #3C8DBC")
) #End column
) # End column
} # End ui_dimensionality_reduction()
......
......@@ -10,24 +10,35 @@ ui_feature_selection <- function(id) {
ns <- NS(id)
column(12,
box( width = 3,
title = "Features to consider", solidHeader = TRUE, status = "primary",
uiOutput(ns("featuresToProcess"))
fluidRow(
box( width = 3,
title = "Features to consider", solidHeader = TRUE, status = "primary",
uiOutput(ns("featuresToProcess"))
),
box( width = 3,
title = "Target annotation column", solidHeader = TRUE, status = "primary",
uiOutput(ns("targetCol"))
),
# box(width = 3,
# title = "Parameters", solidHeader = TRUE, status = "primary",
# uiOutput(ns("featSelectionParams"))),
actionButton(ns("featSelectionButton"), "Start", icon("check"),
style="color: #fff; background-color: #3C8DBC; border-color: #3C8DBC")
),
box( width = 3,
title = "Target annotation column", solidHeader = TRUE, status = "primary",
uiOutput(ns("targetCol"))
),
# box(width = 3,
# title = "Parameters", solidHeader = TRUE, status = "primary",
# uiOutput(ns("featSelectionParams"))),
actionButton(ns("featSelectionButton"), "Start", icon("check"),
style="color: #fff; background-color: #3C8DBC; border-color: #3C8DBC")
) #End column
} # End ui_cluster()
fluidRow(
box( width = 6, align = "left",
title = "Plot of feature importance", solidHeader = TRUE, status = "primary",
withSpinner(
plotlyOutput(ns("plot"), height = '500px'))
), # End box
box( width = 6,
tags$style(paste0("#", ns("hover"), " { height: 10em;}")),
h4(HTML("&nbsp; Accuracy of model predictions")),
textOutput(ns("accuracy"))
)
) # End fluidRow
) # End column
} # End ui_feature_selection()
feature_selection_server <- function(input, output, session, rv, session_parent) {
......@@ -77,14 +88,69 @@ feature_selection_server <- function(input, output, session, rv, session_parent
action_button_feature_selection <- function(){
  # Train an xgboost classifier on the selected features against the chosen
  # annotation column, then display feature importance and test-set accuracy.
  # Reads rv$data, input$featuresToProcess and input$targetCol from the
  # enclosing module server scope; writes output$plot and output$accuracy.
  shinyjs::disable("featSelectionButton")
  req(rv$data)
  # Target vector from the selected annotation column
  target <- rv$data[, input$targetCol]
  # Keep only rows that carry a usable annotation (not NA, 'none' or empty)
  idx.to.keep <- which(!is.na(target) & tolower(target) != 'none' & target != "")
  # Also drop rows whose selected features contain NA or infinite values, so
  # the matrices handed to xgb.DMatrix below are fully finite.
  # (The original computed this filter into an unused temporary.)
  finite.rows <- which(is.finite(rowSums(rv$data[, input$featuresToProcess])))
  idx.to.keep <- intersect(idx.to.keep, finite.rows)
  # Split into training and test set indices
  # Note: if unlucky we may get class imbalance
  # If this becomes a problem, we can use functions from the caret package
  train.idx <- sample(idx.to.keep, floor(0.67 * length(idx.to.keep)))
  # BUG FIX: train.idx holds row numbers of rv$data, not positions within
  # idx.to.keep, so the previous idx.to.keep[-train.idx] removed the wrong
  # elements and could leave training rows in the test set.
  test.idx <- setdiff(idx.to.keep, train.idx)
  # Convert annotations to integer values starting from 0 (required by
  # xgboost). Factorize on the kept rows only, so the codes are contiguous
  # 0..K-1 and no levels are reserved for the filtered-out 'none'/'' entries.
  labels <- rep(NA_integer_, length(target))
  labels[idx.to.keep] <- as.integer(as.factor(as.character(target[idx.to.keep]))) - 1
  # Form training and test sets
  train.data <- xgb.DMatrix(data = as.matrix(rv$data[train.idx, input$featuresToProcess]),
                            label = labels[train.idx])
  test.data <- xgb.DMatrix(data = as.matrix(rv$data[test.idx, input$featuresToProcess]),
                           label = labels[test.idx])
  # Determine classification type:
  # binary:logistic for two classes (predict returns a vector of P(class 1)),
  # multi:softprob otherwise (predict returns one probability column per class).
  nb.class <- length(unique(labels[idx.to.keep]))
  # xgboost parameters with default values
  params <- list(booster = "gbtree",
                 eta = 0.3,
                 gamma = 0,
                 max_depth = 6,
                 min_child_weight = 1,
                 subsample = 1,
                 colsample_bytree = 1)
  if(nb.class == 2) {
    params$objective <- "binary:logistic"
  } else {
    params$objective <- "multi:softprob"
    # num_class is only valid (and required) for multi-class objectives;
    # passing it alongside binary:logistic makes xgb.train fail.
    params$num_class <- nb.class
  }
  # Learn a model, stopping early when the test error stops improving
  xgb <- xgb.train(params = params,
                   data = train.data,
                   nrounds = 1000,
                   early_stopping_rounds = 10,
                   watchlist = list(train = train.data, test = test.data),
                   verbose = 0)
  # Make predictions.
  # multi:softprob yields a matrix of per-class probabilities (classes in
  # columns); binary:logistic yields a plain vector on which max.col() would
  # error, hence the separate 0.5-threshold branch for the binary case.
  xgbpred <- predict(xgb, test.data, reshape = TRUE)
  if(nb.class == 2) {
    xgbpred.classes <- as.integer(xgbpred > 0.5)
  } else {
    xgbpred.classes <- max.col(xgbpred) - 1
  }
  accuracy <- sprintf("%1.2f%%",
                      sum(xgbpred.classes == labels[test.idx]) / length(xgbpred.classes) * 100)
  # Get feature importance
  # This also clusters the features. The number of clusters is automatically
  # determined (using BIC).
  feature.importance <- xgb.importance(model = xgb, feature_names = input$featuresToProcess)
  p <- xgb.ggplot.importance(feature.importance)
  remove_modal_spinner()
  output$plot <- renderPlotly(p)
  output$accuracy <- renderText( accuracy )
  shinyjs::enable("featSelectionButton")
}
......
......@@ -14,7 +14,8 @@ cran.mirrors <- c('https://ftp.gwdg.de/pub/misc/cran/', 'https://cloud.r-project
## Install required packages if missing
## CRAN packages
pkg <- c("devtools", "BiocManager", "DT", "shiny", "shinyFiles", "shinycssloaders", "shinydashboard", "shinyjs", "shinyWidgets", "shinybusy", "assertthat", "ggplot2", "plotly", "RANN", "MASS", "uwot")
pkg <- c("devtools", "BiocManager", "DT", "shiny", "shinyFiles", "shinycssloaders", "shinydashboard", "shinyjs",
"shinyWidgets", "shinybusy", "assertthat", "ggplot2", "plotly", "RANN", "MASS", "uwot", "xgboost", "Ckmeans.1d.dp")
new.pkg <- pkg[!(pkg %in% installed.packages())]
if (length(new.pkg)) {
message(paste0("Installing ", new.pkg, "\n"))
......@@ -48,6 +49,7 @@ library(assertthat)
library(RANN)
library(MASS)
library(uwot)
library(xgboost)
## Making data table filtering persistent
## from https://dev.to/awwsmm/reactive-datatables-in-r-with-persistent-filters-l26
......@@ -62,6 +64,7 @@ source("R/cluster.R")
source("R/annotate.R")
source("R/explore.R")
source("R/dim_reduction.R")
source("R/feature_selection.R")
ui <- function(request) {
......@@ -85,8 +88,9 @@ ui <- function(request) {
ui_explore("explore_module")
)),
tabItem( tabName = "annotate",
ui_annotate("annotate_module")
),
fluidRow(
ui_annotate("annotate_module")
)),
tabItem( tabName = "dimensionality_reduction",
fluidRow(
ui_dimensionality_reduction("dimensionality_reduction_module")
......@@ -157,11 +161,11 @@ server <- function(input,output,session){
}, ignoreNULL = TRUE, ignoreInit = TRUE)
# feature selection
# observeEvent(input$tabs_menu,{
# if(input$tabs_menu=="feature_selection"){
# callModule(feature_selection_server, "feature_selection_module", global_data, session)
# }
# }, ignoreNULL = TRUE, ignoreInit = TRUE)
observeEvent(input$tabs_menu,{
if(input$tabs_menu=="feature_selection"){
callModule(feature_selection_server, "feature_selection_module", global_data, session)
}
}, ignoreNULL = TRUE, ignoreInit = TRUE)
# cluster
# observeEvent(input$tabs_menu,{
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment