Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
10
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Kimberly Isobel Meechan
Image Data Explorer
Commits
f5c2c01c
Commit
f5c2c01c
authored
Nov 18, 2020
by
Jean-Karim Heriche
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Fix problem with imbalanced partitioning of data and issue when making predictions.
parent
7326e390
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
22 additions
and
23 deletions
+22
-23
Dockerfile
Dockerfile
+1
-1
R/explore_image.R
R/explore_image.R
+0
-1
R/explore_image2.R
R/explore_image2.R
+0
-1
R/explore_plot.R
R/explore_plot.R
+5
-1
R/feature_selection.R
R/feature_selection.R
+16
-19
No files found.
Dockerfile
View file @
f5c2c01c
...
...
@@ -12,7 +12,7 @@ RUN R -e "install.packages('BiocManager', repos=c('https://cloud.r-project.org/
RUN
R
-e
"BiocManager::install('aoles/RBioFormats')"
RUN
R
-e
"BiocManager::install('EBImage')"
RUN
R
-e
"install.packages(c('DT', 'shiny', 'shinyFiles', 'shinycssloaders', 'shinydashboard', 'shinyjs', 'shinyWidgets', 'shinybusy', 'assertthat'), repos=c('https://cloud.r-project.org/', 'https://ftp.gwdg.de/pub/misc/cran/'))"
RUN
R
-e
"install.packages(c('ggplot2', 'plotly', 'RANN', 'MASS', 'uwot', 'xgboost', 'Ckmeans.1d.dp', 'caret'), repos=c('https://cloud.r-project.org/', 'https://ftp.gwdg.de/pub/misc/cran/'))"
RUN
R
-e
"install.packages(c('ggplot2', 'plotly', 'RANN', 'MASS', 'uwot', 'xgboost', 'Ckmeans.1d.dp',
'e1071',
'caret'), repos=c('https://cloud.r-project.org/', 'https://ftp.gwdg.de/pub/misc/cran/'))"
# Copy the app to the image
RUN
mkdir
-p
/usr/local/app/image-data-explorer
...
...
R/explore_image.R
View file @
f5c2c01c
...
...
@@ -34,7 +34,6 @@ explore_image_server <-function(input, output, session, rv){
if
(
!
is.null
(
rv
$
imgPath1
)
&&
length
(
unique
(
rv
$
imgPath1
))
==
1
&&
length
(
unique
(
rv
$
selectedFrame
))
<=
1
&&
!
is.null
(
rv
$
fileCol1
)
&&
rv
$
fileCol1
!=
""
){
rv
$
imgPath1
<-
gsub
(
"[[:space:]]"
,
""
,
rv
$
imgPath1
)
filePath1
<-
file.path
(
rootDir
,
strsplit
(
rv
$
imgPath1
[
1
],
'\\\\'
))
validate
(
need
(
filePath1
!=
""
&&
file.exists
(
filePath1
),
"File not found. Check that you selected the correct image root directory."
))
slice.def
<-
list
()
...
...
R/explore_image2.R
View file @
f5c2c01c
...
...
@@ -29,7 +29,6 @@ explore_image_server2 <-function(input, output, session, rv){
if
(
!
is.null
(
rv
$
imgPath2
)
&&
length
(
unique
(
rv
$
imgPath2
))
==
1
&&
length
(
unique
(
rv
$
selectedFrame
))
<=
1
&&
!
is.null
(
rv
$
fileCol2
)
&&
rv
$
fileCol2
!=
""
){
rv
$
imgPath2
<-
gsub
(
"[[:space:]]"
,
""
,
rv
$
imgPath2
)
filePath2
<-
file.path
(
rootDir
,
strsplit
(
rv
$
imgPath2
[
1
],
'\\\\'
))
validate
(
need
(
filePath2
!=
""
&&
file.exists
(
filePath2
),
"File not found. Check that you selected the correct image root directory."
))
slice.def
<-
list
()
...
...
R/explore_plot.R
View file @
f5c2c01c
...
...
@@ -46,8 +46,12 @@ plot_server <- function(input, output, session, rv) {
geom_point
(
colour
=
"grey"
)
+
coord_cartesian
(
expand
=
TRUE
)
if
(
!
is.null
(
rv
$
clusters
))
{
idx.to.na
<-
which
(
clusters
==
""
)
if
(
length
(
idx.to.na
)
>
0
)
{
clusters
[
idx.to.na
]
<-
NA
}
p
<-
p
+
geom_point
(
aes
(
colour
=
factor
(
clusters
[
1
:
nrow
(
D
)])),
alpha
=
0.5
)
+
scale_colour_brewer
(
palette
=
'Set1'
)
+
scale_colour_brewer
(
palette
=
'Set1'
)
+
theme
(
plot.title
=
element_blank
(),
legend.title
=
element_blank
())
}
p
<-
p
+
geom_point
(
data
=
D
[
rv
$
selectedRows
,],
...
...
R/feature_selection.R
View file @
f5c2c01c
...
...
@@ -101,27 +101,25 @@ feature_selection_server <- function(input, output, session, rv, session_parent
req
(
rv
$
data
)
tmp
<-
rv
$
data
[,
c
(
input
$
targetCol
,
input
$
featuresToProcess
)]
# Remove rows with NAs and infinite values
tmp
<-
tmp
[
is.finite
(
rowSums
(
tmp
[,
input
$
featuresToProcess
])),]
idx.valid.data
<-
which
(
is.finite
(
rowSums
(
tmp
[,
input
$
featuresToProcess
])))
tmp
<-
tmp
[
idx.valid.data
,]
# Target vector
target
<-
tmp
[,
input
$
targetCol
]
tmp
<-
tmp
[,
input
$
featuresToProcess
]
target
<-
as.character
(
tmp
[,
input
$
targetCol
])
# Extract data with annotations
idx.to.keep
<-
which
(
!
is.na
(
target
)
&
tolower
(
target
)
!=
'none'
&
target
!=
""
)
target
<-
target
[
idx.to.keep
]
tmp
<-
tmp
[
idx.to.keep
,
input
$
featuresToProcess
]
classes
<-
levels
(
as.factor
(
target
))
classifier.data
$
classes
<-
classes
# Extract data with annotations
idx.to.keep
<-
which
(
!
is.na
(
target
)
&
tolower
(
target
)
!=
'none'
&
target
!=
""
)
# Split into training and test set indices
# Note: if unlucky we may get class imbalance
# If this becomes a problem, we can use functions from the caret package
train.idx
<-
sample
(
idx.to.keep
,
floor
(
0.67
*
length
(
idx.to.keep
)))
test.idx
<-
idx.to.keep
[
-
train.idx
]
# Split into training and test set preserving class distribution
train.idx
<-
createDataPartition
(
as.factor
(
target
),
p
=
0.67
,
list
=
FALSE
,
times
=
1
)
# Form training and test sets
train.data
<-
as.matrix
(
tmp
[
train.idx
,
input
$
featuresToProcess
])
train.labels
<-
as.factor
(
target
[
train.idx
])
test.data
<-
as.matrix
(
tmp
[
test.idx
,
input
$
featuresToProcess
])
test.labels
<-
as.factor
(
target
[
test.idx
])
train.labels
<-
factor
(
target
[
train.idx
])
test.data
<-
as.matrix
(
tmp
[
-
train.idx
,
input
$
featuresToProcess
])
test.labels
<-
factor
(
target
[
-
train.idx
])
# Tune xgboost hyperparameters using caret
nrounds
<-
seq
(
from
=
100
,
to
=
500
,
by
=
50
)
eta
<-
c
(
0.025
,
0.05
,
0.1
,
0.3
,
0.4
)
...
...
@@ -167,7 +165,7 @@ feature_selection_server <- function(input, output, session, rv, session_parent
# Evaluate on held out data
xgbpred
<-
predict
(
xgbModel
,
newdata
=
test.data
)
confusion.matrix
<-
confusionMatrix
(
xgbpred
,
test.labels
,
mode
=
"everything"
)
# Get feature importance using the xgboost library
# This also clusters the features. The number of clusters is automatically determined (using BIC)
feature.importance
<-
xgb.importance
(
model
=
xgbModel
$
finalModel
,
feature_names
=
xgbModel
$
finalModel
$
feature_names
)
...
...
@@ -218,10 +216,9 @@ feature_selection_server <- function(input, output, session, rv, session_parent
rv
$
data
$
xgboost.predictions
<-
NA
}
tmp
<-
rv
$
data
[,
input
$
featuresToProcess
]
# Remove rows with NAs and infinite values
tmp
<-
as.matrix
(
tmp
[
is.finite
(
rowSums
(
tmp
)),])
preds
<-
predict
(
classifier.data
$
model
,
newdata
=
tmp
)
rv
$
data
[
rownames
(
tmp
),]
$
xgboost.predictions
<-
classifier.data
$
classes
[
preds
]
idx.valid.data
<-
which
(
is.finite
(
rowSums
(
tmp
)))
preds
<-
predict
(
classifier.data
$
model
,
newdata
=
tmp
[
idx.valid.data
,])
rv
$
data
[
idx.valid.data
,]
$
xgboost.predictions
<-
classifier.data
$
classes
[
preds
]
showNotification
(
"Model predictions have been added to the data."
,
type
=
"warning"
)
}
})
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment