Commit 7fe3e58d
authored 5 years ago by Constantin Pape
Refactor morphology feature implementation in separate file
parent 122d78b0
Showing 2 changed files with 252 additions and 204 deletions:

scripts/extension/attributes/morphology.py        +14  -204
scripts/extension/attributes/morphology_impl.py   +238 -0
scripts/extension/attributes/morphology.py  (+14, -204)
@@ -4,23 +4,15 @@ import os
import sys
import json

# this is a task called by multiple processes,
# so we need to restrict the number of threads used by numpy
from cluster_tools.utils.numpy_utils import set_numpy_threads
set_numpy_threads(1)

import numpy as np
import luigi
import pandas as pd
import nifty.tools as nt
from skimage.measure import regionprops, marching_cubes_lewiner, mesh_surface_area
from skimage.transform import resize
from skimage.util import pad
import pandas as pd

import cluster_tools.utils.volume_utils as vu
import cluster_tools.utils.function_utils as fu
from cluster_tools.utils.task_utils import DummyTask
from cluster_tools.cluster_tasks import SlurmTask, LocalTask

from scripts.extension.attributes.morphology_impl import morphology_impl

#
# Morphology Attribute Tasks
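Editorial note on the import block above: set_numpy_threads(1) has to run before numpy is imported, otherwise the BLAS thread pools are already initialized. A minimal stand-in with the same intent, assuming the cluster_tools helper works by capping the usual BLAS/OpenMP thread-count environment variables (set_numpy_threads_sketch is a hypothetical name, not the real implementation):

import os

def set_numpy_threads_sketch(n_threads):
    # these variables are read by the BLAS/OpenMP backends when numpy is imported,
    # so they must be set before the first `import numpy`
    for var in ("OMP_NUM_THREADS", "OPENBLAS_NUM_THREADS",
                "MKL_NUM_THREADS", "NUMEXPR_NUM_THREADS"):
        os.environ[var] = str(n_threads)

set_numpy_threads_sketch(1)
import numpy as np  # numpy now runs its linear algebra single-threaded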
@@ -116,161 +108,6 @@ class MorphologySlurm(MorphologyBase, SlurmTask):
#
# get shape of full data & downsampling factor
def get_scale_factor(path, key_full, key, resolution):
    with vu.file_reader(path, 'r') as f:
        full_shape = f[key_full].shape
        shape = f[key].shape
    # scale factor for downsampling
    scale_factor = [res * (fs / sh) for res, fs, sh in zip(resolution, full_shape, shape)]
    return scale_factor


def filter_table(table, min_size, max_size):
    if max_size is None:
        table = table.loc[table['n_pixels'] >= min_size, :]
    else:
        criteria = np.logical_and(table['n_pixels'] > min_size, table['n_pixels'] < max_size)
        table = table.loc[criteria, :]
    return table


def filter_table_from_mapping(table, mapping_path):
    # read in numpy array of mapping of cells to nuclei - first column cell id, second nucleus id
    mapping = np.genfromtxt(mapping_path, skip_header=1, delimiter='\t')[:, :2].astype('uint64')
    # remove zero labels from this table too, if exist
    mapping = mapping[np.logical_and(mapping[:, 0] != 0, mapping[:, 1] != 0)]
    table = table.loc[np.isin(table['label_id'], mapping[:, 0]), :]
    return table


def load_data(ds, row, scale):
    # compute the bounding box from the row information
    mins = [row.bb_min_z, row.bb_min_y, row.bb_min_x]
    maxs = [row.bb_max_z, row.bb_max_y, row.bb_max_x]
    mins = [int(mi / sca) for mi, sca in zip(mins, scale)]
    maxs = [int(ma / sca) + 1 for ma, sca in zip(maxs, scale)]
    bb = tuple(slice(mi, ma) for mi, ma in zip(mins, maxs))
    # load the data from the bounding box
    return ds[bb]


def morphology_row_features(mask, scale):
    # Calculate stats from skimage
    ski_morph = regionprops(mask.astype('uint8'))
    # volume in pixels
    volume_in_pix = ski_morph[0]['area']
    # extent
    extent = ski_morph[0]['extent']
    # The mesh calculation below fails if an edge of the segmentation is right up against the
    # edge of the volume - gives an open, rather than a closed surface
    # Pad by a few pixels to avoid this
    mask = pad(mask, 10, mode='constant')
    # surface area of mesh around object (other ways to calculate better?)
    verts, faces, normals, values = marching_cubes_lewiner(mask, spacing=tuple(scale))
    surface_area = mesh_surface_area(verts, faces)
    # volume in microns
    volume_in_microns = np.prod(scale) * volume_in_pix
    # sphericity (as in morpholibj)
    # Should run from zero to one
    sphericity = (36 * np.pi * (float(volume_in_microns) ** 2)) / (float(surface_area) ** 3)
    return [volume_in_microns, extent, surface_area, sphericity]
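Aside on the sphericity computed in morphology_row_features above: the quantity 36 * pi * V**2 / A**3 equals 1 for a perfect sphere and decreases for less compact shapes. A quick analytic sanity check, independent of any segmentation data:

import numpy as np

r = 5.0                              # sphere radius in arbitrary units
volume = 4.0 / 3.0 * np.pi * r ** 3  # analytic sphere volume
surface_area = 4.0 * np.pi * r ** 2  # analytic sphere surface area

sphericity = (36 * np.pi * volume ** 2) / (surface_area ** 3)
print(sphericity)  # -> 1.0 for a perfect sphere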
def intensity_row_features(raw, mask):
    intensity_vals_in_mask = raw[mask]
    # mean and stdev - use float64 to avoid silent overflow errors
    mean_intensity = np.mean(intensity_vals_in_mask, dtype=np.float64)
    st_dev = np.std(intensity_vals_in_mask, dtype=np.float64)
    return mean_intensity, st_dev


# compute morphology (and intensity features) for label range
def morphology_features_for_label_range(table, ds, ds_raw,
                                        scale_factor_seg, scale_factor_raw,
                                        label_begin, label_end):
    label_range = np.logical_and(table['label_id'] >= label_begin, table['label_id'] < label_end)
    sub_table = table.loc[label_range, :]
    stats = []
    for row in sub_table.itertuples(index=False):
        label_id = int(row.label_id)

        # load the segmentation data from the bounding box corresponding
        # to this row
        seg = load_data(ds, row, scale_factor_seg)

        # compute the segmentation mask and check that we have
        # foreground in the mask
        seg_mask = seg == label_id
        if seg_mask.sum() == 0:
            # if the seg mask is empty, we simply skip this label-id
            continue

        # compute the morphology features from the segmentation mask
        result = [float(label_id)] + morphology_row_features(seg_mask, scale_factor_seg)

        # compute the intensity features from raw data and segmentation mask
        if ds_raw is not None:
            raw = load_data(ds_raw, row, scale_factor_raw)
            # resize the segmentation mask if it does not fit the raw data
            if seg_mask.shape != raw.shape:
                seg_mask = resize(seg_mask, raw.shape,
                                  order=0, mode='reflect',
                                  anti_aliasing=True, preserve_range=True).astype('bool')
            result += intensity_row_features(raw, seg_mask)
        stats.append(result)
    return stats


def compute_morphology_features(table, segmentation_path, raw_path,
                                seg_key, raw_key,
                                scale_factor_seg, scale_factor_raw,
                                blocking, block_list):
    if raw_path != '':
        assert raw_key is not None and scale_factor_raw is not None
        f_raw = vu.file_reader(raw_path, 'r')
        ds_raw = f_raw[raw_key]
    else:
        f_raw = ds_raw = None

    with vu.file_reader(segmentation_path, 'r') as f:
        ds = f[seg_key]

        stats = []
        for block_id in block_list:
            block = blocking.getBlock(block_id)
            label_a, label_b = block.begin[0], block.end[0]
            fu.log("Computing features from label-id %i to %i" % (label_a, label_b))
            stats.extend(morphology_features_for_label_range(table, ds, ds_raw,
                                                             scale_factor_seg, scale_factor_raw,
                                                             label_a, label_b))
    if f_raw is not None:
        f_raw.close()

    # convert to pandas table and add column names
    stats = pd.DataFrame(stats)
    columns = ['label_id',
               'shape_volume_in_microns', 'shape_extent',
               'shape_surface_area', 'shape_sphericity']
    if raw_path != '':
        columns += ['intensity_mean', 'intensity_st_dev']
    stats.columns = columns
    return stats


def morphology(job_id, config_path):
    fu.log("start processing job %i" % job_id)
@@ -294,49 +131,22 @@ def morphology(job_id, config_path):
    block_list = config['block_list']

    # keys to segmentation and raw data for the different scales
    seg_key_full = 't00000/s00/0/cells'
    seg_key = 't00000/s00/%i/cells' % seg_scale
    raw_key_full = 't00000/s00/0/cells'
    raw_key = 't00000/s00/%i/cells' % raw_scale

    # get scale factor for the segmentation
    scale_factor_seg = get_scale_factor(segmentation_path, seg_key_full, seg_key, resolution)

    # get scale factor for raw data (if it's given)
    if raw_path != '':
        fu.log("Have raw path; compute intensity features")
        # NOTE for now we can hard-code the resolution for the raw data here,
        # but we might need to change this if we get additional dataset(s)
        raw_resolution = [0.025, 0.01, 0.01]
        scale_factor_raw = get_scale_factor(raw_path, raw_key_full, raw_key, raw_resolution)
    else:
        fu.log("Don't have raw path; do not compute intensity features")
        raw_resolution = scale_factor_raw = None

    # read the base table
    table = pd.read_csv(in_table_path, sep='\t')
    n_labels = table.shape[0]
    fu.log("Initial number of labels: %i" % n_labels)

    # remove zero label if it exists
    table = table.loc[table['label_id'] != 0, :]

    # if we have a mapping, only keep objects in the mapping
    # (i.e. cells that have assigned nuclei)
    if mapping_path != '':
        fu.log("Have mapping path %s" % mapping_path)
        table = filter_table_from_mapping(table, mapping_path)
        fu.log("Number of labels after filter with mapping: %i" % table.shape[0])

    # filter by size
    table = filter_table(table, min_size, max_size)
    fu.log("Number of labels after size filter: %i" % table.shape[0])

    # get the label ranges for this job
    n_labels = table.shape[0]
    blocking = nt.blocking([0], [n_labels], [1000])

    fu.log("Computing morphology features")
    stats = compute_morphology_features(table, segmentation_path, raw_path,
                                        seg_key, raw_key,
                                        scale_factor_seg, scale_factor_raw,
                                        blocking, block_list)
    label_starts, label_stops = [], []
    for block_id in block_list:
        block = blocking.getBlock(block_id)
        label_starts.append(block.begin)
        label_stops.append(block.end)
    stats = morphology_impl(segmentation_path, raw_path, table, mapping_path,
                            min_size, max_size,
                            resolution, raw_scale, seg_scale,
                            label_starts, label_stops)

    output_path = output_prefix + '_job%i.csv' % job_id
    fu.log("Save result to %s" % output_path)
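Aside on the refactored morphology() body above: the 1D blocking over [0, n_labels) with block length 1000 is only used to translate each job's block ids into label ranges, which are then handed to morphology_impl as label_starts / label_stops. A plain-Python sketch of the resulting ranges, assuming an example n_labels of 2500 and that the blocking splits the range into consecutive chunks with the last one truncated:

n_labels = 2500      # example value; in the task this is table.shape[0]
block_len = 1000

label_starts = list(range(0, n_labels, block_len))                          # [0, 1000, 2000]
label_stops = [min(start + block_len, n_labels) for start in label_starts]  # [1000, 2000, 2500]

for start, stop in zip(label_starts, label_stops):
    print("features for label ids [%i, %i)" % (start, stop))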
scripts/extension/attributes/morphology_impl.py  (new file, 0 → 100644)  (+238, -0)
#! /bin/python

# this is a task called by multiple processes,
# so we need to restrict the number of threads used by numpy
from cluster_tools.utils.numpy_utils import set_numpy_threads
set_numpy_threads(1)

import numpy as np
import h5py
import pandas as pd
from skimage.measure import regionprops, marching_cubes_lewiner, mesh_surface_area
from skimage.transform import resize
from skimage.util import pad


# get shape of full data & downsampling factor
def get_scale_factor(path, key_full, key, resolution):
    with h5py.File(path, 'r') as f:
        full_shape = f[key_full].shape
        shape = f[key].shape
    # scale factor for downsampling
    scale_factor = [res * (fs / sh) for res, fs, sh in zip(resolution, full_shape, shape)]
    return scale_factor


def filter_table(table, min_size, max_size):
    if max_size is None:
        table = table.loc[table['n_pixels'] >= min_size, :]
    else:
        criteria = np.logical_and(table['n_pixels'] > min_size, table['n_pixels'] < max_size)
        table = table.loc[criteria, :]
    return table


def filter_table_from_mapping(table, mapping_path):
    # read in numpy array of mapping of cells to nuclei - first column cell id, second nucleus id
    mapping = np.genfromtxt(mapping_path, skip_header=1, delimiter='\t')[:, :2].astype('uint64')
    # remove zero labels from this table too, if exist
    mapping = mapping[np.logical_and(mapping[:, 0] != 0, mapping[:, 1] != 0)]
    table = table.loc[np.isin(table['label_id'], mapping[:, 0]), :]
    return table


def load_data(ds, row, scale):
    # compute the bounding box from the row information
    mins = [row.bb_min_z, row.bb_min_y, row.bb_min_x]
    maxs = [row.bb_max_z, row.bb_max_y, row.bb_max_x]
    mins = [int(mi / sca) for mi, sca in zip(mins, scale)]
    maxs = [int(ma / sca) + 1 for ma, sca in zip(maxs, scale)]
    bb = tuple(slice(mi, ma) for mi, ma in zip(mins, maxs))
    # load the data from the bounding box
    return ds[bb]


def morphology_row_features(mask, scale):
    # Calculate stats from skimage
    ski_morph = regionprops(mask.astype('uint8'))
    # volume in pixels
    volume_in_pix = ski_morph[0]['area']
    # extent
    extent = ski_morph[0]['extent']
    # The mesh calculation below fails if an edge of the segmentation is right up against the
    # edge of the volume - gives an open, rather than a closed surface
    # Pad by a few pixels to avoid this
    mask = pad(mask, 10, mode='constant')
    # surface area of mesh around object (other ways to calculate better?)
    verts, faces, normals, values = marching_cubes_lewiner(mask, spacing=tuple(scale))
    surface_area = mesh_surface_area(verts, faces)
    # volume in microns
    volume_in_microns = np.prod(scale) * volume_in_pix
    # sphericity (as in morpholibj)
    # Should run from zero to one
    sphericity = (36 * np.pi * (float(volume_in_microns) ** 2)) / (float(surface_area) ** 3)
    return [volume_in_microns, extent, surface_area, sphericity]


def intensity_row_features(raw, mask):
    intensity_vals_in_mask = raw[mask]
    # mean and stdev - use float64 to avoid silent overflow errors
    mean_intensity = np.mean(intensity_vals_in_mask, dtype=np.float64)
    st_dev = np.std(intensity_vals_in_mask, dtype=np.float64)
    return mean_intensity, st_dev


# compute morphology (and intensity features) for label range
def morphology_features_for_label_range(table, ds, ds_raw,
                                        scale_factor_seg, scale_factor_raw,
                                        label_begin, label_end):
    label_range = np.logical_and(table['label_id'] >= label_begin, table['label_id'] < label_end)
    sub_table = table.loc[label_range, :]
    stats = []
    for row in sub_table.itertuples(index=False):
        label_id = int(row.label_id)

        # load the segmentation data from the bounding box corresponding
        # to this row
        seg = load_data(ds, row, scale_factor_seg)

        # compute the segmentation mask and check that we have
        # foreground in the mask
        seg_mask = seg == label_id
        if seg_mask.sum() == 0:
            # if the seg mask is empty, we simply skip this label-id
            continue

        # compute the morphology features from the segmentation mask
        result = [float(label_id)] + morphology_row_features(seg_mask, scale_factor_seg)

        # compute the intensity features from raw data and segmentation mask
        if ds_raw is not None:
            raw = load_data(ds_raw, row, scale_factor_raw)
            # resize the segmentation mask if it does not fit the raw data
            if seg_mask.shape != raw.shape:
                seg_mask = resize(seg_mask, raw.shape,
                                  order=0, mode='reflect',
                                  anti_aliasing=True, preserve_range=True).astype('bool')
            result += intensity_row_features(raw, seg_mask)
        stats.append(result)
    return stats


def compute_morphology_features(table, segmentation_path, raw_path,
                                seg_key, raw_key,
                                scale_factor_seg, scale_factor_raw,
                                label_starts, label_stops):
    if raw_path != '':
        assert raw_key is not None and scale_factor_raw is not None
        f_raw = h5py.File(raw_path, 'r')
        ds_raw = f_raw[raw_key]
    else:
        f_raw = ds_raw = None

    with h5py.File(segmentation_path, 'r') as f:
        ds = f[seg_key]

        stats = []
        for label_a, label_b in zip(label_starts, label_stops):
            # fu.log("Computing features from label-id %i to %i" % (label_a, label_b))
            stats.extend(morphology_features_for_label_range(table, ds, ds_raw,
                                                             scale_factor_seg, scale_factor_raw,
                                                             label_a, label_b))
    if f_raw is not None:
        f_raw.close()

    # convert to pandas table and add column names
    stats = pd.DataFrame(stats)
    columns = ['label_id',
               'shape_volume_in_microns', 'shape_extent',
               'shape_surface_area', 'shape_sphericity']
    if raw_path != '':
        columns += ['intensity_mean', 'intensity_st_dev']
    stats.columns = columns
    return stats


def morphology_impl(segmentation_path, raw_path, table, mapping_path,
                    min_size, max_size,
                    resolution, raw_scale, seg_scale,
                    label_starts, label_stops):
    """
    Compute morphology features for a segmentation.

    Can compute features for multiple label ranges. If you want to
    compute features for the full label range, pass
    'label_starts=[0]' and 'label_stops=[number_of_labels]'.

    Arguments:
        segmentation_path [str] - path to segmentation stored as h5.
        raw_path [str] - path to raw data stored as h5.
            Pass 'None' if you don't want to compute features based on raw data.
        table [pd.DataFrame] - table with default attributes
            (sizes, center of mass and bounding boxes) for segmentation
        mapping_path [str] - path to nucleus id mapping.
            Pass 'None' if not relevant.
        min_size [int] - minimal size for objects used in calculation
        max_size [int] - maximum size for objects used in calculation
        resolution [listlike] - resolution in nanometer.
            Must be given in [Z, Y, X].
        raw_scale [int] - scale level of the raw data
        seg_scale [int] - scale level of the segmentation
        label_starts [listlike] - list with label start positions
        label_stops [listlike] - list with label stop positions
    """
    # keys to segmentation and raw data for the different scales
    seg_key_full = 't00000/s00/0/cells'
    seg_key = 't00000/s00/%i/cells' % seg_scale
    raw_key_full = 't00000/s00/0/cells'
    raw_key = 't00000/s00/%i/cells' % raw_scale

    # get scale factor for the segmentation
    scale_factor_seg = get_scale_factor(segmentation_path, seg_key_full, seg_key, resolution)

    # get scale factor for raw data (if it's given)
    if raw_path != '':
        # fu.log("Have raw path; compute intensity features")
        # NOTE for now we can hard-code the resolution for the raw data here,
        # but we might need to change this if we get additional dataset(s)
        raw_resolution = [0.025, 0.01, 0.01]
        scale_factor_raw = get_scale_factor(raw_path, raw_key_full, raw_key, raw_resolution)
    else:
        # fu.log("Don't have raw path; do not compute intensity features")
        raw_resolution = scale_factor_raw = None

    # remove zero label if it exists
    table = table.loc[table['label_id'] != 0, :]

    # if we have a mapping, only keep objects in the mapping
    # (i.e. cells that have assigned nuclei)
    if mapping_path != '':
        # fu.log("Have mapping path %s" % mapping_path)
        table = filter_table_from_mapping(table, mapping_path)
        # fu.log("Number of labels after filter with mapping: %i" % table.shape[0])

    # filter by size
    table = filter_table(table, min_size, max_size)
    # fu.log("Number of labels after size filter: %i" % table.shape[0])

    # fu.log("Computing morphology features")
    stats = compute_morphology_features(table, segmentation_path, raw_path,
                                        seg_key, raw_key,
                                        scale_factor_seg, scale_factor_raw,
                                        label_starts, label_stops)
    return stats


if __name__ == '__main__':
    pass
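A minimal usage sketch of the new module, following its docstring: to cover the full label range, pass label_starts=[0] and label_stops=[number_of_labels]. All paths, sizes, resolution values and scale levels below are placeholders, not values taken from this repository; an empty string for raw_path or mapping_path skips the intensity features or the nucleus-mapping filter, matching the checks in the code above:

import pandas as pd
from scripts.extension.attributes.morphology_impl import morphology_impl

# base attribute table with sizes, centers of mass and bounding boxes (placeholder path)
table = pd.read_csv('/path/to/default_attributes.tsv', sep='\t')

stats = morphology_impl(segmentation_path='/path/to/segmentation.h5',  # placeholder
                        raw_path='',                    # '' -> skip intensity features
                        table=table,
                        mapping_path='',                # '' -> no nucleus-mapping filter
                        min_size=100, max_size=None,    # placeholder size filter
                        resolution=[0.025, 0.02, 0.02],  # [Z, Y, X], placeholder values
                        raw_scale=3, seg_scale=2,        # placeholder scale levels
                        label_starts=[0],
                        label_stops=[len(table)])
stats.to_csv('morphology_features.csv', index=False)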