platy-browser-data · Commits · 53ebc4d7

Commit 53ebc4d7, authored 5 years ago by Constantin Pape

Merge branch 'morphology' into 'master'

Morphology

See merge request !9

Parents: ce4192cb, 25c62706
Related merge request: !9 (Morphology)
Changes (2)

Showing 2 changed files with 439 additions and 89 deletions:

.gitignore (2 additions, 1 deletion)
scripts/extension/attributes/morphology_impl.py (437 additions, 88 deletions)
.gitignore  +2 −1
*.h5
__pycache__/
tmp*
.idea/*
*.DS_Store
data/dev-*
*.tif
*.swp
software
*.zip
*.n5
*.n5
\ No newline at end of file
scripts/extension/attributes/morphology_impl.py  +437 −88
@@ -4,13 +4,17 @@ from datetime import datetime
# so we need to restrict the number of threads used by numpy
from elf.util import set_numpy_threads
set_numpy_threads(1)
import numpy as np
import vigra
import h5py
import pandas as pd
from skimage.measure import regionprops, marching_cubes_lewiner, mesh_surface_area
from skimage.transform import resize
from skimage.util import pad
from scipy.ndimage.morphology import distance_transform_edt
from mahotas.features import haralick
from skimage.morphology import label, remove_small_objects


def log(msg):
@@ -40,13 +44,74 @@ def filter_table(table, min_size, max_size):
    return table


# some cell segmentations have a sensible total pixel size but very large bounding boxes i.e. they are small spots
# distributed over a large region, not one cell > filter for these cases by capping the bounding box size
def filter_table_bb(table, max_bb):
    total_bb = (table['bb_max_z'] - table['bb_min_z']) * (table['bb_max_y'] - table['bb_min_y']) * (table['bb_max_x'] - table['bb_min_x'])
    table = table.loc[total_bb < max_bb, :]
    return table


def filter_table_from_mapping(table, mapping_path):
-    # read in numpy array of mapping of cells to nuclei - first column cell id, second nucleus id
-    mapping = np.genfromtxt(mapping_path, skip_header=1, delimiter='\t')[:, :2].astype('uint64')
+    mapping = pd.read_csv(mapping_path, sep='\t')

    # remove zero labels from this table too, if exist
-    mapping = mapping[np.logical_and(mapping[:, 0] != 0, mapping[:, 1] != 0)]
-    table = table.loc[np.isin(table['label_id'], mapping[:, 0]), :]
+    mapping = mapping.loc[np.logical_and(mapping.iloc[:, 0] != 0, mapping.iloc[:, 1] != 0), :]
+    table = table.loc[np.isin(table['label_id'], mapping['label_id']), :]

    # add a column for the 'nucleus_id' of the mapped nucleus (use this later to exclude the area covered
    # by the nucleus)
    table = table.join(mapping.set_index('label_id'), on='label_id', how='left')
    return table


# regions here are regions to exclude
def filter_table_region(table, region_path, regions=('empty', 'yolk', 'neuropil', 'cuticle')):
    region_mapping = pd.read_csv(region_path, sep='\t')

    # remove zero label if it exists
    region_mapping = region_mapping.loc[region_mapping['label_id'] != 0, :]

    for region in regions:
        region_mapping = region_mapping.loc[region_mapping[region] == 0, :]

    table = table.loc[np.isin(table['label_id'], region_mapping['label_id']), :]
    return table


def run_all_filters(table, min_size, max_size, max_bb, mapping_path, region_mapping_path):
    # remove zero label if present
    table = table.loc[table['label_id'] != 0, :]

    # filter to only keep cells with assigned nuclei
    if mapping_path is not None:
        log("Have mapping path %s" % mapping_path)
        table = filter_table_from_mapping(table, mapping_path)
        log("Number of labels after filter with mapping: %i" % table.shape[0])

    # filter to exclude certain regions
    if region_mapping_path is not None:
        log("Have region mapping path %s" % region_mapping_path)
        table = filter_table_region(table, region_mapping_path)
        log("Number of labels after region filter: %i" % table.shape[0])

    # filter by size of object (no. pixels)
    if min_size is not None or max_size is not None:
        table = filter_table(table, min_size, max_size)
        log("Number of labels after size filter: %i" % table.shape[0])

    # filter by bounding box size
    if max_bb is not None:
        table = filter_table_bb(table, max_bb)
        log("Number of labels after bounding box size filter %i" % table.shape[0])

    return table
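A quick sanity check of the new bounding-box filter. This is an illustrative sketch, not part of the commit; it assumes the functions above are importable, here under the hypothetical module name morphology_impl.

# Toy attribute table with the bounding-box columns used by filter_table_bb.
import pandas as pd
from morphology_impl import filter_table_bb  # hypothetical import path

toy = pd.DataFrame({
    'label_id': [1, 2],
    'bb_min_z': [0, 0], 'bb_max_z': [10, 100],
    'bb_min_y': [0, 0], 'bb_max_y': [10, 100],
    'bb_min_x': [0, 0], 'bb_max_x': [10, 100],
})
# bounding-box volumes are 1000 and 1000000; with max_bb=10000 only label 1 survives
filtered = filter_table_bb(toy, max_bb=10000)
assert filtered['label_id'].tolist() == [1]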
@@ -61,16 +126,53 @@ def load_data(ds, row, scale):
    return ds[bb]


-def morphology_row_features(mask, scale):
+def generate_column_names(raw_path, chromatin_path, exclude_path):
    columns = ['label_id']
    morph_columns = ['shape_volume_in_microns', 'shape_extent', 'shape_equiv_diameter',
                     'shape_major_axis', 'shape_minor_axis', 'shape_surface_area',
                     'shape_sphericity', 'shape_max_radius']
    columns += morph_columns

    if raw_path is not None:
        intensity_columns = ['intensity_mean', 'intensity_st_dev', 'intensity_median',
                             'intensity_iqr', 'intensity_total']
        texture_columns = ['texture_hara%s' % x for x in range(1, 14)]

        columns += intensity_columns
        if exclude_path is None:
            # radial intensity columns
            for val in [25, 50, 75, 100]:
                columns += ['%s_%s' % (var, val) for var in intensity_columns]

        columns += texture_columns

    if chromatin_path is not None:
        edt_columns = ['shape_edt_mean', 'shape_edt_stdev', 'shape_edt_median', 'shape_edt_iqr']
        edt_columns += ['shape_percent_%s' % var for var in [25, 50, 75, 100]]

        for phase in ['_het', '_eu']:
            columns += [var + phase for var in morph_columns]
            columns += [var + phase for var in edt_columns]

            if raw_path is not None:
                columns += [var + phase for var in intensity_columns]
                columns += [var + phase for var in texture_columns]

    return columns


+def morphology_row_features(mask, scale):
    # Calculate stats from skimage
    ski_morph = regionprops(mask.astype('uint8'))

    # volume in pixels
    volume_in_pix = ski_morph[0]['area']

    # extent
    volume_in_microns = np.prod(scale) * volume_in_pix
    extent = ski_morph[0]['extent']
    equiv_diameter = ski_morph[0]['equivalent_diameter']
    major_axis = ski_morph[0]['major_axis_length']
    minor_axis = ski_morph[0]['minor_axis_length']

    # The mesh calculation below fails if an edge of the segmentation is right up against the
    # edge of the volume - gives an open, rather than a closed surface
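For reference, a small sketch of what generate_column_names returns in the simplest case. Illustrative only; it assumes the functions above are importable under the hypothetical module name morphology_impl.

from morphology_impl import generate_column_names  # hypothetical import path

# with no raw data and no chromatin segmentation, only the shape columns are produced
cols = generate_column_names(raw_path=None, chromatin_path=None, exclude_path=None)
# -> ['label_id', 'shape_volume_in_microns', 'shape_extent', 'shape_equiv_diameter',
#     'shape_major_axis', 'shape_minor_axis', 'shape_surface_area', 'shape_sphericity',
#     'shape_max_radius']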
@@ -81,14 +183,16 @@ def morphology_row_features(mask, scale):
    verts, faces, normals, values = marching_cubes_lewiner(mask, spacing=tuple(scale))
    surface_area = mesh_surface_area(verts, faces)

-    # volume in microns
-    volume_in_microns = np.prod(scale) * volume_in_pix

    # sphericity (as in morpholibj)
    # Should run from zero to one
    sphericity = (36 * np.pi * (float(volume_in_microns) ** 2)) / (float(surface_area) ** 3)

-    return [volume_in_microns, extent, surface_area, sphericity]
+    # max radius = max distance from pixel to outside
+    edt = distance_transform_edt(mask, sampling=scale, return_distances=True)
+    max_radius = np.max(edt)

+    return (volume_in_microns, extent, equiv_diameter, major_axis, minor_axis,
+            surface_area, sphericity, max_radius)


def intensity_row_features(raw, mask):
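The sphericity used above follows the MorphoLibJ definition, 36 * pi * V^2 / S^3, which is 1 for a perfect sphere and smaller for any other shape. A minimal, self-contained check (illustrative only, not part of the commit):

import numpy as np

r = 5.0
volume = 4.0 / 3.0 * np.pi * r ** 3   # sphere volume
surface = 4.0 * np.pi * r ** 2        # sphere surface area
sphericity = (36 * np.pi * volume ** 2) / (surface ** 3)
assert np.isclose(sphericity, 1.0)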
@@ -96,17 +200,108 @@ def intensity_row_features(raw, mask):
    # mean and stdev - use float64 to avoid silent overflow errors
    mean_intensity = np.mean(intensity_vals_in_mask, dtype=np.float64)
    st_dev = np.std(intensity_vals_in_mask, dtype=np.float64)
-    return mean_intensity, st_dev

+    median_intensity = np.median(intensity_vals_in_mask)
+    quartile_75, quartile_25 = np.percentile(intensity_vals_in_mask, [75, 25])
+    interquartile_range_intensity = quartile_75 - quartile_25

+    total = np.sum(intensity_vals_in_mask, dtype=np.float64)

+    return mean_intensity, st_dev, median_intensity, interquartile_range_intensity, total


def radial_intensity_row_features(raw, mask, scale, stops=(0.0, 0.25, 0.5, 0.75, 1.0)):
    result = ()

    edt = distance_transform_edt(mask, sampling=scale, return_distances=True)
    edt = edt / np.max(edt)

    bottoms = stops[0:len(stops) - 1]
    tops = stops[1:]
    radial_masks = [np.logical_and(edt > b, edt <= t) for b, t in zip(bottoms, tops)]

    for m in radial_masks:
        result += intensity_row_features(raw, m)

    return result


def texture_row_features(raw, mask):
    # errors if there are small, isolated spots (because I'm using ignore zeros as true)
    # so here remove components that are < 10 pixels
    # may still error in some cases
    labelled = label(mask)
    if len(np.unique(labelled)) > 2:
        labelled = remove_small_objects(labelled, min_size=10)
        mask = labelled != 0
        mask = mask.astype('uint8')

    # set regions outside mask to zero
    raw_copy = raw.copy()
    raw_copy[mask == 0] = 0

    try:
        hara = haralick(raw_copy, ignore_zeros=True, return_mean=True, distance=2)
    except ValueError:
        log('Texture computation failed - can happen when using ignore_zeros')
        hara = (0.,) * 13

    return tuple(hara)


def radial_distribution(edt, mask, stops=(0.0, 0.25, 0.5, 0.75, 1.0)):
    result = ()

    bottoms = stops[0:len(stops) - 1]
    tops = stops[1:]
    radial_masks = [np.logical_and(edt > b, edt <= t) for b, t in zip(bottoms, tops)]

    for m in radial_masks:
        # percent of that zone that is covered by the mask
        result += (np.sum(mask[m]) / np.sum(m),)

    return result


def chromatin_row_features(chromatin, edt, raw, scale_chromatin):
    result = ()

    result += morphology_row_features(chromatin, scale_chromatin)

    # edt stats i.e. stats on distribution of chromatin, dropping the total value
    result += intensity_row_features(edt, chromatin)[:-1]
    result += radial_distribution(edt, chromatin)

    if raw is not None:
        # resize the chromatin masks if not same size as raw
        chromatin_type = chromatin.dtype
        if chromatin.shape != raw.shape:
            chromatin = chromatin.astype('float32')
            chromatin = vigra.sampling.resize(chromatin, shape=raw.shape, order=0)
            chromatin = chromatin.astype(chromatin_type)

        result += intensity_row_features(raw, chromatin)
        result += texture_row_features(raw, chromatin)

    return result


# compute morphology (and intensity features) for label range
def morphology_features_for_label_range(table, ds, ds_raw,
                                        ds_chromatin, ds_exclude,
                                        scale_factor_seg, scale_factor_raw,
                                        scale_factor_chromatin, scale_factor_exclude,
                                        label_begin, label_end):
    label_range = np.logical_and(table['label_id'] >= label_begin, table['label_id'] < label_end)
    sub_table = table.loc[label_range, :]
    stats = []
    for row in sub_table.itertuples(index=False):
        log(str(row.label_id))
        label_id = int(row.label_id)

        # load the segmentation data from the bounding box corresponding
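A small synthetic example of how radial_distribution reports where a mask sits inside the normalised distance transform of the whole object. Illustrative only; it assumes the functions above are importable under the hypothetical module name morphology_impl.

import numpy as np
from scipy.ndimage.morphology import distance_transform_edt
from morphology_impl import radial_distribution  # hypothetical import path

nucleus = np.zeros((21, 21, 21), dtype=bool)
nucleus[3:18, 3:18, 3:18] = True     # the whole object
blob = np.zeros_like(nucleus)
blob[9:12, 9:12, 9:12] = True        # a small central blob, standing in for chromatin

edt = distance_transform_edt(nucleus, sampling=(1, 1, 1), return_distances=True)
edt = edt / np.max(edt)

# one coverage fraction per 25% shell of the normalised distance transform;
# a central blob shows up only in the innermost shell (edt in (0.75, 1.0])
fractions = radial_distribution(edt, blob)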
@@ -121,120 +316,274 @@ def morphology_features_for_label_range(table, ds, ds_raw,
            continue

        # compute the morphology features from the segmentation mask
-        result = [float(label_id)] + morphology_row_features(seg_mask, scale_factor_seg)
+        result = (float(label_id),) + morphology_row_features(seg_mask, scale_factor_seg)

        if ds_exclude is not None:
            exclude = load_data(ds_exclude, row, scale_factor_exclude)
            exclude_type = exclude.dtype

            # resize to fit seg
            if exclude.shape != seg_mask.shape:
                exclude = exclude.astype('float32')
                exclude = vigra.sampling.resize(exclude, shape=seg_mask.shape, order=0)
                exclude = exclude.astype(exclude_type)

-        # compute the intensiry features from raw data and segmentation mask
            # binary for correct nucleus
            exclude = exclude == int(row.nucleus_id)

            # remove nucleus area form seg_mask
            seg_mask[exclude] = False

+        # compute the intensity features from raw data and segmentation mask
        if ds_raw is not None:
            raw = load_data(ds_raw, row, scale_factor_raw)

            # resize the segmentation mask if it does not fit the raw data
            seg_mask_type = seg_mask.dtype
            if seg_mask.shape != raw.shape:
-                seg_mask = resize(seg_mask, raw.shape,
-                                  order=0, mode='reflect',
-                                  anti_aliasing=True, preserve_range=True).astype('bool')
+                seg_mask = seg_mask.astype('float32')
+                seg_mask = vigra.sampling.resize(seg_mask, shape=raw.shape, order=0)
+                seg_mask = seg_mask.astype(seg_mask_type)

            result += intensity_row_features(raw, seg_mask)
            # doesn't make sense to run the radial intensity if the nucleus area is being excluded, as the euclidean
            # distance transform then gives distance from the outside & nuclear surface - hard to interpret
            if ds_exclude is None:
                result += radial_intensity_row_features(raw, seg_mask, scale_factor_raw)
            result += texture_row_features(raw, seg_mask)

        if ds_chromatin is not None:
            chromatin = load_data(ds_chromatin, row, scale_factor_chromatin)

            # set to 1 (heterochromatin), 2 (euchromatin)
            heterochromatin = chromatin == label_id + 12000
            euchromatin = chromatin == label_id

            # skip if no chromatin segmentation
            total_heterochromatin = heterochromatin.sum()
            total_euchromatin = euchromatin.sum()
            if total_heterochromatin == 0 and total_euchromatin.sum() == 0:
                continue

            # euclidean distance transform for whole nucleus, normalised to run from 0 to 1
            whole_nucleus = np.logical_or(heterochromatin, euchromatin)
            edt = distance_transform_edt(whole_nucleus, sampling=scale_factor_chromatin, return_distances=True)
            edt = edt / np.max(edt)

            if ds_raw is None:
                raw = None

            if total_heterochromatin != 0:
                result += chromatin_row_features(heterochromatin, edt, raw, scale_factor_chromatin)
            else:
                result += (0.,) * 36

            if total_euchromatin != 0:
                result += chromatin_row_features(euchromatin, edt, raw, scale_factor_chromatin)
            else:
                result += (0.,) * 36

        stats.append(result)
    return stats


-def compute_morphology_features(table, segmentation_path, raw_path,
-                                seg_key, raw_key,
-                                scale_factor_seg, scale_factor_raw,
-                                label_starts, label_stops):
+def morphology_impl_nucleus(nucleus_segmentation_path, raw_path, chromatin_path,
+                            table, min_size, max_size, max_bb,
+                            nucleus_resolution, chromatin_resolution,
+                            nucleus_seg_scale, raw_scale, chromatin_scale,
+                            label_starts, label_stops):
    """ Compute morphology features for nucleus segmentation.

    Can compute features for multiple label ranges. If you want to
    compute features for the full label range, pass
    'label_starts=[0]' and 'label_stops=[number_of_labels]'

    Arguments:
        nucleus_segmentation_path [str] - path to nucleus segmentation data stored as h5.
        raw_path [str] - path to raw data stored as h5.
            Pass 'None' if you don't want to compute features based on raw data.
        chromatin_path [str] - path to chromatin segmentation data stored as h5.
        table [pd.DataFrame] - table with default attributes
            (sizes, center of mass and bounding boxes) for segmentation
        min_size [int] - minimal size for objects used in calculation
        max_size [int] - maximum size for objects used in calculation
        max_bb [int] - maximum total volume of bounding box for objects used in calculation
        nucleus_resolution [listlike] - resolution in nanometer.
            Must be given in [Z, Y, X].
        chromatin_resolution [listlike] - resolution in nanometer.
            Must be given in [Z, Y, X].
        nucleus_seg_scale [int] - scale level of the segmentation.
        raw_scale [int] - scale level of the raw data
        chromatin_scale [int] - scale level of the segmentation.
        label_starts [listlike] - list with label start positions
        label_stops [listlike] - list with label stop positions
    """
    # keys for the different scales
    nucleus_seg_key_full = 't00000/s00/0/cells'
    nucleus_seg_key = 't00000/s00/%i/cells' % nucleus_seg_scale
    raw_key_full = 't00000/s00/0/cells'
    raw_key = 't00000/s00/%i/cells' % raw_scale
    chromatin_key_full = 't00000/s00/0/cells'
    chromatin_key = 't00000/s00/%i/cells' % chromatin_scale

    # filter table
    table = run_all_filters(table, min_size, max_size, max_bb, None, None)

-    if raw_path != '':
-        assert raw_key is not None and scale_factor_raw is not None
    # get scale factors
+    if raw_path is not None:
        log("Have raw path; compute intensity features")
        # NOTE for now we can hard-code the resolution for the raw data here,
        # but we might need to change this if we get additional dataset(s)
        raw_resolution = [0.025, 0.01, 0.01]
        scale_factor_raw = get_scale_factor(raw_path, raw_key_full, raw_key, raw_resolution)
        f_raw = h5py.File(raw_path, 'r')
        ds_raw = f_raw[raw_key]
    else:
-        f_raw = ds_raw = None
        log("Don't have raw path; do not compute intensity features")
+        scale_factor_raw = f_raw = ds_raw = None

    if chromatin_path is not None:
        log("Have chromatin path; compute chromatin features")
        scale_factor_chromatin = get_scale_factor(chromatin_path, chromatin_key_full,
                                                  chromatin_key, chromatin_resolution)
        f_chromatin = h5py.File(chromatin_path, 'r')
        ds_chromatin = f_chromatin[chromatin_key]
    else:
        log("Don't have chromatin path; do not compute chromatin features")
        scale_factor_chromatin = f_chromatin = ds_chromatin = None

-    with h5py.File(segmentation_path, 'r') as f:
-        ds = f[seg_key]
    log("Computing morphology features")
    scale_factor_nucleus_seg = get_scale_factor(nucleus_segmentation_path, nucleus_seg_key_full,
                                                nucleus_seg_key, nucleus_resolution)
+    with h5py.File(nucleus_segmentation_path, 'r') as f:
+        ds = f[nucleus_seg_key]

        stats = []
        for label_a, label_b in zip(label_starts, label_stops):
            log("Computing features from label-id %i to %i" % (label_a, label_b))
            stats.extend(morphology_features_for_label_range(table, ds, ds_raw,
-                                                             scale_factor_seg, scale_factor_raw,
+                                                             ds_chromatin, None,
+                                                             scale_factor_nucleus_seg, scale_factor_raw,
+                                                             scale_factor_chromatin, None,
                                                             label_a, label_b))
-    if f_raw is not None:
-        f_raw.close()
+    for var in f_raw, f_chromatin:
+        if var is not None:
+            var.close()

    # convert to pandas table and add column names
    stats = pd.DataFrame(stats)
-    columns = ['label_id',
-               'shape_volume_in_microns', 'shape_extent',
-               'shape_surface_area', 'shape_sphericity']
-    if raw_path != '':
-        columns += ['intensity_mean', 'intensity_st_dev']
-    stats.columns = columns
+    stats.columns = generate_column_names(raw_path, chromatin_path, None)

    return stats


-def morphology_impl(segmentation_path, raw_path, table,
-                    mapping_path,
-                    min_size, max_size,
-                    resolution, raw_scale, seg_scale,
-                    label_starts, label_stops):
-    """ Compute morphology features for a segmentation.
-    Can compute features for multiple label ranges. If you want to
-    compute features for the full label range, pass
-    'label_starts=[0]' and 'label_stops=[number_of_labels]'
-    Arguments:
-        segmentation_path [str] - path to segmentation stored as h5.
-        raw_path [str] - path to raw data stored as h5.
-            Pass 'None' if you don't want to compute features based on raw data.
-        table [pd.DataFrame] - table with default attributes
-            (sizes, center of mass and bounding boxes) for segmentation
-        mapping_path [str] - path to - path to nucleus id mapping.
-            Pass 'None' if not relevant.
-        min_size [int] - minimal size for objects used in calcualtion
-        max_size [int] - maximum size for objects used in calcualtion
-        resolution [listlike] - resolution in nanometer.
-            Must be given in [Z, Y, X].
-        raw_scale [int] - scale level of the raw data
-        seg_scale [int] - scale level of the segmentation
-        label_starts [listlike] - list with label start positions
-        label_stops [listlike] - list with label stop positions
-    """
-    # keys to segmentation and raw data for the different scales
-    seg_key_full = 't00000/s00/0/cells'
-    seg_key = 't00000/s00/%i/cells' % seg_scale
+def morphology_impl_cell(cell_segmentation_path, raw_path, nucleus_segmentation_path,
+                         table, mapping_path, region_mapping_path,
+                         min_size, max_size, max_bb,
+                         cell_resolution, nucleus_resolution,
+                         cell_seg_scale, raw_scale, nucleus_seg_scale,
+                         label_starts, label_stops):
    """ Compute morphology features for cell segmentation.

    Can compute features for multiple label ranges. If you want to
    compute features for the full label range, pass
    'label_starts=[0]' and 'label_stops=[number_of_labels]'

    Arguments:
        cell_segmentation_path [str] - path to cell segmentation stored as h5.
        raw_path [str] - path to raw data stored as h5.
            Pass 'None' if you don't want to compute features based on raw data.
        nucleus_segmentation_path [str] - path to nucleus segmentation data stored as h5.
            Pass 'None' if you don't want to exclude the region covered by the nucleus from intensity &
            texture calculations.
        table [pd.DataFrame] - table with default attributes
            (sizes, center of mass and bounding boxes) for segmentation
        mapping_path [str] - path to - path to nucleus id mapping.
            Pass 'None' if not relevant.
        region_mapping_path [str] - path to - path to cellid to region mapping
            Pass 'None' if not relevant
        min_size [int] - minimal size for objects used in calculation
        max_size [int] - maximum size for objects used in calculation
        max_bb [int] - maximum total volume of bounding box for objects used in calculation
        cell_resolution [listlike] - resolution in nanometer.
            Must be given in [Z, Y, X].
        nucleus_resolution [listlike] - resolution in nanometer.
            Must be given in [Z, Y, X].
        cell_seg_scale [int] - scale level of the segmentation
        raw_scale [int] - scale level of the raw data
        nucleus_seg_scale [int] - scale level of the segmentation.
        label_starts [listlike] - list with label start positions
        label_stops [listlike] - list with label stop positions
    """
    # keys for the different scales
    cell_seg_key_full = 't00000/s00/0/cells'
    cell_seg_key = 't00000/s00/%i/cells' % cell_seg_scale
    raw_key_full = 't00000/s00/0/cells'
    raw_key = 't00000/s00/%i/cells' % raw_scale
    nucleus_seg_key_full = 't00000/s00/0/cells'
    nucleus_seg_key = 't00000/s00/%i/cells' % nucleus_seg_scale

-    # get scale factor for the segmentation
-    scale_factor_seg = get_scale_factor(segmentation_path, seg_key_full, seg_key, resolution)
+    # filter table
+    table = run_all_filters(table, min_size, max_size, max_bb, mapping_path, region_mapping_path)

-    # get scale factor for raw data (if it's given)
-    if raw_path != '':
+    # get scale factors
+    if raw_path is not None:
        log("Have raw path; compute intensity features")
        # NOTE for now we can hard-code the resolution for the raw data here,
        # but we might need to change this if we get additional dataset(s)
        raw_resolution = [0.025, 0.01, 0.01]
        scale_factor_raw = get_scale_factor(raw_path, raw_key_full, raw_key, raw_resolution)
+        f_raw = h5py.File(raw_path, 'r')
+        ds_raw = f_raw[raw_key]
    else:
        log("Don't have raw path; do not compute intensity features")
-        raw_resolution = scale_factor_raw = None
+        scale_factor_raw = f_raw = ds_raw = None

+    if nucleus_segmentation_path is not None:
+        log("Have nucleus path; exclude nucleus for intensity measures")
+        scale_factor_nucleus = get_scale_factor(nucleus_segmentation_path, nucleus_seg_key_full,
+                                                nucleus_seg_key, nucleus_resolution)
+        f_nucleus = h5py.File(nucleus_segmentation_path, 'r')
+        ds_nucleus = f_nucleus[nucleus_seg_key]
+    else:
+        log("Don't have exclude path; don't exclude nucleus area for intensity measures")
+        scale_factor_nucleus = f_nucleus = ds_nucleus = None

-    # remove zero label if it exists
-    table = table.loc[table['label_id'] != 0, :]
+    log("Computing morphology features")
+    scale_factor_cell_seg = get_scale_factor(cell_segmentation_path, cell_seg_key_full,
+                                             cell_seg_key, cell_resolution)
+    with h5py.File(cell_segmentation_path, 'r') as f:
+        ds = f[cell_seg_key]

-    # if we have a mappin, only keep objects in the mapping
-    # (i.e cells that have assigned nuclei)
-    if mapping_path != '':
-        log("Have mapping path %s" % mapping_path)
-        table = filter_table_from_mapping(table, mapping_path)
-        log("Number of labels after filter with mapping: %i" % table.shape[0])
-
-    # filter by size
-    table = filter_table(table, min_size, max_size)
-    log("Number of labels after size filte: %i" % table.shape[0])
+        stats = []
+        for label_a, label_b in zip(label_starts, label_stops):
+            log("Computing features from label-id %i to %i" % (label_a, label_b))
+            stats.extend(morphology_features_for_label_range(table, ds, ds_raw,
+                                                             None, ds_nucleus,
+                                                             scale_factor_cell_seg, scale_factor_raw,
+                                                             None, scale_factor_nucleus,
+                                                             label_a, label_b))
+    for var in f_raw, f_nucleus:
+        if var is not None:
+            var.close()

+    # convert to pandas table and add column names
+    stats = pd.DataFrame(stats)
+    stats.columns = generate_column_names(raw_path, None, nucleus_segmentation_path)

-    log("Computing morphology features")
-    stats = compute_morphology_features(table, segmentation_path, raw_path,
-                                        seg_key, raw_key,
-                                        scale_factor_seg, scale_factor_raw,
-                                        label_starts, label_stops)
    return stats
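To give a sense of how the new entry points are meant to be called, here is a hypothetical invocation of morphology_impl_nucleus. All paths, resolutions, and scale levels below are made up for illustration and are not from this commit; the import path is likewise assumed.

import pandas as pd
from morphology_impl import morphology_impl_nucleus  # hypothetical import path

# default attribute table with label_id, sizes, and bounding boxes
table = pd.read_csv('nuclei_default.csv', sep='\t')
number_of_labels = int(table['label_id'].max()) + 1

stats = morphology_impl_nucleus('nuclei_seg.h5', 'raw.h5', 'chromatin_seg.h5',
                                table,
                                min_size=None, max_size=None, max_bb=None,  # no size filtering in this sketch
                                nucleus_resolution=[0.1, 0.08, 0.08],
                                chromatin_resolution=[0.1, 0.08, 0.08],
                                nucleus_seg_scale=0, raw_scale=3, chromatin_scale=0,
                                label_starts=[0], label_stops=[number_of_labels])
stats.to_csv('morphology_nuclei.csv', sep='\t', index=False)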