Skip to content
Snippets Groups Projects

Morphology

Merged Kimberly Isobel Meechan requested to merge morphology into master
1 file
+ 87
253
Compare changes
  • Side-by-side
  • Inline
@@ -37,6 +37,7 @@ def get_scale_factor(path, key_full, key, resolution):
shape)]
return scale_factor
def filter_table(table, min_size, max_size):
if max_size is None:
table = table.loc[table['n_pixels'] >= min_size, :]
@@ -87,8 +88,8 @@ def filter_table_region(table, region_path, regions=('empty', 'yolk', 'neuropil'
return table
def run_all_filters(table, min_size, max_size, max_bb, mapping_path, region_mapping_path):
def run_all_filters(table, min_size, max_size, max_bb, mapping_path, region_mapping_path):
# remove zero label if present
table = table.loc[table['label_id'] != 0, :]
@@ -127,7 +128,8 @@ def load_data(ds, row, scale):
# load the data from the bounding box
return ds[bb]
def generate_column_names (raw_path, chromatin_path):
def generate_column_names(raw_path, chromatin_path):
columns = ['label_id']
morph_columns = ['shape_volume_in_microns', 'shape_extent', 'shape_equiv_diameter',
'shape_major_axis', 'shape_minor_axis', 'shape_surface_area', 'shape_sphericity',
@@ -161,7 +163,7 @@ def generate_column_names (raw_path, chromatin_path):
return columns
# @profile
def morphology_row_features(mask, scale):
# Calculate stats from skimage
ski_morph = regionprops(mask.astype('uint8'))
@@ -193,7 +195,7 @@ def morphology_row_features(mask, scale):
return (volume_in_microns, extent, equiv_diameter, major_axis,
minor_axis, surface_area, sphericity, max_radius)
# @profile
def intensity_row_features(raw, mask):
intensity_vals_in_mask = raw[mask]
# mean and stdev - use float64 to avoid silent overflow errors
@@ -208,21 +210,13 @@ def intensity_row_features(raw, mask):
return mean_intensity, st_dev, median_intensity, interquartile_range_intensity, total
# @profile
def radial_intensity_row_features(raw, mask, scale, stops=(0.0, 0.25, 0.5, 0.75, 1.0)):
result = ()
edt = distance_transform_edt(mask, sampling=scale, return_distances=True)
edt = edt / np.max(edt)
#TESTING
# from skimage.io import imsave
# imsave('Z:\\Kimberly\\test_edt.tif', edt.astype('uint8'))
# import napari
# with napari.gui_qt():
# viewer = napari.view_image(edt)
bottoms = stops[0:len(stops) - 1]
tops = stops[1:]
@@ -233,7 +227,7 @@ def radial_intensity_row_features(raw, mask, scale, stops=(0.0, 0.25, 0.5, 0.75,
return result
# @profile
def texture_row_features(raw, mask):
# errors if there are small, isolated spots (because I'm using ignore zeros as true)
# so here remove components that are < 10 pixels
@@ -257,7 +251,7 @@ def texture_row_features(raw, mask):
return tuple(hara)
# @profile
def radial_distribution(edt, mask, stops=(0.0, 0.25, 0.5, 0.75, 1.0)):
result = ()
@@ -271,7 +265,7 @@ def radial_distribution(edt, mask, stops=(0.0, 0.25, 0.5, 0.75, 1.0)):
return result
# @profile
def chromatin_row_features(chromatin, edt, raw, scale_chromatin):
result = ()
@@ -282,7 +276,6 @@ def chromatin_row_features(chromatin, edt, raw, scale_chromatin):
result += radial_distribution(edt, chromatin)
if raw is not None:
# resize the chromatin masks if not same size as raw
chromatin_type = chromatin.dtype
if chromatin.shape != raw.shape:
@@ -295,7 +288,7 @@ def chromatin_row_features(chromatin, edt, raw, scale_chromatin):
return result
# @profile
# compute morphology (and intensity features) for label range
def morphology_features_for_label_range(table, ds, ds_raw,
ds_chromatin,
@@ -341,11 +334,6 @@ def morphology_features_for_label_range(table, ds, ds_raw,
# remove nucleus area form seg_mask
seg_mask[exclude] = False
# import napari
# with napari.gui_qt():
# viewer = napari.view_image(exclude, name='nucleus_resized')
# viewer.add_image(seg_mask, name='cell')
# compute the intensity features from raw data and segmentation mask
if ds_raw is not None:
raw = load_data(ds_raw, row, scale_factor_raw)
@@ -396,13 +384,40 @@ def morphology_features_for_label_range(table, ds, ds_raw,
stats.append(result)
return stats
def morphology_impl_nucleus (nucleus_segmentation_path, raw_path, chromatin_path,
table,
min_size, max_size,
max_bb,
nucleus_resolution, chromatin_resolution,
nucleus_seg_scale, raw_scale, chromatin_scale,
label_starts, label_stops):
def morphology_impl_nucleus(nucleus_segmentation_path, raw_path, chromatin_path,
table,
min_size, max_size,
max_bb,
nucleus_resolution, chromatin_resolution,
nucleus_seg_scale, raw_scale, chromatin_scale,
label_starts, label_stops):
""" Compute morphology features for nucleus segmentation.
Can compute features for multiple label ranges. If you want to
compute features for the full label range, pass
'label_starts=[0]' and 'label_stops=[number_of_labels]'
Arguments:
nucleus_segmentation_path [str] - path to nucleus segmentation data stored as h5.
raw_path [str] - path to raw data stored as h5.
Pass 'None' if you don't want to compute features based on raw data.
chromatin_path [str] - path to chromatin segmentation data stored as h5.
table [pd.DataFrame] - table with default attributes
(sizes, center of mass and bounding boxes) for segmentation
min_size [int] - minimal size for objects used in calculation
max_size [int] - maximum size for objects used in calculation
max_bb [int] - maximum total volume of bounding box for objects used in calculation
nucleus_resolution [listlike] - resolution in nanometer.
Must be given in [Z, Y, X].
chromatin_resolution [listlike] - resolution in nanometer.
Must be given in [Z, Y, X].
nucleus_seg_scale [int] - scale level of the segmentation.
raw_scale [int] - scale level of the raw data
chromatin_scale [int] - scale level of the segmentation.
label_starts [listlike] - list with label start positions
label_stops [listlike] - list with label stop positions
"""
# keys for the different scales
nucleus_seg_key_full = 't00000/s00/0/cells'
@@ -465,15 +480,48 @@ def morphology_impl_nucleus (nucleus_segmentation_path, raw_path, chromatin_path
return stats
def morphology_impl_cell (cell_segmentation_path, raw_path,
nucleus_segmentation_path,
table, mapping_path,
region_mapping_path,
min_size, max_size,
max_bb,
cell_resolution, nucleus_resolution,
cell_seg_scale, raw_scale, nucleus_seg_scale,
label_starts, label_stops):
def morphology_impl_cell(cell_segmentation_path, raw_path,
nucleus_segmentation_path,
table, mapping_path,
region_mapping_path,
min_size, max_size,
max_bb,
cell_resolution, nucleus_resolution,
cell_seg_scale, raw_scale, nucleus_seg_scale,
label_starts, label_stops):
""" Compute morphology features for cell segmentation.
Can compute features for multiple label ranges. If you want to
compute features for the full label range, pass
'label_starts=[0]' and 'label_stops=[number_of_labels]'
Arguments:
cell_segmentation_path [str] - path to cell segmentation stored as h5.
raw_path [str] - path to raw data stored as h5.
Pass 'None' if you don't want to compute features based on raw data.
nucleus_segmentation_path [str] - path to nucleus segmentation data stored as h5.
Pass 'None' if you don't want to exclude the region covered by the nucleus from intensity &
texture calculations.
table [pd.DataFrame] - table with default attributes
(sizes, center of mass and bounding boxes) for segmentation
mapping_path [str] - path to - path to nucleus id mapping.
Pass 'None' if not relevant.
region_mapping_path [str] - path to - path to cellid to region mapping
Pass 'None' if not relevant
min_size [int] - minimal size for objects used in calculation
max_size [int] - maximum size for objects used in calculation
max_bb [int] - maximum total volume of bounding box for objects used in calculation
cell_resolution [listlike] - resolution in nanometer.
Must be given in [Z, Y, X].
nucleus_resolution [listlike] - resolution in nanometer.
Must be given in [Z, Y, X].
cell_seg_scale [int] - scale level of the segmentation
raw_scale [int] - scale level of the raw data
nucleus_seg_scale [int] - scale level of the segmentation.
label_starts [listlike] - list with label start positions
label_stops [listlike] - list with label stop positions
"""
# keys for the different scales
cell_seg_key_full = 't00000/s00/0/cells'
@@ -536,219 +584,5 @@ def morphology_impl_cell (cell_segmentation_path, raw_path,
return stats
def morphology_impl(segmentation_path, raw_path, chromatin_path,
exclude_nuc_path,
table, mapping_path,
region_mapping_path,
min_size, max_size,
max_bb,
resolution, raw_scale, seg_scale,
chromatin_scale, exclude_nuc_scale,
label_starts, label_stops):
""" Compute morphology features for a segmentation.
Can compute features for multiple label ranges. If you want to
compute features for the full label range, pass
'label_starts=[0]' and 'label_stops=[number_of_labels]'
Arguments:
segmentation_path [str] - path to segmentation stored as h5.
raw_path [str] - path to raw data stored as h5.
Pass 'None' if you don't want to compute features based on raw data.
chromatin_path [str] - path to chromatin segmentation data stored as h5.
Pass 'None' if you don't want to compute features based on chromatin.
exclude_nuc_path [str] - path to nucleus segmentation data stored as h5.
Pass 'None' if you don't want to use the nucleus segmentation as a mask to exclude this region.
table [pd.DataFrame] - table with default attributes
(sizes, center of mass and bounding boxes) for segmentation
mapping_path [str] - path to - path to nucleus id mapping.
Pass 'None' if not relevant.
region_mapping_path [str] - path to - path to cellid to region mapping
Pass 'None' if not relevant
min_size [int] - minimal size for objects used in calculation
max_size [int] - maximum size for objects used in calculation
max_bb [int] - maximum total volume of bounding box for objects used in calculation
resolution [listlike] - resolution in nanometer.
Must be given in [Z, Y, X].
raw_scale [int] - scale level of the raw data
seg_scale [int] - scale level of the segmentation
chromatin_scale [int] - scale level of the chromatin segmentation
exclude_nuc_scale [int] - scale level of the nucleus segmentation
label_starts [listlike] - list with label start positions
label_stops [listlike] - list with label stop positions
"""
# keys to segmentation and raw data for the different scales
seg_key_full = 't00000/s00/0/cells'
seg_key = 't00000/s00/%i/cells' % seg_scale
raw_key_full = 't00000/s00/0/cells'
raw_key = 't00000/s00/%i/cells' % raw_scale
chromatin_key_full = 't00000/s00/0/cells'
chromatin_key = 't00000/s00/%i/cells' % chromatin_scale
exclude_key_full = 't00000/s00/0/cells'
exclude_key = 't00000/s00/%i/cells' % exclude_nuc_scale
# get scale factor for the segmentation
scale_factor_seg = get_scale_factor(segmentation_path, seg_key_full, seg_key, resolution)
# get scale factor for raw data (if it's given)
if raw_path != '':
log("Have raw path; compute intensity features")
# NOTE for now we can hard-code the resolution for the raw data here,
# but we might need to change this if we get additional dataset(s)
raw_resolution = [0.025, 0.01, 0.01]
scale_factor_raw = get_scale_factor(raw_path, raw_key_full, raw_key, raw_resolution)
else:
log("Don't have raw path; do not compute intensity features")
raw_resolution = scale_factor_raw = None
# get scale factor for chromatin (if it's given)
if chromatin_path != '':
log("Have chromatin path; compute chromatin features")
# NOTE for now we can hard-code the resolution for the chromatin data here,
# but we might need to change this if we get additional dataset(s)
chromatin_resolution = [0.025, 0.02, 0.02]
scale_factor_chromatin = get_scale_factor(chromatin_path, chromatin_key_full, chromatin_key,
chromatin_resolution)
else:
log("Don't have chromatin path; do not compute chromatin features")
chromatin_resolution = scale_factor_chromatin = None
# get scale factor for nuclei segmentation (to be used to exclude nucleus) if given
if exclude_nuc_path != '':
log("Have nucleus path; exclude nucleus for intensity measures")
# NOTE for now we can hard-code the resolution for the nuclei data here,
# but we might need to change this if we get additional dataset(s)
exclude_resolution = [0.1, 0.08, 0.08]
scale_factor_exclude = get_scale_factor(exclude_nuc_path, exclude_key_full, exclude_key,
exclude_resolution)
else:
log("Don't have exclude path; don't exclude nucleus area for intensity measures")
scale_factor_exclude = scale_factor_exclude = None
# remove zero label if it exists
table = table.loc[table['label_id'] != 0, :]
# if we have a mapping, only keep objects in the mapping
# (i.e cells that have assigned nuclei)
if mapping_path != '':
log("Have mapping path %s" % mapping_path)
table = filter_table_from_mapping(table, mapping_path)
log("Number of labels after filter with mapping: %i" % table.shape[0])
if region_mapping_path != '':
log("Have region mapping path %s" % region_mapping_path)
table = filter_table_region(table, region_mapping_path)
log("Number of labels after region filter: %i" % table.shape[0])
# filter by size
table = filter_table(table, min_size, max_size)
log("Number of labels after size filter: %i" % table.shape[0])
# filter by bounding box size
if max_bb is not None:
table = filter_table_bb(table, max_bb)
log("Number of labels after bounding box size filter %i" % table.shape[0])
log("Computing morphology features")
stats = compute_morphology_features(table, segmentation_path, raw_path,
chromatin_path, exclude_nuc_path,
seg_key, raw_key, chromatin_key,
exclude_key,
scale_factor_seg, scale_factor_raw,
scale_factor_chromatin,
scale_factor_exclude,
label_starts, label_stops)
return stats
def compute_morphology_features(table, segmentation_path, raw_path,
chromatin_path, exclude_nuc_path,
seg_key, raw_key, chromatin_key,
exclude_key,
scale_factor_seg, scale_factor_raw,
scale_factor_chromatin,
scale_factor_exclude,
label_starts, label_stops):
if raw_path != '':
assert raw_key is not None and scale_factor_raw is not None
f_raw = h5py.File(raw_path, 'r')
ds_raw = f_raw[raw_key]
else:
f_raw = ds_raw = None
if chromatin_path != '':
assert chromatin_key is not None and scale_factor_chromatin is not None
f_chromatin = h5py.File(chromatin_path, 'r')
ds_chromatin = f_chromatin[chromatin_key]
else:
f_chromatin = ds_chromatin = None
if exclude_nuc_path != '':
assert exclude_key is not None and scale_factor_exclude is not None
f_exclude = h5py.File(exclude_nuc_path, 'r')
ds_exclude = f_exclude[exclude_key]
else:
f_exclude = ds_exclude = None
with h5py.File(segmentation_path, 'r') as f:
ds = f[seg_key]
stats = []
for label_a, label_b in zip(label_starts, label_stops):
log("Computing features from label-id %i to %i" % (label_a, label_b))
stats.extend(morphology_features_for_label_range(table, ds, ds_raw,
ds_chromatin,
ds_exclude,
scale_factor_seg, scale_factor_raw,
scale_factor_chromatin,
scale_factor_exclude,
label_a, label_b))
if f_raw is not None:
f_raw.close()
if f_chromatin is not None:
f_chromatin.close()
if f_exclude is not None:
f_exclude.close()
# convert to pandas table and add column names
stats = pd.DataFrame(stats)
columns = ['label_id']
morph_columns = ['shape_volume_in_microns', 'shape_extent', 'shape_equiv_diameter',
'shape_major_axis', 'shape_minor_axis', 'shape_surface_area', 'shape_sphericity',
'shape_max_radius']
columns += morph_columns
if raw_path != '':
intensity_columns = ['intensity_mean', 'intensity_st_dev', 'intensity_median', 'intensity_iqr',
'intensity_total']
texture_columns = ['texture_hara%s' % x for x in range(1, 14)]
columns += intensity_columns
# radial intensity columns
for val in [25, 50, 75, 100]:
columns += ['%s_%s' % (var, val) for var in intensity_columns]
columns += texture_columns
if chromatin_path != '':
edt_columns = ['shape_edt_mean', 'shape_edt_stdev', 'shape_edt_median', 'shape_edt_iqr']
edt_columns += ['shape_percent_%s' % var for var in [25, 50, 75, 100]]
for phase in ['_het', '_eu']:
columns += [var + phase for var in morph_columns]
columns += [var + phase for var in edt_columns]
if raw_path != '':
columns += [var + phase for var in intensity_columns]
columns += [var + phase for var in texture_columns]
stats.columns = columns
return stats
if __name__ == '__main__':
pass
Loading