Kimberly Isobel Meechan · Constantin Pape
--- a/scripts/extension/attributes/morphology_impl.py

+ 87

− 253
+++ b/scripts/extension/attributes/morphology_impl.py

+ 87

− 253
 @@ -37,6 +37,7 @@ def get_scale_factor(path, key_full, key, resolution):
                                           shape)]
    return scale_factor

+
 def filter_table(table, min_size, max_size):
    if max_size is None:
        table = table.loc[table['n_pixels'] >= min_size, :]
 @@ -87,8 +88,8 @@ def filter_table_region(table, region_path, regions=('empty', 'yolk', 'neuropil'

    return table

-def run_all_filters(table, min_size, max_size, max_bb, mapping_path, region_mapping_path):

+def run_all_filters(table, min_size, max_size, max_bb, mapping_path, region_mapping_path):
    # remove zero label if present
    table = table.loc[table['label_id'] != 0, :]

 @@ -127,7 +128,8 @@ def load_data(ds, row, scale):
    # load the data from the bounding box
    return ds[bb]

-def generate_column_names (raw_path, chromatin_path):
+
+def generate_column_names(raw_path, chromatin_path):
    columns = ['label_id']
    morph_columns = ['shape_volume_in_microns', 'shape_extent', 'shape_equiv_diameter',
                     'shape_major_axis', 'shape_minor_axis', 'shape_surface_area', 'shape_sphericity',
 @@ -161,7 +163,7 @@ def generate_column_names (raw_path, chromatin_path):

    return columns

-# @profile
+
 def morphology_row_features(mask, scale):
    # Calculate stats from skimage
    ski_morph = regionprops(mask.astype('uint8'))
 @@ -193,7 +195,7 @@ def morphology_row_features(mask, scale):
    return (volume_in_microns, extent, equiv_diameter, major_axis,
            minor_axis, surface_area, sphericity, max_radius)

-# @profile
+
 def intensity_row_features(raw, mask):
    intensity_vals_in_mask = raw[mask]
    # mean and stdev - use float64 to avoid silent overflow errors
 @@ -208,21 +210,13 @@ def intensity_row_features(raw, mask):

    return mean_intensity, st_dev, median_intensity, interquartile_range_intensity, total

-# @profile
+
 def radial_intensity_row_features(raw, mask, scale, stops=(0.0, 0.25, 0.5, 0.75, 1.0)):
    result = ()

    edt = distance_transform_edt(mask, sampling=scale, return_distances=True)
    edt = edt / np.max(edt)

-
-    #TESTING
-    # from skimage.io import imsave
-    # imsave('Z:\\Kimberly\\test_edt.tif', edt.astype('uint8'))
-    # import napari
-    # with napari.gui_qt():
-    #     viewer = napari.view_image(edt)
-
    bottoms = stops[0:len(stops) - 1]
    tops = stops[1:]

 @@ -233,7 +227,7 @@ def radial_intensity_row_features(raw, mask, scale, stops=(0.0, 0.25, 0.5, 0.75,

    return result

-# @profile
+
 def texture_row_features(raw, mask):
    # errors if there are small, isolated spots (because I'm using ignore zeros as true)
    # so here remove components that are < 10 pixels
 @@ -257,7 +251,7 @@ def texture_row_features(raw, mask):

    return tuple(hara)

-# @profile
+
 def radial_distribution(edt, mask, stops=(0.0, 0.25, 0.5, 0.75, 1.0)):
    result = ()

 @@ -271,7 +265,7 @@ def radial_distribution(edt, mask, stops=(0.0, 0.25, 0.5, 0.75, 1.0)):

    return result

-# @profile
+
 def chromatin_row_features(chromatin, edt, raw, scale_chromatin):
    result = ()

 @@ -282,7 +276,6 @@ def chromatin_row_features(chromatin, edt, raw, scale_chromatin):
    result += radial_distribution(edt, chromatin)

    if raw is not None:
-
        # resize the chromatin masks if not same size as raw
        chromatin_type = chromatin.dtype
        if chromatin.shape != raw.shape:
 @@ -295,7 +288,7 @@ def chromatin_row_features(chromatin, edt, raw, scale_chromatin):

    return result

-# @profile
+
 # compute morphology (and intensity features) for label range
 def morphology_features_for_label_range(table, ds, ds_raw,
                                        ds_chromatin,
 @@ -341,11 +334,6 @@ def morphology_features_for_label_range(table, ds, ds_raw,
            # remove nucleus area form seg_mask
            seg_mask[exclude] = False

-            # import napari
-            # with napari.gui_qt():
-            #     viewer = napari.view_image(exclude, name='nucleus_resized')
-            #     viewer.add_image(seg_mask, name='cell')
-
        # compute the intensity features from raw data and segmentation mask
        if ds_raw is not None:
            raw = load_data(ds_raw, row, scale_factor_raw)
 @@ -396,13 +384,40 @@ def morphology_features_for_label_range(table, ds, ds_raw,
        stats.append(result)
    return stats

-def morphology_impl_nucleus (nucleus_segmentation_path, raw_path, chromatin_path,
-                    table,
-                    min_size, max_size,
-                    max_bb,
-                    nucleus_resolution, chromatin_resolution,
-                    nucleus_seg_scale, raw_scale, chromatin_scale,
-                    label_starts, label_stops):
+
+def morphology_impl_nucleus(nucleus_segmentation_path, raw_path, chromatin_path,
+                            table,
+                            min_size, max_size,
+                            max_bb,
+                            nucleus_resolution, chromatin_resolution,
+                            nucleus_seg_scale, raw_scale, chromatin_scale,
+                            label_starts, label_stops):
+    """ Compute morphology features for nucleus segmentation.
+
+       Can compute features for multiple label ranges. If you want to
+       compute features for the full label range, pass
+       'label_starts=[0]' and 'label_stops=[number_of_labels]'
+
+       Arguments:
+           nucleus_segmentation_path [str] - path to nucleus segmentation data stored as h5.
+           raw_path [str] - path to raw data stored as h5.
+               Pass 'None' if you don't want to compute features based on raw data.
+           chromatin_path [str] - path to chromatin segmentation data stored as h5.
+           table [pd.DataFrame] - table with default attributes
+               (sizes, center of mass and bounding boxes) for segmentation
+           min_size [int] - minimal size for objects used in calculation
+           max_size [int] - maximum size for objects used in calculation
+           max_bb [int] - maximum total volume of bounding box for objects used in calculation
+           nucleus_resolution [listlike] - resolution in nanometer.
+               Must be given in [Z, Y, X].
+           chromatin_resolution [listlike] - resolution in nanometer.
+               Must be given in [Z, Y, X].
+           nucleus_seg_scale [int] - scale level of the segmentation.
+           raw_scale [int] - scale level of the raw data
+           chromatin_scale [int] - scale level of the segmentation.
+           label_starts [listlike] - list with label start positions
+           label_stops [listlike] - list with label stop positions
+       """

    # keys for the different scales
    nucleus_seg_key_full = 't00000/s00/0/cells'
 @@ -465,15 +480,48 @@ def morphology_impl_nucleus (nucleus_segmentation_path, raw_path, chromatin_path

    return stats

-def morphology_impl_cell (cell_segmentation_path, raw_path,
-                    nucleus_segmentation_path,
-                    table, mapping_path,
-                    region_mapping_path,
-                    min_size, max_size,
-                    max_bb,
-                    cell_resolution, nucleus_resolution,
-                    cell_seg_scale, raw_scale, nucleus_seg_scale,
-                    label_starts, label_stops):
+
+def morphology_impl_cell(cell_segmentation_path, raw_path,
+                         nucleus_segmentation_path,
+                         table, mapping_path,
+                         region_mapping_path,
+                         min_size, max_size,
+                         max_bb,
+                         cell_resolution, nucleus_resolution,
+                         cell_seg_scale, raw_scale, nucleus_seg_scale,
+                         label_starts, label_stops):
+    """ Compute morphology features for cell segmentation.
+
+       Can compute features for multiple label ranges. If you want to
+       compute features for the full label range, pass
+       'label_starts=[0]' and 'label_stops=[number_of_labels]'
+
+       Arguments:
+           cell_segmentation_path [str] - path to cell segmentation stored as h5.
+           raw_path [str] - path to raw data stored as h5.
+               Pass 'None' if you don't want to compute features based on raw data.
+           nucleus_segmentation_path [str] - path to nucleus segmentation data stored as h5.
+               Pass 'None' if you don't want to exclude the region covered by the nucleus from intensity &
+               texture calculations.
+           table [pd.DataFrame] - table with default attributes
+               (sizes, center of mass and bounding boxes) for segmentation
+           mapping_path [str] - path to - path to nucleus id mapping.
+               Pass 'None' if not relevant.
+           region_mapping_path [str] - path to - path to cellid to region mapping
+               Pass 'None' if not relevant
+           min_size [int] - minimal size for objects used in calculation
+           max_size [int] - maximum size for objects used in calculation
+           max_bb [int] - maximum total volume of bounding box for objects used in calculation
+           cell_resolution [listlike] - resolution in nanometer.
+               Must be given in [Z, Y, X].
+           nucleus_resolution [listlike] - resolution in nanometer.
+               Must be given in [Z, Y, X].
+           cell_seg_scale [int] - scale level of the segmentation
+           raw_scale [int] - scale level of the raw data
+           nucleus_seg_scale [int] - scale level of the segmentation.
+           label_starts [listlike] - list with label start positions
+           label_stops [listlike] - list with label stop positions
+       """

    # keys for the different scales
    cell_seg_key_full = 't00000/s00/0/cells'
 @@ -536,219 +584,5 @@ def morphology_impl_cell (cell_segmentation_path, raw_path,
    return stats


-def morphology_impl(segmentation_path, raw_path, chromatin_path,
-                    exclude_nuc_path,
-                    table, mapping_path,
-                    region_mapping_path,
-                    min_size, max_size,
-                    max_bb,
-                    resolution, raw_scale, seg_scale,
-                    chromatin_scale, exclude_nuc_scale,
-                    label_starts, label_stops):
-    """ Compute morphology features for a segmentation.
-
-    Can compute features for multiple label ranges. If you want to
-    compute features for the full label range, pass
-    'label_starts=[0]' and 'label_stops=[number_of_labels]'
-
-    Arguments:
-        segmentation_path [str] - path to segmentation stored as h5.
-        raw_path [str] - path to raw data stored as h5.
-            Pass 'None' if you don't want to compute features based on raw data.
-        chromatin_path [str] - path to chromatin segmentation data stored as h5.
-            Pass 'None' if you don't want to compute features based on chromatin.
-        exclude_nuc_path [str] - path to nucleus segmentation data stored as h5.
-            Pass 'None' if you don't want to use the nucleus segmentation as a mask to exclude this region.
-        table [pd.DataFrame] - table with default attributes
-            (sizes, center of mass and bounding boxes) for segmentation
-        mapping_path [str] - path to - path to nucleus id mapping.
-            Pass 'None' if not relevant.
-        region_mapping_path [str] - path to - path to cellid to region mapping
-            Pass 'None' if not relevant
-        min_size [int] - minimal size for objects used in calculation
-        max_size [int] - maximum size for objects used in calculation
-        max_bb [int] - maximum total volume of bounding box for objects used in calculation
-        resolution [listlike] - resolution in nanometer.
-            Must be given in [Z, Y, X].
-        raw_scale [int] - scale level of the raw data
-        seg_scale [int] - scale level of the segmentation
-        chromatin_scale [int] - scale level of the chromatin segmentation
-        exclude_nuc_scale [int] - scale level of the nucleus segmentation
-        label_starts [listlike] - list with label start positions
-        label_stops [listlike] - list with label stop positions
-    """
-
-    # keys to segmentation and raw data for the different scales
-    seg_key_full = 't00000/s00/0/cells'
-    seg_key = 't00000/s00/%i/cells' % seg_scale
-    raw_key_full = 't00000/s00/0/cells'
-    raw_key = 't00000/s00/%i/cells' % raw_scale
-    chromatin_key_full = 't00000/s00/0/cells'
-    chromatin_key = 't00000/s00/%i/cells' % chromatin_scale
-    exclude_key_full = 't00000/s00/0/cells'
-    exclude_key = 't00000/s00/%i/cells' % exclude_nuc_scale
-
-    # get scale factor for the segmentation
-    scale_factor_seg = get_scale_factor(segmentation_path, seg_key_full, seg_key, resolution)
-
-    # get scale factor for raw data (if it's given)
-    if raw_path != '':
-        log("Have raw path; compute intensity features")
-        # NOTE for now we can hard-code the resolution for the raw data here,
-        # but we might need to change this if we get additional dataset(s)
-        raw_resolution = [0.025, 0.01, 0.01]
-        scale_factor_raw = get_scale_factor(raw_path, raw_key_full, raw_key, raw_resolution)
-    else:
-        log("Don't have raw path; do not compute intensity features")
-        raw_resolution = scale_factor_raw = None
-
-    # get scale factor for chromatin (if it's given)
-    if chromatin_path != '':
-        log("Have chromatin path; compute chromatin features")
-        # NOTE for now we can hard-code the resolution for the chromatin data here,
-        # but we might need to change this if we get additional dataset(s)
-        chromatin_resolution = [0.025, 0.02, 0.02]
-        scale_factor_chromatin = get_scale_factor(chromatin_path, chromatin_key_full, chromatin_key,
-                                                  chromatin_resolution)
-    else:
-        log("Don't have chromatin path; do not compute chromatin features")
-        chromatin_resolution = scale_factor_chromatin = None
-
-    # get scale factor for nuclei segmentation (to be used to exclude nucleus) if given
-    if exclude_nuc_path != '':
-        log("Have nucleus path; exclude nucleus for intensity measures")
-        # NOTE for now we can hard-code the resolution for the nuclei data here,
-        # but we might need to change this if we get additional dataset(s)
-        exclude_resolution = [0.1, 0.08, 0.08]
-        scale_factor_exclude = get_scale_factor(exclude_nuc_path, exclude_key_full, exclude_key,
-                                                exclude_resolution)
-    else:
-        log("Don't have exclude path; don't exclude nucleus area for intensity measures")
-        scale_factor_exclude = scale_factor_exclude = None
-
-    # remove zero label if it exists
-    table = table.loc[table['label_id'] != 0, :]
-
-    # if we have a mapping, only keep objects in the mapping
-    # (i.e cells that have assigned nuclei)
-    if mapping_path != '':
-        log("Have mapping path %s" % mapping_path)
-        table = filter_table_from_mapping(table, mapping_path)
-        log("Number of labels after filter with mapping: %i" % table.shape[0])
-
-    if region_mapping_path != '':
-        log("Have region mapping path %s" % region_mapping_path)
-        table = filter_table_region(table, region_mapping_path)
-        log("Number of labels after region filter: %i" % table.shape[0])
-
-    # filter by size
-    table = filter_table(table, min_size, max_size)
-    log("Number of labels after size filter: %i" % table.shape[0])
-
-    # filter by bounding box size
-    if max_bb is not None:
-        table = filter_table_bb(table, max_bb)
-        log("Number of labels after bounding box size filter %i" % table.shape[0])
-
-    log("Computing morphology features")
-    stats = compute_morphology_features(table, segmentation_path, raw_path,
-                                        chromatin_path, exclude_nuc_path,
-                                        seg_key, raw_key, chromatin_key,
-                                        exclude_key,
-                                        scale_factor_seg, scale_factor_raw,
-                                        scale_factor_chromatin,
-                                        scale_factor_exclude,
-                                        label_starts, label_stops)
-    return stats
-
-def compute_morphology_features(table, segmentation_path, raw_path,
-                                chromatin_path, exclude_nuc_path,
-                                seg_key, raw_key, chromatin_key,
-                                exclude_key,
-                                scale_factor_seg, scale_factor_raw,
-                                scale_factor_chromatin,
-                                scale_factor_exclude,
-                                label_starts, label_stops):
-    if raw_path != '':
-        assert raw_key is not None and scale_factor_raw is not None
-        f_raw = h5py.File(raw_path, 'r')
-        ds_raw = f_raw[raw_key]
-    else:
-        f_raw = ds_raw = None
-
-    if chromatin_path != '':
-        assert chromatin_key is not None and scale_factor_chromatin is not None
-        f_chromatin = h5py.File(chromatin_path, 'r')
-        ds_chromatin = f_chromatin[chromatin_key]
-    else:
-        f_chromatin = ds_chromatin = None
-
-    if exclude_nuc_path != '':
-        assert exclude_key is not None and scale_factor_exclude is not None
-        f_exclude = h5py.File(exclude_nuc_path, 'r')
-        ds_exclude = f_exclude[exclude_key]
-
-    else:
-        f_exclude = ds_exclude = None
-
-    with h5py.File(segmentation_path, 'r') as f:
-        ds = f[seg_key]
-
-        stats = []
-        for label_a, label_b in zip(label_starts, label_stops):
-            log("Computing features from label-id %i to %i" % (label_a, label_b))
-            stats.extend(morphology_features_for_label_range(table, ds, ds_raw,
-                                                             ds_chromatin,
-                                                             ds_exclude,
-                                                             scale_factor_seg, scale_factor_raw,
-                                                             scale_factor_chromatin,
-                                                             scale_factor_exclude,
-                                                             label_a, label_b))
-    if f_raw is not None:
-        f_raw.close()
-
-    if f_chromatin is not None:
-        f_chromatin.close()
-
-    if f_exclude is not None:
-        f_exclude.close()
-
-    # convert to pandas table and add column names
-    stats = pd.DataFrame(stats)
-    columns = ['label_id']
-    morph_columns = ['shape_volume_in_microns', 'shape_extent', 'shape_equiv_diameter',
-                     'shape_major_axis', 'shape_minor_axis', 'shape_surface_area', 'shape_sphericity',
-                     'shape_max_radius']
-
-    columns += morph_columns
-
-    if raw_path != '':
-        intensity_columns = ['intensity_mean', 'intensity_st_dev', 'intensity_median', 'intensity_iqr',
-                             'intensity_total']
-        texture_columns = ['texture_hara%s' % x for x in range(1, 14)]
-
-        columns += intensity_columns
-        # radial intensity columns
-        for val in [25, 50, 75, 100]:
-            columns += ['%s_%s' % (var, val) for var in intensity_columns]
-        columns += texture_columns
-
-    if chromatin_path != '':
-        edt_columns = ['shape_edt_mean', 'shape_edt_stdev', 'shape_edt_median', 'shape_edt_iqr']
-        edt_columns += ['shape_percent_%s' % var for var in [25, 50, 75, 100]]
-
-        for phase in ['_het', '_eu']:
-            columns += [var + phase for var in morph_columns]
-            columns += [var + phase for var in edt_columns]
-
-            if raw_path != '':
-                columns += [var + phase for var in intensity_columns]
-                columns += [var + phase for var in texture_columns]
-
-    stats.columns = columns
-    return stats
-
-
-
 if __name__ == '__main__':
    pass