diff --git a/scripts/extension/attributes/morphology_impl.py b/scripts/extension/attributes/morphology_impl.py index 44a2409ff1639f51f5f56db80cd35bb95f720240..f1a8581bf65cb866cf7332c1d1d83d98fd776094 100644 --- a/scripts/extension/attributes/morphology_impl.py +++ b/scripts/extension/attributes/morphology_impl.py @@ -37,6 +37,7 @@ def get_scale_factor(path, key_full, key, resolution): shape)] return scale_factor + def filter_table(table, min_size, max_size): if max_size is None: table = table.loc[table['n_pixels'] >= min_size, :] @@ -87,8 +88,8 @@ def filter_table_region(table, region_path, regions=('empty', 'yolk', 'neuropil' return table -def run_all_filters(table, min_size, max_size, max_bb, mapping_path, region_mapping_path): +def run_all_filters(table, min_size, max_size, max_bb, mapping_path, region_mapping_path): # remove zero label if present table = table.loc[table['label_id'] != 0, :] @@ -127,7 +128,8 @@ def load_data(ds, row, scale): # load the data from the bounding box return ds[bb] -def generate_column_names (raw_path, chromatin_path): + +def generate_column_names(raw_path, chromatin_path): columns = ['label_id'] morph_columns = ['shape_volume_in_microns', 'shape_extent', 'shape_equiv_diameter', 'shape_major_axis', 'shape_minor_axis', 'shape_surface_area', 'shape_sphericity', @@ -161,7 +163,7 @@ def generate_column_names (raw_path, chromatin_path): return columns -# @profile + def morphology_row_features(mask, scale): # Calculate stats from skimage ski_morph = regionprops(mask.astype('uint8')) @@ -193,7 +195,7 @@ def morphology_row_features(mask, scale): return (volume_in_microns, extent, equiv_diameter, major_axis, minor_axis, surface_area, sphericity, max_radius) -# @profile + def intensity_row_features(raw, mask): intensity_vals_in_mask = raw[mask] # mean and stdev - use float64 to avoid silent overflow errors @@ -208,21 +210,13 @@ def intensity_row_features(raw, mask): return mean_intensity, st_dev, median_intensity, interquartile_range_intensity, total -# @profile + def radial_intensity_row_features(raw, mask, scale, stops=(0.0, 0.25, 0.5, 0.75, 1.0)): result = () edt = distance_transform_edt(mask, sampling=scale, return_distances=True) edt = edt / np.max(edt) - - #TESTING - # from skimage.io import imsave - # imsave('Z:\\Kimberly\\test_edt.tif', edt.astype('uint8')) - # import napari - # with napari.gui_qt(): - # viewer = napari.view_image(edt) - bottoms = stops[0:len(stops) - 1] tops = stops[1:] @@ -233,7 +227,7 @@ def radial_intensity_row_features(raw, mask, scale, stops=(0.0, 0.25, 0.5, 0.75, return result -# @profile + def texture_row_features(raw, mask): # errors if there are small, isolated spots (because I'm using ignore zeros as true) # so here remove components that are < 10 pixels @@ -257,7 +251,7 @@ def texture_row_features(raw, mask): return tuple(hara) -# @profile + def radial_distribution(edt, mask, stops=(0.0, 0.25, 0.5, 0.75, 1.0)): result = () @@ -271,7 +265,7 @@ def radial_distribution(edt, mask, stops=(0.0, 0.25, 0.5, 0.75, 1.0)): return result -# @profile + def chromatin_row_features(chromatin, edt, raw, scale_chromatin): result = () @@ -282,7 +276,6 @@ def chromatin_row_features(chromatin, edt, raw, scale_chromatin): result += radial_distribution(edt, chromatin) if raw is not None: - # resize the chromatin masks if not same size as raw chromatin_type = chromatin.dtype if chromatin.shape != raw.shape: @@ -295,7 +288,7 @@ def chromatin_row_features(chromatin, edt, raw, scale_chromatin): return result -# @profile + # compute morphology (and intensity features) for label range def morphology_features_for_label_range(table, ds, ds_raw, ds_chromatin, @@ -341,11 +334,6 @@ def morphology_features_for_label_range(table, ds, ds_raw, # remove nucleus area form seg_mask seg_mask[exclude] = False - # import napari - # with napari.gui_qt(): - # viewer = napari.view_image(exclude, name='nucleus_resized') - # viewer.add_image(seg_mask, name='cell') - # compute the intensity features from raw data and segmentation mask if ds_raw is not None: raw = load_data(ds_raw, row, scale_factor_raw) @@ -396,13 +384,40 @@ def morphology_features_for_label_range(table, ds, ds_raw, stats.append(result) return stats -def morphology_impl_nucleus (nucleus_segmentation_path, raw_path, chromatin_path, - table, - min_size, max_size, - max_bb, - nucleus_resolution, chromatin_resolution, - nucleus_seg_scale, raw_scale, chromatin_scale, - label_starts, label_stops): + +def morphology_impl_nucleus(nucleus_segmentation_path, raw_path, chromatin_path, + table, + min_size, max_size, + max_bb, + nucleus_resolution, chromatin_resolution, + nucleus_seg_scale, raw_scale, chromatin_scale, + label_starts, label_stops): + """ Compute morphology features for nucleus segmentation. + + Can compute features for multiple label ranges. If you want to + compute features for the full label range, pass + 'label_starts=[0]' and 'label_stops=[number_of_labels]' + + Arguments: + nucleus_segmentation_path [str] - path to nucleus segmentation data stored as h5. + raw_path [str] - path to raw data stored as h5. + Pass 'None' if you don't want to compute features based on raw data. + chromatin_path [str] - path to chromatin segmentation data stored as h5. + table [pd.DataFrame] - table with default attributes + (sizes, center of mass and bounding boxes) for segmentation + min_size [int] - minimal size for objects used in calculation + max_size [int] - maximum size for objects used in calculation + max_bb [int] - maximum total volume of bounding box for objects used in calculation + nucleus_resolution [listlike] - resolution in nanometer. + Must be given in [Z, Y, X]. + chromatin_resolution [listlike] - resolution in nanometer. + Must be given in [Z, Y, X]. + nucleus_seg_scale [int] - scale level of the segmentation. + raw_scale [int] - scale level of the raw data + chromatin_scale [int] - scale level of the segmentation. + label_starts [listlike] - list with label start positions + label_stops [listlike] - list with label stop positions + """ # keys for the different scales nucleus_seg_key_full = 't00000/s00/0/cells' @@ -465,15 +480,48 @@ def morphology_impl_nucleus (nucleus_segmentation_path, raw_path, chromatin_path return stats -def morphology_impl_cell (cell_segmentation_path, raw_path, - nucleus_segmentation_path, - table, mapping_path, - region_mapping_path, - min_size, max_size, - max_bb, - cell_resolution, nucleus_resolution, - cell_seg_scale, raw_scale, nucleus_seg_scale, - label_starts, label_stops): + +def morphology_impl_cell(cell_segmentation_path, raw_path, + nucleus_segmentation_path, + table, mapping_path, + region_mapping_path, + min_size, max_size, + max_bb, + cell_resolution, nucleus_resolution, + cell_seg_scale, raw_scale, nucleus_seg_scale, + label_starts, label_stops): + """ Compute morphology features for cell segmentation. + + Can compute features for multiple label ranges. If you want to + compute features for the full label range, pass + 'label_starts=[0]' and 'label_stops=[number_of_labels]' + + Arguments: + cell_segmentation_path [str] - path to cell segmentation stored as h5. + raw_path [str] - path to raw data stored as h5. + Pass 'None' if you don't want to compute features based on raw data. + nucleus_segmentation_path [str] - path to nucleus segmentation data stored as h5. + Pass 'None' if you don't want to exclude the region covered by the nucleus from intensity & + texture calculations. + table [pd.DataFrame] - table with default attributes + (sizes, center of mass and bounding boxes) for segmentation + mapping_path [str] - path to - path to nucleus id mapping. + Pass 'None' if not relevant. + region_mapping_path [str] - path to - path to cellid to region mapping + Pass 'None' if not relevant + min_size [int] - minimal size for objects used in calculation + max_size [int] - maximum size for objects used in calculation + max_bb [int] - maximum total volume of bounding box for objects used in calculation + cell_resolution [listlike] - resolution in nanometer. + Must be given in [Z, Y, X]. + nucleus_resolution [listlike] - resolution in nanometer. + Must be given in [Z, Y, X]. + cell_seg_scale [int] - scale level of the segmentation + raw_scale [int] - scale level of the raw data + nucleus_seg_scale [int] - scale level of the segmentation. + label_starts [listlike] - list with label start positions + label_stops [listlike] - list with label stop positions + """ # keys for the different scales cell_seg_key_full = 't00000/s00/0/cells' @@ -536,219 +584,5 @@ def morphology_impl_cell (cell_segmentation_path, raw_path, return stats -def morphology_impl(segmentation_path, raw_path, chromatin_path, - exclude_nuc_path, - table, mapping_path, - region_mapping_path, - min_size, max_size, - max_bb, - resolution, raw_scale, seg_scale, - chromatin_scale, exclude_nuc_scale, - label_starts, label_stops): - """ Compute morphology features for a segmentation. - - Can compute features for multiple label ranges. If you want to - compute features for the full label range, pass - 'label_starts=[0]' and 'label_stops=[number_of_labels]' - - Arguments: - segmentation_path [str] - path to segmentation stored as h5. - raw_path [str] - path to raw data stored as h5. - Pass 'None' if you don't want to compute features based on raw data. - chromatin_path [str] - path to chromatin segmentation data stored as h5. - Pass 'None' if you don't want to compute features based on chromatin. - exclude_nuc_path [str] - path to nucleus segmentation data stored as h5. - Pass 'None' if you don't want to use the nucleus segmentation as a mask to exclude this region. - table [pd.DataFrame] - table with default attributes - (sizes, center of mass and bounding boxes) for segmentation - mapping_path [str] - path to - path to nucleus id mapping. - Pass 'None' if not relevant. - region_mapping_path [str] - path to - path to cellid to region mapping - Pass 'None' if not relevant - min_size [int] - minimal size for objects used in calculation - max_size [int] - maximum size for objects used in calculation - max_bb [int] - maximum total volume of bounding box for objects used in calculation - resolution [listlike] - resolution in nanometer. - Must be given in [Z, Y, X]. - raw_scale [int] - scale level of the raw data - seg_scale [int] - scale level of the segmentation - chromatin_scale [int] - scale level of the chromatin segmentation - exclude_nuc_scale [int] - scale level of the nucleus segmentation - label_starts [listlike] - list with label start positions - label_stops [listlike] - list with label stop positions - """ - - # keys to segmentation and raw data for the different scales - seg_key_full = 't00000/s00/0/cells' - seg_key = 't00000/s00/%i/cells' % seg_scale - raw_key_full = 't00000/s00/0/cells' - raw_key = 't00000/s00/%i/cells' % raw_scale - chromatin_key_full = 't00000/s00/0/cells' - chromatin_key = 't00000/s00/%i/cells' % chromatin_scale - exclude_key_full = 't00000/s00/0/cells' - exclude_key = 't00000/s00/%i/cells' % exclude_nuc_scale - - # get scale factor for the segmentation - scale_factor_seg = get_scale_factor(segmentation_path, seg_key_full, seg_key, resolution) - - # get scale factor for raw data (if it's given) - if raw_path != '': - log("Have raw path; compute intensity features") - # NOTE for now we can hard-code the resolution for the raw data here, - # but we might need to change this if we get additional dataset(s) - raw_resolution = [0.025, 0.01, 0.01] - scale_factor_raw = get_scale_factor(raw_path, raw_key_full, raw_key, raw_resolution) - else: - log("Don't have raw path; do not compute intensity features") - raw_resolution = scale_factor_raw = None - - # get scale factor for chromatin (if it's given) - if chromatin_path != '': - log("Have chromatin path; compute chromatin features") - # NOTE for now we can hard-code the resolution for the chromatin data here, - # but we might need to change this if we get additional dataset(s) - chromatin_resolution = [0.025, 0.02, 0.02] - scale_factor_chromatin = get_scale_factor(chromatin_path, chromatin_key_full, chromatin_key, - chromatin_resolution) - else: - log("Don't have chromatin path; do not compute chromatin features") - chromatin_resolution = scale_factor_chromatin = None - - # get scale factor for nuclei segmentation (to be used to exclude nucleus) if given - if exclude_nuc_path != '': - log("Have nucleus path; exclude nucleus for intensity measures") - # NOTE for now we can hard-code the resolution for the nuclei data here, - # but we might need to change this if we get additional dataset(s) - exclude_resolution = [0.1, 0.08, 0.08] - scale_factor_exclude = get_scale_factor(exclude_nuc_path, exclude_key_full, exclude_key, - exclude_resolution) - else: - log("Don't have exclude path; don't exclude nucleus area for intensity measures") - scale_factor_exclude = scale_factor_exclude = None - - # remove zero label if it exists - table = table.loc[table['label_id'] != 0, :] - - # if we have a mapping, only keep objects in the mapping - # (i.e cells that have assigned nuclei) - if mapping_path != '': - log("Have mapping path %s" % mapping_path) - table = filter_table_from_mapping(table, mapping_path) - log("Number of labels after filter with mapping: %i" % table.shape[0]) - - if region_mapping_path != '': - log("Have region mapping path %s" % region_mapping_path) - table = filter_table_region(table, region_mapping_path) - log("Number of labels after region filter: %i" % table.shape[0]) - - # filter by size - table = filter_table(table, min_size, max_size) - log("Number of labels after size filter: %i" % table.shape[0]) - - # filter by bounding box size - if max_bb is not None: - table = filter_table_bb(table, max_bb) - log("Number of labels after bounding box size filter %i" % table.shape[0]) - - log("Computing morphology features") - stats = compute_morphology_features(table, segmentation_path, raw_path, - chromatin_path, exclude_nuc_path, - seg_key, raw_key, chromatin_key, - exclude_key, - scale_factor_seg, scale_factor_raw, - scale_factor_chromatin, - scale_factor_exclude, - label_starts, label_stops) - return stats - -def compute_morphology_features(table, segmentation_path, raw_path, - chromatin_path, exclude_nuc_path, - seg_key, raw_key, chromatin_key, - exclude_key, - scale_factor_seg, scale_factor_raw, - scale_factor_chromatin, - scale_factor_exclude, - label_starts, label_stops): - if raw_path != '': - assert raw_key is not None and scale_factor_raw is not None - f_raw = h5py.File(raw_path, 'r') - ds_raw = f_raw[raw_key] - else: - f_raw = ds_raw = None - - if chromatin_path != '': - assert chromatin_key is not None and scale_factor_chromatin is not None - f_chromatin = h5py.File(chromatin_path, 'r') - ds_chromatin = f_chromatin[chromatin_key] - else: - f_chromatin = ds_chromatin = None - - if exclude_nuc_path != '': - assert exclude_key is not None and scale_factor_exclude is not None - f_exclude = h5py.File(exclude_nuc_path, 'r') - ds_exclude = f_exclude[exclude_key] - - else: - f_exclude = ds_exclude = None - - with h5py.File(segmentation_path, 'r') as f: - ds = f[seg_key] - - stats = [] - for label_a, label_b in zip(label_starts, label_stops): - log("Computing features from label-id %i to %i" % (label_a, label_b)) - stats.extend(morphology_features_for_label_range(table, ds, ds_raw, - ds_chromatin, - ds_exclude, - scale_factor_seg, scale_factor_raw, - scale_factor_chromatin, - scale_factor_exclude, - label_a, label_b)) - if f_raw is not None: - f_raw.close() - - if f_chromatin is not None: - f_chromatin.close() - - if f_exclude is not None: - f_exclude.close() - - # convert to pandas table and add column names - stats = pd.DataFrame(stats) - columns = ['label_id'] - morph_columns = ['shape_volume_in_microns', 'shape_extent', 'shape_equiv_diameter', - 'shape_major_axis', 'shape_minor_axis', 'shape_surface_area', 'shape_sphericity', - 'shape_max_radius'] - - columns += morph_columns - - if raw_path != '': - intensity_columns = ['intensity_mean', 'intensity_st_dev', 'intensity_median', 'intensity_iqr', - 'intensity_total'] - texture_columns = ['texture_hara%s' % x for x in range(1, 14)] - - columns += intensity_columns - # radial intensity columns - for val in [25, 50, 75, 100]: - columns += ['%s_%s' % (var, val) for var in intensity_columns] - columns += texture_columns - - if chromatin_path != '': - edt_columns = ['shape_edt_mean', 'shape_edt_stdev', 'shape_edt_median', 'shape_edt_iqr'] - edt_columns += ['shape_percent_%s' % var for var in [25, 50, 75, 100]] - - for phase in ['_het', '_eu']: - columns += [var + phase for var in morph_columns] - columns += [var + phase for var in edt_columns] - - if raw_path != '': - columns += [var + phase for var in intensity_columns] - columns += [var + phase for var in texture_columns] - - stats.columns = columns - return stats - - - if __name__ == '__main__': pass