Skip to content
Snippets Groups Projects
Commit 8c06a726 authored by Kimberly Isobel Meechan's avatar Kimberly Isobel Meechan
Browse files

neatened script

parent 8c04dd44
No related branches found
No related tags found
1 merge request!9Morphology
...@@ -38,45 +38,44 @@ def get_scale_factor(path, key_full, key, resolution): ...@@ -38,45 +38,44 @@ def get_scale_factor(path, key_full, key, resolution):
return scale_factor return scale_factor
# changed to shape_pixel_size, is the plan to go to n_pixels in future?
def filter_table(table, min_size, max_size): def filter_table(table, min_size, max_size):
if max_size is None: if max_size is None:
table = table.loc[table['shape_pixelsize'] >= min_size, :] table = table.loc[table['n_pixels'] >= min_size, :]
else: else:
criteria = np.logical_and(table['shape_pixelsize'] > min_size, table['shape_pixelsize'] < max_size) criteria = np.logical_and(table['n_pixels'] > min_size, table['n_pixels'] < max_size)
table = table.loc[criteria, :] table = table.loc[criteria, :]
return table return table
# some cell segmentations have a sensible total pixel size but very large bounding boxes i.e. they are small spots # some cell segmentations have a sensible total pixel size but very large bounding boxes i.e. they are small spots
# distributed over a large region, not one cell > filter for these cases by capping the bounding box size # distributed over a large region, not one cell > filter for these cases by capping the bounding box size
def filter_table_bb(table, max_bb): def filter_table_bb(table, max_bb):
total_bb = (table['bb_max_z'] - table['bb_min_z']) * (table['bb_max_y'] - table['bb_min_y']) * ( total_bb = (table['bb_max_z'] - table['bb_min_z']) * (table['bb_max_y'] - table['bb_min_y']) * (
table['bb_max_x'] - table['bb_min_x']) table['bb_max_x'] - table['bb_min_x'])
table = table.loc[total_bb < max_bb, :] table = table.loc[total_bb < max_bb, :]
return table return table
def filter_table_from_mapping(table, mapping_path): def filter_table_from_mapping(table, mapping_path):
# read in numpy array of mapping of cells to nuclei - first column cell id, second nucleus id # read in numpy array of mapping of cells to nuclei - first column cell id, second nucleus id
mapping = pd.read_csv(mapping_path, sep='\t') mapping = pd.read_csv(mapping_path, sep='\t')
# mapping = np.genfromtxt(mapping_path, skip_header=1, delimiter='\t')[:, :2].astype('uint64')
# remove zero labels from this table too, if exist # remove zero labels from this table too, if exist
mapping = mapping.loc[np.logical_and(mapping.iloc[:, 0] != 0, mapping = mapping.loc[np.logical_and(mapping.iloc[:, 0] != 0,
mapping.iloc[:, 1] != 0), :] mapping.iloc[:, 1] != 0), :]
table = table.loc[np.isin(table['label_id'], mapping['label_id']), :] table = table.loc[np.isin(table['label_id'], mapping['label_id']), :]
# add a column for the id of the mapped nucleus (use this later to exclude the area covered by the nucleus) # add a column for the 'nucleus_id' of the mapped nucleus (use this later to exclude the area covered
# by the nucleus)
table = table.join(mapping.set_index('label_id'), on='label_id', how='left') table = table.join(mapping.set_index('label_id'), on='label_id', how='left')
return table return table
# regions here are regions to exclude
def filter_table_region(table, region_path, regions=('empty', 'yolk', 'neuropil', 'cuticle')): def filter_table_region(table, region_path, regions=('empty', 'yolk', 'neuropil', 'cuticle')):
# regions here are regions to exclude
region_mapping = pd.read_csv(region_path, sep='\t') region_mapping = pd.read_csv(region_path, sep='\t')
# remove zero label if it exists # remove zero label if it exists
...@@ -105,34 +104,11 @@ def morphology_row_features(mask, scale): ...@@ -105,34 +104,11 @@ def morphology_row_features(mask, scale):
# Calculate stats from skimage # Calculate stats from skimage
ski_morph = regionprops(mask.astype('uint8')) ski_morph = regionprops(mask.astype('uint8'))
# volume in pixels
volume_in_pix = ski_morph[0]['area'] volume_in_pix = ski_morph[0]['area']
# volume in microns
volume_in_microns = np.prod(scale) * volume_in_pix volume_in_microns = np.prod(scale) * volume_in_pix
# extent
extent = ski_morph[0]['extent'] extent = ski_morph[0]['extent']
try:
# volume of convex hull
convex_area = ski_morph[0]['convex_area']
# solidity
solidity = ski_morph[0]['solidity']
except MemoryError:
log("Too large to compute convex hull")
convex_area = 0
solidity = 0
# equivalent diameter
equiv_diameter = ski_morph[0]['equivalent_diameter'] equiv_diameter = ski_morph[0]['equivalent_diameter']
# major axis length
major_axis = ski_morph[0]['major_axis_length'] major_axis = ski_morph[0]['major_axis_length']
# minor axis length
minor_axis = ski_morph[0]['minor_axis_length'] minor_axis = ski_morph[0]['minor_axis_length']
# The mesh calculation below fails if an edge of the segmentation is right up against the # The mesh calculation below fails if an edge of the segmentation is right up against the
...@@ -152,8 +128,8 @@ def morphology_row_features(mask, scale): ...@@ -152,8 +128,8 @@ def morphology_row_features(mask, scale):
edt = distance_transform_edt(mask, sampling=scale, return_distances=True) edt = distance_transform_edt(mask, sampling=scale, return_distances=True)
max_radius = np.max(edt) max_radius = np.max(edt)
return (volume_in_microns, extent, convex_area, equiv_diameter, major_axis, return (volume_in_microns, extent, equiv_diameter, major_axis,
minor_axis, solidity, surface_area, sphericity, max_radius) minor_axis, surface_area, sphericity, max_radius)
def intensity_row_features(raw, mask): def intensity_row_features(raw, mask):
...@@ -207,7 +183,7 @@ def texture_row_features(raw, mask): ...@@ -207,7 +183,7 @@ def texture_row_features(raw, mask):
except ValueError: except ValueError:
log('Texture computation failed - can happen when using ignore_zeros') log('Texture computation failed - can happen when using ignore_zeros')
hara = (0.,)*13 hara = (0.,) * 13
return tuple(hara) return tuple(hara)
...@@ -226,31 +202,25 @@ def radial_distribution(edt, mask, stops=(0.0, 0.25, 0.5, 0.75, 1.0)): ...@@ -226,31 +202,25 @@ def radial_distribution(edt, mask, stops=(0.0, 0.25, 0.5, 0.75, 1.0)):
return result return result
def chromatin_row_features(chromatin, edt, raw, scale_chromatin, scale_raw): def chromatin_row_features(chromatin, edt, raw, scale_chromatin):
"""
:param chromatin: numpy ndarray mask, boolean
:param raw:
:param scale:
:return:
"""
result = () result = ()
result += morphology_row_features(chromatin, scale_chromatin) result += morphology_row_features(chromatin, scale_chromatin)
# edt stats, dropping the total value # edt stats, dropping the total value
result += intensity_row_features(edt, chromatin)[:-1] result += intensity_row_features(edt, chromatin)[:-1]
result += radial_distribution(edt, chromatin) result += radial_distribution(edt, chromatin)
# resize the chromatin masks if not same size as raw if raw is not None:
if chromatin.shape != raw.shape:
chromatin = resize(chromatin, raw.shape, # resize the chromatin masks if not same size as raw
order=0, mode='reflect', if chromatin.shape != raw.shape:
anti_aliasing=True, preserve_range=True).astype('bool') chromatin = resize(chromatin, raw.shape,
order=0, mode='reflect',
anti_aliasing=True, preserve_range=True).astype('bool')
result += intensity_row_features(raw, chromatin) result += intensity_row_features(raw, chromatin)
result += texture_row_features(raw, chromatin) result += texture_row_features(raw, chromatin)
return result return result
...@@ -299,11 +269,7 @@ def morphology_features_for_label_range(table, ds, ds_raw, ...@@ -299,11 +269,7 @@ def morphology_features_for_label_range(table, ds, ds_raw,
# remove nucleus area form seg_mask # remove nucleus area form seg_mask
seg_mask[exclude] = False seg_mask[exclude] = False
# just testing # compute the intensity features from raw data and segmentation mask
from skimage.io import imsave
imsave('Z:\\Kimberly\\temp\\test_ex' + str(row.label_id) + '.tiff', seg_mask.astype('uint8'))
# compute the intensiry features from raw data and segmentation mask
if ds_raw is not None: if ds_raw is not None:
raw = load_data(ds_raw, row, scale_factor_raw) raw = load_data(ds_raw, row, scale_factor_raw)
# resize the segmentation mask if it does not fit the raw data # resize the segmentation mask if it does not fit the raw data
...@@ -322,11 +288,9 @@ def morphology_features_for_label_range(table, ds, ds_raw, ...@@ -322,11 +288,9 @@ def morphology_features_for_label_range(table, ds, ds_raw,
heterochromatin = chromatin == label_id + 12000 heterochromatin = chromatin == label_id + 12000
euchromatin = chromatin == label_id euchromatin = chromatin == label_id
# if no chromatin seg, skip this label-id (may want to change so can keep rest of nuclear features # skip if no chromatin segmentation
# if only the chromatin part fails?
total_heterochromatin = heterochromatin.sum() total_heterochromatin = heterochromatin.sum()
total_euchromatin = euchromatin.sum() total_euchromatin = euchromatin.sum()
if total_heterochromatin == 0 and total_euchromatin.sum() == 0: if total_heterochromatin == 0 and total_euchromatin.sum() == 0:
continue continue
...@@ -335,14 +299,16 @@ def morphology_features_for_label_range(table, ds, ds_raw, ...@@ -335,14 +299,16 @@ def morphology_features_for_label_range(table, ds, ds_raw,
edt = distance_transform_edt(whole_nucleus, sampling=scale_factor_chromatin, return_distances=True) edt = distance_transform_edt(whole_nucleus, sampling=scale_factor_chromatin, return_distances=True)
edt = edt / np.max(edt) edt = edt / np.max(edt)
if total_heterochromatin != 0: if ds_raw is None:
result += chromatin_row_features(heterochromatin, edt, raw, scale_factor_chromatin, scale_factor_raw) raw = None
if total_heterochromatin != 0:
result += chromatin_row_features(heterochromatin, edt, raw, scale_factor_chromatin)
else: else:
result += (0.,) * 36 result += (0.,) * 36
if total_euchromatin != 0: if total_euchromatin != 0:
result += chromatin_row_features(euchromatin, edt, raw, scale_factor_chromatin, scale_factor_raw) result += chromatin_row_features(euchromatin, edt, raw, scale_factor_chromatin)
else: else:
result += (0.,) * 36 result += (0.,) * 36
...@@ -405,8 +371,8 @@ def compute_morphology_features(table, segmentation_path, raw_path, ...@@ -405,8 +371,8 @@ def compute_morphology_features(table, segmentation_path, raw_path,
# convert to pandas table and add column names # convert to pandas table and add column names
stats = pd.DataFrame(stats) stats = pd.DataFrame(stats)
columns = ['label_id'] columns = ['label_id']
morph_columns = ['shape_volume_in_microns', 'shape_extent', 'shape_convex_area', 'shape_equiv_diameter', morph_columns = ['shape_volume_in_microns', 'shape_extent', 'shape_equiv_diameter',
'shape_major_axis', 'shape_minor_axis', 'shape_solidity', 'shape_surface_area', 'shape_sphericity', 'shape_major_axis', 'shape_minor_axis', 'shape_surface_area', 'shape_sphericity',
'shape_max_radius'] 'shape_max_radius']
columns += morph_columns columns += morph_columns
...@@ -414,23 +380,25 @@ def compute_morphology_features(table, segmentation_path, raw_path, ...@@ -414,23 +380,25 @@ def compute_morphology_features(table, segmentation_path, raw_path,
if raw_path != '': if raw_path != '':
intensity_columns = ['intensity_mean', 'intensity_st_dev', 'intensity_median', 'intensity_iqr', intensity_columns = ['intensity_mean', 'intensity_st_dev', 'intensity_median', 'intensity_iqr',
'intensity_total'] 'intensity_total']
texture_columns = ['texture_hara' + str(x) for x in range(1, 14)] texture_columns = ['texture_hara%s' % x for x in range(1, 14)]
columns += intensity_columns columns += intensity_columns
# radial intensity columns # radial intensity columns
for val in [25, 50, 75, 100]: for val in [25, 50, 75, 100]:
columns += [var + '_' + str(val) for var in intensity_columns] columns += ['%s_%s' % (var, val) for var in intensity_columns]
columns += texture_columns columns += texture_columns
if chromatin_path != '': if chromatin_path != '':
edt_columns = ['shape_edt_mean', 'shape_edt_stdev', 'shape_edt_median', 'shape_edt_iqr'] edt_columns = ['shape_edt_mean', 'shape_edt_stdev', 'shape_edt_median', 'shape_edt_iqr']
edt_columns += ['shape_%_' + str(var) for var in [25, 50, 75, 100]] edt_columns += ['shape_percent_%s' % var for var in [25, 50, 75, 100]]
for phase in ['_het', '_eu']: for phase in ['_het', '_eu']:
columns += [var + phase for var in morph_columns] columns += [var + phase for var in morph_columns]
columns += [var + phase for var in edt_columns] columns += [var + phase for var in edt_columns]
columns += [var + phase for var in intensity_columns]
columns += [var + phase for var in texture_columns] if raw_path != '':
columns += [var + phase for var in intensity_columns]
columns += [var + phase for var in texture_columns]
stats.columns = columns stats.columns = columns
return stats return stats
...@@ -455,18 +423,25 @@ def morphology_impl(segmentation_path, raw_path, chromatin_path, ...@@ -455,18 +423,25 @@ def morphology_impl(segmentation_path, raw_path, chromatin_path,
segmentation_path [str] - path to segmentation stored as h5. segmentation_path [str] - path to segmentation stored as h5.
raw_path [str] - path to raw data stored as h5. raw_path [str] - path to raw data stored as h5.
Pass 'None' if you don't want to compute features based on raw data. Pass 'None' if you don't want to compute features based on raw data.
chromatin_path [str] - path to chromatin segmentation data stored as h5.
Pass 'None' if you don't want to compute features based on chromatin.
exclude_nuc_path [str] - path to nucleus segmentation data stored as h5.
Pass 'None' if you don't want to use the nucleus segmentation as a mask to exclude this region.
table [pd.DataFrame] - table with default attributes table [pd.DataFrame] - table with default attributes
(sizes, center of mass and bounding boxes) for segmentation (sizes, center of mass and bounding boxes) for segmentation
mapping_path [str] - path to - path to nucleus id mapping. mapping_path [str] - path to - path to nucleus id mapping.
Pass 'None' if not relevant. Pass 'None' if not relevant.
region_mapping_path [str] - path to - path to cellid to region mapping region_mapping_path [str] - path to - path to cellid to region mapping
Pass 'None' if not relevant Pass 'None' if not relevant
min_size [int] - minimal size for objects used in calcualtion min_size [int] - minimal size for objects used in calculation
max_size [int] - maximum size for objects used in calcualtion max_size [int] - maximum size for objects used in calculation
max_bb [int] - maximum total volume of bounding box for objects used in calculation
resolution [listlike] - resolution in nanometer. resolution [listlike] - resolution in nanometer.
Must be given in [Z, Y, X]. Must be given in [Z, Y, X].
raw_scale [int] - scale level of the raw data raw_scale [int] - scale level of the raw data
seg_scale [int] - scale level of the segmentation seg_scale [int] - scale level of the segmentation
chromatin_scale [int] - scale level of the chromatin segmentation
exclude_nuc_scale [int] - scale level of the nucleus segmentation
label_starts [listlike] - list with label start positions label_starts [listlike] - list with label start positions
label_stops [listlike] - list with label stop positions label_stops [listlike] - list with label stop positions
""" """
...@@ -495,7 +470,7 @@ def morphology_impl(segmentation_path, raw_path, chromatin_path, ...@@ -495,7 +470,7 @@ def morphology_impl(segmentation_path, raw_path, chromatin_path,
log("Don't have raw path; do not compute intensity features") log("Don't have raw path; do not compute intensity features")
raw_resolution = scale_factor_raw = None raw_resolution = scale_factor_raw = None
# get scale fractor for chromatin (if it's given) # get scale factor for chromatin (if it's given)
if chromatin_path != '': if chromatin_path != '':
log("Have chromatin path; compute chromatin features") log("Have chromatin path; compute chromatin features")
# NOTE for now we can hard-code the resolution for the chromatin data here, # NOTE for now we can hard-code the resolution for the chromatin data here,
...@@ -522,7 +497,7 @@ def morphology_impl(segmentation_path, raw_path, chromatin_path, ...@@ -522,7 +497,7 @@ def morphology_impl(segmentation_path, raw_path, chromatin_path,
# remove zero label if it exists # remove zero label if it exists
table = table.loc[table['label_id'] != 0, :] table = table.loc[table['label_id'] != 0, :]
# if we have a mappin, only keep objects in the mapping # if we have a mapping, only keep objects in the mapping
# (i.e cells that have assigned nuclei) # (i.e cells that have assigned nuclei)
if mapping_path != '': if mapping_path != '':
log("Have mapping path %s" % mapping_path) log("Have mapping path %s" % mapping_path)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment