Skip to content
Snippets Groups Projects
Commit 8c04dd44 authored by Kimberly Isobel Meechan's avatar Kimberly Isobel Meechan
Browse files

More cell features added - rough

parent 8c34447f
No related branches found
No related tags found
1 merge request!9Morphology
from datetime import datetime from datetime import datetime
#TODO - uncomment this part # TODO - uncomment this part
# this is a task called by multiple processes, # this is a task called by multiple processes,
# so we need to restrict the number of threads used by numpy # so we need to restrict the number of threads used by numpy
#from cluster_tools.utils.numpy_utils import set_numpy_threads # from cluster_tools.utils.numpy_utils import set_numpy_threads
#set_numpy_threads(1) # set_numpy_threads(1)
from tmp.numpy_utils import set_numpy_threads
set_numpy_threads(1)
import numpy as np import numpy as np
import h5py import h5py
...@@ -44,16 +47,46 @@ def filter_table(table, min_size, max_size): ...@@ -44,16 +47,46 @@ def filter_table(table, min_size, max_size):
table = table.loc[criteria, :] table = table.loc[criteria, :]
return table return table
# some cell segmentations have a sensible total pixel size but very large bounding boxes i.e. they are small spots
# distributed over a large region, not one cell > filter for these cases by capping the bounding box size
def filter_table_bb(table, max_bb):
total_bb = (table['bb_max_z'] - table['bb_min_z']) * (table['bb_max_y'] - table['bb_min_y']) * (
table['bb_max_x'] - table['bb_min_x'])
table = table.loc[total_bb < max_bb, :]
return table
def filter_table_from_mapping(table, mapping_path): def filter_table_from_mapping(table, mapping_path):
# read in numpy array of mapping of cells to nuclei - first column cell id, second nucleus id # read in numpy array of mapping of cells to nuclei - first column cell id, second nucleus id
mapping = np.genfromtxt(mapping_path, skip_header=1, delimiter='\t')[:, :2].astype('uint64') mapping = pd.read_csv(mapping_path, sep='\t')
# mapping = np.genfromtxt(mapping_path, skip_header=1, delimiter='\t')[:, :2].astype('uint64')
# remove zero labels from this table too, if exist # remove zero labels from this table too, if exist
mapping = mapping[np.logical_and(mapping[:, 0] != 0, mapping = mapping.loc[np.logical_and(mapping.iloc[:, 0] != 0,
mapping[:, 1] != 0)] mapping.iloc[:, 1] != 0), :]
table = table.loc[np.isin(table['label_id'], mapping[:, 0]), :] table = table.loc[np.isin(table['label_id'], mapping['label_id']), :]
# add a column for the id of the mapped nucleus (use this later to exclude the area covered by the nucleus)
table = table.join(mapping.set_index('label_id'), on='label_id', how='left')
return table
def filter_table_region(table, region_path, regions=('empty', 'yolk', 'neuropil', 'cuticle')):
# regions here are regions to exclude
region_mapping = pd.read_csv(region_path, sep='\t')
# remove zero label if it exists
region_mapping = region_mapping.loc[region_mapping['label_id'] != 0, :]
for region in regions:
region_mapping = region_mapping.loc[region_mapping[region] == 0, :]
table = table.loc[np.isin(table['label_id'], region_mapping['label_id']), :]
# TODO - ADD a column for the nucleus id from the mapping
return table return table
...@@ -69,7 +102,6 @@ def load_data(ds, row, scale): ...@@ -69,7 +102,6 @@ def load_data(ds, row, scale):
def morphology_row_features(mask, scale): def morphology_row_features(mask, scale):
# Calculate stats from skimage # Calculate stats from skimage
ski_morph = regionprops(mask.astype('uint8')) ski_morph = regionprops(mask.astype('uint8'))
...@@ -82,8 +114,17 @@ def morphology_row_features(mask, scale): ...@@ -82,8 +114,17 @@ def morphology_row_features(mask, scale):
# extent # extent
extent = ski_morph[0]['extent'] extent = ski_morph[0]['extent']
# volume of convex hull try:
convex_area = ski_morph[0]['convex_area'] # volume of convex hull
convex_area = ski_morph[0]['convex_area']
# solidity
solidity = ski_morph[0]['solidity']
except MemoryError:
log("Too large to compute convex hull")
convex_area = 0
solidity = 0
# equivalent diameter # equivalent diameter
equiv_diameter = ski_morph[0]['equivalent_diameter'] equiv_diameter = ski_morph[0]['equivalent_diameter']
...@@ -94,9 +135,6 @@ def morphology_row_features(mask, scale): ...@@ -94,9 +135,6 @@ def morphology_row_features(mask, scale):
# minor axis length # minor axis length
minor_axis = ski_morph[0]['minor_axis_length'] minor_axis = ski_morph[0]['minor_axis_length']
# solidity
solidity = ski_morph[0]['solidity']
# The mesh calculation below fails if an edge of the segmentation is right up against the # The mesh calculation below fails if an edge of the segmentation is right up against the
# edge of the volume - gives an open, rather than a closed surface # edge of the volume - gives an open, rather than a closed surface
# Pad by a few pixels to avoid this # Pad by a few pixels to avoid this
...@@ -108,7 +146,7 @@ def morphology_row_features(mask, scale): ...@@ -108,7 +146,7 @@ def morphology_row_features(mask, scale):
# sphericity (as in morpholibj) # sphericity (as in morpholibj)
# Should run from zero to one # Should run from zero to one
sphericity = (36*np.pi*(float(volume_in_microns)**2))/(float(surface_area)**3) sphericity = (36 * np.pi * (float(volume_in_microns) ** 2)) / (float(surface_area) ** 3)
# max radius - max distance from pixel to outside # max radius - max distance from pixel to outside
edt = distance_transform_edt(mask, sampling=scale, return_distances=True) edt = distance_transform_edt(mask, sampling=scale, return_distances=True)
...@@ -132,7 +170,8 @@ def intensity_row_features(raw, mask): ...@@ -132,7 +170,8 @@ def intensity_row_features(raw, mask):
return mean_intensity, st_dev, median_intensity, interquartile_range_intensity, total return mean_intensity, st_dev, median_intensity, interquartile_range_intensity, total
def radial_intensity_row_features(raw, mask, scale, stops = (0.0, 0.25, 0.5, 0.75, 1.0)):
def radial_intensity_row_features(raw, mask, scale, stops=(0.0, 0.25, 0.5, 0.75, 1.0)):
result = () result = ()
edt = distance_transform_edt(mask, sampling=scale, return_distances=True) edt = distance_transform_edt(mask, sampling=scale, return_distances=True)
...@@ -152,6 +191,7 @@ def radial_intensity_row_features(raw, mask, scale, stops = (0.0, 0.25, 0.5, 0.7 ...@@ -152,6 +191,7 @@ def radial_intensity_row_features(raw, mask, scale, stops = (0.0, 0.25, 0.5, 0.7
def texture_row_features(raw, mask): def texture_row_features(raw, mask):
# errors if there are small, isolated spots (because I'm using ignore zeros as true) # errors if there are small, isolated spots (because I'm using ignore zeros as true)
# so here remove components that are < 10 pixels # so here remove components that are < 10 pixels
# may still error in some cases
labelled = label(mask) labelled = label(mask)
if len(np.unique(labelled)) > 2: if len(np.unique(labelled)) > 2:
labelled = remove_small_objects(labelled, min_size=10) labelled = remove_small_objects(labelled, min_size=10)
...@@ -162,11 +202,17 @@ def texture_row_features(raw, mask): ...@@ -162,11 +202,17 @@ def texture_row_features(raw, mask):
raw_copy = raw.copy() raw_copy = raw.copy()
raw_copy[mask == 0] = 0 raw_copy[mask == 0] = 0
hara = haralick(raw_copy, ignore_zeros=True, return_mean=True, distance=2) try:
hara = haralick(raw_copy, ignore_zeros=True, return_mean=True, distance=2)
except ValueError:
log('Texture computation failed - can happen when using ignore_zeros')
hara = (0.,)*13
return tuple(hara) return tuple(hara)
def radial_distribution (edt, mask, stops = (0.0, 0.25, 0.5, 0.75, 1.0)):
def radial_distribution(edt, mask, stops=(0.0, 0.25, 0.5, 0.75, 1.0)):
result = () result = ()
bottoms = stops[0:len(stops) - 1] bottoms = stops[0:len(stops) - 1]
...@@ -179,7 +225,8 @@ def radial_distribution (edt, mask, stops = (0.0, 0.25, 0.5, 0.75, 1.0)): ...@@ -179,7 +225,8 @@ def radial_distribution (edt, mask, stops = (0.0, 0.25, 0.5, 0.75, 1.0)):
return result return result
def chromatin_row_features (chromatin, edt, raw, scale_chromatin, scale_raw):
def chromatin_row_features(chromatin, edt, raw, scale_chromatin, scale_raw):
""" """
:param chromatin: numpy ndarray mask, boolean :param chromatin: numpy ndarray mask, boolean
...@@ -199,8 +246,8 @@ def chromatin_row_features (chromatin, edt, raw, scale_chromatin, scale_raw): ...@@ -199,8 +246,8 @@ def chromatin_row_features (chromatin, edt, raw, scale_chromatin, scale_raw):
# resize the chromatin masks if not same size as raw # resize the chromatin masks if not same size as raw
if chromatin.shape != raw.shape: if chromatin.shape != raw.shape:
chromatin = resize(chromatin, raw.shape, chromatin = resize(chromatin, raw.shape,
order=0, mode='reflect', order=0, mode='reflect',
anti_aliasing=True, preserve_range=True).astype('bool') anti_aliasing=True, preserve_range=True).astype('bool')
result += intensity_row_features(raw, chromatin) result += intensity_row_features(raw, chromatin)
result += texture_row_features(raw, chromatin) result += texture_row_features(raw, chromatin)
...@@ -211,13 +258,16 @@ def chromatin_row_features (chromatin, edt, raw, scale_chromatin, scale_raw): ...@@ -211,13 +258,16 @@ def chromatin_row_features (chromatin, edt, raw, scale_chromatin, scale_raw):
# compute morphology (and intensity features) for label range # compute morphology (and intensity features) for label range
def morphology_features_for_label_range(table, ds, ds_raw, def morphology_features_for_label_range(table, ds, ds_raw,
ds_chromatin, ds_chromatin,
ds_exclude,
scale_factor_seg, scale_factor_raw, scale_factor_seg, scale_factor_raw,
scale_factor_chromatin, scale_factor_chromatin,
scale_factor_exclude,
label_begin, label_end): label_begin, label_end):
label_range = np.logical_and(table['label_id'] >= label_begin, table['label_id'] < label_end) label_range = np.logical_and(table['label_id'] >= label_begin, table['label_id'] < label_end)
sub_table = table.loc[label_range, :] sub_table = table.loc[label_range, :]
stats = [] stats = []
for row in sub_table.itertuples(index=False): for row in sub_table.itertuples(index=False):
log(str(row.label_id))
label_id = int(row.label_id) label_id = int(row.label_id)
# load the segmentation data from the bounding box corresponding # load the segmentation data from the bounding box corresponding
...@@ -234,7 +284,24 @@ def morphology_features_for_label_range(table, ds, ds_raw, ...@@ -234,7 +284,24 @@ def morphology_features_for_label_range(table, ds, ds_raw,
# compute the morphology features from the segmentation mask # compute the morphology features from the segmentation mask
result = (float(label_id),) + morphology_row_features(seg_mask, scale_factor_seg) result = (float(label_id),) + morphology_row_features(seg_mask, scale_factor_seg)
# TODO - if exclude, remove that part form mask before continuing, use added nucleus_id column if ds_exclude is not None:
exclude = load_data(ds_exclude, row, scale_factor_exclude)
# resize to fit seg
if exclude.shape != seg_mask.shape:
exclude = resize(exclude, seg_mask.shape,
order=0, mode='reflect',
anti_aliasing=True, preserve_range=True).astype('bool')
# binary for correct nucleus
exclude = exclude == int(row.nucleus_id)
# remove nucleus area form seg_mask
seg_mask[exclude] = False
# just testing
from skimage.io import imsave
imsave('Z:\\Kimberly\\temp\\test_ex' + str(row.label_id) + '.tiff', seg_mask.astype('uint8'))
# compute the intensiry features from raw data and segmentation mask # compute the intensiry features from raw data and segmentation mask
if ds_raw is not None: if ds_raw is not None:
...@@ -266,31 +333,31 @@ def morphology_features_for_label_range(table, ds, ds_raw, ...@@ -266,31 +333,31 @@ def morphology_features_for_label_range(table, ds, ds_raw,
# euclidean distance transform for whole nucleus, normalised to run from 0 to 1 # euclidean distance transform for whole nucleus, normalised to run from 0 to 1
whole_nucleus = np.logical_or(heterochromatin, euchromatin) whole_nucleus = np.logical_or(heterochromatin, euchromatin)
edt = distance_transform_edt(whole_nucleus, sampling=scale_factor_chromatin, return_distances=True) edt = distance_transform_edt(whole_nucleus, sampling=scale_factor_chromatin, return_distances=True)
edt = edt/np.max(edt) edt = edt / np.max(edt)
if total_heterochromatin != 0: if total_heterochromatin != 0:
result += chromatin_row_features(heterochromatin, edt, raw, scale_factor_chromatin, scale_factor_raw) result += chromatin_row_features(heterochromatin, edt, raw, scale_factor_chromatin, scale_factor_raw)
else: else:
result += (0.,)*36 result += (0.,) * 36
if total_euchromatin != 0: if total_euchromatin != 0:
result += chromatin_row_features(euchromatin, edt, raw, scale_factor_chromatin, scale_factor_raw) result += chromatin_row_features(euchromatin, edt, raw, scale_factor_chromatin, scale_factor_raw)
else: else:
result += (0.,)*36 result += (0.,) * 36
stats.append(result) stats.append(result)
return stats return stats
def compute_morphology_features(table, segmentation_path, raw_path, def compute_morphology_features(table, segmentation_path, raw_path,
chromatin_path, chromatin_path, exclude_nuc_path,
seg_key, raw_key, chromatin_key, seg_key, raw_key, chromatin_key,
exclude_key,
scale_factor_seg, scale_factor_raw, scale_factor_seg, scale_factor_raw,
scale_factor_chromatin, scale_factor_chromatin,
scale_factor_exclude,
label_starts, label_stops): label_starts, label_stops):
if raw_path != '': if raw_path != '':
assert raw_key is not None and scale_factor_raw is not None assert raw_key is not None and scale_factor_raw is not None
f_raw = h5py.File(raw_path, 'r') f_raw = h5py.File(raw_path, 'r')
...@@ -305,8 +372,13 @@ def compute_morphology_features(table, segmentation_path, raw_path, ...@@ -305,8 +372,13 @@ def compute_morphology_features(table, segmentation_path, raw_path,
else: else:
f_chromatin = ds_chromatin = None f_chromatin = ds_chromatin = None
if exclude_nuc_path != '':
assert exclude_key is not None and scale_factor_exclude is not None
f_exclude = h5py.File(exclude_nuc_path, 'r')
ds_exclude = f_exclude[exclude_key]
# TODO - opening of exclude dataset else:
f_exclude = ds_exclude = None
with h5py.File(segmentation_path, 'r') as f: with h5py.File(segmentation_path, 'r') as f:
ds = f[seg_key] ds = f[seg_key]
...@@ -316,8 +388,10 @@ def compute_morphology_features(table, segmentation_path, raw_path, ...@@ -316,8 +388,10 @@ def compute_morphology_features(table, segmentation_path, raw_path,
log("Computing features from label-id %i to %i" % (label_a, label_b)) log("Computing features from label-id %i to %i" % (label_a, label_b))
stats.extend(morphology_features_for_label_range(table, ds, ds_raw, stats.extend(morphology_features_for_label_range(table, ds, ds_raw,
ds_chromatin, ds_chromatin,
ds_exclude,
scale_factor_seg, scale_factor_raw, scale_factor_seg, scale_factor_raw,
scale_factor_chromatin, scale_factor_chromatin,
scale_factor_exclude,
label_a, label_b)) label_a, label_b))
if f_raw is not None: if f_raw is not None:
f_raw.close() f_raw.close()
...@@ -325,6 +399,9 @@ def compute_morphology_features(table, segmentation_path, raw_path, ...@@ -325,6 +399,9 @@ def compute_morphology_features(table, segmentation_path, raw_path,
if f_chromatin is not None: if f_chromatin is not None:
f_chromatin.close() f_chromatin.close()
if f_exclude is not None:
f_exclude.close()
# convert to pandas table and add column names # convert to pandas table and add column names
stats = pd.DataFrame(stats) stats = pd.DataFrame(stats)
columns = ['label_id'] columns = ['label_id']
...@@ -335,8 +412,9 @@ def compute_morphology_features(table, segmentation_path, raw_path, ...@@ -335,8 +412,9 @@ def compute_morphology_features(table, segmentation_path, raw_path,
columns += morph_columns columns += morph_columns
if raw_path != '': if raw_path != '':
intensity_columns = ['intensity_mean', 'intensity_st_dev', 'intensity_median', 'intensity_iqr', 'intensity_total'] intensity_columns = ['intensity_mean', 'intensity_st_dev', 'intensity_median', 'intensity_iqr',
texture_columns = ['texture_hara' + str(x) for x in range(1,14)] 'intensity_total']
texture_columns = ['texture_hara' + str(x) for x in range(1, 14)]
columns += intensity_columns columns += intensity_columns
# radial intensity columns # radial intensity columns
...@@ -359,10 +437,13 @@ def compute_morphology_features(table, segmentation_path, raw_path, ...@@ -359,10 +437,13 @@ def compute_morphology_features(table, segmentation_path, raw_path,
def morphology_impl(segmentation_path, raw_path, chromatin_path, def morphology_impl(segmentation_path, raw_path, chromatin_path,
exclude_nuc_path,
table, mapping_path, table, mapping_path,
region_mapping_path,
min_size, max_size, min_size, max_size,
max_bb,
resolution, raw_scale, seg_scale, resolution, raw_scale, seg_scale,
chromatin_scale, chromatin_scale, exclude_nuc_scale,
label_starts, label_stops): label_starts, label_stops):
""" Compute morphology features for a segmentation. """ Compute morphology features for a segmentation.
...@@ -378,6 +459,8 @@ def morphology_impl(segmentation_path, raw_path, chromatin_path, ...@@ -378,6 +459,8 @@ def morphology_impl(segmentation_path, raw_path, chromatin_path,
(sizes, center of mass and bounding boxes) for segmentation (sizes, center of mass and bounding boxes) for segmentation
mapping_path [str] - path to - path to nucleus id mapping. mapping_path [str] - path to - path to nucleus id mapping.
Pass 'None' if not relevant. Pass 'None' if not relevant.
region_mapping_path [str] - path to - path to cellid to region mapping
Pass 'None' if not relevant
min_size [int] - minimal size for objects used in calcualtion min_size [int] - minimal size for objects used in calcualtion
max_size [int] - maximum size for objects used in calcualtion max_size [int] - maximum size for objects used in calcualtion
resolution [listlike] - resolution in nanometer. resolution [listlike] - resolution in nanometer.
...@@ -395,6 +478,8 @@ def morphology_impl(segmentation_path, raw_path, chromatin_path, ...@@ -395,6 +478,8 @@ def morphology_impl(segmentation_path, raw_path, chromatin_path,
raw_key = 't00000/s00/%i/cells' % raw_scale raw_key = 't00000/s00/%i/cells' % raw_scale
chromatin_key_full = 't00000/s00/0/cells' chromatin_key_full = 't00000/s00/0/cells'
chromatin_key = 't00000/s00/%i/cells' % chromatin_scale chromatin_key = 't00000/s00/%i/cells' % chromatin_scale
exclude_key_full = 't00000/s00/0/cells'
exclude_key = 't00000/s00/%i/cells' % exclude_nuc_scale
# get scale factor for the segmentation # get scale factor for the segmentation
scale_factor_seg = get_scale_factor(segmentation_path, seg_key_full, seg_key, resolution) scale_factor_seg = get_scale_factor(segmentation_path, seg_key_full, seg_key, resolution)
...@@ -416,11 +501,24 @@ def morphology_impl(segmentation_path, raw_path, chromatin_path, ...@@ -416,11 +501,24 @@ def morphology_impl(segmentation_path, raw_path, chromatin_path,
# NOTE for now we can hard-code the resolution for the chromatin data here, # NOTE for now we can hard-code the resolution for the chromatin data here,
# but we might need to change this if we get additional dataset(s) # but we might need to change this if we get additional dataset(s)
chromatin_resolution = [0.025, 0.02, 0.02] chromatin_resolution = [0.025, 0.02, 0.02]
scale_factor_chromatin = get_scale_factor(chromatin_path, chromatin_key_full, chromatin_key, chromatin_resolution) scale_factor_chromatin = get_scale_factor(chromatin_path, chromatin_key_full, chromatin_key,
chromatin_resolution)
else: else:
log("Don't have chromatin path; do not compute chromatin features") log("Don't have chromatin path; do not compute chromatin features")
chromatin_resolution = scale_factor_chromatin = None chromatin_resolution = scale_factor_chromatin = None
# get scale factor for nuclei segmentation (to be used to exclude nucleus) if given
if exclude_nuc_path != '':
log("Have nucleus path; exclude nucleus for intensity measures")
# NOTE for now we can hard-code the resolution for the nuclei data here,
# but we might need to change this if we get additional dataset(s)
exclude_resolution = [0.1, 0.08, 0.08]
scale_factor_exclude = get_scale_factor(exclude_nuc_path, exclude_key_full, exclude_key,
exclude_resolution)
else:
log("Don't have exclude path; don't exclude nucleus area for intensity measures")
scale_factor_exclude = scale_factor_exclude = None
# remove zero label if it exists # remove zero label if it exists
table = table.loc[table['label_id'] != 0, :] table = table.loc[table['label_id'] != 0, :]
...@@ -430,18 +528,29 @@ def morphology_impl(segmentation_path, raw_path, chromatin_path, ...@@ -430,18 +528,29 @@ def morphology_impl(segmentation_path, raw_path, chromatin_path,
log("Have mapping path %s" % mapping_path) log("Have mapping path %s" % mapping_path)
table = filter_table_from_mapping(table, mapping_path) table = filter_table_from_mapping(table, mapping_path)
log("Number of labels after filter with mapping: %i" % table.shape[0]) log("Number of labels after filter with mapping: %i" % table.shape[0])
if region_mapping_path != '':
log("Have region mapping path %s" % region_mapping_path)
table = filter_table_region(table, region_mapping_path)
log("Number of labels after region filter: %i" % table.shape[0])
# filter by size # filter by size
table = filter_table(table, min_size, max_size) table = filter_table(table, min_size, max_size)
log("Number of labels after size filte: %i" % table.shape[0]) log("Number of labels after size filter: %i" % table.shape[0])
# TODO - add filtering for region - just add another path like the mapping_path one # filter by bounding box size
if max_bb is not None:
table = filter_table_bb(table, max_bb)
log("Number of labels after bounding box size filter %i" % table.shape[0])
log("Computing morphology features") log("Computing morphology features")
stats = compute_morphology_features(table, segmentation_path, raw_path, stats = compute_morphology_features(table, segmentation_path, raw_path,
chromatin_path, chromatin_path, exclude_nuc_path,
seg_key, raw_key, chromatin_key, seg_key, raw_key, chromatin_key,
exclude_key,
scale_factor_seg, scale_factor_raw, scale_factor_seg, scale_factor_raw,
scale_factor_chromatin, scale_factor_chromatin,
scale_factor_exclude,
label_starts, label_stops) label_starts, label_stops)
return stats return stats
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment