Skip to content
Snippets Groups Projects
Commit 7fe3e58d authored by Constantin Pape's avatar Constantin Pape
Browse files

Refactor morphology feature implementation in separate file

parent 122d78b0
No related branches found
No related tags found
No related merge requests found
......@@ -4,23 +4,15 @@ import os
import sys
import json
# this is a task called by multiple processes,
# so we need to restrict the number of threads used by numpy
from cluster_tools.utils.numpy_utils import set_numpy_threads
set_numpy_threads(1)
import numpy as np
import luigi
import pandas as pd
import nifty.tools as nt
from skimage.measure import regionprops, marching_cubes_lewiner, mesh_surface_area
from skimage.transform import resize
from skimage.util import pad
import pandas as pd
import cluster_tools.utils.volume_utils as vu
import cluster_tools.utils.function_utils as fu
from cluster_tools.utils.task_utils import DummyTask
from cluster_tools.cluster_tasks import SlurmTask, LocalTask
from scripts.extension.attributes.morphology_impl import morphology_impl
#
# Morphology Attribute Tasks
......@@ -116,161 +108,6 @@ class MorphologySlurm(MorphologyBase, SlurmTask):
#
# get shape of full data & downsampling factor
def get_scale_factor(path, key_full, key, resolution):
with vu.file_reader(path, 'r') as f:
full_shape = f[key_full].shape
shape = f[key].shape
# scale factor for downsampling
scale_factor = [res * (fs / sh)
for res, fs, sh in zip(resolution,
full_shape,
shape)]
return scale_factor
def filter_table(table, min_size, max_size):
if max_size is None:
table = table.loc[table['n_pixels'] >= min_size, :]
else:
criteria = np.logical_and(table['n_pixels'] > min_size, table['n_pixels'] < max_size)
table = table.loc[criteria, :]
return table
def filter_table_from_mapping(table, mapping_path):
# read in numpy array of mapping of cells to nuclei - first column cell id, second nucleus id
mapping = np.genfromtxt(mapping_path, skip_header=1, delimiter='\t')[:, :2].astype('uint64')
# remove zero labels from this table too, if exist
mapping = mapping[np.logical_and(mapping[:, 0] != 0,
mapping[:, 1] != 0)]
table = table.loc[np.isin(table['label_id'], mapping[:, 0]), :]
return table
def load_data(ds, row, scale):
# compute the bounding box from the row information
mins = [row.bb_min_z, row.bb_min_y, row.bb_min_x]
maxs = [row.bb_max_z, row.bb_max_y, row.bb_max_x]
mins = [int(mi / sca) for mi, sca in zip(mins, scale)]
maxs = [int(ma / sca) + 1 for ma, sca in zip(maxs, scale)]
bb = tuple(slice(mi, ma) for mi, ma in zip(mins, maxs))
# load the data from the bounding box
return ds[bb]
def morphology_row_features(mask, scale):
# Calculate stats from skimage
ski_morph = regionprops(mask.astype('uint8'))
# volume in pixels
volume_in_pix = ski_morph[0]['area']
# extent
extent = ski_morph[0]['extent']
# The mesh calculation below fails if an edge of the segmentation is right up against the
# edge of the volume - gives an open, rather than a closed surface
# Pad by a few pixels to avoid this
mask = pad(mask, 10, mode='constant')
# surface area of mesh around object (other ways to calculate better?)
verts, faces, normals, values = marching_cubes_lewiner(mask, spacing=tuple(scale))
surface_area = mesh_surface_area(verts, faces)
# volume in microns
volume_in_microns = np.prod(scale)*volume_in_pix
# sphericity (as in morpholibj)
# Should run from zero to one
sphericity = (36*np.pi*(float(volume_in_microns)**2))/(float(surface_area)**3)
return [volume_in_microns, extent, surface_area, sphericity]
def intensity_row_features(raw, mask):
intensity_vals_in_mask = raw[mask]
# mean and stdev - use float64 to avoid silent overflow errors
mean_intensity = np.mean(intensity_vals_in_mask, dtype=np.float64)
st_dev = np.std(intensity_vals_in_mask, dtype=np.float64)
return mean_intensity, st_dev
# compute morphology (and intensity features) for label range
def morphology_features_for_label_range(table, ds, ds_raw,
scale_factor_seg, scale_factor_raw,
label_begin, label_end):
label_range = np.logical_and(table['label_id'] >= label_begin, table['label_id'] < label_end)
sub_table = table.loc[label_range, :]
stats = []
for row in sub_table.itertuples(index=False):
label_id = int(row.label_id)
# load the segmentation data from the bounding box corresponding
# to this row
seg = load_data(ds, row, scale_factor_seg)
# compute the segmentation mask and check that we have
# foreground in the mask
seg_mask = seg == label_id
if seg_mask.sum() == 0:
# if the seg mask is empty, we simply skip this label-id
continue
# compute the morphology features from the segmentation mask
result = [float(label_id)] + morphology_row_features(seg_mask, scale_factor_seg)
# compute the intensiry features from raw data and segmentation mask
if ds_raw is not None:
raw = load_data(ds_raw, row, scale_factor_raw)
# resize the segmentation mask if it does not fit the raw data
if seg_mask.shape != raw.shape:
seg_mask = resize(seg_mask, raw.shape,
order=0, mode='reflect',
anti_aliasing=True, preserve_range=True).astype('bool')
result += intensity_row_features(raw, seg_mask)
stats.append(result)
return stats
def compute_morphology_features(table, segmentation_path, raw_path,
seg_key, raw_key,
scale_factor_seg, scale_factor_raw,
blocking, block_list):
if raw_path != '':
assert raw_key is not None and scale_factor_raw is not None
f_raw = vu.file_reader(raw_path, 'r')
ds_raw = f_raw[raw_key]
else:
f_raw = ds_raw = None
with vu.file_reader(segmentation_path, 'r') as f:
ds = f[seg_key]
stats = []
for block_id in block_list:
block = blocking.getBlock(block_id)
label_a, label_b = block.begin[0], block.end[0]
fu.log("Computing features from label-id %i to %i" % (label_a, label_b))
stats.extend(morphology_features_for_label_range(table, ds, ds_raw,
scale_factor_seg, scale_factor_raw,
label_a, label_b))
if f_raw is not None:
f_raw.close()
# convert to pandas table and add column names
stats = pd.DataFrame(stats)
columns = ['label_id',
'shape_volume_in_microns', 'shape_extent', 'shape_surface_area', 'shape_sphericity']
if raw_path != '':
columns += ['intensity_mean', 'intensity_st_dev']
stats.columns = columns
return stats
def morphology(job_id, config_path):
fu.log("start processing job %i" % job_id)
......@@ -294,49 +131,22 @@ def morphology(job_id, config_path):
block_list = config['block_list']
# keys to segmentation and raw data for the different scales
seg_key_full = 't00000/s00/0/cells'
seg_key = 't00000/s00/%i/cells' % seg_scale
raw_key_full = 't00000/s00/0/cells'
raw_key = 't00000/s00/%i/cells' % raw_scale
# get scale factor for the segmentation
scale_factor_seg = get_scale_factor(segmentation_path, seg_key_full, seg_key, resolution)
# get scale factor for raw data (if it's given)
if raw_path != '':
fu.log("Have raw path; compute intensity features")
# NOTE for now we can hard-code the resolution for the raw data here,
# but we might need to change this if we get additional dataset(s)
raw_resolution = [0.025, 0.01, 0.01]
scale_factor_raw = get_scale_factor(raw_path, raw_key_full, raw_key, raw_resolution)
else:
fu.log("Don't have raw path; do not compute intensity features")
raw_resolution = scale_factor_raw = None
# read the base table
table = pd.read_csv(in_table_path, sep='\t')
n_labels = table.shape[0]
fu.log("Initial number of labels: %i" % n_labels)
# remove zero label if it exists
table = table.loc[table['label_id'] != 0, :]
# if we have a mappin, only keep objects in the mapping
# (i.e cells that have assigned nuclei)
if mapping_path != '':
fu.log("Have mapping path %s" % mapping_path)
table = filter_table_from_mapping(table, mapping_path)
fu.log("Number of labels after filter with mapping: %i" % table.shape[0])
# filter by size
table = filter_table(table, min_size, max_size)
fu.log("Number of labels after size filte: %i" % table.shape[0])
# get the label ranges for this job
n_labels = table.shape[0]
blocking = nt.blocking([0], [n_labels], [1000])
fu.log("Computing morphology features")
stats = compute_morphology_features(table, segmentation_path, raw_path,
seg_key, raw_key, scale_factor_seg, scale_factor_raw,
blocking, block_list)
label_starts, label_stops = [], []
for block_id in block_list:
block = blocking.getBlock(block_id)
label_starts.append(block.begin)
label_stops.append(block.end)
stats = morphology_impl(segmentation_path, raw_path, table, mapping_path,
min_size, max_size,
resolution, raw_scale, seg_scale,
label_starts, label_stops)
output_path = output_prefix + '_job%i.csv' % job_id
fu.log("Save result to %s" % output_path)
......
#! /bin/python
# this is a task called by multiple processes,
# so we need to restrict the number of threads used by numpy
from cluster_tools.utils.numpy_utils import set_numpy_threads
set_numpy_threads(1)
import numpy as np
import h5py
import pandas as pd
from skimage.measure import regionprops, marching_cubes_lewiner, mesh_surface_area
from skimage.transform import resize
from skimage.util import pad
# get shape of full data & downsampling factor
def get_scale_factor(path, key_full, key, resolution):
with h5py.File(path, 'r') as f:
full_shape = f[key_full].shape
shape = f[key].shape
# scale factor for downsampling
scale_factor = [res * (fs / sh)
for res, fs, sh in zip(resolution,
full_shape,
shape)]
return scale_factor
def filter_table(table, min_size, max_size):
if max_size is None:
table = table.loc[table['n_pixels'] >= min_size, :]
else:
criteria = np.logical_and(table['n_pixels'] > min_size, table['n_pixels'] < max_size)
table = table.loc[criteria, :]
return table
def filter_table_from_mapping(table, mapping_path):
# read in numpy array of mapping of cells to nuclei - first column cell id, second nucleus id
mapping = np.genfromtxt(mapping_path, skip_header=1, delimiter='\t')[:, :2].astype('uint64')
# remove zero labels from this table too, if exist
mapping = mapping[np.logical_and(mapping[:, 0] != 0,
mapping[:, 1] != 0)]
table = table.loc[np.isin(table['label_id'], mapping[:, 0]), :]
return table
def load_data(ds, row, scale):
# compute the bounding box from the row information
mins = [row.bb_min_z, row.bb_min_y, row.bb_min_x]
maxs = [row.bb_max_z, row.bb_max_y, row.bb_max_x]
mins = [int(mi / sca) for mi, sca in zip(mins, scale)]
maxs = [int(ma / sca) + 1 for ma, sca in zip(maxs, scale)]
bb = tuple(slice(mi, ma) for mi, ma in zip(mins, maxs))
# load the data from the bounding box
return ds[bb]
def morphology_row_features(mask, scale):
# Calculate stats from skimage
ski_morph = regionprops(mask.astype('uint8'))
# volume in pixels
volume_in_pix = ski_morph[0]['area']
# extent
extent = ski_morph[0]['extent']
# The mesh calculation below fails if an edge of the segmentation is right up against the
# edge of the volume - gives an open, rather than a closed surface
# Pad by a few pixels to avoid this
mask = pad(mask, 10, mode='constant')
# surface area of mesh around object (other ways to calculate better?)
verts, faces, normals, values = marching_cubes_lewiner(mask, spacing=tuple(scale))
surface_area = mesh_surface_area(verts, faces)
# volume in microns
volume_in_microns = np.prod(scale)*volume_in_pix
# sphericity (as in morpholibj)
# Should run from zero to one
sphericity = (36*np.pi*(float(volume_in_microns)**2))/(float(surface_area)**3)
return [volume_in_microns, extent, surface_area, sphericity]
def intensity_row_features(raw, mask):
intensity_vals_in_mask = raw[mask]
# mean and stdev - use float64 to avoid silent overflow errors
mean_intensity = np.mean(intensity_vals_in_mask, dtype=np.float64)
st_dev = np.std(intensity_vals_in_mask, dtype=np.float64)
return mean_intensity, st_dev
# compute morphology (and intensity features) for label range
def morphology_features_for_label_range(table, ds, ds_raw,
scale_factor_seg, scale_factor_raw,
label_begin, label_end):
label_range = np.logical_and(table['label_id'] >= label_begin, table['label_id'] < label_end)
sub_table = table.loc[label_range, :]
stats = []
for row in sub_table.itertuples(index=False):
label_id = int(row.label_id)
# load the segmentation data from the bounding box corresponding
# to this row
seg = load_data(ds, row, scale_factor_seg)
# compute the segmentation mask and check that we have
# foreground in the mask
seg_mask = seg == label_id
if seg_mask.sum() == 0:
# if the seg mask is empty, we simply skip this label-id
continue
# compute the morphology features from the segmentation mask
result = [float(label_id)] + morphology_row_features(seg_mask, scale_factor_seg)
# compute the intensiry features from raw data and segmentation mask
if ds_raw is not None:
raw = load_data(ds_raw, row, scale_factor_raw)
# resize the segmentation mask if it does not fit the raw data
if seg_mask.shape != raw.shape:
seg_mask = resize(seg_mask, raw.shape,
order=0, mode='reflect',
anti_aliasing=True, preserve_range=True).astype('bool')
result += intensity_row_features(raw, seg_mask)
stats.append(result)
return stats
def compute_morphology_features(table, segmentation_path, raw_path,
seg_key, raw_key,
scale_factor_seg, scale_factor_raw,
label_starts, label_stops):
if raw_path != '':
assert raw_key is not None and scale_factor_raw is not None
f_raw = h5py.File(raw_path, 'r')
ds_raw = f_raw[raw_key]
else:
f_raw = ds_raw = None
with h5py.File(segmentation_path, 'r') as f:
ds = f[seg_key]
stats = []
for label_a, label_b in zip(label_starts, label_stops):
# fu.log("Computing features from label-id %i to %i" % (label_a, label_b))
stats.extend(morphology_features_for_label_range(table, ds, ds_raw,
scale_factor_seg, scale_factor_raw,
label_a, label_b))
if f_raw is not None:
f_raw.close()
# convert to pandas table and add column names
stats = pd.DataFrame(stats)
columns = ['label_id',
'shape_volume_in_microns', 'shape_extent', 'shape_surface_area', 'shape_sphericity']
if raw_path != '':
columns += ['intensity_mean', 'intensity_st_dev']
stats.columns = columns
return stats
def morphology_impl(segmentation_path, raw_path, table, mapping_path,
min_size, max_size,
resolution, raw_scale, seg_scale,
label_starts, label_stops):
""" Compute morphology features for a segmentation.
Can compute features for multiple label ranges. If you want to
compute features for the full label range, pass
'label_starts=[0]' and 'label_stops=[number_of_labels]'
Arguments:
segmentation_path [str] - path to segmentation stored as h5.
raw_path [str] - path to raw data stored as h5.
Pass 'None' if you don't want to compute features based on raw data.
table [pd.DataFrame] - table with default attributes
(sizes, center of mass and bounding boxes) for segmentation
mapping_path [str] - path to - path to nucleus id mapping.
Pass 'None' if not relevant.
min_size [int] - minimal size for objects used in calcualtion
max_size [int] - maximum size for objects used in calcualtion
resolution [listlike] - resolution in nanometer.
Must be given in [Z, Y, X].
raw_scale [int] - scale level of the raw data
seg_scale [int] - scale level of the segmentation
label_starts [listlike] - list with label start positions
label_stops [listlike] - list with label stop positions
"""
# keys to segmentation and raw data for the different scales
seg_key_full = 't00000/s00/0/cells'
seg_key = 't00000/s00/%i/cells' % seg_scale
raw_key_full = 't00000/s00/0/cells'
raw_key = 't00000/s00/%i/cells' % raw_scale
# get scale factor for the segmentation
scale_factor_seg = get_scale_factor(segmentation_path, seg_key_full, seg_key, resolution)
# get scale factor for raw data (if it's given)
if raw_path != '':
# fu.log("Have raw path; compute intensity features")
# NOTE for now we can hard-code the resolution for the raw data here,
# but we might need to change this if we get additional dataset(s)
raw_resolution = [0.025, 0.01, 0.01]
scale_factor_raw = get_scale_factor(raw_path, raw_key_full, raw_key, raw_resolution)
else:
# fu.log("Don't have raw path; do not compute intensity features")
raw_resolution = scale_factor_raw = None
# remove zero label if it exists
table = table.loc[table['label_id'] != 0, :]
# if we have a mappin, only keep objects in the mapping
# (i.e cells that have assigned nuclei)
if mapping_path != '':
# fu.log("Have mapping path %s" % mapping_path)
table = filter_table_from_mapping(table, mapping_path)
# fu.log("Number of labels after filter with mapping: %i" % table.shape[0])
# filter by size
table = filter_table(table, min_size, max_size)
# fu.log("Number of labels after size filte: %i" % table.shape[0])
# fu.log("Computing morphology features")
stats = compute_morphology_features(table, segmentation_path, raw_path,
seg_key, raw_key, scale_factor_seg, scale_factor_raw,
label_starts, label_stops)
return stats
if __name__ == '__main__':
pass
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment