Skip to content
Snippets Groups Projects

RoiSet facilitates object detection models

Merged Christopher Randolph Rhodes requested to merge dev_obj_det into staging
2 files
+ 49
23
Compare changes
  • Side-by-side
  • Inline
Files
2
+ 549
232
import itertools
from math import sqrt, floor
from pathlib import Path
import re
from typing import List, Union
from typing import Dict, List, Union
from typing_extensions import Self
from uuid import uuid4
import numpy as np
@@ -10,9 +11,9 @@ from pydantic import BaseModel
from scipy.stats import moment
from skimage.filters import sobel
from skimage.measure import label, regionprops_table, shannon_entropy, find_contours
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from skimage import draw
from skimage.measure import approximate_polygon, find_contours, label, points_in_poly, regionprops, regionprops_table, shannon_entropy
from skimage.morphology import binary_dilation, disk
from .accessors import GenericImageDataAccessor, InMemoryDataAccessor, write_accessor_data_to_file
from .models import InstanceSegmentationModel
@@ -51,6 +52,7 @@ class RoiFilter(BaseModel):
class RoiSetMetaParams(BaseModel):
filters: Union[RoiFilter, None] = None
expand_box_by: List[int] = [128, 0]
deproject_channel: Union[int, None] = None
class RoiSetExportParams(BaseModel):
@@ -62,8 +64,7 @@ class RoiSetExportParams(BaseModel):
derived_channels: bool = False
def _get_label_ids(acc_seg_mask: GenericImageDataAccessor, allow_3d=False, connect_3d=True) -> InMemoryDataAccessor:
def get_label_ids(acc_seg_mask: GenericImageDataAccessor, allow_3d=False, connect_3d=True) -> InMemoryDataAccessor:
"""
Convert binary segmentation mask into either a 2D or 3D object identities map
:param acc_seg_mask: binary segmentation mask (mono) of either two or three dimensions
@@ -73,7 +74,7 @@ def _get_label_ids(acc_seg_mask: GenericImageDataAccessor, allow_3d=False, conne
"""
if allow_3d and connect_3d:
nda_la = label(
acc_seg_mask.data[:, :, 0, :],
acc_seg_mask.data_xyz,
connectivity=3,
).astype('uint16')
return InMemoryDataAccessor(np.expand_dims(nda_la, 2))
@@ -82,7 +83,7 @@ def _get_label_ids(acc_seg_mask: GenericImageDataAccessor, allow_3d=False, conne
la_3d = np.zeros((*acc_seg_mask.hw, 1, acc_seg_mask.nz), dtype='uint16')
for zi in range(0, acc_seg_mask.nz):
la_2d = label(
acc_seg_mask.data[:, :, 0, zi],
acc_seg_mask.data_xyz[:, :, zi],
connectivity=2,
).astype('uint16')
la_2d[la_2d > 0] = la_2d[la_2d > 0] + nla
@@ -92,13 +93,13 @@ def _get_label_ids(acc_seg_mask: GenericImageDataAccessor, allow_3d=False, conne
else:
return InMemoryDataAccessor(
label(
acc_seg_mask.data[:, :, 0, :].max(axis=-1),
acc_seg_mask.get_mip().data_xy,
connectivity=1,
).astype('uint16')
)
def _focus_metrics():
def focus_metrics():
return {
'max_intensity': lambda x: np.max(x),
'stdev': lambda x: np.std(x),
@@ -109,7 +110,216 @@ def _focus_metrics():
}
def _safe_add(a, g, b):
def filter_df(df: pd.DataFrame, filters: RoiFilter = None) -> pd.DataFrame:
query_str = 'label > 0' # always true
if filters is not None: # parse filters
for k, val in filters.dict(exclude_unset=True).items():
assert k in ('area')
vmin = val['min']
vmax = val['max']
assert vmin >= 0
query_str = query_str + f' & {k} > {vmin} & {k} < {vmax}'
return df.loc[df.query(query_str).index, :]
def filter_df_overlap_bbox(df1: pd.DataFrame, df2: pd.DataFrame = None) -> pd.DataFrame:
"""
If passed a single DataFrame, return the subset whose bounding boxes overlap in 3D space. If passed two DataFrames,
return the subset where a ROI in the first overlaps a ROI in the second. May return duplicates entries where a ROI
overlaps with multiple neighbors.
:param df1: DataFrame with potentially overlapping bounding boxes
:param df2: (optional) second DataFrame
:return DataFrame describing subset of overlapping ROIs
bbox_overlaps_with: index of ROI that overlaps
bbox_intersec: pixel area of intersecting region
"""
def _compare(r0, r1):
olx = (r0.x0 < r1.x1) and (r0.x1 > r1.x0)
oly = (r0.y0 < r1.y1) and (r0.y1 > r1.y0)
olz = (r0.zi == r1.zi)
return olx and oly and olz
def _intersec(r0, r1):
return (r0.x1 - r1.x0) * (r0.y1 - r1.y0)
first = []
second = []
intersec = []
if df2 is not None:
for pair in itertools.product(df1.index, df2.index):
if _compare(df1.iloc[pair[0]], df2.iloc[pair[1]]):
first.append(pair[0])
second.append(pair[1])
intersec.append(
_intersec(df1.iloc[pair[0]], df2.iloc[pair[1]])
)
else:
for pair in itertools.combinations(df1.index, 2):
if _compare(df1.iloc[pair[0]], df1.iloc[pair[1]]):
first.append(pair[0])
second.append(pair[1])
first.append(pair[1])
second.append(pair[0])
isc = _intersec(df1.iloc[pair[0]], df1.iloc[pair[1]])
intersec.append(isc)
intersec.append(isc)
sdf = df1.iloc[first]
sdf.loc[:, 'overlaps_with'] = second
sdf.loc[:, 'bbox_intersec'] = intersec
return sdf
def filter_df_overlap_seg(df1: pd.DataFrame, df2: pd.DataFrame = None) -> pd.DataFrame:
"""
If passed a single DataFrame, return the subset whose segmentations overlap in 3D space. If passed two DataFrames,
return the subset where a ROI in the first overlaps a ROI in the second. May return duplicates entries where a ROI
overlaps with multiple neighbors.
:param df1: DataFrame with potentially overlapping bounding boxes
:param df2: (optional) second DataFrame
:return DataFrame describing subset of overlapping ROIs
seg_overlaps_with: index of ROI that overlaps
seg_intersec: pixel area of intersecting region
seg_iou: intersection over union
"""
dfbb = filter_df_overlap_bbox(df1, df2)
def _overlap_seg(r):
roi1 = df1.loc[r.name]
if df2 is not None:
roi2 = df2.loc[r.overlaps_with]
else:
roi2 = df1.loc[r.overlaps_with]
ex0 = min(roi1.x0, roi2.x0, roi1.x1, roi2.x1)
ew = max(roi1.x0, roi2.x0, roi1.x1, roi2.x1) - ex0
ey0 = min(roi1.y0, roi2.y0, roi1.y1, roi2.y1)
eh = max(roi1.y0, roi2.y0, roi1.y1, roi2.y1) - ey0
emask = np.zeros((eh, ew), dtype='uint8')
sl1 = np.s_[(roi1.y0 - ey0): (roi1.y1 - ey0), (roi1.x0 - ex0): (roi1.x1 - ex0)]
sl2 = np.s_[(roi2.y0 - ey0): (roi2.y1 - ey0), (roi2.x0 - ex0): (roi2.x1 - ex0)]
emask[sl1] = roi1.binary_mask
emask[sl2] = emask[sl2] + roi2.binary_mask
return emask
emasks = dfbb.apply(_overlap_seg, axis=1)
dfbb['seg_overlaps'] = emasks.apply(lambda x: np.any(x > 1))
dfbb['seg_intersec'] = emasks.apply(lambda x: (x == 2).sum())
dfbb['seg_iou'] = emasks.apply(lambda x: (x == 2).sum() / (x > 0).sum())
return dfbb
def make_df_from_object_ids(acc_raw, acc_obj_ids, expand_box_by, deproject_channel=None) -> pd.DataFrame:
"""
Build dataframe that associate object IDs with summary stats;
:param acc_raw: accessor to raw image data
:param acc_obj_ids: accessor to map of object IDs
:param expand_box_by: number of pixels to expand bounding box in all directions (without exceeding image boundary)
:param deproject_channel: if objects' z-coordinates are not specified, compute them based on argmax of this channel
:return: pd.DataFrame
"""
# build dataframe of objects, assign z index to each object
if acc_obj_ids.nz == 1 and acc_raw.nz > 1:
if deproject_channel is None or deproject_channel >= acc_raw.chroma or deproject_channel < 0:
if acc_raw.chroma == 1:
deproject_channel = 0
else:
raise NoDeprojectChannelSpecifiedError(
f'When labeling objects, either their z-coordinates or a valid deprojection channel are required.'
)
acc_raw.get_mono(deproject_channel)
zi_map = acc_raw.get_mono(deproject_channel).get_z_argmax().data_xy.astype('uint16')
assert len(zi_map.shape) == 2
df = pd.DataFrame(regionprops_table(
acc_obj_ids.data_xy,
intensity_image=zi_map,
properties=('label', 'area', 'intensity_mean', 'bbox')
)).rename(columns={'bbox-0': 'y0', 'bbox-1': 'x0', 'bbox-2': 'y1', 'bbox-3': 'x1'})
df['zi'] = df['intensity_mean'].round().astype('int')
else: # objects' z-coordinates come from arg of max count in object identities map
df = pd.DataFrame(regionprops_table(
acc_obj_ids.data_xyz,
properties=('label', 'area', 'bbox')
)).rename(columns={
'bbox-0': 'y0', 'bbox-1': 'x0', 'bbox-2': 'z0', 'bbox-3': 'y1', 'bbox-4': 'x1', 'bbox-5': 'z1'
})
def _get_zi_from_label(la):
return acc_obj_ids.apply(lambda x: x == la).get_focus_vector().argmax()
df['zi'] = df['label'].apply(_get_zi_from_label)
df = df_insert_slices(df, acc_raw.shape_dict, expand_box_by)
def _make_binary_mask(r):
acc = InMemoryDataAccessor(acc_obj_ids.data == r.label)
cropped = acc.get_mono(0, mip=True).crop_hw((r.y0, r.x0, (r.y1 - r.y0), (r.x1 - r.x0))).data_xy
return cropped
df['binary_mask'] = df.apply(
_make_binary_mask,
axis=1,
result_type='reduce',
)
return df
def df_insert_slices(df: pd.DataFrame, sd: dict, expand_box_by) -> pd.DataFrame:
h = sd['Y']
w = sd['X']
nz = sd['Z']
df['h'] = df['y1'] - df['y0']
df['w'] = df['x1'] - df['x0']
ebxy, ebz = expand_box_by
df['ebb_y0'] = (df.y0 - ebxy).apply(lambda x: max(x, 0))
df['ebb_y1'] = (df.y1 + ebxy).apply(lambda x: min(x, h))
df['ebb_x0'] = (df.x0 - ebxy).apply(lambda x: max(x, 0))
df['ebb_x1'] = (df.x1 + ebxy).apply(lambda x: min(x, w))
df['ebb_z0'] = (df.zi - ebz).apply(lambda x: max(x, 0))
df['ebb_z1'] = (df.zi + ebz).apply(lambda x: min(x, nz))
df['ebb_h'] = df['ebb_y1'] - df['ebb_y0']
df['ebb_w'] = df['ebb_x1'] - df['ebb_x0']
df['ebb_nz'] = df['ebb_z1'] - df['ebb_z0'] + 1
# compute relative bounding boxes
df['rel_y0'] = df.y0 - df.ebb_y0
df['rel_y1'] = df.y1 - df.ebb_y0
df['rel_x0'] = df.x0 - df.ebb_x0
df['rel_x1'] = df.x1 - df.ebb_x0
assert np.all(df['rel_x1'] <= (df['ebb_x1'] - df['ebb_x0']))
assert np.all(df['rel_y1'] <= (df['ebb_y1'] - df['ebb_y0']))
df['slice'] = df.apply(
lambda r:
np.s_[int(r.y0): int(r.y1), int(r.x0): int(r.x1), :, int(r.zi): int(r.zi + 1)],
axis=1,
result_type='reduce',
)
df['expanded_slice'] = df.apply(
lambda r:
np.s_[int(r.ebb_y0): int(r.ebb_y1), int(r.ebb_x0): int(r.ebb_x1), :, int(r.ebb_z0): int(r.ebb_z1) + 1],
axis=1,
result_type='reduce',
)
df['relative_slice'] = df.apply(
lambda r:
np.s_[int(r.rel_y0): int(r.rel_y1), int(r.rel_x0): int(r.rel_x1), :, :],
axis=1,
result_type='reduce',
)
return df
def safe_add(a, g, b):
assert a.dtype == b.dtype
assert a.shape == b.shape
assert g >= 0.0
@@ -120,45 +330,125 @@ def _safe_add(a, g, b):
np.iinfo(a.dtype).max
).astype(a.dtype)
def make_object_ids_from_df(df: pd.DataFrame, sd: dict) -> InMemoryDataAccessor:
id_mask = np.zeros((sd['Y'], sd['X'], 1, sd['Z']), dtype='uint16')
if 'binary_mask' not in df.columns:
raise MissingSegmentationError('RoiSet dataframe does not contain segmentation')
def _label_obj(r):
sl = np.s_[r.y0:r.y1, r.x0:r.x1, :, r.zi:r.zi + 1]
mask = np.expand_dims(r.binary_mask, (2, 3))
id_mask[sl] = id_mask[sl] + r.label * mask
df.apply(_label_obj, axis=1)
return InMemoryDataAccessor(id_mask)
class RoiSet(object):
def __init__(
self,
acc_raw: GenericImageDataAccessor,
acc_obj_ids: GenericImageDataAccessor,
df: pd.DataFrame,
params: RoiSetMetaParams = RoiSetMetaParams(),
):
"""
A set of regions of interest, referenced by their positions and contours in the YXCZ space of stack acc_raw.
RoiSet contains their internal state, which may be exported as patches, maps, and other products by export methods.
:param acc_raw: accessor to a generally a multichannel z-stack
:param acc_obj_ids: accessor to a 2D single-channel object identities map, where each pixel's intensity
labels its membership in a connected object
:param df: dataframe containing at minimum bounding box and segmentation mask information
:param params: optional arguments that influence the definition and representation of ROIs
"""
assert acc_obj_ids.chroma == 1
self.acc_obj_ids = acc_obj_ids
self.acc_raw = acc_raw
self.accs_derived = []
self.params = params
self._df = self.filter_df(
self.make_df(
self.acc_raw, self.acc_obj_ids, expand_box_by=params.expand_box_by
),
params.filters,
)
self._df = df
self.count = len(self._df)
self.object_class_maps = {} # classification results
def __iter__(self):
"""Expose ROI meta information via the Pandas.DataFrame API"""
return self._df.itertuples(name='Roi')
@staticmethod
def from_segmentation(
def from_object_ids(
acc_raw: GenericImageDataAccessor,
acc_obj_ids: GenericImageDataAccessor,
params: RoiSetMetaParams = RoiSetMetaParams(),
):
"""
:param acc_raw:
:param acc_obj_ids: accessor to a 2D single-channel object identities map, where each pixel's intensity
labels its membership in a connected object
:param params:
:return:
"""
assert acc_obj_ids.chroma == 1
df = filter_df(
make_df_from_object_ids(
acc_raw, acc_obj_ids,
expand_box_by=params.expand_box_by,
deproject_channel=params.deproject_channel,
),
params.filters,
)
return RoiSet(acc_raw, df, params)
@staticmethod
def from_bounding_boxes(
acc_raw: GenericImageDataAccessor,
bbox_yxhw: List[Dict],
bbox_zi: Union[List[int], int] = None,
params: RoiSetMetaParams = RoiSetMetaParams()
):
bbox_df = pd.DataFrame(bbox_yxhw)
if list(bbox_df.columns.str.upper().sort_values()) != ['H', 'W', 'X', 'Y']:
raise BoundingBoxError(f'Expecting bounding box coordinates Y, X, H, and W, not {list(bbox_df.columns)}')
# deproject if zi is not specified
if bbox_zi is None:
dch = params.deproject_channel
if dch is None or dch >= acc_raw.chroma or dch < 0:
if acc_raw.chroma == 1:
dch = 0
else:
raise NoDeprojectChannelSpecifiedError(
f'When labeling objects, either their z-coordinates or a valid deprojection channel are required.'
)
bbox_df['zi'] = acc_raw.get_mono(dch).get_focus_vector().argmax()
else:
bbox_df['zi'] = bbox_zi
bbox_df['y0'] = bbox_df['y']
bbox_df['x0'] = bbox_df['x']
bbox_df['y1'] = bbox_df['y0'] + bbox_df['h']
bbox_df['x1'] = bbox_df['x0'] + bbox_df['w']
bbox_df['label'] = bbox_df.index
df = df_insert_slices(
bbox_df[['y0', 'x0', 'y1', 'x1', 'zi', 'label']],
acc_raw.shape_dict,
params.expand_box_by,
)
def _make_binary_mask(r):
return np.ones((r.h, r.w), dtype=bool)
df['binary_mask'] = df.apply(
_make_binary_mask,
axis=1,
result_type='reduce',
)
return RoiSet(acc_raw, df, params)
@staticmethod
def from_binary_mask(
acc_raw: GenericImageDataAccessor,
acc_seg: GenericImageDataAccessor,
allow_3d=False,
@@ -169,102 +459,43 @@ class RoiSet(object):
Create a RoiSet from a binary segmentation mask (either 2D or 3D)
:param acc_raw: accessor to a generally a multichannel z-stack
:param acc_seg: accessor of a binary segmentation mask (mono) of either two or three dimensions
:param allow_3d: return a 3D map if True; return a 2D map of the mask's maximum intensity project if False
:param allow_3d: use a 3D map if True; use a 2D map of the mask's maximum intensity project if False
:param connect_3d: objects can span multiple z-positions if True; objects are unique to a single z if False
:param params: optional arguments that influence the definition and representation of ROIs
:return: object identities map
"""
return RoiSet(acc_raw, _get_label_ids(acc_seg, allow_3d=allow_3d, connect_3d=connect_3d), params)
return RoiSet.from_object_ids(
acc_raw,
get_label_ids(
acc_seg,
allow_3d=allow_3d,
connect_3d=connect_3d
),
params
)
@staticmethod
def make_df(acc_raw, acc_obj_ids, expand_box_by) -> pd.DataFrame:
def from_polygons_2d(
acc_raw,
polygons: List[np.ndarray],
params: RoiSetMetaParams = RoiSetMetaParams()
):
"""
Build dataframe associate object IDs with summary stats
:param acc_raw: accessor to raw image data
:param acc_obj_ids: accessor to map of object IDs
:param expand_box_by: number of pixels to expand bounding box in all directions (without exceeding image boundary)
# :param deproject: assign object's z-position based on argmax of raw data if True
:return: pd.DataFrame
Create a RoiSet where objects are defined from a list of polygon coordinates
:param acc_raw: accessor to a generally a multichannel z-stack
:param polygons: list of (variable x 2) np.ndarrays describing (x, y) polymer coordinates
:param params: optional arguments that influence the definition and representation of ROIs
"""
# build dataframe of objects, assign z index to each object
if acc_obj_ids.nz == 1: # deproject objects' z-coordinates from argmax of raw image
df = pd.DataFrame(regionprops_table(
acc_obj_ids.data[:, :, 0, 0],
intensity_image=acc_raw.data.argmax(axis=3, keepdims=True)[:, :, 0, 0].astype('uint16'),
properties=('label', 'area', 'intensity_mean', 'bbox', 'centroid')
)).rename(columns={'bbox-0': 'y0', 'bbox-1': 'x0', 'bbox-2': 'y1', 'bbox-3': 'x1'})
df['zi'] = df['intensity_mean'].round().astype('int')
else: # objects' z-coordinates come from arg of max count in object identities map
df = pd.DataFrame(regionprops_table(
acc_obj_ids.data[:, :, 0, :],
properties=('label', 'area', 'bbox', 'centroid')
)).rename(columns={
'bbox-0': 'y0', 'bbox-1': 'x0', 'bbox-2': 'z0', 'bbox-3': 'y1', 'bbox-4': 'x1', 'bbox-5': 'z1'
})
df['zi'] = df['label'].apply(lambda x: (acc_obj_ids.data == x).sum(axis=(0, 1, 2)).argmax())
# compute expanded bounding boxes
h, w, c, nz = acc_raw.shape
df['h'] = df['y1'] - df['y0']
df['w'] = df['x1'] - df['x0']
ebxy, ebz = expand_box_by
df['ebb_y0'] = (df.y0 - ebxy).apply(lambda x: max(x, 0))
df['ebb_y1'] = (df.y1 + ebxy).apply(lambda x: min(x, h))
df['ebb_x0'] = (df.x0 - ebxy).apply(lambda x: max(x, 0))
df['ebb_x1'] = (df.x1 + ebxy).apply(lambda x: min(x, w))
df['ebb_z0'] = (df.zi - ebz).apply(lambda x: max(x, 0))
df['ebb_z1'] = (df.zi + ebz).apply(lambda x: min(x, nz))
df['ebb_h'] = df['ebb_y1'] - df['ebb_y0']
df['ebb_w'] = df['ebb_x1'] - df['ebb_x0']
df['ebb_nz'] = df['ebb_z1'] - df['ebb_z0'] + 1
# compute relative bounding boxes
df['rel_y0'] = df.y0 - df.ebb_y0
df['rel_y1'] = df.y1 - df.ebb_y0
df['rel_x0'] = df.x0 - df.ebb_x0
df['rel_x1'] = df.x1 - df.ebb_x0
assert np.all(df['rel_x1'] <= (df['ebb_x1'] - df['ebb_x0']))
assert np.all(df['rel_y1'] <= (df['ebb_y1'] - df['ebb_y0']))
df['slice'] = df.apply(
lambda r:
np.s_[int(r.y0): int(r.y1), int(r.x0): int(r.x1), :, int(r.zi): int(r.zi + 1)],
axis=1,
result_type='reduce',
)
df['expanded_slice'] = df.apply(
lambda r:
np.s_[int(r.ebb_y0): int(r.ebb_y1), int(r.ebb_x0): int(r.ebb_x1), :, int(r.ebb_z0): int(r.ebb_z1) + 1],
axis=1,
result_type='reduce',
)
df['relative_slice'] = df.apply(
lambda r:
np.s_[int(r.rel_y0): int(r.rel_y1), int(r.rel_x0): int(r.rel_x1), :, :],
axis=1,
result_type='reduce',
)
df['binary_mask'] = df.apply(
lambda r: (acc_obj_ids.data == r.label).max(axis=-1)[r.y0: r.y1, r.x0: r.x1, 0],
axis=1,
result_type='reduce',
mask = np.zeros(acc_raw.get_mono(0, mip=True).shape, dtype=bool)
for p in polygons:
sl = draw.polygon(p[:, 1], p[:, 0])
mask[sl] = True
return RoiSet.from_binary_mask(
acc_raw,
InMemoryDataAccessor(mask),
allow_3d=False,
connect_3d=False,
params=params,
)
return df
@staticmethod
def filter_df(df: pd.DataFrame, filters: RoiFilter = None) -> pd.DataFrame:
query_str = 'label > 0' # always true
if filters is not None: # parse filters
for k, val in filters.dict(exclude_unset=True).items():
assert k in ('area')
vmin = val['min']
vmax = val['max']
assert vmin >= 0
query_str = query_str + f' & {k} > {vmin} & {k} < {vmax}'
return df.loc[df.query(query_str).index, :]
def get_df(self) -> pd.DataFrame:
return self._df
@@ -275,19 +506,6 @@ class RoiSet(object):
def add_df_col(self, name, se: pd.Series) -> None:
self._df[name] = se
def get_multichannel_projection(self):
if self.count:
projected = project_stack_from_focal_points(
self._df['centroid-0'].to_numpy(),
self._df['centroid-1'].to_numpy(),
self._df['zi'].to_numpy(),
self.acc_raw,
degree=4,
)
else: # else just return MIP
projected = self.acc_raw.data.max(axis=-1)
return projected
def get_patches_acc(self, channels: list = None, **kwargs) -> PatchStack: # padded, un-annotated 2d patches
if channels and len(channels) == 1:
patches_df = self.get_patches(white_channel=channels[0], **kwargs)
@@ -301,7 +519,7 @@ class RoiSet(object):
write_accessor_data_to_file(fp, annotated)
return (prefix + '.tif')
def get_zmask(self, mask_type='boxes'):
def get_zmask(self, mask_type='boxes') -> np.ndarray:
"""
Return a mask of same dimensionality as raw data
@@ -352,7 +570,7 @@ class RoiSet(object):
:param channels: list of nc raw input channels to send to classifier
:param object_classification_model: InstanceSegmentation model object
:param derived_channel_functions: list of functions that each receive a PatchStack accessor with nc channels and
return a single-channel PatchStack accessor of the same shape
that return a single-channel PatchStack accessor of the same shape
:return: None
"""
@@ -385,26 +603,67 @@ class RoiSet(object):
self.get_patch_masks_acc(expanded=False, pad_to=None)
)
om = np.zeros(self.acc_obj_ids.shape, self.acc_obj_ids.dtype)
se = pd.Series(dtype='Int64', index=self._df.index)
self._df['classify_by_' + name] = pd.Series(dtype='Int64')
# assign labels to object map:
for i, roi in enumerate(self):
oc = np.unique(
mask_largest_object(
obmap_patches.iat(i).data
)
)[-1]
self._df.loc[roi.Index, 'classify_by_' + name] = oc
om[self.acc_obj_ids.data == roi.label] = oc
self.object_class_maps[name] = InMemoryDataAccessor(om)
se[roi.Index] = oc
self.set_classification(f'classify_by_{name}', se)
def export_dataframe(self, csv_path: Path) -> str:
csv_path.parent.mkdir(parents=True, exist_ok=True)
self._df.drop(['expanded_slice', 'slice', 'relative_slice', 'binary_mask'], axis=1).to_csv(csv_path, index=False)
return csv_path.name
def get_instance_classification(self, roiset_from: Self, iou_min: float = 0.5) -> pd.DataFrame:
"""
Transfer instance classification labels from another RoiSet based on intersection over union (IOU) similarity
:param roiset_from: RoiSet source of classification labels, same shape as this RoiSet
:param iou_min: threshold IOU below which a label is not transferred
:return DataFrame of source RoiSet, including overlaps with this RoiSet and IOU metric
"""
if self.acc_raw.shape != roiset_from.acc_raw.shape:
raise ShapeMismatchError(
f'Expecting two RoiSets of same shape: {self.acc_raw.shape} != {roiset_from.acc_raw.shape}')
columns = [f'classify_by_{c}' for c in roiset_from.classification_columns]
if len(columns) == 0:
raise MissingInstanceLabelsError('Expecting at least on instance classification channel but none found')
df_overlaps = filter_df_overlap_seg(
roiset_from.get_df(),
self.get_df()
)
df_overlaps['transfer'] = df_overlaps.seg_iou > iou_min
df_merge = pd.merge(
roiset_from.get_df()[columns],
df_overlaps.loc[df_overlaps.transfer, ['overlaps_with']],
left_index=True,
right_index=True,
how='inner',
).set_index('overlaps_with')
for col in columns:
self.set_classification(col, df_merge[col])
return df_overlaps
def get_object_class_map(self, name: str) -> InMemoryDataAccessor:
"""
For a given classification result, return a map where object IDs are replaced by each object's class
:param name: name of the classification result, same as passed to RoiSet.classify_by()
:return: accessor of object class map
"""
colname = ('classify_by_' + name)
assert colname in self._df.columns
obj_ids = self.acc_obj_ids
om = np.zeros(obj_ids.shape, obj_ids.dtype)
def _label_object_class(roi):
om[self.acc_obj_ids.data == roi.label] = roi[colname]
self._df.apply(_label_object_class, axis=1)
return InMemoryDataAccessor(om)
def export_patch_masks(self, where: Path, pad_to: int = None, prefix='mask', expanded=False) -> pd.DataFrame:
@@ -446,15 +705,13 @@ class RoiSet(object):
patch = np.zeros((roi.ebb_h, roi.ebb_w, 1, 1), dtype='uint8')
patch[roi.relative_slice][:, :, 0, 0] = roi.binary_mask * 255
else:
patch = np.zeros((roi.y1 - roi.y0, roi.x1 - roi.x0, 1, 1), dtype='uint8')
patch[:, :, 0, 0] = roi.binary_mask * 255
patch = (roi.binary_mask * 255).astype('uint8')
if pad_to:
patch = pad(patch, pad_to)
return patch
return np.expand_dims(patch, (2, 3))
dfe = self._df.copy()
dfe['patch_mask'] = dfe.apply(lambda r: _make_patch_mask(r), axis=1)
dfe['patch_mask'] = dfe.apply(_make_patch_mask, axis=1)
return dfe
def get_patch_masks_acc(self, **kwargs) -> PatchStack:
@@ -482,7 +739,8 @@ class RoiSet(object):
if white_channel:
assert white_channel < raw.chroma
stack = raw.data[:, :, [white_channel, white_channel, white_channel], :]
mono = raw.get_mono(white_channel).data_xyz
stack = np.stack([mono, mono, mono], axis=2)
else:
stack = np.zeros([*raw.shape[0:2], 3, raw.shape[3]], dtype=raw.dtype)
@@ -491,10 +749,10 @@ class RoiSet(object):
continue
assert isinstance(ci, int)
assert ci < raw.chroma
stack[:, :, ii, :] = _safe_add(
stack[:, :, ii, :] = safe_add(
stack[:, :, ii, :], # either black or grayscale channel
rgb_overlay_weights[ii],
raw.data[:, :, ci, :]
raw.get_mono(ci).data_xyz
)
else:
if white_channel is not None: # interpret as just a single channel
@@ -509,9 +767,10 @@ class RoiSet(object):
annotate_rgb = True
break
if annotate_rgb: # make RGB patches anyway to include annotation color
stack = raw.data[:, :, [white_channel, white_channel, white_channel], :]
mono = raw.get_mono(white_channel).data_xyz
stack = np.stack([mono, mono, mono], axis=2)
else: # make monochrome patches
stack = raw.data[:, :, [white_channel], :]
stack = raw.get_mono(white_channel).data
elif kwargs.get('channels'):
stack = raw.get_channels(kwargs['channels']).data
else:
@@ -533,7 +792,7 @@ class RoiSet(object):
# make a 2d patch, find optimal z-position determined by focus_metric function on each channel separately
elif focus_metric is not None:
foc = _focus_metrics()[focus_metric]
foc = focus_metrics()[focus_metric]
patch = np.zeros([ph, pw, pc, 1], dtype=patch3d.dtype)
@@ -593,6 +852,22 @@ class RoiSet(object):
dfe['patch'] = dfe.apply(lambda r: _make_patch(r), axis=1)
return dfe
@property
def classification_columns(self):
"""
Return list of columns that describe instance classification results
"""
pr = 'classify_by_'
return [c.split(pr)[1] for c in self._df.columns if c.startswith(pr)]
def set_classification(self, colname: str, se: pd.Series):
"""
Set instance classification result as a column addition on dataframe
:param colname: name of classification result
:param se: series containing class information
"""
self._df[colname] = se
def run_exports(self, where: Path, channel, prefix, params: RoiSetExportParams) -> dict:
"""
Export various representations of ROIs, e.g. patches, annotated stacks, and object maps.
@@ -633,10 +908,10 @@ class RoiSet(object):
if k == 'annotated_zstacks':
record[k] = str(Path(k) / self.export_annotated_zstack(subdir, prefix=pr, **kp))
if k == 'object_classes':
for kc, acc in self.object_class_maps.items():
fp = subdir / kc / (pr + '.tif')
write_accessor_data_to_file(fp, acc)
record[f'{k}_{kc}'] = str(fp)
for n in self.classification_columns:
fp = subdir / n / (pr + '.tif')
write_accessor_data_to_file(fp, self.get_object_class_map(n))
record[f'{k}_{n}'] = str(fp)
if k == 'derived_channels':
record[k] = []
for di, dacc in enumerate(self.accs_derived):
@@ -650,7 +925,7 @@ class RoiSet(object):
return record
def serialize(self, where: Path, prefix='') -> dict:
def serialize(self, where: Path, prefix='roiset') -> dict:
"""
Export the minimal information needed to recreate RoiSet object, i.e. CSV data file and tight patch masks
:param where: path of directory in which to write files
@@ -658,94 +933,136 @@ class RoiSet(object):
:return: nested dict of Path objects describing the locations of export products
"""
record = {}
df_exp = self.export_patch_masks(
where / 'tight_patch_masks',
prefix=prefix,
pad_to=None,
expanded=False
)
se_pa = df_exp.patch_mask_path.apply(
lambda x: str(Path('tight_patch_masks') / x)
).rename('tight_patch_masks_path')
self._df = self._df.join(se_pa)
df_fn = self.export_dataframe(where / 'dataframe' / (prefix + '.csv'))
record['dataframe'] = str(Path('dataframe') / df_fn)
record['tight_patch_masks'] = list(se_pa)
if not self._df.binary_mask.apply(lambda x: np.all(x)).all(): # binary masks aren't just all True
df_exp = self.export_patch_masks(
where / 'tight_patch_masks',
prefix=prefix,
pad_to=None,
expanded=False
)
# record patch masks paths to dataframe, then save static columns to CSV
se_pa = df_exp.patch_mask_path.apply(
lambda x: str(Path('tight_patch_masks') / x)
).rename('tight_patch_masks_path')
self._df = self._df.join(se_pa)
record['tight_patch_masks'] = list(se_pa)
csv_path = where / 'dataframe' / (prefix + '.csv')
csv_path.parent.mkdir(parents=True, exist_ok=True)
self._df.drop(
['expanded_slice', 'slice', 'relative_slice', 'binary_mask'],
axis=1
).to_csv(csv_path, index=False)
record['dataframe'] = str(Path('dataframe') / csv_path.name)
return record
def get_polygons(self, poly_threshold=0, dilation_radius=1) -> pd.DataFrame:
self.coordinates_ = """
Fit polygons to all object boundaries in the RoiSet
:param poly_threshold: threshold distance for polygon fit; a smaller number follows sharp features more closely
:param dilation_radius: radius of binary dilation to apply before fitting polygon
:return: Series of (variable x 2) np.ndarrays describing (x, y) polymer coordinates
"""
pad_to = 1
def _poly_from_mask(roi):
mask = roi.binary_mask
if len(mask.shape) != 2:
raise PatchMaskShapeError(f'Patch mask needs to be two dimensions to fit a polygon')
# label and fill holes
labeled = label(mask)
filled = [rp.image_filled for rp in regionprops(labeled)]
assert (np.unique(labeled)[-1] == 1) and (len(filled) == 1), 'Cannot fit multiple polygons in a single patch mask'
closed = binary_dilation(filled[0], footprint=disk(dilation_radius))
padded = np.pad(closed, pad_to) * 1.0
all_contours = find_contours(padded)
nc = len(all_contours)
for j in range(0, nc):
if all([points_in_poly(all_contours[k], all_contours[j]).all() for k in range(0, nc)]):
contour = all_contours[j]
break
rel_polygon = approximate_polygon(contour[:, [1, 0]], poly_threshold) - [pad_to, pad_to]
return rel_polygon + [roi.x0, roi.y0]
return self._df.apply(_poly_from_mask, axis=1)
@property
def acc_obj_ids(self):
return make_object_ids_from_df(self._df, self.acc_raw.shape_dict)
@staticmethod
def deserialize(acc_raw: GenericImageDataAccessor, where: Path, prefix=''):
def deserialize(acc_raw: GenericImageDataAccessor, where: Path, prefix='roiset') -> Self:
"""
Create an RoiSet object from saved files and an image accessor
:param acc_raw: accessor to image that contains ROIs
:param where: path to directory containing RoiSet serialization files, namely dataframe.csv and a subdirectory
named tight_patch_masks
:param prefix: starting prefix of patch mask filenames
:return: RoiSet object
"""
df = pd.read_csv(where / 'dataframe' / (prefix + '.csv'))[['label', 'zi', 'y0', 'y1', 'x0', 'x1']]
id_mask = np.zeros((*acc_raw.hw, 1, acc_raw.nz), dtype='uint16')
def _label_obj(r):
sl = np.s_[r.y0:r.y1, r.x0:r.x1, :, r.zi:r.zi + 1]
ext = 'png'
fname = f'{prefix}-la{r.label:04d}-zi{r.zi:04d}.{ext}'
try:
ma_acc = generate_file_accessor(where / 'tight_patch_masks' / fname)
bool_mask = ma_acc.data / np.iinfo(ma_acc.data.dtype).max
id_mask[sl] = id_mask[sl] + r.label * bool_mask
except Exception as e:
raise DeserializeRoiSet(e)
df.apply(_label_obj, axis=1)
return RoiSet(acc_raw, InMemoryDataAccessor(id_mask))
def project_stack_from_focal_points(
xx: np.ndarray,
yy: np.ndarray,
zz: np.ndarray,
stack: GenericImageDataAccessor,
degree: int = 2,
) -> np.ndarray:
"""
Given a set of 3D points, project a multichannel z-stack based on a surface fit of the provided points
:param xx: vector of point x-coordinates
:param yy: vector of point y-coordinates
:param zz: vector of point z-coordinates
:param stack: z-stack to project
:param degree: order of polynomial to fit
:return: multichannel 2d projected image array
"""
assert xx.shape == yy.shape
assert xx.shape == zz.shape
poly = PolynomialFeatures(degree=degree)
X = np.stack([xx, yy]).T
features = poly.fit_transform(X, zz)
model = LinearRegression(fit_intercept=False)
model.fit(features, zz)
xy_indices = np.indices(stack.hw).reshape(2, -1).T
xy_features = np.dot(
poly.fit_transform(xy_indices, zz),
model.coef_
)
zi_image = xy_features.reshape(
stack.hw
).round().clip(
0, (stack.nz - 1)
).astype('uint16')
return np.take_along_axis(
stack.data,
np.repeat(
np.expand_dims(zi_image, (2, 3)),
stack.chroma,
axis=2
),
axis=3
)
pa_masks = where / 'tight_patch_masks'
if pa_masks.exists(): # import segmentation masks
def _read_binary_mask(r):
ext = 'png'
fname = f'{prefix}-la{r.label:04d}-zi{r.zi:04d}.{ext}'
try:
ma_acc = generate_file_accessor(pa_masks / fname)
assert ma_acc.chroma == 1 and ma_acc.nz == 1
mask_data = ma_acc.data_xy / np.iinfo(ma_acc.data.dtype).max
return mask_data
except Exception as e:
raise DeserializeRoiSet(e)
df['binary_mask'] = df.apply(_read_binary_mask, axis=1)
id_mask = make_object_ids_from_df(df, acc_raw.shape_dict)
return RoiSet.from_object_ids(acc_raw, id_mask)
else: # assume bounding boxes only
df['y'] = df['y0']
df['x'] = df['x0']
df['h'] = df['y1'] - df['y0']
df['w'] = df['x1'] - df['x0']
return RoiSet.from_bounding_boxes(
acc_raw,
df[['y', 'x', 'h', 'w']].to_dict(orient='records'),
list(df['zi'])
)
class Error(Exception):
pass
class BoundingBoxError(Error):
pass
class DeserializeRoiSet(Error):
pass
class NoDeprojectChannelSpecifiedError(Error):
pass
class DerivedChannelError(Error):
pass
class MissingSegmentationError(Error):
pass
class PatchMaskShapeError(Error):
pass
class ShapeMismatchError(Error):
pass
class MissingInstanceLabelsError(Error):
pass
\ No newline at end of file
Loading