Christopher Randolph Rhodes
--- a/model_server/base/roiset.py

+ 549

− 232
+++ b/model_server/base/roiset.py

+ 549

− 232
+import itertools
 from math import sqrt, floor
 from pathlib import Path
-import re
-from typing import List, Union
+from typing import Dict, List, Union
+from typing_extensions import Self
 from uuid import uuid4

 import numpy as np
 @@ -10,9 +11,9 @@ from pydantic import BaseModel
 from scipy.stats import moment
 from skimage.filters import sobel

-from skimage.measure import label, regionprops_table, shannon_entropy, find_contours
-from sklearn.preprocessing import PolynomialFeatures
-from sklearn.linear_model import LinearRegression
+from skimage import draw
+from skimage.measure import approximate_polygon, find_contours, label, points_in_poly, regionprops, regionprops_table, shannon_entropy
+from skimage.morphology import binary_dilation, disk

 from .accessors import GenericImageDataAccessor, InMemoryDataAccessor, write_accessor_data_to_file
 from .models import InstanceSegmentationModel
 @@ -51,6 +52,7 @@ class RoiFilter(BaseModel):
 class RoiSetMetaParams(BaseModel):
    filters: Union[RoiFilter, None] = None
    expand_box_by: List[int] = [128, 0]
+    deproject_channel: Union[int, None] = None


 class RoiSetExportParams(BaseModel):
 @@ -62,8 +64,7 @@ class RoiSetExportParams(BaseModel):
    derived_channels: bool = False


-
-def _get_label_ids(acc_seg_mask: GenericImageDataAccessor, allow_3d=False, connect_3d=True) -> InMemoryDataAccessor:
+def get_label_ids(acc_seg_mask: GenericImageDataAccessor, allow_3d=False, connect_3d=True) -> InMemoryDataAccessor:
    """
    Convert binary segmentation mask into either a 2D or 3D object identities map
    :param acc_seg_mask: binary segmentation mask (mono) of either two or three dimensions
 @@ -73,7 +74,7 @@ def _get_label_ids(acc_seg_mask: GenericImageDataAccessor, allow_3d=False, conne
    """
    if allow_3d and connect_3d:
        nda_la = label(
-            acc_seg_mask.data[:, :, 0, :],
+            acc_seg_mask.data_xyz,
            connectivity=3,
        ).astype('uint16')
        return InMemoryDataAccessor(np.expand_dims(nda_la, 2))
 @@ -82,7 +83,7 @@ def _get_label_ids(acc_seg_mask: GenericImageDataAccessor, allow_3d=False, conne
        la_3d = np.zeros((*acc_seg_mask.hw, 1, acc_seg_mask.nz), dtype='uint16')
        for zi in range(0, acc_seg_mask.nz):
            la_2d = label(
-                acc_seg_mask.data[:, :, 0, zi],
+                acc_seg_mask.data_xyz[:, :, zi],
                connectivity=2,
            ).astype('uint16')
            la_2d[la_2d > 0] = la_2d[la_2d > 0] + nla
 @@ -92,13 +93,13 @@ def _get_label_ids(acc_seg_mask: GenericImageDataAccessor, allow_3d=False, conne
    else:
        return InMemoryDataAccessor(
            label(
-                acc_seg_mask.data[:, :, 0, :].max(axis=-1),
+                acc_seg_mask.get_mip().data_xy,
                connectivity=1,
            ).astype('uint16')
        )


-def _focus_metrics():
+def focus_metrics():
    return {
        'max_intensity': lambda x: np.max(x),
        'stdev': lambda x: np.std(x),
 @@ -109,7 +110,216 @@ def _focus_metrics():
    }


-def _safe_add(a, g, b):
+def filter_df(df: pd.DataFrame, filters: RoiFilter = None) -> pd.DataFrame:
+    query_str = 'label > 0'  # always true
+    if filters is not None:  # parse filters
+        for k, val in filters.dict(exclude_unset=True).items():
+            assert k in ('area')
+            vmin = val['min']
+            vmax = val['max']
+            assert vmin >= 0
+            query_str = query_str + f' & {k} > {vmin} & {k} < {vmax}'
+    return df.loc[df.query(query_str).index, :]
+
+
+def filter_df_overlap_bbox(df1: pd.DataFrame, df2: pd.DataFrame = None) -> pd.DataFrame:
+    """
+    If passed a single DataFrame, return the subset whose bounding boxes overlap in 3D space.  If passed two DataFrames,
+    return the subset where a ROI in the first overlaps a ROI in the second.  May return duplicates entries where a ROI
+    overlaps with multiple neighbors.
+    :param df1: DataFrame with potentially overlapping bounding boxes
+    :param df2: (optional) second DataFrame
+    :return DataFrame describing subset of overlapping ROIs
+        bbox_overlaps_with: index of ROI that overlaps
+        bbox_intersec: pixel area of intersecting region
+    """
+
+    def _compare(r0, r1):
+        olx = (r0.x0 < r1.x1) and (r0.x1 > r1.x0)
+        oly = (r0.y0 < r1.y1) and (r0.y1 > r1.y0)
+        olz = (r0.zi == r1.zi)
+        return olx and oly and olz
+
+    def _intersec(r0, r1):
+        return (r0.x1 - r1.x0) * (r0.y1 - r1.y0)
+
+    first = []
+    second = []
+    intersec = []
+
+    if df2 is not None:
+        for pair in itertools.product(df1.index, df2.index):
+            if _compare(df1.iloc[pair[0]], df2.iloc[pair[1]]):
+                first.append(pair[0])
+                second.append(pair[1])
+                intersec.append(
+                    _intersec(df1.iloc[pair[0]], df2.iloc[pair[1]])
+                )
+    else:
+        for pair in itertools.combinations(df1.index, 2):
+            if _compare(df1.iloc[pair[0]], df1.iloc[pair[1]]):
+                first.append(pair[0])
+                second.append(pair[1])
+                first.append(pair[1])
+                second.append(pair[0])
+                isc = _intersec(df1.iloc[pair[0]], df1.iloc[pair[1]])
+                intersec.append(isc)
+                intersec.append(isc)
+
+    sdf = df1.iloc[first]
+    sdf.loc[:, 'overlaps_with'] = second
+    sdf.loc[:, 'bbox_intersec'] = intersec
+    return sdf
+
+
+def filter_df_overlap_seg(df1: pd.DataFrame, df2: pd.DataFrame = None) -> pd.DataFrame:
+    """
+    If passed a single DataFrame, return the subset whose segmentations overlap in 3D space.  If passed two DataFrames,
+    return the subset where a ROI in the first overlaps a ROI in the second.  May return duplicates entries where a ROI
+    overlaps with multiple neighbors.
+    :param df1: DataFrame with potentially overlapping bounding boxes
+    :param df2: (optional) second DataFrame
+    :return DataFrame describing subset of overlapping ROIs
+        seg_overlaps_with: index of ROI that overlaps
+        seg_intersec: pixel area of intersecting region
+        seg_iou: intersection over union
+    """
+
+    dfbb = filter_df_overlap_bbox(df1, df2)
+
+    def _overlap_seg(r):
+        roi1 = df1.loc[r.name]
+        if df2 is not None:
+            roi2 = df2.loc[r.overlaps_with]
+        else:
+            roi2 = df1.loc[r.overlaps_with]
+        ex0 = min(roi1.x0, roi2.x0, roi1.x1, roi2.x1)
+        ew = max(roi1.x0, roi2.x0, roi1.x1, roi2.x1) - ex0
+        ey0 = min(roi1.y0, roi2.y0, roi1.y1, roi2.y1)
+        eh = max(roi1.y0, roi2.y0, roi1.y1, roi2.y1) - ey0
+        emask = np.zeros((eh, ew), dtype='uint8')
+        sl1 = np.s_[(roi1.y0 - ey0): (roi1.y1 - ey0), (roi1.x0 - ex0): (roi1.x1 - ex0)]
+        sl2 = np.s_[(roi2.y0 - ey0): (roi2.y1 - ey0), (roi2.x0 - ex0): (roi2.x1 - ex0)]
+        emask[sl1] = roi1.binary_mask
+        emask[sl2] = emask[sl2] + roi2.binary_mask
+        return emask
+
+    emasks = dfbb.apply(_overlap_seg, axis=1)
+    dfbb['seg_overlaps'] = emasks.apply(lambda x: np.any(x > 1))
+    dfbb['seg_intersec'] = emasks.apply(lambda x: (x == 2).sum())
+    dfbb['seg_iou'] = emasks.apply(lambda x: (x == 2).sum() / (x > 0).sum())
+    return dfbb
+
+
+def make_df_from_object_ids(acc_raw, acc_obj_ids, expand_box_by, deproject_channel=None) -> pd.DataFrame:
+    """
+    Build dataframe that associate object IDs with summary stats;
+    :param acc_raw: accessor to raw image data
+    :param acc_obj_ids: accessor to map of object IDs
+    :param expand_box_by: number of pixels to expand bounding box in all directions (without exceeding image boundary)
+    :param deproject_channel: if objects' z-coordinates are not specified, compute them based on argmax of this channel
+    :return: pd.DataFrame
+    """
+    # build dataframe of objects, assign z index to each object
+
+    if acc_obj_ids.nz == 1 and acc_raw.nz > 1:
+
+        if deproject_channel is None or deproject_channel >= acc_raw.chroma or deproject_channel < 0:
+            if acc_raw.chroma == 1:
+                deproject_channel = 0
+            else:
+                raise NoDeprojectChannelSpecifiedError(
+                    f'When labeling objects, either their z-coordinates or a valid deprojection channel are required.'
+                )
+        acc_raw.get_mono(deproject_channel)
+
+        zi_map = acc_raw.get_mono(deproject_channel).get_z_argmax().data_xy.astype('uint16')
+        assert len(zi_map.shape) == 2
+        df = pd.DataFrame(regionprops_table(
+            acc_obj_ids.data_xy,
+            intensity_image=zi_map,
+            properties=('label', 'area', 'intensity_mean', 'bbox')
+        )).rename(columns={'bbox-0': 'y0', 'bbox-1': 'x0', 'bbox-2': 'y1', 'bbox-3': 'x1'})
+        df['zi'] = df['intensity_mean'].round().astype('int')
+
+    else:  # objects' z-coordinates come from arg of max count in object identities map
+        df = pd.DataFrame(regionprops_table(
+            acc_obj_ids.data_xyz,
+            properties=('label', 'area', 'bbox')
+        )).rename(columns={
+            'bbox-0': 'y0', 'bbox-1': 'x0', 'bbox-2': 'z0', 'bbox-3': 'y1', 'bbox-4': 'x1', 'bbox-5': 'z1'
+        })
+
+        def _get_zi_from_label(la):
+            return acc_obj_ids.apply(lambda x: x == la).get_focus_vector().argmax()
+
+        df['zi'] = df['label'].apply(_get_zi_from_label)
+
+    df = df_insert_slices(df, acc_raw.shape_dict, expand_box_by)
+
+    def _make_binary_mask(r):
+        acc = InMemoryDataAccessor(acc_obj_ids.data == r.label)
+        cropped = acc.get_mono(0, mip=True).crop_hw((r.y0, r.x0, (r.y1 - r.y0), (r.x1 - r.x0))).data_xy
+        return cropped
+
+
+    df['binary_mask'] = df.apply(
+        _make_binary_mask,
+        axis=1,
+        result_type='reduce',
+    )
+    return df
+
+
+def df_insert_slices(df: pd.DataFrame, sd: dict, expand_box_by) -> pd.DataFrame:
+    h = sd['Y']
+    w = sd['X']
+    nz = sd['Z']
+
+    df['h'] = df['y1'] - df['y0']
+    df['w'] = df['x1'] - df['x0']
+    ebxy, ebz = expand_box_by
+    df['ebb_y0'] = (df.y0 - ebxy).apply(lambda x: max(x, 0))
+    df['ebb_y1'] = (df.y1 + ebxy).apply(lambda x: min(x, h))
+    df['ebb_x0'] = (df.x0 - ebxy).apply(lambda x: max(x, 0))
+    df['ebb_x1'] = (df.x1 + ebxy).apply(lambda x: min(x, w))
+    df['ebb_z0'] = (df.zi - ebz).apply(lambda x: max(x, 0))
+    df['ebb_z1'] = (df.zi + ebz).apply(lambda x: min(x, nz))
+    df['ebb_h'] = df['ebb_y1'] - df['ebb_y0']
+    df['ebb_w'] = df['ebb_x1'] - df['ebb_x0']
+    df['ebb_nz'] = df['ebb_z1'] - df['ebb_z0'] + 1
+
+    # compute relative bounding boxes
+    df['rel_y0'] = df.y0 - df.ebb_y0
+    df['rel_y1'] = df.y1 - df.ebb_y0
+    df['rel_x0'] = df.x0 - df.ebb_x0
+    df['rel_x1'] = df.x1 - df.ebb_x0
+
+    assert np.all(df['rel_x1'] <= (df['ebb_x1'] - df['ebb_x0']))
+    assert np.all(df['rel_y1'] <= (df['ebb_y1'] - df['ebb_y0']))
+
+    df['slice'] = df.apply(
+        lambda r:
+        np.s_[int(r.y0): int(r.y1), int(r.x0): int(r.x1), :, int(r.zi): int(r.zi + 1)],
+        axis=1,
+        result_type='reduce',
+    )
+    df['expanded_slice'] = df.apply(
+        lambda r:
+        np.s_[int(r.ebb_y0): int(r.ebb_y1), int(r.ebb_x0): int(r.ebb_x1), :, int(r.ebb_z0): int(r.ebb_z1) + 1],
+        axis=1,
+        result_type='reduce',
+    )
+    df['relative_slice'] = df.apply(
+        lambda r:
+        np.s_[int(r.rel_y0): int(r.rel_y1), int(r.rel_x0): int(r.rel_x1), :, :],
+        axis=1,
+        result_type='reduce',
+    )
+    return df
+
+
+def safe_add(a, g, b):
    assert a.dtype == b.dtype
    assert a.shape == b.shape
    assert g >= 0.0
 @@ -120,45 +330,125 @@ def _safe_add(a, g, b):
        np.iinfo(a.dtype).max
    ).astype(a.dtype)

+def make_object_ids_from_df(df: pd.DataFrame, sd: dict) -> InMemoryDataAccessor:
+    id_mask = np.zeros((sd['Y'], sd['X'], 1, sd['Z']), dtype='uint16')
+    if 'binary_mask' not in df.columns:
+        raise MissingSegmentationError('RoiSet dataframe does not contain segmentation')
+
+    def _label_obj(r):
+        sl = np.s_[r.y0:r.y1, r.x0:r.x1, :, r.zi:r.zi + 1]
+        mask = np.expand_dims(r.binary_mask, (2, 3))
+        id_mask[sl] = id_mask[sl] + r.label * mask
+
+    df.apply(_label_obj, axis=1)
+    return InMemoryDataAccessor(id_mask)
+

 class RoiSet(object):

    def __init__(
            self,
            acc_raw: GenericImageDataAccessor,
-            acc_obj_ids: GenericImageDataAccessor,
+            df: pd.DataFrame,
            params: RoiSetMetaParams = RoiSetMetaParams(),
    ):
        """
        A set of regions of interest, referenced by their positions and contours in the YXCZ space of stack acc_raw.
        RoiSet contains their internal state, which may be exported as patches, maps, and other products by export methods.
        :param acc_raw: accessor to a generally a multichannel z-stack
-        :param acc_obj_ids: accessor to a 2D single-channel object identities map, where each pixel's intensity
-            labels its membership in a connected object
+        :param df: dataframe containing at minimum bounding box and segmentation mask information
        :param params: optional arguments that influence the definition and representation of ROIs
        """
-        assert acc_obj_ids.chroma == 1
-        self.acc_obj_ids = acc_obj_ids
        self.acc_raw = acc_raw
        self.accs_derived = []
        self.params = params

-        self._df = self.filter_df(
-            self.make_df(
-                self.acc_raw, self.acc_obj_ids, expand_box_by=params.expand_box_by
-            ),
-            params.filters,
-        )
-
+        self._df = df
        self.count = len(self._df)
-        self.object_class_maps = {}  # classification results

    def __iter__(self):
        """Expose ROI meta information via the Pandas.DataFrame API"""
        return self._df.itertuples(name='Roi')

    @staticmethod
-    def from_segmentation(
+    def from_object_ids(
+            acc_raw: GenericImageDataAccessor,
+            acc_obj_ids: GenericImageDataAccessor,
+            params: RoiSetMetaParams = RoiSetMetaParams(),
+    ):
+        """
+
+        :param acc_raw:
+        :param acc_obj_ids: accessor to a 2D single-channel object identities map, where each pixel's intensity
+            labels its membership in a connected object
+        :param params:
+        :return:
+        """
+        assert acc_obj_ids.chroma == 1
+
+        df = filter_df(
+            make_df_from_object_ids(
+                acc_raw, acc_obj_ids,
+                expand_box_by=params.expand_box_by,
+                deproject_channel=params.deproject_channel,
+            ),
+            params.filters,
+        )
+
+        return RoiSet(acc_raw, df, params)
+
+    @staticmethod
+    def from_bounding_boxes(
+        acc_raw: GenericImageDataAccessor,
+        bbox_yxhw: List[Dict],
+        bbox_zi: Union[List[int], int] = None,
+        params: RoiSetMetaParams = RoiSetMetaParams()
+    ):
+        bbox_df = pd.DataFrame(bbox_yxhw)
+        if list(bbox_df.columns.str.upper().sort_values()) != ['H', 'W', 'X', 'Y']:
+            raise BoundingBoxError(f'Expecting bounding box coordinates Y, X, H, and W, not {list(bbox_df.columns)}')
+
+
+        # deproject if zi is not specified
+        if bbox_zi is None:
+            dch = params.deproject_channel
+            if dch is None or dch >= acc_raw.chroma or dch < 0:
+                if acc_raw.chroma == 1:
+                    dch = 0
+                else:
+                    raise NoDeprojectChannelSpecifiedError(
+                        f'When labeling objects, either their z-coordinates or a valid deprojection channel are required.'
+                    )
+            bbox_df['zi'] = acc_raw.get_mono(dch).get_focus_vector().argmax()
+        else:
+            bbox_df['zi'] = bbox_zi
+
+        bbox_df['y0'] = bbox_df['y']
+        bbox_df['x0'] = bbox_df['x']
+        bbox_df['y1'] = bbox_df['y0'] + bbox_df['h']
+        bbox_df['x1'] = bbox_df['x0'] + bbox_df['w']
+        bbox_df['label'] = bbox_df.index
+
+
+        df = df_insert_slices(
+            bbox_df[['y0', 'x0', 'y1', 'x1', 'zi', 'label']],
+            acc_raw.shape_dict,
+            params.expand_box_by,
+        )
+
+        def _make_binary_mask(r):
+            return np.ones((r.h, r.w), dtype=bool)
+
+        df['binary_mask'] = df.apply(
+            _make_binary_mask,
+            axis=1,
+            result_type='reduce',
+        )
+        return RoiSet(acc_raw, df, params)
+
+
+    @staticmethod
+    def from_binary_mask(
            acc_raw: GenericImageDataAccessor,
            acc_seg: GenericImageDataAccessor,
            allow_3d=False,
 @@ -169,102 +459,43 @@ class RoiSet(object):
        Create a RoiSet from a binary segmentation mask (either 2D or 3D)
        :param acc_raw: accessor to a generally a multichannel z-stack
        :param acc_seg: accessor of a binary segmentation mask (mono) of either two or three dimensions
-        :param allow_3d: return a 3D map if True; return a 2D map of the mask's maximum intensity project if False
+        :param allow_3d: use a 3D map if True; use a 2D map of the mask's maximum intensity project if False
        :param connect_3d: objects can span multiple z-positions if True; objects are unique to a single z if False
        :param params: optional arguments that influence the definition and representation of ROIs
-        :return: object identities map
        """
-        return RoiSet(acc_raw, _get_label_ids(acc_seg, allow_3d=allow_3d, connect_3d=connect_3d), params)
+        return RoiSet.from_object_ids(
+            acc_raw,
+            get_label_ids(
+                acc_seg,
+                allow_3d=allow_3d,
+                connect_3d=connect_3d
+            ),
+            params
+        )

    @staticmethod
-    def make_df(acc_raw, acc_obj_ids, expand_box_by) -> pd.DataFrame:
+    def from_polygons_2d(
+            acc_raw,
+            polygons: List[np.ndarray],
+            params: RoiSetMetaParams = RoiSetMetaParams()
+    ):
        """
-        Build dataframe associate object IDs with summary stats
-        :param acc_raw: accessor to raw image data
-        :param acc_obj_ids: accessor to map of object IDs
-        :param expand_box_by: number of pixels to expand bounding box in all directions (without exceeding image boundary)
-        # :param deproject: assign object's z-position based on argmax of raw data if True
-        :return: pd.DataFrame
+        Create a RoiSet where objects are defined from a list of polygon coordinates
+        :param acc_raw: accessor to a generally a multichannel z-stack
+        :param polygons: list of (variable x 2) np.ndarrays describing (x, y) polymer coordinates
+        :param params: optional arguments that influence the definition and representation of ROIs
        """
-        # build dataframe of objects, assign z index to each object
-
-        if acc_obj_ids.nz == 1:  # deproject objects' z-coordinates from argmax of raw image
-            df = pd.DataFrame(regionprops_table(
-                acc_obj_ids.data[:, :, 0, 0],
-                intensity_image=acc_raw.data.argmax(axis=3, keepdims=True)[:, :, 0, 0].astype('uint16'),
-                properties=('label', 'area', 'intensity_mean', 'bbox', 'centroid')
-            )).rename(columns={'bbox-0': 'y0', 'bbox-1': 'x0', 'bbox-2': 'y1', 'bbox-3': 'x1'})
-            df['zi'] = df['intensity_mean'].round().astype('int')
-
-        else:  # objects' z-coordinates come from arg of max count in object identities map
-            df = pd.DataFrame(regionprops_table(
-                acc_obj_ids.data[:, :, 0, :],
-                properties=('label', 'area', 'bbox', 'centroid')
-            )).rename(columns={
-                'bbox-0': 'y0', 'bbox-1': 'x0', 'bbox-2': 'z0', 'bbox-3': 'y1', 'bbox-4': 'x1', 'bbox-5': 'z1'
-            })
-            df['zi'] = df['label'].apply(lambda x: (acc_obj_ids.data == x).sum(axis=(0, 1, 2)).argmax())
-
-        # compute expanded bounding boxes
-        h, w, c, nz = acc_raw.shape
-        df['h'] = df['y1'] - df['y0']
-        df['w'] = df['x1'] - df['x0']
-        ebxy, ebz = expand_box_by
-        df['ebb_y0'] = (df.y0 - ebxy).apply(lambda x: max(x, 0))
-        df['ebb_y1'] = (df.y1 + ebxy).apply(lambda x: min(x, h))
-        df['ebb_x0'] = (df.x0 - ebxy).apply(lambda x: max(x, 0))
-        df['ebb_x1'] = (df.x1 + ebxy).apply(lambda x: min(x, w))
-        df['ebb_z0'] = (df.zi - ebz).apply(lambda x: max(x, 0))
-        df['ebb_z1'] = (df.zi + ebz).apply(lambda x: min(x, nz))
-        df['ebb_h'] = df['ebb_y1'] - df['ebb_y0']
-        df['ebb_w'] = df['ebb_x1'] - df['ebb_x0']
-        df['ebb_nz'] = df['ebb_z1'] - df['ebb_z0'] + 1
-
-        # compute relative bounding boxes
-        df['rel_y0'] = df.y0 - df.ebb_y0
-        df['rel_y1'] = df.y1 - df.ebb_y0
-        df['rel_x0'] = df.x0 - df.ebb_x0
-        df['rel_x1'] = df.x1 - df.ebb_x0
-
-        assert np.all(df['rel_x1'] <= (df['ebb_x1'] - df['ebb_x0']))
-        assert np.all(df['rel_y1'] <= (df['ebb_y1'] - df['ebb_y0']))
-
-        df['slice'] = df.apply(
-            lambda r:
-            np.s_[int(r.y0): int(r.y1), int(r.x0): int(r.x1), :, int(r.zi): int(r.zi + 1)],
-            axis=1,
-            result_type='reduce',
-        )
-        df['expanded_slice'] = df.apply(
-            lambda r:
-            np.s_[int(r.ebb_y0): int(r.ebb_y1), int(r.ebb_x0): int(r.ebb_x1), :, int(r.ebb_z0): int(r.ebb_z1) + 1],
-            axis=1,
-            result_type='reduce',
-        )
-        df['relative_slice'] = df.apply(
-            lambda r:
-            np.s_[int(r.rel_y0): int(r.rel_y1), int(r.rel_x0): int(r.rel_x1), :, :],
-            axis=1,
-            result_type='reduce',
-        )
-        df['binary_mask'] = df.apply(
-            lambda r: (acc_obj_ids.data == r.label).max(axis=-1)[r.y0: r.y1, r.x0: r.x1, 0],
-            axis=1,
-            result_type='reduce',
+        mask = np.zeros(acc_raw.get_mono(0, mip=True).shape, dtype=bool)
+        for p in polygons:
+            sl = draw.polygon(p[:, 1], p[:, 0])
+            mask[sl] = True
+        return RoiSet.from_binary_mask(
+            acc_raw,
+            InMemoryDataAccessor(mask),
+            allow_3d=False,
+            connect_3d=False,
+            params=params,
        )
-        return df
-
-    @staticmethod
-    def filter_df(df: pd.DataFrame, filters: RoiFilter = None) -> pd.DataFrame:
-        query_str = 'label > 0'  # always true
-        if filters is not None:  # parse filters
-            for k, val in filters.dict(exclude_unset=True).items():
-                assert k in ('area')
-                vmin = val['min']
-                vmax = val['max']
-                assert vmin >= 0
-                query_str = query_str + f' & {k} > {vmin} & {k} < {vmax}'
-        return df.loc[df.query(query_str).index, :]

    def get_df(self) -> pd.DataFrame:
        return self._df
 @@ -275,19 +506,6 @@ class RoiSet(object):
    def add_df_col(self, name, se: pd.Series) -> None:
        self._df[name] = se

-    def get_multichannel_projection(self):
-        if self.count:
-            projected = project_stack_from_focal_points(
-                self._df['centroid-0'].to_numpy(),
-                self._df['centroid-1'].to_numpy(),
-                self._df['zi'].to_numpy(),
-                self.acc_raw,
-                degree=4,
-            )
-        else:  # else just return MIP
-            projected = self.acc_raw.data.max(axis=-1)
-        return projected
-
    def get_patches_acc(self, channels: list = None, **kwargs) -> PatchStack:  # padded, un-annotated 2d patches
        if channels and len(channels) == 1:
            patches_df = self.get_patches(white_channel=channels[0], **kwargs)
 @@ -301,7 +519,7 @@ class RoiSet(object):
        write_accessor_data_to_file(fp, annotated)
        return (prefix + '.tif')

-    def get_zmask(self, mask_type='boxes'):
+    def get_zmask(self, mask_type='boxes') -> np.ndarray:
        """
        Return a mask of same dimensionality as raw data

 @@ -352,7 +570,7 @@ class RoiSet(object):
        :param channels: list of nc raw input channels to send to classifier
        :param object_classification_model: InstanceSegmentation model object
        :param derived_channel_functions: list of functions that each receive a PatchStack accessor with nc channels and
-            return a single-channel PatchStack accessor of the same shape
+            that return a single-channel PatchStack accessor of the same shape
        :return: None
        """

 @@ -385,26 +603,67 @@ class RoiSet(object):
            self.get_patch_masks_acc(expanded=False, pad_to=None)
        )

-        om = np.zeros(self.acc_obj_ids.shape, self.acc_obj_ids.dtype)
+        se = pd.Series(dtype='Int64', index=self._df.index)

-        self._df['classify_by_' + name] = pd.Series(dtype='Int64')
-
-        # assign labels to object map:
        for i, roi in enumerate(self):
            oc = np.unique(
                mask_largest_object(
                    obmap_patches.iat(i).data
                )
            )[-1]
-            self._df.loc[roi.Index, 'classify_by_' + name] = oc
-            om[self.acc_obj_ids.data == roi.label] = oc
-        self.object_class_maps[name] = InMemoryDataAccessor(om)
+            se[roi.Index] = oc
+        self.set_classification(f'classify_by_{name}', se)


-    def export_dataframe(self, csv_path: Path) -> str:
-        csv_path.parent.mkdir(parents=True, exist_ok=True)
-        self._df.drop(['expanded_slice', 'slice', 'relative_slice', 'binary_mask'], axis=1).to_csv(csv_path, index=False)
-        return csv_path.name
+    def get_instance_classification(self, roiset_from: Self, iou_min: float = 0.5) -> pd.DataFrame:
+        """
+        Transfer instance classification labels from another RoiSet based on intersection over union (IOU) similarity
+        :param roiset_from: RoiSet source of classification labels, same shape as this RoiSet
+        :param iou_min: threshold IOU below which a label is not transferred
+        :return DataFrame of source RoiSet, including overlaps with this RoiSet and IOU metric
+        """
+        if self.acc_raw.shape != roiset_from.acc_raw.shape:
+            raise ShapeMismatchError(
+                f'Expecting two RoiSets of same shape: {self.acc_raw.shape} != {roiset_from.acc_raw.shape}')
+
+        columns = [f'classify_by_{c}' for c in roiset_from.classification_columns]
+
+        if len(columns) == 0:
+            raise MissingInstanceLabelsError('Expecting at least on instance classification channel but none found')
+
+        df_overlaps = filter_df_overlap_seg(
+            roiset_from.get_df(),
+            self.get_df()
+        )
+        df_overlaps['transfer'] = df_overlaps.seg_iou > iou_min
+        df_merge = pd.merge(
+            roiset_from.get_df()[columns],
+            df_overlaps.loc[df_overlaps.transfer, ['overlaps_with']],
+            left_index=True,
+            right_index=True,
+            how='inner',
+        ).set_index('overlaps_with')
+        for col in columns:
+            self.set_classification(col, df_merge[col])
+
+        return df_overlaps
+
+    def get_object_class_map(self, name: str) -> InMemoryDataAccessor:
+        """
+        For a given classification result, return a map where object IDs are replaced by each object's class
+        :param name: name of the classification result, same as passed to RoiSet.classify_by()
+        :return: accessor of object class map
+        """
+        colname = ('classify_by_' + name)
+        assert colname in self._df.columns
+        obj_ids = self.acc_obj_ids
+        om = np.zeros(obj_ids.shape, obj_ids.dtype)
+
+        def _label_object_class(roi):
+            om[self.acc_obj_ids.data == roi.label] = roi[colname]
+        self._df.apply(_label_object_class, axis=1)
+
+        return InMemoryDataAccessor(om)


    def export_patch_masks(self, where: Path, pad_to: int = None, prefix='mask', expanded=False) -> pd.DataFrame:
 @@ -446,15 +705,13 @@ class RoiSet(object):
                patch = np.zeros((roi.ebb_h, roi.ebb_w, 1, 1), dtype='uint8')
                patch[roi.relative_slice][:, :, 0, 0] = roi.binary_mask * 255
            else:
-                patch = np.zeros((roi.y1 - roi.y0, roi.x1 - roi.x0, 1, 1), dtype='uint8')
-                patch[:, :, 0, 0] = roi.binary_mask * 255
-
+                patch = (roi.binary_mask * 255).astype('uint8')
            if pad_to:
                patch = pad(patch, pad_to)
-            return patch
+            return np.expand_dims(patch, (2, 3))

        dfe = self._df.copy()
-        dfe['patch_mask'] = dfe.apply(lambda r: _make_patch_mask(r), axis=1)
+        dfe['patch_mask'] = dfe.apply(_make_patch_mask, axis=1)
        return dfe

    def get_patch_masks_acc(self, **kwargs) -> PatchStack:
 @@ -482,7 +739,8 @@ class RoiSet(object):

            if white_channel:
                assert white_channel < raw.chroma
-                stack = raw.data[:, :, [white_channel, white_channel, white_channel], :]
+                mono = raw.get_mono(white_channel).data_xyz
+                stack = np.stack([mono, mono, mono], axis=2)
            else:
                stack = np.zeros([*raw.shape[0:2], 3, raw.shape[3]], dtype=raw.dtype)

 @@ -491,10 +749,10 @@ class RoiSet(object):
                    continue
                assert isinstance(ci, int)
                assert ci < raw.chroma
-                stack[:, :, ii, :] = _safe_add(
+                stack[:, :, ii, :] = safe_add(
                    stack[:, :, ii, :],  # either black or grayscale channel
                    rgb_overlay_weights[ii],
-                    raw.data[:, :, ci, :]
+                    raw.get_mono(ci).data_xyz
                )
        else:
            if white_channel is not None:  # interpret as just a single channel
 @@ -509,9 +767,10 @@ class RoiSet(object):
                        annotate_rgb = True
                        break
                if annotate_rgb:  # make RGB patches anyway to include annotation color
-                    stack = raw.data[:, :, [white_channel, white_channel, white_channel], :]
+                    mono = raw.get_mono(white_channel).data_xyz
+                    stack = np.stack([mono, mono, mono], axis=2)
                else:  # make monochrome patches
-                    stack = raw.data[:, :, [white_channel], :]
+                    stack = raw.get_mono(white_channel).data
            elif kwargs.get('channels'):
                stack = raw.get_channels(kwargs['channels']).data
            else:
 @@ -533,7 +792,7 @@ class RoiSet(object):

            # make a 2d patch, find optimal z-position determined by focus_metric function on each channel separately
            elif focus_metric is not None:
-                foc = _focus_metrics()[focus_metric]
+                foc = focus_metrics()[focus_metric]

                patch = np.zeros([ph, pw, pc, 1], dtype=patch3d.dtype)

 @@ -593,6 +852,22 @@ class RoiSet(object):
        dfe['patch'] = dfe.apply(lambda r: _make_patch(r), axis=1)
        return dfe

+    @property
+    def classification_columns(self):
+        """
+        Return list of columns that describe instance classification results
+        """
+        pr = 'classify_by_'
+        return [c.split(pr)[1] for c in self._df.columns if c.startswith(pr)]
+
+    def set_classification(self, colname: str, se: pd.Series):
+        """
+        Set instance classification result as a column addition on dataframe
+        :param colname: name of classification result
+        :param se: series containing class information
+        """
+        self._df[colname] = se
+
    def run_exports(self, where: Path, channel, prefix, params: RoiSetExportParams) -> dict:
        """
        Export various representations of ROIs, e.g. patches, annotated stacks, and object maps.
 @@ -633,10 +908,10 @@ class RoiSet(object):
            if k == 'annotated_zstacks':
                record[k] = str(Path(k) / self.export_annotated_zstack(subdir, prefix=pr, **kp))
            if k == 'object_classes':
-                for kc, acc in self.object_class_maps.items():
-                    fp = subdir / kc / (pr + '.tif')
-                    write_accessor_data_to_file(fp, acc)
-                    record[f'{k}_{kc}'] = str(fp)
+                for n in self.classification_columns:
+                    fp = subdir / n / (pr + '.tif')
+                    write_accessor_data_to_file(fp, self.get_object_class_map(n))
+                    record[f'{k}_{n}'] = str(fp)
            if k == 'derived_channels':
                record[k] = []
                for di, dacc in enumerate(self.accs_derived):
 @@ -650,7 +925,7 @@ class RoiSet(object):

        return record

-    def serialize(self, where: Path, prefix='') -> dict:
+    def serialize(self, where: Path, prefix='roiset') -> dict:
        """
        Export the minimal information needed to recreate RoiSet object, i.e. CSV data file and tight patch masks
        :param where: path of directory in which to write files
 @@ -658,94 +933,136 @@ class RoiSet(object):
        :return: nested dict of Path objects describing the locations of export products
        """
        record = {}
-        df_exp = self.export_patch_masks(
-            where / 'tight_patch_masks',
-            prefix=prefix,
-            pad_to=None,
-            expanded=False
-        )
-        se_pa = df_exp.patch_mask_path.apply(
-            lambda x: str(Path('tight_patch_masks') / x)
-        ).rename('tight_patch_masks_path')
-        self._df = self._df.join(se_pa)
-        df_fn = self.export_dataframe(where / 'dataframe' / (prefix + '.csv'))
-        record['dataframe'] = str(Path('dataframe') / df_fn)
-        record['tight_patch_masks'] = list(se_pa)
+        if not self._df.binary_mask.apply(lambda x: np.all(x)).all():  # binary masks aren't just all True
+            df_exp = self.export_patch_masks(
+                where / 'tight_patch_masks',
+                prefix=prefix,
+                pad_to=None,
+                expanded=False
+            )
+            # record patch masks paths to dataframe, then save static columns to CSV
+            se_pa = df_exp.patch_mask_path.apply(
+                lambda x: str(Path('tight_patch_masks') / x)
+            ).rename('tight_patch_masks_path')
+            self._df = self._df.join(se_pa)
+            record['tight_patch_masks'] = list(se_pa)
+
+        csv_path = where / 'dataframe' / (prefix + '.csv')
+        csv_path.parent.mkdir(parents=True, exist_ok=True)
+        self._df.drop(
+            ['expanded_slice', 'slice', 'relative_slice', 'binary_mask'],
+            axis=1
+        ).to_csv(csv_path, index=False)
+
+        record['dataframe'] = str(Path('dataframe') / csv_path.name)
+
        return record

+
+    def get_polygons(self, poly_threshold=0, dilation_radius=1) -> pd.DataFrame:
+        self.coordinates_ = """
+        Fit polygons to all object boundaries in the RoiSet
+        :param poly_threshold: threshold distance for polygon fit; a smaller number follows sharp features more closely
+        :param dilation_radius: radius of binary dilation to apply before fitting polygon
+        :return: Series of (variable x 2) np.ndarrays describing (x, y) polymer coordinates
+        """
+
+        pad_to = 1
+
+        def _poly_from_mask(roi):
+            mask = roi.binary_mask
+            if len(mask.shape) != 2:
+                raise PatchMaskShapeError(f'Patch mask needs to be two dimensions to fit a polygon')
+
+            # label and fill holes
+            labeled = label(mask)
+            filled = [rp.image_filled for rp in regionprops(labeled)]
+            assert (np.unique(labeled)[-1] == 1) and (len(filled) == 1), 'Cannot fit multiple polygons in a single patch mask'
+
+            closed = binary_dilation(filled[0], footprint=disk(dilation_radius))
+            padded = np.pad(closed, pad_to) * 1.0
+            all_contours = find_contours(padded)
+
+            nc = len(all_contours)
+            for j in range(0, nc):
+                if all([points_in_poly(all_contours[k], all_contours[j]).all() for k in range(0, nc)]):
+                    contour = all_contours[j]
+                    break
+
+            rel_polygon = approximate_polygon(contour[:, [1, 0]], poly_threshold) - [pad_to, pad_to]
+            return rel_polygon + [roi.x0, roi.y0]
+
+        return self._df.apply(_poly_from_mask, axis=1)
+
+    @property
+    def acc_obj_ids(self):
+        return make_object_ids_from_df(self._df, self.acc_raw.shape_dict)
+
    @staticmethod
-    def deserialize(acc_raw: GenericImageDataAccessor, where: Path, prefix=''):
+    def deserialize(acc_raw: GenericImageDataAccessor, where: Path, prefix='roiset') -> Self:
+        """
+        Create an RoiSet object from saved files and an image accessor
+        :param acc_raw: accessor to image that contains ROIs
+        :param where: path to directory containing RoiSet serialization files, namely dataframe.csv and a subdirectory
+            named tight_patch_masks
+        :param prefix: starting prefix of patch mask filenames
+        :return: RoiSet object
+        """
        df = pd.read_csv(where / 'dataframe' / (prefix + '.csv'))[['label', 'zi', 'y0', 'y1', 'x0', 'x1']]
-
-        id_mask = np.zeros((*acc_raw.hw, 1, acc_raw.nz), dtype='uint16')
-        def _label_obj(r):
-            sl = np.s_[r.y0:r.y1, r.x0:r.x1, :, r.zi:r.zi + 1]
-            ext = 'png'
-            fname = f'{prefix}-la{r.label:04d}-zi{r.zi:04d}.{ext}'
-            try:
-                ma_acc = generate_file_accessor(where / 'tight_patch_masks' / fname)
-                bool_mask = ma_acc.data / np.iinfo(ma_acc.data.dtype).max
-                id_mask[sl] = id_mask[sl] + r.label * bool_mask
-            except Exception as e:
-                raise DeserializeRoiSet(e)
-
-        df.apply(_label_obj, axis=1)
-        return RoiSet(acc_raw, InMemoryDataAccessor(id_mask))
-
-
-def project_stack_from_focal_points(
-        xx: np.ndarray,
-        yy: np.ndarray,
-        zz: np.ndarray,
-        stack: GenericImageDataAccessor,
-        degree: int = 2,
-) -> np.ndarray:
-    """
-    Given a set of 3D points, project a multichannel z-stack based on a surface fit of the provided points
-    :param xx: vector of point x-coordinates
-    :param yy: vector of point y-coordinates
-    :param zz: vector of point z-coordinates
-    :param stack: z-stack to project
-    :param degree: order of polynomial to fit
-    :return: multichannel 2d projected image array
-    """
-    assert xx.shape == yy.shape
-    assert xx.shape == zz.shape
-
-    poly = PolynomialFeatures(degree=degree)
-    X = np.stack([xx, yy]).T
-    features = poly.fit_transform(X, zz)
-    model = LinearRegression(fit_intercept=False)
-    model.fit(features, zz)
-
-    xy_indices = np.indices(stack.hw).reshape(2, -1).T
-    xy_features = np.dot(
-        poly.fit_transform(xy_indices, zz),
-        model.coef_
-    )
-    zi_image = xy_features.reshape(
-        stack.hw
-    ).round().clip(
-        0, (stack.nz - 1)
-    ).astype('uint16')
-
-    return np.take_along_axis(
-        stack.data,
-        np.repeat(
-            np.expand_dims(zi_image, (2, 3)),
-            stack.chroma,
-            axis=2
-        ),
-        axis=3
-    )
+        pa_masks = where / 'tight_patch_masks'
+
+        if pa_masks.exists():  # import segmentation masks
+            def _read_binary_mask(r):
+                ext = 'png'
+                fname = f'{prefix}-la{r.label:04d}-zi{r.zi:04d}.{ext}'
+                try:
+                    ma_acc = generate_file_accessor(pa_masks / fname)
+                    assert ma_acc.chroma == 1 and ma_acc.nz == 1
+                    mask_data = ma_acc.data_xy / np.iinfo(ma_acc.data.dtype).max
+                    return mask_data
+                except Exception as e:
+                    raise DeserializeRoiSet(e)
+
+            df['binary_mask'] = df.apply(_read_binary_mask, axis=1)
+            id_mask = make_object_ids_from_df(df, acc_raw.shape_dict)
+            return RoiSet.from_object_ids(acc_raw, id_mask)
+
+        else:  # assume bounding boxes only
+            df['y'] = df['y0']
+            df['x'] = df['x0']
+            df['h'] = df['y1'] - df['y0']
+            df['w'] = df['x1'] - df['x0']
+            return RoiSet.from_bounding_boxes(
+                acc_raw,
+                df[['y', 'x', 'h', 'w']].to_dict(orient='records'),
+                list(df['zi'])
+            )



 class Error(Exception):
    pass

+class BoundingBoxError(Error):
+    pass
+
 class DeserializeRoiSet(Error):
    pass

+class NoDeprojectChannelSpecifiedError(Error):
+    pass
+
 class DerivedChannelError(Error):
+    pass
+
+class MissingSegmentationError(Error):
+    pass
+
+class PatchMaskShapeError(Error):
+    pass
+
+class ShapeMismatchError(Error):
+    pass
+
+class MissingInstanceLabelsError(Error):
    pass
+\ No newline at end of file