from pathlib import Path
from typing import Dict, List
from uuid import uuid4

import numpy as np
import pandas as pd

from skimage.measure import label, regionprops_table
from skimage.morphology import dilation
from sklearn.model_selection import train_test_split

from extensions.chaeo.accessors import MonoPatchStack
from extensions.chaeo.annotators import draw_boxes_on_3d_image
from extensions.chaeo.models import PatchStackObjectClassifier
from extensions.chaeo.params import RoiSetExportParams
from extensions.chaeo.process import mask_largest_object
from extensions.chaeo.products import export_patches_from_zstack, export_patch_masks_from_zstack, export_multichannel_patches_from_zstack, get_patches_from_zmask_meta, get_patch_masks_from_zmask_meta
from extensions.chaeo.zmask import project_stack_from_focal_points, RoiSet
from extensions.ilastik.models import IlastikPixelClassifierModel

from model_server.accessors import generate_file_accessor, InMemoryDataAccessor, write_accessor_data_to_file
from model_server.models import Model, InstanceSegmentationModel, SemanticSegmentationModel
from model_server.process import rescale
from model_server.workflows import Timer

# def get_zmask_meta(
#     input_file_path: str,
#     ilastik_pixel_classifier: IlastikPixelClassifierModel,
#     segmentation_channel: int,
#     pxmap_threshold: float,
#     pxmap_foreground_channel: int = 0,
#     zmask_zindex: int = None,
#     zmask_clip: int = None,
#     zmask_filters: Dict = None,
#     zmask_type: str = 'boxes',
#     **kwargs,
# ) -> tuple:
#     ti = Timer()
#     stack = generate_file_accessor(Path(input_file_path))
#     fstem = Path(input_file_path).stem
#     ti.click('file_input')
#
#     # MIP if no zmask z-index is given, then classify pixels
#     if isinstance(zmask_zindex, int):
#         assert 0 < zmask_zindex < stack.nz
#         zmask_data = stack.get_one_channel_data(channel=segmentation_channel).data[:, :, :, zmask_zindex]
#     else:
#         zmask_data = stack.get_one_channel_data(channel=segmentation_channel).data.max(axis=-1, keepdims=True)
#     if zmask_clip:
#         zmask_data = rescale(zmask_data, zmask_clip)
#     mip = InMemoryDataAccessor(
#         zmask_data,
#     )
#     pxmap, _ = ilastik_pixel_classifier.infer(mip)
#     ti.click('infer_pixel_probability')
#
#     obmask = InMemoryDataAccessor(
#         pxmap.data > pxmap_threshold
#     )
#     ti.click('threshold_pixel_mask')
#
#     # make zmask
#     obj_table = ZMaskObjectTable(
#         obmask.get_one_channel_data(pxmap_foreground_channel),
#         stack.get_one_channel_data(segmentation_channel),
#         mask_type=zmask_type,
#         filters=zmask_filters,
#         expand_box_by=kwargs['zmask_expand_box_by'],
#     )
#     ti.click('generate_zmasks')
#
#     # record pixel scale
#     obj_table.df['pixel_scale_in_micrometers'] = float(stack.pixel_scale_in_micrometers.get('X'))
#
#     return ti, stack, fstem, obmask, pxmap, obj_table


# # called by batch runners
# def export_patches_from_multichannel_zstack(
#         input_file_path: str,
#         output_folder_path: str,
#         models: List[Model],
#         pxmap_threshold: float,
#         pxmap_foreground_channel: int,
#         segmentation_channel: int,
#         patches_channel: int,
#         zmask_zindex: int = None,  # None for MIP,
#         zmask_clip: int = None,
#         zmask_type: str = 'boxes',
#         zmask_filters: Dict = None,
#         zmask_expand_box_by: int = None,
#         export_pixel_probabilities=True,
#         export_2d_patches_for_training=True,
#         export_2d_patches_for_annotation=True,
#         draw_bounding_box_on_2d_patch=True,
#         draw_contour_on_2d_patch=False,
#         draw_mask_on_2d_patch=False,
#         export_3d_patches=True,
#         export_annotated_zstack=True,
#         draw_label_on_zstack=False,
#         export_patch_masks=True,
#         rgb_overlay_channels=(None, None, None),
#         rgb_overlay_weights=(1.0, 1.0, 1.0),
# ) -> Dict:
#     pixel_classifier = models[0]
#
#     # ti, stack, fstem, obmask, pxmap, obj_table = get_zmask_meta(
#     #     input_file_path,
#     #     pixel_classifier,
#     #     segmentation_channel,
#     #     pxmap_threshold,
#     #     pxmap_foreground_channel=pxmap_foreground_channel,
#     #     zmask_zindex=zmask_zindex,
#     #     zmask_clip=zmask_clip,
#     #     zmask_expand_box_by=zmask_expand_box_by,
#     #     zmask_filters=zmask_filters,
#     #     zmask_type=zmask_type,
#     # )
#
#     # obj_table = ZMaskObjectTable(
#     #     obmask.get_one_channel_data(pxmap_foreground_channel),
#     #     stack.get_one_channel_data(segmentation_channel),
#     #     mask_type=zmask_type,
#     #     filters=zmask_filters,
#     #     expand_box_by=kwargs['zmask_expand_box_by'],
#     # )
#
#     if export_pixel_probabilities:
#         write_accessor_data_to_file(
#             Path(output_folder_path) / 'pixel_probabilities' / (fstem + '.tif'),
#             pxmap
#         )
#         ti.click('export_pixel_probability')
#
#     if export_3d_patches and len(zmask_meta) > 0:
#         files = export_patches_from_zstack(
#             Path(output_folder_path) / '3d_patches',
#             stack.get_one_channel_data(patches_channel),
#             zmask_meta,
#             prefix=fstem,
#             draw_bounding_box=False,
#             rescale_clip=0.001,
#             make_3d=True,
#         )
#         ti.click('export_3d_patches')
#
#     if export_2d_patches_for_annotation and len(zmask_meta) > 0:
#         files = export_multichannel_patches_from_zstack(
#             Path(output_folder_path) / '2d_patches_annotation',
#             stack,
#             zmask_meta,
#             prefix=fstem,
#             rescale_clip=0.001,
#             make_3d=False,
#             focus_metric='max_sobel',
#             ch_white=patches_channel,
#             ch_rgb_overlay=rgb_overlay_channels,
#             draw_bounding_box=draw_bounding_box_on_2d_patch,
#             bounding_box_channel=1,
#             bounding_box_linewidth=2,
#             draw_contour=draw_contour_on_2d_patch,
#             draw_mask=draw_mask_on_2d_patch,
#             overlay_gain=rgb_overlay_weights,
#         )
#         df_patches = pd.DataFrame(files)
#         ti.click('export_2d_patches')
#         # associate 2d patches, dropping labeled objects that were not exported as patches
#         df = pd.merge(df, df_patches, left_index=True, right_on='df_index').drop(columns='df_index')
#         # prepopulate patch UUID
#         df['patch_id'] = df.apply(lambda _: uuid4(), axis=1)
#
#     if export_2d_patches_for_training and len(zmask_meta) > 0:
#         files = export_multichannel_patches_from_zstack(
#             Path(output_folder_path) / '2d_patches_training',
#             stack.get_one_channel_data(patches_channel),
#             zmask_meta,
#             prefix=fstem,
#             rescale_clip=0.001,
#             make_3d=False,
#             focus_metric='max_sobel',
#         )
#         ti.click('export_2d_patches')
#
#     if export_patch_masks and len(zmask_meta) > 0:
#         files = export_patch_masks_from_zstack(
#             Path(output_folder_path) / 'patch_masks',
#             stack.get_one_channel_data(patches_channel),
#             zmask_meta,
#             prefix=fstem,
#         )
#
#     if export_annotated_zstack:
#         annotated = InMemoryDataAccessor(
#             draw_boxes_on_3d_image(
#                 stack.get_one_channel_data(patches_channel).data,
#                 zmask_meta,
#                 add_label=draw_label_on_zstack,
#             )
#         )
#         write_accessor_data_to_file(
#             Path(output_folder_path) / 'annotated_zstacks' / (fstem + '.tif'),
#             annotated
#         )
#         ti.click('export_annotated_zstack')
#
#     # generate multichannel projection from label centroids
#     dff = df[df['keeper']]
#     if len(zmask_meta) > 0:
#         interm['projected'] = project_stack_from_focal_points(
#             dff['centroid-0'].to_numpy(),
#             dff['centroid-1'].to_numpy(),
#             dff['zi'].to_numpy(),
#             stack,
#             degree=4,
#         )
#     else: # else just return MIP
#         interm['projected'] = stack.data.max(axis=-1)
#
#     return {
#         'pixel_model_id': pixel_classifier.model_id,
#         'input_filepath': input_file_path,
#         'number_of_objects': len(zmask_meta),
#         'pixel_scale_in_micrometers': stack.pixel_scale_in_micrometers,
#         'success': True,
#         'timer_results': ti.events,
#         'dataframe': df[df['keeper'] == True],
#         'interm': interm,
#     }

def infer_object_map_from_zstack(
        input_file_path: str,
        output_folder_path: str,
        models: List[Model],
        pxmap_foreground_channel: int,
        pxmap_threshold: float,
        segmentation_channel: int,
        patches_channel: int,
        zmask_zindex: int = None,  # None for MIP,
        zmask_clip: int = None,
        zmask_type: str = 'boxes',
        zmask_filters: Dict = None,
        # zmask_expand_box_by: int = None,
        exports: RoiSetExportParams = RoiSetExportParams(),
        **kwargs,
) -> Dict:
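    """
    Classify pixels of a z-stack with a semantic segmentation model, threshold
    the resulting probability map into a set of ROIs, classify each ROI with an
    instance segmentation (object classifier) model, and export the requested
    products (object class map, patches, patch masks, annotated z-stack).

    :param input_file_path: path to the input z-stack image file
    :param output_folder_path: folder where all products are written
    :param models: exactly two models: a SemanticSegmentationModel (pixel
        classifier) followed by an InstanceSegmentationModel (object classifier)
    :param pxmap_foreground_channel: channel of the pixel probability map that
        is thresholded into the object mask
    :param pxmap_threshold: threshold applied to the pixel probability map
    :param segmentation_channel: channel of the input stack used for segmentation
    :param patches_channel: channel of the input stack used for patch export and
        object classification
    :param zmask_zindex: z-index used for segmentation; if None, use the maximum
        intensity projection (MIP)
    :param zmask_clip: if given, rescale/clip the segmentation image before
        pixel classification
    :param zmask_type: ROI mask geometry passed to RoiSet (default 'boxes')
    :param zmask_filters: optional object filters passed to RoiSet
    :param exports: RoiSetExportParams controlling which products are exported
    :param kwargs: additional keyword arguments (currently unused)
    :return: dict of timer results, the ROI dataframe, intermediate data, and
        the path of the exported object class map
    """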
    assert len(models) == 2
    pixel_classifier = models[0]
    assert isinstance(pixel_classifier, SemanticSegmentationModel)
    object_classifier = models[1]
    assert isinstance(object_classifier, InstanceSegmentationModel)

    ti = Timer()
    stack = generate_file_accessor(Path(input_file_path))
    fstem = Path(input_file_path).stem
    ti.click('file_input')

    # MIP if no zmask z-index is given, then classify pixels
    if isinstance(zmask_zindex, int):
        assert 0 <= zmask_zindex < stack.nz
        zmask_data = stack.get_one_channel_data(channel=segmentation_channel).data[:, :, :, zmask_zindex]
    else:
        zmask_data = stack.get_one_channel_data(channel=segmentation_channel).data.max(axis=-1, keepdims=True)
    if zmask_clip:
        zmask_data = rescale(zmask_data, zmask_clip)
    mip = InMemoryDataAccessor(
        zmask_data,
    )
    pxmap, _ = pixel_classifier.infer(mip)
    ti.click('infer_pixel_probability')

    if exports.pixel_probabilities:
        write_accessor_data_to_file(
            Path(output_folder_path) / 'pixel_probabilities' / (fstem + '.tif'),
            pxmap
        )
        ti.click('export_pixel_probability')

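    # threshold the pixel probability map into a binary foreground object mask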
    obmask = InMemoryDataAccessor(
        pxmap.data > pxmap_threshold
    )
    ti.click('threshold_pixel_mask')

    # make zmask
    rois = RoiSet(
        obmask.get_one_channel_data(pxmap_foreground_channel),
        stack,
        mask_type=zmask_type,
        filters=zmask_filters,
        expand_box_by=exports.expand_box_by,
    )
    ti.click('generate_zmasks')

    # ti, stack, fstem, obmask, pxmap, obj_table = get_zmask_meta(
    #     input_file_path,
    #     pixel_classifier,
    #     segmentation_channel,
    #     pxmap_threshold,
    #     pxmap_foreground_channel=pxmap_foreground_channel,
    #     zmask_zindex=zmask_zindex,
    #     zmask_clip=zmask_clip,
    #     # zmask_expand_box_by=zmask_expand_box_by,
    #     zmask_filters=zmask_filters,
    #     zmask_type=zmask_type,
    #     **kwargs
    # )

    # # extract patches to accessor
    # patches_acc = get_patches_from_zmask_meta(
    #     stack.get_one_channel_data(patches_channel),
    #     obj_table.zmask_meta,
    #     rescale_clip=zmask_clip,
    #     make_3d=False,
    #     focus_metric='max_sobel',
    #     **kwargs
    # )
    #
    # # extract masks
    # patch_masks_acc = get_patch_masks_from_zmask_meta(
    #     stack,
    #     obj_table.zmask_meta,
    #     **kwargs
    # )

    # # send patches and mask stacks to object classifier
    # result_acc, _ = object_classifier.infer(patches_acc, patch_masks_acc)

    # labels_map = obj_table.interm['label_map']
    # output_map = np.zeros(labels_map.shape, dtype=labels_map.dtype)
    # assert labels_map.shape == obj_table.get_label_map().shape
    # assert labels_map.dtype == obj_table.get_label_map().dtype
    #
    # # assign labels to object map:
    # meta = []
    # for ii in range(0, len(obj_table.zmask_meta)):
    #     object_id = obj_table.zmask_meta[ii]['info'].label
    #     result_patch = mask_largest_object(result_acc.iat(ii))
    #     object_class = np.unique(result_patch)[1]
    #     output_map[labels_map == object_id] = object_class
    #     meta.append({'object_id': ii, 'object_class': object_id})

    object_class_map = rois.classify_by(patches_channel, object_classifier)

    # TODO: add RoiSet method to export the object class map directly
    output_path = Path(output_folder_path) / ('obj_classes_' + fstem + '.tif')
    write_accessor_data_to_file(
        output_path,
        object_class_map
    )
    ti.click('export_object_classes')

    if exports.patches_3d:
        rois.export_3d_patches(
            Path(output_folder_path) / '3d_patches',
            fstem,
            patches_channel,
            exports.patches_3d
        )
        ti.click('export_3d_patches')

    if exports.patches_2d_for_annotation:
        rois.export_2d_patches_for_annotation(
            Path(output_folder_path) / '2d_patches_annotation',
            fstem,
            patches_channel,
            exports.patches_2d_for_annotation
        )
        ti.click('export_2d_patches_for_annotation')

    if exports.patches_2d_for_training:
        rois.export_2d_patches_for_training(
            Path(output_folder_path) / '2d_patches_training',
            fstem,
            patches_channel,
            exports.patches_2d_for_training
        )
        ti.click('export_2d_patches_for_training')

    if exports.patch_masks:
        rois.export_patch_masks(
            Path(output_folder_path) / 'patch_masks',
            fstem,
            patches_channel,
            exports.patch_masks
        )
        ti.click('export_patch_masks')

    if exports.annotated_z_stack:
        rois.export_annotated_zstack(
            Path(output_folder_path) / 'annotated_zstacks',
            fstem,
            patches_channel,
            exports.annotated_z_stack
        )
        ti.click('export_annotated_zstack')

    return {
        'timer_results': ti.events,
        'dataframe': rois.df,
        'interm': {},
        'output_path': str(output_path),
    }
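
# Example (illustrative sketch, not executed): the paths and channel indices below
# are placeholders, and pixel_model / object_model stand for any concrete
# SemanticSegmentationModel and InstanceSegmentationModel instances, e.g. an
# ilastik pixel classifier and an object classifier.
#
#   result = infer_object_map_from_zstack(
#       input_file_path='/data/stacks/example.czi',
#       output_folder_path='/data/output',
#       models=[pixel_model, object_model],
#       pxmap_foreground_channel=0,
#       pxmap_threshold=0.6,
#       segmentation_channel=0,
#       patches_channel=1,
#       exports=RoiSetExportParams(),
#   )
#   print(result['output_path'], len(result['dataframe']))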



def transfer_ecotaxa_labels_to_patch_stacks(
    where_masks: str,
    where_patches: str,
    object_csv: str,
    ecotaxa_tsv: str,
    where_output: str,
    patch_size: tuple = (256, 256),
    tr_split=0.6,
    dilate_label_mask: bool = True, # to mitigate connected components error in ilastik
    allow_multiple_objects: bool = False,
) -> None:
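    """
    Merge EcoTaxa annotations into locally exported patch metadata, assign an
    integer class id to each lowest-level annotation, split the patches into
    training and test sets, and write each set out as mask, label, and raw
    z-stacks (plus CSVs of the label key and per-stack metadata).

    :param where_masks: folder containing the exported patch masks
    :param where_patches: folder containing the exported patch images
    :param object_csv: CSV of patch metadata, including patch_id and patch_filename
    :param ecotaxa_tsv: EcoTaxa export TSV containing the object annotations
    :param where_output: folder where z-stacks and CSVs are written
    :param patch_size: expected (height, width) of each patch
    :param tr_split: fraction of patches assigned to the training set (must exceed 0.5)
    :param dilate_label_mask: dilate each mask slightly, to mitigate a
        connected-components issue in ilastik
    :param allow_multiple_objects: if False, keep only the largest labeled
        object in each patch mask
    """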
    assert tr_split > 0.5 # reduce chance that low-probability objects are omitted from training

    # read patch metadata
    df_obj = pd.read_csv(
        object_csv,
    )
    df_ecotaxa = pd.read_csv(
        ecotaxa_tsv,
        sep='\t',
        header=[0],
        dtype={
            'object_annotation_date': str,
            'object_annotation_time': str,
            'object_annotation_category_id': str,
        }
    )
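    # join exported patch metadata with the EcoTaxa annotations (patch_id <-> object_id)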
    df_merge = pd.merge(df_obj, df_ecotaxa, left_on='patch_id', right_on='object_id')

    # assign each unique lowest-level annotation to a class index
    se_unique = pd.Series(
        df_merge.object_annotation_hierarchy.unique()
    )
    df_split = (
        se_unique.str.rsplit(
            pat='>', n=1, expand=True
        )
    )
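    # class ids start at 1, leaving 0 free as background in the label stacks below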
    df_labels = pd.DataFrame({
        'annotation_class_id': df_split.index + 1,
        'hierarchy': se_unique,
        'annotation_class': df_split.loc[:, 1].str.lower()
    })

    # join patch filenames and annotation classes
    df_pf = pd.merge(
        df_merge[['patch_filename', 'object_annotation_hierarchy']],
        df_labels,
        left_on='object_annotation_hierarchy',
        right_on='hierarchy',
    )
    df_pl = df_pf[df_pf['object_annotation_hierarchy'].notnull()]

    # export annotation classes and their summary stats
    df_tr, df_te = train_test_split(df_pl, train_size=tr_split)
    # df_labels['counts'] = df_pl['annotation_class_id'].value_counts()
    df_labels = pd.merge(
        df_labels,
        pd.DataFrame(
            [df_pl.annotation_class_id.value_counts(), df_tr.annotation_class_id.value_counts(), df_te.annotation_class_id.value_counts()],
            index=['total', 'to_train', 'to_test']
        ).T,
        left_on='annotation_class_id',
        right_index=True,
        how='outer'
    )
    for col in ['total', 'to_train', 'to_test']:
        df_labels.loc[df_labels[col].isna(), col] = 0
    df_labels.to_csv(Path(where_output) / 'labels_key.csv', index=False)

    # export patches as z-stacks
    for (dfk, dfv) in {'train': df_tr, 'test': df_te}.items():
        zstack_keys = ['mask', 'label', 'raw']
        zstacks = {f'{dfk}_{zsk}': np.zeros((*patch_size, 1, len(dfv)), dtype='uint8') for zsk in zstack_keys}
        stack_meta = []
        for fi, pl in enumerate(dfv.itertuples(name='PatchFile')):
            fn = pl._asdict()['patch_filename']
            ac = pl._asdict()['annotation_class']
            aci = pl._asdict()['annotation_class_id']

            stack_meta.append({'zi': fi, 'patch_filename': fn, 'annotation_class': ac, 'annotation_class_id': aci})
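            # load the binary patch mask and check its geometry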
            acc_bm = generate_file_accessor(Path(where_masks) / fn)
            assert acc_bm.is_mask()
            assert acc_bm.hw == patch_size, f'Expected patch size {patch_size}, got {acc_bm.hw}'
            assert acc_bm.chroma == 1
            assert acc_bm.nz == 1
            mask = acc_bm.data[:, :, 0, 0]
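            # optionally dilate the mask and/or keep only its largest connected object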
            if dilate_label_mask:
                mask = dilation(mask)
            if not allow_multiple_objects:
                ob_id = label(mask)
                mask = mask_largest_object(ob_id)
            zstacks[dfk + '_mask'][:, :, 0, fi] = mask
            zstacks[dfk + '_label'][:, :, 0, fi] = (mask == 255) * aci

            acc_pa = generate_file_accessor(Path(where_patches) / fn)
            zstacks[dfk + '_raw'][:, :, :, fi] = acc_pa.data[:, :, :, 0]

        for k in zstacks.keys():
            write_accessor_data_to_file(Path(where_output) / f'zstack_{k}.tif', InMemoryDataAccessor(zstacks[k]))

        pd.DataFrame(stack_meta).to_csv(Path(where_output) / f'{dfk}_stack.csv', index=False)
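
# Example (illustrative sketch, not executed): the folder layout below is a
# placeholder; object_csv and ecotaxa_tsv are assumed to be the patch metadata
# CSV exported alongside the patches and the matching EcoTaxa export TSV.
#
#   transfer_ecotaxa_labels_to_patch_stacks(
#       where_masks='/data/output/patch_masks',
#       where_patches='/data/output/2d_patches_training',
#       object_csv='/data/output/patch_metadata.csv',
#       ecotaxa_tsv='/data/ecotaxa_export.tsv',
#       where_output='/data/output/training_stacks',
#       patch_size=(256, 256),
#       tr_split=0.6,
#   )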