# transfer_labels_to_ilastik_object_classifier.py
import shutil
from pathlib import Path
import h5py
import json
import numpy as np
import pandas as pd
import uuid

from extensions.chaeo.util import autonumber_new_file
from extensions.ilastik.models import IlastikObjectClassifierModel
from model_server.accessors import generate_file_accessor

def get_dataset_info(h5, lane=0):
    """Collect metadata about one input-data lane of an open ilastik project.

    :param h5: open h5py.File handle on an ilastik .ilp project
    :param lane: integer index of the input-data lane to inspect
    :return: dict keyed by 'Raw Data', 'Segmentation Image' (each with location,
        filePath, shape, nickname, id, axistags, axes) and 'misc'
    """
    lns = f'{lane:04d}'
    lane_group = f'Input Data/infos/lane{lns}'  # renamed: don't shadow the `lane` parameter
    info = {}
    for gk in ['Raw Data', 'Segmentation Image']:
        info[gk] = {}
        for dk in ['location', 'filePath', 'shape', 'nickname']:
            # best-effort: some datasets omit optional keys; h5py raises KeyError
            # for a missing path — report it and continue
            try:
                info[gk][dk] = h5[f'{lane_group}/{gk}/{dk}'][()]
            except KeyError as e:
                print(e)
        try:
            info[gk]['id'] = uuid.UUID(h5[f'{lane_group}/{gk}/datasetId'][()].decode())
        except ValueError:
            # datasetId present but not parseable as a UUID
            info[gk]['id'] = '<invalid UUID>'
        info[gk]['axistags'] = json.loads(h5[f'{lane_group}/{gk}/axistags'][()].decode())
        info[gk]['axes'] = [ax['key'] for ax in info[gk]['axistags']['axes']]

    obj_cl_group = h5[f'ObjectClassification/LabelInputs/{lns}']
    info['misc'] = {
        'number_of_label_inputs': len(obj_cl_group)  # h5py groups support len()
    }
    return info

def transfer_labels_to_ilastik_ilp(ilp, df_stack_meta, dump_csv=False):
    """Overwrite object-classification labels in an existing ilastik .ilp file in place.

    :param ilp: path to the ilastik project file (opened read-write)
    :param df_stack_meta: DataFrame with columns 'zi', 'annotation_class_id',
        and 'annotation_class' describing each z-slice's label
    :param dump_csv: if True, also write a flat listing of the HDF5 tree to h5tree.txt
    """
    with h5py.File(ilp, 'r+') as h5:
        # TODO: force make copy if ilp file starts with template_
        # TODO: enforce somehow that zstack and df_stack_meta are from same export run
        where_out = Path(ilp).parent

        # export complete HDF5 tree
        if dump_csv:
            with open(where_out / 'h5tree.txt', 'w') as hf:
                tt = []
                h5.visititems(lambda k, v: tt.append([k, str(v)]))
                for line in tt:
                    hf.write(f'{line[0]} --- {line[1]}\n')

        # validate expected project structure (raises KeyError if groups are missing);
        # the returned metadata itself is not needed here
        get_dataset_info(h5)

        # replace the label-name key with the classes present in the metadata
        ln = ['none'] + list(df_stack_meta.sort_values('annotation_class_id').annotation_class.unique())
        del h5['ObjectClassification/LabelNames']
        h5.create_dataset('ObjectClassification/LabelNames', data=np.array(ln).astype('O'))

        # change object labels
        ts = h5['ObjectClassification']['LabelInputs']['0000']
        for key, ds in ts.items():
            # BUGFIX: original asserted len(ti) == 2 on the (key, value) tuple, which is
            # always true; the intent is that each label-input dataset holds two entries —
            # one for unlabeled area, one for the labeled object
            assert len(ds) == 2
            idx = int(key)  # important because keys are strings and hence not sorted numerically
            la_old = ds[1]

            # unit index, i.e. reserve 1 for no object
            ds[1] = float(df_stack_meta.loc[df_stack_meta.zi == idx, 'annotation_class_id'].iat[0])
            print(f'Changed label {key} from {la_old} to {ds[1]}')

def generate_ilastik_object_classifier(template_ilp, where: str, lane=0):
    """Create a new ilastik object-classifier project from a template, pointing it
    at a labeled z-stack training set.

    Expects `where` to contain zstack_train_raw.tif, zstack_train_mask.tif,
    train_stack.csv (one row per z-slice with columns 'zi' and
    'annotation_class_id'), and labels_key.csv (class-name key with columns
    'annotation_class_id' and 'annotation_class').

    :param template_ilp: path to the template .ilp project file (read-only)
    :param where: directory containing the training-data exports
    :param lane: input-data lane index to rewrite in the new project
    :return: path of the newly created .ilp file
    """
    # validate z-stack input data
    root = Path(where)
    paths = {
        'Raw Data': root / 'zstack_train_raw.tif',
        'Segmentation Image': root / 'zstack_train_mask.tif',
    }

    accessors = {k: generate_file_accessor(pa) for k, pa in paths.items()}

    assert accessors['Raw Data'].chroma == 1
    assert accessors['Segmentation Image'].is_mask()
    assert len(set([a.hw for a in accessors.values()])) == 1  # same height and width
    assert len(set([a.nz for a in accessors.values()])) == 1  # same z-depth
    nz = accessors['Raw Data'].nz

    # now load CSV
    csv_path = root / 'train_stack.csv'
    assert csv_path.exists()
    df_patches = pd.read_csv(csv_path)  # reuse the path that was just checked
    # every z-slice must be labeled exactly once
    assert np.all(
        df_patches['zi'].sort_values().to_numpy() == np.arange(0, nz)
    )
    df_labels = pd.read_csv(root / 'labels_key.csv')
    label_names = list(df_labels.sort_values('annotation_class_id').annotation_class.unique())
    label_names[0] = 'none'  # position 0 is reserved for the no-object class
    assert len(label_names) >= 2

    # open, validate, and copy template project file
    with h5py.File(template_ilp, 'r') as h5:
        info = get_dataset_info(h5)

        for hg in ['Raw Data', 'Segmentation Image']:
            assert info[hg]['location'] == b'FileSystem'
            assert info[hg]['axes'] == ['t', 'y', 'x']

    new_ilp = shutil.copy(template_ilp, root / autonumber_new_file(root, 'auto-obj', 'ilp'))

    # write to new project file
    lns = f'{lane:04d}'
    with h5py.File(new_ilp, 'r+') as h5:
        def set_ds(grp, key, val):
            # overwrite a lane dataset in place and return the stored value
            # (renamed parameter: original shadowed it with the h5py dataset handle)
            ds = h5[f'Input Data/infos/lane{lns}/{grp}/{key}']
            ds[()] = val
            return ds[()]

        def get_label(idx):
            # class id assigned to z-slice idx
            return df_patches.loc[df_patches.zi == idx, 'annotation_class_id'].iat[0]

        for hg in ['Raw Data', 'Segmentation Image']:
            set_ds(hg, 'filePath', paths[hg].__str__())
            set_ds(hg, 'nickname', paths[hg].stem)
            shape_zyx = [accessors[hg].shape_dict[ax] for ax in ['Z', 'Y', 'X']]
            set_ds(hg, 'shape', np.array(shape_zyx))

        # change key of label names
        del h5['ObjectClassification/LabelNames']
        ln = np.array(label_names)
        h5.create_dataset('ObjectClassification/LabelNames', data=ln.astype('O'))

        # rebuild the per-slice label inputs from scratch
        la_groupname = f'ObjectClassification/LabelInputs/{lns}'

        del h5[la_groupname]
        lag = h5.create_group(la_groupname)
        for zi in range(0, nz):
            # entry 0 is the unlabeled area; entry 1 is the object's class id
            lag[f'{zi}'] = np.array([0., float(get_label(zi))])

    return new_ilp

if __name__ == '__main__':
    # One-off driver: build a training project from a fixed experiment layout.
    project_root = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/')
    template_path = project_root / 'exp0014/template_obj.ilp'
    patch_stack_dir = project_root / 'exp0009/output/labeled_patches-20231016-0002'

    generated_ilp = generate_ilastik_object_classifier(template_path, patch_stack_dir)

    # touch the training z-stack to confirm it is readable, then load the new project
    generate_file_accessor(patch_stack_dir / 'zstack_train_raw.tif')
    classifier = IlastikObjectClassifierModel({'project_file': generated_ilp})


    print(classifier.project_file_abspath)