From e5ce7b9d2b1888bc3e285fc2eb65a5b717d54763 Mon Sep 17 00:00:00 2001 From: Christopher Rhodes <christopher.rhodes@embl.de> Date: Mon, 30 Oct 2023 13:13:27 +0100 Subject: [PATCH] Re-organized object classifier generator --- ...fer_labels_to_ilastik_object_classifier.py | 141 ++++-------------- extensions/chaeo/models.py | 100 ++++++++++++- 2 files changed, 124 insertions(+), 117 deletions(-) diff --git a/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py b/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py index d9696e5a..97330c76 100644 --- a/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py +++ b/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py @@ -1,107 +1,13 @@ import shutil from pathlib import Path import h5py -import json import numpy as np import pandas as pd import skimage -import uuid -from extensions.chaeo.accessors import MonoPatchStack, MonoPatchStackFromFile -from extensions.chaeo.h5util import get_dataset_info -from extensions.chaeo.models import PatchStackObjectClassifier -from model_server.accessors import generate_file_accessor, GenericImageDataAccessor, write_accessor_data_to_file - - -def generate_ilastik_object_classifier( - template_ilp: Path, - target_ilp: Path, - raw_tif: Path, - mask_tif: Path, - label_tif: Path, - label_names: list, - lane: int = 0, -): - """ - Starting with a template project file, transfer input data and labels to a new project file. - :param template_ilp: path to existing ilastik object classifier to use as a template - :param target_ilp: path to new classifier - :param raw_tif: path to stack of patches containing raw data - :param mask_tif: path to stack of patches containing object masks - :param label_tif: path to stack of patches containing object labels - :param label_names: list of label names - :param lane: ilastik lane identifier - :return: - """ - new_ilp = shutil.copy(template_ilp, target_ilp) - - paths = { - 'Raw Data': raw_tif, - 'Segmentation Image': mask_tif, - } - accessors = {k: MonoPatchStackFromFile(root / pa) for k, pa in paths.items()} - - # get labels from label image - acc_labels = MonoPatchStackFromFile(label_tif) - labels = [] - for ii in range(0, acc_labels.count): - unique = np.unique(acc_labels.iat(ii)) - assert len(unique) >= 2, 'Label image contains more than one non-zero value' - assert unique[0] == 0, 'Label image does not contain unlabeled background' - assert unique[-1] < len(label_names) + 1, f'Label ID {unique[-1]} exceeds number of label names: {len(label_names)}' - labels.append(unique[-1]) - - # write to new project file - with h5py.File(new_ilp, 'r+') as h5: - - for gk in ['Raw Data', 'Segmentation Image']: - group = f'Input Data/infos/lane{lane:04d}/{gk}' - - # set path to input image files - del h5[f'{group}/filePath'] - h5[f'{group}/filePath'] = paths[gk].name - assert not Path(h5[f'{group}/filePath'][()].decode()).is_absolute() - assert h5[f'{group}/filePath'][()] == paths[gk].name.encode() - assert h5[f'{group}/location'][()] == 'FileSystem'.encode() - - # set input nickname - del h5[f'{group}/nickname'] - h5[f'{group}/nickname'] = paths[gk].stem - - # set input shape - del h5[f'{group}/shape'] - shape_zyx = [accessors[gk].shape_dict[ax] for ax in ['Z', 'Y', 'X']] - h5[f'{group}/shape'] = np.array(shape_zyx) - - # change key of label names - if (k := 'ObjectClassification/LabelNames') in h5.keys(): - del h5[k] - ln = np.array(label_names) - h5.create_dataset(k, data=ln.astype('O')) - - if (k := 'ObjectClassification/MaxNumObj') in h5.keys(): - del h5[k] - h5[k] = len(label_names) - 1 - - del h5['currentApplet'] - h5['currentApplet'] = 1 - - # change object labels - if (k := f'ObjectClassification/LabelInputs/{lane:04d}') in h5.keys(): - del h5[k] - lag = h5.create_group(k) - # for zi in range(0, nz): - # lag[f'{zi}'] = np.array([0., float(get_label(zi))]) - for zi, la in enumerate(labels): - lag[f'{zi}'] = np.array([0., float(la)]) - - # delete existing classification weights - if (k := f'ObjectExtraction/RegionFeatures/{lane:04d}') in h5.keys(): - del h5[k] - if (k := 'ObjectClassification/ClassifierForests') in h5.keys(): - del h5[k] - - return new_ilp +from extensions.chaeo.accessors import MonoPatchStackFromFile +from extensions.chaeo.models import generate_ilastik_object_classifier, PatchStackObjectClassifier +from model_server.accessors import GenericImageDataAccessor, write_accessor_data_to_file def compare_object_maps(truth: GenericImageDataAccessor, inferred: GenericImageDataAccessor) -> pd.DataFrame: @@ -137,6 +43,17 @@ def compare_object_maps(truth: GenericImageDataAccessor, inferred: GenericImageD labels.append(dd) return pd.DataFrame(labels) +def infer_and_compare(classifier: PatchStackObjectClassifier, prefix, raw, mask, labels): + result_acc, _ = classifier.infer(raw, mask) + write_accessor_data_to_file(root / f'zstack_train_result.tif', result_acc) + + # write comparison tables + df_comp = compare_object_maps(labels, result_acc) + df_comp.to_csv(root / f'compare_{prefix}_result.csv', index=False) + print(f'Generated ilastik project {classifier_file.name}') + print('Truth and inferred labels match?') + print(pd.value_counts(df_comp['truth_label'] == df_comp['inferred_label'])) + if __name__ == '__main__': root = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/exp0009/output/labeled_patches-20231030-0002') template_ilp = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/exp0014/template_obj.ilp') @@ -147,7 +64,7 @@ if __name__ == '__main__': assert len(label_names) >= 2 # auto-populate an object classifier - new_ilp = generate_ilastik_object_classifier( + classifier_file = generate_ilastik_object_classifier( template_ilp, root / 'new_auto_obj.ilp', root / 'zstack_train_raw.tif', @@ -155,27 +72,23 @@ if __name__ == '__main__': root / 'zstack_train_label.tif', label_names, ) + classifier = PatchStackObjectClassifier({'project_file': classifier_file}) - auto_ilp = new_ilp - - def infer_and_compare(ilp, prefix, raw, mask, labels): - mod = PatchStackObjectClassifier({'project_file': root / ilp}) - result_acc, _ = mod.infer(raw, mask) - write_accessor_data_to_file(root / f'zstack_train_result.tif', result_acc) - - # write comparison tables - df_comp = compare_object_maps(labels, result_acc) - df_comp.to_csv(root / f'compare_{prefix}_result.csv', index=False) - print(f'Generated ilastik project {ilp}') - print('Truth and inferred labels match?') - print(pd.value_counts(df_comp['truth_label'] == df_comp['inferred_label'])) - - # infer_and_compare_training_set(auto_ilp, 'before') + # verify self-consistency of training set infer_and_compare( - auto_ilp, + classifier, 'train', MonoPatchStackFromFile(root / 'zstack_train_raw.tif'), MonoPatchStackFromFile(root / 'zstack_train_mask.tif'), MonoPatchStackFromFile(root / 'zstack_train_label.tif') ) + # run test set + infer_and_compare( + classifier, + 'test', + MonoPatchStackFromFile(root / 'zstack_test_raw.tif'), + MonoPatchStackFromFile(root / 'zstack_test_mask.tif'), + MonoPatchStackFromFile(root / 'zstack_test_label.tif'), + ) + diff --git a/extensions/chaeo/models.py b/extensions/chaeo/models.py index 8a141b35..9e209be2 100644 --- a/extensions/chaeo/models.py +++ b/extensions/chaeo/models.py @@ -1,7 +1,11 @@ +from pathlib import Path +import shutil + +import h5py import numpy as np import vigra -from extensions.chaeo.accessors import MonoPatchStack +from extensions.chaeo.accessors import MonoPatchStack, MonoPatchStackFromFile from extensions.ilastik.models import IlastikObjectClassifierFromSegmentationModel from model_server.accessors import InMemoryDataAccessor @@ -12,7 +16,6 @@ class PatchStackObjectClassifier(IlastikObjectClassifierFromSegmentationModel): as time-series images where each frame contains only one object. """ - def infer(self, input_acc: MonoPatchStack, segmentation_acc: MonoPatchStack) -> (np.ndarray, dict): assert segmentation_acc.is_mask() assert input_acc.chroma == 1 @@ -41,4 +44,95 @@ class PatchStackObjectClassifier(IlastikObjectClassifierFromSegmentationModel): assert yxz.shape[0:2] == input_acc.hw assert yxz.shape[2] == input_acc.nz - return MonoPatchStack(data=yxz), {'success': True} \ No newline at end of file + return MonoPatchStack(data=yxz), {'success': True} + + +def generate_ilastik_object_classifier( + template_ilp: Path, + target_ilp: Path, + raw_tif: Path, + mask_tif: Path, + label_tif: Path, + label_names: list, + lane: int = 0, +) -> Path: + """ + Starting with a template project file, transfer input data and labels to a new project file. + :param template_ilp: path to existing ilastik object classifier to use as a template + :param target_ilp: path to new classifier + :param raw_tif: path to stack of patches containing raw data + :param mask_tif: path to stack of patches containing object masks + :param label_tif: path to stack of patches containing object labels + :param label_names: list of label names + :param lane: ilastik lane identifier + :return: path to generated object classifier + """ + new_ilp = shutil.copy(template_ilp, target_ilp) + + paths = { + 'Raw Data': raw_tif, + 'Segmentation Image': mask_tif, + } + root = raw_tif.parent + accessors = {k: MonoPatchStackFromFile(root / pa) for k, pa in paths.items()} + + # get labels from label image + acc_labels = MonoPatchStackFromFile(label_tif) + labels = [] + for ii in range(0, acc_labels.count): + unique = np.unique(acc_labels.iat(ii)) + assert len(unique) >= 2, 'Label image contains more than one non-zero value' + assert unique[0] == 0, 'Label image does not contain unlabeled background' + assert unique[-1] < len(label_names) + 1, f'Label ID {unique[-1]} exceeds number of label names: {len(label_names)}' + labels.append(unique[-1]) + + # write to new project file + with h5py.File(new_ilp, 'r+') as h5: + + for gk in ['Raw Data', 'Segmentation Image']: + group = f'Input Data/infos/lane{lane:04d}/{gk}' + + # set path to input image files + del h5[f'{group}/filePath'] + h5[f'{group}/filePath'] = paths[gk].name + assert not Path(h5[f'{group}/filePath'][()].decode()).is_absolute() + assert h5[f'{group}/filePath'][()] == paths[gk].name.encode() + assert h5[f'{group}/location'][()] == 'FileSystem'.encode() + + # set input nickname + del h5[f'{group}/nickname'] + h5[f'{group}/nickname'] = paths[gk].stem + + # set input shape + del h5[f'{group}/shape'] + shape_zyx = [accessors[gk].shape_dict[ax] for ax in ['Z', 'Y', 'X']] + h5[f'{group}/shape'] = np.array(shape_zyx) + + # change key of label names + if (k := 'ObjectClassification/LabelNames') in h5.keys(): + del h5[k] + ln = np.array(label_names) + h5.create_dataset(k, data=ln.astype('O')) + + if (k := 'ObjectClassification/MaxNumObj') in h5.keys(): + del h5[k] + h5[k] = len(label_names) - 1 + + del h5['currentApplet'] + h5['currentApplet'] = 1 + + # change object labels + if (k := f'ObjectClassification/LabelInputs/{lane:04d}') in h5.keys(): + del h5[k] + lag = h5.create_group(k) + for zi, la in enumerate(labels): + lag[f'{zi}'] = np.array([0., float(la)]) + + # delete existing classification weights + if (k := f'ObjectExtraction/RegionFeatures/{lane:04d}') in h5.keys(): + del h5[k] + if (k := 'ObjectClassification/ClassifierForests') in h5.keys(): + del h5[k] + + return Path(new_ilp) + -- GitLab