From e5ce7b9d2b1888bc3e285fc2eb65a5b717d54763 Mon Sep 17 00:00:00 2001
From: Christopher Rhodes <christopher.rhodes@embl.de>
Date: Mon, 30 Oct 2023 13:13:27 +0100
Subject: [PATCH] Re-organized object classifier generator

---
 ...fer_labels_to_ilastik_object_classifier.py | 141 ++++--------------
 extensions/chaeo/models.py                    | 100 ++++++++++++-
 2 files changed, 124 insertions(+), 117 deletions(-)

diff --git a/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py b/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py
index d9696e5a..97330c76 100644
--- a/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py
+++ b/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py
@@ -1,107 +1,13 @@
 import shutil
 from pathlib import Path
 import h5py
-import json
 import numpy as np
 import pandas as pd
 import skimage
-import uuid
 
-from extensions.chaeo.accessors import MonoPatchStack, MonoPatchStackFromFile
-from extensions.chaeo.h5util import get_dataset_info
-from extensions.chaeo.models import PatchStackObjectClassifier
-from model_server.accessors import generate_file_accessor, GenericImageDataAccessor, write_accessor_data_to_file
-
-
-def generate_ilastik_object_classifier(
-        template_ilp: Path,
-        target_ilp: Path,
-        raw_tif: Path,
-        mask_tif: Path,
-        label_tif: Path,
-        label_names: list,
-        lane: int = 0,
-):
-    """
-    Starting with a template project file, transfer input data and labels to a new project file.
-    :param template_ilp: path to existing ilastik object classifier to use as a template
-    :param target_ilp: path to new classifier
-    :param raw_tif: path to stack of patches containing raw data
-    :param mask_tif: path to stack of patches containing object masks
-    :param label_tif: path to stack of patches containing object labels
-    :param label_names: list of label names
-    :param lane: ilastik lane identifier
-    :return:
-    """
-    new_ilp = shutil.copy(template_ilp, target_ilp)
-
-    paths = {
-        'Raw Data': raw_tif,
-        'Segmentation Image': mask_tif,
-    }
-    accessors = {k: MonoPatchStackFromFile(root / pa) for k, pa in paths.items()}
-
-    # get labels from label image
-    acc_labels = MonoPatchStackFromFile(label_tif)
-    labels = []
-    for ii in range(0, acc_labels.count):
-        unique = np.unique(acc_labels.iat(ii))
-        assert len(unique) >= 2, 'Label image contains more than one non-zero value'
-        assert unique[0] == 0, 'Label image does not contain unlabeled background'
-        assert unique[-1] < len(label_names) + 1, f'Label ID {unique[-1]} exceeds number of label names: {len(label_names)}'
-        labels.append(unique[-1])
-
-    # write to new project file
-    with h5py.File(new_ilp, 'r+') as h5:
-
-        for gk in ['Raw Data', 'Segmentation Image']:
-            group = f'Input Data/infos/lane{lane:04d}/{gk}'
-
-            # set path to input image files
-            del h5[f'{group}/filePath']
-            h5[f'{group}/filePath'] = paths[gk].name
-            assert not Path(h5[f'{group}/filePath'][()].decode()).is_absolute()
-            assert h5[f'{group}/filePath'][()] == paths[gk].name.encode()
-            assert h5[f'{group}/location'][()] == 'FileSystem'.encode()
-
-            # set input nickname
-            del h5[f'{group}/nickname']
-            h5[f'{group}/nickname'] = paths[gk].stem
-
-            # set input shape
-            del h5[f'{group}/shape']
-            shape_zyx = [accessors[gk].shape_dict[ax] for ax in ['Z', 'Y', 'X']]
-            h5[f'{group}/shape'] = np.array(shape_zyx)
-
-        # change key of label names
-        if (k := 'ObjectClassification/LabelNames') in h5.keys():
-            del h5[k]
-        ln = np.array(label_names)
-        h5.create_dataset(k, data=ln.astype('O'))
-
-        if (k := 'ObjectClassification/MaxNumObj') in h5.keys():
-            del h5[k]
-        h5[k] = len(label_names) - 1
-
-        del h5['currentApplet']
-        h5['currentApplet'] = 1
-
-        # change object labels
-        if (k := f'ObjectClassification/LabelInputs/{lane:04d}') in h5.keys():
-            del h5[k]
-        lag = h5.create_group(k)
-        # for zi in range(0, nz):
-        #     lag[f'{zi}'] = np.array([0., float(get_label(zi))])
-        for zi, la in enumerate(labels):
-            lag[f'{zi}'] = np.array([0., float(la)])
-
-        # delete existing classification weights
-        if (k := f'ObjectExtraction/RegionFeatures/{lane:04d}') in h5.keys():
-            del h5[k]
-        if (k := 'ObjectClassification/ClassifierForests') in h5.keys():
-            del h5[k]
-
-    return new_ilp
+from extensions.chaeo.accessors import MonoPatchStackFromFile
+from extensions.chaeo.models import generate_ilastik_object_classifier, PatchStackObjectClassifier
+from model_server.accessors import GenericImageDataAccessor, write_accessor_data_to_file
 
 
 def compare_object_maps(truth: GenericImageDataAccessor, inferred: GenericImageDataAccessor) -> pd.DataFrame:
@@ -137,6 +43,17 @@ def compare_object_maps(truth: GenericImageDataAccessor, inferred: GenericImageD
         labels.append(dd)
     return pd.DataFrame(labels)
 
+def infer_and_compare(classifier: PatchStackObjectClassifier, prefix, raw, mask, labels):
+    """Infer labels for (raw, mask), then write the result stack and a truth-vs-inferred CSV under root (set in __main__)."""
+    result_acc, _ = classifier.infer(raw, mask)
+    # classifier_file is likewise a global from __main__; outputs are named by prefix so train/test runs do not collide
+    write_accessor_data_to_file(root / f'zstack_{prefix}_result.tif', result_acc)
+    df_comp = compare_object_maps(labels, result_acc)
+    df_comp.to_csv(root / f'compare_{prefix}_result.csv', index=False)
+    print(f'Generated ilastik project {classifier_file.name}')
+    print('Truth and inferred labels match?')
+    print((df_comp['truth_label'] == df_comp['inferred_label']).value_counts())
+
 if __name__ == '__main__':
     root = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/exp0009/output/labeled_patches-20231030-0002')
     template_ilp = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/exp0014/template_obj.ilp')
@@ -147,7 +64,7 @@ if __name__ == '__main__':
     assert len(label_names) >= 2
 
     # auto-populate an object classifier
-    new_ilp = generate_ilastik_object_classifier(
+    classifier_file = generate_ilastik_object_classifier(
         template_ilp,
         root / 'new_auto_obj.ilp',
         root / 'zstack_train_raw.tif',
@@ -155,27 +72,23 @@ if __name__ == '__main__':
         root / 'zstack_train_label.tif',
         label_names,
     )
+    classifier = PatchStackObjectClassifier({'project_file': classifier_file})
 
-    auto_ilp = new_ilp
-
-    def infer_and_compare(ilp, prefix, raw, mask, labels):
-        mod = PatchStackObjectClassifier({'project_file': root / ilp})
-        result_acc, _ = mod.infer(raw, mask)
-        write_accessor_data_to_file(root / f'zstack_train_result.tif', result_acc)
-
-        # write comparison tables
-        df_comp = compare_object_maps(labels, result_acc)
-        df_comp.to_csv(root / f'compare_{prefix}_result.csv', index=False)
-        print(f'Generated ilastik project {ilp}')
-        print('Truth and inferred labels match?')
-        print(pd.value_counts(df_comp['truth_label'] == df_comp['inferred_label']))
-
-    # infer_and_compare_training_set(auto_ilp, 'before')
+    # verify self-consistency of training set
     infer_and_compare(
-        auto_ilp,
+        classifier,
         'train',
         MonoPatchStackFromFile(root / 'zstack_train_raw.tif'),
         MonoPatchStackFromFile(root / 'zstack_train_mask.tif'),
         MonoPatchStackFromFile(root / 'zstack_train_label.tif')
     )
 
+    # run test set
+    infer_and_compare(
+        classifier,
+        'test',
+        MonoPatchStackFromFile(root / 'zstack_test_raw.tif'),
+        MonoPatchStackFromFile(root / 'zstack_test_mask.tif'),
+        MonoPatchStackFromFile(root / 'zstack_test_label.tif'),
+    )
+
diff --git a/extensions/chaeo/models.py b/extensions/chaeo/models.py
index 8a141b35..9e209be2 100644
--- a/extensions/chaeo/models.py
+++ b/extensions/chaeo/models.py
@@ -1,7 +1,11 @@
+from pathlib import Path
+import shutil
+
+import h5py
 import numpy as np
 import vigra
 
-from extensions.chaeo.accessors import MonoPatchStack
+from extensions.chaeo.accessors import MonoPatchStack, MonoPatchStackFromFile
 from extensions.ilastik.models import IlastikObjectClassifierFromSegmentationModel
 from model_server.accessors import InMemoryDataAccessor
 
@@ -12,7 +16,6 @@ class PatchStackObjectClassifier(IlastikObjectClassifierFromSegmentationModel):
     as time-series images where each frame contains only one object.
     """
 
-
     def infer(self, input_acc: MonoPatchStack, segmentation_acc: MonoPatchStack) -> (np.ndarray, dict):
         assert segmentation_acc.is_mask()
         assert input_acc.chroma == 1
@@ -41,4 +44,95 @@ class PatchStackObjectClassifier(IlastikObjectClassifierFromSegmentationModel):
 
         assert yxz.shape[0:2] == input_acc.hw
         assert yxz.shape[2] == input_acc.nz
-        return MonoPatchStack(data=yxz), {'success': True}
\ No newline at end of file
+        return MonoPatchStack(data=yxz), {'success': True}
+
+
+def generate_ilastik_object_classifier(
+        template_ilp: Path,
+        target_ilp: Path,
+        raw_tif: Path,
+        mask_tif: Path,
+        label_tif: Path,
+        label_names: list,
+        lane: int = 0,
+) -> Path:
+    """
+    Starting with a template project file, transfer input data and labels to a new project file.
+    :param template_ilp: path to existing ilastik object classifier to use as a template
+    :param target_ilp: path to new classifier; the template is copied here and then edited in place
+    :param raw_tif: path to stack of patches containing raw data; only its file name is stored in the project
+    :param mask_tif: path to stack of patches containing object masks; only its file name is stored in the project
+    :param label_tif: path to stack of patches containing object labels; the largest value in each patch is its label
+    :param label_names: list of label names
+    :param lane: ilastik lane identifier
+    :return: path to generated object classifier
+    """
+    new_ilp = shutil.copy(template_ilp, target_ilp)
+
+    paths = {
+        'Raw Data': raw_tif,
+        'Segmentation Image': mask_tif,
+    }
+    # open each stack at the path given; joining with raw_tif.parent would double the directory of a relative path
+    accessors = {k: MonoPatchStackFromFile(pa) for k, pa in paths.items()}
+
+    # get labels from label image
+    acc_labels = MonoPatchStackFromFile(label_tif)
+    labels = []
+    for ii in range(0, acc_labels.count):
+        unique = np.unique(acc_labels.iat(ii))
+        assert len(unique) >= 2, 'Label image contains more than one non-zero value'
+        assert unique[0] == 0, 'Label image does not contain unlabeled background'
+        assert unique[-1] < len(label_names) + 1, f'Label ID {unique[-1]} exceeds number of label names: {len(label_names)}'
+        labels.append(unique[-1])
+
+    # write to new project file
+    with h5py.File(new_ilp, 'r+') as h5:
+
+        for gk in ['Raw Data', 'Segmentation Image']:
+            group = f'Input Data/infos/lane{lane:04d}/{gk}'
+
+            # set path to input image files
+            del h5[f'{group}/filePath']
+            h5[f'{group}/filePath'] = paths[gk].name
+            assert not Path(h5[f'{group}/filePath'][()].decode()).is_absolute()
+            assert h5[f'{group}/filePath'][()] == paths[gk].name.encode()
+            assert h5[f'{group}/location'][()] == 'FileSystem'.encode()
+
+            # set input nickname
+            del h5[f'{group}/nickname']
+            h5[f'{group}/nickname'] = paths[gk].stem
+
+            # set input shape
+            del h5[f'{group}/shape']
+            shape_zyx = [accessors[gk].shape_dict[ax] for ax in ['Z', 'Y', 'X']]
+            h5[f'{group}/shape'] = np.array(shape_zyx)
+
+        # change key of label names
+        if (k := 'ObjectClassification/LabelNames') in h5.keys():
+            del h5[k]
+        ln = np.array(label_names)
+        h5.create_dataset(k, data=ln.astype('O'))
+
+        if (k := 'ObjectClassification/MaxNumObj') in h5.keys():
+            del h5[k]
+        h5[k] = len(label_names) - 1
+
+        del h5['currentApplet']
+        h5['currentApplet'] = 1
+
+        # change object labels
+        if (k := f'ObjectClassification/LabelInputs/{lane:04d}') in h5.keys():
+            del h5[k]
+        lag = h5.create_group(k)
+        for zi, la in enumerate(labels):
+            lag[f'{zi}'] = np.array([0., float(la)])
+
+        # delete existing classification weights
+        if (k := f'ObjectExtraction/RegionFeatures/{lane:04d}') in h5.keys():
+            del h5[k]
+        if (k := 'ObjectClassification/ClassifierForests') in h5.keys():
+            del h5[k]
+
+    return Path(new_ilp)
+
-- 
GitLab