From 06f366225bfcdfcd7d0cee1971e2564b2babea8b Mon Sep 17 00:00:00 2001
From: Christopher Rhodes <christopher.rhodes@embl.de>
Date: Mon, 30 Oct 2023 11:53:17 +0100
Subject: [PATCH] Obtaining correct results with cleanup script

---
 .../examples/test_modify_object_classifier.py |   2 +-
 ...fer_labels_to_ilastik_object_classifier.py | 216 ++++++++----------
 2 files changed, 94 insertions(+), 124 deletions(-)

diff --git a/extensions/chaeo/examples/test_modify_object_classifier.py b/extensions/chaeo/examples/test_modify_object_classifier.py
index 1c5343e5..671a0f78 100644
--- a/extensions/chaeo/examples/test_modify_object_classifier.py
+++ b/extensions/chaeo/examples/test_modify_object_classifier.py
@@ -31,7 +31,7 @@ def modify_ilastik_object_classifier(template_ilp: Path, lane: int = 0):
             key = f'Input Data/infos/lane{lane:04d}/{gk}'
             assert h5[f'{key}/location'][()] == 'FileSystem'.encode()
             pa = Path(h5[f'{key}/filePath'][()].decode())
-            assert pa.is_absolute()
+            # assert pa.is_absolute()
             rel_pa = pa.name
             assert (proj_dir / rel_pa).exists()
             del h5[f'{key}/filePath']
diff --git a/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py b/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py
index 78418b8b..748212ce 100644
--- a/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py
+++ b/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py
@@ -7,110 +7,99 @@ import pandas as pd
 import skimage
 import uuid
 
-from extensions.chaeo.accessors import MonoPatchStackFromFile
+from extensions.chaeo.accessors import MonoPatchStack, MonoPatchStackFromFile
 from extensions.chaeo.h5util import get_dataset_info
 from extensions.chaeo.models import PatchStackObjectClassifier
 from model_server.accessors import generate_file_accessor, GenericImageDataAccessor, write_accessor_data_to_file
 
 
 def generate_ilastik_object_classifier(
-        template_ilp: str,
-        where: str,
-        stack_name: str = 'train',
+        template_ilp: Path,
+        target_ilp: Path,
+        raw_tif: Path,
+        mask_tif: Path,
+        label_tif: Path,
+        label_names: list,
         lane: int = 0,
-        proj_name='auto_obj'
 ):
     """
-    Starting with a template project file, transfer input data and labels to a duplicate project file.
-
-    :param template_ilp: absolute path to existing ilastik object classifier to use as a template
-    :param where: absolute path to folder containing input data, segmentation maps, labels, and label descriptions
-    :poram stack_name: prefix of .tif and .csv files that contain classifier training data (e.g. train, test)
+    Starting with a template project file, transfer input data and labels to a new project file.
+    :param template_ilp: path to existing ilastik object classifier to use as a template
+    :param target_ilp: path to new classifier
+    :param raw_tif: path to stack of patches containing raw data
+    :param mask_tif: path to stack of patches containing object masks
+    :param label_tif: path to stack of patches containing object labels
+    :param label_names: list of label names
     :param lane: ilastik lane identifier
-    :return: (str) name of new ilastik classifier project file
+    :return:
     """
+    new_ilp = shutil.copy(template_ilp, target_ilp)
 
-    # validate z-stack input data
-    root = Path(where)
-    rel_paths = {
-        'Raw Data': Path(f'zstack_{stack_name}_raw.tif'),
-        'Segmentation Image': Path(f'zstack_{stack_name}_mask.tif'),
+    paths = {
+        'Raw Data': raw_tif,
+        'Segmentation Image': mask_tif,
     }
+    accessors = {k: MonoPatchStackFromFile(root / pa) for k, pa in paths.items()}
 
-    accessors = {k: generate_file_accessor(root / pa) for k, pa in rel_paths.items()}
-
-    assert accessors['Raw Data'].chroma == 1
-    assert accessors['Segmentation Image'].is_mask()
-    assert len(set([a.hw for a in accessors.values()])) == 1  # same height and width
-    assert len(set([a.nz for a in accessors.values()])) == 1  # same z-depth
-    nz = accessors['Raw Data'].nz
+    # get labels from label image
+    acc_labels = MonoPatchStackFromFile(label_tif)
+    labels = []
+    for ii in range(0, acc_labels.count):
+        unique = np.unique(acc_labels.iat(ii))
+        assert len(unique) >= 2, 'Label image contains more than one non-zero value'
+        assert unique[0] == 0, 'Label image does not contain unlabeled background'
+        assert unique[-1] < len(label_names) + 1, f'Label ID {unique[-1]} exceeds number of label names: {len(label_names)}'
+        labels.append(unique[-1])
 
-    # now load CSV
-    csv_path = root / f'{stack_name}_stack.csv'
-    assert csv_path.exists()
-    df_patches = pd.read_csv(csv_path)
-    assert np.all(
-        df_patches['zi'].sort_values().to_numpy() == np.arange(0, nz)
-    )
-    df_labels = pd.read_csv(root / 'labels_key.csv')
-    label_names = list(df_labels.sort_values('annotation_class_id').annotation_class.unique())
-    label_names[0] = 'none'
-    assert len(label_names) >= 2
+    # write to new project file
+    with h5py.File(new_ilp, 'r+') as h5:
 
-    # open, validate, and copy template project file
-    with h5py.File(template_ilp, 'r') as h5:
-        info = get_dataset_info(h5)
+        for gk in ['Raw Data', 'Segmentation Image']:
+            group = f'Input Data/infos/lane{lane:04d}/{gk}'
 
-        for hg in ['Raw Data', 'Segmentation Image']:
-            assert info[hg]['location'] == b'FileSystem'
-            assert info[hg]['axes'] == ['t', 'y', 'x']
+            # set path to input image files
+            del h5[f'{group}/filePath']
+            h5[f'{group}/filePath'] = paths[gk].name
+            assert not Path(h5[f'{group}/filePath'][()].decode()).is_absolute()
+            assert h5[f'{group}/filePath'][()] == paths[gk].name.encode()
+            assert h5[f'{group}/location'][()] == 'FileSystem'.encode()
 
-    new_ilp = shutil.copy(template_ilp, where / (proj_name + '.ilp'))
+            # set input nickname
+            del h5[f'{group}/nickname']
+            h5[f'{group}/nickname'] = paths[gk].stem
 
-    # write to new project file
-    lns = f'{lane:04d}'
-    with h5py.File(where / new_ilp, 'r+') as h5:
-        def set_ds(grp, ds, val):
-            ds = h5[f'Input Data/infos/lane{lns}/{grp}/{ds}']
-            ds[()] = val
-            return ds[()]
-
-        def get_label(idx):
-            return df_patches.loc[df_patches.zi == idx, 'annotation_class_id'].iat[0]
-
-        for hg in ['Raw Data', 'Segmentation Image']:
-            set_ds(hg, 'filePath', rel_paths[hg].__str__())
-            set_ds(hg, 'nickname', rel_paths[hg].stem)
-            shape_zyx = [accessors[hg].shape_dict[ax] for ax in ['Z', 'Y', 'X']]
-            set_ds(hg, 'shape', np.array(shape_zyx))
+            # set input shape
+            del h5[f'{group}/shape']
+            shape_zyx = [accessors[gk].shape_dict[ax] for ax in ['Z', 'Y', 'X']]
+            h5[f'{group}/shape'] = np.array(shape_zyx)
 
         # change key of label names
-        if (k_ln := 'ObjectClassification/LabelNames') in h5.keys():
-            del h5[k_ln]
+        if (k := 'ObjectClassification/LabelNames') in h5.keys():
+            del h5[k]
         ln = np.array(label_names)
-        h5.create_dataset(k_ln, data=ln.astype('O'))
+        h5.create_dataset(k, data=ln.astype('O'))
 
-        if (k_mn := 'ObjectClassification/MaxNumObj') in h5.keys():
-            del h5[k_mn]
-        # h5.create_dataset(k_mn, data=(len(label_names) - 1))
-        h5[k_mn] = len(label_names) - 1
+        if (k := 'ObjectClassification/MaxNumObj') in h5.keys():
+            del h5[k]
+        h5[k] = len(label_names) - 1
 
         del h5['currentApplet']
         h5['currentApplet'] = 1
 
         # change object labels
-        if (k_li := f'ObjectClassification/LabelInputs/{lns}') in h5.keys():
-            del h5[k_li]
-        lag = h5.create_group(k_li)
-        for zi in range(0, nz):
-            lag[f'{zi}'] = np.array([0., float(get_label(zi))])
+        if (k := f'ObjectClassification/LabelInputs/{lane:04d}') in h5.keys():
+            del h5[k]
+        lag = h5.create_group(k)
+        # for zi in range(0, nz):
+        #     lag[f'{zi}'] = np.array([0., float(get_label(zi))])
+        for zi, la in enumerate(labels):
+            lag[f'{zi}'] = np.array([0., float(la)])
 
         # delete existing classification weights
-        if (k_rf := f'ObjectExtraction/RegionFeatures/{lane:04d}') in h5.keys():
-            del h5[k_rf]
-        if (k_cf := 'ObjectClassification/ClassificationForests') in h5.keys():
-            del h5[k_cf]
-
+        if (k := f'ObjectExtraction/RegionFeatures/{lane:04d}') in h5.keys():
+            del h5[k]
+        if (k := 'ObjectClassification/ClassificationForests') in h5.keys():
+            del h5[k]
 
     return new_ilp
 
@@ -149,65 +138,46 @@ def compare_object_maps(truth: GenericImageDataAccessor, inferred: GenericImageD
     return pd.DataFrame(labels)
 
 if __name__ == '__main__':
-    root =  Path('c:/Users/rhodes/projects/proj0011-plankton-seg/')
-    template_ilp = root / 'exp0014/template_obj.ilp'
-    where_patch_stack = root / 'exp0009/output/labeled_patches-20231030-0002'
+    root = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/exp0009/output/labeled_patches-20231030-0002')
+    template_ilp = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/exp0014/template_obj.ilp')
+
+    df_labels = pd.read_csv(root / 'labels_key.csv')
+    label_names = list(df_labels.sort_values('annotation_class_id').annotation_class.unique())
+    label_names[0] = 'none'
+    assert len(label_names) >= 2
 
     # auto-populate an object classifier
-    auto_ilp = generate_ilastik_object_classifier(
+    new_ilp = generate_ilastik_object_classifier(
         template_ilp,
-        where_patch_stack,
-        stack_name='train',
-        proj_name='auto_obj_before'
+        root / 'new_auto_obj.ilp',
+        root / 'zstack_train_raw.tif',
+        root / 'zstack_train_mask.tif',
+        root / 'zstack_train_label.tif',
+        label_names,
     )
-    # auto_ilp = 'auto_obj_before.ilp'
-
-    def infer_and_compare_training_set(ilp, suffix):
-        # infer object labels from the same data used to train the classifier
-        train_zstack_raw = MonoPatchStackFromFile(where_patch_stack / 'zstack_train_raw.tif')
-        train_zstack_mask = MonoPatchStackFromFile(where_patch_stack / 'zstack_train_mask.tif')
-        train_truth_labels = MonoPatchStackFromFile(where_patch_stack / f'zstack_train_label.tif')
-        infer_and_compare(ilp, 'train', suffix, train_zstack_raw, train_zstack_mask, train_truth_labels)
-
-    def infer_and_compare_test_set(ilp, suffix):
-        # infer object labels from test dataset
-        test_zstack_raw = MonoPatchStackFromFile(where_patch_stack / 'zstack_test_raw.tif')
-        test_zstack_mask = MonoPatchStackFromFile(where_patch_stack / 'zstack_test_mask.tif')
-        test_truth_labels = MonoPatchStackFromFile(where_patch_stack / f'zstack_test_label.tif')
-        infer_and_compare(ilp, 'test', suffix, test_zstack_raw, test_zstack_mask, test_truth_labels)
-
-    def infer_and_compare(ilp, prefix, suffix, raw, mask, labels):
-        mod = PatchStackObjectClassifier({'project_file': where_patch_stack / ilp})
+
+    from extensions.chaeo.examples.test_modify_object_classifier import modify_ilastik_object_classifier
+    auto_ilp = modify_ilastik_object_classifier(new_ilp)
+    # auto_ilp = new_ilp
+
+    def infer_and_compare(ilp, prefix, raw, mask, labels):
+        mod = PatchStackObjectClassifier({'project_file': root / ilp})
         result_acc, _ = mod.infer(raw, mask)
-        write_accessor_data_to_file(where_patch_stack / f'zstack_train_result_{suffix}.tif', result_acc)
+        write_accessor_data_to_file(root / f'zstack_train_result.tif', result_acc)
 
         # write comparison tables
         df_comp = compare_object_maps(labels, result_acc)
-        df_comp.to_csv(where_patch_stack / f'compare_{prefix}_result_{suffix}.csv', index=False)
+        df_comp.to_csv(root / f'compare_{prefix}_result.csv', index=False)
         print(f'Generated ilastik project {ilp}')
         print('Truth and inferred labels match?')
         print(pd.value_counts(df_comp['truth_label'] == df_comp['inferred_label']))
 
-        # report out some things for debugging
-        rks = [
-            'ObjectClassification/MaxNumObj',
-            'ObjectClassification/LabelInputs/0000/0',
-            'currentApplet'
-        ]
-        with h5py.File(ilp, 'r') as h5:
-            for r in rks:
-                print(f'{r}: {h5[r][()]}')
-
-    # infer object labels from the same data used to train the classifier
-    infer_and_compare_training_set(auto_ilp, 'before')
-
-    # copy project and prompt user input once ilastik file has been modified in-app
-    mod_ilp = shutil.copy(auto_ilp, where_patch_stack / 'auto_obj_after.ilp')
-    print(f'Press enter when project file {mod_ilp} has been updated in ilastik')
-    input()
-
-    # repeat inference with the modified project file
-    infer_and_compare_training_set(mod_ilp, 'after')
-    infer_and_compare_test_set(mod_ilp, 'after')
-
+    # infer_and_compare_training_set(auto_ilp, 'before')
+    infer_and_compare(
+        auto_ilp,
+        'train',
+        MonoPatchStackFromFile(root / 'zstack_train_raw.tif'),
+        MonoPatchStackFromFile(root / 'zstack_train_mask.tif'),
+        MonoPatchStackFromFile(root / 'zstack_train_label.tif')
+    )
 
-- 
GitLab