From 5ee20572a1cb0cc0219ca3cbd86559162fb0f3c9 Mon Sep 17 00:00:00 2001 From: Christopher Rhodes <christopher.rhodes@embl.de> Date: Mon, 30 Oct 2023 14:09:05 +0100 Subject: [PATCH] Implemented check for multiple connected objects in mask frame, and optional restriction to the largest-area one; inference on autogenerated model now works with 100% accuracy when passing on training data --- ...fer_labels_to_ilastik_object_classifier.py | 36 +++++++++---------- extensions/chaeo/workflows.py | 26 +++++++++----- 2 files changed, 36 insertions(+), 26 deletions(-) diff --git a/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py b/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py index c69b9b96..9975f4cc 100644 --- a/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py +++ b/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py @@ -58,7 +58,7 @@ def infer_and_compare(classifier: PatchStackObjectClassifier, prefix, raw, mask, if __name__ == '__main__': - root = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/exp0009/output/labeled_patches-20231030-0002') + root = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/exp0009/output/labeled_patches-20231030-0007') template_ilp = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/exp0014/template_obj.ilp') df_labels = pd.read_csv(root / 'labels_key.csv') @@ -78,21 +78,21 @@ if __name__ == '__main__': ) classifier = PatchStackObjectClassifier({'project_file': classifier_file}) - # # verify self-consistency of training set - # infer_and_compare( - # classifier, - # 'train', - # MonoPatchStackFromFile(root / 'zstack_train_raw.tif'), - # MonoPatchStackFromFile(root / 'zstack_train_mask.tif'), - # MonoPatchStackFromFile(root / 'zstack_train_label.tif') - # ) - # - # # run test set - # infer_and_compare( - # classifier, - # 'test', - # MonoPatchStackFromFile(root / 'zstack_test_raw.tif'), - # MonoPatchStackFromFile(root / 'zstack_test_mask.tif'), - # MonoPatchStackFromFile(root / 'zstack_test_label.tif'), - # ) + # verify self-consistency of training set + infer_and_compare( + classifier, + 'train', + MonoPatchStackFromFile(root / 'zstack_train_raw.tif'), + MonoPatchStackFromFile(root / 'zstack_train_mask.tif'), + MonoPatchStackFromFile(root / 'zstack_train_label.tif') + ) + + # run test set + infer_and_compare( + classifier, + 'test', + MonoPatchStackFromFile(root / 'zstack_test_raw.tif'), + MonoPatchStackFromFile(root / 'zstack_test_mask.tif'), + MonoPatchStackFromFile(root / 'zstack_test_label.tif'), + ) diff --git a/extensions/chaeo/workflows.py b/extensions/chaeo/workflows.py index 3847d5e2..1a67984b 100644 --- a/extensions/chaeo/workflows.py +++ b/extensions/chaeo/workflows.py @@ -4,6 +4,7 @@ from uuid import uuid4 import numpy as np import pandas as pd +from skimage.measure import label, regionprops_table from skimage.morphology import dilation from sklearn.model_selection import train_test_split @@ -292,14 +293,15 @@ def infer_object_map_from_zstack( def transfer_ecotaxa_labels_to_patch_stacks( - where_masks: str, - where_patches: str, - object_csv: str, - ecotaxa_tsv: str, - where_output: str, - patch_size: tuple = (256, 256), - tr_split=0.6, - dilate_label_mask: bool = True, # to mitigate connected components error in ilastik + where_masks: str, + where_patches: str, + object_csv: str, + ecotaxa_tsv: str, + where_output: str, + patch_size: tuple = (256, 256), + tr_split=0.6, + dilate_label_mask: bool = True, # to mitigate connected components error in ilastik + allow_multiple_objects: bool = False, ) -> Dict: assert tr_split > 0.5 # reduce chance that low-probability objects are omitted from training @@ -381,6 +383,14 @@ def transfer_ecotaxa_labels_to_patch_stacks( mask = acc_bm.data[:, :, 0, 0] if dilate_label_mask: mask = dilation(mask) + if not allow_multiple_objects: + ob_id = label(acc_bm.data[:, :, 0, 0]) + num_obj = len(np.unique(ob_id)) - 1 + if num_obj > 1: + print(f'Found multiple nonzero unique values in mask {fi}; keeping the one with largest area') + pr = regionprops_table(ob_id, properties=['label', 'area']) + idx_max_area = pr['area'].argmax() + mask = 255 * (ob_id == pr['label'][idx_max_area]) zstacks[dfk + '_mask'][:, :, 0, fi] = mask zstacks[dfk + '_label'][:, :, 0, fi] = (mask == 255) * aci -- GitLab