diff --git a/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py b/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py index c69b9b960bfdaf4b95b0afe16549e833ea36492c..9975f4cc373e2f30372c8d807be12660898d9313 100644 --- a/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py +++ b/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py @@ -58,7 +58,7 @@ def infer_and_compare(classifier: PatchStackObjectClassifier, prefix, raw, mask, if __name__ == '__main__': - root = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/exp0009/output/labeled_patches-20231030-0002') + root = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/exp0009/output/labeled_patches-20231030-0007') template_ilp = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/exp0014/template_obj.ilp') df_labels = pd.read_csv(root / 'labels_key.csv') @@ -78,21 +78,21 @@ if __name__ == '__main__': ) classifier = PatchStackObjectClassifier({'project_file': classifier_file}) - # # verify self-consistency of training set - # infer_and_compare( - # classifier, - # 'train', - # MonoPatchStackFromFile(root / 'zstack_train_raw.tif'), - # MonoPatchStackFromFile(root / 'zstack_train_mask.tif'), - # MonoPatchStackFromFile(root / 'zstack_train_label.tif') - # ) - # - # # run test set - # infer_and_compare( - # classifier, - # 'test', - # MonoPatchStackFromFile(root / 'zstack_test_raw.tif'), - # MonoPatchStackFromFile(root / 'zstack_test_mask.tif'), - # MonoPatchStackFromFile(root / 'zstack_test_label.tif'), - # ) + # verify self-consistency of training set + infer_and_compare( + classifier, + 'train', + MonoPatchStackFromFile(root / 'zstack_train_raw.tif'), + MonoPatchStackFromFile(root / 'zstack_train_mask.tif'), + MonoPatchStackFromFile(root / 'zstack_train_label.tif') + ) + + # run test set + infer_and_compare( + classifier, + 'test', + MonoPatchStackFromFile(root / 'zstack_test_raw.tif'), + MonoPatchStackFromFile(root / 'zstack_test_mask.tif'), + MonoPatchStackFromFile(root / 'zstack_test_label.tif'), + ) diff --git a/extensions/chaeo/workflows.py b/extensions/chaeo/workflows.py index 3847d5e2c99e9b14d005b43161a9c07360af6f2f..1a67984bc0e4935556370836ecc42a6dc367f377 100644 --- a/extensions/chaeo/workflows.py +++ b/extensions/chaeo/workflows.py @@ -4,6 +4,7 @@ from uuid import uuid4 import numpy as np import pandas as pd +from skimage.measure import label, regionprops_table from skimage.morphology import dilation from sklearn.model_selection import train_test_split @@ -292,14 +293,15 @@ def infer_object_map_from_zstack( def transfer_ecotaxa_labels_to_patch_stacks( - where_masks: str, - where_patches: str, - object_csv: str, - ecotaxa_tsv: str, - where_output: str, - patch_size: tuple = (256, 256), - tr_split=0.6, - dilate_label_mask: bool = True, # to mitigate connected components error in ilastik + where_masks: str, + where_patches: str, + object_csv: str, + ecotaxa_tsv: str, + where_output: str, + patch_size: tuple = (256, 256), + tr_split=0.6, + dilate_label_mask: bool = True, # to mitigate connected components error in ilastik + allow_multiple_objects: bool = False, ) -> Dict: assert tr_split > 0.5 # reduce chance that low-probability objects are omitted from training @@ -381,6 +383,14 @@ def transfer_ecotaxa_labels_to_patch_stacks( mask = acc_bm.data[:, :, 0, 0] if dilate_label_mask: mask = dilation(mask) + if not allow_multiple_objects: + ob_id = label(acc_bm.data[:, :, 0, 0]) + num_obj = len(np.unique(ob_id)) - 1 + if num_obj > 1: + print(f'Found multiple nonzero unique values in mask {fi}; keeping the one with largest area') + pr = regionprops_table(ob_id, properties=['label', 'area']) + idx_max_area = pr['area'].argmax() + mask = 255 * (ob_id == pr['label'][idx_max_area]) zstacks[dfk + '_mask'][:, :, 0, fi] = mask zstacks[dfk + '_label'][:, :, 0, fi] = (mask == 255) * aci