diff --git a/extensions/chaeo/examples/test_modify_object_classifier.py b/extensions/chaeo/examples/test_modify_object_classifier.py index 1c5343e5fa39dddd5d72b8c77f60b2df35f78aa3..671a0f7865ef88f22c635b49fb8b6b1252c0e1f7 100644 --- a/extensions/chaeo/examples/test_modify_object_classifier.py +++ b/extensions/chaeo/examples/test_modify_object_classifier.py @@ -31,7 +31,7 @@ def modify_ilastik_object_classifier(template_ilp: Path, lane: int = 0): key = f'Input Data/infos/lane{lane:04d}/{gk}' assert h5[f'{key}/location'][()] == 'FileSystem'.encode() pa = Path(h5[f'{key}/filePath'][()].decode()) - assert pa.is_absolute() + # assert pa.is_absolute() rel_pa = pa.name assert (proj_dir / rel_pa).exists() del h5[f'{key}/filePath'] diff --git a/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py b/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py index 78418b8b6be12f1985e6c7d4942f73c71aac3a6a..748212ce140aff5780b17893abbd1069efd97d0e 100644 --- a/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py +++ b/extensions/chaeo/examples/transfer_labels_to_ilastik_object_classifier.py @@ -7,110 +7,99 @@ import pandas as pd import skimage import uuid -from extensions.chaeo.accessors import MonoPatchStackFromFile +from extensions.chaeo.accessors import MonoPatchStack, MonoPatchStackFromFile from extensions.chaeo.h5util import get_dataset_info from extensions.chaeo.models import PatchStackObjectClassifier from model_server.accessors import generate_file_accessor, GenericImageDataAccessor, write_accessor_data_to_file def generate_ilastik_object_classifier( - template_ilp: str, - where: str, - stack_name: str = 'train', + template_ilp: Path, + target_ilp: Path, + raw_tif: Path, + mask_tif: Path, + label_tif: Path, + label_names: list, lane: int = 0, - proj_name='auto_obj' ): """ - Starting with a template project file, transfer input data and labels to a duplicate project file. - - :param template_ilp: absolute path to existing ilastik object classifier to use as a template - :param where: absolute path to folder containing input data, segmentation maps, labels, and label descriptions - :poram stack_name: prefix of .tif and .csv files that contain classifier training data (e.g. train, test) + Starting with a template project file, transfer input data and labels to a new project file. + :param template_ilp: path to existing ilastik object classifier to use as a template + :param target_ilp: path to new classifier + :param raw_tif: path to stack of patches containing raw data + :param mask_tif: path to stack of patches containing object masks + :param label_tif: path to stack of patches containing object labels + :param label_names: list of label names :param lane: ilastik lane identifier - :return: (str) name of new ilastik classifier project file + :return: """ + new_ilp = shutil.copy(template_ilp, target_ilp) - # validate z-stack input data - root = Path(where) - rel_paths = { - 'Raw Data': Path(f'zstack_{stack_name}_raw.tif'), - 'Segmentation Image': Path(f'zstack_{stack_name}_mask.tif'), + paths = { + 'Raw Data': raw_tif, + 'Segmentation Image': mask_tif, } + accessors = {k: MonoPatchStackFromFile(root / pa) for k, pa in paths.items()} - accessors = {k: generate_file_accessor(root / pa) for k, pa in rel_paths.items()} - - assert accessors['Raw Data'].chroma == 1 - assert accessors['Segmentation Image'].is_mask() - assert len(set([a.hw for a in accessors.values()])) == 1 # same height and width - assert len(set([a.nz for a in accessors.values()])) == 1 # same z-depth - nz = accessors['Raw Data'].nz + # get labels from label image + acc_labels = MonoPatchStackFromFile(label_tif) + labels = [] + for ii in range(0, acc_labels.count): + unique = np.unique(acc_labels.iat(ii)) + assert len(unique) >= 2, 'Label image contains more than one non-zero value' + assert unique[0] == 0, 'Label image does not contain unlabeled background' + assert unique[-1] < len(label_names) + 1, f'Label ID {unique[-1]} exceeds number of label names: {len(label_names)}' + labels.append(unique[-1]) - # now load CSV - csv_path = root / f'{stack_name}_stack.csv' - assert csv_path.exists() - df_patches = pd.read_csv(csv_path) - assert np.all( - df_patches['zi'].sort_values().to_numpy() == np.arange(0, nz) - ) - df_labels = pd.read_csv(root / 'labels_key.csv') - label_names = list(df_labels.sort_values('annotation_class_id').annotation_class.unique()) - label_names[0] = 'none' - assert len(label_names) >= 2 + # write to new project file + with h5py.File(new_ilp, 'r+') as h5: - # open, validate, and copy template project file - with h5py.File(template_ilp, 'r') as h5: - info = get_dataset_info(h5) + for gk in ['Raw Data', 'Segmentation Image']: + group = f'Input Data/infos/lane{lane:04d}/{gk}' - for hg in ['Raw Data', 'Segmentation Image']: - assert info[hg]['location'] == b'FileSystem' - assert info[hg]['axes'] == ['t', 'y', 'x'] + # set path to input image files + del h5[f'{group}/filePath'] + h5[f'{group}/filePath'] = paths[gk].name + assert not Path(h5[f'{group}/filePath'][()].decode()).is_absolute() + assert h5[f'{group}/filePath'][()] == paths[gk].name.encode() + assert h5[f'{group}/location'][()] == 'FileSystem'.encode() - new_ilp = shutil.copy(template_ilp, where / (proj_name + '.ilp')) + # set input nickname + del h5[f'{group}/nickname'] + h5[f'{group}/nickname'] = paths[gk].stem - # write to new project file - lns = f'{lane:04d}' - with h5py.File(where / new_ilp, 'r+') as h5: - def set_ds(grp, ds, val): - ds = h5[f'Input Data/infos/lane{lns}/{grp}/{ds}'] - ds[()] = val - return ds[()] - - def get_label(idx): - return df_patches.loc[df_patches.zi == idx, 'annotation_class_id'].iat[0] - - for hg in ['Raw Data', 'Segmentation Image']: - set_ds(hg, 'filePath', rel_paths[hg].__str__()) - set_ds(hg, 'nickname', rel_paths[hg].stem) - shape_zyx = [accessors[hg].shape_dict[ax] for ax in ['Z', 'Y', 'X']] - set_ds(hg, 'shape', np.array(shape_zyx)) + # set input shape + del h5[f'{group}/shape'] + shape_zyx = [accessors[gk].shape_dict[ax] for ax in ['Z', 'Y', 'X']] + h5[f'{group}/shape'] = np.array(shape_zyx) # change key of label names - if (k_ln := 'ObjectClassification/LabelNames') in h5.keys(): - del h5[k_ln] + if (k := 'ObjectClassification/LabelNames') in h5.keys(): + del h5[k] ln = np.array(label_names) - h5.create_dataset(k_ln, data=ln.astype('O')) + h5.create_dataset(k, data=ln.astype('O')) - if (k_mn := 'ObjectClassification/MaxNumObj') in h5.keys(): - del h5[k_mn] - # h5.create_dataset(k_mn, data=(len(label_names) - 1)) - h5[k_mn] = len(label_names) - 1 + if (k := 'ObjectClassification/MaxNumObj') in h5.keys(): + del h5[k] + h5[k] = len(label_names) - 1 del h5['currentApplet'] h5['currentApplet'] = 1 # change object labels - if (k_li := f'ObjectClassification/LabelInputs/{lns}') in h5.keys(): - del h5[k_li] - lag = h5.create_group(k_li) - for zi in range(0, nz): - lag[f'{zi}'] = np.array([0., float(get_label(zi))]) + if (k := f'ObjectClassification/LabelInputs/{lane:04d}') in h5.keys(): + del h5[k] + lag = h5.create_group(k) + # for zi in range(0, nz): + # lag[f'{zi}'] = np.array([0., float(get_label(zi))]) + for zi, la in enumerate(labels): + lag[f'{zi}'] = np.array([0., float(la)]) # delete existing classification weights - if (k_rf := f'ObjectExtraction/RegionFeatures/{lane:04d}') in h5.keys(): - del h5[k_rf] - if (k_cf := 'ObjectClassification/ClassificationForests') in h5.keys(): - del h5[k_cf] - + if (k := f'ObjectExtraction/RegionFeatures/{lane:04d}') in h5.keys(): + del h5[k] + if (k := 'ObjectClassification/ClassificationForests') in h5.keys(): + del h5[k] return new_ilp @@ -149,65 +138,46 @@ def compare_object_maps(truth: GenericImageDataAccessor, inferred: GenericImageD return pd.DataFrame(labels) if __name__ == '__main__': - root = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/') - template_ilp = root / 'exp0014/template_obj.ilp' - where_patch_stack = root / 'exp0009/output/labeled_patches-20231030-0002' + root = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/exp0009/output/labeled_patches-20231030-0002') + template_ilp = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/exp0014/template_obj.ilp') + + df_labels = pd.read_csv(root / 'labels_key.csv') + label_names = list(df_labels.sort_values('annotation_class_id').annotation_class.unique()) + label_names[0] = 'none' + assert len(label_names) >= 2 # auto-populate an object classifier - auto_ilp = generate_ilastik_object_classifier( + new_ilp = generate_ilastik_object_classifier( template_ilp, - where_patch_stack, - stack_name='train', - proj_name='auto_obj_before' + root / 'new_auto_obj.ilp', + root / 'zstack_train_raw.tif', + root / 'zstack_train_mask.tif', + root / 'zstack_train_label.tif', + label_names, ) - # auto_ilp = 'auto_obj_before.ilp' - - def infer_and_compare_training_set(ilp, suffix): - # infer object labels from the same data used to train the classifier - train_zstack_raw = MonoPatchStackFromFile(where_patch_stack / 'zstack_train_raw.tif') - train_zstack_mask = MonoPatchStackFromFile(where_patch_stack / 'zstack_train_mask.tif') - train_truth_labels = MonoPatchStackFromFile(where_patch_stack / f'zstack_train_label.tif') - infer_and_compare(ilp, 'train', suffix, train_zstack_raw, train_zstack_mask, train_truth_labels) - - def infer_and_compare_test_set(ilp, suffix): - # infer object labels from test dataset - test_zstack_raw = MonoPatchStackFromFile(where_patch_stack / 'zstack_test_raw.tif') - test_zstack_mask = MonoPatchStackFromFile(where_patch_stack / 'zstack_test_mask.tif') - test_truth_labels = MonoPatchStackFromFile(where_patch_stack / f'zstack_test_label.tif') - infer_and_compare(ilp, 'test', suffix, test_zstack_raw, test_zstack_mask, test_truth_labels) - - def infer_and_compare(ilp, prefix, suffix, raw, mask, labels): - mod = PatchStackObjectClassifier({'project_file': where_patch_stack / ilp}) + + from extensions.chaeo.examples.test_modify_object_classifier import modify_ilastik_object_classifier + auto_ilp = modify_ilastik_object_classifier(new_ilp) + # auto_ilp = new_ilp + + def infer_and_compare(ilp, prefix, raw, mask, labels): + mod = PatchStackObjectClassifier({'project_file': root / ilp}) result_acc, _ = mod.infer(raw, mask) - write_accessor_data_to_file(where_patch_stack / f'zstack_train_result_{suffix}.tif', result_acc) + write_accessor_data_to_file(root / f'zstack_train_result.tif', result_acc) # write comparison tables df_comp = compare_object_maps(labels, result_acc) - df_comp.to_csv(where_patch_stack / f'compare_{prefix}_result_{suffix}.csv', index=False) + df_comp.to_csv(root / f'compare_{prefix}_result.csv', index=False) print(f'Generated ilastik project {ilp}') print('Truth and inferred labels match?') print(pd.value_counts(df_comp['truth_label'] == df_comp['inferred_label'])) - # report out some things for debugging - rks = [ - 'ObjectClassification/MaxNumObj', - 'ObjectClassification/LabelInputs/0000/0', - 'currentApplet' - ] - with h5py.File(ilp, 'r') as h5: - for r in rks: - print(f'{r}: {h5[r][()]}') - - # infer object labels from the same data used to train the classifier - infer_and_compare_training_set(auto_ilp, 'before') - - # copy project and prompt user input once ilastik file has been modified in-app - mod_ilp = shutil.copy(auto_ilp, where_patch_stack / 'auto_obj_after.ilp') - print(f'Press enter when project file {mod_ilp} has been updated in ilastik') - input() - - # repeat inference with the modified project file - infer_and_compare_training_set(mod_ilp, 'after') - infer_and_compare_test_set(mod_ilp, 'after') - + # infer_and_compare_training_set(auto_ilp, 'before') + infer_and_compare( + auto_ilp, + 'train', + MonoPatchStackFromFile(root / 'zstack_train_raw.tif'), + MonoPatchStackFromFile(root / 'zstack_train_mask.tif'), + MonoPatchStackFromFile(root / 'zstack_train_label.tif') + )