transfer_labels_to_ilastik_object_classifier.py
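"""
Transfer object labels from an exported patch z-stack into an ilastik object
classifier project (.ilp), either by rewriting labels in an existing project
in place or by generating a new project from a template. Part of online
validation of the autogenerated model; an inference model that takes binary
(mask) inputs is still needed.
"""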
import json
import shutil
import uuid
from pathlib import Path

import h5py
import numpy as np
import pandas as pd

from extensions.chaeo.util import autonumber_new_file
from extensions.ilastik.models import IlastikObjectClassifierModel
from model_server.accessors import generate_file_accessor

def get_dataset_info(h5, lane=0):
    """Read lane metadata from an open ilastik project (.ilp) HDF5 file."""
    lns = f'{lane:04d}'
    lane_grp = f'Input Data/infos/lane{lns}'
    info = {}
    for gk in ['Raw Data', 'Segmentation Image']:
        info[gk] = {}
        for dk in ['location', 'filePath', 'shape', 'nickname']:
            try:
                info[gk][dk] = h5[f'{lane_grp}/{gk}/{dk}'][()]
            except Exception as e:
                print(e)
        try:
            info[gk]['id'] = uuid.UUID(h5[f'{lane_grp}/{gk}/datasetId'][()].decode())
        except ValueError:
            info[gk]['id'] = '<invalid UUID>'
        info[gk]['axistags'] = json.loads(h5[f'{lane_grp}/{gk}/axistags'][()].decode())
        info[gk]['axes'] = [ax['key'] for ax in info[gk]['axistags']['axes']]
    obj_cl_group = h5[f'ObjectClassification/LabelInputs/{lns}']
    info['misc'] = {
        'number_of_label_inputs': len(obj_cl_group.items())
    }
    return info
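
# Usage sketch: get_dataset_info only reads, so the project file can be
# opened read-only (path hypothetical, from the main block below):
#   with h5py.File('exp0014/template_obj.ilp', 'r') as h5:
#       info = get_dataset_info(h5)
#       print(info['Raw Data']['axes'])  # e.g. ['t', 'y', 'x']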

def transfer_labels_to_ilastik_ilp(ilp, df_stack_meta, dump_csv=False):
    """Overwrite the object labels in an existing .ilp file from exported stack metadata."""
    with h5py.File(ilp, 'r+') as h5:
        # TODO: force make copy if ilp file starts with template_
        # TODO: enforce somehow that zstack and df_stack_meta are from same export run
        where_out = Path(ilp).parent

        # optionally dump the complete HDF5 tree for inspection
        if dump_csv:
            with open(where_out / 'h5tree.txt', 'w') as hf:
                tt = []
                h5.visititems(lambda k, v: tt.append([k, str(v)]))
                for line in tt:
                    hf.write(f'{line[0]} --- {line[1]}\n')

        # put certain h5 groups in scope
        h5info = get_dataset_info(h5)

        # replace label names, reserving the first entry for unlabeled objects
        ln = ['none'] + list(df_stack_meta.sort_values('annotation_class_id').annotation_class.unique())
        del h5['ObjectClassification/LabelNames']
        h5.create_dataset('ObjectClassification/LabelNames', data=np.array(ln).astype('O'))

        # change object labels
        ts = h5['ObjectClassification']['LabelInputs']['0000']
        for ti in ts.items():
            assert len(ti) == 2  # (key, dataset) pairs
            idx = int(ti[0])  # important because keys are strings and hence not sorted numerically
            ds = ti[1]  # two entries: one for the unlabeled area, one for the labeled object
            la_old = ds[1]
            # unit index, i.e. reserve 1 for no object
            ds[1] = float(df_stack_meta.loc[df_stack_meta.zi == idx, 'annotation_class_id'].iat[0])
            print(f'Changed label {ti[0]} from {la_old} to {ds[1]}')
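
# Usage sketch (assumes the export CSV schema used in this script, with
# 'zi', 'annotation_class_id', and 'annotation_class' columns; paths hypothetical):
#   df_meta = pd.read_csv('labeled_patches-20231016-0002/train_stack.csv')
#   transfer_labels_to_ilastik_ilp('auto-obj-0000.ilp', df_meta, dump_csv=True)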

def generate_ilastik_object_classifier(template_ilp, where: str, lane=0):
    """Copy a template .ilp and wire it to a labeled patch z-stack and its label metadata."""
    # validate z-stack input data
    root = Path(where)
    paths = {
        'Raw Data': root / 'zstack_train_raw.tif',
        'Segmentation Image': root / 'zstack_train_mask.tif',
    }
    accessors = {k: generate_file_accessor(pa) for k, pa in paths.items()}

    assert accessors['Raw Data'].chroma == 1
    assert accessors['Segmentation Image'].is_mask()
    assert len(set(a.hw for a in accessors.values())) == 1  # same height and width
    assert len(set(a.nz for a in accessors.values())) == 1  # same z-depth
    nz = accessors['Raw Data'].nz

    # load patch metadata and label key CSVs
    csv_path = root / 'train_stack.csv'
    assert csv_path.exists()
    df_patches = pd.read_csv(csv_path)
    assert np.all(
        df_patches['zi'].sort_values().to_numpy() == np.arange(0, nz)
    )
    df_labels = pd.read_csv(root / 'labels_key.csv')
    label_names = list(df_labels.sort_values('annotation_class_id').annotation_class.unique())
    label_names[0] = 'none'  # first label name is reserved for 'no object'
    assert len(label_names) >= 2

    # open, validate, and copy template project file
    with h5py.File(template_ilp, 'r') as h5:
        info = get_dataset_info(h5)
        for hg in ['Raw Data', 'Segmentation Image']:
            assert info[hg]['location'] == b'FileSystem'
            assert info[hg]['axes'] == ['t', 'y', 'x']
    new_ilp = shutil.copy(template_ilp, root / autonumber_new_file(root, 'auto-obj', 'ilp'))

    # write to new project file
    lns = f'{lane:04d}'
    with h5py.File(new_ilp, 'r+') as h5:

        def set_ds(grp, key, val):
            h5ds = h5[f'Input Data/infos/lane{lns}/{grp}/{key}']
            h5ds[()] = val
            return h5ds[()]

        def get_label(idx):
            return df_patches.loc[df_patches.zi == idx, 'annotation_class_id'].iat[0]

        # point the project's lane at the z-stack files
        for hg in ['Raw Data', 'Segmentation Image']:
            set_ds(hg, 'filePath', str(paths[hg]))
            set_ds(hg, 'nickname', paths[hg].stem)
            shape_zyx = [accessors[hg].shape_dict[ax] for ax in ['Z', 'Y', 'X']]
            set_ds(hg, 'shape', np.array(shape_zyx))

        # replace label names
        del h5['ObjectClassification/LabelNames']
        ln = np.array(label_names)
        h5.create_dataset('ObjectClassification/LabelNames', data=ln.astype('O'))

        # rewrite object labels, one two-element dataset per z-slice
        la_groupname = f'ObjectClassification/LabelInputs/{lns}'
        del h5[la_groupname]
        lag = h5.create_group(la_groupname)
        for zi in range(0, nz):
            lag[f'{zi}'] = np.array([0., float(get_label(zi))])

    return new_ilp
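
# Sanity-check sketch: reopen the generated project and confirm that one
# label input was written per z-slice (nz comes from the training stack):
#   with h5py.File(new_ilp, 'r') as h5:
#       assert get_dataset_info(h5)['misc']['number_of_label_inputs'] == nz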

if __name__ == '__main__':
    root = Path('c:/Users/rhodes/projects/proj0011-plankton-seg/')
    template_ilp = root / 'exp0014/template_obj.ilp'
    where_patch_stack = root / 'exp0009/output/labeled_patches-20231016-0002'
    new_ilp = generate_ilastik_object_classifier(
        template_ilp,
        where_patch_stack,
    )
    train_zstack = generate_file_accessor(where_patch_stack / 'zstack_train_raw.tif')
    mod = IlastikObjectClassifierModel({'project_file': new_ilp})
    print(mod.project_file_abspath)
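    # Next step (work in progress): run the classifier on train_zstack to
    # validate the autogenerated project online. An inference entry point that
    # accepts binary (mask) inputs does not exist yet, so the call below is
    # hypothetical:
    #   result = mod.infer(train_zstack)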