import csv
from pathlib import Path

import czifile
import numpy as np
import pandas as pd

from model_server.accessors import InMemoryDataAccessor


def dump_czi_subblock_table(czif: czifile.CziFile, where: Path):
    """Write the shape and start of every subblock directory entry to a CSV.

    The file is created as ``subblocks.csv`` inside ``where``. It has one
    ``shape_<axis>`` and one ``start_<axis>`` column per axis listed in
    ``czif.axes``, and one row per entry of ``czif.subblock_directory``.

    Args:
        czif: open CZI file to inspect.
        where: existing directory in which the CSV is written.

    Raises:
        ValueError: raised by ``csv.DictWriter`` if a subblock has an axis
            that is not present in ``czif.axes``.
    """
    csvfn = Path(where) / 'subblocks.csv'
    axes = list(czif.axes)
    fieldnames = ['shape_' + a for a in axes] + ['start_' + a for a in axes]

    with open(csvfn, 'w', newline='', encoding='utf-8') as csvf:
        wr = csv.DictWriter(csvf, fieldnames)
        wr.writeheader()
        for sb in czif.subblock_directory:
            shape = dict(zip(['shape_' + a for a in sb.axes], sb.shape))
            start = dict(zip(['start_' + a for a in sb.axes], sb.start))
            wr.writerow(shape | start)
    print(f'Dumped CSV to {csvfn}')


def dump_czi_metadata(czif: czifile.CziFile, where: Path):
    """Write the metadata returned by ``czif.metadata()`` to an XML file.

    The file is created as ``czi_meta.xml`` inside ``where``.

    Args:
        czif: open CZI file whose metadata is dumped.
        where: existing directory in which the XML file is written.
    """
    xmlfn = Path(where) / 'czi_meta.xml'
    # Explicit UTF-8: the metadata may contain non-ASCII characters, and the
    # platform default codec cannot be relied on to encode them.
    with open(xmlfn, 'w', encoding='utf-8') as xmlf:
        xmlf.write(czif.metadata())
    print(f'Dumped XML to {xmlfn}')


def get_accessor_from_multiposition_czi(cf: czifile.CziFile, pi: int):
    """Assemble one position of a multi-position CZI into a YXCZ accessor.

    Assumes that each channel of the requested position is stored in its own
    subblock, i.e. every subblock is single-channel.

    Args:
        cf: open CZI file.
        pi: position index, matched against the ``S`` axis start of each
            subblock directory entry.

    Returns:
        An ``InMemoryDataAccessor`` wrapping an array of shape
        ``(Y, X, C, Z)`` and dtype ``cf.dtype``.

    Raises:
        AssertionError: if no subblock matches ``pi``, if the channel starts
            are not exactly ``0..nc-1``, if the matching subblocks differ in
            any start other than ``C`` or in shape, or if a subblock has a
            non-unit dimension other than Y, X or Z.
    """
    directory = cf.subblock_directory

    # One row per directory entry, one column per axis, holding start offsets.
    df = pd.DataFrame([dict(zip(sbd.axes, sbd.start)) for sbd in directory])
    dfq = df[df['S'] == pi]

    c_arr = dfq['C'].sort_values()
    nc = len(dfq)

    assert nc > 0, f'No subblocks found at position index {pi}'

    # channel starts must be sequential and 0-indexed
    assert list(c_arr) == list(range(nc)), (
        f'Channel indices at position {pi} are not sequential from zero: {list(c_arr)}'
    )

    # all other start coordinates must agree across the selected subblocks
    assert all(dfq.drop(['C'], axis=1).nunique() == 1), (
        f'Subblocks at position {pi} differ in a start coordinate other than C'
    )

    df_shapes = pd.DataFrame(
        [dict(zip(directory[i].axes, directory[i].shape)) for i in dfq.index]
    )
    assert all(df_shapes.nunique() == 1), (
        f'Subblocks at position {pi} do not all have the same shape'
    )

    (h, w, nz) = tuple(df_shapes.loc[0, ['Y', 'X', 'Z']])
    yxcz = np.zeros((h, w, nc, nz), dtype=cf.dtype)

    # Materialize the subblocks once rather than on every channel iteration.
    # NOTE(review): indexing this list with directory row labels assumes that
    # cf.subblocks() yields in the same order as cf.subblock_directory.
    subblocks = list(cf.subblocks())

    # iterate over mono subblocks; c_arr maps directory index -> channel index
    for sbi, ci in c_arr.items():
        sb = subblocks[sbi]
        data = sb.data()
        sd = dict(zip(sb.axes, sb.shape))

        # within a single subblock, only Y, X and Z may be non-unit
        unexpected = {k: v for k, v in sd.items() if v != 1 and k not in 'YXZ'}
        assert not unexpected, (
            f'Subblock {sbi} has non-unit dimensions other than Y, X, Z: {unexpected}'
        )

        # Bring Y, X, Z to the front, then drop the remaining unit axes.
        yxz = np.moveaxis(
            data,
            [sb.axes.index(k) for k in 'YXZ'],
            [0, 1, 2],
        ).squeeze(
            axis=tuple(range(3, len(sd)))
        )
        yxcz[:, :, ci, :] = yxz
    return InMemoryDataAccessor(yxcz)