From cd2fe9b81b525af99a46093c033b20a92e16c7d0 Mon Sep 17 00:00:00 2001 From: Christopher Rhodes <christopher.rhodes@embl.de> Date: Thu, 12 Oct 2023 10:53:51 +0200 Subject: [PATCH] Moved more of batch runner into utility methods --- .../chaeo/examples/batch_run_patches.py | 77 +++++++------------ extensions/chaeo/util.py | 59 +++++++++++++- extensions/chaeo/workflows.py | 40 +++++++++- 3 files changed, 123 insertions(+), 53 deletions(-) diff --git a/extensions/chaeo/examples/batch_run_patches.py b/extensions/chaeo/examples/batch_run_patches.py index f0edacd7..15ef8c99 100644 --- a/extensions/chaeo/examples/batch_run_patches.py +++ b/extensions/chaeo/examples/batch_run_patches.py @@ -1,14 +1,8 @@ from pathlib import Path -import re -# from time import localtime, strftime -import pandas as pd - -from extensions.chaeo.util import autonumber_new_directory, get_matching_files +from extensions.chaeo.util import autonumber_new_directory, get_matching_files, loop_workflow from extensions.chaeo.workflows import export_patches_from_multichannel_zstack -from model_server.accessors import InMemoryDataAccessor, write_accessor_data_to_file - if __name__ == '__main__': where_czi = 'z:/rhodes/projects/proj0004-marine-photoactivation/data/exp0038/AutoMic/20230906-163415/Selection' @@ -17,48 +11,33 @@ if __name__ == '__main__': 'batch-output' ) - csv_args = {'mode': 'w', 'header': True} # when creating file px_ilp = Path.home() / 'model-server' / 'ilastik' / 'AF405-bodies_boundaries.ilp' - #TODO: try/catch blocks and error handling around workflow calls - #TODO: pack JSON-serializable workflow inputs - - input_files = get_matching_files(where_czi, 'czi', coord_filter={'P': (0, 10)}) - for ff in input_files: - - export_kwargs = { - 'input_zstack_path': Path(where_czi) / ff.__str__(), - 'ilastik_project_file': px_ilp.__str__(), - 'pxmap_threshold': 0.25, - 'pixel_class': 0, - 'zmask_channel': 0, - 'patches_channel': 4, - 'where_output': where_output, - 'mask_type': 'boxes', - 'zmask_filters': {'area': (1e3, 1e8)}, - 'zmask_expand_box_by': (128, 3), - 'export_pixel_probabilities': False, - 'export_2d_patches_for_training': True, - 'export_2d_patches_for_annotation': False, - 'export_3d_patches': False, - 'export_annotated_zstack': False, - 'export_patch_masks': False, - 'export_patch_label_maps': True, - } - - result = export_patches_from_multichannel_zstack(**export_kwargs) - - # parse and record results - df = result['dataframe'] - df['source_path'] = ff - df.to_csv(where_output / 'df_objects.csv', index=False, **csv_args) - pd.DataFrame(result['timer_results'], index=[0]).to_csv(where_output / 'timer_results.csv', **csv_args) - pd.json_normalize(export_kwargs).to_csv(where_output / 'workflow_params.csv', **csv_args) - csv_args = {'mode': 'a', 'header': False} # append to CSV from here on + params = { + 'ilastik_project_file': px_ilp.__str__(), + 'pxmap_threshold': 0.25, + 'pixel_class': 0, + 'zmask_channel': 0, + 'patches_channel': 4, + 'mask_type': 'boxes', + 'zmask_filters': {'area': (1e3, 1e8)}, + 'zmask_expand_box_by': (128, 3), + 'export_pixel_probabilities': False, + 'export_2d_patches_for_training': True, + 'export_2d_patches_for_annotation': False, + 'export_3d_patches': False, + 'export_annotated_zstack': False, + 'export_patch_masks': False, + 'export_patch_label_maps': True, + } + + input_files = get_matching_files(where_czi, 'czi', coord_filter={'P': (0, 10)}, ) + + loop_workflow( + input_files, + where_output, + export_patches_from_multichannel_zstack, + params, + ) - # export intermediate data if flagged - for k in result['interm'].keys(): - write_accessor_data_to_file( - where_output / k / (ff.stem + '.tif'), - InMemoryDataAccessor(result['interm'][k]) - ) \ No newline at end of file + print('Finished') \ No newline at end of file diff --git a/extensions/chaeo/util.py b/extensions/chaeo/util.py index 402d05dd..7a8406db 100644 --- a/extensions/chaeo/util.py +++ b/extensions/chaeo/util.py @@ -2,6 +2,10 @@ from pathlib import Path import re from time import localtime, strftime +import pandas as pd + +from model_server.accessors import InMemoryDataAccessor, write_accessor_data_to_file + def autonumber_new_directory(where: str, prefix: str) -> str: yyyymmdd = strftime('%Y%m%d', localtime()) @@ -10,8 +14,9 @@ def autonumber_new_directory(where: str, prefix: str) -> str: ma = re.match(f'{prefix}-{yyyymmdd}-([\d]+)', ff.name) if ma: idx = max(idx, int(ma.groups()[0]) + 1) - - return (Path(where) / f'batch-output-{yyyymmdd}-{idx:04d}').__str__() + new_path = (Path(where) / f'batch-output-{yyyymmdd}-{idx:04d}') + new_path.mkdir(parents=True, exist_ok=False) + return new_path.__str__() def get_matching_files(where: str, ext: str, coord_filter: dict={}) -> str: files = [] @@ -33,4 +38,52 @@ def get_matching_files(where: str, ext: str, coord_filter: dict={}) -> str: if is_filtered_out(ff): continue files.append(ff.__str__()) - return files \ No newline at end of file + return files + + +def loop_workflow(files, where_output, workflow_func, params, + write_intermediate_products=True): + failures = [] + for ii, ff in enumerate(files): + export_kwargs = { + 'input_zstack_path': ff, + 'where_output': where_output, + **params, + } + + # record failure information + try: + result = workflow_func(**export_kwargs) + except Exception as e: + failures.append({ + 'input_file': ff, + 'error_message': e.__str__(), + }) + print(f'Caught failure on {ff}:\n{e.__str__()}') + continue + + # record dataframes associated with workflow results + batch_csv = { + 'workflow_data': result['dataframe'], + 'timer_results': pd.DataFrame(result['timer_results'], index=[0]), + 'workflow_parameters': pd.json_normalize(export_kwargs), + } + for k in batch_csv.keys(): + df = batch_csv[k] + df['input_file'] = ff + if ii == 0: + csv_args = {'mode': 'w', 'header': True} + else: # append to existing file + csv_args = {'mode': 'a', 'header': False} + csv_path = Path(where_output) / f'{k}.csv' + df.to_csv(csv_path, index=False, **csv_args) + + # export intermediate data if flagged + if write_intermediate_products: + for k in result['interm'].keys(): + write_accessor_data_to_file( + Path(where_output) / k / (Path(ff).stem + '.tif'), + InMemoryDataAccessor(result['interm'][k]) + ) + + pd.DataFrame(failures).to_csv(Path(where_output) / 'failures.csv') \ No newline at end of file diff --git a/extensions/chaeo/workflows.py b/extensions/chaeo/workflows.py index 9a290ec5..332f8f9f 100644 --- a/extensions/chaeo/workflows.py +++ b/extensions/chaeo/workflows.py @@ -12,6 +12,7 @@ from model_server.accessors import generate_file_accessor, InMemoryDataAccessor, from model_server.workflows import Timer # TODO: unpack and validate inputs +# TODO: expose channel indices and color balance vectors to caller def export_patches_from_multichannel_zstack( input_zstack_path: str, ilastik_project_file: str, @@ -106,6 +107,23 @@ def export_patches_from_multichannel_zstack( # prepopulate patch UUID df['patch_id'] = df.apply(lambda _: uuid4(), axis=1) + if export_2d_patches_for_training: + files = export_multichannel_patches_from_zstack( + Path(where_output) / '2d_patches', + stack.get_one_channel_data(4), + zmask_meta, + prefix=fstem, + rescale_clip=0.001, + make_3d=False, + focus_metric='max_sobel', + ) + df_patches = pd.DataFrame(files) + ti.click('export_2d_patches') + # associate 2d patches, dropping labeled objects that were not exported as patches + df = pd.merge(df, df_patches, left_index=True, right_on='df_index').drop(columns='df_index') + # prepopulate patch UUID + df['patch_id'] = df.apply(lambda _: uuid4(), axis=1) + if export_patch_masks: files = export_patch_masks_from_zstack( Path(where_output) / 'patch_masks', @@ -143,4 +161,24 @@ def export_patches_from_multichannel_zstack( 'timer_results': ti.events, 'dataframe': df, 'interm': interm, - } \ No newline at end of file + } + +def transfer_ecotaxa_labels_to_patch_object_maps( + path_to_patches: str, + path_to_ecotaxa_tsv: str, + path_output: str, +) -> Dict: + where_patches = Path(path_to_patches) + df_meta = pd.read_csv( + path_to_ecotaxa_tsv, + sep='\t', + header=[0, 1], + dtype={ + ('object_annotation_date', '[t]'): str, + ('object_annotation_time', '[t]'): str, + ('object_annotation_category_id', '[t]'): str, + } + ) + for pp in where_patches.iterdir(): + patch = generate_file_accessor(pp) + -- GitLab