Skip to content
Snippets Groups Projects
Commit cd2fe9b8 authored by Christopher Randolph Rhodes's avatar Christopher Randolph Rhodes
Browse files

Moved more of batch runner into utility methods

parent 7695e1f0
No related branches found
No related tags found
No related merge requests found
from pathlib import Path
import re
# from time import localtime, strftime
import pandas as pd
from extensions.chaeo.util import autonumber_new_directory, get_matching_files
from extensions.chaeo.util import autonumber_new_directory, get_matching_files, loop_workflow
from extensions.chaeo.workflows import export_patches_from_multichannel_zstack
from model_server.accessors import InMemoryDataAccessor, write_accessor_data_to_file
if __name__ == '__main__':
where_czi = 'z:/rhodes/projects/proj0004-marine-photoactivation/data/exp0038/AutoMic/20230906-163415/Selection'
......@@ -17,48 +11,33 @@ if __name__ == '__main__':
'batch-output'
)
csv_args = {'mode': 'w', 'header': True} # when creating file
px_ilp = Path.home() / 'model-server' / 'ilastik' / 'AF405-bodies_boundaries.ilp'
#TODO: try/catch blocks and error handling around workflow calls
#TODO: pack JSON-serializable workflow inputs
input_files = get_matching_files(where_czi, 'czi', coord_filter={'P': (0, 10)})
for ff in input_files:
export_kwargs = {
'input_zstack_path': Path(where_czi) / ff.__str__(),
'ilastik_project_file': px_ilp.__str__(),
'pxmap_threshold': 0.25,
'pixel_class': 0,
'zmask_channel': 0,
'patches_channel': 4,
'where_output': where_output,
'mask_type': 'boxes',
'zmask_filters': {'area': (1e3, 1e8)},
'zmask_expand_box_by': (128, 3),
'export_pixel_probabilities': False,
'export_2d_patches_for_training': True,
'export_2d_patches_for_annotation': False,
'export_3d_patches': False,
'export_annotated_zstack': False,
'export_patch_masks': False,
'export_patch_label_maps': True,
}
result = export_patches_from_multichannel_zstack(**export_kwargs)
# parse and record results
df = result['dataframe']
df['source_path'] = ff
df.to_csv(where_output / 'df_objects.csv', index=False, **csv_args)
pd.DataFrame(result['timer_results'], index=[0]).to_csv(where_output / 'timer_results.csv', **csv_args)
pd.json_normalize(export_kwargs).to_csv(where_output / 'workflow_params.csv', **csv_args)
csv_args = {'mode': 'a', 'header': False} # append to CSV from here on
params = {
'ilastik_project_file': px_ilp.__str__(),
'pxmap_threshold': 0.25,
'pixel_class': 0,
'zmask_channel': 0,
'patches_channel': 4,
'mask_type': 'boxes',
'zmask_filters': {'area': (1e3, 1e8)},
'zmask_expand_box_by': (128, 3),
'export_pixel_probabilities': False,
'export_2d_patches_for_training': True,
'export_2d_patches_for_annotation': False,
'export_3d_patches': False,
'export_annotated_zstack': False,
'export_patch_masks': False,
'export_patch_label_maps': True,
}
input_files = get_matching_files(where_czi, 'czi', coord_filter={'P': (0, 10)}, )
loop_workflow(
input_files,
where_output,
export_patches_from_multichannel_zstack,
params,
)
# export intermediate data if flagged
for k in result['interm'].keys():
write_accessor_data_to_file(
where_output / k / (ff.stem + '.tif'),
InMemoryDataAccessor(result['interm'][k])
)
\ No newline at end of file
print('Finished')
\ No newline at end of file
......@@ -2,6 +2,10 @@ from pathlib import Path
import re
from time import localtime, strftime
import pandas as pd
from model_server.accessors import InMemoryDataAccessor, write_accessor_data_to_file
def autonumber_new_directory(where: str, prefix: str) -> str:
yyyymmdd = strftime('%Y%m%d', localtime())
......@@ -10,8 +14,9 @@ def autonumber_new_directory(where: str, prefix: str) -> str:
ma = re.match(f'{prefix}-{yyyymmdd}-([\d]+)', ff.name)
if ma:
idx = max(idx, int(ma.groups()[0]) + 1)
return (Path(where) / f'batch-output-{yyyymmdd}-{idx:04d}').__str__()
new_path = (Path(where) / f'batch-output-{yyyymmdd}-{idx:04d}')
new_path.mkdir(parents=True, exist_ok=False)
return new_path.__str__()
def get_matching_files(where: str, ext: str, coord_filter: dict={}) -> str:
files = []
......@@ -33,4 +38,52 @@ def get_matching_files(where: str, ext: str, coord_filter: dict={}) -> str:
if is_filtered_out(ff):
continue
files.append(ff.__str__())
return files
\ No newline at end of file
return files
def loop_workflow(files, where_output, workflow_func, params,
                  write_intermediate_products=True):
    """Run a workflow over a batch of input files, accumulating results in CSVs.

    For each file in `files`, calls `workflow_func(input_zstack_path=..., where_output=..., **params)`.
    Per-file results are appended to three CSVs in `where_output`:
    workflow_data.csv, timer_results.csv, and workflow_parameters.csv, each
    tagged with an `input_file` column. Files that raise are recorded in
    failures.csv (with the exception message) and do not abort the batch.

    :param files: iterable of input file paths (str or Path)
    :param where_output: directory in which all CSVs (and intermediate
        products) are written; assumed to already exist
    :param workflow_func: callable returning a dict with keys 'dataframe'
        (pd.DataFrame), 'timer_results' (scalar mapping), and 'interm'
        (mapping of name -> accessor data)
    :param params: extra keyword arguments merged into each workflow call
    :param write_intermediate_products: if True, write each entry of
        result['interm'] as a TIFF under where_output/<key>/
    """
    failures = []
    # Header must be written on the first *successful* result, not the first
    # file: if file 0 fails, keying on the loop index would append headerless
    # rows to a CSV that was never created.
    first_write = True
    for ff in files:
        export_kwargs = {
            'input_zstack_path': ff,
            'where_output': where_output,
            **params,
        }

        # Record failure information and keep going; one bad file should not
        # abort the whole batch.
        try:
            result = workflow_func(**export_kwargs)
        except Exception as e:
            failures.append({
                'input_file': ff,
                'error_message': str(e),
            })
            print(f'Caught failure on {ff}:\n{str(e)}')
            continue

        # Dataframes associated with this file's workflow results
        batch_csv = {
            'workflow_data': result['dataframe'],
            'timer_results': pd.DataFrame(result['timer_results'], index=[0]),
            'workflow_parameters': pd.json_normalize(export_kwargs),
        }
        if first_write:
            csv_args = {'mode': 'w', 'header': True}
        else:  # append to existing file
            csv_args = {'mode': 'a', 'header': False}
        for k, df in batch_csv.items():
            df['input_file'] = ff
            df.to_csv(Path(where_output) / f'{k}.csv', index=False, **csv_args)
        first_write = False

        # Export intermediate data if flagged; assumes
        # write_accessor_data_to_file creates/handles the per-key
        # subdirectory — TODO confirm against its implementation.
        if write_intermediate_products:
            for k, data in result['interm'].items():
                write_accessor_data_to_file(
                    Path(where_output) / k / (Path(ff).stem + '.tif'),
                    InMemoryDataAccessor(data)
                )

    pd.DataFrame(failures).to_csv(Path(where_output) / 'failures.csv', index=False)
\ No newline at end of file
......@@ -12,6 +12,7 @@ from model_server.accessors import generate_file_accessor, InMemoryDataAccessor,
from model_server.workflows import Timer
# TODO: unpack and validate inputs
# TODO: expose channel indices and color balance vectors to caller
def export_patches_from_multichannel_zstack(
input_zstack_path: str,
ilastik_project_file: str,
......@@ -106,6 +107,23 @@ def export_patches_from_multichannel_zstack(
# prepopulate patch UUID
df['patch_id'] = df.apply(lambda _: uuid4(), axis=1)
if export_2d_patches_for_training:
files = export_multichannel_patches_from_zstack(
Path(where_output) / '2d_patches',
stack.get_one_channel_data(4),
zmask_meta,
prefix=fstem,
rescale_clip=0.001,
make_3d=False,
focus_metric='max_sobel',
)
df_patches = pd.DataFrame(files)
ti.click('export_2d_patches')
# associate 2d patches, dropping labeled objects that were not exported as patches
df = pd.merge(df, df_patches, left_index=True, right_on='df_index').drop(columns='df_index')
# prepopulate patch UUID
df['patch_id'] = df.apply(lambda _: uuid4(), axis=1)
if export_patch_masks:
files = export_patch_masks_from_zstack(
Path(where_output) / 'patch_masks',
......@@ -143,4 +161,24 @@ def export_patches_from_multichannel_zstack(
'timer_results': ti.events,
'dataframe': df,
'interm': interm,
}
\ No newline at end of file
}
def transfer_ecotaxa_labels_to_patch_object_maps(
path_to_patches: str,
path_to_ecotaxa_tsv: str,
path_output: str,
) -> Dict:
where_patches = Path(path_to_patches)
df_meta = pd.read_csv(
path_to_ecotaxa_tsv,
sep='\t',
header=[0, 1],
dtype={
('object_annotation_date', '[t]'): str,
('object_annotation_time', '[t]'): str,
('object_annotation_category_id', '[t]'): str,
}
)
for pp in where_patches.iterdir():
patch = generate_file_accessor(pp)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment