Documented utility functions for batch running

e6ba8694 · Christopher Randolph Rhodes · f4cd6625 · e6ba8694 · e6ba8694 · e6ba8694
Commit e6ba8694 authored 1 year ago by Christopher Randolph Rhodes
--- a/extensions/chaeo/actual_runs/20230805_kristineberg_PA.py
+++ b/extensions/chaeo/actual_runs/20230805_kristineberg_PA.py
 from pathlib import Path
-from extensions.chaeo.util import autonumber_new_directory, get_matching_files, loop_workflow
+from model_server.util import autonumber_new_directory, get_matching_files, loop_workflow
 from extensions.chaeo.workflows import export_patches_from_multichannel_zstack
 from model_server.accessors import CziImageFileAccessor, write_accessor_data_to_file, InMemoryDataAccessor

--- a/extensions/chaeo/actual_runs/20230807_kristineberg_spiked.py
+++ b/extensions/chaeo/actual_runs/20230807_kristineberg_spiked.py
 from pathlib import Path
-from extensions.chaeo.util import autonumber_new_directory, get_matching_files, loop_workflow
+from model_server.util import autonumber_new_directory, get_matching_files, loop_workflow
 from extensions.chaeo.workflows import export_patches_from_multichannel_zstack
-from model_server.accessors import CziImageFileAccessor, write_accessor_data_to_file, InMemoryDataAccessor
-from model_server.process import rescale
 # TODO: support list-comp of single image sequence in multiple locations
 # TODO: split multi-pos CZI into sequence of accessors, append to list

--- a/extensions/chaeo/actual_runs/20231008_Bilbao_PA.py
+++ b/extensions/chaeo/actual_runs/20231008_Bilbao_PA.py
@@ -2,7 +2,7 @@ from pathlib import Path
 import pandas as pd
-from extensions.chaeo.util import autonumber_new_directory, get_matching_files, loop_workflow
+from model_server.util import autonumber_new_directory, get_matching_files, loop_workflow
 from extensions.chaeo.workflows import export_patches_from_multichannel_zstack
 def write_ecotaxa_tsv(patches_csv_path, where):

--- a/extensions/chaeo/actual_runs/proj0004-exp0038-fixed.py
+++ b/extensions/chaeo/actual_runs/proj0004-exp0038-fixed.py
 from pathlib import Path
-from extensions.chaeo.util import autonumber_new_directory, get_matching_files, loop_workflow
+from model_server.util import autonumber_new_directory, get_matching_files, loop_workflow
 from extensions.chaeo.workflows import export_patches_from_multichannel_zstack
 if __name__ == '__main__':

--- a/extensions/chaeo/examples/batch_run_patches.py
+++ b/extensions/chaeo/examples/batch_run_patches.py
 from pathlib import Path
-from extensions.chaeo.util import autonumber_new_directory, get_matching_files, loop_workflow
+from model_server.util import autonumber_new_directory, get_matching_files, loop_workflow
 from extensions.chaeo.workflows import export_patches_from_multichannel_zstack
 if __name__ == '__main__':

--- a/extensions/chaeo/examples/label_patches.py
+++ b/extensions/chaeo/examples/label_patches.py
 from pathlib import Path
-from extensions.chaeo.util import autonumber_new_directory, get_matching_files, loop_workflow
+from model_server.util import autonumber_new_directory
 from extensions.chaeo.workflows import transfer_ecotaxa_labels_to_patch_stacks
 if __name__ == '__main__':

--- a/extensions/chaeo/util.py
+++ b/extensions/chaeo/util.py
@@ -7,6 +7,12 @@ import pandas as pd
 from model_server.accessors import InMemoryDataAccessor, write_accessor_data_to_file
 def autonumber_new_directory(where: str, prefix: str) -> str:
+    """
+    Create a new subdirectory with a unique name that includes today's date
+    :param where: path of top-level directory in which to create a subdirectory
+    :param prefix: prefix of new subdirectory's name
+    :return: path to newly created subdirectory
+    """
    Path(where).mkdir(parents=True, exist_ok=True)
    yyyymmdd = strftime('%Y%m%d', localtime())
@@ -20,6 +26,13 @@ def autonumber_new_directory(where: str, prefix: str) -> str:
    return new_path.__str__()
 def autonumber_new_file(where: str, prefix: str, ext: str) -> str:
+    """
+    Create a filename that is unique in the specified directory
+    :param where: path of top-level directory where new file should be
+    :param prefix: prefix of new file's name
+    :param ext: extension of new file, not including '.'
+    :return: name of new file including its extension, but not its directory path
+    """
    idx = 0
    for ff in Path(where).iterdir():
        ma = re.match(f'{prefix}-([\d]+).{ext}', ff.name)
@@ -28,6 +41,14 @@ def autonumber_new_file(where: str, prefix: str, ext: str) -> str:
    return f'{prefix}-{idx:04d}.{ext}'
 def get_matching_files(where: str, ext: str, coord_filter: dict={}) -> list:
+    """
+    Return a list of files in the specified directory with the given extension
+    :param where: path of directory in which to search for files
+    :param ext: search only for files with this extension, not including '.'
+    :param coord_filter: (optional) return only filenames with dash-delimited coordinates in this range:
+        e.g. {'X': (5, 10)} includes file_X06.ext but neither file_X02.ext nor file_X10.ext
+    :return: list of paths to files
+    """
    files = []
    def is_filtered_out(ff):
@@ -50,9 +71,25 @@ def get_matching_files(where: str, ext: str, coord_filter: dict={}) -> list:
    return files
-def loop_workflow(files, output_folder_path, workflow_func, params,
+def loop_workflow(
-                  export_batch_csvs=True,
+        files: list,
-                  write_intermediate_products=True, catch_and_continue=True):
+        output_folder_path: str,
+        workflow_func: callable,
+        params: dict,
+        export_batch_csvs: bool = True,
+        write_intermediate_products: bool = True,
+        catch_and_continue: bool = True,
+):
+    """
+    Iteratively call the specified workflow function on each of a list of input files
+    :param files: list of filepaths
+    :param output_folder_path: path to top-level directory to which all results will be written
+    :param workflow_func: function whose first two arguments are an input filename and an output directory
+    :param params: dictionary of keyword arguments that get passed to workflow_func
+    :param export_batch_csvs: if True, write any tabular data returned by workflow_func to CSV files
+    :param write_intermediate_products: if True, write any intermediate image products to TIF files
+    :param catch_and_continue: if True, catch exceptions raised by workflow_func and keep iterating
+    """
    failures = []
    for ii, ff in enumerate(files):
        export_kwargs = {