From 6488faa04fa1ea2fed5b379569bed5b7c6219929 Mon Sep 17 00:00:00 2001 From: Constantin Pape <constantin.pape@iwr.uni-heidelberg.de> Date: Thu, 15 Aug 2019 18:05:36 +0200 Subject: [PATCH] Refactor update_patch to account for new data management --- scripts/files/sources.py | 25 ++- update_patch.py | 328 +++++++++++++-------------------------- 2 files changed, 132 insertions(+), 221 deletions(-) diff --git a/scripts/files/sources.py b/scripts/files/sources.py index 55beb50..2a2018c 100644 --- a/scripts/files/sources.py +++ b/scripts/files/sources.py @@ -136,7 +136,30 @@ def add_segmentation(source_name, name, segmentation_path=None, resolution=None, table_update_function=None, copy_data=True): """ Add segmentation volume to the platy browser data. - TODO explain more + We distinguish between static and dynamic segmentations. A dynamic segmentation is generated from + a paintera project and can change due to corrections made in paintera, while a static segmentation + is just added once and does not change. + In addition, we can add tables associated with the data that contain derived data. + For static segmentations you need to pass a dict containing table names and paths, + for dynamic segmentations you need to register a function name that will compute the tables. + + Adding a static segmentation: + ``` + # if you add tables, one must have the name 'default' + add_segmentation(source_name, seg_name, + segmentation_path='/path/to/input-segmentation.xml', + table_path_dict={'default': '/path/to/default-table.csv', + 'other': '/path/to/other-table.csv'}) + ``` + + Adding a dynamic segmentation: + ``` + # 'update_seg_table' must be importable from 'scripts.attributes' + add_segmentation(source_name, seg_name, + paintera_project=('/path/to/paintera/root.n5', '/path/in/file'), + resolution=(.025, .02, .02), # resolution in microns, must be passed for dynamic seg + table_update_function='update_seg_table') + ``` Paramter: source_name [str] - prefix of the primary data source. diff --git a/update_patch.py b/update_patch.py index 1fccef7..b4929b2 100755 --- a/update_patch.py +++ b/update_patch.py @@ -2,145 +2,67 @@ import os import argparse -from subprocess import check_output, call +from copy import deepcopy +from glob import glob from shutil import rmtree +from subprocess import check_output, call -from scripts.attributes import make_cell_tables, make_nucleus_tables, make_cilia_tables +import scripts.attributes +from scripts.files import get_segmentation_names, get_segmentations +from scripts.files import (copy_image_data, copy_misc_data, copy_segmentation, copy_tables, + make_bdv_server_file, make_folder_structure) from scripts.export import export_segmentation -from scripts.files import copy_image_data, copy_misc_data, make_bdv_server_file -from scripts.files import copy_tables, copy_segmentation, make_folder_structure - - -# paths for paintera projects -# in order to get the new segmentation, changes need to be committed, -# s.t. they are stored in these files! -PAINTERA_ROOT = '/g/kreshuk/data/arendt/platyneris_v1/data.n5' -PROJECT_CELLS = 'volumes/paintera/proofread_cells' -PROJECT_NUCLEI = 'volumes/paintera/nuclei' -PROJECT_CILIA = 'volumes/paintera/cilia' - -# name for cell and nucleus segmentations -NAME_CELLS = 'sbem-6dpf-1-whole-segmented-cells-labels' -NAME_NUCLEI = 'sbem-6dpf-1-whole-segmented-nuclei-labels' -NAME_CILIA = 'sbem-6dpf-1-whole-segmented-cilia-labels' - -# resolutions of cell and nucleus segmentation -RES_CELLS = [.025, .02, .02] -RES_NUCLEI = [.1, .08, .08] -RES_CILIA = [.025, .01, .01] - - -def check_inputs(update_cell_segmentation, - update_nucleus_segmentation, - update_cilia_segmentation, - update_cell_tables, - update_nucleus_tables, - update_cilia_tables): - inputs = (update_cell_segmentation, update_nucleus_segmentation, - update_cell_tables, update_nucleus_tables, - update_cilia_segmentation, update_cilia_tables) - - have_changes = any(inputs) - if update_cell_segmentation: - update_cell_tables = True - if update_nucleus_segmentation: - update_nucleus_tables = True - if update_cilia_segmentation: - update_cilia_tables = True - - return {'have_changes': have_changes, - 'update_cell_tables': update_cell_tables, - 'update_nucleus_tables': update_nucleus_tables, - 'update_cilia_tables': update_cilia_tables} - - -def get_tags(new_tag): + + +def get_tags(): tag = check_output(['git', 'describe', '--abbrev=0']).decode('utf-8').rstrip('\n') - if new_tag == '': - new_tag = tag.split('.') - new_tag[-1] = str(int(new_tag[-1]) + 1) - new_tag = '.'.join(new_tag) + new_tag = tag.split('.') + new_tag[-1] = str(int(new_tag[-1]) + 1) + new_tag = '.'.join(new_tag) return tag, new_tag -def export_segmentations(folder, new_folder, - update_cell_segmentation, - update_nucleus_segmentation, - update_cilia_segmentation, - target, max_jobs): - # update or copy cell segmentation - if update_cell_segmentation: - tmp_cells_seg = 'tmp_export_cells' - export_segmentation(PAINTERA_ROOT, PROJECT_CELLS, - folder, new_folder, NAME_CELLS, - resolution=RES_CELLS, - tmp_folder=tmp_cells_seg, - target=target, max_jobs=max_jobs) - else: - copy_segmentation(folder, new_folder, NAME_CELLS) - - # update or copy nucleus segmentation - if update_nucleus_segmentation: - tmp_nuc_seg = 'tmp_export_nuclei' - export_segmentation(PAINTERA_ROOT, PROJECT_NUCLEI, - folder, new_folder, NAME_NUCLEI, - resolution=RES_NUCLEI, - tmp_folder=tmp_nuc_seg, - target=target, max_jobs=max_jobs) - else: - copy_segmentation(folder, new_folder, NAME_NUCLEI) - - # update or copy cilia segmentation - if update_cilia_segmentation: - tmp_cilia_seg = 'tmp_export_cilia' - export_segmentation(PAINTERA_ROOT, PROJECT_CILIA, - folder, new_folder, NAME_CILIA, - resolution=RES_CILIA, - tmp_folder=tmp_cilia_seg, - target=target, max_jobs=max_jobs) - else: - copy_segmentation(folder, new_folder, NAME_CILIA) - - # copy static segmentations - # we also treat the chromatin segmentation as static for now, - # but might change this at some point - static_seg_names = ('sbem-6dpf-1-whole-segmented-muscles', - 'sbem-6dpf-1-whole-segmented-tissue-labels', - 'sbem-6dpf-1-whole-segmented-chromatin-labels') - for seg_name in static_seg_names: - copy_segmentation(folder, new_folder, seg_name) - - -def make_attributes(folder, new_folder, - update_cell_tables, - update_nucleus_tables, - update_cilia_tables, - target, max_jobs): - # update or copy cell tables - if update_cell_tables: - make_cell_tables(new_folder, NAME_CELLS, 'tmp_tables_cells', RES_CELLS, - target=target, max_jobs=max_jobs) - else: - copy_tables(folder, new_folder, NAME_CELLS) +def update_segmentation(name, seg_dict, folder, new_folder, + target, max_jobs): + tmp_folder = 'tmp_export_%s' % name + paintera_root, paintera_key = seg_dict['paintera_project'] + export_segmentation(paintera_root, paintera_key, + folder, new_folder, name, + resolution=seg_dict['resolution'], + tmp_folder=tmp_folder, + target=target, max_jobs=max_jobs) - # update or copy nucleus tables - if update_nucleus_tables: - make_nucleus_tables(new_folder, NAME_NUCLEI, 'tmp_tables_nuclei', RES_NUCLEI, - target=target, max_jobs=max_jobs) - else: - copy_tables(folder, new_folder, NAME_NUCLEI) - if update_cilia_tables: - make_cilia_tables(new_folder, NAME_CILIA, 'tmp_tables_cilia', RES_CILIA, - target=target, max_jobs=max_jobs) - else: - copy_tables(folder, new_folder, NAME_CILIA) +def update_segmentations(folder, new_folder, names_to_update, target, max_jobs): + segmentations = get_segmentations() + segmentation_names = get_segmentation_names() + + for name in segmentation_names: + if name in names_to_update: + update_segmentation(name, segmentations[name], folder, new_folder, + target, max_jobs) + else: + copy_segmentation(folder, new_folder, name) + + +def update_table(name, seg_dict, folder, new_folder, + target, max_jobs): + tmp_folder = 'tmp_tables_%s' % name + update_function = getattr(scripts.attribute, seg_dict['table_update_function']) + update_function(new_folder, name, tmp_folder, seg_dict['resolution'], + target=target, max_jobs=max_jobs) + - # copy tables associated with static segmentations - static_seg_names = ('sbem-6dpf-1-whole-segmented-tissue-labels', - 'sbem-6dpf-1-whole-segmented-chromatin-labels') - for seg_name in static_seg_names: - copy_tables(folder, new_folder, seg_name) +def update_tables(folder, new_folder, names_to_update, target, max_jobs): + segmentations = get_segmentations() + segmentation_names = get_segmentation_names() + + for name in segmentation_names: + if name in names_to_update: + update_table(name, segmentations[name], folder, new_folder, + target, max_jobs) + else: + copy_tables(folder, new_folder, name) # TODO check for errors @@ -156,6 +78,8 @@ def make_release(tag, folder, description=''): def clean_up(): + """ Clean up all tmp folders + """ def remove_dir(dir_name): try: @@ -163,58 +87,52 @@ def clean_up(): except OSError: pass - # remove all tmp folders - remove_dir('tmp_export_cells') - remove_dir('tmp_export_nuclei') - remove_dir('tmp_tables_cells') - remove_dir('tmp_tables_nuclei') + tmp_folders = glob('tmp_*') + for tmp_folder in tmp_folders: + remove_dir(tmp_folder) + + +def check_requested_updates(names_to_update): + segmentations = get_segmentations() + for name in names_to_update: + if name not in segmentations: + raise ValueError("Requested update for %s, which is not a registered segmentation" % name) + if segmentations[name]['is_static']: + raise ValueError("Requested update for %s, which is a static segmentation" % name) # TODO catch all exceptions and handle them properly -def update_platy_browser(update_cell_segmentation=False, - update_nucleus_segmentation=False, - update_cilia_segmentation=False, - update_cell_tables=False, - update_nucleus_tables=False, - update_cilia_tables=False, - description='', - new_tag=''): - """ Generate new version of platy-browser derived data. +def update_patch(update_seg_names, update_table_names, + description='', force_update=False, + target='slurm', max_jobs=250): + """ Generate new patch version of platy-browser derived data. + + The patch version is increased if derived data changes, e.g. by + incorporating corrections for a segmentation or updating tables. Arguments: - update_cell_segmentation: Update the cell segmentation volume. - update_nucleus_segmentation: Update the nucleus segmentation volume. - update_cilia_segmentation: Update the cilia segmentation volume. - update_cell_tables: Update the cell tables. This needs to be specified if the cell - segmentation is not update, but the tables should be updated. - update_nucleus_tables: Update the nucleus tables. This needs to be specified if the nucleus - segmentation is not updated, but the tables should be updated. - update_cilia_tables: Update the cilia tables. This needs to be specified if the cilia - segmentation is not updated, but the tables should be updated. - description: Optional descrption for release message. - new_tag: Optional tag to override the default new tag. + update_seg_names [list[str]] - names of segmentations to be updated. + update_table_names [list[str]] - names of tables to be updated. + Not that these only need to be specified if the corresponding segmentation is not + updated, but the tables should be updated. + description [str] - Optional descrption for release message (default: ''). + force_update [bool] - Force an update if no changes are specified (default: False). + target [str] - + max_jobs [int] - """ - # check inputs - update_dict = check_inputs(update_cell_segmentation, - update_nucleus_segmentation, - update_cilia_segmentation, - update_cell_tables, - update_nucleus_tables, - update_cilia_tables) - - # if an explicit tag was given, we force an update - force_update = new_tag != '' - if not update_dict['have_changes'] and not force_update: - print("Nothing needs to be update, skipping") - return - - update_cell_tables, update_nucleus_tables, update_cilia_tables =\ - update_dict['update_cell_tables'], update_dict['update_nucleus_tables'], update_dict['update_cilia_tables'] - - # we always increase the release tag (in the last digit) - # when making a new version of segmentation or attributes - tag, new_tag = get_tags(new_tag) + # check if we have anything to update + have_seg_updates = len(update_seg_names) > 0 + have_table_updates = len(update_seg_names) > 0 + if not have_seg_updates and not have_table_updates and not force_update: + raise ValueError("No updates where provdied and force_update was not set") + + table_updates = deepcopy(update_seg_names) + table_updates.extend(update_table_names) + check_requested_updates(table_updates) + + # increase the patch (last digit) release tag + tag, new_tag = get_tags() print("Updating platy browser from", tag, "to", new_tag) # make new folder structure @@ -222,11 +140,6 @@ def update_platy_browser(update_cell_segmentation=False, new_folder = os.path.join('data', new_tag) make_folder_structure(new_folder) - target = 'slurm' - max_jobs = 250 - # target = 'local' - # max_jobs = 48 - # copy static image and misc data copy_image_data(os.path.join(folder, 'images'), os.path.join(new_folder, 'images')) @@ -234,18 +147,12 @@ def update_platy_browser(update_cell_segmentation=False, os.path.join(new_folder, 'misc')) # export new segmentations - export_segmentations(folder, new_folder, - update_cell_segmentation, - update_nucleus_segmentation, - update_cilia_segmentation, + update_segmentations(folder, new_folder, update_seg_names, target=target, max_jobs=max_jobs) # generate new attribute tables - make_attributes(folder, new_folder, - update_cell_tables, - update_nucleus_tables, - update_cilia_tables, - target=target, max_jobs=max_jobs) + update_tables(folder, new_folder, table_updates, + target=target, max_jobs=max_jobs) make_bdv_server_file([os.path.join(new_folder, 'images'), os.path.join(new_folder, 'segmentations')], @@ -275,40 +182,21 @@ def str2bool(v): raise argparse.ArgumentTypeError('Boolean value expected.') -def table_help_str(name): - help_str = """Update the %s tables. - Only needs to be specified if the %s segmentation is not updated, - but the tables should be updated.""" - return help_str % (name, name) - - -# TODO instead of specifying all these things, pass list of stuff that should be changed +# TODO expose target and max_jobs as well if __name__ == '__main__': - parser = argparse.ArgumentParser(description='Update derived data for the platy browser') - - parser.add_argument('--update_cell_segmentation', type=str2bool, - default=False, help="Update the cell segmentation.") - parser.add_argument('--update_nucleus_segmentation', type=str2bool, - default=False, help="Update the nucleus segmentation.") - parser.add_argument('--update_cilia_segmentation', type=str2bool, - default=False, help="Update the cilia segmentation.") - parser.add_argument('--update_cell_tables', type=str2bool, - default=False, help=table_help_str("cell")) - parser.add_argument('--update_nucleus_tables', type=str2bool, - default=False, help=table_help_str("nucleus")) - parser.add_argument('--update_cilia_tables', type=str2bool, - default=False, help=table_help_str("cilia")) + parser = argparse.ArgumentParser(description='Update patch version of platy-browser-data.') + parser.add_argument('--segmentation_names', type=str, nargs='+', default=[], + help="Names of the segmentations to update.") + table_help_str = ("Names of the tables to update." + "The tables for segmentations in 'segmentation_names' will be updated without being passed here.") + parser.add_argument('--table_names', type=str, nargs='+', default=[], + help=table_help_str) + parser.add_argument('--description', type=str, default='', help="Optional description for release message") - parser.add_argument('--new_tag', type=str, default='', - help="Specify a new tag that will override the default new tag") + parser.add_argument('--force_update', type=str2bool, default='no', + help="Create new release even if nothing needs to be updated.") args = parser.parse_args() - update_platy_browser(args.update_cell_segmentation, - args.update_nucleus_segmentation, - args.update_cilia_segmentation, - args.update_cell_tables, - args.update_nucleus_tables, - args.update_cilia_tables, - args.description, - args.new_tag) + update_patch(args.segmentation_names, args.table_names, + args.description, args.force_update) -- GitLab