Skip to content
Snippets Groups Projects
Commit 6488faa0 authored by Constantin Pape's avatar Constantin Pape
Browse files

Refactor update_patch to account for new data management

parent 881918f9
No related branches found
No related tags found
1 merge request!3Refactor sources
......@@ -136,7 +136,30 @@ def add_segmentation(source_name, name, segmentation_path=None,
resolution=None, table_update_function=None, copy_data=True):
""" Add segmentation volume to the platy browser data.
TODO explain more
We distinguish between static and dynamic segmentations. A dynamic segmentation is generated from
a paintera project and can change due to corrections made in paintera, while a static segmentation
is just added once and does not change.
In addition, we can add tables associated with the data that contain derived data.
For static segmentations you need to pass a dict containing table names and paths,
for dynamic segmentations you need to register a function name that will compute the tables.
Adding a static segmentation:
```
# if you add tables, one must have the name 'default'
add_segmentation(source_name, seg_name,
segmentation_path='/path/to/input-segmentation.xml',
table_path_dict={'default': '/path/to/default-table.csv',
'other': '/path/to/other-table.csv'})
```
Adding a dynamic segmentation:
```
# 'update_seg_table' must be importable from 'scripts.attributes'
add_segmentation(source_name, seg_name,
paintera_project=('/path/to/paintera/root.n5', '/path/in/file'),
resolution=(.025, .02, .02), # resolution in microns, must be passed for dynamic seg
table_update_function='update_seg_table')
```
Parameter:
source_name [str] - prefix of the primary data source.
......
......@@ -2,145 +2,67 @@
import os
import argparse
from subprocess import check_output, call
from copy import deepcopy
from glob import glob
from shutil import rmtree
from subprocess import check_output, call
from scripts.attributes import make_cell_tables, make_nucleus_tables, make_cilia_tables
import scripts.attributes
from scripts.files import get_segmentation_names, get_segmentations
from scripts.files import (copy_image_data, copy_misc_data, copy_segmentation, copy_tables,
make_bdv_server_file, make_folder_structure)
from scripts.export import export_segmentation
from scripts.files import copy_image_data, copy_misc_data, make_bdv_server_file
from scripts.files import copy_tables, copy_segmentation, make_folder_structure
# paths for paintera projects
# in order to get the new segmentation, changes need to be committed,
# s.t. they are stored in these files!
PAINTERA_ROOT = '/g/kreshuk/data/arendt/platyneris_v1/data.n5'
PROJECT_CELLS = 'volumes/paintera/proofread_cells'
PROJECT_NUCLEI = 'volumes/paintera/nuclei'
PROJECT_CILIA = 'volumes/paintera/cilia'
# name for cell and nucleus segmentations
NAME_CELLS = 'sbem-6dpf-1-whole-segmented-cells-labels'
NAME_NUCLEI = 'sbem-6dpf-1-whole-segmented-nuclei-labels'
NAME_CILIA = 'sbem-6dpf-1-whole-segmented-cilia-labels'
# resolutions of cell and nucleus segmentation
RES_CELLS = [.025, .02, .02]
RES_NUCLEI = [.1, .08, .08]
RES_CILIA = [.025, .01, .01]
def check_inputs(update_cell_segmentation,
                 update_nucleus_segmentation,
                 update_cilia_segmentation,
                 update_cell_tables,
                 update_nucleus_tables,
                 update_cilia_tables):
    """ Normalize the requested update flags.

    Requesting a segmentation update always switches on the update of the
    tables that belong to the same segmentation.

    Returns:
        dict with the keys 'have_changes' (True if any flag was set) and
        'update_cell_tables', 'update_nucleus_tables', 'update_cilia_tables'
        (the effective table flags).
    """
    requested = (update_cell_segmentation, update_nucleus_segmentation,
                 update_cell_tables, update_nucleus_tables,
                 update_cilia_segmentation, update_cilia_tables)

    # a segmentation update forces the table update; otherwise the table flag is kept as passed
    cell_tables = True if update_cell_segmentation else update_cell_tables
    nucleus_tables = True if update_nucleus_segmentation else update_nucleus_tables
    cilia_tables = True if update_cilia_segmentation else update_cilia_tables

    return {'have_changes': any(requested),
            'update_cell_tables': cell_tables,
            'update_nucleus_tables': nucleus_tables,
            'update_cilia_tables': cilia_tables}
def get_tags(new_tag=''):
    """ Get the current release tag and the tag for the next release.

    The current tag is read from git via 'git describe --abbrev=0'.
    If no explicit new tag is given, the new tag is derived from the current
    one by increasing its last '.'-separated component by one.

    Arguments:
        new_tag [str] - explicit new tag; if empty, the new tag is derived
            from the current tag (default: '').

    Returns:
        tuple (tag, new_tag) - the current tag and the new tag.

    Raises:
        ValueError - if the last component of the current tag is not an integer.
    """
    tag = check_output(['git', 'describe', '--abbrev=0']).decode('utf-8').rstrip('\n')
    if new_tag == '':
        # bump the last component, e.g. '0.1.3' -> '0.1.4'
        components = tag.split('.')
        components[-1] = str(int(components[-1]) + 1)
        new_tag = '.'.join(components)
    return tag, new_tag
def export_segmentations(folder, new_folder,
                         update_cell_segmentation,
                         update_nucleus_segmentation,
                         update_cilia_segmentation,
                         target, max_jobs):
    """ Bring all segmentations into the new version folder.

    The cell, nucleus and cilia segmentations are exported from their paintera
    projects if the corresponding flag is set and are copied from the previous
    version folder otherwise. The static segmentations are always copied.

    Arguments:
        folder - folder of the previous version.
        new_folder - folder of the new version.
        update_cell_segmentation - whether to export the cell segmentation.
        update_nucleus_segmentation - whether to export the nucleus segmentation.
        update_cilia_segmentation - whether to export the cilia segmentation.
        target - passed on to 'export_segmentation'.
        max_jobs - passed on to 'export_segmentation'.
    """
    # (update flag, paintera project, segmentation name, resolution, tmp folder)
    dynamic_segmentations = (
        (update_cell_segmentation, PROJECT_CELLS, NAME_CELLS, RES_CELLS, 'tmp_export_cells'),
        (update_nucleus_segmentation, PROJECT_NUCLEI, NAME_NUCLEI, RES_NUCLEI, 'tmp_export_nuclei'),
        (update_cilia_segmentation, PROJECT_CILIA, NAME_CILIA, RES_CILIA, 'tmp_export_cilia'),
    )
    for needs_update, project, seg_name, resolution, tmp_folder in dynamic_segmentations:
        if not needs_update:
            copy_segmentation(folder, new_folder, seg_name)
            continue
        export_segmentation(PAINTERA_ROOT, project,
                            folder, new_folder, seg_name,
                            resolution=resolution,
                            tmp_folder=tmp_folder,
                            target=target, max_jobs=max_jobs)

    # static segmentations are only copied;
    # the chromatin segmentation is treated as static for now as well
    for static_name in ('sbem-6dpf-1-whole-segmented-muscles',
                        'sbem-6dpf-1-whole-segmented-tissue-labels',
                        'sbem-6dpf-1-whole-segmented-chromatin-labels'):
        copy_segmentation(folder, new_folder, static_name)
def make_attributes(folder, new_folder,
update_cell_tables,
update_nucleus_tables,
update_cilia_tables,
target, max_jobs):
# update or copy cell tables
if update_cell_tables:
make_cell_tables(new_folder, NAME_CELLS, 'tmp_tables_cells', RES_CELLS,
target=target, max_jobs=max_jobs)
else:
copy_tables(folder, new_folder, NAME_CELLS)
def update_segmentation(name, seg_dict, folder, new_folder,
                        target, max_jobs):
    """ Export the segmentation 'name' from its paintera project.

    Arguments:
        name - name of the segmentation; also used to name the tmp folder.
        seg_dict - dict describing the segmentation. Must contain
            'paintera_project' (pair of paintera root and key) and 'resolution'.
        folder - folder of the previous version.
        new_folder - folder of the new version.
        target - passed on to 'export_segmentation'.
        max_jobs - passed on to 'export_segmentation'.
    """
    project_root, project_key = seg_dict['paintera_project']
    export_kwargs = {'resolution': seg_dict['resolution'],
                     'tmp_folder': 'tmp_export_%s' % name,
                     'target': target,
                     'max_jobs': max_jobs}
    export_segmentation(project_root, project_key,
                        folder, new_folder, name,
                        **export_kwargs)
# update or copy nucleus tables
if update_nucleus_tables:
make_nucleus_tables(new_folder, NAME_NUCLEI, 'tmp_tables_nuclei', RES_NUCLEI,
target=target, max_jobs=max_jobs)
else:
copy_tables(folder, new_folder, NAME_NUCLEI)
if update_cilia_tables:
make_cilia_tables(new_folder, NAME_CILIA, 'tmp_tables_cilia', RES_CILIA,
target=target, max_jobs=max_jobs)
else:
copy_tables(folder, new_folder, NAME_CILIA)
def update_segmentations(folder, new_folder, names_to_update, target, max_jobs):
    """ Update or copy every registered segmentation.

    Segmentations listed in 'names_to_update' are exported again via
    'update_segmentation'; all others are copied from 'folder' to 'new_folder'.
    """
    registered = get_segmentations()
    for seg_name in get_segmentation_names():
        if seg_name not in names_to_update:
            # nothing changed for this segmentation, take it over as it is
            copy_segmentation(folder, new_folder, seg_name)
            continue
        update_segmentation(seg_name, registered[seg_name], folder, new_folder,
                            target, max_jobs)
def update_table(name, seg_dict, folder, new_folder,
                 target, max_jobs):
    """ Recompute the tables for the segmentation 'name'.

    The function that computes the tables is looked up by name in
    'scripts.attributes' and called for the new version folder.

    Arguments:
        name - name of the segmentation; also used to name the tmp folder.
        seg_dict - dict describing the segmentation. Must contain
            'table_update_function' (name of a function in 'scripts.attributes')
            and 'resolution'.
        folder - folder of the previous version (not used here).
        new_folder - folder of the new version.
        target - passed on to the table update function.
        max_jobs - passed on to the table update function.

    Raises:
        AttributeError - if 'scripts.attributes' has no function with the registered name.
    """
    tmp_folder = 'tmp_tables_%s' % name
    # the module is 'scripts.attributes' (plural), matching the import at the top of the file
    update_function = getattr(scripts.attributes, seg_dict['table_update_function'])
    update_function(new_folder, name, tmp_folder, seg_dict['resolution'],
                    target=target, max_jobs=max_jobs)
# copy tables associated with static segmentations
static_seg_names = ('sbem-6dpf-1-whole-segmented-tissue-labels',
'sbem-6dpf-1-whole-segmented-chromatin-labels')
for seg_name in static_seg_names:
copy_tables(folder, new_folder, seg_name)
def update_tables(folder, new_folder, names_to_update, target, max_jobs):
    """ Update or copy the tables of every registered segmentation.

    Tables of segmentations listed in 'names_to_update' are recomputed via
    'update_table'; all others are copied from 'folder' to 'new_folder'.
    """
    registered = get_segmentations()
    for seg_name in get_segmentation_names():
        if seg_name not in names_to_update:
            # tables are unchanged, take them over from the previous version
            copy_tables(folder, new_folder, seg_name)
            continue
        update_table(seg_name, registered[seg_name], folder, new_folder,
                     target, max_jobs)
# TODO check for errors
......@@ -156,6 +78,8 @@ def make_release(tag, folder, description=''):
def clean_up():
""" Clean up all tmp folders
"""
def remove_dir(dir_name):
try:
......@@ -163,58 +87,52 @@ def clean_up():
except OSError:
pass
# remove all tmp folders
remove_dir('tmp_export_cells')
remove_dir('tmp_export_nuclei')
remove_dir('tmp_tables_cells')
remove_dir('tmp_tables_nuclei')
tmp_folders = glob('tmp_*')
for tmp_folder in tmp_folders:
remove_dir(tmp_folder)
def check_requested_updates(names_to_update):
    """ Validate the names for which an update was requested.

    Raises:
        ValueError - if a name is not a registered segmentation or
            if it refers to a static segmentation.
    """
    registered = get_segmentations()
    for requested in names_to_update:
        is_registered = requested in registered
        if not is_registered:
            msg = "Requested update for %s, which is not a registered segmentation" % requested
            raise ValueError(msg)
        # static segmentations are added once and cannot be updated
        is_static = registered[requested]['is_static']
        if is_static:
            msg = "Requested update for %s, which is a static segmentation" % requested
            raise ValueError(msg)
# TODO catch all exceptions and handle them properly
def update_platy_browser(update_cell_segmentation=False,
update_nucleus_segmentation=False,
update_cilia_segmentation=False,
update_cell_tables=False,
update_nucleus_tables=False,
update_cilia_tables=False,
description='',
new_tag=''):
""" Generate new version of platy-browser derived data.
def update_patch(update_seg_names, update_table_names,
description='', force_update=False,
target='slurm', max_jobs=250):
""" Generate new patch version of platy-browser derived data.
The patch version is increased if derived data changes, e.g. by
incorporating corrections for a segmentation or updating tables.
Arguments:
update_cell_segmentation: Update the cell segmentation volume.
update_nucleus_segmentation: Update the nucleus segmentation volume.
update_cilia_segmentation: Update the cilia segmentation volume.
update_cell_tables: Update the cell tables. This needs to be specified if the cell
segmentation is not updated, but the tables should be updated.
update_nucleus_tables: Update the nucleus tables. This needs to be specified if the nucleus
segmentation is not updated, but the tables should be updated.
update_cilia_tables: Update the cilia tables. This needs to be specified if the cilia
segmentation is not updated, but the tables should be updated.
description: Optional description for release message.
new_tag: Optional tag to override the default new tag.
update_seg_names [list[str]] - names of segmentations to be updated.
update_table_names [list[str]] - names of tables to be updated.
Note that these only need to be specified if the corresponding segmentation is not
updated, but the tables should be updated.
description [str] - Optional description for release message (default: '').
force_update [bool] - Force an update if no changes are specified (default: False).
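target [str] - computation target that is passed on to the segmentation and table updates (default: 'slurm').
max_jobs [int] - maximal number of jobs that is passed on to the segmentation and table updates (default: 250).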
"""
# check inputs
update_dict = check_inputs(update_cell_segmentation,
update_nucleus_segmentation,
update_cilia_segmentation,
update_cell_tables,
update_nucleus_tables,
update_cilia_tables)
# if an explicit tag was given, we force an update
force_update = new_tag != ''
if not update_dict['have_changes'] and not force_update:
print("Nothing needs to be update, skipping")
return
update_cell_tables, update_nucleus_tables, update_cilia_tables =\
update_dict['update_cell_tables'], update_dict['update_nucleus_tables'], update_dict['update_cilia_tables']
# we always increase the release tag (in the last digit)
# when making a new version of segmentation or attributes
tag, new_tag = get_tags(new_tag)
# check if we have anything to update; segmentation and table requests are
# checked separately, so that a request that only lists tables counts as an update
have_seg_updates = len(update_seg_names) > 0
have_table_updates = len(update_table_names) > 0
if not have_seg_updates and not have_table_updates and not force_update:
    raise ValueError("No updates where provdied and force_update was not set")
# the tables of every updated segmentation are updated as well,
# in addition to the explicitly requested tables
table_updates = deepcopy(update_seg_names)
table_updates.extend(update_table_names)
check_requested_updates(table_updates)
# increase the patch (last digit) release tag
tag, new_tag = get_tags()
print("Updating platy browser from", tag, "to", new_tag)
# make new folder structure
......@@ -222,11 +140,6 @@ def update_platy_browser(update_cell_segmentation=False,
new_folder = os.path.join('data', new_tag)
make_folder_structure(new_folder)
target = 'slurm'
max_jobs = 250
# target = 'local'
# max_jobs = 48
# copy static image and misc data
copy_image_data(os.path.join(folder, 'images'),
os.path.join(new_folder, 'images'))
......@@ -234,18 +147,12 @@ def update_platy_browser(update_cell_segmentation=False,
os.path.join(new_folder, 'misc'))
# export new segmentations
export_segmentations(folder, new_folder,
update_cell_segmentation,
update_nucleus_segmentation,
update_cilia_segmentation,
update_segmentations(folder, new_folder, update_seg_names,
target=target, max_jobs=max_jobs)
# generate new attribute tables
make_attributes(folder, new_folder,
update_cell_tables,
update_nucleus_tables,
update_cilia_tables,
target=target, max_jobs=max_jobs)
update_tables(folder, new_folder, table_updates,
target=target, max_jobs=max_jobs)
make_bdv_server_file([os.path.join(new_folder, 'images'),
os.path.join(new_folder, 'segmentations')],
......@@ -275,40 +182,21 @@ def str2bool(v):
raise argparse.ArgumentTypeError('Boolean value expected.')
def table_help_str(name):
    """ Build the command line help text for the table update option of segmentation 'name'.
    """
    template = ("Update the %s tables.\n"
                "Only needs to be specified if the %s segmentation is not updated,\n"
                "but the tables should be updated.")
    return template % (name, name)
# TODO instead of specifying all these things, pass list of stuff that should be changed
# TODO expose target and max_jobs as well
if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Update derived data for the platy browser')
parser.add_argument('--update_cell_segmentation', type=str2bool,
default=False, help="Update the cell segmentation.")
parser.add_argument('--update_nucleus_segmentation', type=str2bool,
default=False, help="Update the nucleus segmentation.")
parser.add_argument('--update_cilia_segmentation', type=str2bool,
default=False, help="Update the cilia segmentation.")
parser.add_argument('--update_cell_tables', type=str2bool,
default=False, help=table_help_str("cell"))
parser.add_argument('--update_nucleus_tables', type=str2bool,
default=False, help=table_help_str("nucleus"))
parser.add_argument('--update_cilia_tables', type=str2bool,
default=False, help=table_help_str("cilia"))
parser = argparse.ArgumentParser(description='Update patch version of platy-browser-data.')
parser.add_argument('--segmentation_names', type=str, nargs='+', default=[],
help="Names of the segmentations to update.")
# the two literals are joined implicitly, so the first one needs a trailing space
table_help_str = ("Names of the tables to update. "
                  "The tables for segmentations in 'segmentation_names' will be updated without being passed here.")
parser.add_argument('--table_names', type=str, nargs='+', default=[],
help=table_help_str)
parser.add_argument('--description', type=str, default='',
help="Optional description for release message")
parser.add_argument('--new_tag', type=str, default='',
help="Specify a new tag that will override the default new tag")
parser.add_argument('--force_update', type=str2bool, default='no',
help="Create new release even if nothing needs to be updated.")
args = parser.parse_args()
update_platy_browser(args.update_cell_segmentation,
args.update_nucleus_segmentation,
args.update_cilia_segmentation,
args.update_cell_tables,
args.update_nucleus_tables,
args.update_cilia_tables,
args.description,
args.new_tag)
update_patch(args.segmentation_names, args.table_names,
args.description, args.force_update)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment