Commit 1f8a9249 authored by Constantin Pape

Continue implementation for initial data version

parent 5a93369a
#! /g/kreshuk/pape/Work/software/conda/miniconda3/envs/cluster_env37/bin/python

import os

from scripts.files import make_folder_structure
from scripts.export import export_segmentation


def make_segmentations(old_folder, folder):
    path = '/g/kreshuk/data/arendt/platyneris_v1/data.n5'

    # export the nucleus segmentation
    key_nuclei = 'volumes/paintera/nuclei'
    nuclei_name = 'em-segmented-nuclei-labels'
    res_nuclei = [.1, .08, .08]
    export_segmentation(path, key_nuclei, old_folder, folder, nuclei_name, res_nuclei)
    # NOTE: this early return skips the cell export below
    return

    # export the cell segmentation
    key_cells = 'volumes/paintera/proofread_cells'
    cells_name = 'em-segmented-cells-labels'
    res_cells = [.025, .02, .02]
    export_segmentation(path, key_cells, old_folder, folder, cells_name, res_cells)


def make_initial_version():
    old_folder = '/g/arendt/EM...'
    tag = '0.0.0'
    folder = os.path.join('data', tag)
    make_folder_structure(folder)
    make_segmentations(old_folder, folder)
    # TODO make xmls for all necessary image data
    # TODO make tables


if __name__ == '__main__':
......
# new file (path inferred from the relative imports): scripts/export/export_segmentation.py
import os
import json

import luigi
import z5py

from cluster_tools.downscaling import DownscalingWorkflow
from paintera_tools import serialize_from_commit
from .to_bdv import to_bdv
from .map_segmentation_ids import map_segmentation_ids


def get_n_scales(paintera_path, paintera_key):
    # count the scale levels ('s0', 's1', ...) of the paintera dataset
    f = z5py.File(paintera_path)
    g = f[paintera_key]['data']
    keys = list(g.keys())
    scales = [key for key in keys
              if os.path.isdir(os.path.join(g.path, key)) and key.startswith('s')]
    return len(scales)
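# Usage sketch for get_n_scales (not part of the commit): following the
# paintera conventions, the scale pyramid lives in 's0' ... 'sN' groups under
# '<paintera_key>/data', so e.g.
#   n_scales = get_n_scales('/g/kreshuk/data/arendt/platyneris_v1/data.n5',
#                           'volumes/paintera/nuclei')
# counts the 's*' groups under 'volumes/paintera/nuclei/data'.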
def downscale(path, in_key, out_key,
              n_scales, tmp_folder, max_jobs, target):
    task = DownscalingWorkflow

    config_folder = os.path.join(tmp_folder, 'configs')
    os.makedirs(config_folder, exist_ok=True)
    configs = task.get_config()

    global_conf = configs['global']
    global_conf.update({'shebang':
                        "#! /g/kreshuk/pape/Work/software/conda/miniconda3/envs/cluster_env37/bin/python",
                        'block_shape': [64, 512, 512]})
    with open(os.path.join(config_folder, 'global.config'), 'w') as f:
        json.dump(global_conf, f)

    config = configs['downscaling']
    config.update({'mem_limit': 8, 'time_limit': 120,
                   'library_kwargs': {'order': 0}})
    with open(os.path.join(config_folder, 'downscaling.config'), 'w') as f:
        json.dump(config, f)

    # for now we hard-code the scale factors to 2, but it would be cleaner
    # to infer them from the data (see the sketch after this function)
    scales = [[2, 2, 2]] * n_scales
    halos = [[0, 0, 0]] * n_scales

    t = task(tmp_folder=tmp_folder, config_dir=config_folder,
             target=target, max_jobs=max_jobs,
             input_path=path, input_key=in_key,
             scale_factors=scales, halos=halos)
    ret = luigi.build([t], local_scheduler=True)
    if not ret:
        raise RuntimeError("Downscaling the segmentation failed")
def export_segmentation(paintera_path, paintera_key, folder, new_folder, name, resolution):
......@@ -13,8 +64,30 @@ def export_segmentation(paintera_path, paintera_key, folder, new_folder, name, r
        name: name of the segmentation
        resolution: resolution [z, y, x] in micrometer
    """
    # TODO this should be a parameter
    max_jobs = 250
    target = 'slurm'

    tmp_folder = 'tmp_export_seg'
    tmp_path = os.path.join(tmp_folder, 'data.n5')
    tmp_key = 'seg'
    tmp_key0 = os.path.join(tmp_key, 's0')

    # export the segmentation from the paintera commit at full resolution
    serialize_from_commit(paintera_path, paintera_key, tmp_path, tmp_key0, tmp_folder,
                          max_jobs, target)

    # downscale the segmentation
    n_scales = get_n_scales(paintera_path, paintera_key)
    downscale(tmp_path, tmp_key0, tmp_key, n_scales, tmp_folder, max_jobs, target)

    # convert to bdv format
    out_path = os.path.join(new_folder, 'data', '%s.h5' % name)
    to_bdv(tmp_path, tmp_key, out_path, resolution)

    # compute the mapping to the old segmentation
    map_segmentation_ids(folder, new_folder, name, tmp_folder, max_jobs, target)

    # TODO clean up the tmp folder
    # rmtree(tmp_folder)
# new file (path inferred from the relative import above): scripts/export/map_segmentation_ids.py
import os
import json

import luigi
import z5py

from cluster_tools.node_labels import NodeLabelWorkflow


def get_seg_path(folder, name):
    # if there is a 'data' sub-folder, load the segmentation from there
    data_folder = os.path.join(folder, 'data')
    data_folder = data_folder if os.path.exists(data_folder) else folder

    # check if we have a h5 file
    path = os.path.join(data_folder, '%s.h5' % name)
    if os.path.exists(path):
        return path

    # check if we have an xml file and read the h5 path from it
    path = os.path.join(data_folder, '%s.xml' % name)
    if os.path.exists(path):
        # TODO (see the sketch after this function)
        raise NotImplementedError("File path from xml not implemented")
        # path = get_h5_path_from_xml(path)
        # return path
    else:
        raise RuntimeError("The specified folder does not contain a segmentation")
def map_ids(path1, path2, out_path, tmp_folder, max_jobs, target, prefix):
    task = NodeLabelWorkflow

    config_folder = os.path.join(tmp_folder, 'configs')
    os.makedirs(config_folder, exist_ok=True)
    configs = task.get_config()

    global_conf = configs['global']
    global_conf.update({'shebang':
                        "#! /g/kreshuk/pape/Work/software/conda/miniconda3/envs/cluster_env37/bin/python",
                        'block_shape': [64, 512, 512]})
    with open(os.path.join(config_folder, 'global.config'), 'w') as f:
        json.dump(global_conf, f)

    # bdv h5 key: time-point 0, setup 0, scale-level 0
    key = 't00000/s00/0/cells'
    tmp_path = os.path.join(tmp_folder, 'data.n5')
    tmp_key = prefix

    t = task(tmp_folder=tmp_folder, config_dir=config_folder,
             target=target, max_jobs=max_jobs,
             ws_path=path1, ws_key=key,
             input_path=path2, input_key=key,
             output_path=tmp_path, output_key=tmp_key,
             prefix=prefix, max_overlap=True)
    ret = luigi.build([t], local_scheduler=True)
    if not ret:
        raise RuntimeError("Id-mapping failed")

    # serialize the id mapping as json; cast to python ints via tolist(),
    # because numpy integers are not json serializable
    ds = z5py.File(tmp_path)[tmp_key]
    lut = ds[:]
    lut = dict(zip(lut[:, 0].tolist(), lut[:, 1].tolist()))
    with open(out_path, 'w') as f:
        json.dump(lut, f)
def map_segmentation_ids(src_folder, dest_folder, name, tmp_folder, max_jobs, target):
    src_path = get_seg_path(src_folder, name)
    dest_path = get_seg_path(dest_folder, name)

    # map ids from src to dest via maximal overlap
    out_path = os.path.join(dest_folder, 'misc', 'new_id_lut_%s.json' % name)
    map_ids(src_path, dest_path, out_path, tmp_folder, max_jobs, target,
            prefix='to_dest')

    # TODO do we need to do this?
    # map ids from dest to src via maximal overlap
    # out_path = os.path.join(dest_folder, 'misc', 'old_id_lut_%s.json' % name)
    # map_ids(dest_path, src_path, out_path, tmp_folder, max_jobs, target,
    #         prefix='to_src')
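# A hedged usage sketch (not part of this commit): the serialized LUT maps ids
# of the old (src) segmentation to ids of the new (dest) one, so attributes
# keyed by old ids can be translated. 'label_ids' is a hypothetical input.
def translate_label_ids(lut_path, label_ids):
    with open(lut_path) as f:
        lut = json.load(f)
    # json object keys are always strings, hence the str() lookup;
    # 0 marks ids without a match in the new segmentation
    return [lut.get(str(label_id), 0) for label_id in label_ids]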
#! /g/kreshuk/pape/Work/software/conda/miniconda3/envs/cluster_env37/bin/python
# changed file (path inferred from the relative import above): scripts/export/to_bdv.py
import os
import json

import luigi
import z5py

from shutil import rmtree
from cluster_tools.downscaling import PainteraToBdvWorkflow
......@@ -20,10 +20,10 @@ def check_max_id(path, key):
def to_bdv(in_path, in_key, out_path, resolution, target='slurm'):
    check_max_id(in_path, in_key)
-    tmp_folder = 'tmp_export'
+    tmp_folder = 'tmp_export_bdv'
    config_folder = os.path.join(tmp_folder, 'configs')
-    os.makedirs(config_folder)
+    os.makedirs(config_folder, exist_ok=True)
    configs = PainteraToBdvWorkflow.get_config()
    global_conf = configs['global']
......
# new file: scripts/files/__init__.py
from .copy_helper import copy_tables, copy_segmentation, copy_static_files
from .folders import make_folder_structure


# new file: scripts/files/folders.py
import os


def make_folder_structure(root):
    # make all the sub-folders
    os.makedirs(os.path.join(root, 'tables'))
    os.makedirs(os.path.join(root, 'images'))
    os.makedirs(os.path.join(root, 'segmentations'))
    os.makedirs(os.path.join(root, 'misc'))
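# Usage sketch (not part of the commit): for the initial version tag this
# creates data/0.0.0/tables, data/0.0.0/images, data/0.0.0/segmentations
# and data/0.0.0/misc; note that os.makedirs without exist_ok=True raises
# if the version folder was already populated.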
#! /g/kreshuk/pape/Work/software/conda/miniconda3/envs/cluster_env37/bin/python
# changed file: update_platy_browser.py (name inferred from the main function)
import os
import argparse
from subprocess import check_output, call

from scripts.attributes import make_cell_tables, make_nucleus_tables
from scripts.export import export_segmentation
-from scripts.files import copy_tables, copy_segmentation, copy_static_files
+from scripts.files import copy_tables, copy_segmentation, copy_static_files, make_folder_structure

# paths for the paintera projects
......@@ -12,8 +14,8 @@ from scripts.files import copy_tables, copy_segmentation, copy_static_files
# s.t. they are stored in these files!
PAINTERA_ROOT = '/g/kreshuk/data/arendt/platyneris_v1/data.n5'
# TODO do we need the data postfix ???
-PROJECT_CELLS = 'volumes/paintera/proofread_cells/data'
-PROJECT_NUCLEI = 'volumes/paintera/nuclei/data'
+PROJECT_CELLS = 'volumes/paintera/proofread_cells'
+PROJECT_NUCLEI = 'volumes/paintera/nuclei'
# names for the cell and nucleus segmentations
NAME_CELLS = 'em-segmented-cells-labels'
......@@ -42,16 +44,6 @@ def get_tags():
    return tag, new_tag


-def make_folder_structure(tag):
-    new_folder = os.makedirs('data', tag)
-    # make all sub-folders
-    os.makedirs(os.path.join(new_folder, 'tables'))
-    os.makedirs(os.path.join(new_folder, 'images'))
-    os.makedirs(os.path.join(new_folder, 'segmentations'))
-    os.makedirs(os.path.join(new_folder, 'misc'))
-    return new_folder
# TODO
# we need a lut from new to old segmentation ids
# to auto-translate custom attributes
......@@ -135,7 +127,8 @@ def update_platy_browser(update_cell_segmentation=False,
    # make the new folder structure
    folder = os.path.join('data', tag)
-    new_folder = make_folder_structure(new_tag)
+    new_folder = os.path.join('data', new_tag)
+    make_folder_structure(new_folder)

    # export the new segmentation(s)
    export_segmentations(folder, new_folder,
......