Skip to content
Snippets Groups Projects
Commit 60bcd005 authored by Constantin Pape's avatar Constantin Pape
Browse files

Update registration scripts

parent 6b0c921f
No related branches found
No related tags found
No related merge requests found
......@@ -17,6 +17,7 @@ class ApplyRegistrationBase(luigi.Task):
"""
default_fiji = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/software/Fiji.app/ImageJ-linux64'
default_elastix = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/software/elastix_v4.8'
formats = ('bdv', 'tif')
task_name = 'apply_registration'
src_file = os.path.abspath(__file__)
......@@ -25,6 +26,7 @@ class ApplyRegistrationBase(luigi.Task):
input_path_file = luigi.Parameter()
output_path_file = luigi.Parameter()
transformation_file = luigi.Parameter()
output_format = luigi.Parameter(default='bdv')
fiji_executable = luigi.Parameter(default=default_fiji)
elastix_directory = luigi.Parameter(default=default_elastix)
dependency = luigi.TaskParameter(default=DummyTask())
......@@ -42,13 +44,14 @@ class ApplyRegistrationBase(luigi.Task):
with open(self.output_path_file) as f:
outputs = json.load(f)
assert len(inputs) == len(outputs)
assert len(inputs) == len(outputs), "%i, %i" % (len(inputs), len(outputs))
assert all(os.path.exists(inp) for inp in inputs)
n_files = len(inputs)
assert os.path.exists(self.transformation_file)
assert os.path.exists(self.fiji_executable)
assert os.path.exists(self.elastix_directory)
assert self.output_format in (self.formats)
# get the split of file-ids to the volume
file_list = vu.blocks_in_volume((n_files,), (1,))
......@@ -59,7 +62,7 @@ class ApplyRegistrationBase(luigi.Task):
"transformation_file": self.transformation_file,
"fiji_executable": self.fiji_executable,
"elastix_directory": self.elastix_directory,
"tmp_folder": self.tmp_folder}
"tmp_folder": self.tmp_folder, 'output_format': self.output_format}
# prime and run the jobs
n_jobs = min(self.max_jobs, n_files)
......@@ -98,7 +101,8 @@ class ApplyRegistrationLSF(ApplyRegistrationBase, LSFTask):
def apply_for_file(input_path, output_path,
transformation_file, fiji_executable,
elastix_directory, tmp_folder, n_threads):
elastix_directory, tmp_folder, n_threads,
output_format):
assert os.path.exists(elastix_directory)
assert os.path.exists(tmp_folder)
......@@ -106,6 +110,13 @@ def apply_for_file(input_path, output_path,
assert os.path.exists(transformation_file)
assert os.path.exists(os.path.split(output_path)[0])
if output_format == 'tif':
format_str = 'Save as Tiff'
elif output_format == 'bdv':
format_str = 'Save as BigDataViewer .xml/.h5'
else:
assert False, "Invalid output format %s" % output_format
# transformix arguments need to be passed as one string,
# with individual arguments comma separated
# the argument to transformaix needs to be one large comma separated string
......@@ -114,7 +125,7 @@ def apply_for_file(input_path, output_path,
"inputImageFile=\'%s\'" % input_path,
"transformationFile=\'%s\'" % transformation_file,
"outputFile=\'%s\'" % output_path,
"outputModality=\'Save as BigDataViewer .xml/.h5\'",
"outputModality=\'%s\'" % format_str,
"numThreads=\'%i\'" % n_threads]
transformix_argument = ",".join(transformix_argument)
transformix_argument = "\"%s\"" % transformix_argument
......@@ -175,6 +186,8 @@ def apply_registration(job_id, config_path):
elastix_directory = config['elastix_directory']
tmp_folder = config['tmp_folder']
working_dir = os.path.join(tmp_folder, 'work_dir%i' % job_id)
output_format = config['output_format']
os.makedirs(working_dir, exist_ok=True)
file_list = config['block_list']
......@@ -194,7 +207,8 @@ def apply_registration(job_id, config_path):
fu.log("Output: %s" % outfile)
apply_for_file(infile, outfile,
transformation_file, fiji_executable,
elastix_directory, working_dir, n_threads)
elastix_directory, working_dir, n_threads,
output_format)
fu.log_block_success(file_id)
fu.log_job_success(job_id)
......
......@@ -51,11 +51,18 @@ def copy_segmentation(src_folder, dst_folder, name):
os.symlink(rel_path, lut_out)
def copy_image_data(src_folder, dst_folder):
def copy_image_data(src_folder, dst_folder, exclude_prefixes=[]):
# get all image names that need to be copied
names = get_image_names()
for name in names:
prefix = name.split('-')[:4]
prefix = '-'.join(prefix)
if prefix in exclude_prefixes:
continue
name += '.xml'
in_path = os.path.join(src_folder, 'images', name)
out_path = os.path.join(dst_folder, 'images', name)
......@@ -94,9 +101,9 @@ def copy_all_tables(src_folder, dst_folder):
copy_tables(src_folder, dst_folder, name)
def copy_release_folder(src_folder, dst_folder, exclude_prefixes=()):
    """Copy all static data of a release folder to a new release folder.

    src_folder [str] - release folder to copy from
    dst_folder [str] - new release folder to copy to
    exclude_prefixes [sequence of str] - image-source prefixes that are
        skipped when copying the image data, e.g. because they will be
        regenerated by a new registration (default: ())

    NOTE: the default for exclude_prefixes is a tuple instead of a
    mutable list to avoid the shared-mutable-default pitfall.
    """
    # copy static image and misc data, skipping the excluded prefixes
    copy_image_data(src_folder, dst_folder, exclude_prefixes)
    copy_misc_data(src_folder, dst_folder)
    copy_segmentations(src_folder, dst_folder)
    copy_all_tables(src_folder, dst_folder)
......@@ -4,7 +4,12 @@ import os
import json
import argparse
from subprocess import check_output
from concurrent import futures
import imageio
import luigi
import numpy as np
from pybdv import make_bdv
from scripts.files import copy_release_folder, make_folder_structure, make_bdv_server_file
from scripts.release_helper import add_version
......@@ -12,14 +17,61 @@ from scripts.extension.registration import ApplyRegistrationLocal, ApplyRegistra
from scripts.default_config import get_default_shebang
def get_tags():
# Names of the ProSPr region segmentations: registered inputs whose base
# name matches one of these are treated as segmentations (output name
# '<prefix>-segmented-<name>') rather than as gene expression channels.
REGION_NAMES = ('AllGlands',
                'CrypticSegment',
                'Glands',
                'Head',
                'PNS',
                'Pygidium',
                'RestOfAnimal',
                'Stomodeum',
                'VNC',
                'ProSPr6-Ref')
def get_tags(new_tag):
    """Return the current git tag and the next version tag.

    new_tag [str or None] - explicit next version tag; if None, the last
        (patch) component of the current tag is incremented instead.

    Returns:
        tuple of (current tag, next tag)

    NOTE(review): the scraped diff residue showed the unconditional
    tag-bump followed by the 'if new_tag is None' guard, which made the
    guard dead code; only bump the tag when no explicit tag was given.
    """
    tag = check_output(['git', 'describe', '--abbrev=0']).decode('utf-8').rstrip('\n')
    if new_tag is None:
        # no explicit tag given: increment the patch number of the
        # current tag, e.g. '0.5.3' -> '0.5.4'
        parts = tag.split('.')
        parts[-1] = str(int(parts[-1]) + 1)
        new_tag = '.'.join(parts)
    return tag, new_tag
def get_out_name(prefix, name):
    """Derive the platy-browser output name for a registered input file.

    The directory, the file extension and anything after a '--' are
    stripped from *name*; the remaining base name is then combined with
    *prefix* depending on what kind of data it is:
    - region segmentation (in REGION_NAMES): '<prefix>-segmented-<base>'
    - edu channel ('edu*'):                  '<prefix>-<base>'
    - gene expression (everything else):     '<prefix>-<base>-MED'
    """
    base = os.path.splitext(os.path.split(name)[1])[0]
    base = base.split('--')[0]
    # TODO split off further extensions here?
    # base = base.split('-')[0]
    if base in REGION_NAMES:
        return '-'.join([prefix, 'segmented', base])
    if base.startswith('edu'):  # edu channels are not MEDs
        return '-'.join([prefix, base])
    return '-'.join([prefix, base, 'MED'])
def copy_file(in_path, out_path, resolution=(0.55, 0.55, 0.55)):
    """Convert a single registered tif volume to bdv/hdf5 with pybdv.

    in_path [str] - path prefix of the input volume; '-ch0.tif' is
        appended to obtain the actual tif file
    out_path [str] - output path prefix for make_bdv; conversion is
        skipped if the corresponding '.xml' already exists
    resolution [sequence of float] - voxel size, presumably in
        micrometer per the 'unit' argument (default: (0.55, 0.55, 0.55))

    NOTE: the default for resolution is a tuple instead of a mutable
    list to avoid the shared-mutable-default pitfall.
    """
    # skip volumes that were already converted
    if os.path.exists(out_path + '.xml'):
        return
    print("Copy", in_path, "to", out_path)
    vol = np.asarray(imageio.volread(in_path + '-ch0.tif'))
    # three levels of 2x downsampling for the bdv scale pyramid
    downscale_factors = [[2, 2, 2], [2, 2, 2], [2, 2, 2]]
    make_bdv(vol, out_path, downscale_factors,
             unit='micrometer', resolution=resolution)
def copy_to_h5(inputs, output_folder):
    """Convert the registered tif volumes to bdv/hdf5 in parallel.

    Each input path is converted with copy_file, writing into
    output_folder under the input's file name.
    """
    print("Copy tifs to bdv/hdf5 in", output_folder)
    n_jobs = 48
    out_paths = []
    for in_path in inputs:
        fname = os.path.split(in_path)[1]
        out_paths.append(os.path.join(output_folder, fname))
    with futures.ProcessPoolExecutor(n_jobs) as pool:
        jobs = [pool.submit(copy_file, src, dst)
                for src, dst in zip(inputs, out_paths)]
        # wait for all conversions and re-raise any worker exception
        for job in jobs:
            job.result()
def apply_registration(input_folder, new_folder,
transformation_file, source_prefix,
target, max_jobs):
......@@ -35,9 +87,13 @@ def apply_registration(input_folder, new_folder,
raise RuntimeError("Did not find any files with prefix %s in %s" % (source_prefix,
input_folder))
output_folder = os.path.join(new_folder, 'images')
# TODO parse names to get output names
output_names = []
# writing multiple hdf5 files in parallel with the elastix plugin is broken,
# so we write temporary files to tif instead and copy them to hdf5 with python
# output_folder = os.path.join(new_folder, 'images')
output_folder = os.path.join(tmp_folder, 'outputs')
os.makedirs(output_folder, exist_ok=True)
output_names = [get_out_name(source_prefix, name) for name in inputs]
outputs = [os.path.join(output_folder, name) for name in output_names]
# update the task config
......@@ -50,30 +106,35 @@ def apply_registration(input_folder, new_folder,
with open(os.path.join(config_dir, 'global.config'), 'w') as f:
json.dump(global_config, f)
# TODO more time than 3 hrs?
task_config = task.default_task_config()
task_config.update({'mem_limit': 16, 'time_limit': 180})
task_config.update({'mem_limit': 16, 'time_limit': 240, 'threads_per_job': 4})
with open(os.path.join(config_dir, 'apply_registration.config'), 'w') as f:
json.dump(task_config, f)
# write path name files to json
input_file = os.path.join(tmp_folder, 'input_files.json')
inputs = [os.path.abspath(inpath) for inpath in inputs]
with open(input_file, 'w') as f:
json.dump(inputs, f)
output_file = os.path.joout(tmp_folder, 'output_files.json')
output_file = os.path.join(tmp_folder, 'output_files.json')
outputs = [os.path.abspath(outpath) for outpath in outputs]
with open(output_file, 'w') as f:
json.dump(outputs, f)
t = task(tmp_folder=tmp_folder, config_dir=config_dir, max_jobs=max_jobs,
input_path_file=input_file, output_path_file=output_file,
transformation_file=transformation_file)
transformation_file=transformation_file, output_format='tif')
ret = luigi.build([t], local_scheduler=True)
if not ret:
raise RuntimeError("Registration failed")
output_folder = os.path.join(new_folder, 'images')
copy_to_h5(outputs, output_folder)
def update_regestration(transformation_file, input_folder, source_prefix, target, max_jobs):
def update_regestration(transformation_file, input_folder, source_prefix, target, max_jobs,
new_tag=None):
""" Update the prospr segmentation.
This is a special case of 'update_patch', that applies a new prospr registration.
......@@ -83,8 +144,9 @@ def update_regestration(transformation_file, input_folder, source_prefix, target
source_prefix [str] - prefix of the source data to apply the registration to
target [str] - target of computation
max_jobs [int] - max number of jobs for computation
new_tag [str] - new version tag (default: None)
"""
tag, new_tag = get_tags()
tag, new_tag = get_tags(new_tag)
print("Updating platy browser from", tag, "to", new_tag)
# make new folder structure
......@@ -93,9 +155,10 @@ def update_regestration(transformation_file, input_folder, source_prefix, target
make_folder_structure(new_folder)
# copy the release folder
copy_release_folder(folder, new_folder)
copy_release_folder(folder, new_folder, exclude_prefixes=[source_prefix])
# apply new registration to all files of the source prefix
transformation_file = os.path.abspath(transformation_file)
apply_registration(input_folder, new_folder,
transformation_file, source_prefix,
target, max_jobs)
......@@ -119,6 +182,12 @@ if __name__ == '__main__':
parser.add_argument('--max_jobs', type=int, default=100,
help="Maximal number of jobs used for computation")
parser.add_argument('--new_tag', type=str, default='',
help="New version tag")
args = parser.parse_args()
new_tag = args.new_tag
new_tag = None if new_tag == '' else new_tag
update_regestration(args.transformation_file, args.input_folder, args.source_prefix,
args.target, args.max_jobs)
args.target, args.max_jobs, new_tag)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment