diff --git a/scripts/extension/registration/apply_registration.py b/scripts/extension/registration/apply_registration.py index 7fa913a4ba4b930c1a42580a04eaea5f10dc4f2b..051bcab9dd2d8bb368242ec0015a582bf16cef62 100644 --- a/scripts/extension/registration/apply_registration.py +++ b/scripts/extension/registration/apply_registration.py @@ -17,6 +17,7 @@ class ApplyRegistrationBase(luigi.Task): """ default_fiji = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/software/Fiji.app/ImageJ-linux64' default_elastix = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/software/elastix_v4.8' + formats = ('bdv', 'tif') task_name = 'apply_registration' src_file = os.path.abspath(__file__) @@ -25,6 +26,7 @@ class ApplyRegistrationBase(luigi.Task): input_path_file = luigi.Parameter() output_path_file = luigi.Parameter() transformation_file = luigi.Parameter() + output_format = luigi.Parameter(default='bdv') fiji_executable = luigi.Parameter(default=default_fiji) elastix_directory = luigi.Parameter(default=default_elastix) dependency = luigi.TaskParameter(default=DummyTask()) @@ -42,13 +44,14 @@ class ApplyRegistrationBase(luigi.Task): with open(self.output_path_file) as f: outputs = json.load(f) - assert len(inputs) == len(outputs) + assert len(inputs) == len(outputs), "%i, %i" % (len(inputs), len(outputs)) assert all(os.path.exists(inp) for inp in inputs) n_files = len(inputs) assert os.path.exists(self.transformation_file) assert os.path.exists(self.fiji_executable) assert os.path.exists(self.elastix_directory) + assert self.output_format in (self.formats) # get the split of file-ids to the volume file_list = vu.blocks_in_volume((n_files,), (1,)) @@ -59,7 +62,7 @@ class ApplyRegistrationBase(luigi.Task): "transformation_file": self.transformation_file, "fiji_executable": self.fiji_executable, "elastix_directory": self.elastix_directory, - "tmp_folder": self.tmp_folder} + "tmp_folder": self.tmp_folder, 'output_format': self.output_format} # prime and run the jobs n_jobs = min(self.max_jobs, n_files) @@ -98,7 +101,8 @@ class ApplyRegistrationLSF(ApplyRegistrationBase, LSFTask): def apply_for_file(input_path, output_path, transformation_file, fiji_executable, - elastix_directory, tmp_folder, n_threads): + elastix_directory, tmp_folder, n_threads, + output_format): assert os.path.exists(elastix_directory) assert os.path.exists(tmp_folder) @@ -106,6 +110,13 @@ def apply_for_file(input_path, output_path, assert os.path.exists(transformation_file) assert os.path.exists(os.path.split(output_path)[0]) + if output_format == 'tif': + format_str = 'Save as Tiff' + elif output_format == 'bdv': + format_str = 'Save as BigDataViewer .xml/.h5' + else: + assert False, "Invalid output format %s" % output_format + # transformix arguments need to be passed as one string, # with individual arguments comma separated # the argument to transformaix needs to be one large comma separated string @@ -114,7 +125,7 @@ def apply_for_file(input_path, output_path, "inputImageFile=\'%s\'" % input_path, "transformationFile=\'%s\'" % transformation_file, "outputFile=\'%s\'" % output_path, - "outputModality=\'Save as BigDataViewer .xml/.h5\'", + "outputModality=\'%s\'" % format_str, "numThreads=\'%i\'" % n_threads] transformix_argument = ",".join(transformix_argument) transformix_argument = "\"%s\"" % transformix_argument @@ -175,6 +186,8 @@ def apply_registration(job_id, config_path): elastix_directory = config['elastix_directory'] tmp_folder = config['tmp_folder'] working_dir = os.path.join(tmp_folder, 'work_dir%i' % job_id) + output_format = config['output_format'] + os.makedirs(working_dir, exist_ok=True) file_list = config['block_list'] @@ -194,7 +207,8 @@ def apply_registration(job_id, config_path): fu.log("Output: %s" % outfile) apply_for_file(infile, outfile, transformation_file, fiji_executable, - elastix_directory, working_dir, n_threads) + elastix_directory, working_dir, n_threads, + output_format) fu.log_block_success(file_id) fu.log_job_success(job_id) diff --git a/scripts/files/copy_helper.py b/scripts/files/copy_helper.py index f9fba57dcc55c340aa04af20b1b3bc8c3c146deb..5c56fe9057e89481c52e5dfe250a75bc49954c21 100644 --- a/scripts/files/copy_helper.py +++ b/scripts/files/copy_helper.py @@ -51,11 +51,18 @@ def copy_segmentation(src_folder, dst_folder, name): os.symlink(rel_path, lut_out) -def copy_image_data(src_folder, dst_folder): +def copy_image_data(src_folder, dst_folder, exclude_prefixes=[]): # get all image names that need to be copied names = get_image_names() for name in names: + + prefix = name.split('-')[:4] + prefix = '-'.join(prefix) + + if prefix in exclude_prefixes: + continue + name += '.xml' in_path = os.path.join(src_folder, 'images', name) out_path = os.path.join(dst_folder, 'images', name) @@ -94,9 +101,9 @@ def copy_all_tables(src_folder, dst_folder): copy_tables(src_folder, dst_folder, name) -def copy_release_folder(src_folder, dst_folder): +def copy_release_folder(src_folder, dst_folder, exclude_prefixes=[]): # copy static image and misc data - copy_image_data(src_folder, dst_folder) + copy_image_data(src_folder, dst_folder, exclude_prefixes) copy_misc_data(src_folder, dst_folder) copy_segmentations(src_folder, dst_folder) copy_all_tables(src_folder, dst_folder) diff --git a/update_regestration.py b/update_regestration.py index d0146c9e175333d2df98d4649aee985d9462ca77..605b0e16cd9e287adbac5715f5ce3eb7eb90ff04 100755 --- a/update_regestration.py +++ b/update_regestration.py @@ -4,7 +4,12 @@ import os import json import argparse from subprocess import check_output +from concurrent import futures + +import imageio import luigi +import numpy as np +from pybdv import make_bdv from scripts.files import copy_release_folder, make_folder_structure, make_bdv_server_file from scripts.release_helper import add_version @@ -12,14 +17,61 @@ from scripts.extension.registration import ApplyRegistrationLocal, ApplyRegistra from scripts.default_config import get_default_shebang -def get_tags(): +REGION_NAMES = ('AllGlands', + 'CrypticSegment', + 'Glands', + 'Head', + 'PNS', + 'Pygidium', + 'RestOfAnimal', + 'Stomodeum', + 'VNC', + 'ProSPr6-Ref') + + +def get_tags(new_tag): tag = check_output(['git', 'describe', '--abbrev=0']).decode('utf-8').rstrip('\n') - new_tag = tag.split('.') - new_tag[-1] = str(int(new_tag[-1]) + 1) - new_tag = '.'.join(new_tag) + if new_tag is None: + new_tag = tag.split('.') + new_tag[-1] = str(int(new_tag[-1]) + 1) + new_tag = '.'.join(new_tag) return tag, new_tag +def get_out_name(prefix, name): + name = os.path.split(name)[1] + name = os.path.splitext(name)[0] + name = name.split('--')[0] + # TODO split off further extensions here? + # name = name.split('-')[0] + if name in REGION_NAMES: + name = '-'.join([prefix, 'segmented', name]) + elif name.startswith('edu'): # edus are no meds + name = '-'.join([prefix, name]) + else: + name = '-'.join([prefix, name, 'MED']) + return name + + +def copy_file(in_path, out_path, resolution=[.55, .55, .55]): + if os.path.exists(out_path + '.xml'): + return + print("Copy", in_path, "to", out_path) + vol = np.asarray(imageio.volread(in_path + '-ch0.tif')) + downscale_factors = [[2, 2, 2], [2, 2, 2], [2, 2, 2]] + make_bdv(vol, out_path, downscale_factors, + unit='micrometer', resolution=resolution) + + +def copy_to_h5(inputs, output_folder): + print("Copy tifs to bdv/hdf5 in", output_folder) + outputs = [os.path.join(output_folder, os.path.split(inp)[1]) for inp in inputs] + n_jobs = 48 + with futures.ProcessPoolExecutor(n_jobs) as tp: + tasks = [tp.submit(copy_file, inp, outp) for inp, outp in zip(inputs, outputs)] + [t.result() for t in tasks] + + def apply_registration(input_folder, new_folder, transformation_file, source_prefix, target, max_jobs): @@ -35,9 +87,13 @@ def apply_registration(input_folder, new_folder, raise RuntimeError("Did not find any files with prefix %s in %s" % (source_prefix, input_folder)) - output_folder = os.path.join(new_folder, 'images') - # TODO parse names to get output names - output_names = [] + # writing multiple hdf5 files in parallel with the elastix plugin is broken, + # so we write temporary files to tif instead and copy them to hdf5 with python + + # output_folder = os.path.join(new_folder, 'images') + output_folder = os.path.join(tmp_folder, 'outputs') + os.makedirs(output_folder, exist_ok=True) + output_names = [get_out_name(source_prefix, name) for name in inputs] outputs = [os.path.join(output_folder, name) for name in output_names] # update the task config @@ -50,30 +106,35 @@ def apply_registration(input_folder, new_folder, with open(os.path.join(config_dir, 'global.config'), 'w') as f: json.dump(global_config, f) - # TODO more time than 3 hrs? task_config = task.default_task_config() - task_config.update({'mem_limit': 16, 'time_limit': 180}) + task_config.update({'mem_limit': 16, 'time_limit': 240, 'threads_per_job': 4}) with open(os.path.join(config_dir, 'apply_registration.config'), 'w') as f: json.dump(task_config, f) # write path name files to json input_file = os.path.join(tmp_folder, 'input_files.json') + inputs = [os.path.abspath(inpath) for inpath in inputs] with open(input_file, 'w') as f: json.dump(inputs, f) - output_file = os.path.joout(tmp_folder, 'output_files.json') + + output_file = os.path.join(tmp_folder, 'output_files.json') + outputs = [os.path.abspath(outpath) for outpath in outputs] with open(output_file, 'w') as f: json.dump(outputs, f) t = task(tmp_folder=tmp_folder, config_dir=config_dir, max_jobs=max_jobs, input_path_file=input_file, output_path_file=output_file, - transformation_file=transformation_file) - + transformation_file=transformation_file, output_format='tif') ret = luigi.build([t], local_scheduler=True) if not ret: raise RuntimeError("Registration failed") + output_folder = os.path.join(new_folder, 'images') + copy_to_h5(outputs, output_folder) + -def update_regestration(transformation_file, input_folder, source_prefix, target, max_jobs): +def update_regestration(transformation_file, input_folder, source_prefix, target, max_jobs, + new_tag=None): """ Update the prospr segmentation. This is a special case of 'update_patch', that applies a new prospr registration. @@ -83,8 +144,9 @@ def update_regestration(transformation_file, input_folder, source_prefix, target source_prefix [str] - prefix of the source data to apply the registration to target [str] - target of computation max_jobs [int] - max number of jobs for computation + new_tag [str] - new version tag (default: None) """ - tag, new_tag = get_tags() + tag, new_tag = get_tags(new_tag) print("Updating platy browser from", tag, "to", new_tag) # make new folder structure @@ -93,9 +155,10 @@ def update_regestration(transformation_file, input_folder, source_prefix, target make_folder_structure(new_folder) # copy the release folder - copy_release_folder(folder, new_folder) + copy_release_folder(folder, new_folder, exclude_prefixes=[source_prefix]) # apply new registration to all files of the source prefix + transformation_file = os.path.abspath(transformation_file) apply_registration(input_folder, new_folder, transformation_file, source_prefix, target, max_jobs) @@ -119,6 +182,12 @@ if __name__ == '__main__': parser.add_argument('--max_jobs', type=int, default=100, help="Maximal number of jobs used for computation") + parser.add_argument('--new_tag', type=str, default='', + help="New version tag") + args = parser.parse_args() + new_tag = args.new_tag + new_tag = None if new_tag == '' else new_tag + update_regestration(args.transformation_file, args.input_folder, args.source_prefix, - args.target, args.max_jobs) + args.target, args.max_jobs, new_tag)