diff --git a/analysis/correct_intensities.py b/analysis/correct_intensities.py
index 477ac7e7eccc1ae18e81c94fef8779f3b649779b..3fbe83959120051eb92710d0206381aaa337e40e 100644
--- a/analysis/correct_intensities.py
+++ b/analysis/correct_intensities.py
@@ -1,8 +1,11 @@
 #! /g/arendt/EM_6dpf_segmentation/platy-browser-data/software/conda/miniconda3/envs/platybrowser/bin/python
 import os
+import json
+from concurrent import futures
+
 import numpy as np
-import h5py
 import z5py
+import h5py
 import vigra

 from scipy.ndimage.morphology import binary_dilation
@@ -75,6 +78,41 @@ def correct_intensities(target='slurm', max_jobs=250):
                         tmp_path=tmp_path)


+def check_chunks():
+    import nifty.tools as nt
+    path = '/g/kreshuk/pape/Work/platy_tmp.n5'
+    key = 'data'
+    f = z5py.File(path, 'r')
+    ds = f[key]
+
+    shape = ds.shape
+    chunks = ds.chunks
+    blocking = nt.blocking([0, 0, 0], shape, chunks)
+
+    def check_chunk(block_id):
+        print("Check", block_id, "/", blocking.numberOfBlocks)
+        block = blocking.getBlock(block_id)
+        chunk_id = tuple(beg // ch for beg, ch in zip(block.begin, chunks))
+        try:
+            ds.read_chunk(chunk_id)
+        except RuntimeError:
+            print("Failed:", chunk_id)
+            return chunk_id
+
+    print("Start checking", blocking.numberOfBlocks, "blocks")
+    with futures.ThreadPoolExecutor(32) as tp:
+        tasks = [tp.submit(check_chunk, block_id) for block_id in range(blocking.numberOfBlocks)]
+        results = [t.result() for t in tasks]
+
+    results = [res for res in results if res is not None]
+    print()
+    print(results)
+    print()
+    with open('./failed_chunks.json', 'w') as f:
+        json.dump(results, f)
+
+
 if __name__ == '__main__':
     # combine_mask()
-    correct_intensities('local', 64)
+    correct_intensities('slurm', 125)
+    # check_chunks()
diff --git a/analysis/extent.py b/analysis/extent.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a8c7fc6fb6c03d1e5a08c2a34e0b1ea128006a4
--- /dev/null
+++ b/analysis/extent.py
@@ -0,0 +1,38 @@
+import h5py
+import numpy as np
+
+
+def number_of_voxels():
+    p = '../data/rawdata/sbem-6dpf-1-whole-raw.h5'
+    with h5py.File(p, 'r') as f:
+        ds = f['t00000/s00/0/cells']
+        shape = ds.shape
+    n_vox = np.prod(list(shape))
+    print("Number of voxels:")
+    print(n_vox)
+    print("corresponds to")
+    print(float(n_vox) / 1e12, "TVoxel")
+
+
+def animal():
+    p = '../data/rawdata/sbem-6dpf-1-whole-mask-inside.h5'
+    with h5py.File(p, 'r') as f:
+        mask = f['t00000/s00/0/cells'][:]
+
+    bb = np.where(mask > 0)
+    mins = [b.min() for b in bb]
+    maxs = [b.max() for b in bb]
+    size = [ma - mi for mi, ma in zip(mins, maxs)]
+
+    print("Animal size in pixels:")
+    print(size)
+    res = [.4, .32, .32]
+
+    size = [si * re for si, re in zip(size, res)]
+    print("Animal size in microns:")
+    print(size)
+
+
+if __name__ == '__main__':
+    number_of_voxels()
+    animal()
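A quick sanity check of the bounding-box arithmetic in animal(): np.where returns one index array per axis, so the per-axis min/max span the mask. Note that max - min is the span between the first and last foreground voxel; the full extent is one voxel larger, which is negligible at this scale. A minimal numpy-only sketch with a toy mask:

import numpy as np

# toy mask with a known foreground box: z in [2, 4], y in [1, 8], x in [3, 6]
mask = np.zeros((10, 10, 10), dtype='uint8')
mask[2:5, 1:9, 3:7] = 1

bb = np.where(mask > 0)
size_pix = [int(b.max()) - int(b.min()) for b in bb]
print(size_pix)  # [2, 7, 3] -- span between first and last voxel per axis

res = [.4, .32, .32]  # micron per voxel in (z, y, x), as in animal()
print([s * r for s, r in zip(size_pix, res)])  # [0.8, 2.24, 0.96] up to float rounding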
diff --git a/scripts/transformation/intensity_correction.py b/scripts/transformation/intensity_correction.py
index 8ab1666606740c0b76a396a3b22f2aece7e9a6fc..be30d54618ce4dc52577afa77661917c77a692fe 100644
--- a/scripts/transformation/intensity_correction.py
+++ b/scripts/transformation/intensity_correction.py
@@ -50,7 +50,7 @@ def downsample(ref_path, in_path, in_key, out_path, resolution,
     config_dir = os.path.join(tmp_folder, 'configs')

     task = DownscalingWorkflow
     config = task.get_config()['downscaling']
-    config.update({'library': 'skimage', 'time_limit': 240, 'mem_limit': 4})
+    config.update({'library': 'skimage', 'time_limit': 360, 'mem_limit': 8})
     with open(os.path.join(config_dir, 'downscaling.config'), 'w') as f:
         json.dump(config, f)
@@ -104,7 +104,7 @@ def intensity_correction(in_path, out_path, mask_path, mask_key,

     task = LinearTransformationWorkflow
     conf = task.get_config()['linear']
-    conf.update({'time_limit': 600, 'mem_limit': 4})
+    conf.update({'time_limit': 360, 'mem_limit': 8})
     with open(os.path.join(config_dir, 'linear.config'), 'w') as f:
         json.dump(conf, f)
diff --git a/scripts/util.py b/scripts/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..9114d76d6f1fb248d7b749195a1d927362b51a90
--- /dev/null
+++ b/scripts/util.py
@@ -0,0 +1,9 @@
+import h5py  # TODO exchange with elf.io.open_file
+
+
+def add_max_id(path, key):
+    with h5py.File(path, 'a') as f:
+        ds = f[key]
+        data = ds[:]
+        max_id = int(data.max())
+        ds.attrs['maxId'] = max_id
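add_max_id stores the largest label id as a maxId attribute on the BigDataViewer-style dataset, presumably so downstream consumers can look up the label range without scanning the volume. A minimal sketch of the same read-then-annotate logic on a throwaway file (demo.h5 is a hypothetical name):

import h5py
import numpy as np

# write a tiny label volume and annotate it with the same attribute
with h5py.File('demo.h5', 'w') as f:
    data = np.array([[0, 3], [7, 2]], dtype='uint32')
    ds = f.create_dataset('t00000/s00/0/cells', data=data)
    ds.attrs['maxId'] = int(data.max())  # same attribute add_max_id writes

with h5py.File('demo.h5', 'r') as f:
    print(f['t00000/s00/0/cells'].attrs['maxId'])  # -> 7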
diff --git a/update_registration.py b/update_registration.py
index 56ca9c81c61d0b0d4bef32b72cfc42c40faed1de..4ecc8d8059b5d6d9ce22c546d912dbd2c0ca9a86 100755
--- a/update_registration.py
+++ b/update_registration.py
@@ -17,7 +17,9 @@ from scripts.files.xml_utils import get_h5_path_from_xml, write_simple_xml
 from scripts.release_helper import add_version
 from scripts.extension.registration import ApplyRegistrationLocal, ApplyRegistrationSlurm
 from scripts.default_config import get_default_shebang
+from scripts.attributes.base_attributes import base_attributes
 from scripts.attributes.genes import create_auxiliary_gene_file, write_genes_table
+from scripts.util import add_max_id


 REGION_NAMES = ('AllGlands',
@@ -74,29 +76,20 @@
     [t.result() for t in tasks]


-def apply_registration(input_folder, new_folder,
-                       transformation_file, source_prefix,
-                       target, max_jobs, name_parser):
+def registration_impl(inputs, outputs, transformation_file, output_folder,
+                      tmp_folder, target, max_jobs, interpolation, dtype='unsigned char'):
     task = ApplyRegistrationSlurm if target == 'slurm' else ApplyRegistrationLocal

-    tmp_folder = './tmp_registration'
-    os.makedirs(tmp_folder, exist_ok=True)
-
-    # find all input files
-    names = os.listdir(input_folder)
-    inputs = [os.path.join(input_folder, name) for name in names]
-
-    if len(inputs) == 0:
-        raise RuntimeError("Did not find any files with prefix %s in %s" % (source_prefix,
-                                                                            input_folder))
-
-    # writing multiple hdf5 files in parallel with the elastix plugin is broken,
-    # so we write temporary files to tif instead and copy them to hdf5 with python
+    # write path name files to json
+    input_file = os.path.join(tmp_folder, 'input_files.json')
+    inputs = [os.path.abspath(inpath) for inpath in inputs]
+    with open(input_file, 'w') as f:
+        json.dump(inputs, f)

-    # output_folder = os.path.join(new_folder, 'images')
-    output_folder = os.path.join(tmp_folder, 'outputs')
-    os.makedirs(output_folder, exist_ok=True)
-    output_names = [name_parser(source_prefix, name) for name in inputs]
-    outputs = [os.path.join(output_folder, name) for name in output_names]
+    output_file = os.path.join(tmp_folder, 'output_files.json')
+    outputs = [os.path.abspath(outpath) for outpath in outputs]
+    with open(output_file, 'w') as f:
+        json.dump(outputs, f)

     # update the task config
     config_dir = os.path.join(tmp_folder, 'configs')
@@ -109,24 +102,11 @@
         json.dump(global_config, f)

     task_config = task.default_task_config()
-    task_config.update({'mem_limit': 16, 'time_limit': 240, 'threads_per_job': 4})
+    task_config.update({'mem_limit': 16, 'time_limit': 240, 'threads_per_job': 4,
+                        'ResultImagePixelType': dtype})
     with open(os.path.join(config_dir, 'apply_registration.config'), 'w') as f:
         json.dump(task_config, f)

-    # write path name files to json
-    input_file = os.path.join(tmp_folder, 'input_files.json')
-    inputs = [os.path.abspath(inpath) for inpath in inputs]
-    with open(input_file, 'w') as f:
-        json.dump(inputs, f)
-
-    output_file = os.path.join(tmp_folder, 'output_files.json')
-    outputs = [os.path.abspath(outpath) for outpath in outputs]
-    with open(output_file, 'w') as f:
-        json.dump(outputs, f)
-
-    # once we have other sources that are registered to the em space,
-    # we should expose the interpolation mode
-    interpolation = 'nearest'
     t = task(tmp_folder=tmp_folder, config_dir=config_dir, max_jobs=max_jobs,
              input_path_file=input_file, output_path_file=output_file,
              transformation_file=transformation_file, output_format='tif',
@@ -135,13 +115,42 @@
     if not ret:
         raise RuntimeError("Registration failed")

-    output_folder = os.path.join(new_folder, 'images')
     copy_to_h5(outputs, output_folder)


-def update_prospr(new_folder, target):
+# TODO expose interpolation mode
+def apply_registration(input_folder, new_folder,
+                       transformation_file, source_prefix,
+                       target, max_jobs, name_parser):
+    tmp_folder = './tmp_registration'
+    os.makedirs(tmp_folder, exist_ok=True)
+
+    # find all input files
+    names = os.listdir(input_folder)
+    inputs = [os.path.join(input_folder, name) for name in names]
+    # we ignore subfolders, because we might have some special volumes there
+    # (e.g. virtual cells for prospr, which are treated as segmentation)
+    inputs = [inp for inp in inputs if not os.path.isdir(inp)]
+
+    if len(inputs) == 0:
+        raise RuntimeError("Did not find any files with prefix %s in %s" % (source_prefix,
+                                                                            input_folder))
+
+    # writing multiple hdf5 files in parallel with the elastix plugin is broken,
+    # so we write temporary files to tif instead and copy them to hdf5 with python
+    output_folder = os.path.join(tmp_folder, 'outputs')
+    os.makedirs(output_folder, exist_ok=True)
+    output_names = [name_parser(source_prefix, name) for name in inputs]
+    outputs = [os.path.join(output_folder, name) for name in output_names]
+
+    output_folder = os.path.join(new_folder, 'images')
+    registration_impl(inputs, outputs, transformation_file, output_folder,
+                      tmp_folder, target, max_jobs, interpolation='nearest')
+
+
+def update_prospr(new_folder, input_folder, transformation_file, target, max_jobs):

-    # update the auxiliaty gene volume
+    # update the auxiliary gene volume
     image_folder = os.path.join(new_folder, 'images')
     aux_out_path = os.path.join(new_folder, 'misc', 'prospr-6dpf-1-whole_meds_all_genes.h5')
     create_auxiliary_gene_file(image_folder, aux_out_path)
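apply_registration still derives the output names through the caller-supplied name_parser before delegating to registration_impl. The repository's actual parser is not shown in this diff; the function below is a purely hypothetical example matching the (source_prefix, input_path) -> output_name call signature:

import os

# hypothetical name_parser, only to illustrate the expected signature
def name_parser(prefix, path):
    name = os.path.splitext(os.path.basename(path))[0]
    # e.g. strip a '--prosprspaceMEDs' style suffix and prepend the source prefix
    name = name.split('--')[0]
    return '%s-%s.tif' % (prefix, name)

print(name_parser('prospr-6dpf-1-whole', '/inputs/GeneX--prosprspaceMEDs.tif'))
# -> prospr-6dpf-1-whole-GeneX.tif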
@@ -170,7 +179,33 @@
     write_genes_table(seg_path, aux_out_path, out_path, labels,
                       tmp_folder, target)

-    # TODO we should register the virtual cells here as well
+    # register virtual cells
+    vc_name = 'prospr-6dpf-1-whole-virtual-cells-labels'
+    vc_path = os.path.join(input_folder, 'virtual_cells', 'virtual_cells--prosprspaceMEDs.tif')
+    inputs = [vc_path]
+    outputs = [os.path.join(tmp_folder, 'outputs', vc_name)]
+    output_folder = os.path.join(new_folder, 'segmentations')
+    registration_impl(inputs, outputs, transformation_file, output_folder,
+                      tmp_folder, target, max_jobs, interpolation='nearest',
+                      dtype='unsigned short')
+
+    # compute the table for the virtual cells
+    vc_table_folder = os.path.join(new_folder, 'tables', vc_name)
+    os.makedirs(vc_table_folder, exist_ok=True)
+    vc_table = os.path.join(vc_table_folder, 'default.csv')
+    if os.path.exists(vc_table):
+        assert os.path.islink(vc_table), vc_table
+        print("Remove link to previous table:", vc_table)
+        os.unlink(vc_table)
+    vc_path = os.path.join(new_folder, 'segmentations', vc_name + '.h5')
+    key = 't00000/s00/0/cells'
+    add_max_id(vc_path, key)
+
+    assert os.path.exists(vc_path), vc_path
+    resolution = [.55, .55, .55]
+    base_attributes(vc_path, key, vc_table, resolution,
+                    tmp_folder, target, max_jobs,
+                    correct_anchors=False)


 # we should encode the source prefix and the transformation file to be used
@@ -215,7 +250,7 @@ def update_registration(transformation_file, input_folder, source_prefix, target
                        target, max_jobs, name_parser)

     if source_prefix == "prospr-6dpf-1-whole":
-        update_prospr(new_folder, target)
+        update_prospr(new_folder, input_folder, transformation_file, target, max_jobs)
     else:
         raise NotImplementedError
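base_attributes computes the default attribute table for the registered virtual cells at isotropic 0.55 micron resolution; it runs as a cluster task, so its output is not shown here. As a rough, numpy-only illustration of the kind of per-label geometry such a default table holds (the columns below are assumptions, not the real schema):

import numpy as np

def toy_label_table(seg, resolution):
    # per-label bounding boxes, scaled from voxel indices to microns
    rows = []
    for label_id in np.unique(seg):
        if label_id == 0:  # 0 is background by convention
            continue
        coords = np.where(seg == label_id)
        bb_min = [float(c.min()) * r for c, r in zip(coords, resolution)]
        bb_max = [(float(c.max()) + 1) * r for c, r in zip(coords, resolution)]
        rows.append([int(label_id)] + bb_min + bb_max)
    return rows

seg = np.zeros((4, 4, 4), dtype='uint16')
seg[1:3, 1:3, 1:3] = 1
print(toy_label_table(seg, resolution=[.55, .55, .55]))
# -> [[1, 0.55, 0.55, 0.55, 1.65, 1.65, 1.65]] (up to float rounding)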