Commit a6765622 authored by Constantin Pape

Register virtual-cells with prospr

parent 0570f294
 #! /g/arendt/EM_6dpf_segmentation/platy-browser-data/software/conda/miniconda3/envs/platybrowser/bin/python
 import os
+import json
+from concurrent import futures
 import numpy as np
+import h5py
 import z5py
-import h5py
 import vigra
 from scipy.ndimage.morphology import binary_dilation
@@ -75,6 +78,41 @@ def correct_intensities(target='slurm', max_jobs=250):
                          tmp_path=tmp_path)
+def check_chunks():
+    import nifty.tools as nt
+    path = '/g/kreshuk/pape/Work/platy_tmp.n5'
+    key = 'data'
+    f = z5py.File(path, 'r')
+    ds = f[key]
+    shape = ds.shape
+    chunks = ds.chunks
+    blocking = nt.blocking([0, 0, 0], shape, chunks)
+
+    def check_chunk(block_id):
+        print("Check", block_id, "/", blocking.numberOfBlocks)
+        block = blocking.getBlock(block_id)
+        chunk_id = tuple(beg // ch for beg, ch in zip(block.begin, chunks))
+        try:
+            ds.read_chunk(chunk_id)
+        except RuntimeError:
+            print("Failed:", chunk_id)
+            return chunk_id
+
+    print("Start checking", blocking.numberOfBlocks, "blocks")
+    with futures.ThreadPoolExecutor(32) as tp:
+        tasks = [tp.submit(check_chunk, block_id) for block_id in range(blocking.numberOfBlocks)]
+        results = [t.result() for t in tasks]
+
+    # keep only the chunk ids that failed to read
+    results = [res for res in results if res is not None]
+    print()
+    print(results)
+    print()
+
+    with open('./failed_chunks.json', 'w') as f:
+        json.dump(results, f)
 if __name__ == '__main__':
     # combine_mask()
-    correct_intensities('local', 64)
+    correct_intensities('slurm', 125)
+    # check_chunks()
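As a usage note (not part of the commit): the chunk ids that check_chunks writes to failed_chunks.json can be mapped back to voxel coordinates, e.g. to re-copy only the broken regions. A minimal sketch; the chunk shape here is a made-up placeholder, in practice it would be read from the dataset:

import json

chunk_shape = (64, 64, 64)  # hypothetical; read ds.chunks from the dataset in practice

with open('./failed_chunks.json') as f:
    failed = json.load(f)

for chunk_id in failed:
    # chunk id * chunk shape gives the start of the chunk in voxel coordinates
    begin = [cid * ch for cid, ch in zip(chunk_id, chunk_shape)]
    end = [b + ch for b, ch in zip(begin, chunk_shape)]
    print("Chunk", tuple(chunk_id), "covers voxels", begin, "to", end)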
+import h5py
+import numpy as np
+
+
+def number_of_voxels():
+    p = '../data/rawdata/sbem-6dpf-1-whole-raw.h5'
+    with h5py.File(p, 'r') as f:
+        ds = f['t00000/s00/0/cells']
+        shape = ds.shape
+    n_vox = np.prod(list(shape))
+    print("Number of voxels:")
+    print(n_vox)
+    print("corresponds to")
+    print(float(n_vox) / 1e12, "TVoxel")
+
+
+def animal():
+    p = '../data/rawdata/sbem-6dpf-1-whole-mask-inside.h5'
+    with h5py.File(p, 'r') as f:
+        mask = f['t00000/s00/0/cells'][:]
+    bb = np.where(mask > 0)
+    mins = [b.min() for b in bb]
+    maxs = [b.max() for b in bb]
+    size = [ma - mi for mi, ma in zip(mins, maxs)]
+    print("Animal size in pixels:")
+    print(size)
+    res = [.4, .32, .32]
+    size = [si * re for si, re in zip(size, res)]
+    print("Animal size in microns:")
+    print(size)
+
+
+if __name__ == '__main__':
+    number_of_voxels()
+    animal()
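For intuition (values made up, not from the data): the pixel-to-micron conversion in animal() is a per-axis multiplication with the anisotropic resolution.

size_pix = [2000, 5000, 5000]   # hypothetical z, y, x extent in pixels
res = [.4, .32, .32]            # micron per pixel, as in animal()
size_um = [si * re for si, re in zip(size_pix, res)]
print(size_um)                  # [800.0, 1600.0, 1600.0] micron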
@@ -50,7 +50,7 @@ def downsample(ref_path, in_path, in_key, out_path, resolution,
     config_dir = os.path.join(tmp_folder, 'configs')
     task = DownscalingWorkflow
     config = task.get_config()['downscaling']
-    config.update({'library': 'skimage', 'time_limit': 240, 'mem_limit': 4})
+    config.update({'library': 'skimage', 'time_limit': 360, 'mem_limit': 8})
     with open(os.path.join(config_dir, 'downscaling.config'), 'w') as f:
         json.dump(config, f)
@@ -104,7 +104,7 @@ def intensity_correction(in_path, out_path, mask_path, mask_key,
     task = LinearTransformationWorkflow
     conf = task.get_config()['linear']
-    conf.update({'time_limit': 600, 'mem_limit': 4})
+    conf.update({'time_limit': 360, 'mem_limit': 8})
     with open(os.path.join(config_dir, 'linear.config'), 'w') as f:
         json.dump(conf, f)
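Both hunks follow the same cluster_tools configuration pattern: fetch the default config of a sub-task, bump the resource limits, and serialize it as JSON into the config directory. A minimal sketch of that pattern; the helper name is hypothetical, and it assumes time_limit is in minutes and mem_limit in GB:

import json
import os

def write_task_config(task, name, config_dir, time_limit, mem_limit):
    conf = task.get_config()[name]            # default config of the sub-task
    conf.update({'time_limit': time_limit,    # assumed: minutes per job
                 'mem_limit': mem_limit})     # assumed: GB per job
    with open(os.path.join(config_dir, '%s.config' % name), 'w') as f:
        json.dump(conf, f)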
...
+import h5py  # TODO exchange with elf.io.open_file
+
+
+def add_max_id(path, key):
+    # open in append mode so the attribute can be written
+    with h5py.File(path, 'a') as f:
+        ds = f[key]
+        data = ds[:]
+        max_id = int(data.max())
+        ds.attrs['maxId'] = max_id
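Hypothetical usage (the file path is illustrative, the key is the BigDataViewer layout used throughout this repo): add_max_id stamps the maxId attribute that the viewer reads from segmentation datasets.

add_max_id('sbem-6dpf-1-whole-segmented-cells.h5', 't00000/s00/0/cells')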
@@ -17,7 +17,9 @@ from scripts.files.xml_utils import get_h5_path_from_xml, write_simple_xml
 from scripts.release_helper import add_version
 from scripts.extension.registration import ApplyRegistrationLocal, ApplyRegistrationSlurm
 from scripts.default_config import get_default_shebang
+from scripts.attributes.base_attributes import base_attributes
 from scripts.attributes.genes import create_auxiliary_gene_file, write_genes_table
+from scripts.util import add_max_id

 REGION_NAMES = ('AllGlands',
@@ -74,29 +76,20 @@ def copy_to_h5(inputs, output_folder):
     [t.result() for t in tasks]
-def apply_registration(input_folder, new_folder,
-                       transformation_file, source_prefix,
-                       target, max_jobs, name_parser):
+def registration_impl(inputs, outputs, transformation_file, output_folder,
+                      tmp_folder, target, max_jobs, interpolation, dtype='unsigned char'):
     task = ApplyRegistrationSlurm if target == 'slurm' else ApplyRegistrationLocal
-    tmp_folder = './tmp_registration'
-    os.makedirs(tmp_folder, exist_ok=True)
-
-    # find all input files
-    names = os.listdir(input_folder)
-    inputs = [os.path.join(input_folder, name) for name in names]
-    if len(inputs) == 0:
-        raise RuntimeError("Did not find any files with prefix %s in %s" % (source_prefix,
-                                                                            input_folder))
-
-    # writing multiple hdf5 files in parallel with the elastix plugin is broken,
-    # so we write temporary files to tif instead and copy them to hdf5 with python
-    # output_folder = os.path.join(new_folder, 'images')
-    output_folder = os.path.join(tmp_folder, 'outputs')
-    os.makedirs(output_folder, exist_ok=True)
-    output_names = [name_parser(source_prefix, name) for name in inputs]
-    outputs = [os.path.join(output_folder, name) for name in output_names]
+    # write path name files to json
+    input_file = os.path.join(tmp_folder, 'input_files.json')
+    inputs = [os.path.abspath(inpath) for inpath in inputs]
+    with open(input_file, 'w') as f:
+        json.dump(inputs, f)
+    output_file = os.path.join(tmp_folder, 'output_files.json')
+    outputs = [os.path.abspath(outpath) for outpath in outputs]
+    with open(output_file, 'w') as f:
+        json.dump(outputs, f)

     # update the task config
     config_dir = os.path.join(tmp_folder, 'configs')
@@ -109,24 +102,11 @@ def apply_registration(input_folder, new_folder,
         json.dump(global_config, f)

     task_config = task.default_task_config()
-    task_config.update({'mem_limit': 16, 'time_limit': 240, 'threads_per_job': 4})
+    task_config.update({'mem_limit': 16, 'time_limit': 240, 'threads_per_job': 4,
+                        'ResultImagePixelType': dtype})
     with open(os.path.join(config_dir, 'apply_registration.config'), 'w') as f:
         json.dump(task_config, f)

-    # write path name files to json
-    input_file = os.path.join(tmp_folder, 'input_files.json')
-    inputs = [os.path.abspath(inpath) for inpath in inputs]
-    with open(input_file, 'w') as f:
-        json.dump(inputs, f)
-    output_file = os.path.join(tmp_folder, 'output_files.json')
-    outputs = [os.path.abspath(outpath) for outpath in outputs]
-    with open(output_file, 'w') as f:
-        json.dump(outputs, f)
-
-    # once we have other sources that are registered to the em space,
-    # we should expose the interpolation mode
-    interpolation = 'nearest'
     t = task(tmp_folder=tmp_folder, config_dir=config_dir, max_jobs=max_jobs,
              input_path_file=input_file, output_path_file=output_file,
              transformation_file=transformation_file, output_format='tif',
@@ -135,13 +115,42 @@ def apply_registration(input_folder, new_folder,
     if not ret:
         raise RuntimeError("Registration failed")

-    output_folder = os.path.join(new_folder, 'images')
     copy_to_h5(outputs, output_folder)
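An illustrative call of the refactored entry point (all arguments hypothetical): intensity channels can keep the default dtype of 'unsigned char', while label volumes request 'unsigned short' so that ids above 255 are not clipped in the registered output.

registration_impl(inputs, outputs, transformation_file, output_folder,
                  tmp_folder='./tmp_registration', target='local', max_jobs=8,
                  interpolation='nearest', dtype='unsigned short')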
+# TODO expose interpolation mode
+def apply_registration(input_folder, new_folder,
+                       transformation_file, source_prefix,
+                       target, max_jobs, name_parser):
+    tmp_folder = './tmp_registration'
+    os.makedirs(tmp_folder, exist_ok=True)
+
+    # find all input files
+    names = os.listdir(input_folder)
+    inputs = [os.path.join(input_folder, name) for name in names]
+    # we ignore subfolders, because we might have some special volumes there
+    # (e.g. virtual cells for prospr, which are treated as segmentation)
+    inputs = [inp for inp in inputs if not os.path.isdir(inp)]
+    if len(inputs) == 0:
+        raise RuntimeError("Did not find any files with prefix %s in %s" % (source_prefix,
+                                                                            input_folder))
+
+    # writing multiple hdf5 files in parallel with the elastix plugin is broken,
+    # so we write temporary files to tif instead and copy them to hdf5 with python
+    output_folder = os.path.join(tmp_folder, 'outputs')
+    os.makedirs(output_folder, exist_ok=True)
+    output_names = [name_parser(source_prefix, name) for name in inputs]
+    outputs = [os.path.join(output_folder, name) for name in output_names]
+
+    output_folder = os.path.join(new_folder, 'images')
+    registration_impl(inputs, outputs, transformation_file, output_folder,
+                      tmp_folder, target, max_jobs, interpolation='nearest')
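apply_registration delegates output naming to the name_parser callback. A hypothetical sketch of such a parser (the real one is defined elsewhere in the repository), mapping a MED tif name to the target naming scheme:

import os

def name_parser(prefix, path):
    # e.g. 'SomeGene--prosprspaceMEDs.tif' -> '<prefix>-SomeGene.tif'
    name = os.path.split(path)[1]
    name = name.split('--')[0]
    return '%s-%s.tif' % (prefix, name)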
-def update_prospr(new_folder, target):
+def update_prospr(new_folder, input_folder, transformation_file, target, max_jobs):
     # update the auxiliary gene volume
     image_folder = os.path.join(new_folder, 'images')
     aux_out_path = os.path.join(new_folder, 'misc', 'prospr-6dpf-1-whole_meds_all_genes.h5')
     create_auxiliary_gene_file(image_folder, aux_out_path)
@@ -170,7 +179,33 @@ def update_prospr(new_folder, target):
     write_genes_table(seg_path, aux_out_path, out_path,
                       labels, tmp_folder, target)

-    # TODO we should register the virtual cells here as well
+    # register virtual cells
+    vc_name = 'prospr-6dpf-1-whole-virtual-cells-labels'
+    vc_path = os.path.join(input_folder, 'virtual_cells', 'virtual_cells--prosprspaceMEDs.tif')
+    inputs = [vc_path]
+    outputs = [os.path.join(tmp_folder, 'outputs', vc_name)]
+    output_folder = os.path.join(new_folder, 'segmentations')
+    registration_impl(inputs, outputs, transformation_file, output_folder,
+                      tmp_folder, target, max_jobs, interpolation='nearest',
+                      dtype='unsigned short')
+
+    # compute the table for the virtual cells
+    vc_table_folder = os.path.join(new_folder, 'tables', vc_name)
+    os.makedirs(vc_table_folder, exist_ok=True)
+    vc_table = os.path.join(vc_table_folder, 'default.csv')
+    if os.path.exists(vc_table):
+        assert os.path.islink(vc_table), vc_table
+        print("Remove link to previous gene table:", vc_table)
+        os.unlink(vc_table)
+
+    vc_path = os.path.join(new_folder, 'segmentations', vc_name + '.h5')
+    assert os.path.exists(vc_path), vc_path
+    key = 't00000/s00/0/cells'
+    add_max_id(vc_path, key)
+    resolution = [.55, .55, .55]
+    base_attributes(vc_path, key, vc_table, resolution,
+                    tmp_folder, target, max_jobs,
+                    correct_anchors=False)
     # we should encode the source prefix and the transformation file to be used
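An optional sanity check (not in the commit; file paths are illustrative, and the tab separator and the 'label_id' column name are assumptions about the table format): confirm that the registered volume carries maxId and that the table ids stay within it.

import h5py
import pandas as pd

vc_path = 'segmentations/prospr-6dpf-1-whole-virtual-cells-labels.h5'  # as produced above
key = 't00000/s00/0/cells'
vc_table = 'tables/prospr-6dpf-1-whole-virtual-cells-labels/default.csv'

with h5py.File(vc_path, 'r') as f:
    max_id = f[key].attrs['maxId']
table = pd.read_csv(vc_table, sep='\t')     # assumption: tab-separated
assert table['label_id'].max() <= max_id    # assumption: 'label_id' column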
@@ -215,7 +250,7 @@ def update_registration(transformation_file, input_folder, source_prefix, target
                         target, max_jobs, name_parser)

     if source_prefix == "prospr-6dpf-1-whole":
-        update_prospr(new_folder, target)
+        update_prospr(new_folder, input_folder, transformation_file, target, max_jobs)
     else:
         raise NotImplementedError
...