Working version of subvolume extraction

7fe70128 · Constantin Pape · 3bbef304 · 7fe70128 · 7fe70128
Commit 7fe70128 authored 5 years ago by Constantin Pape
--- a/extract_from_platybrowser.py
+++ b/extract_from_platybrowser.py
-from scripts.export.extract_subvolume import make_cutout
+import argparse
+import os
+import pandas as pd
+from scripts.export.extract_subvolume import make_cutout, get_res_level, parse_coordinate, name_to_path


+def get_bounding_box(tag, lower, upper, table_name, table_id):
+
+    # validate the inputs
+    have_coordinates = lower and upper
+    have_table = table_name and table_id is not None
+
+    assert have_coordinates != have_table, "Need one of coordinates or table"
+    if have_coordinates:
+        print("Converting coordinates from physical coordinates %s:%s" % (lower,
+                                                                          upper))
+        bb_start = parse_coordinate(lower)
+        bb_stop = parse_coordinate(upper)
+        print("to pixel coordinates %s:%s" % (str(bb_start),
+                                              str(bb_stop)))
+    else:
+        name = os.path.splitext(os.path.split(name_to_path(table_name))[1])[0]
+        table_path = os.path.join('data', tag, 'tables', name, 'base.csv')
+        assert os.path.exists(table_path), "Could not find table for %s at %s" % (table_name,
+                                                                                  table_path)
+        table = pd.read_csv(table_path, sep='\t')
+        sub_table = table.loc[table['label_id'] == table_id]
+        assert sub_table.shape[0] == 1
+        for row in sub_table.itertuples(index=False):
+            bb_start = [row.bb_min_x, row.bb_min_y, row.bb_min_z]
+            bb_stop = [row.bb_max_x, row.bb_max_y, row.bb_max_z]
+        print("Read bounding box from table %s for id %i:" % (table_name, table_id))
+        print(bb_start, ":", bb_stop)
+    return bb_start, bb_stop
+
+
+# TODO
+# - expose out_format as argument
+# - saving multiple names in one go currently only works for h5 and n5, fix this!
 if __name__ == '__main__':
    levels = get_res_level()
    res_string = " ".join("%i: %s" % (lev, res) for lev, res in enumerate(levels))
@@ -8,21 +44,49 @@ if __name__ == '__main__':
                  "The levels range from 0 to 6 with resolutions in micrometer: "
                  "%s" % res_string)

-    parser = argparse.ArgumentParser("Make a cutout from the platynereis EM data-set and save it as tif stack.")
+    parser = argparse.ArgumentParser("Make a cutout from the platynereis EM data-set.")
+    # Mandatory arguments: which tag and scale
+    parser.add_argument("tag", type=str, help="Version tag of the data to extract from")
    parser.add_argument("scale_level", type=int, help=scale_help)
-    parser.add_argument("lower_corner", type=str, help="Lower corner of the bounding box to cut out")
-    parser.add_argument("upper_corner", type=str, help="Upper corner of the bounding box to cut out")
-    parser.add_argument("save_file", type=str, help="Where to save the cutout.")
+
+    # Optional: data names
+    parser.add_argument("--names", type=str, nargs='+', default=['raw'],
+                        help=("Names of the data to be extracted.\
+                              Choose from 'raw', 'cells', 'nuclei', 'chromatin', 'cilia'"))
+
+    # Optional: select bounding box from platy browser coordinates
+    parser.add_argument("--lower_corner", type=str, default='',
+                        help="Lower corner of the bounding box to cut out",)
+    parser.add_argument("--upper_corner", type=str, default='',
+                        help="Upper corner of the bounding box to cut out")
+
+    # Optional: Select bounding box from table and id
+    parser.add_argument('--table_name', type=str, default='',
+                        help="Table from where to read the bounding box, can be from the same option as names")
+    parser.add_argument('--table_id', type=int, default=None,
+                        help="Id for bounding box")
+
+    # Optional: select where to save the data
+    parser.add_argument("--save_file", type=str, help="Where to save the cutout.",
+                        default='')
    args = parser.parse_args()

-    print("Converting coordinates from physical coordinates %s:%s" % (args.lower_corner,
-                                                                      args.upper_corner))
-    bb_start = parse_coordinate(args.lower_corner)
-    bb_stop = parse_coordinate(args.upper_corner)
-    print("to pixel coordinates %s:%s" % (str(bb_start),
-                                          str(bb_stop)))
-
-    print("Extracting raw data")
-    raw = make_cutout(args.scale_level, bb_start, bb_stop)
-    print("Saving data to tif-stack %s" % args.save_file)
-    save_tif(raw, args.save_file)
+    # 1.) validate the tag
+    tag = args.tag
+    folder = os.path.join('data', tag)
+    assert os.path.exists(folder), "Invalid tag %s" % tag
+
+    # 2.) figure out the bounding box
+    lower = args.lower_corner
+    upper = args.upper_corner
+    table_name = args.table_name
+    table_id = args.table_id
+    bb_start, bb_stop = get_bounding_box(tag, lower, upper, table_name, table_id)
+
+    # 3.) extract data for all names
+    names = args.names
+    save_file = args.save_file
+    scale = args.scale_level
+    for name in names:
+        print("Extracting data for %s" % name)
+        make_cutout(tag, name, scale, bb_start, bb_stop, save_file)
--- a/scripts/export/extract_subvolume.py
+++ b/scripts/export/extract_subvolume.py
-import argparse
 import os
 import h5py
 import imageio
@@ -6,6 +5,17 @@ import imageio
 from ..files.xml_utils import get_h5_path_from_xml


+def parse_coordinate(coord):
+    coord = coord.rstrip('\n')
+    pos_start = coord.find('(') + 1
+    pos_stop = coord.find(')')
+    coord = coord[pos_start:pos_stop]
+    coord = coord.split(',')
+    coord = [float(co) for co in coord]
+    assert len(coord) == 3, "Coordinate conversion failed"
+    return coord
+
+
 def get_res_level(level=None):
    res0 = [.01, .01, .025]
    res1 = [.02, .02, .025]
@@ -18,22 +28,23 @@ def get_res_level(level=None):
        return resolutions[level]


+# TODO figure out compression in imageio
 def save_tif_stack(raw, save_file):
    try:
        imageio.volwrite(save_file, raw)
        return True
    except RuntimeError:
-        return False
+        print("Could not save tif stack, saving slices to folder %s instead"
+              % save_file)
+        save_tif_slices(raw, save_file)


 def save_tif_slices(raw, save_file):
-    print("Could not save tif stack, saving slices to folder %s instead"
-          % save_file)
    save_folder = os.path.splitext(save_file)[0]
    os.makedirs(save_folder, exist_ok=True)
    for z in range(raw.shape[0]):
        out_file = os.path.join(save_folder, "z%05i.tif" % z)
-        io.imsave(out_file, raw[z])
+        imageio.imwrite(out_file, raw[z])


 def save_tif(raw, save_file):
@@ -45,29 +56,39 @@ def save_tif(raw, save_file):


 def name_to_path(name):
-    name_dict = {'raw': 'images/',
-                 'cells': 'segmentations/',
-                 'nuclei': 'segmentations/',
-                 'cilia': 'segmentations/',
-                 'chromatin': 'segmentations/'}
+    name_dict = {'raw': 'images/sbem-6dpf-1-whole-raw.xml',
+                 'cells': 'segmentations/sbem-6dpf-1-whole-segmented-cells-labels.xml',
+                 'nuclei': 'segmentations/sbem-6dpf-1-whole-segmented-nuclei-labels.xml',
+                 'cilia': 'segmentations/sbem-6dpf-1-whole-segmented-cilia-labels.xml',
+                 'chromatin': 'segmentations/sbem-6dpf-1-whole-segmented-chromatin-labels.xml'}
    assert name in name_dict, "Name must be one of %s, not %s" % (str(name_dict.keys()),
                                                                  name)
    return name_dict[name]


-def make_cutout(tag, name, scale, bb_start, bb_stop):
+def name_to_base_scale(name):
+    scale_dict = {'raw': 0,
+                  'cells': 1,
+                  'nuclei': 3,
+                  'cilia': 0,
+                  'chromatin': 1}
+    return scale_dict[name]
+
+
+def cutout_data(tag, name, scale, bb_start, bb_stop):
    assert all(sta < sto for sta, sto in zip(bb_start, bb_stop))

    path = os.path.join('data', tag, name_to_path(name))
-    data_resolution = read_resolution(path)
    path = get_h5_path_from_xml(path, return_absolute_path=True)
    resolution = get_res_level(scale)

-    data_scale = match_scale(resolution, data_resolution)
+    base_scale = name_to_base_scale(name)
+    assert base_scale <= scale, "%s does not support scale %i; minimum is %i" % (name, scale, base_scale)
+    data_scale = scale - base_scale

-    bb_start = [int(sta / re) for sta, re in zip(bb_start, resolution)][::-1]
-    bb_stop = [int(sto / re) for sto, re in zip(bb_stop, resolution)][::-1]
-    bb = tuple(slice(sta, sto) for sta, sto in zip(bb_start, bb_stop))
+    bb_start_ = [int(sta / re) for sta, re in zip(bb_start, resolution)][::-1]
+    bb_stop_ = [int(sto / re) for sto, re in zip(bb_stop, resolution)][::-1]
+    bb = tuple(slice(sta, sto) for sta, sto in zip(bb_start_, bb_stop_))

    key = 't00000/s00/%i/cells' % data_scale
    with h5py.File(path, 'r') as f:
@@ -76,12 +97,45 @@ def make_cutout(tag, name, scale, bb_start, bb_stop):
    return data


-def parse_coordinate(coord):
-    coord = coord.rstrip('\n')
-    pos_start = coord.find('(') + 1
-    pos_stop = coord.find(')')
-    coord = coord[pos_start:pos_stop]
-    coord = coord.split(',')
-    coord = [float(co) for co in coord]
-    assert len(coord) == 3, "Coordinate conversion failed"
-    return coord
+# TODO support bdv-hdf5 as additional format
+def to_format(path):
+    ext = os.path.splitext(path)[1]
+    if ext.lower() in ('.hdf', '.hdf5', '.h5'):
+        return 'hdf5'
+    elif ext.lower() in ('.n5',):
+        return 'n5'
+    elif ext.lower() in ('.tif', '.tiff'):
+        return 'tif-stack'
+    elif ext.lower() in ('.zr', '.zarr'):
+        return 'zarr'
+    else:
+        print('Could not match', ext, 'to data format. Displaying the data instead')
+        return 'view'
+
+
+def save_data(data, path, save_format, name):
+    if save_format == 'hdf5':
+        with h5py.File(path) as f:
+            f.create_dataset(name, data=data, compression='gzip')
+    elif save_format in ('n5', 'zarr'):
+        import z5py  # import here, because we don't want to make z5py mandatory dependency
+        with z5py.File(path, use_zarr_format=save_format == 'zarr') as f:
+            f.create_dataset(name, data=data, compression='gzip',
+                             chunks=(64, 64, 64))
+    elif save_format == 'tif-stack':
+        save_tif_stack(data, path)
+    elif save_format == 'tif-slices':
+        save_tif_slices(data, path)
+    elif save_format == 'view':
+        from cremi_tools.viewer.volumina import view
+        view([data])
+    else:
+        raise RuntimeError("Unsupported format %s" % save_format)
+
+
+def make_cutout(tag, name, scale, bb_start, bb_stop, out_path, out_format=None):
+    data = cutout_data(tag, name, scale, bb_start, bb_stop)
+    out_format = to_format(out_path) if out_format is None else out_format
+    assert out_format in ('hdf5', 'n5', 'tif-stack', 'tif-slices', 'zarr', 'view'), "Invalid format:" % out_format
+    data = cutout_data(tag, name, scale, bb_start, bb_stop)
+    save_data(data, out_path, out_format, name)