Skip to content
Snippets Groups Projects
Commit 7fe70128 authored by Constantin Pape's avatar Constantin Pape
Browse files

Working version of subvolume extraction

parent 3bbef304
No related branches found
No related tags found
No related merge requests found
from scripts.export.extract_subvolume import make_cutout
import argparse
import os
import pandas as pd
from scripts.export.extract_subvolume import make_cutout, get_res_level, parse_coordinate, name_to_path
def get_bounding_box(tag, lower, upper, table_name, table_id):
# validate the inputs
have_coordinates = lower and upper
have_table = table_name and table_id is not None
assert have_coordinates != have_table, "Need one of coordinates or table"
if have_coordinates:
print("Converting coordinates from physical coordinates %s:%s" % (lower,
upper))
bb_start = parse_coordinate(lower)
bb_stop = parse_coordinate(upper)
print("to pixel coordinates %s:%s" % (str(bb_start),
str(bb_stop)))
else:
name = os.path.splitext(os.path.split(name_to_path(table_name))[1])[0]
table_path = os.path.join('data', tag, 'tables', name, 'base.csv')
assert os.path.exists(table_path), "Could not find table for %s at %s" % (table_name,
table_path)
table = pd.read_csv(table_path, sep='\t')
sub_table = table.loc[table['label_id'] == table_id]
assert sub_table.shape[0] == 1
for row in sub_table.itertuples(index=False):
bb_start = [row.bb_min_x, row.bb_min_y, row.bb_min_z]
bb_stop = [row.bb_max_x, row.bb_max_y, row.bb_max_z]
print("Read bounding box from table %s for id %i:" % (table_name, table_id))
print(bb_start, ":", bb_stop)
return bb_start, bb_stop
# TODO
# - expose out_format as argument
# - saving multiple names in one go currently only works for h5 and n5, fix this!
if __name__ == '__main__':
levels = get_res_level()
res_string = " ".join("%i: %s" % (lev, res) for lev, res in enumerate(levels))
......@@ -8,21 +44,49 @@ if __name__ == '__main__':
"The levels range from 0 to 6 with resolutions in micrometer: "
"%s" % res_string)
parser = argparse.ArgumentParser("Make a cutout from the platynereis EM data-set and save it as tif stack.")
parser = argparse.ArgumentParser("Make a cutout from the platynereis EM data-set.")
# Mandatory arguments: which tag and scale
parser.add_argument("tag", type=str, help="Version tag of the data to extract from")
parser.add_argument("scale_level", type=int, help=scale_help)
parser.add_argument("lower_corner", type=str, help="Lower corner of the bounding box to cut out")
parser.add_argument("upper_corner", type=str, help="Upper corner of the bounding box to cut out")
parser.add_argument("save_file", type=str, help="Where to save the cutout.")
# Optional: data names
parser.add_argument("--names", type=str, nargs='+', default=['raw'],
help=("Names of the data to be extracted.\
Choose from 'raw', 'cells', 'nuclei', 'chromatin', 'cilia'"))
# Optional: select bounding box from platy browser coordinates
parser.add_argument("--lower_corner", type=str, default='',
help="Lower corner of the bounding box to cut out",)
parser.add_argument("--upper_corner", type=str, default='',
help="Upper corner of the bounding box to cut out")
# Optional: Select bounding box from table and id
parser.add_argument('--table_name', type=str, default='',
help="Table from where to read the bounding box, can be from the same option as names")
parser.add_argument('--table_id', type=int, default=None,
help="Id for bounding box")
# Optional: select where to save the data
parser.add_argument("--save_file", type=str, help="Where to save the cutout.",
default='')
args = parser.parse_args()
print("Converting coordinates from physical coordinates %s:%s" % (args.lower_corner,
args.upper_corner))
bb_start = parse_coordinate(args.lower_corner)
bb_stop = parse_coordinate(args.upper_corner)
print("to pixel coordinates %s:%s" % (str(bb_start),
str(bb_stop)))
print("Extracting raw data")
raw = make_cutout(args.scale_level, bb_start, bb_stop)
print("Saving data to tif-stack %s" % args.save_file)
save_tif(raw, args.save_file)
# 1.) validate the tag
tag = args.tag
folder = os.path.join('data', tag)
assert os.path.exists(folder), "Invalid tag %s" % tag
# 2.) figure out the bounding box
lower = args.lower_corner
upper = args.upper_corner
table_name = args.table_name
table_id = args.table_id
bb_start, bb_stop = get_bounding_box(tag, lower, upper, table_name, table_id)
# 3.) extract data for all names
names = args.names
save_file = args.save_file
scale = args.scale_level
for name in names:
print("Extracting data for %s" % name)
make_cutout(tag, name, scale, bb_start, bb_stop, save_file)
import argparse
import os
import h5py
import imageio
......@@ -6,6 +5,17 @@ import imageio
from ..files.xml_utils import get_h5_path_from_xml
def parse_coordinate(coord):
coord = coord.rstrip('\n')
pos_start = coord.find('(') + 1
pos_stop = coord.find(')')
coord = coord[pos_start:pos_stop]
coord = coord.split(',')
coord = [float(co) for co in coord]
assert len(coord) == 3, "Coordinate conversion failed"
return coord
def get_res_level(level=None):
res0 = [.01, .01, .025]
res1 = [.02, .02, .025]
......@@ -18,22 +28,23 @@ def get_res_level(level=None):
return resolutions[level]
# TODO figure out compression in imageio
def save_tif_stack(raw, save_file):
try:
imageio.volwrite(save_file, raw)
return True
except RuntimeError:
return False
print("Could not save tif stack, saving slices to folder %s instead"
% save_file)
save_tif_slices(raw, save_file)
def save_tif_slices(raw, save_file):
print("Could not save tif stack, saving slices to folder %s instead"
% save_file)
save_folder = os.path.splitext(save_file)[0]
os.makedirs(save_folder, exist_ok=True)
for z in range(raw.shape[0]):
out_file = os.path.join(save_folder, "z%05i.tif" % z)
io.imsave(out_file, raw[z])
imageio.imwrite(out_file, raw[z])
def save_tif(raw, save_file):
......@@ -45,29 +56,39 @@ def save_tif(raw, save_file):
def name_to_path(name):
name_dict = {'raw': 'images/',
'cells': 'segmentations/',
'nuclei': 'segmentations/',
'cilia': 'segmentations/',
'chromatin': 'segmentations/'}
name_dict = {'raw': 'images/sbem-6dpf-1-whole-raw.xml',
'cells': 'segmentations/sbem-6dpf-1-whole-segmented-cells-labels.xml',
'nuclei': 'segmentations/sbem-6dpf-1-whole-segmented-nuclei-labels.xml',
'cilia': 'segmentations/sbem-6dpf-1-whole-segmented-cilia-labels.xml',
'chromatin': 'segmentations/sbem-6dpf-1-whole-segmented-chromatin-labels.xml'}
assert name in name_dict, "Name must be one of %s, not %s" % (str(name_dict.keys()),
name)
return name_dict[name]
def make_cutout(tag, name, scale, bb_start, bb_stop):
def name_to_base_scale(name):
scale_dict = {'raw': 0,
'cells': 1,
'nuclei': 3,
'cilia': 0,
'chromatin': 1}
return scale_dict[name]
def cutout_data(tag, name, scale, bb_start, bb_stop):
assert all(sta < sto for sta, sto in zip(bb_start, bb_stop))
path = os.path.join('data', tag, name_to_path(name))
data_resolution = read_resolution(path)
path = get_h5_path_from_xml(path, return_absolute_path=True)
resolution = get_res_level(scale)
data_scale = match_scale(resolution, data_resolution)
base_scale = name_to_base_scale(name)
assert base_scale <= scale, "%s does not support scale %i; minimum is %i" % (name, scale, base_scale)
data_scale = scale - base_scale
bb_start = [int(sta / re) for sta, re in zip(bb_start, resolution)][::-1]
bb_stop = [int(sto / re) for sto, re in zip(bb_stop, resolution)][::-1]
bb = tuple(slice(sta, sto) for sta, sto in zip(bb_start, bb_stop))
bb_start_ = [int(sta / re) for sta, re in zip(bb_start, resolution)][::-1]
bb_stop_ = [int(sto / re) for sto, re in zip(bb_stop, resolution)][::-1]
bb = tuple(slice(sta, sto) for sta, sto in zip(bb_start_, bb_stop_))
key = 't00000/s00/%i/cells' % data_scale
with h5py.File(path, 'r') as f:
......@@ -76,12 +97,45 @@ def make_cutout(tag, name, scale, bb_start, bb_stop):
return data
def parse_coordinate(coord):
coord = coord.rstrip('\n')
pos_start = coord.find('(') + 1
pos_stop = coord.find(')')
coord = coord[pos_start:pos_stop]
coord = coord.split(',')
coord = [float(co) for co in coord]
assert len(coord) == 3, "Coordinate conversion failed"
return coord
# TODO support bdv-hdf5 as additional format
def to_format(path):
ext = os.path.splitext(path)[1]
if ext.lower() in ('.hdf', '.hdf5', '.h5'):
return 'hdf5'
elif ext.lower() in ('.n5',):
return 'n5'
elif ext.lower() in ('.tif', '.tiff'):
return 'tif-stack'
elif ext.lower() in ('.zr', '.zarr'):
return 'zarr'
else:
print('Could not match', ext, 'to data format. Displaying the data instead')
return 'view'
def save_data(data, path, save_format, name):
if save_format == 'hdf5':
with h5py.File(path) as f:
f.create_dataset(name, data=data, compression='gzip')
elif save_format in ('n5', 'zarr'):
import z5py # import here, because we don't want to make z5py mandatory dependency
with z5py.File(path, use_zarr_format=save_format == 'zarr') as f:
f.create_dataset(name, data=data, compression='gzip',
chunks=(64, 64, 64))
elif save_format == 'tif-stack':
save_tif_stack(data, path)
elif save_format == 'tif-slices':
save_tif_slices(data, path)
elif save_format == 'view':
from cremi_tools.viewer.volumina import view
view([data])
else:
raise RuntimeError("Unsupported format %s" % save_format)
def make_cutout(tag, name, scale, bb_start, bb_stop, out_path, out_format=None):
data = cutout_data(tag, name, scale, bb_start, bb_stop)
out_format = to_format(out_path) if out_format is None else out_format
assert out_format in ('hdf5', 'n5', 'tif-stack', 'tif-slices', 'zarr', 'view'), "Invalid format:" % out_format
data = cutout_data(tag, name, scale, bb_start, bb_stop)
save_data(data, out_path, out_format, name)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment