Commit 3f682f72 authored by Constantin Pape

Add script for chunk estimates

parent 710ea4fb
 import numpy as np
 import pandas as pd
-from make_cell_table import right_nephr_ids, left_nephr_ids
 from matplotlib import pyplot as plt
 
-TABLE_PATH = '../../data/0.6.2/tables/sbem-6dpf-1-whole-segmented-cilia-labels/cell_mapping.csv'
+
+def get_nephr_ids(version):
+    table_path = '../../data/%s/tables/sbem-6dpf-1-whole-segmented-cells-labels/regions.csv' % version
+    table = pd.read_csv(table_path, sep='\t')
+    nephr_ids = table['nephridia'].values
+    right_nephr_ids = np.where(nephr_ids == 1)[0]
+    left_nephr_ids = np.where(nephr_ids == 2)[0]
+    return right_nephr_ids, left_nephr_ids
 
 
-def check_cell_ids():
-    table = pd.read_csv(TABLE_PATH, sep='\t')
+def check_cell_ids(version):
+    table_path = '../../data/%s/tables/sbem-6dpf-1-whole-segmented-cilia-labels/cell_mapping.csv' % version
+    right_nephr_ids, left_nephr_ids = get_nephr_ids(version)
+    table = pd.read_csv(table_path, sep='\t')
     cell_ids = table['cell_id'].values
     matched_right = []
@@ -35,16 +44,18 @@ def check_cell_ids():
     print("With cilia:", len(matched_left))
 
 
-def plot_cilia_per_cell():
+def plot_cilia_per_cell(version):
     counts_left = []
     counts_right = []
-    table = pd.read_csv(TABLE_PATH, sep='\t')
+    table_path = '../../data/%s/tables/sbem-6dpf-1-whole-segmented-cilia-labels/cell_mapping.csv' % version
+    table = pd.read_csv(table_path, sep='\t')
     cell_ids = table['cell_id']
     cell_ids = cell_ids[cell_ids != 0]
     cell_ids = cell_ids[~np.isnan(cell_ids)]
     cell_ids = cell_ids.astype('uint32')
+    right_nephr_ids, left_nephr_ids = get_nephr_ids(version)
     unique_cell_ids = np.unique(cell_ids)
     total_count = 0
@@ -84,5 +95,6 @@ def plot_cilia_per_cell():
 if __name__ == '__main__':
-    check_cell_ids()
-    plot_cilia_per_cell()
+    version = '0.6.5'
+    check_cell_ids(version)
+    plot_cilia_per_cell(version)
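
For reference, a minimal sketch (toy data, not the real regions.csv) of the
selection logic in get_nephr_ids: the 'nephridia' column encodes 1 for the
right and 2 for the left nephridium, and np.where returns the matching row
positions (these coincide with label ids only if the table rows are ordered
by label id):

import numpy as np
import pandas as pd

table = pd.DataFrame({'label_id': [0, 1, 2, 3, 4],
                      'nephridia': [0, 1, 2, 1, 0]})
nephr_ids = table['nephridia'].values
right_nephr_ids = np.where(nephr_ids == 1)[0]  # -> array([1, 3])
left_nephr_ids = np.where(nephr_ids == 2)[0]   # -> array([2])
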
@@ -214,7 +214,7 @@ def copy_segmentations(in_folder, out_folder, segmentations_to_copy, output_root
         # path in bucket is the relative path from out_file to output_root
         path_in_bucket = os.path.relpath(out_file, output_root)
         out_file = os.path.join(s3_folder, seg_name + '.xml')
-        make_xml_s3(in_file, out_file, path_in_bucket, s3_config, shape, resolution)
+        make_xml_s3(in_file, out_file, path_in_bucket, None, shape, resolution)
 
         # check if we need to copy tables
         seg_table_in = os.path.join(table_in, seg_name)
@@ -258,7 +258,7 @@ def copy_folder_for_s3(version, images_to_copy, segmentations_to_copy, output_root
                        segmentations_to_copy, output_root)
 
 
-if __name__ == '__main__':
+def make_test_folder():
     res = [.1, .08, .08]
     im_names = {'sbem-6dpf-1-whole-raw': {'start_scale': 3, 'resolution': res},
                 'prospr-6dpf-1-whole-AChE-MED': {'resolution': [.55, .55, .55]}}
@@ -268,3 +268,91 @@ if __name__ == '__main__':
     out = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/test_n5'
     s3_config = {}
     copy_folder_for_s3('0.6.5', im_names, seg_names, out, s3_config)
+
+
+def make_different_chunkings():
+    path = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/rawdata/sbem-6dpf-1-whole-raw.h5'
+    xml_path = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/0.6.5/images/sbem-6dpf-1-whole-raw.xml'
+    output_root = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/test_n5'
+    data_out_folder = os.path.join(output_root, 'rawdata')
+
+    start_scale = 3
+    resolution = [.1, .08, .08]
+
+    chunk_shapes = [(32, 256, 256), (32, 128, 128), (64, 64, 64)]  # chunkings from earlier runs (indices 1-3)
+    chunk_shapes = [(128, 128, 128)]  # this run only adds index 4
+    for ii, chunks in enumerate(chunk_shapes, 4):
+        out_path = os.path.join(data_out_folder, 'sbem-6dpf-1-whole-raw-%i.n5' % ii)
+        copy_file_to_bdv_n5(path, out_path, resolution, chunks, start_scale)
+
+        # make the xml
+        path_in_bucket = os.path.relpath(out_path, output_root)
+        with open_file(out_path, 'r') as f:
+            shape = f['setup0/timepoint0/s0'].shape
+        out_path = os.path.join(data_out_folder, 'sbem-6dpf-1-whole-raw-%i.xml' % ii)
+        make_xml_s3(xml_path, out_path, path_in_bucket, None, shape, resolution)
+
+
+def iterate_chunks(path, key):
+    with open_file(path, 'r') as f:
+        ds = f[key]
+        n_chunks = ds.number_of_chunks
+
+    # n5 stores each non-empty chunk as a separate file, so walking the
+    # dataset folder and counting files gives the number of filled chunks
+    ds_path = os.path.join(path, key)
+    chunk_sizes = []
+    for root, dirs, files in os.walk(ds_path):
+        for name in files:
+            if name == 'attributes.json':
+                continue
+            size = os.path.getsize(os.path.join(root, name))
+            chunk_sizes.append(size)
+
+    n_filled = len(chunk_sizes)
+    percent_filled = float(n_filled) / n_chunks  # fraction in [0, 1], not a percentage
+    return percent_filled, chunk_sizes
+
+
+def check_block_shapes():
+    import nifty.tools as nt
+    full_path = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/rawdata/sbem-6dpf-1-whole-raw.h5'
+    key = 't00000/s00/0/cells'
+    with open_file(full_path, 'r') as f:
+        shape = f[key].shape
+
+    prefix = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/test_n5/rawdata/sbem-6dpf-1-whole-raw-%i.n5'
+    ds_key = 'setup0/timepoint0/s0'
+
+    block_shapes = [[32, 256, 256], [32, 128, 128], [64, 64, 64], [128, 128, 128]]
+    for ii, block_shape in enumerate(block_shapes, 1):
+        path = prefix % ii
+        percent_filled, sizes = iterate_chunks(path, ds_key)
+        # extrapolate the filled fraction measured on the test copy to the full volume
+        n_total = nt.blocking([0, 0, 0], shape, block_shape).numberOfBlocks
+        n_filled = int(n_total * percent_filled)
+        print("Chunk-shape:", block_shape)
+        print("Nr. chunks at highest res:", n_filled)
+        print("Mean chunk size in MB:", np.mean(sizes) / 1.e6, "+-", np.std(sizes) / 1.e6)
+        print("Min/max chunk size in MB:", np.min(sizes) / 1.e6, "/", np.max(sizes) / 1.e6)
+        print()
+
+
+def estimate_chunk_sizes():
+    # reference values from the 128^3 chunking (cf. check_block_shapes)
+    ref_chunk_shape = [128, 128, 128]
+    ref_chunk_size = float(np.prod(ref_chunk_shape))
+    ref_chunk_mb = 1.0160599442530536
+    ref_n_chunks = 1553606.
+
+    start = 64
+    stop = 128
+    step = 16
+    for chunk_len in range(start, stop + step, step):
+        print("Chunks: %i^3" % chunk_len)
+        rel_size = chunk_len ** 3 / ref_chunk_size
+        chunk_mb = ref_chunk_mb * rel_size
+        n_chunks = ref_n_chunks / rel_size
+        print("Nr. chunks at highest res:", int(n_chunks))
+        print("Mean chunk size in MB:", chunk_mb)
+        print()
+
+
+if __name__ == '__main__':
+    # make_test_folder()
+    # make_different_chunkings()
+    # check_block_shapes()
+    estimate_chunk_sizes()
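
As a sanity check on the scaling in estimate_chunk_sizes: mean chunk size grows
linearly with chunk volume and the chunk count shrinks by the same factor,
relative to the measured 128^3 reference. A standalone version of the
arithmetic for a single chunk length, reusing the reference values above:

ref_chunk_mb = 1.0160599442530536  # measured mean size of a 128^3 chunk in MB
ref_n_chunks = 1553606.            # estimated number of 128^3 chunks at full res
rel_size = 64 ** 3 / float(128 ** 3)  # = 0.125
print(ref_chunk_mb * rel_size)        # ~0.127 MB per 64^3 chunk
print(int(ref_n_chunks / rel_size))   # 12428848 chunks at highest res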