diff --git a/analysis/nephridia/analyse_cilia.py b/analysis/nephridia/analyse_cilia.py
index 4ee31cd27f9ad3ace914e872b9f19a46fd8539b9..7fd4db3eae5c2d00f32f0982efec41ef13c359ec 100644
--- a/analysis/nephridia/analyse_cilia.py
+++ b/analysis/nephridia/analyse_cilia.py
@@ -1,13 +1,22 @@
 import numpy as np
 import pandas as pd
-from make_cell_table import right_nephr_ids, left_nephr_ids
 from matplotlib import pyplot as plt
 
-TABLE_PATH = '../../data/0.6.2/tables/sbem-6dpf-1-whole-segmented-cilia-labels/cell_mapping.csv'
 
+def get_nephr_ids(version):
+    table_path = '../../data/%s/tables/sbem-6dpf-1-whole-segmented-cells-labels/regions.csv' % version
+    table = pd.read_csv(table_path, sep='\t')
+    nephr_ids = table['nephridia'].values
+    right_nephr_ids = np.where(nephr_ids == 1)[0]
+    left_nephr_ids = np.where(nephr_ids == 2)[0]
+    return right_nephr_ids, left_nephr_ids
 
-def check_cell_ids():
-    table = pd.read_csv(TABLE_PATH, sep='\t')
+
+def check_cell_ids(version):
+    table_path = '../../data/%s/tables/sbem-6dpf-1-whole-segmented-cilia-labels/cell_mapping.csv' % version
+
+    right_nephr_ids, left_nephr_ids = get_nephr_ids(version)
+    table = pd.read_csv(table_path, sep='\t')
     cell_ids = table['cell_id'].values
 
     matched_right = []
@@ -35,16 +44,18 @@ def check_cell_ids():
     print("With cilia:", len(matched_left))
 
 
-def plot_cilia_per_cell():
+def plot_cilia_per_cell(version):
     counts_left = []
     counts_right = []
 
-    table = pd.read_csv(TABLE_PATH, sep='\t')
+    table_path = '../../data/%s/tables/sbem-6dpf-1-whole-segmented-cilia-labels/cell_mapping.csv' % version
+    table = pd.read_csv(table_path, sep='\t')
     cell_ids = table['cell_id']
     cell_ids = cell_ids[cell_ids != 0]
     cell_ids = cell_ids[~np.isnan(cell_ids)]
     cell_ids = cell_ids.astype('uint32')
 
+    right_nephr_ids, left_nephr_ids = get_nephr_ids(version)
     unique_cell_ids = np.unique(cell_ids)
 
     total_count = 0
@@ -84,5 +95,6 @@ def plot_cilia_per_cell():
 
 
 if __name__ == '__main__':
-    check_cell_ids()
-    plot_cilia_per_cell()
+    version = '0.6.5'
+    check_cell_ids(version)
+    plot_cilia_per_cell(version)
diff --git a/scripts/files/for_upload.py b/scripts/files/for_upload.py
index 87531c6fe4364edb8f563ecf1c0aae9754a9e7df..0e38ea54b65ccb36bd84c360214c680cd25a5dfb 100644
--- a/scripts/files/for_upload.py
+++ b/scripts/files/for_upload.py
@@ -214,7 +214,7 @@ def copy_segmentations(in_folder, out_folder, segmentations_to_copy, output_root
         # path in bucket is the relative path from out_file to output_root
         path_in_bucket = os.path.relpath(out_file, output_root)
         out_file = os.path.join(s3_folder, seg_name + '.xml')
-        make_xml_s3(in_file, out_file, path_in_bucket, s3_config, shape, resolution)
+        make_xml_s3(in_file, out_file, path_in_bucket, None, shape, resolution)
 
         # check if we need to copy tables
         seg_table_in = os.path.join(table_in, seg_name)
@@ -258,7 +258,7 @@ def copy_folder_for_s3(version, images_to_copy, segmentations_to_copy, output_ro
                        segmentations_to_copy, output_root)
 
 
-if __name__ == '__main__':
+def make_test_folder():
     res = [.1, .08, .08]
     im_names = {'sbem-6dpf-1-whole-raw': {'start_scale': 3, 'resolution': res},
                 'prospr-6dpf-1-whole-AChE-MED': {'resolution': [.55, .55, .55]}}
@@ -268,3 +268,91 @@ if __name__ == '__main__':
     out = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/test_n5'
     s3_config = {}
     copy_folder_for_s3('0.6.5', im_names, seg_names, out, s3_config)
+
+
+def make_different_chunkings():
+    path = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/rawdata/sbem-6dpf-1-whole-raw.h5'
+    xml_path = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/0.6.5/images/sbem-6dpf-1-whole-raw.xml'
+    output_root = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/test_n5'
+    data_out_folder = os.path.join(output_root, 'rawdata')
+    start_scale = 3
+    resolution = [.1, .08, .08]
+    # chunkings 1 - 3, written in a previous run: (32, 256, 256), (32, 128, 128), (64, 64, 64)
+    chunk_shapes = [(128, 128, 128)]
+    for ii, chunks in enumerate(chunk_shapes, 4):  # continue the output file numbering at 4
+        out_path = os.path.join(data_out_folder, 'sbem-6dpf-1-whole-raw-%i.n5' % ii)
+        copy_file_to_bdv_n5(path, out_path, resolution, chunks, start_scale)
+        # make the xml
+        path_in_bucket = os.path.relpath(out_path, output_root)
+        with open_file(out_path, 'r') as f:
+            shape = f['setup0/timepoint0/s0'].shape
+        out_path = os.path.join(data_out_folder, 'sbem-6dpf-1-whole-raw-%i.xml' % ii)
+        make_xml_s3(xml_path, out_path, path_in_bucket, None, shape, resolution)
+
+
+def iterate_chunks(path, key):
+    with open_file(path, 'r') as f:
+        ds = f[key]
+        n_chunks = ds.number_of_chunks  # total nr. of chunks in the chunk grid
+
+    ds_path = os.path.join(path, key)
+    chunk_sizes = []
+    for root, dirs, files in os.walk(ds_path):
+        for name in files:
+            if name == 'attributes.json':
+                continue
+            size = os.path.getsize(os.path.join(root, name))
+            chunk_sizes.append(size)
+
+    n_filled = len(chunk_sizes)
+    percent_filled = float(n_filled) / n_chunks  # fraction of chunks actually present on disk
+
+    return percent_filled, chunk_sizes
+
+
+def check_block_shapes():
+    import nifty.tools as nt
+    full_path = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/rawdata/sbem-6dpf-1-whole-raw.h5'
+    key = 't00000/s00/0/cells'
+    with open_file(full_path, 'r') as f:
+        shape = f[key].shape
+
+    prefix = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data/test_n5/rawdata/sbem-6dpf-1-whole-raw-%i.n5'
+    ds_key = 'setup0/timepoint0/s0'
+    block_shapes = [[32, 256, 256], [32, 128, 128], [64, 64, 64], [128, 128, 128]]
+    for ii, block_shape in enumerate(block_shapes, 1):
+        path = prefix % ii
+        percent_filled, sizes = iterate_chunks(path, ds_key)
+        n_total = nt.blocking([0, 0, 0], shape, block_shape).numberOfBlocks
+        n_filled = int(n_total * percent_filled)  # extrapolate fill fraction to the full-res shape
+        print("Chunk-shape:", block_shape)
+        print("Nr. chunks at highest res:", n_filled)
+        print("Mean chunk size in MB:", np.mean(sizes) / 1.e6, "+-", np.std(sizes) / 1.e6)
+        print("Min/max chunk size in MB:", np.min(sizes) / 1.e6, "/", np.max(sizes) / 1.e6)
+        print()
+
+
+def estimate_chunk_sizes():
+    ref_chunk_shape = [128, 128, 128]
+    ref_chunk_size = float(np.prod(ref_chunk_shape))
+    ref_chunk_mb = 1.0160599442530536  # mean chunk size measured for the 128^3 chunking
+    ref_n_chunks = 1553606.  # nr. of chunks at highest res for the 128^3 chunking
+
+    start = 64
+    stop = 128
+    step = 16
+    for chunk_len in range(start, stop + step, step):
+        print("Chunks: %i^3" % chunk_len)
+        rel_size = chunk_len ** 3 / ref_chunk_size
+        chunk_mb = ref_chunk_mb * rel_size
+        n_chunks = ref_n_chunks / rel_size
+        print("Nr. chunks at highest res:", int(n_chunks))
+        print("Mean chunk size in MB:", chunk_mb)
+        print()
+
+
+if __name__ == '__main__':
+    # make_test_folder()
+    # make_different_chunkings()
+    # check_block_shapes()
+    estimate_chunk_sizes()
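
Note on estimate_chunk_sizes: the estimate is a pure cube-law extrapolation from the reference
values measured for the 128^3 chunking. A standalone sketch of the 64^3 case (plain Python,
reusing only the reference numbers hard-coded in the patch; the values in the comments are
back-of-the-envelope results, not measurements):

    ref_chunk_mb = 1.0160599442530536       # measured mean chunk size for 128^3 chunks, in MB
    ref_n_chunks = 1553606.                 # measured nr. of chunks at highest res for 128^3
    rel_size = 64 ** 3 / float(128 ** 3)    # = 0.125; a 64^3 chunk holds 8x less data
    print("Nr. chunks at highest res:", int(ref_n_chunks / rel_size))  # ~12.4 million
    print("Mean chunk size in MB:", ref_chunk_mb * rel_size)           # ~0.127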