From e8b7221c45e2fc4e6928293a75eb907e67af1b13 Mon Sep 17 00:00:00 2001 From: Constantin Pape <constantin.pape@iwr.uni-heidelberg.de> Date: Fri, 7 Feb 2020 16:35:22 +0100 Subject: [PATCH] Remove deprecated scripts --- deprecated/add_existing_data.py | 94 ----------------------- deprecated/join_tables.py | 13 ---- deprecated/legacy_export.py | 47 ------------ deprecated/make_data_sources.py | 91 ----------------------- deprecated/make_initial_version.py | 115 ----------------------------- deprecated/swap_links.py | 40 ---------- deprecated/update_prospr_names.py | 34 --------- misc/check_n5_file.py | 40 ++++++++++ 8 files changed, 40 insertions(+), 434 deletions(-) delete mode 100644 deprecated/add_existing_data.py delete mode 100644 deprecated/join_tables.py delete mode 100644 deprecated/legacy_export.py delete mode 100644 deprecated/make_data_sources.py delete mode 100755 deprecated/make_initial_version.py delete mode 100644 deprecated/swap_links.py delete mode 100644 deprecated/update_prospr_names.py create mode 100644 misc/check_n5_file.py diff --git a/deprecated/add_existing_data.py b/deprecated/add_existing_data.py deleted file mode 100644 index aa466f5..0000000 --- a/deprecated/add_existing_data.py +++ /dev/null @@ -1,94 +0,0 @@ -#! /g/arendt/pape/miniconda3/envs/platybrowser/bin/python - -import os -from glob import glob -from scripts.files import add_source, add_image, add_segmentation - - -def add_sources(): - # add em source - add_source('sbem', '6dpf') - # add prospr source - add_source('prospr', '6dpf') - - -def add_images(): - base_folder = './data/0.2.1/images' - - # add sbem raw data - sbem_prefix = 'sbem-6dpf-1-whole' - sbem_raw = './data/0.2.1/images/sbem-6dpf-1-whole-raw.xml' - name = 'raw' - add_image(sbem_prefix, name, sbem_raw, copy_data=False) - - # add all prospr images - prospr_prefix = 'prospr-6dpf-1-whole' - prospr_ims = glob(os.path.join(base_folder, 'prospr-6dpf-1-whole-*')) - for impath in prospr_ims: - name = os.path.split(impath)[1] - name, ext = os.path.splitext(name) - if ext != '.xml': - continue - name = name[(len(prospr_prefix) + 1):] - add_image(prospr_prefix, name, impath, copy_data=False) - - -def add_static_segmentations(): - source = 'sbem-6dpf-1-whole' - - # chromatin segmentation - chromatin_tables = {'default': './data/0.2.0/tables/sbem-6dpf-1-whole-segmented-chromatin-labels/default.csv'} - add_segmentation(source, 'segmented-chromatin-labels', - segmentation_path='./data/rawdata/sbem-6dpf-1-whole-segmented-chromatin-labels.xml', - table_path_dict=chromatin_tables, copy_data=False) - - # tissue segmentation - tissue_tables = {'default': './data/0.1.0/tables/sbem-6dpf-1-whole-segmented-tissue-labels/base.csv'} - add_segmentation(source, 'segmented-tissue-labels', - segmentation_path='./data/rawdata/sbem-6dpf-1-whole-segmented-tissue-labels.xml', - table_path_dict=tissue_tables, copy_data=False) - - # muscle segmentation - add_segmentation(source, 'segmented-muscle', - segmentation_path='./data/rawdata/sbem-6dpf-1-whole-segmented-muscle.h5', copy_data=False) - - -def add_dynamic_segmentations(): - source = 'sbem-6dpf-1-whole' - paintera_root = '/g/kreshuk/data/arendt/platyneris_v1/data.n5' - - # cell segmentation - add_segmentation(source, 'segmented-cells-labels', - paintera_project=(paintera_root, 'volumes/paintera/proofread_cells'), - resolution=[.025, .02, .02], - table_update_function='make_cell_tables') - - # nucleus segmentation - add_segmentation(source, 'segmented-nuclei-labels', - paintera_project=(paintera_root, 'volumes/paintera/nuclei'), - resolution=[.1, .08, .08], - table_update_function='make_nucleus_tables') - - # cilia segmentation - add_segmentation(source, 'segmented-cilia-labels', - paintera_project=(paintera_root, 'volumes/paintera/cilia'), - resolution=[.025, .01, .01], - table_update_function='make_cilia_tables') - - -def add_segmentations(): - add_static_segmentations() - add_dynamic_segmentations() - - -def add_existing_data(): - """ Add existing data to the json files that keep track of - sources, image data and segmentations. - """ - add_sources() - add_images() - add_segmentations() - - -if __name__ == '__main__': - add_existing_data() diff --git a/deprecated/join_tables.py b/deprecated/join_tables.py deleted file mode 100644 index 8ce58f0..0000000 --- a/deprecated/join_tables.py +++ /dev/null @@ -1,13 +0,0 @@ -import pandas as pd - -def_path = '../data/0.5.1/tables/sbem-6dpf-1-whole-segmented-cilia-labels/default.csv.bkp' -t1 = pd.read_csv(def_path, sep='\t') -t2 = pd.read_csv('../data/0.5.1/tables/sbem-6dpf-1-whole-segmented-cilia-labels/cilia.csv', sep='\t') - -l1 = t1[['label_id']].values -l2 = t2[['label_id']].values -assert (l1 == l2).all() - -t = pd.concat([t1, t2['cell_id']], axis=1) -def_path = '../data/0.5.1/tables/sbem-6dpf-1-whole-segmented-cilia-labels/default.csv' -t.to_csv(def_path, index=False, sep='\t') diff --git a/deprecated/legacy_export.py b/deprecated/legacy_export.py deleted file mode 100644 index ce6fee9..0000000 --- a/deprecated/legacy_export.py +++ /dev/null @@ -1,47 +0,0 @@ -import os -# from pathlib import Path -from shutil import copyfile - - -# TODO need to replace the name in xml to make this work out of the box -def export_segmentation(folder, name, dest_folder, out_name): - seg_file = os.path.join(folder, 'segmentations', '%s.h5' % name) - xml_file = os.path.join(folder, 'segmentations', '%s.xml' % name) - table_file = os.path.join(folder, 'tables', name, 'default.csv') - assert os.path.exists(seg_file) - assert os.path.exists(xml_file) - assert os.path.exists(table_file), table_file - - seg_out = os.path.join(dest_folder, '%s.h5' % out_name) - print("Copying segmentation from", seg_file, "to", seg_out) - copyfile(seg_file, seg_out) - - xml_out = os.path.join(dest_folder, '%s.xml' % out_name) - print("Copying xml from", xml_file, "to", xml_out) - copyfile(xml_file, xml_out) - - table_out = os.path.join(dest_folder, 'tables', '%s.csv' % out_name) - print("Copying table from", table_file, "to", table_out) - copyfile(table_file, table_out) - - -def export_tag(tag, export_cells=False, export_cilia=False): - tag_folder = './data/%s' % tag - assert os.path.exists(tag_folder) - - dest_folder = '/g/arendt/EM_6dpf_segmentation/EM-Prospr' - - if export_cells: - name = 'sbem-6dpf-1-whole-segmented-cells-labels' - out_name = 'em-segmented-cells-new-labels' - export_segmentation(tag_folder, name, dest_folder, out_name) - - if export_cilia: - name = 'sbem-6dpf-1-whole-segmented-cilia-labels' - out_name = 'em-segmented-cilia-labels' - export_segmentation(tag_folder, name, dest_folder, out_name) - - -if __name__ == '__main__': - # export_tag('0.1.1', export_cells=True) - export_tag('0.1.0', export_cilia=True) diff --git a/deprecated/make_data_sources.py b/deprecated/make_data_sources.py deleted file mode 100644 index 096246b..0000000 --- a/deprecated/make_data_sources.py +++ /dev/null @@ -1,91 +0,0 @@ -#! /g/arendt/pape/miniconda3/envs/platybrowser/bin/python - -import os -import glob -from shutil import copyfile -from scripts.files import get_h5_path_from_xml, copy_xml_with_newpath, write_simple_xml - - -def copy_xmls_and_symlink_h5(name_dict, src_folder, trgt_folder): - for n1, n2 in name_dict.items(): - src_xml = os.path.join(src_folder, n1) - trgt_xml = os.path.join(trgt_folder, n2) - - # we make a softlink from src to target h5 - # NOTE eventually, we want to copy all the data, but - # for now, we use softlinks in order to keep the current - # version of the platy browser working - src_h5 = get_h5_path_from_xml(src_xml, return_absolute_path=True) - trgt_h5 = os.path.splitext(n2)[0] + '.h5' - os.symlink(src_h5, os.path.join(trgt_folder, trgt_h5)) - - copy_xml_with_newpath(src_xml, trgt_xml, trgt_h5) - - -def make_initial_data_sources(copy_sbem, - copy_prospr, - copy_fib, - copy_regions): - old_folder = '/g/arendt/EM_6dpf_segmentation/EM-Prospr' - raw_folder = './data/rawdata' - os.makedirs(raw_folder, exist_ok=True) - - # TODO - # copy cellular models - if copy_sbem: - print("Copy sbem data") - # copy the sbem data - sbem_prefix = 'sbem-6dpf-1-whole' - name_dict = {'em-raw-full-res.xml': 'raw.xml', - 'em-segmented-muscles-ariadne.xml': 'segmented-muscle.xml', - 'em-segmented-tissue-labels.xml': 'segmented-tissue-labels.xml'} - name_dict = {k: '%s-%s' % (sbem_prefix, v) - for k, v in name_dict.items()} - copy_xmls_and_symlink_h5(name_dict, old_folder, raw_folder) - - if copy_prospr: - print("Copy prospr data") - prospr_prefix = 'prospr-6dpf-1-whole' - # copy the prospr meds - prospr_names = glob.glob(os.path.join(old_folder, "*-MED*")) - prospr_names = [os.path.split(f)[1] for f in prospr_names] - prospr_names = [name for name in prospr_names if os.path.splitext(name)[1] == '.xml'] - name_dict = {n: '%s-%s' % (prospr_prefix, n) for n in prospr_names} - copy_xmls_and_symlink_h5(name_dict, old_folder, raw_folder) - - # copy valentyna's med file - input_path = '/g/kreshuk/zinchenk/cell_match/data/meds_all_genes_500nm.h5' - output_path = os.path.join(raw_folder, '%s_meds_all_genes.h5' % prospr_prefix) - xml_path = os.path.join(raw_folder, '%s_meds_all_genes.xml' % prospr_prefix) - write_simple_xml(xml_path, os.path.split(output_path)[1], - path_type='relative') - copyfile(input_path, output_path) - - if copy_fib: - print("Coby fibsem data") - # copy the fibsem data - fib_prefix = 'fibsem-6dpf-1-parapod' - name_dict = {'em-raw-': 'em-raw', - 'em-segmented-': ''} - name_dict = {k: '%s-%s' % (fib_prefix, v) - for k, v in name_dict.items()} - copy_xmls_and_symlink_h5(name_dict, old_folder, raw_folder) - - if copy_regions: - - print("Copy regions") - prospr_prefix = 'prospr-6dpf-1-whole-segmented' - prospr_names = glob.glob(os.path.join(old_folder, "BodyPart_*")) - prospr_names = [os.path.split(f)[1] for f in prospr_names] - prospr_names = [name for name in prospr_names if os.path.splitext(name)[1] == '.xml'] - name_dict = {n: '%s-%s' % (prospr_prefix, n.split('_')[-1]) for n in prospr_names} - copy_xmls_and_symlink_h5(name_dict, old_folder, raw_folder) - - -if __name__ == '__main__': - copy_sbem = False - copy_prospr = False - copy_fib = False - copy_regions = True - make_initial_data_sources(copy_sbem, copy_prospr, copy_fib, - copy_regions) diff --git a/deprecated/make_initial_version.py b/deprecated/make_initial_version.py deleted file mode 100755 index e22e1e5..0000000 --- a/deprecated/make_initial_version.py +++ /dev/null @@ -1,115 +0,0 @@ -#! /g/arendt/pape/miniconda3/envs/platybrowser/bin/python - -import os -from shutil import copyfile -from glob import glob - -import h5py -from scripts.files import make_folder_structure -from scripts.export import export_segmentation -from scripts.files import make_bdv_server_file, copy_image_data, copy_misc_data -from scripts.files import copy_segmentation -from scripts.attributes import make_nucleus_tables, make_cell_tables -from pybdv.converter import make_bdv - - -def make_sbem_segmentations(old_folder, folder): - path = '/g/kreshuk/data/arendt/platyneris_v1/data.n5' - - # export nucleus segemntation - tmp_nuclei = 'tmp_export_nuclei' - key_nuclei = 'volumes/paintera/nuclei' - nuclei_name = 'sbem-6dpf-1-whole-segmented-nuclei-labels' - res_nuclei = [.1, .08, .08] - export_segmentation(path, key_nuclei, old_folder, folder, nuclei_name, res_nuclei, tmp_nuclei) - - # export cell segemntation - tmp_cells = 'tmp_export_cells' - key_cells = 'volumes/paintera/proofread_cells' - cells_name = 'sbem-6dpf-1-whole-segmented-cells-labels' - res_cells = [.025, .02, .02] - export_segmentation(path, key_cells, old_folder, folder, cells_name, res_cells, tmp_cells, - target='local', max_jobs=8) - - -def make_sbem_tables(folder): - # make cell segmentation tables - name_cells = 'sbem-6dpf-1-whole-segmented-cells-labels' - res_cells = [.025, .02, .02] - make_cell_tables(folder, name_cells, 'tmp_tables_cells', - res_cells, target='local', max_jobs=32) - - # make nucleus segmentation tables - name_nuclei = 'sbem-6dpf-1-whole-segmented-nuclei-labels' - res_nuclei = [.1, .08, .08] - make_nucleus_tables(folder, name_nuclei, 'tmp_tables_nuclei', - res_nuclei, target='local', max_jobs=32) - - old_folder = '/g/arendt/EM_6dpf_segmentation/EM-Prospr' - # copy tissue segmentation table - tissue_name_out = 'sbem-6dpf-1-whole-segmented-tissue-labels' - table_folder = os.path.join(folder, 'tables', tissue_name_out) - os.makedirs(table_folder, exist_ok=True) - tissue_table_in = os.path.join(old_folder, 'tables', 'em-segmented-tissue-labels.csv') - tissue_table_out = os.path.join(table_folder, 'default.csv') - copyfile(tissue_table_in, tissue_table_out) - - -def make_prospr_region_segmentations(): - in_prefix = '/g/arendt/EM_6dpf_segmentation/EM-Prospr/BodyPart_*.h5' - out_prefix = './data/rawdata/prospr-6dpf-1-whole-segmented-' - files = glob(in_prefix) - for p in files: - name = p.split('_')[-1][:-3] - o = out_prefix + name + '.h5' - print(p, "to", o) - with h5py.File(p) as f: - key = 't00000/s00/0/cells' - data = f[key][:] - data[data > 0] = 0 - data[data < 0] = 255 - make_bdv(data, o, 3 * [[2, 2, 2]], - unit='micrometer', resolution=[0.5, 0.5, 0.5]) - - -def insert_chromatin(): - src_folder = 'data/rawdata' - dst_folder = 'data/0.2.0' - name = 'sbem-6dpf-1-whole-segmented-chromatin-labels' - copy_segmentation(src_folder, dst_folder, name) - src_table = '/g/arendt/EM_6dpf_segmentation/EM-Prospr/tables/em-segmented-chromatin-labels.csv' - dst_table = os.path.join(dst_folder, 'tables', name) - os.makedirs(dst_table, exist_ok=True) - dst_table = os.path.join(dst_table, 'base.csv') - copyfile(src_table, dst_table) - - -def make_initial_version(): - - src_folder = 'data/rawdata' - old_folder = '/g/arendt/EM_6dpf_segmentation/EM-Prospr' - tag = '0.0.0' - folder = os.path.join('data', tag) - - make_folder_structure(folder) - - # make xmls for all necessary image data - copy_image_data(src_folder, os.path.join(folder, 'images')) - copy_misc_data(src_folder, os.path.join(folder, 'misc')) - - # export the initial sbem segmentations - make_sbem_segmentations(old_folder, folder) - - # make the tables for sbem segmentations - make_sbem_tables(folder) - - # make the bdv server file - make_bdv_server_file([os.path.join(folder, 'images'), - os.path.join(folder, 'segmentations')], - os.path.join(folder, 'misc', 'bdvserver.txt')) - - -if __name__ == '__main__': - insert_chromatin() - # make_prospr_region_segmentations() - # make_initial_version() diff --git a/deprecated/swap_links.py b/deprecated/swap_links.py deleted file mode 100644 index 1cf5559..0000000 --- a/deprecated/swap_links.py +++ /dev/null @@ -1,40 +0,0 @@ -import os -from shutil import move - - -def swap_link(link, dest): - root_folder = '/g/arendt/EM_6dpf_segmentation/EM-Prospr' - print(link, '->', dest) - - # 1.) remove the link - os.unlink(link) - - # 2.) move dest to link - move(dest, link) - - # 3.) make link with relative path from dest -> link - rel_path = os.path.relpath(os.path.abspath(link), - root_folder) - # print(dest) - # print(rel_path) - os.symlink(rel_path, dest) - - -def swap_links(): - link_folder = './data/rawdata' - cwd = os.getcwd() - os.chdir(link_folder) - files = os.listdir('.') - - for path in files: - is_link = os.path.islink(path) - if is_link: - dest = os.readlink(path) - assert os.path.exists(dest) and os.path.isfile(dest) - swap_link(path, dest) - - os.chdir(cwd) - - -if __name__ == '__main__': - swap_links() diff --git a/deprecated/update_prospr_names.py b/deprecated/update_prospr_names.py deleted file mode 100644 index 9290a93..0000000 --- a/deprecated/update_prospr_names.py +++ /dev/null @@ -1,34 +0,0 @@ -import os -import json -from glob import glob - - -def update_prospr_names(reference_folder): - with open('../data/images.json') as f: - im_names = json.load(f) - prefix = 'prospr-6dpf-1-whole' - prospr_names = [name for name in im_names if name.startswith(prefix)] - print(len(prospr_names)) - - new_prospr_names = glob(os.path.join(reference_folder, '%s-*.xml' % prefix)) - print(len(new_prospr_names)) - new_prospr_names = [os.path.splitext(name)[0] for name in new_prospr_names] - new_prospr_names = [os.path.split(name)[1] for name in new_prospr_names] - - new_names = [name for name in im_names if not name.startswith(prefix)] - print(len(new_names)) - new_names += new_prospr_names - print(len(new_names)) - - # check - for name in new_names: - path = os.path.join(reference_folder, name) + '.xml' - assert os.path.exists(path), path - print("Check passed") - - with open('../data/images.json', 'w') as f: - json.dump(new_names, f) - - -if __name__ == '__main__': - update_prospr_names('../data/0.5.4/images') diff --git a/misc/check_n5_file.py b/misc/check_n5_file.py new file mode 100644 index 0000000..870d671 --- /dev/null +++ b/misc/check_n5_file.py @@ -0,0 +1,40 @@ +import os +import numpy as np +import z5py + + +def get_chunk_stats(path, key): + with z5py.File(path, 'r') as f: + ds = f[key] + n_chunks_tot = ds.number_of_chunks + + ds_path = os.path.join(path, key) + chunk_sizes = [] + for root, dirs, files in os.walk(ds_path): + for name in files: + if name == 'attributes.json': + continue + size = os.path.getsize(os.path.join(root, name)) + chunk_sizes.append(size) + + n_chunks_filled = len(chunk_sizes) + + return n_chunks_tot, n_chunks_filled, chunk_sizes + + +def summarise_chunk_stats(path, key): + n_chunks, n_filled, sizes = get_chunk_stats(path, key) + percent_filled = float(n_filled) / n_chunks + with z5py.File(path, 'r') as f: + ds = f[key] + chunk_shape = ds.chunks + print("Checked dataset with chunk shape", chunk_shape) + print("Number of existing chunks", n_filled, "/", n_chunks, "(", percent_filled, ")") + print("Mean chunk size in MB:", np.mean(sizes) / 1.e6, "+-", np.std(sizes) / 1.e6) + print("Min/max chunk size in MB:", np.min(sizes) / 1.e6, "/", np.max(sizes) / 1.e6) + + +if __name__ == '__main__': + p = '../data/0.6.5/images/local/sbem-6dpf-1-whole-segmented-cells.n5' + k = 'setup0/timepoint0/s0' + summarise_chunk_stats(p, k) -- GitLab