From e8b7221c45e2fc4e6928293a75eb907e67af1b13 Mon Sep 17 00:00:00 2001
From: Constantin Pape <constantin.pape@iwr.uni-heidelberg.de>
Date: Fri, 7 Feb 2020 16:35:22 +0100
Subject: [PATCH] Remove deprecated scripts

---
 deprecated/add_existing_data.py    |  94 -----------------------
 deprecated/join_tables.py          |  13 ----
 deprecated/legacy_export.py        |  47 ------------
 deprecated/make_data_sources.py    |  91 -----------------------
 deprecated/make_initial_version.py | 115 -----------------------------
 deprecated/swap_links.py           |  40 ----------
 deprecated/update_prospr_names.py  |  34 ---------
 misc/check_n5_file.py              |  40 ++++++++++
 8 files changed, 40 insertions(+), 434 deletions(-)
 delete mode 100644 deprecated/add_existing_data.py
 delete mode 100644 deprecated/join_tables.py
 delete mode 100644 deprecated/legacy_export.py
 delete mode 100644 deprecated/make_data_sources.py
 delete mode 100755 deprecated/make_initial_version.py
 delete mode 100644 deprecated/swap_links.py
 delete mode 100644 deprecated/update_prospr_names.py
 create mode 100644 misc/check_n5_file.py

diff --git a/deprecated/add_existing_data.py b/deprecated/add_existing_data.py
deleted file mode 100644
index aa466f5..0000000
--- a/deprecated/add_existing_data.py
+++ /dev/null
@@ -1,94 +0,0 @@
-#! /g/arendt/pape/miniconda3/envs/platybrowser/bin/python
-
-import os
-from glob import glob
-from scripts.files import add_source, add_image, add_segmentation
-
-
-def add_sources():
-    # add em source
-    add_source('sbem', '6dpf')
-    # add prospr source
-    add_source('prospr', '6dpf')
-
-
-def add_images():
-    base_folder = './data/0.2.1/images'
-
-    # add sbem raw data
-    sbem_prefix = 'sbem-6dpf-1-whole'
-    sbem_raw = './data/0.2.1/images/sbem-6dpf-1-whole-raw.xml'
-    name = 'raw'
-    add_image(sbem_prefix, name, sbem_raw, copy_data=False)
-
-    # add all prospr images
-    prospr_prefix = 'prospr-6dpf-1-whole'
-    prospr_ims = glob(os.path.join(base_folder, 'prospr-6dpf-1-whole-*'))
-    for impath in prospr_ims:
-        name = os.path.split(impath)[1]
-        name, ext = os.path.splitext(name)
-        if ext != '.xml':
-            continue
-        name = name[(len(prospr_prefix) + 1):]
-        add_image(prospr_prefix, name, impath, copy_data=False)
-
-
-def add_static_segmentations():
-    source = 'sbem-6dpf-1-whole'
-
-    # chromatin segmentation
-    chromatin_tables = {'default': './data/0.2.0/tables/sbem-6dpf-1-whole-segmented-chromatin-labels/default.csv'}
-    add_segmentation(source, 'segmented-chromatin-labels',
-                     segmentation_path='./data/rawdata/sbem-6dpf-1-whole-segmented-chromatin-labels.xml',
-                     table_path_dict=chromatin_tables, copy_data=False)
-
-    # tissue segmentation
-    tissue_tables = {'default': './data/0.1.0/tables/sbem-6dpf-1-whole-segmented-tissue-labels/base.csv'}
-    add_segmentation(source, 'segmented-tissue-labels',
-                     segmentation_path='./data/rawdata/sbem-6dpf-1-whole-segmented-tissue-labels.xml',
-                     table_path_dict=tissue_tables, copy_data=False)
-
-    # muscle segmentation
-    add_segmentation(source, 'segmented-muscle',
-                     segmentation_path='./data/rawdata/sbem-6dpf-1-whole-segmented-muscle.h5', copy_data=False)
-
-
-def add_dynamic_segmentations():
-    source = 'sbem-6dpf-1-whole'
-    paintera_root = '/g/kreshuk/data/arendt/platyneris_v1/data.n5'
-
-    # cell segmentation
-    add_segmentation(source, 'segmented-cells-labels',
-                     paintera_project=(paintera_root, 'volumes/paintera/proofread_cells'),
-                     resolution=[.025, .02, .02],
-                     table_update_function='make_cell_tables')
-
-    # nucleus segmentation
-    add_segmentation(source, 'segmented-nuclei-labels',
-                     paintera_project=(paintera_root, 'volumes/paintera/nuclei'),
-                     resolution=[.1, .08, .08],
-                     table_update_function='make_nucleus_tables')
-
-    # cilia segmentation
-    add_segmentation(source, 'segmented-cilia-labels',
-                     paintera_project=(paintera_root, 'volumes/paintera/cilia'),
-                     resolution=[.025, .01, .01],
-                     table_update_function='make_cilia_tables')
-
-
-def add_segmentations():
-    add_static_segmentations()
-    add_dynamic_segmentations()
-
-
-def add_existing_data():
-    """ Add existing data to the json files that keep track of
-        sources, image data and segmentations.
-    """
-    add_sources()
-    add_images()
-    add_segmentations()
-
-
-if __name__ == '__main__':
-    add_existing_data()
diff --git a/deprecated/join_tables.py b/deprecated/join_tables.py
deleted file mode 100644
index 8ce58f0..0000000
--- a/deprecated/join_tables.py
+++ /dev/null
@@ -1,13 +0,0 @@
-import pandas as pd
-
-def_path = '../data/0.5.1/tables/sbem-6dpf-1-whole-segmented-cilia-labels/default.csv.bkp'
-t1 = pd.read_csv(def_path, sep='\t')
-t2 = pd.read_csv('../data/0.5.1/tables/sbem-6dpf-1-whole-segmented-cilia-labels/cilia.csv', sep='\t')
-
-l1 = t1[['label_id']].values
-l2 = t2[['label_id']].values
-assert (l1 == l2).all()
-
-t = pd.concat([t1, t2['cell_id']], axis=1)
-def_path = '../data/0.5.1/tables/sbem-6dpf-1-whole-segmented-cilia-labels/default.csv'
-t.to_csv(def_path, index=False, sep='\t')
diff --git a/deprecated/legacy_export.py b/deprecated/legacy_export.py
deleted file mode 100644
index ce6fee9..0000000
--- a/deprecated/legacy_export.py
+++ /dev/null
@@ -1,47 +0,0 @@
-import os
-# from pathlib import Path
-from shutil import copyfile
-
-
-# TODO need to replace the name in xml to make this work out of the box
-def export_segmentation(folder, name, dest_folder, out_name):
-    seg_file = os.path.join(folder, 'segmentations', '%s.h5' % name)
-    xml_file = os.path.join(folder, 'segmentations', '%s.xml' % name)
-    table_file = os.path.join(folder, 'tables', name, 'default.csv')
-    assert os.path.exists(seg_file)
-    assert os.path.exists(xml_file)
-    assert os.path.exists(table_file), table_file
-
-    seg_out = os.path.join(dest_folder, '%s.h5' % out_name)
-    print("Copying segmentation from", seg_file, "to", seg_out)
-    copyfile(seg_file, seg_out)
-
-    xml_out = os.path.join(dest_folder, '%s.xml' % out_name)
-    print("Copying xml from", xml_file, "to", xml_out)
-    copyfile(xml_file, xml_out)
-
-    table_out = os.path.join(dest_folder, 'tables', '%s.csv' % out_name)
-    print("Copying table from", table_file, "to", table_out)
-    copyfile(table_file, table_out)
-
-
-def export_tag(tag, export_cells=False, export_cilia=False):
-    tag_folder = './data/%s' % tag
-    assert os.path.exists(tag_folder)
-
-    dest_folder = '/g/arendt/EM_6dpf_segmentation/EM-Prospr'
-
-    if export_cells:
-        name = 'sbem-6dpf-1-whole-segmented-cells-labels'
-        out_name = 'em-segmented-cells-new-labels'
-        export_segmentation(tag_folder, name, dest_folder, out_name)
-
-    if export_cilia:
-        name = 'sbem-6dpf-1-whole-segmented-cilia-labels'
-        out_name = 'em-segmented-cilia-labels'
-        export_segmentation(tag_folder, name, dest_folder, out_name)
-
-
-if __name__ == '__main__':
-    # export_tag('0.1.1', export_cells=True)
-    export_tag('0.1.0', export_cilia=True)
diff --git a/deprecated/make_data_sources.py b/deprecated/make_data_sources.py
deleted file mode 100644
index 096246b..0000000
--- a/deprecated/make_data_sources.py
+++ /dev/null
@@ -1,91 +0,0 @@
-#! /g/arendt/pape/miniconda3/envs/platybrowser/bin/python
-
-import os
-import glob
-from shutil import copyfile
-from scripts.files import get_h5_path_from_xml, copy_xml_with_newpath, write_simple_xml
-
-
-def copy_xmls_and_symlink_h5(name_dict, src_folder, trgt_folder):
-    for n1, n2 in name_dict.items():
-        src_xml = os.path.join(src_folder, n1)
-        trgt_xml = os.path.join(trgt_folder, n2)
-
-        # we make a softlink from src to target h5
-        # NOTE eventually, we want to copy all the data, but
-        # for now, we use softlinks in order to keep the current
-        # version of the platy browser working
-        src_h5 = get_h5_path_from_xml(src_xml, return_absolute_path=True)
-        trgt_h5 = os.path.splitext(n2)[0] + '.h5'
-        os.symlink(src_h5, os.path.join(trgt_folder, trgt_h5))
-
-        copy_xml_with_newpath(src_xml, trgt_xml, trgt_h5)
-
-
-def make_initial_data_sources(copy_sbem,
-                              copy_prospr,
-                              copy_fib,
-                              copy_regions):
-    old_folder = '/g/arendt/EM_6dpf_segmentation/EM-Prospr'
-    raw_folder = './data/rawdata'
-    os.makedirs(raw_folder, exist_ok=True)
-
-    # TODO
-    # copy cellular models
-    if copy_sbem:
-        print("Copy sbem data")
-        # copy the sbem data
-        sbem_prefix = 'sbem-6dpf-1-whole'
-        name_dict = {'em-raw-full-res.xml': 'raw.xml',
-                     'em-segmented-muscles-ariadne.xml': 'segmented-muscle.xml',
-                     'em-segmented-tissue-labels.xml': 'segmented-tissue-labels.xml'}
-        name_dict = {k: '%s-%s' % (sbem_prefix, v)
-                     for k, v in name_dict.items()}
-        copy_xmls_and_symlink_h5(name_dict, old_folder, raw_folder)
-
-    if copy_prospr:
-        print("Copy prospr data")
-        prospr_prefix = 'prospr-6dpf-1-whole'
-        # copy the prospr meds
-        prospr_names = glob.glob(os.path.join(old_folder, "*-MED*"))
-        prospr_names = [os.path.split(f)[1] for f in prospr_names]
-        prospr_names = [name for name in prospr_names if os.path.splitext(name)[1] == '.xml']
-        name_dict = {n: '%s-%s' % (prospr_prefix, n) for n in prospr_names}
-        copy_xmls_and_symlink_h5(name_dict, old_folder, raw_folder)
-
-        # copy valentyna's med file
-        input_path = '/g/kreshuk/zinchenk/cell_match/data/meds_all_genes_500nm.h5'
-        output_path = os.path.join(raw_folder, '%s_meds_all_genes.h5' % prospr_prefix)
-        xml_path = os.path.join(raw_folder, '%s_meds_all_genes.xml' % prospr_prefix)
-        write_simple_xml(xml_path, os.path.split(output_path)[1],
-                         path_type='relative')
-        copyfile(input_path, output_path)
-
-    if copy_fib:
-        print("Coby fibsem data")
-        # copy the fibsem data
-        fib_prefix = 'fibsem-6dpf-1-parapod'
-        name_dict = {'em-raw-': 'em-raw',
-                     'em-segmented-': ''}
-        name_dict = {k: '%s-%s' % (fib_prefix, v)
-                     for k, v in name_dict.items()}
-        copy_xmls_and_symlink_h5(name_dict, old_folder, raw_folder)
-
-    if copy_regions:
-
-        print("Copy regions")
-        prospr_prefix = 'prospr-6dpf-1-whole-segmented'
-        prospr_names = glob.glob(os.path.join(old_folder, "BodyPart_*"))
-        prospr_names = [os.path.split(f)[1] for f in prospr_names]
-        prospr_names = [name for name in prospr_names if os.path.splitext(name)[1] == '.xml']
-        name_dict = {n: '%s-%s' % (prospr_prefix, n.split('_')[-1]) for n in prospr_names}
-        copy_xmls_and_symlink_h5(name_dict, old_folder, raw_folder)
-
-
-if __name__ == '__main__':
-    copy_sbem = False
-    copy_prospr = False
-    copy_fib = False
-    copy_regions = True
-    make_initial_data_sources(copy_sbem, copy_prospr, copy_fib,
-                              copy_regions)
diff --git a/deprecated/make_initial_version.py b/deprecated/make_initial_version.py
deleted file mode 100755
index e22e1e5..0000000
--- a/deprecated/make_initial_version.py
+++ /dev/null
@@ -1,115 +0,0 @@
-#! /g/arendt/pape/miniconda3/envs/platybrowser/bin/python
-
-import os
-from shutil import copyfile
-from glob import glob
-
-import h5py
-from scripts.files import make_folder_structure
-from scripts.export import export_segmentation
-from scripts.files import make_bdv_server_file, copy_image_data, copy_misc_data
-from scripts.files import copy_segmentation
-from scripts.attributes import make_nucleus_tables, make_cell_tables
-from pybdv.converter import make_bdv
-
-
-def make_sbem_segmentations(old_folder, folder):
-    path = '/g/kreshuk/data/arendt/platyneris_v1/data.n5'
-
-    # export nucleus segemntation
-    tmp_nuclei = 'tmp_export_nuclei'
-    key_nuclei = 'volumes/paintera/nuclei'
-    nuclei_name = 'sbem-6dpf-1-whole-segmented-nuclei-labels'
-    res_nuclei = [.1, .08, .08]
-    export_segmentation(path, key_nuclei, old_folder, folder, nuclei_name, res_nuclei, tmp_nuclei)
-
-    # export cell segemntation
-    tmp_cells = 'tmp_export_cells'
-    key_cells = 'volumes/paintera/proofread_cells'
-    cells_name = 'sbem-6dpf-1-whole-segmented-cells-labels'
-    res_cells = [.025, .02, .02]
-    export_segmentation(path, key_cells, old_folder, folder, cells_name, res_cells, tmp_cells,
-                        target='local', max_jobs=8)
-
-
-def make_sbem_tables(folder):
-    # make cell segmentation tables
-    name_cells = 'sbem-6dpf-1-whole-segmented-cells-labels'
-    res_cells = [.025, .02, .02]
-    make_cell_tables(folder, name_cells, 'tmp_tables_cells',
-                     res_cells, target='local', max_jobs=32)
-
-    # make nucleus segmentation tables
-    name_nuclei = 'sbem-6dpf-1-whole-segmented-nuclei-labels'
-    res_nuclei = [.1, .08, .08]
-    make_nucleus_tables(folder, name_nuclei, 'tmp_tables_nuclei',
-                        res_nuclei, target='local', max_jobs=32)
-
-    old_folder = '/g/arendt/EM_6dpf_segmentation/EM-Prospr'
-    # copy tissue segmentation table
-    tissue_name_out = 'sbem-6dpf-1-whole-segmented-tissue-labels'
-    table_folder = os.path.join(folder, 'tables', tissue_name_out)
-    os.makedirs(table_folder, exist_ok=True)
-    tissue_table_in = os.path.join(old_folder, 'tables', 'em-segmented-tissue-labels.csv')
-    tissue_table_out = os.path.join(table_folder, 'default.csv')
-    copyfile(tissue_table_in, tissue_table_out)
-
-
-def make_prospr_region_segmentations():
-    in_prefix = '/g/arendt/EM_6dpf_segmentation/EM-Prospr/BodyPart_*.h5'
-    out_prefix = './data/rawdata/prospr-6dpf-1-whole-segmented-'
-    files = glob(in_prefix)
-    for p in files:
-        name = p.split('_')[-1][:-3]
-        o = out_prefix + name + '.h5'
-        print(p, "to", o)
-        with h5py.File(p) as f:
-            key = 't00000/s00/0/cells'
-            data = f[key][:]
-            data[data > 0] = 0
-            data[data < 0] = 255
-        make_bdv(data, o, 3 * [[2, 2, 2]],
-                 unit='micrometer', resolution=[0.5, 0.5, 0.5])
-
-
-def insert_chromatin():
-    src_folder = 'data/rawdata'
-    dst_folder = 'data/0.2.0'
-    name = 'sbem-6dpf-1-whole-segmented-chromatin-labels'
-    copy_segmentation(src_folder, dst_folder, name)
-    src_table = '/g/arendt/EM_6dpf_segmentation/EM-Prospr/tables/em-segmented-chromatin-labels.csv'
-    dst_table = os.path.join(dst_folder, 'tables', name)
-    os.makedirs(dst_table, exist_ok=True)
-    dst_table = os.path.join(dst_table, 'base.csv')
-    copyfile(src_table, dst_table)
-
-
-def make_initial_version():
-
-    src_folder = 'data/rawdata'
-    old_folder = '/g/arendt/EM_6dpf_segmentation/EM-Prospr'
-    tag = '0.0.0'
-    folder = os.path.join('data', tag)
-
-    make_folder_structure(folder)
-
-    # make xmls for all necessary image data
-    copy_image_data(src_folder, os.path.join(folder, 'images'))
-    copy_misc_data(src_folder, os.path.join(folder, 'misc'))
-
-    # export the initial sbem segmentations
-    make_sbem_segmentations(old_folder, folder)
-
-    # make the tables for sbem segmentations
-    make_sbem_tables(folder)
-
-    # make the bdv server file
-    make_bdv_server_file([os.path.join(folder, 'images'),
-                          os.path.join(folder, 'segmentations')],
-                         os.path.join(folder, 'misc', 'bdvserver.txt'))
-
-
-if __name__ == '__main__':
-    insert_chromatin()
-    # make_prospr_region_segmentations()
-    # make_initial_version()
diff --git a/deprecated/swap_links.py b/deprecated/swap_links.py
deleted file mode 100644
index 1cf5559..0000000
--- a/deprecated/swap_links.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import os
-from shutil import move
-
-
-def swap_link(link, dest):
-    root_folder = '/g/arendt/EM_6dpf_segmentation/EM-Prospr'
-    print(link, '->', dest)
-
-    # 1.) remove the link
-    os.unlink(link)
-
-    # 2.) move dest to link
-    move(dest, link)
-
-    # 3.) make link with relative path from dest -> link
-    rel_path = os.path.relpath(os.path.abspath(link),
-                               root_folder)
-    # print(dest)
-    # print(rel_path)
-    os.symlink(rel_path, dest)
-
-
-def swap_links():
-    link_folder = './data/rawdata'
-    cwd = os.getcwd()
-    os.chdir(link_folder)
-    files = os.listdir('.')
-
-    for path in files:
-        is_link = os.path.islink(path)
-        if is_link:
-            dest = os.readlink(path)
-            assert os.path.exists(dest) and os.path.isfile(dest)
-            swap_link(path, dest)
-
-    os.chdir(cwd)
-
-
-if __name__ == '__main__':
-    swap_links()
diff --git a/deprecated/update_prospr_names.py b/deprecated/update_prospr_names.py
deleted file mode 100644
index 9290a93..0000000
--- a/deprecated/update_prospr_names.py
+++ /dev/null
@@ -1,34 +0,0 @@
-import os
-import json
-from glob import glob
-
-
-def update_prospr_names(reference_folder):
-    with open('../data/images.json') as f:
-        im_names = json.load(f)
-    prefix = 'prospr-6dpf-1-whole'
-    prospr_names = [name for name in im_names if name.startswith(prefix)]
-    print(len(prospr_names))
-
-    new_prospr_names = glob(os.path.join(reference_folder, '%s-*.xml' % prefix))
-    print(len(new_prospr_names))
-    new_prospr_names = [os.path.splitext(name)[0] for name in new_prospr_names]
-    new_prospr_names = [os.path.split(name)[1] for name in new_prospr_names]
-
-    new_names = [name for name in im_names if not name.startswith(prefix)]
-    print(len(new_names))
-    new_names += new_prospr_names
-    print(len(new_names))
-
-    # check
-    for name in new_names:
-        path = os.path.join(reference_folder, name) + '.xml'
-        assert os.path.exists(path), path
-    print("Check passed")
-
-    with open('../data/images.json', 'w') as f:
-        json.dump(new_names, f)
-
-
-if __name__ == '__main__':
-    update_prospr_names('../data/0.5.4/images')
diff --git a/misc/check_n5_file.py b/misc/check_n5_file.py
new file mode 100644
index 0000000..870d671
--- /dev/null
+++ b/misc/check_n5_file.py
@@ -0,0 +1,40 @@
+import os
+import numpy as np
+import z5py
+
+
+def get_chunk_stats(path, key):
+    """Return (total chunk count, number of chunk files on disk, their sizes in bytes)."""
+    with z5py.File(path, 'r') as f:
+        n_chunks_tot = f[key].number_of_chunks
+
+    # n5 keeps one file per non-empty chunk, nested in per-axis sub-folders
+    ds_path = os.path.join(path, key)
+    chunk_sizes = []
+    for root, dirs, files in os.walk(ds_path):
+        for name in files:
+            if name == 'attributes.json':  # dataset metadata, not a chunk
+                continue
+            chunk_sizes.append(os.path.getsize(os.path.join(root, name)))
+
+    return n_chunks_tot, len(chunk_sizes), chunk_sizes
+
+
+def summarise_chunk_stats(path, key):
+    """Print chunk shape, fill fraction and chunk-size statistics for a dataset."""
+    n_chunks, n_filled, sizes = get_chunk_stats(path, key)
+    # guard against an empty dataset so we don't divide by zero
+    fraction_filled = float(n_filled) / n_chunks if n_chunks else 0.
+    with z5py.File(path, 'r') as f:
+        chunk_shape = f[key].chunks
+    print("Checked dataset with chunk shape", chunk_shape)
+    print("Number of existing chunks", n_filled, "/", n_chunks, "(", fraction_filled, ")")
+    if sizes:  # min / mean / max are undefined when no chunk file exists
+        print("Mean chunk size in MB:", np.mean(sizes) / 1.e6, "+-", np.std(sizes) / 1.e6)
+        print("Min/max chunk size in MB:", np.min(sizes) / 1.e6, "/", np.max(sizes) / 1.e6)
+
+
+if __name__ == '__main__':
+    p = '../data/0.6.5/images/local/sbem-6dpf-1-whole-segmented-cells.n5'
+    k = 'setup0/timepoint0/s0'
+    summarise_chunk_stats(p, k)
-- 
GitLab