From a49fb42143e97f5b8bb6875247c4a7192ae87b51 Mon Sep 17 00:00:00 2001
From: Constantin Pape <>
Date: Fri, 20 Sep 2019 14:54:13 +0200
Subject: [PATCH] Implement simple table propagation and update cilia

 data/segmentations.json                |  2 +-
 scripts/attributes/  | 31 +++++++++++++++++++
 scripts/attributes/ | 38 +++++++++++------------
 scripts/attributes/           | 42 +++++++++-----------------
 segmentation/                  | 34 +++++++++++++++++++++                        |  2 +-
 6 files changed, 101 insertions(+), 48 deletions(-)
 create mode 100644 segmentation/

diff --git a/data/segmentations.json b/data/segmentations.json
index 8944bb5..c7219ca 100644
--- a/data/segmentations.json
+++ b/data/segmentations.json
@@ -1 +1 @@
-{"sbem-6dpf-1-whole-segmented-chromatin-labels": {"is_static": true, "has_tables": true}, "sbem-6dpf-1-whole-segmented-tissue-labels": {"is_static": true, "has_tables": true}, "sbem-6dpf-1-whole-segmented-muscle": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-segmented-cells-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/proofread_cells"], "resolution": [0.025, 0.02, 0.02], "table_update_function": "make_cell_tables"}, "sbem-6dpf-1-whole-segmented-nuclei-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/nuclei"], "resolution": [0.1, 0.08, 0.08], "table_update_function": "make_nucleus_tables"}, "sbem-6dpf-1-whole-segmented-cilia-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/cilia"], "resolution": [0.025, 0.01, 0.01], "table_update_function": "make_cilia_tables"}, "sbem-6dpf-1-whole-segmented-ariande-neuropil": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-segmented-cats-neuropil": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-traces-labels": {"is_static": true, "has_tables": true}}
\ No newline at end of file
+{"sbem-6dpf-1-whole-segmented-chromatin-labels": {"is_static": true, "has_tables": true}, "sbem-6dpf-1-whole-segmented-tissue-labels": {"is_static": true, "has_tables": true}, "sbem-6dpf-1-whole-segmented-muscle": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-segmented-cells-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/proofread_cells"], "resolution": [0.025, 0.02, 0.02], "table_update_function": "make_cell_tables"}, "sbem-6dpf-1-whole-segmented-nuclei-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/nuclei"], "resolution": [0.1, 0.08, 0.08], "table_update_function": "make_nucleus_tables"}, "sbem-6dpf-1-whole-segmented-cilia-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/cilia_label_multiset"], "resolution": [0.025, 0.01, 0.01], "table_update_function": "make_cilia_tables"}, "sbem-6dpf-1-whole-segmented-ariande-neuropil": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-segmented-cats-neuropil": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-traces-labels": {"is_static": true, "has_tables": true}}
diff --git a/scripts/attributes/ b/scripts/attributes/
index 9c1aba3..fad0384 100644
--- a/scripts/attributes/
+++ b/scripts/attributes/
@@ -3,6 +3,7 @@ import json
 import h5py
 import z5py
 import numpy as np
+import pandas as pd
 import luigi
 from cluster_tools.morphology import MorphologyWorkflow
@@ -157,3 +158,33 @@ def base_attributes(input_path, input_key, output_path, resolution,
     with z5py.File(tmp_path, 'r') as f:
         label_ids = f[tmp_key][:, 0]
     return label_ids
+# TODO this is un-tested !!!
+def propagate_attributes(id_mapping_path, old_table_path, output_path):
+    """ Propagate all attributes of an old table to the new label ids.
+
+    The output table has one row per new id in [0, max new id]. The first
+    column holds the new id, all other columns are copied from the row of
+    the old table that is selected by the corresponding old id. New ids
+    that are not part of the mapping keep an all-zero row.
+
+    Arguments:
+        id_mapping_path: path to a json file that maps new ids to old ids.
+        old_table_path: path to the old, tab-separated table. Its first
+            column is the id column; old ids are used as row positions.
+        output_path: path of the propagated, tab-separated table.
+            If this file exists already, nothing is done.
+    """
+    # if the output already exists, we assume that the propagation
+    # was already done and we just continue
+    if os.path.exists(output_path):
+        return
+    with open(id_mapping_path, 'r') as f:
+        id_mapping = json.load(f)
+    # json object keys are always strings, so convert them back to integers
+    id_mapping = {int(k): v for k, v in id_mapping.items()}
+    # default=-1 yields an empty table (header only) for an empty mapping
+    n_new_ids = max(id_mapping, default=-1) + 1
+    table = pd.read_csv(old_table_path, sep='\t')
+    col_names = table.columns.values
+    table = table.values
+    # keep the dtype of the old table, so that integer ids are not
+    # written as floating point numbers
+    out_table = np.zeros((n_new_ids, table.shape[1]), dtype=table.dtype)
+    # can be vectorized
+    for new_id, old_id in id_mapping.items():
+        out_table[new_id, 0] = new_id
+        out_table[new_id, 1:] = table[old_id, 1:]
+    # the column labels are passed via 'columns'; DataFrame has no 'name' argument
+    out_table = pd.DataFrame(out_table, columns=col_names)
+    out_table.to_csv(output_path, index=False, sep='\t')
diff --git a/scripts/attributes/ b/scripts/attributes/
index 6d2eb52..0d137eb 100644
--- a/scripts/attributes/
+++ b/scripts/attributes/
@@ -69,20 +69,24 @@ def compute_centerline(obj, resolution):
     return coordinates, max_plen
-def get_bb(base_table, cid, resolution):
+def get_bb(base_table, cid, resolution, shape):
+    """ Return the bounding box of the cilium `cid` as a tuple of slices.
+
+    The row `base_table.loc[cid]` provides the box via its columns
+    `bb_min_{z,y,x}` and `bb_max_{z,y,x}`. These values are divided by
+    `resolution`, truncated to int, enlarged by a halo of 2 per axis
+    and clipped to the range [0, shape].
+
+    Arguments:
+        base_table: table that is indexed with `.loc[cid]`
+            (presumably a pandas DataFrame indexed by label id - confirm).
+        cid: id of the cilium.
+        resolution: per-axis resolution in z, y, x order
+            (presumably the voxel size in the units of the table - confirm).
+        shape: per-axis upper bound for the slices in z, y, x order;
+            `load_seg` passes the shape of the segmentation dataset.
+
+    Returns:
+        tuple of slices, one per axis in z, y, x order.
+    """
+    halo = (2, 2, 2)
     # get the row for this cilia id
     row = base_table.loc[cid]
     # compute the bounding box
-    bb_min = (row.bb_min_z, row.bb_min_y, row.bb_min_x)
-    bb_max = (row.bb_max_z, row.bb_max_y, row.bb_max_x)
-    bb = tuple(slice(int(mi / re), int(ma / re))
-               for mi, ma, re in zip(bb_min, bb_max, resolution))
+    bb_min = [row.bb_min_z, row.bb_min_y, row.bb_min_x]
+    bb_max = [row.bb_max_z, row.bb_max_y, row.bb_max_x]
+    # divide by the resolution and truncate to integer coordinates
+    bb_min = [int(mi / re) for mi, re in zip(bb_min, resolution)]
+    bb_max = [int(ma / re) for ma, re in zip(bb_max, resolution)]
+    # enlarge the box by the halo, but never beyond [0, shape]
+    bb = tuple(slice(max(mi - ha, 0),
+                     min(ma + ha, sh))
+               for mi, ma, sh, ha in zip(bb_min, bb_max, shape, halo))
     return bb
 def load_seg(ds, base_table, cid, resolution):
+    """ Load the bounding box of the cilium `cid` from `ds` and compare it to `cid`.
+
+    The bounding box comes from `get_bb` and is clipped to `ds.shape`.
+    Returns `ds[bb] == cid` (presumably a boolean mask of the cilium - confirm).
+    """
     # load segmentation from the bounding box and get foreground
-    bb = get_bb(base_table, cid, resolution)
+    bb = get_bb(base_table, cid, resolution, ds.shape)
     obj = ds[bb] == cid
     return obj
@@ -137,28 +141,24 @@ def measure_cilia_attributes(seg_path, seg_key, base_table, resolution):
     return attributes, names
-# TODO the cell id mapping table should be separate
-# TODO wrap this into a luigi task so we don't recompute it every time
-def cilia_attributes(seg_path, seg_key,
-                     base_table_path, manual_mapping_table_path, table_out_path,
+# TODO results are not trustworthy yet
+# TODO wrap this into a cluster task so we don't recompute it every time
+# and can be scheduled on slurm
+def cilia_morphology(seg_path, seg_key,
+                     base_table_path, out_path,
                      resolution, tmp_folder, target, max_jobs):
+    """ Compute the cilia morphology attributes and write them to a table.
+
+    Reads the base table, computes the attributes with
+    `measure_cilia_attributes` and writes a tab-separated table with the
+    column 'label_id' followed by the attribute columns to `out_path`.
+
+    Arguments:
+        seg_path: path of the segmentation, passed on to `measure_cilia_attributes`.
+        seg_key: key of the segmentation, passed on to `measure_cilia_attributes`.
+        base_table_path: path to the tab-separated base table;
+            it needs the column 'label_id'.
+        out_path: path the resulting table is written to.
+        resolution: passed on to `measure_cilia_attributes`.
+        tmp_folder, target, max_jobs: not used by the code visible in this hunk
+            (presumably kept for the planned cluster task, see the TODO - confirm).
+    """
     # read the base table
     base_table = pd.read_csv(base_table_path, sep='\t')
     cilia_ids = base_table['label_id'].values.astype('uint64')
-    # add the manually mapped cell ids
-    # cell_ids = get_mapped_cell_ids(cilia_ids, manual_mapping_table_path)
-    # FIXME
-    cell_ids = np.zeros_like(cilia_ids)
-    assert len(cell_ids) == len(cilia_ids)
     # measure cilia specific attributes: length, diameter, ? (could try curvature)
+    print("Start to compute cilia morphology ...")
     attributes, names = measure_cilia_attributes(seg_path, seg_key, base_table, resolution)
     assert len(attributes) == len(cilia_ids)
     assert attributes.shape[1] == len(names)
-    table = np.concatenate([cilia_ids[:, None], cell_ids[:, None], attributes], axis=1)
-    col_names = ['label_id', 'cell_id'] + names
+    # the first column is the label id, the remaining columns are the attributes
+    table = np.concatenate([cilia_ids[:, None], attributes], axis=1)
+    col_names = ['label_id'] + names
     table = pd.DataFrame(table, columns=col_names)
-    table.to_csv(table_out_path, index=False, sep='\t')
+    table.to_csv(out_path, index=False, sep='\t')
diff --git a/scripts/attributes/ b/scripts/attributes/
index e717720..105404f 100644
--- a/scripts/attributes/
+++ b/scripts/attributes/
@@ -1,12 +1,12 @@
 import os
 import h5py
-from .base_attributes import base_attributes
+from .base_attributes import base_attributes, propagate_attributes
 from .cell_nucleus_mapping import map_cells_to_nuclei
 from .genes import write_genes_table
 from .morphology import write_morphology_cells, write_morphology_nuclei
 from .region_attributes import region_attributes
-from .cilia_attributes import cilia_attributes
+from .cilia_attributes import cilia_morphology
 from ..files.xml_utils import get_h5_path_from_xml
@@ -19,7 +19,7 @@ def get_seg_path(folder, name, key):
     return path
-def make_cell_tables(folder, name, tmp_folder, resolution,
+def make_cell_tables(old_folder, folder, name, tmp_folder, resolution,
                      target='slurm', max_jobs=100):
     # make the table folder
     table_folder = os.path.join(folder, 'tables', name)
@@ -67,7 +67,7 @@ def make_cell_tables(folder, name, tmp_folder, resolution,
                       label_ids, tmp_folder, target, max_jobs)
-def make_nucleus_tables(folder, name, tmp_folder, resolution,
+def make_nucleus_tables(old_folder, folder, name, tmp_folder, resolution,
                         target='slurm', max_jobs=100):
     # make the table folder
     table_folder = os.path.join(folder, 'tables', name)
@@ -91,11 +91,8 @@ def make_nucleus_tables(folder, name, tmp_folder, resolution,
                             n_labels, resolution, tmp_folder,
                             target, max_jobs)
-    # TODO additional tables:
-    # ???
-def make_cilia_tables(folder, name, tmp_folder, resolution,
+def make_cilia_tables(old_folder, folder, name, tmp_folder, resolution,
                       target='slurm', max_jobs=100):
     # make the table folder
     table_folder = os.path.join(folder, 'tables', name)
@@ -110,22 +107,13 @@ def make_cilia_tables(folder, name, tmp_folder, resolution,
                     tmp_folder, target=target, max_jobs=max_jobs,
-    # NOTE this is preliminary.
-    # In the end, we wan't this table to just live in the platy-browser data.
-    # Right now, something with the mapping to cell ids is off - I think the ids come
-    # from two different versions of the segmentation.
-    # We will keep this for now, but it needs another round of corrections.
-    # But this should be done in the platy-browser directly; need to wait for this
-    # until Tischi is back.
-    # Then, we will always need to update the cell id mapping table when the
-    # segmentation changes as well.
-    manual_mapping_table = os.path.join(folder, 'misc', 'cilia_id_mapping.csv')
-    assert os.path.exists(manual_mapping_table)
-    # compute cilia specific attributes at lower resolution ?
-    # add cilia specific attributes (length, diameter) and manual cell mapping done by rachel
-    cilia_out = os.path.join(table_folder, 'cilia.csv')
-    cilia_attributes(seg_path, seg_key,
-                     base_out, manual_mapping_table, cilia_out,
-                     resolution, tmp_folder, target=target, max_jobs=max_jobs)
+    # TODO when we change the cell segmentation, we also need to update this!
+    propagate_attributes(os.path.join(folder, 'misc', 'new_id_lut_sbem-6dpf-1-whole-segmented-cilia-labels.json'),
+                         os.path.join(old_folder, 'tables', name, 'cell_id_mapping.csv'),
+                         os.path.join(table_folder, 'cell_id_mapping.csv'))
+    # add cilia specific attributes (length, diameter)
+    morpho_out = os.path.join(table_folder, 'morphology.csv')
+    cilia_morphology(seg_path, seg_key,
+                     base_out, morpho_out, resolution,
+                     tmp_folder, target=target, max_jobs=max_jobs)
diff --git a/segmentation/ b/segmentation/
new file mode 100644
index 0000000..7e624c0
--- /dev/null
+++ b/segmentation/
@@ -0,0 +1,34 @@
+import json
+import numpy as np
+import pandas as pd
+def map_cell_ids(id_lut_path='../data/0.5.3/misc/new_id_lut_sbem-6dpf-1-whole-segmented-cilia-labels.json',
+                 old_mapping_path='../data/0.5.2/misc/cilia_id_mapping.csv',
+                 out_path='../data/0.5.3/tables/sbem-6dpf-1-whole-segmented-cilia-labels/cell_id_mapping.csv'):
+    """ Transfer the cilia id -> cell id mapping to the new cilia ids.
+
+    Writes a tab-separated table with one row per new cilia id in
+    [0, max new id]. The first column holds the new cilia id, the second
+    one the cell id that was mapped to the corresponding old cilia id,
+    or 0 if there is none. The column names are taken from the old table.
+
+    Arguments:
+        id_lut_path: path to a json file that maps new cilia ids to old cilia ids.
+        old_mapping_path: path to the old, tab-separated mapping table;
+            first column: old cilia id, second column: cell id.
+        out_path: path the new mapping table is written to.
+    """
+    with open(id_lut_path) as f:
+        new_cil_ids = json.load(f)
+    # json object keys are always strings, so convert them back to integers
+    new_cil_ids = {int(k): v for k, v in new_cil_ids.items()}
+    old_table = pd.read_csv(old_mapping_path, sep='\t')
+    names = old_table.columns.values
+    old_values = old_table.values
+    old_cell_mapping = dict(zip(old_values[:, 0], old_values[:, 1]))
+    # default=-1 yields an empty table (header only) for an empty look-up table
+    n_new_cilia = max(new_cil_ids, default=-1) + 1
+    new_cell_mapping = np.zeros((n_new_cilia, 2), dtype='uint32')
+    for new_cil_id, old_cil_id in new_cil_ids.items():
+        new_cell_mapping[new_cil_id, 0] = new_cil_id
+        # cilia without a mapped cell get the cell id 0
+        cell_id = old_cell_mapping.get(old_cil_id, 0)
+        new_cell_mapping[new_cil_id, 1] = cell_id
+        if cell_id != 0:
+            print(new_cil_id, old_cil_id, cell_id)
+    new_cell_mapping = pd.DataFrame(new_cell_mapping, columns=names)
+    new_cell_mapping.to_csv(out_path, sep='\t', index=False)
+if __name__ == '__main__':
+    map_cell_ids()
diff --git a/ b/
index e63b4df..337d7a2 100755
--- a/
+++ b/
@@ -51,7 +51,7 @@ def update_table(name, seg_dict, folder, new_folder,
                  target, max_jobs):
     tmp_folder = 'tmp_tables_%s' % name
     update_function = getattr(scripts.attributes, seg_dict['table_update_function'])
-    update_function(new_folder, name, tmp_folder, seg_dict['resolution'],
+    update_function(folder, new_folder, name, tmp_folder, seg_dict['resolution'],
                     target=target, max_jobs=max_jobs)