Implement simple table propagation and update cilia attributes

a49fb421 · Constantin Pape · d4bacc74 · a49fb421 · a49fb421 · a49fb421
Commit a49fb421 authored 5 years ago by Constantin Pape
--- a/data/segmentations.json
+++ b/data/segmentations.json
-{"sbem-6dpf-1-whole-segmented-chromatin-labels": {"is_static": true, "has_tables": true}, "sbem-6dpf-1-whole-segmented-tissue-labels": {"is_static": true, "has_tables": true}, "sbem-6dpf-1-whole-segmented-muscle": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-segmented-cells-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/proofread_cells"], "resolution": [0.025, 0.02, 0.02], "table_update_function": "make_cell_tables"}, "sbem-6dpf-1-whole-segmented-nuclei-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/nuclei"], "resolution": [0.1, 0.08, 0.08], "table_update_function": "make_nucleus_tables"}, "sbem-6dpf-1-whole-segmented-cilia-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/cilia"], "resolution": [0.025, 0.01, 0.01], "table_update_function": "make_cilia_tables"}, "sbem-6dpf-1-whole-segmented-ariande-neuropil": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-segmented-cats-neuropil": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-traces-labels": {"is_static": true, "has_tables": true}}
\ No newline at end of file
+{"sbem-6dpf-1-whole-segmented-chromatin-labels": {"is_static": true, "has_tables": true}, "sbem-6dpf-1-whole-segmented-tissue-labels": {"is_static": true, "has_tables": true}, "sbem-6dpf-1-whole-segmented-muscle": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-segmented-cells-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/proofread_cells"], "resolution": [0.025, 0.02, 0.02], "table_update_function": "make_cell_tables"}, "sbem-6dpf-1-whole-segmented-nuclei-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/nuclei"], "resolution": [0.1, 0.08, 0.08], "table_update_function": "make_nucleus_tables"}, "sbem-6dpf-1-whole-segmented-cilia-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/cilia_label_multiset"], "resolution": [0.025, 0.01, 0.01], "table_update_function": "make_cilia_tables"}, "sbem-6dpf-1-whole-segmented-ariande-neuropil": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-segmented-cats-neuropil": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-traces-labels": {"is_static": true, "has_tables": true}}
--- a/scripts/attributes/base_attributes.py
+++ b/scripts/attributes/base_attributes.py
@@ -3,6 +3,7 @@ import json
 import h5py
 import z5py
 import numpy as np
+import pandas as pd

 import luigi
 from cluster_tools.morphology import MorphologyWorkflow
@@ -157,3 +158,33 @@ def base_attributes(input_path, input_key, output_path, resolution,
    with z5py.File(tmp_path, 'r') as f:
        label_ids = f[tmp_key][:, 0]
    return label_ids
+
+
+# TODO this is un-tested !!!
+def propagate_attributes(id_mapping_path, old_table_path, output_path):
+    """ Propagate all attributes to new ids. (column label id)
+    """
+    # if the output already exists, we assume that the propagation
+    # was already done and we just continue
+    if os.path.exists(output_path):
+        return
+
+    with open(id_mapping_path, 'r') as f:
+        id_mapping = json.load(f)
+    id_mapping = {int(k): v for k, v in id_mapping.items()}
+
+    n_new_ids = max(id_mapping.keys()) + 1
+
+    table = pd.read_csv(old_table_path, sep='\t')
+    col_names = table.columns.values
+    table = table.values
+
+    out_table = np.zeros((n_new_ids, table.shape[1]))
+
+    # can be vectorized
+    for new_id, old_id in id_mapping.items():
+        out_table[new_id, 0] = new_id
+        out_table[new_id, 1:] = table[old_id, 1:]
+
+    out_table = pd.DataFrame(out_table, name=col_names)
+    out_table.to_csv(output_path, index=False, sep='\t')
--- a/scripts/attributes/cilia_attributes.py
+++ b/scripts/attributes/cilia_attributes.py
@@ -69,20 +69,24 @@ def compute_centerline(obj, resolution):
    return coordinates, max_plen


-def get_bb(base_table, cid, resolution):
+def get_bb(base_table, cid, resolution, shape):
+    halo = (2, 2, 2)
    # get the row for this cilia id
    row = base_table.loc[cid]
    # compute the bounding box
-    bb_min = (row.bb_min_z, row.bb_min_y, row.bb_min_x)
-    bb_max = (row.bb_max_z, row.bb_max_y, row.bb_max_x)
-    bb = tuple(slice(int(mi / re), int(ma / re))
-               for mi, ma, re in zip(bb_min, bb_max, resolution))
+    bb_min = [row.bb_min_z, row.bb_min_y, row.bb_min_x]
+    bb_max = [row.bb_max_z, row.bb_max_y, row.bb_max_x]
+    bb_min = [int(mi / re) for mi, re in zip(bb_min, resolution)]
+    bb_max = [int(ma / re) for ma, re in zip(bb_max, resolution)]
+    bb = tuple(slice(max(mi - ha, 0),
+                     min(ma + ha, sh))
+               for mi, ma, sh, ha in zip(bb_min, bb_max, shape, halo))
    return bb


 def load_seg(ds, base_table, cid, resolution):
    # load segmentation from the bounding box and get foreground
-    bb = get_bb(base_table, cid, resolution)
+    bb = get_bb(base_table, cid, resolution, ds.shape)
    obj = ds[bb] == cid
    return obj

@@ -137,28 +141,24 @@ def measure_cilia_attributes(seg_path, seg_key, base_table, resolution):
    return attributes, names


-# TODO the cell id mapping table should be separate
-# TODO wrap this into a luigi task so we don't recompute it every time
-def cilia_attributes(seg_path, seg_key,
-                     base_table_path, manual_mapping_table_path, table_out_path,
+# TODO results are not trust-worthy yet
+# TODO wrap this into a cluster task so we don't recompute it every time
+# and can be scheduled on slurm
+def cilia_morphology(seg_path, seg_key,
+                     base_table_path, out_path,
                     resolution, tmp_folder, target, max_jobs):

    # read the base table
    base_table = pd.read_csv(base_table_path, sep='\t')
    cilia_ids = base_table['label_id'].values.astype('uint64')

-    # add the manually mapped cell ids
-    # cell_ids = get_mapped_cell_ids(cilia_ids, manual_mapping_table_path)
-    # FIXME
-    cell_ids = np.zeros_like(cilia_ids)
-    assert len(cell_ids) == len(cilia_ids)
-
    # measure cilia specific attributes: length, diameter, ? (could try curvature)
+    print("Start to compute cilia morphology ...")
    attributes, names = measure_cilia_attributes(seg_path, seg_key, base_table, resolution)
    assert len(attributes) == len(cilia_ids)
    assert attributes.shape[1] == len(names)

-    table = np.concatenate([cilia_ids[:, None], cell_ids[:, None], attributes], axis=1)
-    col_names = ['label_id', 'cell_id'] + names
+    table = np.concatenate([cilia_ids[:, None], attributes], axis=1)
+    col_names = ['label_id'] + names
    table = pd.DataFrame(table, columns=col_names)
-    table.to_csv(table_out_path, index=False, sep='\t')
+    table.to_csv(out_path, index=False, sep='\t')
--- a/scripts/attributes/master.py
+++ b/scripts/attributes/master.py
 import os
 import h5py

-from .base_attributes import base_attributes
+from .base_attributes import base_attributes, propagate_attributes
 from .cell_nucleus_mapping import map_cells_to_nuclei
 from .genes import write_genes_table
 from .morphology import write_morphology_cells, write_morphology_nuclei
 from .region_attributes import region_attributes
-from .cilia_attributes import cilia_attributes
+from .cilia_attributes import cilia_morphology
 from ..files.xml_utils import get_h5_path_from_xml


@@ -19,7 +19,7 @@ def get_seg_path(folder, name, key):
    return path


-def make_cell_tables(folder, name, tmp_folder, resolution,
+def make_cell_tables(old_folder, folder, name, tmp_folder, resolution,
                     target='slurm', max_jobs=100):
    # make the table folder
    table_folder = os.path.join(folder, 'tables', name)
@@ -67,7 +67,7 @@ def make_cell_tables(folder, name, tmp_folder, resolution,
                      label_ids, tmp_folder, target, max_jobs)


-def make_nucleus_tables(folder, name, tmp_folder, resolution,
+def make_nucleus_tables(old_folder, folder, name, tmp_folder, resolution,
                        target='slurm', max_jobs=100):
    # make the table folder
    table_folder = os.path.join(folder, 'tables', name)
@@ -91,11 +91,8 @@ def make_nucleus_tables(folder, name, tmp_folder, resolution,
                            n_labels, resolution, tmp_folder,
                            target, max_jobs)

-    # TODO additional tables:
-    # ???

-
-def make_cilia_tables(folder, name, tmp_folder, resolution,
+def make_cilia_tables(old_folder, folder, name, tmp_folder, resolution,
                      target='slurm', max_jobs=100):
    # make the table folder
    table_folder = os.path.join(folder, 'tables', name)
@@ -110,22 +107,13 @@ def make_cilia_tables(folder, name, tmp_folder, resolution,
                    tmp_folder, target=target, max_jobs=max_jobs,
                    correct_anchors=True)

-    # NOTE this is preliminary.
-    # In the end, we wan't this table to just live in the platy-browser data.
-    # Right now, something with the mapping to cell ids is off - I think the ids come
-    # from two different versions of the segmentation.
-    # We will keep this for now, but it needs another round of corrections.
-    # But this should be done in the platy-browser directly; need to wait for this
-    # until Tischi is back.
-    # Then, we will always need to update the cell id mapping table when the
-    # segmentation changes as well.
-    manual_mapping_table = os.path.join(folder, 'misc', 'cilia_id_mapping.csv')
-    assert os.path.exists(manual_mapping_table)
-
-    # compute cilia specific attributes at lower resolution ?
-
-    # add cilia specific attributes (length, diameter) and manual cell mapping done by rachel
-    cilia_out = os.path.join(table_folder, 'cilia.csv')
-    cilia_attributes(seg_path, seg_key,
-                     base_out, manual_mapping_table, cilia_out,
-                     resolution, tmp_folder, target=target, max_jobs=max_jobs)
+    # TODO when we change the cell segmentation, we also need to update this!
+    propagate_attributes(os.path.join(folder, 'misc', 'new_id_lut_sbem-6dpf-1-whole-segmented-cilia-labels.json'),
+                         os.path.join(old_folder, 'tables', name, 'cell_id_mapping.csv'),
+                         os.path.join(table_folder, 'cell_id_mapping.csv'))
+
+    # add cilia specific attributes (length, diameter)
+    morpho_out = os.path.join(table_folder, 'morphology.csv')
+    cilia_morphology(seg_path, seg_key,
+                     base_out, morpho_out, resolution,
+                     tmp_folder, target=target, max_jobs=max_jobs)
--- a/segmentation/cilia.py
+++ b/segmentation/cilia.py
+import json
+import numpy as np
+import pandas as pd
+
+
+def map_cell_ids():
+    new_cil_ids = '../data/0.5.3/misc/new_id_lut_sbem-6dpf-1-whole-segmented-cilia-labels.json'
+    with open(new_cil_ids) as f:
+        new_cil_ids = json.load(f)
+    new_cil_ids = {int(k): v for k, v in new_cil_ids.items()}
+
+    old_cell_mapping = '../data/0.5.2/misc/cilia_id_mapping.csv'
+    old_cell_mapping = pd.read_csv(old_cell_mapping, sep='\t')
+    names = old_cell_mapping.columns.values
+    old_cell_mapping = old_cell_mapping.values
+    old_cell_mapping = dict(zip(old_cell_mapping[:, 0], old_cell_mapping[:, 1]))
+
+    n_new_cilia = max(new_cil_ids.keys()) + 1
+    new_cell_mapping = np.zeros((n_new_cilia, 2), dtype='uint32')
+
+    for new_cil_id, old_cil_id in new_cil_ids.items():
+        new_cell_mapping[new_cil_id, 0] = new_cil_id
+        cell_id = old_cell_mapping.get(old_cil_id, 0)
+        new_cell_mapping[new_cil_id, 1] = cell_id
+        if cell_id != 0:
+            print(new_cil_id, old_cil_id, cell_id)
+
+    new_cell_mapping = pd.DataFrame(new_cell_mapping, columns=names)
+    out = '../data/0.5.3/tables/sbem-6dpf-1-whole-segmented-cilia-labels/cell_id_mapping.csv'
+    new_cell_mapping.to_csv(out, sep='\t', index=False)
+
+
+if __name__ == '__main__':
+    map_cell_ids()
--- a/update_patch.py
+++ b/update_patch.py
@@ -51,7 +51,7 @@ def update_table(name, seg_dict, folder, new_folder,
                 target, max_jobs):
    tmp_folder = 'tmp_tables_%s' % name
    update_function = getattr(scripts.attributes, seg_dict['table_update_function'])
-    update_function(new_folder, name, tmp_folder, seg_dict['resolution'],
+    update_function(folder, new_folder, name, tmp_folder, seg_dict['resolution'],
                    target=target, max_jobs=max_jobs)