Skip to content
Snippets Groups Projects
Commit 80c1e89b authored by Constantin Pape's avatar Constantin Pape
Browse files

Add script for object mapping tables

parent 2d559a1f
No related branches found
No related tags found
No related merge requests found
......@@ -2,7 +2,6 @@
# TODO new platy-browser env
import os
import csv
import json
import z5py
import numpy as np
......@@ -10,6 +9,7 @@ import numpy as np
import luigi
from cluster_tools.morphology import MorphologyWorkflow
from cluster_tools.morphology import CorrectAnchorsWorkflow
from .util import write_csv
def make_config(tmp_folder):
......@@ -71,7 +71,7 @@ def to_csv(input_path, input_key, output_path, resolution):
label_ids = attributes[:, 0:1]
# the colomn names
col_names = ['label_ids',
col_names = ['label_id',
'anchor_x', 'anchor_y', 'anchor_z',
'bb_min_x', 'bb_min_y', 'bb_min_z',
'bb_max_x', 'bb_max_y', 'bb_max_z',
......@@ -98,13 +98,7 @@ def to_csv(input_path, input_key, output_path, resolution):
# NOTE: attributes[1] = size in pixel
# make the output attributes
data = np.concatenate([label_ids, com, minc, maxc, attributes[:, 1:2]], axis=1)
assert data.shape[1] == len(col_names)
# write to csv
with open(output_path, 'w', newline='') as f:
writer = csv.writer(f, delimiter='\t')
writer.writerow(col_names)
writer.writerows(data)
write_csv(output_path, data, col_names)
def base_attributes(input_path, input_key, output_path, resolution,
......@@ -126,3 +120,8 @@ def base_attributes(input_path, input_key, output_path, resolution,
# write output to csv
to_csv(tmp_path, tmp_key, output_path, resolution)
# load and return label_ids
with z5py.File(tmp_path, 'r') as f:
label_ids = f[tmp_key][:, 0]
return label_ids
#! /g/kreshuk/pape/Work/software/conda/miniconda3/envs/cluster_env37/bin/python
# TODO new platy-browser env
import os
import numpy as np
import luigi
import z5py
from cluster_tools.node_labels import NodeLabelWorkflow
from .util import write_csv
def object_labels(seg_path, seg_key,
                  input_path, input_key, prefix,
                  tmp_folder, target, max_jobs):
    """Compute node labels mapping segments in `seg_path` to objects in
    `input_path` and return them as a numpy array.

    Runs the cluster_tools NodeLabelWorkflow via luigi; intermediate
    results are written to `<tmp_folder>/data.n5` under the key
    `node_labels_<prefix>`.

    Arguments:
        seg_path, seg_key: path / dataset key of the (watershed) segmentation
        input_path, input_key: path / dataset key of the objects to map to
        prefix: name used to key the output dataset and in error messages
        tmp_folder: folder for temporary / intermediate data
        target: cluster_tools target (e.g. 'slurm' or 'local')
        max_jobs: maximal number of jobs used by the workflow

    Raises:
        RuntimeError: if the luigi workflow does not finish successfully.
    """
    task = NodeLabelWorkflow
    config_folder = os.path.join(tmp_folder, 'configs')
    out_path = os.path.join(tmp_folder, 'data.n5')
    out_key = 'node_labels_%s' % prefix

    t = task(tmp_folder=tmp_folder, config_dir=config_folder,
             max_jobs=max_jobs, target=target,
             ws_path=seg_path, ws_key=seg_key,
             input_path=input_path, input_key=input_key,
             output_path=out_path, output_key=out_key,
             prefix=prefix)
    ret = luigi.build([t], local_scheduler=True)
    if not ret:
        # previously the message did not say that the workflow failed
        raise RuntimeError("Node labels for %s failed" % prefix)

    # load the computed node labels from the temporary n5 container
    with z5py.File(out_path, 'r') as f:
        data = f[out_key][:]
    return data
def map_objects(label_ids, seg_path, seg_key, map_out,
                map_paths, map_keys, map_names,
                tmp_folder, target, max_jobs):
    """Map segments to overlapping objects in other segmentations and
    write the result to a tab-separated table at `map_out`.

    For each (map_path, map_key, name) triple, one column of object ids
    is computed via `object_labels`; the table has the columns
    ['label_id'] + map_names.

    Arguments:
        label_ids: 1d array of segment ids (first table column)
        seg_path, seg_key: path / dataset key of the segmentation
        map_out: output path for the csv table
        map_paths, map_keys, map_names: paths, dataset keys and column
            names of the segmentations to map to (must have equal length)
        tmp_folder: folder for temporary data
        target: cluster_tools target (e.g. 'slurm' or 'local')
        max_jobs: maximal number of jobs
    """
    if not (len(map_paths) == len(map_keys) == len(map_names)):
        raise ValueError("map_paths, map_keys and map_names must have the same length")

    columns = []
    for map_path, map_key, prefix in zip(map_paths, map_keys, map_names):
        labels = object_labels(seg_path, seg_key,
                               map_path, map_key, prefix,
                               tmp_folder, target, max_jobs)
        columns.append(labels[:, None])

    col_names = ['label_id'] + map_names
    # FIX: concatenate along axis=1 to place the columns side by side;
    # the previous axis=0 stacked them vertically, yielding shape
    # (n_labels * n_cols, 1), which cannot match len(col_names) columns
    data = np.concatenate([label_ids[:, None]] + columns, axis=1)
    write_csv(map_out, data, col_names)
......@@ -2,6 +2,7 @@ import os
import h5py
from .base_attributes import base_attributes
from .map_objects import map_objects
from ..files import get_h5_path_from_xml
......@@ -10,7 +11,6 @@ def get_seg_path(folder, name, key):
path = get_h5_path_from_xml(xml_path, return_absolute_path=True)
assert os.path.exists(path), path
with h5py.File(path, 'r') as f:
print(path)
assert key in f, "%s not in %s" % (key, str(list(f.keys())))
return path
......@@ -26,17 +26,23 @@ def make_cell_tables(folder, name, tmp_folder, resolution,
# make the basic attributes table
base_out = os.path.join(table_folder, 'default.csv')
base_attributes(seg_path, seg_key, base_out, resolution,
tmp_folder, target=target, max_jobs=max_jobs,
correct_anchors=True)
label_ids = base_attributes(seg_path, seg_key, base_out, resolution,
tmp_folder, target=target, max_jobs=max_jobs,
correct_anchors=True)
# TODO
# make table with mapping to other objects
# nuclei
# cellular models
# nuclei, cellular models (TODO), ...
map_out = os.path.join(table_folder, 'objects.csv')
map_paths = [get_seg_path(folder, 'em-segmented-nuclei-labels')]
map_keys = [seg_key]
map_names = ['nucleus_id']
map_objects(label_ids, seg_path, seg_key, map_out,
map_paths, map_keys, map_names,
tmp_folder, target, max_jobs)
# TODO additional tables:
# gene mapping
# regions / semantics
# gene expression
# ???
......@@ -55,9 +61,10 @@ def make_nucleus_tables(folder, name, tmp_folder, resolution,
tmp_folder, target=target, max_jobs=max_jobs,
correct_anchors=True)
# TODO
# TODO do we need this for nuclei as well ?
# make table with mapping to other objects
# cells
# cells, ...
# TODO additional tables:
# kimberly's nucleus attributes
# ???
import csv
def write_csv(output_path, data, col_names):
    """Write a 2d array to `output_path` as a tab-separated table.

    Arguments:
        output_path: path of the output file
        data: 2d array-like with shape (n_rows, len(col_names))
        col_names: list of column names, written as the header row

    Raises:
        ValueError: if the number of data columns does not match the
            number of column names.
    """
    # validate explicitly instead of `assert`, which is stripped under -O
    if data.shape[1] != len(col_names):
        raise ValueError("%i %i" % (data.shape[1], len(col_names)))
    with open(output_path, 'w', newline='') as f:
        writer = csv.writer(f, delimiter='\t')
        writer.writerow(col_names)
        writer.writerows(data)
......@@ -53,7 +53,8 @@ def downscale(path, in_key, out_key,
raise RuntimeError("Downscaling the segmentation failed")
def export_segmentation(paintera_path, paintera_key, folder, new_folder, name, resolution, tmp_folder):
def export_segmentation(paintera_path, paintera_key, folder, new_folder, name, resolution,
tmp_folder, target='slurm', max_jobs=200):
""" Export a segmentation from paintera project to bdv file and
compute segment lut for previous segmentation.
......@@ -66,9 +67,6 @@ def export_segmentation(paintera_path, paintera_key, folder, new_folder, name, r
resolution: resolution [z, y, x] in micrometer
tmp_folder: folder for temporary files
"""
# TODO should make this a param
max_jobs = 250
target = 'slurm'
tmp_path = os.path.join(tmp_folder, 'data.n5')
tmp_key = 'seg'
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment