From fe2a9c013b2a132f4ae4310d774bb033782887f5 Mon Sep 17 00:00:00 2001
From: Constantin Pape <constantin.pape@iwr.uni-heidelberg.de>
Date: Tue, 11 Jun 2019 15:39:24 +0200
Subject: [PATCH] Refactor morphology functions and simplify bounding box
 calculation

---
 scripts/attributes/morphology.py | 128 +++++++++++++++++++------------
 1 file changed, 77 insertions(+), 51 deletions(-)

diff --git a/scripts/attributes/morphology.py b/scripts/attributes/morphology.py
index 9bbc33e..0095e6d 100644
--- a/scripts/attributes/morphology.py
+++ b/scripts/attributes/morphology.py
@@ -8,7 +8,7 @@ from skimage.transform import resize
 from skimage.util import pad
 
 
-def calculate_slice(scale, minmax):
+def calculate_slice(scale, mins, maxs):
     """
     Takes a scale (get this from xml file of object), and list of min and
     max coordinates in microns [min_coord_x_microns, min_coord_y_microns,
@@ -17,13 +17,6 @@ def calculate_slice(scale, minmax):
     Return a slice to use to extract
     that bounding box from the original image.
     """
-    # Get mins and flip order - so now z, y, x
-    mins = minmax[0:3][::-1]
-    # Get maxes and flip order - so now z, y, x
-    maxs = minmax[3:][::-1]
-
-    # flip order of the scale
-    scale = scale[::-1]
 
     # convert min and max in microns to pixel ranges
     mins = [int(mi / sca) for mi, sca in zip(mins, scale)]
@@ -104,20 +97,19 @@ def calculate_row_stats(row, seg_path, input_type, morphology, intensity):
     result = (row.label_id,)
 
     if input_type == 'nucleus':
-        full_seg_scale = [0.08, 0.08, 0.1]
+        full_seg_scale = [0.1, 0.08, 0.08]
         # use full res of segmentation - 0.08, 0.08, 0.1
         seg_level = 0
         # Use the /t00000/s00/3/cells for raw data - gives pixel size similar to the nuclear segmentation
         raw_level = 3
 
     elif input_type == 'cell':
-        full_seg_scale = [0.02, 0.02, 0.025]
+        full_seg_scale = [0.025, 0.02, 0.02]
         # look at 4x downsampled segmentation - close to 0.08, 0.08, 0.1
         seg_level = 2
 
-    # min max bounding box in microns for segmentation
-    minmax_seg = [row.bb_min_x, row.bb_min_y, row.bb_min_z, row.bb_max_x, row.bb_max_y, row.bb_max_z]
-
+    bb_min = [row.bb_min_z, row.bb_min_y, row.bb_min_x]
+    bb_max = [row.bb_max_z, row.bb_max_y, row.bb_max_x]
     with h5py.File(seg_path, 'r') as f:
 
         # get shape of full data & downsampled
@@ -125,12 +117,12 @@ def calculate_row_stats(row, seg_path, input_type, morphology, intensity):
         down_data_shape = f['t00000/s00/' + str(seg_level) + '/cells'].shape
 
         # scale for downsampled
-        down_scale = [full_seg_scale[0]*(full_data_shape[2]/down_data_shape[2]),
-                      full_seg_scale[1]*(full_data_shape[1]/down_data_shape[1]),
-                      full_seg_scale[2]*(full_data_shape[0]/down_data_shape[0])]
-
+        down_scale = [full_scale * (full_shape / down_shape)
+                      for full_scale, full_shape, down_shape in zip(full_seg_scale,
+                                                                    full_data_shape,
+                                                                    down_data_shape)]
         # slice for seg file
-        seg_slice = calculate_slice(down_scale, minmax_seg)
+        seg_slice = calculate_slice(down_scale, bb_min, bb_max)
 
         # get specified layer in bdv file
         data = f['t00000/s00/' + str(seg_level) + '/cells']
@@ -140,6 +132,19 @@ def calculate_row_stats(row, seg_path, input_type, morphology, intensity):
     binary = img_array == int(row.label_id)
     binary = binary.astype('uint8')
 
+    # we need to skip for empty labels
+    n_fg = binary.sum()
+    if n_fg == 0:
+        print("Skipping label", row.label_id, "due to empty segmentation mask")
+        n_stats = 0
+        if morphology:
+            n_stats += 4
+        if intensity:
+            n_stats += 2
+        dummy_result = (0.,) * n_stats
+        result = result + dummy_result
+        return result
+
     img_array = None
 
     # calculate morphology stats straight from segmentation
@@ -150,21 +155,23 @@ def calculate_row_stats(row, seg_path, input_type, morphology, intensity):
     if intensity:
 
         # scale for full resolution raw data
-        full_raw_scale = [0.01, 0.01, 0.025]
+        full_raw_scale = [0.025, 0.01, 0.01]
 
+        # FIXME don't hardcode this path here
         with h5py.File('/g/arendt/EM_6dpf_segmentation/EM-Prospr/em-raw-full-res.h5', 'r') as f:
 
             # get shape of full data & downsampled
             full_data_shape = f['t00000/s00/0/cells'].shape
             down_data_shape = f['t00000/s00/' + str(raw_level) + '/cells'].shape
 
-            # scale for donwampled
-            down_scale = [full_raw_scale[0]*(full_data_shape[2]/down_data_shape[2]),
-                          full_raw_scale[1]*(full_data_shape[1]/down_data_shape[1]),
-                          full_raw_scale[2]*(full_data_shape[0]/down_data_shape[0])]
+            # scale for downsampled
+            down_scale = [full_scale * (full_shape / down_shape)
+                          for full_scale, full_shape, down_shape in zip(full_raw_scale,
+                                                                        full_data_shape,
+                                                                        down_data_shape)]
 
             # slice for raw file
-            raw_slice = calculate_slice(down_scale, minmax_seg)
+            raw_slice = calculate_slice(down_scale, bb_min, bb_max)
 
             # get specified layer in bdv file
             data = f['t00000/s00/' + str(raw_level) + '/cells']
@@ -195,10 +202,10 @@ def calculate_morphology_stats(table, seg_path, input_type, morphology=True, int
     # size cutoffs (at the moment just browsed through segmentation and chose sensible values)
     # not very stringent, still some rather large / small things included
     if input_type == 'nucleus':
-        table = table.loc[table['shape_pixelsize'] >= 18313.0, :]
+        table = table.loc[table['n_pixels'] >= 18313.0, :]
 
     elif input_type == 'cell':
-        criteria = np.logical_and(table['shape_pixelsize'] > 88741.0, table['shape_pixelsize'] < 600000000.0)
+        criteria = np.logical_and(table['n_pixels'] > 88741.0, table['n_pixels'] < 600000000.0)
         table = table.loc[criteria, :]
 
     stats = [calculate_row_stats(row, seg_path, input_type, morphology, intensity)
@@ -222,42 +229,61 @@ def calculate_morphology_stats(table, seg_path, input_type, morphology=True, int
     return stats
 
 
-def write_morphology_table(cell_seg_path, nucleus_seg_path, cell_table_path, nucleus_table_path,
-                           cell_nuc_mapping_path, nucleus_out_path, cell_out_path):
+def load_cell_nucleus_mapping(cell_nuc_mapping_path):
+    # read in numpy array of mapping of cells to nuclei - first column cell id, second nucleus id
+    cell_nucleus_mapping = np.genfromtxt(cell_nuc_mapping_path, skip_header=1, delimiter='\t')[:, :2]
+    cell_nucleus_mapping = cell_nucleus_mapping.astype('uint64')
+    # remove zero labels from this table too, if exist
+    cell_nucleus_mapping = cell_nucleus_mapping[np.logical_and(cell_nucleus_mapping[:, 0] != 0,
+                                                               cell_nucleus_mapping[:, 1] != 0)]
+    return cell_nucleus_mapping
+
+
+# TODO wrap this in a luigi task / cluster tools task, so we have caching and can move computation
+# to the cluster
+def write_morphology_nuclei(seg_path, table_in_path, table_out_path):
     """
     Write csv files of morphology stats for both the nucleus and cell segmentation
 
-    cell_seg_path - string, file path to cell segmentation
-    nucleus_seg_path - string, file path to nucleus segmentation
+    seg_path - string, file path to nucleus segmentation
     cell_table_path - string, file path to cell table
-    nucleus_table_path - string, file path to nucleus table
+    table_in_path - string, file path to nucleus table
+    table_out_path - string, file path to save new nucleus table
+    """
+
+    nuclei_table = pd.read_csv(table_in_path, sep='\t')
+
+    # remove zero label if it exists
+    nuclei_table = nuclei_table.loc[nuclei_table['label_id'] != 0, :]
+
+    # calculate stats for both tables and save results to csv
+    result_nuclei = calculate_morphology_stats(nuclei_table, seg_path, 'nucleus',
+                                               morphology=True, intensity=True)
+    result_nuclei.to_csv(table_out_path, index=False, sep='\t')
+
+
+# TODO wrap this in a luigi task / cluster tools task, so we have caching and can move computation
+# to the cluster
+def write_morphology_cells(seg_path, table_in_path,
+                           cell_nuc_mapping_path, table_out_path):
+    """
+    Write csv files of morphology stats for both the nucleus and cell segmentation
+
+    seg_path - string, file path to cell segmentation
+    table_in_path - string, file path to cell table
     cell_nuc_mapping_path - string, file path to numpy array mapping cells to nuclei
         (first column cell id, second nucleus id)
-    nucleus_out_path - string, file path to save new nucleus table
-    cell_out_path - string, file path to save new cell table
+    table_out_path - string, file path to save new cell table
     """
 
-    cell_table = pd.read_csv(cell_table_path, sep='\t')
+    cell_table = pd.read_csv(table_in_path, sep='\t')
 
-    nuclei_table = pd.read_csv(nucleus_table_path, sep='\t')
-
-    # remove zero label form both tables if it exists
-    nuclei_table = nuclei_table.loc[nuclei_table['label_id'] != 0, :]
+    # remove zero label if it exists
     cell_table = cell_table.loc[cell_table['label_id'] != 0, :]
-
-    # read in numpy array of mapping of cells to nuclei - first column cell id, second nucleus id
-    cell_nucleus_mapping = np.load(cell_nuc_mapping_path)
-    # remove zero labels from this table too, if exist
-    cell_nucleus_mapping = cell_nucleus_mapping[np.logical_and(cell_nucleus_mapping[:, 0] != 0,
-                                                               cell_nucleus_mapping[:, 1] != 0)]
+    cell_nucleus_mapping = load_cell_nucleus_mapping(cell_nuc_mapping_path)
 
     # only keep cells in the cell_nuc_mapping (i.e those that have assigned nuclei)
     cell_table = cell_table.loc[np.isin(cell_table['label_id'], cell_nucleus_mapping[:, 0]), :]
 
-    # calculate stats for both tables and save results to csv
-    result_nuclei = calculate_morphology_stats(nuclei_table, nucleus_seg_path, 'nucleus',
-                                               morphology=True, intensity=True)
-    result_nuclei.to_csv(nucleus_out_path, index=False, sep='\t')
-
-    result_cells = calculate_morphology_stats(cell_table, cell_seg_path, 'cell', morphology=True, intensity=False)
-    result_cells.to_csv(cell_out_path, index=False, sep='\t')
+    result_cells = calculate_morphology_stats(cell_table, seg_path, 'cell', morphology=True, intensity=False)
+    result_cells.to_csv(table_out_path, index=False, sep='\t')
-- 
GitLab