From ff35f303ad36ad85274c7ac57b0393c71cfa54f0 Mon Sep 17 00:00:00 2001
From: Constantin Pape <constantin.pape@iwr.uni-heidelberg.de>
Date: Fri, 7 Feb 2020 14:43:03 +0100
Subject: [PATCH] Add cell criterion column functionality

---
 mmpb/attributes/base_attributes.py | 27 ++++++++++++++++++++++++++-
 mmpb/attributes/master.py          |  6 +++++-
 2 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/mmpb/attributes/base_attributes.py b/mmpb/attributes/base_attributes.py
index 61bee01..528068d 100644
--- a/mmpb/attributes/base_attributes.py
+++ b/mmpb/attributes/base_attributes.py
@@ -215,7 +215,7 @@ def propagate_attributes(id_mapping_path, table_path, output_path,
     id_col[np.isnan(id_col)] = 0
     id_col = id_col.astype('uint32')
 
-    keys = list(id_mapping.keys())
+    # keys = list(id_mapping.keys())
     id_col = nt.takeDict(id_mapping, id_col)
 
     # TODO need to implement merge rules
@@ -226,3 +226,28 @@ def propagate_attributes(id_mapping_path, table_path, output_path,
 
     table[column_name] = id_col
     table.to_csv(output_path, index=False, sep='\t')
+
+
+# Do we need to extend the cell criterion? Possibilities:
+# - size threshold
+def add_cell_criterion_column(base_table_path, nucleus_mapping_path, out_table_path=None):
+    """ Add a 0/1 column 'cells' to the tsv table at base_table_path that flags ids considered cells.
+        A row is set to 1 if its 'nucleus_id' value in the tsv table at nucleus_mapping_path occurs
+        exactly once in that column. Rows are matched by position; both tables must have equal length.
+        The result is written to out_table_path, or back to base_table_path if out_table_path is None.
+    """
+    base_table = pd.read_csv(base_table_path, sep='\t')
+    nucleus_mapping = pd.read_csv(nucleus_mapping_path, sep='\t')
+    assert len(base_table) == len(nucleus_mapping)
+    # count in how many rows each nucleus id occurs
+    mapped_nucleus_ids = nucleus_mapping['nucleus_id'].values
+    mapped, mapped_counts = np.unique(mapped_nucleus_ids, return_counts=True)
+    # criterion: the nucleus id of the row does not occur in any other row
+    unique_mapped_nuclei = mapped[mapped_counts == 1]
+    cell_criterion = np.isin(mapped_nucleus_ids, unique_mapped_nuclei)
+    assert len(cell_criterion) == len(base_table)
+    # NOTE(review): a placeholder id for "no nucleus" (0?) that occurs only once would also pass - confirm
+    base_table['cells'] = cell_criterion.astype('uint8')
+    # without an explicit output path the input table is overwritten
+    out_path = base_table_path if out_table_path is None else out_table_path
+    base_table.to_csv(out_path, index=False, sep='\t')
diff --git a/mmpb/attributes/master.py b/mmpb/attributes/master.py
index 7fe9bdb..d4cefd5 100644
--- a/mmpb/attributes/master.py
+++ b/mmpb/attributes/master.py
@@ -2,7 +2,8 @@ import os
 import h5py
 from pybdv.metadata import get_data_path
 
-from .base_attributes import base_attributes, propagate_attributes, write_additional_table_file
+from .base_attributes import (add_cell_criterion_column, base_attributes,
+                              propagate_attributes, write_additional_table_file)
 from .cell_nucleus_mapping import map_cells_to_nuclei
 from .genes import gene_assignment_table, vc_assignment_table
 from .morphology import write_morphology_cells, write_morphology_nuclei
@@ -41,6 +42,9 @@ def make_cell_tables(old_folder, folder, name, tmp_folder, resolution,
     map_cells_to_nuclei(label_ids, seg_path, nuc_path, nuc_mapping_table,
                         tmp_folder, target, max_jobs)
 
+    # add a column with (somewhat stringent) cell criterion to the default table
+    add_cell_criterion_column(base_out, nuc_mapping_table)
+
     # make table with gene mapping
     aux_gene_xml = os.path.join(folder, 'misc', 'prospr-6dpf-1-whole_meds_all_genes.xml')
     aux_gene_path = get_data_path(aux_gene_xml, return_absolute_path=True)
-- 
GitLab