From ff35f303ad36ad85274c7ac57b0393c71cfa54f0 Mon Sep 17 00:00:00 2001 From: Constantin Pape <constantin.pape@iwr.uni-heidelberg.de> Date: Fri, 7 Feb 2020 14:43:03 +0100 Subject: [PATCH] Add cell criterion column functionality --- mmpb/attributes/base_attributes.py | 27 ++++++++++++++++++++++++++- mmpb/attributes/master.py | 6 +++++- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/mmpb/attributes/base_attributes.py b/mmpb/attributes/base_attributes.py index 61bee01..528068d 100644 --- a/mmpb/attributes/base_attributes.py +++ b/mmpb/attributes/base_attributes.py @@ -215,7 +215,7 @@ def propagate_attributes(id_mapping_path, table_path, output_path, id_col[np.isnan(id_col)] = 0 id_col = id_col.astype('uint32') - keys = list(id_mapping.keys()) + # keys = list(id_mapping.keys()) id_col = nt.takeDict(id_mapping, id_col) # TODO need to implement merge rules @@ -226,3 +226,28 @@ def propagate_attributes(id_mapping_path, table_path, output_path, table[column_name] = id_col table.to_csv(output_path, index=False, sep='\t') + + +# Do we need to extend the cell criterion? Possibilities: +# - size threshold +def add_cell_criterion_column(base_table_path, nucleus_mapping_path, out_table_path=None): + """ Add a column to the cell defaults table that indicates whether the id + is considered as a cell or not. + + Currently the criterion is based on having a unique nucleus id mapped to the cell. 
+ """ + base_table = pd.read_csv(base_table_path, sep='\t') + nucleus_mapping = pd.read_csv(nucleus_mapping_path, sep='\t') + assert len(base_table) == len(nucleus_mapping) + + mapped_nucleus_ids = nucleus_mapping['nucleus_id'].values + mapped, mapped_counts = np.unique(mapped_nucleus_ids, return_counts=True) + + unique_mapped_nuclei = mapped[mapped_counts == 1] + cell_criterion = np.isin(mapped_nucleus_ids, unique_mapped_nuclei) + assert len(cell_criterion) == len(base_table) + + base_table['cells'] = cell_criterion.astype('uint8') + + out_path = base_table_path if out_table_path is None else out_table_path + base_table.to_csv(out_path, index=False, sep='\t') diff --git a/mmpb/attributes/master.py b/mmpb/attributes/master.py index 7fe9bdb..d4cefd5 100644 --- a/mmpb/attributes/master.py +++ b/mmpb/attributes/master.py @@ -2,7 +2,8 @@ import os import h5py from pybdv.metadata import get_data_path -from .base_attributes import base_attributes, propagate_attributes, write_additional_table_file +from .base_attributes import (add_cell_criterion_column, base_attributes, + propagate_attributes, write_additional_table_file) from .cell_nucleus_mapping import map_cells_to_nuclei from .genes import gene_assignment_table, vc_assignment_table from .morphology import write_morphology_cells, write_morphology_nuclei @@ -41,6 +42,9 @@ def make_cell_tables(old_folder, folder, name, tmp_folder, resolution, map_cells_to_nuclei(label_ids, seg_path, nuc_path, nuc_mapping_table, tmp_folder, target, max_jobs) + # add a column with (somewhat stringent) cell criterion to the default table + add_cell_criterion_column(base_out, nuc_mapping_table) + # make table with gene mapping aux_gene_xml = os.path.join(folder, 'misc', 'prospr-6dpf-1-whole_meds_all_genes.xml') aux_gene_path = get_data_path(aux_gene_xml, return_absolute_path=True) -- GitLab