From a49fb42143e97f5b8bb6875247c4a7192ae87b51 Mon Sep 17 00:00:00 2001 From: Constantin Pape <constantin.pape@iwr.uni-heidelberg.de> Date: Fri, 20 Sep 2019 14:54:13 +0200 Subject: [PATCH] Implement simple table propagation and update cilia attributes --- data/segmentations.json | 2 +- scripts/attributes/base_attributes.py | 31 +++++++++++++++++++ scripts/attributes/cilia_attributes.py | 38 +++++++++++------------ scripts/attributes/master.py | 42 +++++++++----------------- segmentation/cilia.py | 34 +++++++++++++++++++++ update_patch.py | 2 +- 6 files changed, 101 insertions(+), 48 deletions(-) create mode 100644 segmentation/cilia.py diff --git a/data/segmentations.json b/data/segmentations.json index 8944bb5..c7219ca 100644 --- a/data/segmentations.json +++ b/data/segmentations.json @@ -1 +1 @@ -{"sbem-6dpf-1-whole-segmented-chromatin-labels": {"is_static": true, "has_tables": true}, "sbem-6dpf-1-whole-segmented-tissue-labels": {"is_static": true, "has_tables": true}, "sbem-6dpf-1-whole-segmented-muscle": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-segmented-cells-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/proofread_cells"], "resolution": [0.025, 0.02, 0.02], "table_update_function": "make_cell_tables"}, "sbem-6dpf-1-whole-segmented-nuclei-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/nuclei"], "resolution": [0.1, 0.08, 0.08], "table_update_function": "make_nucleus_tables"}, "sbem-6dpf-1-whole-segmented-cilia-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/cilia"], "resolution": [0.025, 0.01, 0.01], "table_update_function": "make_cilia_tables"}, "sbem-6dpf-1-whole-segmented-ariande-neuropil": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-segmented-cats-neuropil": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-traces-labels": {"is_static": true, "has_tables": true}} \ No newline at end of file +{"sbem-6dpf-1-whole-segmented-chromatin-labels": {"is_static": true, "has_tables": true}, "sbem-6dpf-1-whole-segmented-tissue-labels": {"is_static": true, "has_tables": true}, "sbem-6dpf-1-whole-segmented-muscle": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-segmented-cells-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/proofread_cells"], "resolution": [0.025, 0.02, 0.02], "table_update_function": "make_cell_tables"}, "sbem-6dpf-1-whole-segmented-nuclei-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/nuclei"], "resolution": [0.1, 0.08, 0.08], "table_update_function": "make_nucleus_tables"}, "sbem-6dpf-1-whole-segmented-cilia-labels": {"is_static": false, "paintera_project": ["/g/kreshuk/data/arendt/platyneris_v1/data.n5", "volumes/paintera/cilia_label_multiset"], "resolution": [0.025, 0.01, 0.01], "table_update_function": "make_cilia_tables"}, "sbem-6dpf-1-whole-segmented-ariande-neuropil": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-segmented-cats-neuropil": {"is_static": true, "has_tables": false}, "sbem-6dpf-1-whole-traces-labels": {"is_static": true, "has_tables": true}} diff --git a/scripts/attributes/base_attributes.py b/scripts/attributes/base_attributes.py index 9c1aba3..fad0384 100644 --- a/scripts/attributes/base_attributes.py +++ b/scripts/attributes/base_attributes.py @@ -3,6 +3,7 @@ import json import h5py import z5py import numpy as np +import pandas as pd import luigi from cluster_tools.morphology import MorphologyWorkflow @@ -157,3 +158,33 @@ def base_attributes(input_path, input_key, output_path, resolution, with z5py.File(tmp_path, 'r') as f: label_ids = f[tmp_key][:, 0] return label_ids + + +# TODO this is un-tested !!! +def propagate_attributes(id_mapping_path, old_table_path, output_path): + """ Propagate all attributes to new ids. (column label id) + """ + # if the output already exists, we assume that the propagation + # was already done and we just continue + if os.path.exists(output_path): + return + + with open(id_mapping_path, 'r') as f: + id_mapping = json.load(f) + id_mapping = {int(k): v for k, v in id_mapping.items()} + + n_new_ids = max(id_mapping.keys()) + 1 + + table = pd.read_csv(old_table_path, sep='\t') + col_names = table.columns.values + table = table.values + + out_table = np.zeros((n_new_ids, table.shape[1])) + + # can be vectorized + for new_id, old_id in id_mapping.items(): + out_table[new_id, 0] = new_id + out_table[new_id, 1:] = table[old_id, 1:] + + out_table = pd.DataFrame(out_table, name=col_names) + out_table.to_csv(output_path, index=False, sep='\t') diff --git a/scripts/attributes/cilia_attributes.py b/scripts/attributes/cilia_attributes.py index 6d2eb52..0d137eb 100644 --- a/scripts/attributes/cilia_attributes.py +++ b/scripts/attributes/cilia_attributes.py @@ -69,20 +69,24 @@ def compute_centerline(obj, resolution): return coordinates, max_plen -def get_bb(base_table, cid, resolution): +def get_bb(base_table, cid, resolution, shape): + halo = (2, 2, 2) # get the row for this cilia id row = base_table.loc[cid] # compute the bounding box - bb_min = (row.bb_min_z, row.bb_min_y, row.bb_min_x) - bb_max = (row.bb_max_z, row.bb_max_y, row.bb_max_x) - bb = tuple(slice(int(mi / re), int(ma / re)) - for mi, ma, re in zip(bb_min, bb_max, resolution)) + bb_min = [row.bb_min_z, row.bb_min_y, row.bb_min_x] + bb_max = [row.bb_max_z, row.bb_max_y, row.bb_max_x] + bb_min = [int(mi / re) for mi, re in zip(bb_min, resolution)] + bb_max = [int(ma / re) for ma, re in zip(bb_max, resolution)] + bb = tuple(slice(max(mi - ha, 0), + min(ma + ha, sh)) + for mi, ma, sh, ha in zip(bb_min, bb_max, shape, halo)) return bb def load_seg(ds, base_table, cid, resolution): # load segmentation from the bounding box and get foreground - bb = get_bb(base_table, cid, resolution) + bb = get_bb(base_table, cid, resolution, ds.shape) obj = ds[bb] == cid return obj @@ -137,28 +141,24 @@ def measure_cilia_attributes(seg_path, seg_key, base_table, resolution): return attributes, names -# TODO the cell id mapping table should be separate -# TODO wrap this into a luigi task so we don't recompute it every time -def cilia_attributes(seg_path, seg_key, - base_table_path, manual_mapping_table_path, table_out_path, +# TODO results are not trust-worthy yet +# TODO wrap this into a cluster task so we don't recompute it every time +# and can be scheduled on slurm +def cilia_morphology(seg_path, seg_key, + base_table_path, out_path, resolution, tmp_folder, target, max_jobs): # read the base table base_table = pd.read_csv(base_table_path, sep='\t') cilia_ids = base_table['label_id'].values.astype('uint64') - # add the manually mapped cell ids - # cell_ids = get_mapped_cell_ids(cilia_ids, manual_mapping_table_path) - # FIXME - cell_ids = np.zeros_like(cilia_ids) - assert len(cell_ids) == len(cilia_ids) - # measure cilia specific attributes: length, diameter, ? (could try curvature) + print("Start to compute cilia morphology ...") attributes, names = measure_cilia_attributes(seg_path, seg_key, base_table, resolution) assert len(attributes) == len(cilia_ids) assert attributes.shape[1] == len(names) - table = np.concatenate([cilia_ids[:, None], cell_ids[:, None], attributes], axis=1) - col_names = ['label_id', 'cell_id'] + names + table = np.concatenate([cilia_ids[:, None], attributes], axis=1) + col_names = ['label_id'] + names table = pd.DataFrame(table, columns=col_names) - table.to_csv(table_out_path, index=False, sep='\t') + table.to_csv(out_path, index=False, sep='\t') diff --git a/scripts/attributes/master.py b/scripts/attributes/master.py index e717720..105404f 100644 --- a/scripts/attributes/master.py +++ b/scripts/attributes/master.py @@ -1,12 +1,12 @@ import os import h5py -from .base_attributes import base_attributes +from .base_attributes import base_attributes, propagate_attributes from .cell_nucleus_mapping import map_cells_to_nuclei from .genes import write_genes_table from .morphology import write_morphology_cells, write_morphology_nuclei from .region_attributes import region_attributes -from .cilia_attributes import cilia_attributes +from .cilia_attributes import cilia_morphology from ..files.xml_utils import get_h5_path_from_xml @@ -19,7 +19,7 @@ def get_seg_path(folder, name, key): return path -def make_cell_tables(folder, name, tmp_folder, resolution, +def make_cell_tables(old_folder, folder, name, tmp_folder, resolution, target='slurm', max_jobs=100): # make the table folder table_folder = os.path.join(folder, 'tables', name) @@ -67,7 +67,7 @@ def make_cell_tables(folder, name, tmp_folder, resolution, label_ids, tmp_folder, target, max_jobs) -def make_nucleus_tables(folder, name, tmp_folder, resolution, +def make_nucleus_tables(old_folder, folder, name, tmp_folder, resolution, target='slurm', max_jobs=100): # make the table folder table_folder = os.path.join(folder, 'tables', name) @@ -91,11 +91,8 @@ def make_nucleus_tables(folder, name, tmp_folder, resolution, n_labels, resolution, tmp_folder, target, max_jobs) - # TODO additional tables: - # ??? - -def make_cilia_tables(folder, name, tmp_folder, resolution, +def make_cilia_tables(old_folder, folder, name, tmp_folder, resolution, target='slurm', max_jobs=100): # make the table folder table_folder = os.path.join(folder, 'tables', name) @@ -110,22 +107,13 @@ def make_cilia_tables(folder, name, tmp_folder, resolution, tmp_folder, target=target, max_jobs=max_jobs, correct_anchors=True) - # NOTE this is preliminary. - # In the end, we wan't this table to just live in the platy-browser data. - # Right now, something with the mapping to cell ids is off - I think the ids come - # from two different versions of the segmentation. - # We will keep this for now, but it needs another round of corrections. - # But this should be done in the platy-browser directly; need to wait for this - # until Tischi is back. - # Then, we will always need to update the cell id mapping table when the - # segmentation changes as well. - manual_mapping_table = os.path.join(folder, 'misc', 'cilia_id_mapping.csv') - assert os.path.exists(manual_mapping_table) - - # compute cilia specific attributes at lower resolution ? - - # add cilia specific attributes (length, diameter) and manual cell mapping done by rachel - cilia_out = os.path.join(table_folder, 'cilia.csv') - cilia_attributes(seg_path, seg_key, - base_out, manual_mapping_table, cilia_out, - resolution, tmp_folder, target=target, max_jobs=max_jobs) + # TODO when we change the cell segmentation, we also need to update this! + propagate_attributes(os.path.join(folder, 'misc', 'new_id_lut_sbem-6dpf-1-whole-segmented-cilia-labels.json'), + os.path.join(old_folder, 'tables', name, 'cell_id_mapping.csv'), + os.path.join(table_folder, 'cell_id_mapping.csv')) + + # add cilia specific attributes (length, diameter) + morpho_out = os.path.join(table_folder, 'morphology.csv') + cilia_morphology(seg_path, seg_key, + base_out, morpho_out, resolution, + tmp_folder, target=target, max_jobs=max_jobs) diff --git a/segmentation/cilia.py b/segmentation/cilia.py new file mode 100644 index 0000000..7e624c0 --- /dev/null +++ b/segmentation/cilia.py @@ -0,0 +1,34 @@ +import json +import numpy as np +import pandas as pd + + +def map_cell_ids(): + new_cil_ids = '../data/0.5.3/misc/new_id_lut_sbem-6dpf-1-whole-segmented-cilia-labels.json' + with open(new_cil_ids) as f: + new_cil_ids = json.load(f) + new_cil_ids = {int(k): v for k, v in new_cil_ids.items()} + + old_cell_mapping = '../data/0.5.2/misc/cilia_id_mapping.csv' + old_cell_mapping = pd.read_csv(old_cell_mapping, sep='\t') + names = old_cell_mapping.columns.values + old_cell_mapping = old_cell_mapping.values + old_cell_mapping = dict(zip(old_cell_mapping[:, 0], old_cell_mapping[:, 1])) + + n_new_cilia = max(new_cil_ids.keys()) + 1 + new_cell_mapping = np.zeros((n_new_cilia, 2), dtype='uint32') + + for new_cil_id, old_cil_id in new_cil_ids.items(): + new_cell_mapping[new_cil_id, 0] = new_cil_id + cell_id = old_cell_mapping.get(old_cil_id, 0) + new_cell_mapping[new_cil_id, 1] = cell_id + if cell_id != 0: + print(new_cil_id, old_cil_id, cell_id) + + new_cell_mapping = pd.DataFrame(new_cell_mapping, columns=names) + out = '../data/0.5.3/tables/sbem-6dpf-1-whole-segmented-cilia-labels/cell_id_mapping.csv' + new_cell_mapping.to_csv(out, sep='\t', index=False) + + +if __name__ == '__main__': + map_cell_ids() diff --git a/update_patch.py b/update_patch.py index e63b4df..337d7a2 100755 --- a/update_patch.py +++ b/update_patch.py @@ -51,7 +51,7 @@ def update_table(name, seg_dict, folder, new_folder, target, max_jobs): tmp_folder = 'tmp_tables_%s' % name update_function = getattr(scripts.attributes, seg_dict['table_update_function']) - update_function(new_folder, name, tmp_folder, seg_dict['resolution'], + update_function(folder, new_folder, name, tmp_folder, seg_dict['resolution'], target=target, max_jobs=max_jobs) -- GitLab