From 5087199e14226cd23027bab26ceaca8b5ed88153 Mon Sep 17 00:00:00 2001 From: Constantin Pape <constantin.pape@iwr.uni-heidelberg.de> Date: Wed, 11 Sep 2019 11:40:56 +0200 Subject: [PATCH] Add scripts for initial nephridia analysis --- analysis/nephridia.py | 50 +++++++++++++++++++++ scripts/analysis/nephridia.py | 82 +++++++++++++++++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 analysis/nephridia.py create mode 100644 scripts/analysis/nephridia.py diff --git a/analysis/nephridia.py b/analysis/nephridia.py new file mode 100644 index 0000000..36426ce --- /dev/null +++ b/analysis/nephridia.py @@ -0,0 +1,50 @@ +import os +import numpy as np +import pandas as pd +from scripts.analysis.nephridia import filter_by_size, plot_sizes +from scripts.analysis.nephridia import match_cilia_to_cells + +cell_ids = [24449, 22584, 21904, 21590, 21594, 21595, 21910, 21911, 21915] +print("Number of cells:", len(cell_ids)) + +tmp_path = './cilia_ids.npy' +if os.path.exists(tmp_path): + cilia_ids = np.load(tmp_path) +else: + cell_table = '../data/0.5.2/tables/sbem-6dpf-1-whole-segmented-cells-labels/default.csv' + cilia_path = '../data/0.5.2/segmentations/sbem-6dpf-1-whole-segmented-cilia-labels.h5' + cilia_key = 't00000/s00/2/cells' + cilia_res = [0.1, 0.04, 0.04] + cilia_ids = match_cilia_to_cells(cell_ids, cell_table, + cilia_path, cilia_key, cilia_res) + np.save(tmp_path, cilia_ids) + +table_path = '../data/0.5.2/tables/sbem-6dpf-1-whole-segmented-cilia-labels/default.csv' +table_path2 = '../data/0.5.2/tables/sbem-6dpf-1-whole-segmented-cilia-labels/cilia.csv' + +table1 = pd.read_csv(table_path, sep='\t') +table2 = pd.read_csv(table_path2, sep='\t') +table2 = table2[['length', 'diameter_mean']] +table = pd.concat([table1, table2], axis=1) +assert len(table1) == len(table2) == len(table) + +table.set_index('label_id') +table = table[3:] +table = table.loc[table['length'] > 0.1] +ids = table['label_id'].values +ids = ids[np.isin(ids, cilia_ids)] +print(ids) +table = table.loc[ids] + +# plot_sizes(table) +size_threshold = 5000 +table = filter_by_size(table, size_threshold) +print("Number of cilia:", len(table)) + +lens = table["length"].values +for idd, leng in zip(ids, lens): + print(idd, leng) +print("Average len:", np.mean(lens), "+-", np.std(lens)) + +diameters = table["diameter_mean"].values +print("Average diameter:", np.mean(diameters), "+-", np.std(diameters)) diff --git a/scripts/analysis/nephridia.py b/scripts/analysis/nephridia.py new file mode 100644 index 0000000..950e089 --- /dev/null +++ b/scripts/analysis/nephridia.py @@ -0,0 +1,82 @@ +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +import h5py +from concurrent import futures + + +def get_bb(idd, table, res): + row = table.loc[idd] + bb_min = [row.bb_min_z, row.bb_min_y, row.bb_min_x] + bb_max = [row.bb_max_z, row.bb_max_y, row.bb_max_x] + return tuple(slice(int(mi / re), int(ma / re)) + for mi, ma, re in zip(bb_min, bb_max, res)) + + +def match_cilia_to_cells(cell_ids, cell_table, + cilia_seg_path, cilia_seg_key, cilia_res): + cell_table = pd.read_csv(cell_table, sep='\t') + cell_table.set_index('label_id') + + with h5py.File(cilia_seg_path, 'r') as f: + ds_cil = f[cilia_seg_key] + + def match_single_cell(cell_id): + # get the bounding box of this cell + bb = get_bb(cell_id, cell_table, cilia_res) + seg = ds_cil[bb] + return np.unique(seg) + + with futures.ThreadPoolExecutor(9) as tp: + tasks = [tp.submit(match_single_cell, cell_id) for cell_id in cell_ids] + res = [t.result() for t in tasks] + + cilia_ids = np.concatenate(res) + cilia_ids = np.unique(cilia_ids) + return cilia_ids + + +def plot_sizes(table): + sizes = table['n_pixels'].values[1:] + print(sizes.max(), sizes.min()) + + fig, ax = plt.subplots() + _, bins, patches = ax.hist(sizes, 32) + ax.set_xlabel("Size in pixel") + ax.set_ylabel("Count") + plt.show() + + sizes = sizes[sizes <= bins[1]] + fig, ax = plt.subplots() + _, bins, patches = ax.hist(sizes, 32) + print(bins) + ax.set_xlabel("Size in pixel") + ax.set_ylabel("Count") + plt.show() + + +def filter_by_size(table, size_threshold): + table = table.loc[table['n_pixels'] > size_threshold] + return table + + +def compute_offsets(table): + df = table[['anchor_x', 'anchor_y', 'anchor_y']] + pos = df.values + offsets = np.linalg.norm(pos, axis=1) + return offsets + + +def plot_offsets(table): + offsets = compute_offsets(table) + fig, ax = plt.subplots() + ax.hist(offsets, 32) + ax.set_xlabel("Offset in microns") + ax.set_ylabel("Count") + plt.show() + + +def filter_by_offset(table, offset_threshold): + offsets = compute_offsets(table) + table = table.loc[offsets > offset_threshold] + return table -- GitLab