From 5087199e14226cd23027bab26ceaca8b5ed88153 Mon Sep 17 00:00:00 2001
From: Constantin Pape <constantin.pape@iwr.uni-heidelberg.de>
Date: Wed, 11 Sep 2019 11:40:56 +0200
Subject: [PATCH] Add scripts for initial nephridia analysis

---
 analysis/nephridia.py         | 50 +++++++++++++++++++++
 scripts/analysis/nephridia.py | 82 +++++++++++++++++++++++++++++++++++
 2 files changed, 132 insertions(+)
 create mode 100644 analysis/nephridia.py
 create mode 100644 scripts/analysis/nephridia.py

diff --git a/analysis/nephridia.py b/analysis/nephridia.py
new file mode 100644
index 0000000..36426ce
--- /dev/null
+++ b/analysis/nephridia.py
@@ -0,0 +1,50 @@
+import os
+import numpy as np
+import pandas as pd
+from scripts.analysis.nephridia import filter_by_size, plot_sizes
+from scripts.analysis.nephridia import match_cilia_to_cells
+
+cell_ids = [24449, 22584, 21904, 21590, 21594, 21595, 21910, 21911, 21915]
+print("Number of cells:", len(cell_ids))
+
+tmp_path = './cilia_ids.npy'
+if os.path.exists(tmp_path):
+    cilia_ids = np.load(tmp_path)
+else:
+    cell_table = '../data/0.5.2/tables/sbem-6dpf-1-whole-segmented-cells-labels/default.csv'
+    cilia_path = '../data/0.5.2/segmentations/sbem-6dpf-1-whole-segmented-cilia-labels.h5'
+    cilia_key = 't00000/s00/2/cells'
+    cilia_res = [0.1, 0.04, 0.04]
+    cilia_ids = match_cilia_to_cells(cell_ids, cell_table,
+                                     cilia_path, cilia_key, cilia_res)
+    np.save(tmp_path, cilia_ids)
+
+table_path = '../data/0.5.2/tables/sbem-6dpf-1-whole-segmented-cilia-labels/default.csv'
+table_path2 = '../data/0.5.2/tables/sbem-6dpf-1-whole-segmented-cilia-labels/cilia.csv'
+
+table1 = pd.read_csv(table_path, sep='\t')
+table2 = pd.read_csv(table_path2, sep='\t')
+table2 = table2[['length', 'diameter_mean']]
+table = pd.concat([table1, table2], axis=1)
+assert len(table1) == len(table2) == len(table)
+
+table.set_index('label_id')
+table = table[3:]
+table = table.loc[table['length'] > 0.1]
+ids = table['label_id'].values
+ids = ids[np.isin(ids, cilia_ids)]
+print(ids)
+table = table.loc[ids]
+
+# plot_sizes(table)
+size_threshold = 5000
+table = filter_by_size(table, size_threshold)
+print("Number of cilia:", len(table))
+
+lens = table["length"].values
+for idd, leng in zip(ids, lens):
+    print(idd, leng)
+print("Average len:", np.mean(lens), "+-", np.std(lens))
+
+diameters = table["diameter_mean"].values
+print("Average diameter:", np.mean(diameters), "+-", np.std(diameters))
diff --git a/scripts/analysis/nephridia.py b/scripts/analysis/nephridia.py
new file mode 100644
index 0000000..950e089
--- /dev/null
+++ b/scripts/analysis/nephridia.py
@@ -0,0 +1,82 @@
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+import h5py
+from concurrent import futures
+
+
+def get_bb(idd, table, res):
+    row = table.loc[idd]
+    bb_min = [row.bb_min_z, row.bb_min_y, row.bb_min_x]
+    bb_max = [row.bb_max_z, row.bb_max_y, row.bb_max_x]
+    return tuple(slice(int(mi / re), int(ma / re))
+                 for mi, ma, re in zip(bb_min, bb_max, res))
+
+
+def match_cilia_to_cells(cell_ids, cell_table,
+                         cilia_seg_path, cilia_seg_key, cilia_res):
+    cell_table = pd.read_csv(cell_table, sep='\t')
+    cell_table.set_index('label_id')
+
+    with h5py.File(cilia_seg_path, 'r') as f:
+        ds_cil = f[cilia_seg_key]
+
+        def match_single_cell(cell_id):
+            # get the bounding box of this cell
+            bb = get_bb(cell_id, cell_table, cilia_res)
+            seg = ds_cil[bb]
+            return np.unique(seg)
+
+        with futures.ThreadPoolExecutor(9) as tp:
+            tasks = [tp.submit(match_single_cell, cell_id) for cell_id in cell_ids]
+            res = [t.result() for t in tasks]
+
+        cilia_ids = np.concatenate(res)
+        cilia_ids = np.unique(cilia_ids)
+    return cilia_ids
+
+
+def plot_sizes(table):
+    sizes = table['n_pixels'].values[1:]
+    print(sizes.max(), sizes.min())
+
+    fig, ax = plt.subplots()
+    _, bins, patches = ax.hist(sizes, 32)
+    ax.set_xlabel("Size in pixel")
+    ax.set_ylabel("Count")
+    plt.show()
+
+    sizes = sizes[sizes <= bins[1]]
+    fig, ax = plt.subplots()
+    _, bins, patches = ax.hist(sizes, 32)
+    print(bins)
+    ax.set_xlabel("Size in pixel")
+    ax.set_ylabel("Count")
+    plt.show()
+
+
+def filter_by_size(table, size_threshold):
+    table = table.loc[table['n_pixels'] > size_threshold]
+    return table
+
+
+def compute_offsets(table):
+    df = table[['anchor_x', 'anchor_y', 'anchor_y']]
+    pos = df.values
+    offsets = np.linalg.norm(pos, axis=1)
+    return offsets
+
+
+def plot_offsets(table):
+    offsets = compute_offsets(table)
+    fig, ax = plt.subplots()
+    ax.hist(offsets, 32)
+    ax.set_xlabel("Offset in microns")
+    ax.set_ylabel("Count")
+    plt.show()
+
+
+def filter_by_offset(table, offset_threshold):
+    offsets = compute_offsets(table)
+    table = table.loc[offsets > offset_threshold]
+    return table
-- 
GitLab