From be78a9d9abcbfa51db99cdeac493fbd94d666ff8 Mon Sep 17 00:00:00 2001
From: Christopher Rhodes <christopher.rhodes@embl.de>
Date: Sun, 22 Oct 2023 18:35:36 +0200
Subject: [PATCH] Add EcoTaxa-spec'd TSV export to batch running script

---
 .../chaeo/actual_runs/20231008_Bilbao_PA.py   | 54 ++++++++++++++-----
 1 file changed, 40 insertions(+), 14 deletions(-)

diff --git a/extensions/chaeo/actual_runs/20231008_Bilbao_PA.py b/extensions/chaeo/actual_runs/20231008_Bilbao_PA.py
index 4312bccd..3fe547b1 100644
--- a/extensions/chaeo/actual_runs/20231008_Bilbao_PA.py
+++ b/extensions/chaeo/actual_runs/20231008_Bilbao_PA.py
@@ -1,23 +1,46 @@
 from pathlib import Path
 
+import pandas as pd
+
 from extensions.chaeo.util import autonumber_new_directory, get_matching_files, loop_workflow
 from extensions.chaeo.workflows import export_patches_from_multichannel_zstack
 
-from model_server.accessors import CziImageFileAccessor, write_accessor_data_to_file, InMemoryDataAccessor
-from model_server.process import rescale
-
+def write_ecotaxa_tsv(patches_csv_path, where):
+    # load the patch output table from CSV
+    df_patches = pd.read_csv(patches_csv_path)
+    df_patches['img_file_name'] = df_patches['patch_filename'].apply(lambda x: '2d_patches_annotation/' + x)
 
-def export_single_channel_tif_from_multichannel_czi(input_file_path, output_folder_path, channel, **kwargs):
-    in_acc = CziImageFileAccessor(input_file_path)
-    data = in_acc.get_one_channel_data(channel).data
-    if 'rescale_zmask_clip' in kwargs:
-        data = rescale(data, clip=kwargs['rescale_zmask_clip'])
-    outf = Path(output_folder_path) / (Path(input_file_path).stem + '.tif')
-    write_accessor_data_to_file(
-        outf,
-        InMemoryDataAccessor(data),
+    # add a second column level holding each column's type tag ([f]/[t]) to comply with the EcoTaxa schema
+    df_patches.columns = pd.MultiIndex.from_frame(
+        pd.DataFrame([
+            df_patches.columns,
+            df_patches.dtypes.apply(
+                lambda x: '[f]' if x in ['float64'] else '[t]'
+            )
+        ]).T,
+        names=['variable', 'data type']
     )
-    print(f'Wrote file: {outf}')
+
+    # add new columns for EcoTaxa
+    df_patches.loc[:, ('acq_instrument', '[t]')] = 'Other microscope'
+    df_patches.loc[:, ('acq_instrument_microscope', '[t]')] = 'EMBL-MS-Zeiss-LSM900'
+    df_patches.loc[:, ('sample_id', '[t]')] = '20231008-bilbao'
+
+    df_patches.loc[:, ('acq_id', '[t]')] = df_patches.loc[:, ('input_file', '[t]')]
+    df_patches.loc[:, ('object_id', '[t]')] = df_patches.loc[:, ('patch_id', '[t]')]
+    df_patches.loc[:, ('process_id', '[t]')] = df_patches.loc[:, ('patch_id', '[t]')]
+
+    cols_to_transfer = [
+        'img_file_name',
+        'object_id',
+        'acq_id',
+        'acq_instrument',
+        'acq_instrument_microscope',
+        'sample_id',
+        'process_id'
+    ]
+    df_export = df_patches.loc[:, pd.IndexSlice[cols_to_transfer, :]]
+    df_export.to_csv(Path(where) / 'ecotaxa.tsv', sep='\t', index=False)
 
 if __name__ == '__main__':
     root = Path('c:/Users/rhodes/projects/proj0012-trec-handoff/owncloud-sync/TREC-HD/Images/')
@@ -48,7 +71,7 @@ if __name__ == '__main__':
         'export_patch_masks': True,
         'rescale_zmask_clip': 0.01,
         'rgb_overlay_channels': (1, None, None),
-        'rgb_overlay_weights': (0.5, 1.0, 1.0)
+        'rgb_overlay_weights': (0.2, 1.0, 1.0)
     }
 
     input_files = get_matching_files(where_czi, 'czi', coord_filter={})
@@ -61,4 +84,7 @@ if __name__ == '__main__':
         catch_and_continue=False,
     )
 
+    csv_path = (Path(where_output) / 'workflow_data.csv').__str__()
+    write_ecotaxa_tsv(csv_path, where_output)
+
     print('Finished')
\ No newline at end of file
-- 
GitLab