From 38be90ae22bc5bc1fc96b047e369219401a60a29 Mon Sep 17 00:00:00 2001
From: Christopher Rhodes <christopher.rhodes@embl.de>
Date: Tue, 7 Nov 2023 11:15:13 +0100
Subject: [PATCH] Batch method for multi-chunk Ecotaxa TSV

---
 extensions/chaeo/batch_jobs/20231028_Porto_PA.py | 10 +++++++---
 extensions/chaeo/ecotaxa.py                      |  6 +++++-
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/extensions/chaeo/batch_jobs/20231028_Porto_PA.py b/extensions/chaeo/batch_jobs/20231028_Porto_PA.py
index f41950fd..7e02c856 100644
--- a/extensions/chaeo/batch_jobs/20231028_Porto_PA.py
+++ b/extensions/chaeo/batch_jobs/20231028_Porto_PA.py
@@ -1,7 +1,7 @@
 from pathlib import Path
 
 from model_server.util import autonumber_new_directory, get_matching_files, loop_workflow
-from extensions.chaeo.ecotaxa import write_ecotaxa_tsv
+from extensions.chaeo.ecotaxa import write_ecotaxa_tsv_chunked_subdirectories
 from extensions.chaeo.workflows import export_patches_from_multichannel_zstack
 from extensions.ilastik.models import IlastikPixelClassifierModel
 
@@ -51,7 +51,11 @@ if __name__ == '__main__':
         chunk_size=25,
     )
 
-    # csv_path = (Path(where_output) / 'workflow_data.csv').__str__()
-    # write_ecotaxa_tsv(csv_path, where_output, sample_id=sample_id, scope_id='EMBL-MS-Zeiss-LSM900')
+    write_ecotaxa_tsv_chunked_subdirectories(
+        where_output,
+        'workflow_data.csv',
+        sample_id=sample_id,
+        scope_id='EMBL-MS-Zeiss-LSM900'
+    )
 
     print('Finished')
\ No newline at end of file
diff --git a/extensions/chaeo/ecotaxa.py b/extensions/chaeo/ecotaxa.py
index 12a8bb6d..255552f3 100644
--- a/extensions/chaeo/ecotaxa.py
+++ b/extensions/chaeo/ecotaxa.py
@@ -38,4 +38,8 @@ def write_ecotaxa_tsv(patches_csv_path: str, where: str, sample_id: str, scope_i
         'process_id'
     ]
     df_export = df_patches.loc[:, pd.IndexSlice[cols_to_transfer, :]]
-    df_export.to_csv(Path(where) / 'ecotaxa.tsv', sep='\t', index=False)
\ No newline at end of file
+    df_export.to_csv(Path(where) / 'ecotaxa.tsv', sep='\t', index=False)
+
+def write_ecotaxa_tsv_chunked_subdirectories(top_dir: str, csv_filename: str, sample_id: str, scope_id: str):  # calls write_ecotaxa_tsv once per subdirectory of top_dir, reading <subdir>/<csv_filename>
+    for sd in sorted(p for p in Path(top_dir).iterdir() if p.is_dir()):  # iterdir() also yields plain files, in arbitrary order: keep directories only, in sorted order
+        write_ecotaxa_tsv(str(sd / csv_filename), str(sd), sample_id, scope_id)  # the subdirectory is both the CSV location and the output location
-- 
GitLab