From 38be90ae22bc5bc1fc96b047e369219401a60a29 Mon Sep 17 00:00:00 2001 From: Christopher Rhodes <christopher.rhodes@embl.de> Date: Tue, 7 Nov 2023 11:15:13 +0100 Subject: [PATCH] Batch method for multi-chunk Ecotaxa TSV --- extensions/chaeo/batch_jobs/20231028_Porto_PA.py | 10 +++++++--- extensions/chaeo/ecotaxa.py | 6 +++++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/extensions/chaeo/batch_jobs/20231028_Porto_PA.py b/extensions/chaeo/batch_jobs/20231028_Porto_PA.py index f41950fd..7e02c856 100644 --- a/extensions/chaeo/batch_jobs/20231028_Porto_PA.py +++ b/extensions/chaeo/batch_jobs/20231028_Porto_PA.py @@ -1,7 +1,7 @@ from pathlib import Path from model_server.util import autonumber_new_directory, get_matching_files, loop_workflow -from extensions.chaeo.ecotaxa import write_ecotaxa_tsv +from extensions.chaeo.ecotaxa import write_ecotaxa_tsv_chunked_subdirectories from extensions.chaeo.workflows import export_patches_from_multichannel_zstack from extensions.ilastik.models import IlastikPixelClassifierModel @@ -51,7 +51,11 @@ if __name__ == '__main__': chunk_size=25, ) - # csv_path = (Path(where_output) / 'workflow_data.csv').__str__() - # write_ecotaxa_tsv(csv_path, where_output, sample_id=sample_id, scope_id='EMBL-MS-Zeiss-LSM900') + write_ecotaxa_tsv_chunked_subdirectories( + where_output, + 'workflow_data.csv', + sample_id=sample_id, + scope_id='EMBL-MS-Zeiss-LSM900' + ) print('Finished') \ No newline at end of file diff --git a/extensions/chaeo/ecotaxa.py b/extensions/chaeo/ecotaxa.py index 12a8bb6d..255552f3 100644 --- a/extensions/chaeo/ecotaxa.py +++ b/extensions/chaeo/ecotaxa.py @@ -38,4 +38,8 @@ def write_ecotaxa_tsv(patches_csv_path: str, where: str, sample_id: str, scope_i 'process_id' ] df_export = df_patches.loc[:, pd.IndexSlice[cols_to_transfer, :]] - df_export.to_csv(Path(where) / 'ecotaxa.tsv', sep='\t', index=False) \ No newline at end of file + df_export.to_csv(Path(where) / 'ecotaxa.tsv', sep='\t', index=False) + +def write_ecotaxa_tsv_chunked_subdirectories(top_dir: str, csv_filename: str, sample_id: str, scope_id: str): + for sd in Path(top_dir).iterdir(): + write_ecotaxa_tsv((sd / csv_filename).__str__(), sd.__str__(), sample_id, scope_id) -- GitLab