From be78a9d9abcbfa51db99cdeac493fbd94d666ff8 Mon Sep 17 00:00:00 2001 From: Christopher Rhodes <christopher.rhodes@embl.de> Date: Sun, 22 Oct 2023 18:35:36 +0200 Subject: [PATCH] Add EcoTaxa-spec'd TSV export to batch running script --- .../chaeo/actual_runs/20231008_Bilbao_PA.py | 54 ++++++++++++++----- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/extensions/chaeo/actual_runs/20231008_Bilbao_PA.py b/extensions/chaeo/actual_runs/20231008_Bilbao_PA.py index 4312bccd..3fe547b1 100644 --- a/extensions/chaeo/actual_runs/20231008_Bilbao_PA.py +++ b/extensions/chaeo/actual_runs/20231008_Bilbao_PA.py @@ -1,23 +1,46 @@ from pathlib import Path +import pandas as pd + from extensions.chaeo.util import autonumber_new_directory, get_matching_files, loop_workflow from extensions.chaeo.workflows import export_patches_from_multichannel_zstack -from model_server.accessors import CziImageFileAccessor, write_accessor_data_to_file, InMemoryDataAccessor -from model_server.process import rescale - +def write_ecotaxa_tsv(patches_csv_path, where): + # import patch output table + df_patches = pd.read_csv(patches_csv_path) + df_patches['img_file_name'] = df_patches['patch_filename'].apply(lambda x: '2d_patches_annotation/' + x) -def export_single_channel_tif_from_multichannel_czi(input_file_path, output_folder_path, channel, **kwargs): - in_acc = CziImageFileAccessor(input_file_path) - data = in_acc.get_one_channel_data(channel).data - if 'rescale_zmask_clip' in kwargs: - data = rescale(data, clip=kwargs['rescale_zmask_clip']) - outf = Path(output_folder_path) / (Path(input_file_path).stem + '.tif') - write_accessor_data_to_file( - outf, - InMemoryDataAccessor(data), + # make second column index level to comply w/ EcoTaxa schema + df_patches.columns = pd.MultiIndex.from_frame( + pd.DataFrame([ + df_patches.columns, + df_patches.dtypes.apply( + lambda x: '[f]' if x in ['float64'] else '[t]' + ) + ]).T, + names=['variable', 'data type'] ) - print(f'Wrote file: {outf}') + + # add new columns for ecotaxa + df_patches.loc[:, ('acq_instrument', '[t]')] = 'Other microscope' + df_patches.loc[:, ('acq_instrument_microscope', '[t]')] = 'EMBL-MS-Zeiss-LSM900' + df_patches.loc[:, ('sample_id', '[t]')] = '20231008-bilbao' + + df_patches.loc[:, ('acq_id', '[t]')] = df_patches.loc[:, ('input_file', '[t]')] + df_patches.loc[:, ('object_id', '[t]')] = df_patches.loc[:, ('patch_id', '[t]')] + df_patches.loc[:, ('process_id', '[t]')] = df_patches.loc[:, ('patch_id', '[t]')] + + cols_to_transfer = [ + 'img_file_name', + 'object_id', + 'acq_id', + 'acq_instrument', + 'acq_instrument_microscope', + 'sample_id', + 'process_id' + ] + df_export = df_patches.loc[:, pd.IndexSlice[cols_to_transfer, :]] + df_export.to_csv(Path(where) / 'ecotaxa.tsv', sep='\t', index=False) if __name__ == '__main__': root = Path('c:/Users/rhodes/projects/proj0012-trec-handoff/owncloud-sync/TREC-HD/Images/') @@ -48,7 +71,7 @@ if __name__ == '__main__': 'export_patch_masks': True, 'rescale_zmask_clip': 0.01, 'rgb_overlay_channels': (1, None, None), - 'rgb_overlay_weights': (0.5, 1.0, 1.0) + 'rgb_overlay_weights': (0.2, 1.0, 1.0) } input_files = get_matching_files(where_czi, 'czi', coord_filter={}) @@ -61,4 +84,7 @@ if __name__ == '__main__': catch_and_continue=False, ) + csv_path = (Path(where_output) / 'workflow_data.csv').__str__() + write_ecotaxa_tsv(csv_path, where_output) + print('Finished') \ No newline at end of file -- GitLab