from pathlib import Path

import pandas as pd


def write_ecotaxa_tsv(patches_csv_path: str, where: str, sample_id: str, scope_id: str):
    """Convert a patch output table into an ``ecotaxa.tsv`` import file.

    Reads the CSV at ``patches_csv_path``, derives the seven exported columns
    and writes them tab-separated to ``<where>/ecotaxa.tsv``.  The file has two
    header rows: the column names, then a per-column type tag.  Every exported
    column carries the ``[t]`` tag.

    Exported columns, in order:

    * ``img_file_name`` -- ``'2d_patches_annotation/'`` + ``patch_filename``
    * ``object_id`` -- copy of ``patch_id``
    * ``acq_id`` -- copy of ``input_file``
    * ``acq_instrument`` -- the constant ``'Other microscope'``
    * ``acq_instrument_microscope`` -- ``scope_id``
    * ``sample_id`` -- ``sample_id``
    * ``process_id`` -- copy of ``patch_id``

    Args:
        patches_csv_path: Path of the patch table CSV.  It must contain the
            columns ``patch_filename``, ``input_file`` and ``patch_id``.
        where: Existing directory in which ``ecotaxa.tsv`` is written.
        sample_id: Value written to every row of the ``sample_id`` column.
        scope_id: Value written to every row of the
            ``acq_instrument_microscope`` column.

    Raises:
        KeyError: If a required column is absent from the patch table.
    """
    df_patches = pd.read_csv(patches_csv_path)

    # Fail early, naming every absent column, instead of surfacing a bare
    # KeyError from whichever lookup happens to run first.
    required = ('patch_filename', 'input_file', 'patch_id')
    missing = [name for name in required if name not in df_patches.columns]
    if missing:
        raise KeyError(
            f'patch table {patches_csv_path!r} is missing required column(s): {missing}'
        )

    # Source columns are addressed by plain name.  Looking them up through a
    # dtype-derived tag (e.g. ('patch_id', '[t]')) breaks as soon as pandas
    # infers float64 for an id column, because its tag would then be '[f]'.
    patch_ids = df_patches['patch_id']
    df_export = pd.DataFrame(
        {
            'img_file_name': '2d_patches_annotation/' + df_patches['patch_filename'],
            'object_id': patch_ids,
            'acq_id': df_patches['input_file'],
            'acq_instrument': 'Other microscope',
            'acq_instrument_microscope': scope_id,
            'sample_id': sample_id,
            'process_id': patch_ids,
        }
    )

    # A second column level makes to_csv emit the type-tag row directly under
    # the column names (two header rows, to comply with the EcoTaxa schema).
    df_export.columns = pd.MultiIndex.from_arrays(
        [df_export.columns, ['[t]'] * df_export.shape[1]],
        names=['variable', 'data type'],
    )

    df_export.to_csv(Path(where) / 'ecotaxa.tsv', sep='\t', index=False)