-
Christopher Randolph Rhodes authored
Corrected argument-forwarding error; set up job for porto data; split off ecotaxa TSV generation into its own module
Christopher Randolph Rhodes authored: Corrected argument-forwarding error; set up job for porto data; split off ecotaxa TSV generation into its own module
ecotaxa.py 1.47 KiB
from pathlib import Path
import pandas as pd
def write_ecotaxa_tsv(
    patches_csv_path: str,
    where: str,
    sample_id: str,
    scope_id: str,
) -> None:
    """Write an EcoTaxa import table (``ecotaxa.tsv``) from a patch table.

    The patch CSV is read with pandas and reduced to the columns that are
    exported.  The output is tab-separated and carries a two-row header:
    the first row holds the variable names, the second row holds the data
    type tag of each variable.  Every exported column is tagged ``[t]``.

    Args:
        patches_csv_path: Path of the CSV patch table.  It must contain the
            columns ``patch_filename``, ``input_file`` and ``patch_id``.
        where: Directory in which ``ecotaxa.tsv`` is written.
        sample_id: Value written to the ``sample_id`` column of every row.
        scope_id: Value written to the ``acq_instrument_microscope`` column
            of every row.

    Raises:
        KeyError: If the patch table lacks one of the required columns.
    """
    df_patches = pd.read_csv(patches_csv_path)

    # Fail early, naming every absent column, instead of surfacing an opaque
    # lookup error from deep inside pandas.
    required_cols = ('patch_filename', 'input_file', 'patch_id')
    missing_cols = [col for col in required_cols if col not in df_patches.columns]
    if missing_cols:
        raise KeyError(
            f'patch table {patches_csv_path} is missing required column(s): {missing_cols}'
        )

    # Build the export table directly, in output order.  The source columns
    # are addressed by name only, so the result does not depend on which
    # dtype pandas inferred for them (e.g. ``patch_id`` parsed as float).
    df_export = pd.DataFrame(
        {
            'img_file_name': '2d_patches_annotation/' + df_patches['patch_filename'],
            'object_id': df_patches['patch_id'],
            'acq_id': df_patches['input_file'],
            'acq_instrument': 'Other microscope',
            'acq_instrument_microscope': scope_id,
            'sample_id': sample_id,
            'process_id': df_patches['patch_id'],
        },
        index=df_patches.index,
    )

    # Second column index level carries the data type tag; with
    # ``index=False`` pandas writes one header row per column level.
    df_export.columns = pd.MultiIndex.from_arrays(
        [df_export.columns, ['[t]'] * df_export.shape[1]],
        names=['variable', 'data type'],
    )

    df_export.to_csv(Path(where) / 'ecotaxa.tsv', sep='\t', index=False)