diff --git a/extensions/chaeo/workflows.py b/extensions/chaeo/workflows.py index 5ef2ec1601cfb57840afd161713422c9e25a3513..49c114a3d13f6e7c68aa47b602a539ca29871280 100644 --- a/extensions/chaeo/workflows.py +++ b/extensions/chaeo/workflows.py @@ -223,12 +223,12 @@ def transfer_ecotaxa_labels_to_patch_stacks( # export annotation classes and their summary stats df_tr, df_te = train_test_split(df_pl, train_size=tr_split) - df_labels['counts'] = df_pl['annotation_class_id'].value_counts() + # df_labels['counts'] = df_pl['annotation_class_id'].value_counts() df_labels = pd.merge( df_labels, pd.DataFrame( - [df_tr.annotation_class_id.value_counts(), df_te.annotation_class_id.value_counts()], - index=['to_train', 'to_test'] + [df_pl.annotation_class_id.value_counts(), df_tr.annotation_class_id.value_counts(), df_te.annotation_class_id.value_counts()], + index=['total', 'to_train', 'to_test'] ).T, left_on='annotation_class_id', right_index=True, @@ -236,7 +236,7 @@ def transfer_ecotaxa_labels_to_patch_stacks( ) df_labels.loc[df_labels.to_train.isna(), 'to_train'] = 0 df_labels.loc[df_labels.to_test.isna(), 'to_test'] = 0 - for col in ['to_train', 'to_test', 'counts']: + for col in ['total', 'to_train', 'to_test']: df_labels.loc[df_labels[col].isna(), col] = 0 df_labels.to_csv(Path(where_output) / 'labels_key.csv', index=False)