From 4bcad216027f9b11b17a11c2a13aaa118a1317b5 Mon Sep 17 00:00:00 2001
From: Christopher Rhodes <christopher.rhodes@embl.de>
Date: Tue, 17 Oct 2023 09:58:07 +0200
Subject: [PATCH] Corrected problem in labels key where total counts per
 annotation were misreported

---
 extensions/chaeo/workflows.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/extensions/chaeo/workflows.py b/extensions/chaeo/workflows.py
index 5ef2ec16..49c114a3 100644
--- a/extensions/chaeo/workflows.py
+++ b/extensions/chaeo/workflows.py
@@ -223,12 +223,12 @@ def transfer_ecotaxa_labels_to_patch_stacks(
 
     # export annotation classes and their summary stats
     df_tr, df_te = train_test_split(df_pl, train_size=tr_split)
-    df_labels['counts'] = df_pl['annotation_class_id'].value_counts()
+    # total counts join via the merge below (keyed on annotation_class_id); direct assignment aligned on df_labels' index
     df_labels = pd.merge(
         df_labels,
         pd.DataFrame(
-            [df_tr.annotation_class_id.value_counts(), df_te.annotation_class_id.value_counts()],
-            index=['to_train', 'to_test']
+            [df_pl.annotation_class_id.value_counts(), df_tr.annotation_class_id.value_counts(), df_te.annotation_class_id.value_counts()],
+            index=['total', 'to_train', 'to_test']
         ).T,
         left_on='annotation_class_id',
         right_index=True,
@@ -236,7 +236,7 @@ def transfer_ecotaxa_labels_to_patch_stacks(
     )
     df_labels.loc[df_labels.to_train.isna(), 'to_train'] = 0
     df_labels.loc[df_labels.to_test.isna(), 'to_test'] = 0
-    for col in ['to_train', 'to_test', 'counts']:
+    for col in ['total', 'to_train', 'to_test']:
         df_labels.loc[df_labels[col].isna(), col] = 0
     df_labels.to_csv(Path(where_output) / 'labels_key.csv', index=False)
 
-- 
GitLab