Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
SVLT
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ALMF
SVLT
Commits
da091a5d
Commit
da091a5d
authored
1 year ago
by
Christopher Randolph Rhodes
Browse files
Options
Downloads
Patches
Plain Diff
Implemented training-test split in label mask export
parent
05af7b71
No related branches found
Branches containing commit
No related tags found
Tags containing commit
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
extensions/chaeo/examples/label_patches.py
+1
-0
1 addition, 0 deletions
extensions/chaeo/examples/label_patches.py
extensions/chaeo/workflows.py
+29
-8
29 additions, 8 deletions
extensions/chaeo/workflows.py
with
30 additions
and
8 deletions
extensions/chaeo/examples/label_patches.py
+
1
−
0
View file @
da091a5d
...
...
@@ -11,3 +11,4 @@ if __name__ == '__main__':
ecotaxa_tsv
=
'
c:/Users/rhodes/projects/proj0011-plankton-seg/exp0013/ecotaxa_export_10468_20231012_0930.tsv
'
,
where_output
=
autonumber_new_directory
(
root
,
'
labeled_patches
'
)
)
print
(
'
Finished
'
)
This diff is collapsed.
Click to expand it.
extensions/chaeo/workflows.py
+
29
−
8
View file @
da091a5d
...
...
@@ -4,6 +4,7 @@ from uuid import uuid4
import
numpy
as
np
import
pandas
as
pd
from
sklearn.model_selection
import
train_test_split
from
extensions.ilastik.models
import
IlastikPixelClassifierModel
from
extensions.chaeo.annotators
import
draw_boxes_on_3d_image
...
...
@@ -173,7 +174,11 @@ def transfer_ecotaxa_labels_to_patch_stacks(
ecotaxa_tsv
:
str
,
where_output
:
str
,
patch_size
:
tuple
=
(
256
,
256
),
tr_split
=
0.6
,
)
->
Dict
:
assert
tr_split
>
0.5
# reduce chance that low-probability objects are omitted from training
# read patch metadata
df_obj
=
pd
.
read_csv
(
object_csv
,
)
...
...
@@ -188,6 +193,8 @@ def transfer_ecotaxa_labels_to_patch_stacks(
}
)
df_merge
=
pd
.
merge
(
df_obj
,
df_ecotaxa
,
left_on
=
'
patch_id
'
,
right_on
=
'
object_id
'
)
# assign each unique lowest-level annotation to a class index
se_unique
=
pd
.
Series
(
df_merge
.
object_annotation_hierarchy
.
unique
()
)
...
...
@@ -202,6 +209,7 @@ def transfer_ecotaxa_labels_to_patch_stacks(
'
annotation_class
'
:
df_split
.
loc
[:,
1
].
str
.
lower
()
})
# join patch filenames and annotation classes
df_pf
=
pd
.
merge
(
df_merge
[[
'
patch_filename
'
,
'
object_annotation_hierarchy
'
]],
df_labels
,
...
...
@@ -210,13 +218,28 @@ def transfer_ecotaxa_labels_to_patch_stacks(
)
df_pl
=
df_pf
[
df_pf
[
'
object_annotation_hierarchy
'
].
notnull
()]
zstack
=
np
.
zeros
((
*
patch_size
,
1
,
len
(
df_pl
)),
dtype
=
'
uint8
'
)
# export annotation classes and their summary stats
df_tr
,
df_te
=
train_test_split
(
df_pl
,
train_size
=
tr_split
)
df_labels
[
'
counts
'
]
=
df_pl
[
'
annotation_class_id
'
].
value_counts
()
df_labels
.
to_csv
(
Path
(
where_output
)
/
'
labels_key.csv
'
)
# export patches as z-stack
for
fi
,
pl
in
enumerate
(
df_pl
.
itertuples
(
name
=
'
PatchFile
'
)):
df_labels
=
pd
.
merge
(
df_labels
,
pd
.
DataFrame
(
[
df_tr
.
annotation_class_id
.
value_counts
(),
df_te
.
annotation_class_id
.
value_counts
()],
index
=
[
'
to_train
'
,
'
to_test
'
]
).
T
,
left_on
=
'
annotation_class_id
'
,
right_index
=
True
,
how
=
'
outer
'
)
df_labels
.
loc
[
df_labels
.
to_train
.
isna
(),
'
to_train
'
]
=
0
df_labels
.
loc
[
df_labels
.
to_test
.
isna
(),
'
to_test
'
]
=
0
for
col
in
[
'
to_train
'
,
'
to_test
'
,
'
counts
'
]:
df_labels
.
loc
[
df_labels
[
col
].
isna
(),
col
]
=
0
df_labels
.
to_csv
(
Path
(
where_output
)
/
'
labels_key.csv
'
,
index
=
False
)
# export patches as a single z-stack
zstack
=
np
.
zeros
((
*
patch_size
,
1
,
len
(
df_tr
)),
dtype
=
'
uint8
'
)
for
fi
,
pl
in
enumerate
(
df_tr
.
itertuples
(
name
=
'
PatchFile
'
)):
fn
=
pl
.
_asdict
()[
'
patch_filename
'
]
ac
=
pl
.
_asdict
()[
'
annotation_class_id
'
]
acc_bm
=
generate_file_accessor
(
Path
(
where_masks
)
/
fn
)
...
...
@@ -224,8 +247,6 @@ def transfer_ecotaxa_labels_to_patch_stacks(
assert
acc_bm
.
chroma
==
1
assert
acc_bm
.
nz
==
1
zstack
[:,
:,
0
,
fi
]
=
(
acc_bm
.
data
[:,
:,
0
,
0
]
==
255
)
*
ac
# export masks as z-stack
write_accessor_data_to_file
(
Path
(
where_output
)
/
'
zstack_object_label.tif
'
,
InMemoryDataAccessor
(
zstack
))
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment