Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
model_server
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Christopher Randolph Rhodes
model_server
Commits
cd2fe9b8
Commit
cd2fe9b8
authored
1 year ago
by
Christopher Randolph Rhodes
Browse files
Options
Downloads
Patches
Plain Diff
Moved more of batch runner into utility methods
parent
7695e1f0
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
extensions/chaeo/examples/batch_run_patches.py
+28
-49
28 additions, 49 deletions
extensions/chaeo/examples/batch_run_patches.py
extensions/chaeo/util.py
+56
-3
56 additions, 3 deletions
extensions/chaeo/util.py
extensions/chaeo/workflows.py
+39
-1
39 additions, 1 deletion
extensions/chaeo/workflows.py
with
123 additions
and
53 deletions
extensions/chaeo/examples/batch_run_patches.py
+
28
−
49
View file @
cd2fe9b8
from
pathlib
import
Path
import
re
# from time import localtime, strftime
import
pandas
as
pd
from
extensions.chaeo.util
import
autonumber_new_directory
,
get_matching_files
from
extensions.chaeo.util
import
autonumber_new_directory
,
get_matching_files
,
loop_workflow
from
extensions.chaeo.workflows
import
export_patches_from_multichannel_zstack
from
model_server.accessors
import
InMemoryDataAccessor
,
write_accessor_data_to_file
if
__name__
==
'
__main__
'
:
where_czi
=
'
z:/rhodes/projects/proj0004-marine-photoactivation/data/exp0038/AutoMic/20230906-163415/Selection
'
...
...
@@ -17,48 +11,33 @@ if __name__ == '__main__':
'
batch-output
'
)
csv_args
=
{
'
mode
'
:
'
w
'
,
'
header
'
:
True
}
# when creating file
px_ilp
=
Path
.
home
()
/
'
model-server
'
/
'
ilastik
'
/
'
AF405-bodies_boundaries.ilp
'
#TODO: try/catch blocks and error handling around workflow calls
#TODO: pack JSON-serializable workflow inputs
input_files
=
get_matching_files
(
where_czi
,
'
czi
'
,
coord_filter
=
{
'
P
'
:
(
0
,
10
)})
for
ff
in
input_files
:
export_kwargs
=
{
'
input_zstack_path
'
:
Path
(
where_czi
)
/
ff
.
__str__
(),
'
ilastik_project_file
'
:
px_ilp
.
__str__
(),
'
pxmap_threshold
'
:
0.25
,
'
pixel_class
'
:
0
,
'
zmask_channel
'
:
0
,
'
patches_channel
'
:
4
,
'
where_output
'
:
where_output
,
'
mask_type
'
:
'
boxes
'
,
'
zmask_filters
'
:
{
'
area
'
:
(
1e3
,
1e8
)},
'
zmask_expand_box_by
'
:
(
128
,
3
),
'
export_pixel_probabilities
'
:
False
,
'
export_2d_patches_for_training
'
:
True
,
'
export_2d_patches_for_annotation
'
:
False
,
'
export_3d_patches
'
:
False
,
'
export_annotated_zstack
'
:
False
,
'
export_patch_masks
'
:
False
,
'
export_patch_label_maps
'
:
True
,
}
result
=
export_patches_from_multichannel_zstack
(
**
export_kwargs
)
# parse and record results
df
=
result
[
'
dataframe
'
]
df
[
'
source_path
'
]
=
ff
df
.
to_csv
(
where_output
/
'
df_objects.csv
'
,
index
=
False
,
**
csv_args
)
pd
.
DataFrame
(
result
[
'
timer_results
'
],
index
=
[
0
]).
to_csv
(
where_output
/
'
timer_results.csv
'
,
**
csv_args
)
pd
.
json_normalize
(
export_kwargs
).
to_csv
(
where_output
/
'
workflow_params.csv
'
,
**
csv_args
)
csv_args
=
{
'
mode
'
:
'
a
'
,
'
header
'
:
False
}
# append to CSV from here on
params
=
{
'
ilastik_project_file
'
:
px_ilp
.
__str__
(),
'
pxmap_threshold
'
:
0.25
,
'
pixel_class
'
:
0
,
'
zmask_channel
'
:
0
,
'
patches_channel
'
:
4
,
'
mask_type
'
:
'
boxes
'
,
'
zmask_filters
'
:
{
'
area
'
:
(
1e3
,
1e8
)},
'
zmask_expand_box_by
'
:
(
128
,
3
),
'
export_pixel_probabilities
'
:
False
,
'
export_2d_patches_for_training
'
:
True
,
'
export_2d_patches_for_annotation
'
:
False
,
'
export_3d_patches
'
:
False
,
'
export_annotated_zstack
'
:
False
,
'
export_patch_masks
'
:
False
,
'
export_patch_label_maps
'
:
True
,
}
input_files
=
get_matching_files
(
where_czi
,
'
czi
'
,
coord_filter
=
{
'
P
'
:
(
0
,
10
)},
)
loop_workflow
(
input_files
,
where_output
,
export_patches_from_multichannel_zstack
,
params
,
)
# export intermediate data if flagged
for
k
in
result
[
'
interm
'
].
keys
():
write_accessor_data_to_file
(
where_output
/
k
/
(
ff
.
stem
+
'
.tif
'
),
InMemoryDataAccessor
(
result
[
'
interm
'
][
k
])
)
\ No newline at end of file
print
(
'
Finished
'
)
\ No newline at end of file
This diff is collapsed.
Click to expand it.
extensions/chaeo/util.py
+
56
−
3
View file @
cd2fe9b8
...
...
@@ -2,6 +2,10 @@ from pathlib import Path
import
re
from
time
import
localtime
,
strftime
import
pandas
as
pd
from
model_server.accessors
import
InMemoryDataAccessor
,
write_accessor_data_to_file
def
autonumber_new_directory
(
where
:
str
,
prefix
:
str
)
->
str
:
yyyymmdd
=
strftime
(
'
%Y%m%d
'
,
localtime
())
...
...
@@ -10,8 +14,9 @@ def autonumber_new_directory(where: str, prefix: str) -> str:
ma
=
re
.
match
(
f
'
{
prefix
}
-
{
yyyymmdd
}
-([\d]+)
'
,
ff
.
name
)
if
ma
:
idx
=
max
(
idx
,
int
(
ma
.
groups
()[
0
])
+
1
)
return
(
Path
(
where
)
/
f
'
batch-output-
{
yyyymmdd
}
-
{
idx
:
04
d
}
'
).
__str__
()
new_path
=
(
Path
(
where
)
/
f
'
batch-output-
{
yyyymmdd
}
-
{
idx
:
04
d
}
'
)
new_path
.
mkdir
(
parents
=
True
,
exist_ok
=
False
)
return
new_path
.
__str__
()
def
get_matching_files
(
where
:
str
,
ext
:
str
,
coord_filter
:
dict
=
{})
->
str
:
files
=
[]
...
...
@@ -33,4 +38,52 @@ def get_matching_files(where: str, ext: str, coord_filter: dict={}) -> str:
if
is_filtered_out
(
ff
):
continue
files
.
append
(
ff
.
__str__
())
return
files
\ No newline at end of file
return
files
def loop_workflow(files, where_output, workflow_func, params, write_intermediate_products=True):
    """
    Run a workflow function over a list of input files and collect its tabular outputs.

    For each file, workflow_func is called with keyword arguments 'input_zstack_path' (the file),
    'where_output', and everything in params.  Its result is indexed by the keys 'dataframe',
    'timer_results' and 'interm'.  Results are accumulated in workflow_data.csv,
    timer_results.csv and workflow_parameters.csv under where_output; any exception raised by
    workflow_func is caught, recorded, and reported in failures.csv once the loop has finished.

    :param files: iterable of input file paths
    :param where_output: directory in which CSV files and intermediate products are written
    :param workflow_func: callable run once per input file
    :param params: dict of additional keyword arguments passed to workflow_func on every call
    :param write_intermediate_products: if True, write each entry of the result's 'interm' dict
        to <where_output>/<key>/<input file stem>.tif
    """
    output_dir = Path(where_output)
    failures = []

    # Names of the CSV files already started by this call.  The header/overwrite decision is
    # made per output file rather than from the loop index, so that a failure on the first
    # input file does not leave the CSVs without a header row.
    started_csvs = set()

    for ff in files:
        export_kwargs = {
            'input_zstack_path': ff,
            'where_output': where_output,
            **params,
        }

        # record failure information and move on to the next file
        try:
            result = workflow_func(**export_kwargs)
        except Exception as e:
            failures.append({
                'input_file': ff,
                'error_message': str(e),
            })
            print(f'Caught failure on {ff}:\n{e}')
            continue

        # record dataframes associated with workflow results
        batch_csv = {
            'workflow_data': result['dataframe'],
            'timer_results': pd.DataFrame(result['timer_results'], index=[0]),
            'workflow_parameters': pd.json_normalize(export_kwargs),
        }
        for k, df in batch_csv.items():
            df['input_file'] = ff
            if k in started_csvs:
                # append to existing file
                csv_args = {'mode': 'a', 'header': False}
            else:
                # first successful write in this batch: create/overwrite and emit header
                csv_args = {'mode': 'w', 'header': True}
            df.to_csv(output_dir / f'{k}.csv', index=False, **csv_args)
            started_csvs.add(k)

        # export intermediate data if flagged
        if write_intermediate_products:
            for k, data in result['interm'].items():
                write_accessor_data_to_file(
                    output_dir / k / (Path(ff).stem + '.tif'),
                    InMemoryDataAccessor(data)
                )

    # one row per failed input file
    pd.DataFrame(failures).to_csv(output_dir / 'failures.csv')
\ No newline at end of file
This diff is collapsed.
Click to expand it.
extensions/chaeo/workflows.py
+
39
−
1
View file @
cd2fe9b8
...
...
@@ -12,6 +12,7 @@ from model_server.accessors import generate_file_accessor, InMemoryDataAccessor,
from
model_server.workflows
import
Timer
# TODO: unpack and validate inputs
# TODO: expose channel indices and color balance vectors to caller
def
export_patches_from_multichannel_zstack
(
input_zstack_path
:
str
,
ilastik_project_file
:
str
,
...
...
@@ -106,6 +107,23 @@ def export_patches_from_multichannel_zstack(
# prepopulate patch UUID
df
[
'
patch_id
'
]
=
df
.
apply
(
lambda
_
:
uuid4
(),
axis
=
1
)
if
export_2d_patches_for_training
:
files
=
export_multichannel_patches_from_zstack
(
Path
(
where_output
)
/
'
2d_patches
'
,
stack
.
get_one_channel_data
(
4
),
zmask_meta
,
prefix
=
fstem
,
rescale_clip
=
0.001
,
make_3d
=
False
,
focus_metric
=
'
max_sobel
'
,
)
df_patches
=
pd
.
DataFrame
(
files
)
ti
.
click
(
'
export_2d_patches
'
)
# associate 2d patches, dropping labeled objects that were not exported as patches
df
=
pd
.
merge
(
df
,
df_patches
,
left_index
=
True
,
right_on
=
'
df_index
'
).
drop
(
columns
=
'
df_index
'
)
# prepopulate patch UUID
df
[
'
patch_id
'
]
=
df
.
apply
(
lambda
_
:
uuid4
(),
axis
=
1
)
if
export_patch_masks
:
files
=
export_patch_masks_from_zstack
(
Path
(
where_output
)
/
'
patch_masks
'
,
...
...
@@ -143,4 +161,24 @@ def export_patches_from_multichannel_zstack(
'
timer_results
'
:
ti
.
events
,
'
dataframe
'
:
df
,
'
interm
'
:
interm
,
}
\ No newline at end of file
}
def
transfer_ecotaxa_labels_to_patch_object_maps
(
path_to_patches
:
str
,
path_to_ecotaxa_tsv
:
str
,
path_output
:
str
,
)
->
Dict
:
where_patches
=
Path
(
path_to_patches
)
df_meta
=
pd
.
read_csv
(
path_to_ecotaxa_tsv
,
sep
=
'
\t
'
,
header
=
[
0
,
1
],
dtype
=
{
(
'
object_annotation_date
'
,
'
[t]
'
):
str
,
(
'
object_annotation_time
'
,
'
[t]
'
):
str
,
(
'
object_annotation_category_id
'
,
'
[t]
'
):
str
,
}
)
for
pp
in
where_patches
.
iterdir
():
patch
=
generate_file_accessor
(
pp
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment