Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
P
platy-browser-data
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Christian Tischer
platy-browser-data
Commits
b75430bc
Commit
b75430bc
authored
5 years ago
by
Valentyna Zinchenko
Browse files
Options
Downloads
Patches
Plain Diff
script to write gene table
parent
80c1e89b
No related branches found
Branches containing commit
No related tags found
Tags containing commit
1 merge request
!2
Merge versioning and scripts into master
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
scripts/attributes/genes.py
+92
-0
92 additions, 0 deletions
scripts/attributes/genes.py
with
92 additions
and
0 deletions
scripts/attributes/genes.py
0 → 100755
+
92
−
0
View file @
b75430bc
import
os
import
csv
import
h5py
import
numpy
as
np
from
vigra.analysis
import
extractRegionFeatures
from
vigra.sampling
import
resize
def
get_bbs
(
data
):
shape
=
data
.
shape
num_cells
=
(
np
.
max
(
data
)).
astype
(
'
int
'
)
+
1
cells_bbs
=
[[]
for
i
in
range
(
num_cells
)]
mins_and_maxs
=
extractRegionFeatures
(
data
.
astype
(
'
float32
'
),
data
.
astype
(
'
uint32
'
),
features
=
[
'
Coord<Maximum >
'
,
'
Coord<Minimum >
'
])
mins
=
mins_and_maxs
[
'
Coord<Minimum >
'
].
astype
(
'
uint32
'
)
maxs
=
mins_and_maxs
[
'
Coord<Maximum >
'
].
astype
(
'
uint32
'
)
+
1
for
cell
in
range
(
num_cells
):
cell_bb
=
[]
cell_min
=
mins
[
cell
]
cell_max
=
maxs
[
cell
]
for
axis
in
range
(
3
):
cell_bb
.
append
(
slice
(
cell_min
[
axis
],
cell_max
[
axis
]))
cells_bbs
[
cell
]
=
tuple
(
cell_bb
)
return
cells_bbs
def
get_cell_sizes
(
data
):
max_label
=
(
np
.
max
(
data
)).
astype
(
'
uint32
'
)
cell_sizes
=
[
0
]
*
(
max_label
+
1
)
Z
,
X
,
Y
=
data
.
shape
for
z
in
range
(
Z
):
for
x
in
range
(
X
):
for
y
in
range
(
Y
):
label
=
data
[
z
,
x
,
y
]
cell_sizes
[
label
]
+=
1
cell_sizes
=
np
.
array
(
cell_sizes
)
return
cell_sizes
def
get_cell_expression
(
segm_data
,
all_genes
):
num_genes
=
all_genes
.
shape
[
0
]
labels
=
list
(
np
.
unique
(
segm_data
))
cells_expression
=
np
.
zeros
((
len
(
labels
),
num_genes
),
dtype
=
'
float32
'
)
cell_sizes
=
get_cell_sizes
(
segm_data
)
cell_bbs
=
get_bbs
(
segm_data
)
for
cell_idx
in
range
(
len
(
labels
)):
cell_label
=
labels
[
cell_idx
]
if
cell_label
==
0
:
continue
cell_size
=
cell_sizes
[
cell_label
]
bb
=
cell_bbs
[
cell_label
]
cell_masked
=
(
segm_data
[
bb
]
==
cell_label
)
genes_in_cell
=
all_genes
[
tuple
([
slice
(
0
,
None
),]
+
list
(
bb
))]
for
gene
in
range
(
num_genes
):
gene_expr
=
genes_in_cell
[
gene
]
try
:
gene_expr_sum
=
np
.
sum
(
gene_expr
[
cell_masked
]
>
0
)
except
:
print
(
gene_expr
.
shape
,
cell_masked
.
shape
)
cells_expression
[
cell_idx
,
gene
]
=
gene_expr_sum
/
cell_size
return
labels
,
cells_expression
def
write_genes_table
(
segm_file
,
genes_file
,
table_file
,
labels
):
DSET
=
'
t00000/s00/4/cells
'
NEW_SHAPE
=
(
570
,
518
,
550
)
GENES_DSET
=
'
genes
'
NAMES_DSET
=
'
gene_names
'
genes_table_file
=
os
.
path
.
splitext
(
table_file
)[
0
]
+
"
_genes2
"
+
os
.
path
.
splitext
(
table_file
)[
1
]
with
h5py
.
File
(
segm_file
,
'
r
'
)
as
f
:
segment_data
=
f
[
DSET
][:]
with
h5py
.
File
(
genes_file
,
'
r
'
)
as
f
:
all_genes
=
f
[
GENES_DSET
][:]
gene_names
=
[
i
.
decode
(
'
utf-8
'
)
for
i
in
f
[
NAMES_DSET
]]
num_genes
=
len
(
gene_names
)
downsampled_data
=
resize
(
segment_data
.
astype
(
"
float32
"
),
shape
=
NEW_SHAPE
,
order
=
0
).
astype
(
'
uint16
'
)
avail_labels
,
expression
=
get_cell_expression
(
downsampled_data
,
all_genes
)
with
open
(
genes_table_file
,
'
w
'
)
as
genes_table
:
csv_writer
=
csv
.
writer
(
genes_table
,
delimiter
=
'
\t
'
)
_
=
csv_writer
.
writerow
([
'
label_id
'
,]
+
gene_names
)
for
label
in
labels
:
if
label
in
avail_labels
:
idx
=
avail_labels
.
index
(
label
)
_
=
csv_writer
.
writerow
([
label
,
]
+
list
(
expression
[
idx
]))
else
:
_
=
csv_writer
.
writerow
([
label
,
]
+
[
0
]
*
num_genes
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment