Skip to content
Snippets Groups Projects
Commit f2f6846d authored by Constantin Pape's avatar Constantin Pape
Browse files

Prepare for folder migration

parent af87a1bd
No related branches found
No related tags found
1 merge request!11New storage layout
...@@ -29,8 +29,6 @@ def copy_file(xml_in, xml_out): ...@@ -29,8 +29,6 @@ def copy_file(xml_in, xml_out):
copy_xml_with_newpath(xml_in, xml_out, h5path, path_type='relative') copy_xml_with_newpath(xml_in, xml_out, h5path, path_type='relative')
# For now we put symlinks with relative paths, but I am not sure
# if this is the best idea, because I don't know if it will work on windows
def copy_tables(src_folder, dst_folder, name): def copy_tables(src_folder, dst_folder, name):
table_in = os.path.join(src_folder, 'tables', name) table_in = os.path.join(src_folder, 'tables', name)
table_out = os.path.join(dst_folder, 'tables', name) table_out = os.path.join(dst_folder, 'tables', name)
...@@ -45,9 +43,6 @@ def copy_tables(src_folder, dst_folder, name): ...@@ -45,9 +43,6 @@ def copy_tables(src_folder, dst_folder, name):
make_squashed_link(src_file, dst_file) make_squashed_link(src_file, dst_file)
# rel_path = os.path.relpath(src_file, table_out)
# if not os.path.exists(dst_file):
# os.symlink(rel_path, dst_file)
# write the txt file for additional tables # write the txt file for additional tables
write_additional_table_file(table_out) write_additional_table_file(table_out)
......
...@@ -2,8 +2,10 @@ import os ...@@ -2,8 +2,10 @@ import os
import json import json
import shutil import shutil
from glob import glob from glob import glob
from mmpb.files.name_lookup import look_up_filename, get_image_properties from mmpb.files.name_lookup import (look_up_filename, get_image_properties,
DYNAMIC_SEGMENTATIONS, get_dynamic_segmentation_properties)
from mmpb.files.xml_utils import get_h5_path_from_xml, copy_xml_with_newpath from mmpb.files.xml_utils import get_h5_path_from_xml, copy_xml_with_newpath
from mmpb.files.copy_helper import make_squashed_link
ROOT = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data' ROOT = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data'
DRY_RUN = True DRY_RUN = True
...@@ -25,8 +27,6 @@ def new_folder_structure(folder): ...@@ -25,8 +27,6 @@ def new_folder_structure(folder):
def move_image_file(image_folder, xml_path): def move_image_file(image_folder, xml_path):
name = os.path.splitext(os.path.split(xml_path)[1])[0] name = os.path.splitext(os.path.split(xml_path)[1])[0]
new_name = look_up_filename(name) new_name = look_up_filename(name)
if new_name is None:
new_name = name
# get the linked hdf5 path # get the linked hdf5 path
image_path = get_h5_path_from_xml(xml_path, return_absolute_path=True) image_path = get_h5_path_from_xml(xml_path, return_absolute_path=True)
...@@ -39,7 +39,7 @@ def move_image_file(image_folder, xml_path): ...@@ -39,7 +39,7 @@ def move_image_file(image_folder, xml_path):
shutil.move(xml_path, new_xml_path) shutil.move(xml_path, new_xml_path)
# if the hdf5 file is in the same folder, move it to 'images/local' as well # if the hdf5 file is in the same folder, move it to 'images/local' as well
h5_is_local = len(os.relpath(image_path, os.path.split(xml_path)[0]).split('/')) > 1 h5_is_local = len(os.path.relpath(image_path, os.path.split(xml_path)[0]).split('/')) == 1
if h5_is_local: if h5_is_local:
new_image_path = os.path.join(image_folder, 'local', new_name + '.h5') new_image_path = os.path.join(image_folder, 'local', new_name + '.h5')
if DRY_RUN: if DRY_RUN:
...@@ -64,8 +64,7 @@ def move_image_file(image_folder, xml_path): ...@@ -64,8 +64,7 @@ def move_image_file(image_folder, xml_path):
# set path in xml # set path in xml
copy_xml_with_newpath(new_xml_path, new_xml_path, new_rel_data_path) copy_xml_with_newpath(new_xml_path, new_xml_path, new_rel_data_path)
# update the h5 path in the new xml return new_name
return {new_name: get_image_properties(new_name)}
def update_image_dict(image_folder, image_dict): def update_image_dict(image_folder, image_dict):
...@@ -84,46 +83,105 @@ def update_image_data(folder): ...@@ -84,46 +83,105 @@ def update_image_data(folder):
xmls = glob(os.path.join(image_folder, "*.xml")) xmls = glob(os.path.join(image_folder, "*.xml"))
for xml in xmls: for xml in xmls:
image_properties = move_image_file(image_folder, xml) name = move_image_file(image_folder, xml)
image_dict.update(image_properties) image_dict.update({name: get_image_properties(name)})
if DRY_RUN: if DRY_RUN:
print("New image dict:") print("New image dict:")
print(image_dict) # print(image_dict)
else: else:
update_image_dict(image_folder, image_dict) update_image_dict(image_folder, image_dict)
# rename the table folders correctly
# fix links to account for the updated names
def update_tables(folder):
    """Rename the per-image table folders in <folder>/tables to the new
    naming scheme and repair csv soft-links that the renaming broke.

    Arguments:
        folder [str] - version folder containing a 'tables' sub-directory.
    """
    table_folder = os.path.join(folder, 'tables')
    tables = os.listdir(table_folder)
    for name in tables:
        # map the old folder name to the new naming scheme
        new_name = look_up_filename(name)
        path, new_path = os.path.join(table_folder, name), os.path.join(table_folder, new_name)
        if DRY_RUN:
            print("Rename", path, "to", new_path)
        else:
            os.rename(path, new_path)

        # update links if necessary
        # NOTE(review): in DRY_RUN mode the rename above is skipped, so this
        # glob finds nothing and the link repairs are not previewed.
        table_files = glob(os.path.join(new_path, '*.csv'))
        for table_file in table_files:
            # regular files (and links that resolve to an existing file) are fine
            if os.path.isfile(table_file):
                continue
            # read the link location
            link_location = os.path.realpath(table_file)
            # check if this soft-link is still valid
            if os.path.exists(link_location):
                continue
            # otherwise try to link to the renamed table file:
            # replace the (old-name) folder component of the resolved target
            # with the new name, keeping the rest of the target path.
            # FIX: previously this was computed from 'table_file' instead of
            # 'link_location', which just reproduced 'table_file' itself and
            # made the existence assert below fail for every broken link.
            link_folder, table_name = os.path.split(link_location)
            link_folder = os.path.split(link_folder)[0]
            link_location = os.path.join(link_folder, new_name, table_name)
            assert os.path.exists(link_location), link_location
            if DRY_RUN:
                print("Moving link from", table_file, link_location)
            else:
                make_squashed_link(link_location, table_file, override=True)
def update_segmentation_data(folder):
    """Migrate the segmentation data of one version folder.

    Moves each segmentation xml from 'segmentations' into the 'images'
    folder, collects the image properties, records metadata for the
    dynamically updated segmentations and finally updates the tables.

    Arguments:
        folder [str] - version folder to migrate.
    """
    image_dict = {}
    dynamic_seg_dict = {}
    image_folder = os.path.join(folder, 'images')
    seg_folder = os.path.join(folder, 'segmentations')

    xmls = glob(os.path.join(seg_folder, "*.xml"))
    for xml in xmls:
        name = move_image_file(image_folder, xml)
        image_dict.update({name: get_image_properties(name)})
        # dynamic segmentations additionally get paintera / table-update metadata
        if name in DYNAMIC_SEGMENTATIONS:
            dynamic_seg_dict.update({name: get_dynamic_segmentation_properties(name)})

    if DRY_RUN:
        print("New image dict:")
        # print(image_dict)
    else:
        update_image_dict(image_folder, image_dict)

    if DRY_RUN:
        print("New dynamic seg dict")
        # print(dynamic_seg_dict)
    else:
        # persist the dynamic segmentation metadata for later table updates
        dynamic_seg_path = os.path.join(folder, 'misc', 'dynamic_segmentations.json')
        with open(dynamic_seg_path, 'w') as f:
            json.dump(dynamic_seg_dict, f)

    # update the tables
    update_tables(folder)
def clean_up(version_folder):
    """Remove layout pieces that are obsolete after the folder migration.

    Arguments:
        version_folder [str] - root of the version folder to clean up.
    """
    # remove segmentation folder (needs to be empty!)
    seg_folder = os.path.join(version_folder, 'segmentations')
    if DRY_RUN:
        print("Removing", seg_folder)
    else:
        # os.rmdir raises if the folder is not empty - intentional safeguard
        os.rmdir(seg_folder)

    # remove bdv server config
    bdv_server_config = os.path.join(version_folder, 'misc', 'bdv_server.txt')
    if DRY_RUN:
        print("Removing", bdv_server_config)
    else:
        if os.path.exists(bdv_server_config):
            os.remove(bdv_server_config)
def make_readme(version):
    """Placeholder: write a README for the given version (not implemented yet)."""
# migrate version folder from old to new data layout # migrate version folder from old to new data layout
...@@ -144,7 +202,9 @@ def migrate_version(version): ...@@ -144,7 +202,9 @@ def migrate_version(version):
# - remove segmentations folder (make sure it's empty) # - remove segmentations folder (make sure it's empty)
# - remove bdv server config # - remove bdv server config
clean_up(version_folder) clean_up(version_folder)
# TODO make README for version
# 5.) Make a readme for this version
make_readme(version)
# migrate all the data in the raw folder # migrate all the data in the raw folder
...@@ -155,8 +215,6 @@ def migrate_rawfolder(): ...@@ -155,8 +215,6 @@ def migrate_rawfolder():
for xml_path in xmls: for xml_path in xmls:
name = os.path.splitext(os.path.split(xml_path)[1])[0] name = os.path.splitext(os.path.split(xml_path)[1])[0]
new_name = look_up_filename(name) new_name = look_up_filename(name)
if new_name is None:
new_name = name
# get the linked hdf5 path # get the linked hdf5 path
image_path = get_h5_path_from_xml(xml_path, return_absolute_path=True) image_path = get_h5_path_from_xml(xml_path, return_absolute_path=True)
...@@ -206,6 +264,9 @@ def make_remote_xmls(version): ...@@ -206,6 +264,9 @@ def make_remote_xmls(version):
def remove_deprecated_data(): def remove_deprecated_data():
# cats-neuropil # cats-neuropil
# traces # traces
# AcTub (this was removed at some point)
# Pty2 (this was removed at some point)
# meds_all_genes (does not belong in image folder)
# xray (is not part of any version yet, but we need to move the raw data) # xray (is not part of any version yet, but we need to move the raw data)
def remove_deprecated_seg(folder, pattern): def remove_deprecated_seg(folder, pattern):
...@@ -227,12 +288,25 @@ def remove_deprecated_data(): ...@@ -227,12 +288,25 @@ def remove_deprecated_data():
else: else:
shutil.rmtree(files[0]) shutil.rmtree(files[0])
def remove_deprecated_im(folder, pattern):
    """Remove a deprecated image xml matching 'pattern' from <folder>/images.

    Arguments:
        folder [str] - version folder to clean up
        pattern [str] - glob pattern identifying the deprecated image;
            at most one match is expected
    """
    # FIX: the original globbed on 'vfolder' (the enclosing loop variable)
    # instead of the 'folder' parameter; the two coincided at every call
    # site, but the parameter is used explicitly here.
    files = glob(os.path.join(folder, 'images', pattern))
    if len(files) > 0:
        assert len(files) == 1, str(files)
        if DRY_RUN:
            print("Remove", files[0])
        else:
            os.remove(files[0])
# remove xmls from the version folders # remove xmls from the version folders
# (data from rawfolder should be backed up by hand!) # (data from rawfolder should be backed up by hand!)
version_folders = glob(os.path.join(ROOT, "0.*")) version_folders = glob(os.path.join(ROOT, "0.*"))
for vfolder in version_folders: for vfolder in version_folders:
remove_deprecated_seg(vfolder, '*traces*') remove_deprecated_seg(vfolder, '*traces*')
remove_deprecated_seg(vfolder, '*cats*') remove_deprecated_seg(vfolder, '*cats*')
remove_deprecated_im(vfolder, '*AcTub*')
remove_deprecated_im(vfolder, '*Pty2*')
remove_deprecated_im(vfolder, '*meds_all_genes*')
if __name__ == '__main__': if __name__ == '__main__':
...@@ -240,7 +314,7 @@ if __name__ == '__main__': ...@@ -240,7 +314,7 @@ if __name__ == '__main__':
# remove_deprecated_data() # remove_deprecated_data()
# change names and xmls in the rawfolder # change names and xmls in the rawfolder
migrate_rawfolder() # migrate_rawfolder()
# version = '0.0.0' version = '0.0.0'
# migrate_version(version) migrate_version(version)
...@@ -41,6 +41,12 @@ NEW_GENE_NAMES = { ...@@ -41,6 +41,12 @@ NEW_GENE_NAMES = {
"ENR69": "BCA1", "ENR69": "BCA1",
"ENR71": "Patched" "ENR71": "Patched"
} }
# Segmentation names whose tables / paintera projects are updated dynamically;
# see get_dynamic_segmentation_properties for the per-name metadata.
DYNAMIC_SEGMENTATIONS = ['sbem-6dpf-1-whole-segmented-cells',
                         'sbem-6dpf-1-whole-segmented-cilia',
                         'sbem-6dpf-1-whole-segmented-nuclei']
ROOT = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data' ROOT = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data'
FILE_NAME_LUT = {} FILE_NAME_LUT = {}
...@@ -104,7 +110,7 @@ def update_name_lut(): ...@@ -104,7 +110,7 @@ def update_name_lut():
def update_image_properties(): def update_image_properties():
global IMAGE_PROPERTIES global IMAGE_PROPERTIES
for name in FILE_NAME_LUT.values(): for name in FILE_NAME_LUT.values():
properties = {} properties = {'Storage': {'local': 'local/%s.xml' % name}}
table_folder = 'tables/%s' % name table_folder = 'tables/%s' % name
# prospr: Color Magenta # prospr: Color Magenta
...@@ -115,29 +121,21 @@ def update_image_properties(): ...@@ -115,29 +121,21 @@ def update_image_properties():
else: else:
properties.update({'Color': 'Magenta', 'MinValue': 0, 'MaxValue': 1000}) properties.update({'Color': 'Magenta', 'MinValue': 0, 'MaxValue': 1000})
# handle all special segmentations: # handle all real segmentations with glasbey color map and tables
# - dynamic and with tables: # - cells
# -- cells # - chromatin
# -- cilia # - cilia
# -- nuclei # - ganglia
elif 'segmented-cells' in name: # - nuclei
paintera_project = '' # - tissue
table_update_function = '' elif ('segmented-cells' in name
# TODO postprocessing options in Dynamic or 'segmented-chromatin' in name
properties.update({'ColorMap': 'Glasbey', or 'segmented-cilia' in name
'TableFolder': table_folder, or 'segmented-ganglia' in name
'Dynamic': {'PainteraProject': paintera_project, or 'segmented-nuclei' in name
'TableUpdateFunction': table_update_function}}) or 'segmented-tissue' in name):
# - static but with tables:
# -- chromatin
# -- tissue
# -- ganglia
elif ('segmented-chromatin' in name
or 'segmented-tissue' in name
or 'segmented-ganglia' in name):
properties.update({'ColorMap': 'Glasbey', 'TableFolder': table_folder}) properties.update({'ColorMap': 'Glasbey', 'TableFolder': table_folder})
# TODO is white correct ?
# all other segmentations are binary masks # all other segmentations are binary masks
elif '-segmented' in name: elif '-segmented' in name:
properties.update({'Color': 'White', 'MinValue': 0, 'MaxValue': 1}) properties.update({'Color': 'White', 'MinValue': 0, 'MaxValue': 1})
...@@ -155,15 +153,64 @@ update_image_properties() ...@@ -155,15 +153,64 @@ update_image_properties()
def look_up_filename(file_name):
    """Translate an old file name into the new naming scheme.

    The name is looked up in FILE_NAME_LUT first; ENR/NOV-style prospr
    names that are missing from the LUT are re-assembled by mapping the
    gene-name component via NEW_GENE_NAMES plus a few hard-coded renames.

    Arguments:
        file_name [str] - old file name (without extension)
    Returns:
        str - the corresponding new file name
    """
    new_file_name = FILE_NAME_LUT.get(file_name, None)
    # Try to match ENR/NOV filenames
    if new_file_name is None:
        # names are assumed to look like <modality>-<stage>-<id>-<region>-<gene>,
        # so index 4 holds the gene / segmentation component
        # NOTE(review): raises IndexError for names with fewer than five
        # '-'-separated parts - presumably all inputs conform; confirm.
        old_gene_name = file_name.split('-')[4]
        # hox5 was renamed to hox4
        if old_gene_name.lower() == 'hox5':
            gene_name = 'hox4'
        # irx was renamed to irx6
        elif old_gene_name.lower() == 'irx':
            gene_name = 'irx6'
        # prospr reference volume was renamed
        elif old_gene_name.lower() == 'ref':
            gene_name = 'segmented-prospr6-ref'
        # muscles lost an s at some point
        elif old_gene_name == 'segmented' and file_name.split('-')[5] == 'muscles':
            gene_name = 'segmented-muscle'
        else:
            assert old_gene_name in NEW_GENE_NAMES, file_name
            gene_name = NEW_GENE_NAMES[old_gene_name].lower()
        # re-assemble the name with the translated gene component and make
        # sure the result is a known new-style name
        new_file_name = '-'.join(file_name.split('-')[:4] + [gene_name])
        assert new_file_name in FILE_NAME_LUT.values(), new_file_name
    return new_file_name
def get_image_properties(name):
    """Return the display properties registered for the image 'name'.

    Raises:
        KeyError - if 'name' is not in IMAGE_PROPERTIES.
    """
    return IMAGE_PROPERTIES[name]
# TODO currently we have a lot of different version of paintera projects.
# for cells and cilia, the most up-to-date are actually the label-multiset variants
# need to clean that up and move the most up to date versions to the names used here,
# but need to coordinate with valentyna first
def get_dynamic_segmentation_properties(name):
    """Return the paintera-project and table-update metadata for a
    dynamically updated segmentation.

    Arguments:
        name [str] - segmentation name (see DYNAMIC_SEGMENTATIONS)
    Returns:
        dict with keys 'PainteraProject', 'TableUpdateFunction' (and
        'Postprocess' for the cell segmentation), or None if 'name' is
        not a dynamic segmentation.
    """
    # all paintera projects live in the same n5 container
    paintera_root = '/g/kreshuk/data/arendt/platyneris_v1/data.n5'
    # FIX: the key was written as 'PainteraProject:' (stray colon inside the
    # string literal) in all three branches; renamed to 'PainteraProject' for
    # consistency with the image properties written elsewhere.
    dynamic_properties = {
        # cell segmentation
        'sbem-6dpf-1-whole-segmented-cells': {
            'PainteraProject': [paintera_root, 'volumes/paintera/proofread_cells'],
            'TableUpdateFunction': 'make_cell_tables',
            'Postprocess': {"BoundaryPath": paintera_root,
                            "BoundaryKey": "volumes/affinities/s1",
                            "MaxSegmentNumber": 32700,
                            "LabelSegmentation": False}},
        # cilia segmentation
        'sbem-6dpf-1-whole-segmented-cilia': {
            'PainteraProject': [paintera_root, 'volumes/paintera/proofread_cilia'],
            'TableUpdateFunction': 'make_cilia_tables'},
        # nuclei segmentation
        'sbem-6dpf-1-whole-segmented-nuclei': {
            'PainteraProject': [paintera_root, 'volumes/paintera/nuclei'],
            'TableUpdateFunction': 'make_nuclei_tables'}}
    return dynamic_properties.get(name)
if __name__ == '__main__': if __name__ == '__main__':
x = json.dumps(FILE_NAME_LUT, sort_keys=True, indent=2) # x = json.dumps(FILE_NAME_LUT, sort_keys=True, indent=2)
print(x) # print(x)
# with open('/home/pape/new_names.json', 'w') as f: with open('/g/kreshuk/pape/new_names.json', 'w') as f:
# json.dump(FILE_NAME_LUT, f, sort_keys=True, indent=2) json.dump(FILE_NAME_LUT, f, sort_keys=True, indent=2)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment