From 423233da6f6899b57fd15c422b70631dd9b5ea79 Mon Sep 17 00:00:00 2001
From: Constantin Pape <c.pape@gmx.net>
Date: Fri, 21 Jun 2019 12:47:11 +0200
Subject: [PATCH] Implement file renaming WIP

---
 make_data_sources.py       | 66 ++++++++++++++++++++++++++++++++++++++
 scripts/__init__.py        |  1 +
 scripts/files/__init__.py  |  1 +
 scripts/files/xml_utils.py | 16 +++++++++
 scripts/sources.py         | 28 ++++++++++++++++
 5 files changed, 112 insertions(+)
 create mode 100644 make_data_sources.py
 create mode 100644 scripts/sources.py

diff --git a/make_data_sources.py b/make_data_sources.py
new file mode 100644
index 0000000..1c62b54
--- /dev/null
+++ b/make_data_sources.py
@@ -0,0 +1,66 @@
+import os
+import glob
+from scripts.files import get_h5_path_from_xml, copy_xml_with_newpath
+
+
+def copy_xmls_and_symlink_h5(name_dict, src_folder, trgt_folder):
+    """Copy xml files to new names and symlink the h5 files they refer to.
+
+    Arguments:
+        name_dict [dict]: maps xml file names in `src_folder`
+            to the new xml file names in `trgt_folder`
+        src_folder [str]: folder containing the original xml files
+        trgt_folder [str]: folder for the new xml files and h5 links
+    """
+    os.makedirs(trgt_folder, exist_ok=True)
+    for n1, n2 in name_dict.items():
+        src_xml = os.path.join(src_folder, n1)
+        trgt_xml = os.path.join(trgt_folder, n2)
+
+        # we make a softlink from src to target h5
+        # NOTE eventually, we want to copy all the data, but
+        # for now, we use softlinks in order to keep the current
+        # version of the platy browser working
+        src_h5 = get_h5_path_from_xml(src_xml, return_absolute_path=True)
+        # put the link next to the new xml, not into the working directory;
+        # the path is absolute because the xml stores it with type 'absolute'
+        trgt_h5 = os.path.abspath(os.path.splitext(trgt_xml)[0] + '.h5')
+        os.symlink(src_h5, trgt_h5)
+
+        copy_xml_with_newpath(src_xml, trgt_xml, trgt_h5)
+
+
+def make_initial_data_sources():
+    """Create the renamed sbem, prospr and fibsem xmls in './data/rawdata'."""
+    old_folder = '/g/arendt/EM_6dpf_segmentation/EM-Prospr'
+    raw_folder = './data/rawdata'
+
+    # copy the sbem data
+    # NOTE(review): the file names below look like WIP placeholders - confirm
+    sbem_prefix = 'sbem-6dpf-1-whole'
+    name_dict = {'em-raw-': 'em-raw',
+                 'em-segmented-': ''}
+    name_dict = {k: '%s-%s' % (sbem_prefix, v)
+                 for k, v in name_dict.items()}
+    copy_xmls_and_symlink_h5(name_dict, old_folder, raw_folder)
+
+    # copy the prospr meds
+    prospr_prefix = 'prospr-6dpf-1-whole'
+    prospr_names = glob.glob(os.path.join(old_folder, "*-MED*"))
+    # os.path.split returns (head, tail); we only need the file name
+    prospr_names = [os.path.basename(f) for f in prospr_names]
+    name_dict = {n: '%s-%s' % (prospr_prefix, n) for n in prospr_names}
+    copy_xmls_and_symlink_h5(name_dict, old_folder, raw_folder)
+
+    # copy the fibsem data
+    # NOTE(review): same placeholder names and source folder as sbem - confirm
+    fib_prefix = 'fibsem-6dpf-1-parapod'
+    name_dict = {'em-raw-': 'em-raw',
+                 'em-segmented-': ''}
+    name_dict = {k: '%s-%s' % (fib_prefix, v)
+                 for k, v in name_dict.items()}
+    copy_xmls_and_symlink_h5(name_dict, old_folder, raw_folder)
+
+
+if __name__ == '__main__':
+    make_initial_data_sources()
diff --git a/scripts/__init__.py b/scripts/__init__.py
index e69de29..fdf8054 100644
--- a/scripts/__init__.py
+++ b/scripts/__init__.py
@@ -0,0 +1 @@
+from .sources import get_name_prefixes
diff --git a/scripts/files/__init__.py b/scripts/files/__init__.py
index 111cf66..bc36dfb 100644
--- a/scripts/files/__init__.py
+++ b/scripts/files/__init__.py
@@ -2,4 +2,5 @@ from .copy_helper import copy_tables, copy_segmentation, copy_static_files
 from .copy_helper import copy_files_with_pattern
 from .folders import make_folder_structure
 from .xml_utils import get_h5_path_from_xml, copy_xml_with_abspath, write_simple_xml
+from .xml_utils import copy_xml_with_newpath
 from .bdv_server import make_bdv_server_file
diff --git a/scripts/files/xml_utils.py b/scripts/files/xml_utils.py
index e297343..7021d71 100644
--- a/scripts/files/xml_utils.py
+++ b/scripts/files/xml_utils.py
@@ -58,6 +58,22 @@ def copy_xml_with_abspath(xml_in, xml_out):
     tree.write(xml_out)
 
 
+def copy_xml_with_newpath(xml_in, xml_out, h5path):
+    """Write a copy of `xml_in` to `xml_out` that points to `h5path`.
+
+    The path is stored with type 'absolute'.
+    """
+    et_root = ET.parse(xml_in).getroot()
+    # the element whose text is the h5 path sits at root[1][0][0]
+    et = et_root[1][0][0]
+    # write new xml with the new path
+    et.text = h5path
+    et.set('type', 'absolute')
+    indent_xml(et_root)
+    tree = ET.ElementTree(et_root)
+    tree.write(xml_out)
+
+
 def write_simple_xml(xml_path, h5_path, path_type='absolute'):
     # write top-level data
     root = ET.Element('SpimData')
diff --git a/scripts/sources.py b/scripts/sources.py
new file mode 100644
index 0000000..9e736b1
--- /dev/null
+++ b/scripts/sources.py
@@ -0,0 +1,28 @@
+# this file contains information about the current data sources, see
+# https://git.embl.de/tischer/platy-browser-tables/blob/dev/README.md#file-naming
+
+# TODO maybe store this as external file in json
+# list of the current data sources
+SOURCES = [{'modality': 'sbem', 'stage': '6dpf', 'id': '1', 'region': 'whole'},
+           {'modality': 'prospr', 'stage': '6dpf', 'id': '1', 'region': 'whole'},
+           {'modality': 'fibsem', 'stage': '6dpf', 'id': '1', 'region': 'parapod'}]
+
+
+def get_sources():
+    """Return the list of current data sources."""
+    return SOURCES
+
+
+def source_to_prefix(source):
+    """Return the name prefix 'modality-stage-id-region' for a source."""
+    return '%s-%s-%s-%s' % (source['modality'],
+                            source['stage'],
+                            source['id'],
+                            source['region'])
+
+
+def get_name_prefixes():
+    """Return the name prefixes of all current data sources."""
+    sources = get_sources()
+    prefixes = [source_to_prefix(source) for source in sources]
+    return prefixes
-- 
GitLab