From 423233da6f6899b57fd15c422b70631dd9b5ea79 Mon Sep 17 00:00:00 2001
From: Constantin Pape <c.pape@gmx.net>
Date: Fri, 21 Jun 2019 12:47:11 +0200
Subject: [PATCH] Implement file renaming WIP

---
 make_data_sources.py       | 51 ++++++++++++++++++++++++++++++++++++++
 scripts/__init__.py        |  1 +
 scripts/files/__init__.py  |  1 +
 scripts/files/xml_utils.py | 14 +++++++++++
 scripts/sources.py         | 25 +++++++++++++++++++
 5 files changed, 92 insertions(+)
 create mode 100644 make_data_sources.py
 create mode 100644 scripts/sources.py

diff --git a/make_data_sources.py b/make_data_sources.py
new file mode 100644
index 0000000..1c62b54
--- /dev/null
+++ b/make_data_sources.py
@@ -0,0 +1,51 @@
+import os
+import glob
+from scripts.files import get_h5_path_from_xml, copy_xml_with_newpath
+
+
+def copy_xmls_and_symlink_h5(name_dict, src_folder, trgt_folder):
+    """Copy the xmls in name_dict (source name -> target name) from src_folder
+    to trgt_folder; each copy points to a symlink to the source h5 file."""
+    for n1, n2 in name_dict.items():
+        src_xml = os.path.join(src_folder, n1)
+        trgt_xml = os.path.join(trgt_folder, n2)
+        # NOTE eventually, we want to copy all the data, but for now we use
+        # softlinks to keep the current version of the platy browser working
+        src_h5 = get_h5_path_from_xml(src_xml, return_absolute_path=True)
+        # create the link next to the target xml (not in the cwd); the path
+        # is made absolute because the copied xml stores it as 'absolute'
+        trgt_h5 = os.path.abspath(os.path.splitext(trgt_xml)[0] + '.h5')
+        os.symlink(src_h5, trgt_h5)
+        copy_xml_with_newpath(src_xml, trgt_xml, trgt_h5)
+
+
+def make_initial_data_sources():
+    """Copy the xmls of the initial sbem, prospr and fibsem data sources to
+    './data/rawdata' under their new prefixed names (h5 data is symlinked)."""
+    old_folder = '/g/arendt/EM_6dpf_segmentation/EM-Prospr'
+    raw_folder = './data/rawdata'
+
+    # copy the sbem data
+    sbem_prefix = 'sbem-6dpf-1-whole'
+    name_dict = {'em-raw-': 'em-raw', 'em-segmented-': ''}
+    name_dict = {k: '%s-%s' % (sbem_prefix, v) for k, v in name_dict.items()}
+    copy_xmls_and_symlink_h5(name_dict, old_folder, raw_folder)
+
+    # copy the prospr meds
+    prospr_prefix = 'prospr-6dpf-1-whole'
+    prospr_names = glob.glob(os.path.join(old_folder, "*-MED*"))
+    # keep only the file names, os.path.split would give (head, tail) tuples
+    prospr_names = [os.path.basename(f) for f in prospr_names]
+    name_dict = {n: '%s-%s' % (prospr_prefix, n) for n in prospr_names}
+    copy_xmls_and_symlink_h5(name_dict, old_folder, raw_folder)
+
+    # copy the fibsem data
+    # NOTE(review): same names and source folder as for sbem - confirm (WIP)
+    fib_prefix = 'fibsem-6dpf-1-parapod'
+    name_dict = {'em-raw-': 'em-raw', 'em-segmented-': ''}
+    name_dict = {k: '%s-%s' % (fib_prefix, v) for k, v in name_dict.items()}
+    copy_xmls_and_symlink_h5(name_dict, old_folder, raw_folder)
+
+
+if __name__ == '__main__':  # only when run as a script, not on import
+    make_initial_data_sources()
diff --git a/scripts/__init__.py b/scripts/__init__.py
index e69de29..fdf8054 100644
--- a/scripts/__init__.py
+++ b/scripts/__init__.py
@@ -0,0 +1 @@
+from .sources import get_name_prefixes
diff --git a/scripts/files/__init__.py b/scripts/files/__init__.py
index 111cf66..bc36dfb 100644
--- a/scripts/files/__init__.py
+++ b/scripts/files/__init__.py
@@ -2,4 +2,5 @@ from .copy_helper import copy_tables, copy_segmentation, copy_static_files
 from .copy_helper import copy_files_with_pattern
 from .folders import make_folder_structure
 from .xml_utils import get_h5_path_from_xml, copy_xml_with_abspath, write_simple_xml
+from .xml_utils import copy_xml_with_newpath
 from .bdv_server import make_bdv_server_file
diff --git a/scripts/files/xml_utils.py b/scripts/files/xml_utils.py
index e297343..7021d71 100644
--- a/scripts/files/xml_utils.py
+++ b/scripts/files/xml_utils.py
@@ -58,6 +58,20 @@ def copy_xml_with_abspath(xml_in, xml_out):
     tree.write(xml_out)
 
 
+def copy_xml_with_newpath(xml_in, xml_out, h5path, path_type='absolute'):
+    """Copy xml_in to xml_out with the stored h5 path replaced by h5path.
+
+    path_type is stored as the 'type' attribute of the path element.
+    """
+    et_root = ET.parse(xml_in).getroot()
+    # the path element is located by position: root -> [1] -> [0] -> [0]
+    path_elem = et_root[1][0][0]
+    path_elem.text = h5path
+    path_elem.set('type', path_type)
+    indent_xml(et_root)
+    ET.ElementTree(et_root).write(xml_out)
+
+
 def write_simple_xml(xml_path, h5_path, path_type='absolute'):
     # write top-level data
     root = ET.Element('SpimData')
diff --git a/scripts/sources.py b/scripts/sources.py
new file mode 100644
index 0000000..9e736b1
--- /dev/null
+++ b/scripts/sources.py
@@ -0,0 +1,25 @@
+# this file contains information about the current data sources, see
+# https://git.embl.de/tischer/platy-browser-tables/blob/dev/README.md#file-naming
+
+# TODO maybe store this as external file in json
+# list of the current data sources; the keys are the parts of the name prefix
+SOURCES = [{'modality': 'sbem', 'stage': '6dpf', 'id': '1', 'region': 'whole'},
+           {'modality': 'prospr', 'stage': '6dpf', 'id': '1', 'region': 'whole'},
+           {'modality': 'fibsem', 'stage': '6dpf', 'id': '1', 'region': 'parapod'}]
+
+
+def get_sources():  # accessor for the module-level SOURCES list
+    return SOURCES  # the list object itself, not a copy
+
+
+def source_to_prefix(source):
+    """Return the name prefix '<modality>-<stage>-<id>-<region>' of source."""
+    # fields in the order in which they appear in the prefix
+    fields = ('modality', 'stage', 'id', 'region')
+    return '-'.join(str(source[field]) for field in fields)
+
+
+def get_name_prefixes():
+    """Return the list of name prefixes of all current data sources."""
+    # one prefix per entry of get_sources(), in the same order
+    return [source_to_prefix(src) for src in get_sources()]
-- 
GitLab