Implement mechanism to add new image data and start retrofitting the existing data

083d67bd · Constantin Pape · 36bb50bb · 083d67bd · 083d67bd · 083d67bd
Commit 083d67bd authored 5 years ago by Constantin Pape
--- a/add_existing_data.py
+++ b/add_existing_data.py
+import os
+from glob import glob
+from scripts.sources import add_source, add_image, add_segmentation
+
+
+def add_sources():
+    # add em source
+    add_source('sbem', '6dpf')
+    # add prospr source
+    add_source('prospr', '6dpf')
+
+
+def add_images():
+    base_folder = './data/0.2.1/images'
+    # add all prospr images
+    prospr_prefix = 'prospr-6dpf-1-whole'
+    prospr_ims = glob.glob(os.path.join(base_folder, 'prospr-6dpf-1-whole-*'))
+    for impath in prospr_ims:
+        name = os.path.split(impath)[1]
+        name, ext = os.path.splitext()
+        if ext != '.xml':
+            continue
+        name = name[(len(prospr_prefix) + 1):]
+        add_image(impath, prospr_prefix, name, copy_data=False)
+
+
+def add_segmentations():
+    """ Placeholder for adding the existing segmentations; does nothing yet.
+    """
+    pass
+
+
+def add_existing_data():
+    """ Add existing data to the json files that keep track of
+        sources, image data and segmentations.
+    """
+    add_sources()
+    add_images()
+    add_segmentations()
+
+
+# only add the existing data when this file is executed as a script
+if __name__ == '__main__':
+    add_existing_data()
--- a/scripts/files/__init__.py
+++ b/scripts/files/__init__.py
@@ -3,3 +3,4 @@ from .folders import make_folder_structure
 from .xml_utils import get_h5_path_from_xml, copy_xml_with_abspath, write_simple_xml
 from .xml_utils import copy_xml_with_newpath
 from .bdv_server import make_bdv_server_file
+from .checks import check_bdv
--- a/scripts/files/checks.py
+++ b/scripts/files/checks.py
+import os
+from .xml_utils import get_h5_path_from_xml
+
+
+# TODO check more attributes in the xml to make sure that this actually is
+# a bdv format file
+def check_bdv(path):
+    ext = os.path.splitext(path)[1]
+    if ext != '.xml':
+        return False
+    h5_path = get_h5_path_from_xml(path, return_absolute_path=True)
+    if not os.path.exists(h5_path):
+        return False
+    return True
--- a/scripts/files/copy_helper.py
+++ b/scripts/files/copy_helper.py
 import os
-import glob
 import shutil
 from .xml_utils import copy_xml_with_newpath, get_h5_path_from_xml
+from ..sources import get_image_names, RAW_FOLDER


 def copy_file(xml_in, xml_out):
@@ -11,17 +11,6 @@ def copy_file(xml_in, xml_out):
    copy_xml_with_newpath(xml_in, xml_out, h5path, path_type='relative')


-def copy_files_with_pattern(src_folder, dst_folder, pattern):
-    files = glob.glob(os.path.join(src_folder, pattern))
-    for ff in files:
-        ext = os.path.splitext(ff)[1]
-        if ext != '.xml':
-            continue
-        xml_in = ff
-        xml_out = os.path.join(dst_folder, os.path.split(ff)[1])
-        copy_file(xml_in, xml_out)
-
-
 # For now we put symlinks with relative paths, but I am not sure
 # if this is the best idea, because I don't know if it will work on windows
 def copy_tables(src_folder, dst_folder, name):
@@ -60,17 +49,24 @@ def copy_segmentation(src_folder, dst_folder, name):


 def copy_image_data(src_folder, dst_folder):
-    # copy sbem image data
-    sbem_prefix = 'sbem-6dpf-1-whole'
-    raw_name = '%s-raw.xml' % sbem_prefix
-    copy_file(os.path.join(src_folder, raw_name),
-              os.path.join(dst_folder, raw_name))
-
-    # copy the prospr med image data
-    copy_files_with_pattern(src_folder, dst_folder, '*-MED*')
-
-    # copy the segmented prospr regions
-    copy_files_with_pattern(src_folder, dst_folder, 'prospr-6dpf-1-whole-segmented-*')
+    # get the names of all images that need to be copied (as returned by get_image_names)
+    names = get_image_names()
+
+    for name in names:
+        name += '.xml'
+        in_path = os.path.join(src_folder, name)
+        out_path = os.path.join(dst_folder, name)
+        # we might have just added the image name, so it's not
+        # in the old version folder yet. In that case it must be
+        # in the raw folder, so we fall back to RAW_FOLDER
+        if not os.path.exists(in_path):
+            in_path = os.path.join(RAW_FOLDER, name)
+        if not os.path.exists(in_path):
+            raise RuntimeError("Could not find %s in either the src folder %s or raw folder %s" % (name,
+                                                                                                   src_folder,
+                                                                                                   RAW_FOLDER))
+        # copy the xml from the location that was found to the dst folder
+        copy_file(in_path, out_path)


 def copy_misc_data(src_folder, dst_folder):

--- a/scripts/sources.py
+++ b/scripts/sources.py
 import json
-from.files import check_bdv
+import os
+from shutil import copyfile
+from.files import check_bdv, get_h5_path_from_xml, copy_xml_with_newpath

-SOURCE_FILE = '../data/sources.json'
-SEGMENTATION_FILE = '../data/segmentations.json'
-IMAGE_FILE = '../data/images.json'
+RAW_FOLDER = 'data/rawdata'
+SOURCE_FILE = 'data/sources.json'
+SEGMENTATION_FILE = 'data/segmentations.json'
+IMAGE_FILE = 'data/images.json'


 def get_sources():
@@ -52,20 +55,22 @@ def get_name_prefixes():
    return prefixes


-def load_image_names():
+def get_image_names():
    with open(IMAGE_FILE) as f:
        names = json.load(f)
    return names


-def add_image(input_path, source_prefix, name):
+def add_image(input_path, source_prefix, name, copy_data=True):
    """ Add image volume to the platy browser data.

    Parameter:
        input_path [str] - path to the data that should be added.
-            Needs to be in bdv data format.
+            Data needs to be in bdv-hdf5 format and the path needs to point to the xml.
        source_prefix [str] - prefix of the primary data source.
        name [str] - name of the data.
+        copy_data [bool] - whether to copy the data. This should be set to True,
+            unless adding an image that is already in the rawdata folder. (default: True)
    """
    # validate the inputs
    prefixes = get_name_prefixes()
@@ -76,11 +81,22 @@ def add_image(input_path, source_prefix, name):
    if not is_bdv:
        raise ValueError("Expect input to be in bdv format")
    output_name = '%s-%s' % (source_prefix, name)
-    names = load_image_names()
+    names = get_image_names()
    if output_name in names:
        raise ValueError("Name %s is already taken" % output_name)

-    # TODO copy h5 and xml to the rawdata folder, update the xml with new relative path
+    h5_path = get_h5_path_from_xml(input_path, return_absolute_path=True)
+    name_h5 = '%s.h5' % output_name
+    out_xml = os.path.join(RAW_FOLDER, '%s.xml' % output_name)
+    out_h5 = os.path.join(RAW_FOLDER, name_h5)
+    if copy_data:
+        # copy h5 and xml to the rawdata folder, update the xml with new relative path
+        copyfile(h5_path, out_h5)
+        copy_xml_with_newpath(input_path, out_xml, name_h5)
+    else:
+        if not os.path.exists(out_xml) or not os.path.exists(out_h5):
+            raise RuntimeError("""You did not specify to copy the data, but
+                                  %s and %s do not exist yet""" % (out_xml, out_h5))

    # add name to the name list and serialze
    names.append(output_name)
@@ -88,7 +104,7 @@ def add_image(input_path, source_prefix, name):
        json.dump(names, f)


-def load_segmentation_names():
+def get_segmentation_names():
    with open(SEGMENTATION_FILE) as f:
        names = list(json.load(f).keys())
    return names