diff --git a/add_existing_data.py b/add_existing_data.py
new file mode 100644
index 0000000000000000000000000000000000000000..9177985fd671d3fb29b8b86125d90abf0de2e81d
--- /dev/null
+++ b/add_existing_data.py
@@ -0,0 +1,41 @@
+import os
+from glob import glob
+from scripts.sources import add_source, add_image, add_segmentation
+
+
+def add_sources():
+    """ Register the em ('sbem') and the prospr source, both with '6dpf'.
+    """
+    for source_name in ('sbem', 'prospr'):
+        add_source(source_name, '6dpf')
+
+
+def add_images():
+    """ Register all prospr images in the 0.2.1 image folder (data is not copied).
+    """
+    base_folder = './data/0.2.1/images'
+    prospr_prefix = 'prospr-6dpf-1-whole'
+    # 'glob' is the function imported from the glob module; sort for a stable order
+    for impath in sorted(glob(os.path.join(base_folder, '%s-*' % prospr_prefix))):
+        name, ext = os.path.splitext(os.path.split(impath)[1])
+        if ext != '.xml':
+            continue
+        # strip the source prefix and the '-' after it to get the image name
+        add_image(impath, prospr_prefix, name[(len(prospr_prefix) + 1):], copy_data=False)
+
+
+def add_segmentations():
+    """ Placeholder, no segmentations are added yet. """
+
+
+def add_existing_data():
+    """ Register the existing data in the json files that keep
+        track of sources, image data and segmentations.
+    """
+    # run the registration steps one after the other: sources, images, segmentations
+    for register in (add_sources, add_images, add_segmentations):
+        register()
+
+
+if __name__ == '__main__':  # only executed when this file is run as a script
+    add_existing_data()
diff --git a/scripts/files/__init__.py b/scripts/files/__init__.py
index 68fd6c7b885204a1f06523e8196f42ddbb8380c0..390c39cf1ac8ba23242a6797d1765e30a33f3bdd 100644
--- a/scripts/files/__init__.py
+++ b/scripts/files/__init__.py
@@ -3,3 +3,4 @@ from .folders import make_folder_structure
 from .xml_utils import get_h5_path_from_xml, copy_xml_with_abspath, write_simple_xml
 from .xml_utils import copy_xml_with_newpath
 from .bdv_server import make_bdv_server_file
+from .checks import check_bdv
diff --git a/scripts/files/checks.py b/scripts/files/checks.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f9f433ea03f30687c3e7a143a4ed503f71bd0c7
--- /dev/null
+++ b/scripts/files/checks.py
@@ -0,0 +1,14 @@
+import os
+from .xml_utils import get_h5_path_from_xml
+
+
+# TODO check more attributes in the xml to make sure that this actually is
+# a bdv format file
+def check_bdv(path):
+    """ Check that path is an xml file and that the h5 file
+        returned by get_h5_path_from_xml for it exists.
+    """
+    is_xml = os.path.splitext(path)[1] == '.xml'
+    if not is_xml:
+        return False
+    return os.path.exists(get_h5_path_from_xml(path, return_absolute_path=True))
diff --git a/scripts/files/copy_helper.py b/scripts/files/copy_helper.py
index 7496bcfaab20ce20e6d89a3707d5705ad69665f7..9e4d2324e6c8f05f2e2118b1ceeb3c005ab51a18 100644
--- a/scripts/files/copy_helper.py
+++ b/scripts/files/copy_helper.py
@@ -1,7 +1,7 @@
 import os
-import glob
 import shutil
 from .xml_utils import copy_xml_with_newpath, get_h5_path_from_xml
+from ..sources import get_image_names, RAW_FOLDER
 
 
 def copy_file(xml_in, xml_out):
@@ -11,17 +11,6 @@ def copy_file(xml_in, xml_out):
     copy_xml_with_newpath(xml_in, xml_out, h5path, path_type='relative')
 
 
-def copy_files_with_pattern(src_folder, dst_folder, pattern):
-    files = glob.glob(os.path.join(src_folder, pattern))
-    for ff in files:
-        ext = os.path.splitext(ff)[1]
-        if ext != '.xml':
-            continue
-        xml_in = ff
-        xml_out = os.path.join(dst_folder, os.path.split(ff)[1])
-        copy_file(xml_in, xml_out)
-
-
 # For now we put symlinks with relative paths, but I am not sure
 # if this is the best idea, because I don't know if it will work on windows
 def copy_tables(src_folder, dst_folder, name):
@@ -60,17 +49,24 @@ def copy_segmentation(src_folder, dst_folder, name):
 
 
 def copy_image_data(src_folder, dst_folder):
-    # copy sbem image data
-    sbem_prefix = 'sbem-6dpf-1-whole'
-    raw_name = '%s-raw.xml' % sbem_prefix
-    copy_file(os.path.join(src_folder, raw_name),
-              os.path.join(dst_folder, raw_name))
-
-    # copy the prospr med image data
-    copy_files_with_pattern(src_folder, dst_folder, '*-MED*')
-
-    # copy the segmented prospr regions
-    copy_files_with_pattern(src_folder, dst_folder, 'prospr-6dpf-1-whole-segmented-*')
+    """ Copy the xml of every registered image from src_folder to dst_folder.
+
+    A name that was just added is not in src_folder (the old version folder)
+    yet; in that case its xml is taken from the raw folder instead.
+    """
+    for image_name in get_image_names():
+        xml_name = image_name + '.xml'
+        # look in the old version folder first and fall back to the raw folder
+        candidates = (
+            os.path.join(src_folder, xml_name),
+            os.path.join(RAW_FOLDER, xml_name),
+        )
+        in_path = next((path for path in candidates if os.path.exists(path)), None)
+        if in_path is None:
+            msg = "Could not find %s in either the src folder %s or raw folder %s"
+            raise RuntimeError(msg % (xml_name, src_folder, RAW_FOLDER))
+        # copy the xml
+        copy_file(in_path, os.path.join(dst_folder, xml_name))
 
 
 def copy_misc_data(src_folder, dst_folder):
diff --git a/scripts/sources.py b/scripts/sources.py
index 8e8bb090b77788b62d7c8d5e3ec31d9ebcef86d2..6b113ccfec4a0ca9b2e6fa614180e4f62fc2c9e7 100644
--- a/scripts/sources.py
+++ b/scripts/sources.py
@@ -1,9 +1,12 @@
 import json
-from.files import check_bdv
+import os
+from shutil import copyfile
+from.files import check_bdv, get_h5_path_from_xml, copy_xml_with_newpath
 
-SOURCE_FILE = '../data/sources.json'
-SEGMENTATION_FILE = '../data/segmentations.json'
-IMAGE_FILE = '../data/images.json'
+RAW_FOLDER = 'data/rawdata'  # folder the xml / h5 files of added images are copied to
+SOURCE_FILE = 'data/sources.json'  # NOTE: all paths here are relative to the working directory
+SEGMENTATION_FILE = 'data/segmentations.json'  # json dict, its keys are the segmentation names
+IMAGE_FILE = 'data/images.json'  # json list with the names of the added images
 
 
 def get_sources():
@@ -52,20 +55,22 @@ def get_name_prefixes():
     return prefixes
 
 
-def load_image_names():
+def get_image_names():  # returns the json-decoded content of IMAGE_FILE (the list of image names)
     with open(IMAGE_FILE) as f:
         names = json.load(f)
     return names
 
 
-def add_image(input_path, source_prefix, name):
+def add_image(input_path, source_prefix, name, copy_data=True):
     """ Add image volume to the platy browser data.
 
     Parameter:
         input_path [str] - path to the data that should be added.
-            Needs to be in bdv data format.
+            Data needs to be in bdv-hdf5 format and the path needs to point to the xml.
         source_prefix [str] - prefix of the primary data source.
         name [str] - name of the data.
+        copy_data [bool] - whether to copy the data. This should be set to True,
+            unless adding an image that is already in the rawdata folder. (default: True)
     """
     # validate the inputs
     prefixes = get_name_prefixes()
@@ -76,11 +81,22 @@ def add_image(input_path, source_prefix, name):
     if not is_bdv:
         raise ValueError("Expect input to be in bdv format")
     output_name = '%s-%s' % (source_prefix, name)
-    names = load_image_names()
+    names = get_image_names()
     if output_name in names:
         raise ValueError("Name %s is already taken" % output_name)
 
-    # TODO copy h5 and xml to the rawdata folder, update the xml with new relative path
+    h5_path = get_h5_path_from_xml(input_path, return_absolute_path=True)
+    name_h5 = '%s.h5' % output_name
+    out_xml = os.path.join(RAW_FOLDER, '%s.xml' % output_name)
+    out_h5 = os.path.join(RAW_FOLDER, name_h5)
+    if copy_data:
+        # copy h5 and xml to the rawdata folder, update the xml with new relative path
+        copyfile(h5_path, out_h5)
+        copy_xml_with_newpath(input_path, out_xml, name_h5)
+    else:
+        if not os.path.exists(out_xml) or not os.path.exists(out_h5):
+            raise RuntimeError("""You did not specify to copy the data, but
+                                  %s and %s do not exist yet""" % (out_xml, out_h5))
 
     # add name to the name list and serialze
     names.append(output_name)
@@ -88,7 +104,7 @@ def add_image(input_path, source_prefix, name):
         json.dump(names, f)
 
 
-def load_segmentation_names():
+def get_segmentation_names():  # returns the keys of the json dict in SEGMENTATION_FILE as a list
     with open(SEGMENTATION_FILE) as f:
         names = list(json.load(f).keys())
     return names