From 351eb33f897061a1f10d6fde85e775c4547ffa2a Mon Sep 17 00:00:00 2001
From: Constantin Pape <c.pape@gmx.net>
Date: Thu, 6 Feb 2020 21:57:51 +0100
Subject: [PATCH] Start to implement n5 conversion

---
 mmpb/files/copy_helper.py | 49 +++++++++++++++++++++++++++++++++++++++
 mmpb/files/migration.py   | 49 +++++++++++++++++++++++++++++++++++----
 2 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/mmpb/files/copy_helper.py b/mmpb/files/copy_helper.py
index 4d032dd..9411984 100644
--- a/mmpb/files/copy_helper.py
+++ b/mmpb/files/copy_helper.py
@@ -1,5 +1,12 @@
 import os
 import shutil
+import numpy as np
+
+from elf.io import open_file
+from pybdv.converter import copy_dataset
+from pybdv.metadata import write_n5_metadata
+from pybdv.util import get_key, get_number_of_scales, get_scale_factors
+
 from .xml_utils import copy_xml_with_newpath, get_h5_path_from_xml
 from .sources import get_image_names, get_segmentation_names, get_segmentations
 from ..attributes.base_attributes import write_additional_table_file
@@ -130,3 +137,45 @@ def copy_release_folder(src_folder, dst_folder, exclude_prefixes=[]):
     copy_misc_data(src_folder, dst_folder)
     copy_segmentations(src_folder, dst_folder, exclude_prefixes)
     copy_all_tables(src_folder, dst_folder)
+
+
+def normalize_scale_factors(scale_factors, start_scale):
+    # the first (full-resolution) entry must be scale factor [1, 1, 1]
+    assert np.prod(scale_factors[0]) == 1
+
+    # turn absolute scale factors into factors relative to the previous scale
+    rel_scales = [scale_factors[0]]
+    for scale in range(1, len(scale_factors)):
+        rel_factor = [sf / prev_sf for sf, prev_sf in zip(scale_factors[scale],
+                                                          scale_factors[scale - 1])]
+        rel_scales.append(rel_factor)
+
+    # drop the scales below start_scale and re-anchor the first entry at [1, 1, 1]
+    new_factors = [[1., 1., 1.]] + rel_scales[(start_scale + 1):]
+    return new_factors
+
+
+def copy_to_bdv_n5(in_file, out_file, chunks, resolution,
+                   n_threads=32, start_scale=0):
+
+    n_scales = get_number_of_scales(in_file, 0, 0)
+    scale_factors = get_scale_factors(in_file, 0)
+    # sanity-check the newly implemented pybdv helpers against each other
+    assert n_scales == len(scale_factors)
+
+    scale_factors = normalize_scale_factors(scale_factors, start_scale)
+
+    for out_scale, in_scale in enumerate(range(start_scale, n_scales)):  # remap scale indices so output starts at 0
+        in_key = get_key(True, 0, 0, in_scale)
+        out_key = get_key(False, 0, 0, out_scale)
+
+        if chunks is None:  # fall back to the input dataset's own chunking
+            with open_file(in_file, 'r') as f:
+                chunks_ = f[in_key].chunks
+        else:
+            chunks_ = chunks
+
+        copy_dataset(in_file, in_key, out_file, out_key, False,
+                     chunks_, n_threads)
+
+    write_n5_metadata(out_file, scale_factors, resolution, setup_id=0)
diff --git a/mmpb/files/migration.py b/mmpb/files/migration.py
index 948cf76..e32097f 100644
--- a/mmpb/files/migration.py
+++ b/mmpb/files/migration.py
@@ -2,9 +2,11 @@ import os
 import json
 import shutil
 from glob import glob
+from pybdv.metadata import get_resolution
 from mmpb.files.name_lookup import (look_up_filename, get_image_properties,
                                     DYNAMIC_SEGMENTATIONS, get_dynamic_segmentation_properties)
 from mmpb.files.xml_utils import get_h5_path_from_xml, copy_xml_with_newpath
+from mmpb.files.copy_helper import copy_to_bdv_n5
 
 ROOT = '/g/arendt/EM_6dpf_segmentation/platy-browser-data/data'
 DRY_RUN = True
@@ -250,8 +252,42 @@ def migrate_rawfolder():
 
 
 # iterate over all the xmls in this version, follow the links
-# and replace h5 files with n5 (if necessary)
-def to_n5(version):
+# and make corresponding n5 files (if they don't exist yet)
+def make_n5_files(version):
+    version_folder = os.path.join(ROOT, version)
+
+    # default chunk size for the converted n5 datasets
+    default_chunks = 3 * (128,)
+    # per-dataset chunk overrides; a value of None marks datasets to skip
+    chunk_dict = {'sbem-6dpf-1-whole-raw': None}  # don't copy raw yet
+
+    paths_to_remove = []
+
+    xmls = glob(os.path.join(version_folder, 'images', 'local', '*.xml'))
+    for xml in xmls:
+        name = os.path.splitext(os.path.split(xml)[1])[0]
+        chunks = chunk_dict.get(name, default_chunks)
+        # chunks None means we skip copying for now
+        if chunks is None:
+            continue
+
+        h5_path = get_h5_path_from_xml(xml, return_absolute_path=True)
+        n5_path = os.path.splitext(h5_path)[0] + '.n5'
+        if os.path.exists(n5_path):
+            continue
+
+        # load the voxel resolution from the bdv xml
+        resolution = get_resolution(xml)
+        copy_to_bdv_n5(h5_path, n5_path, chunks, resolution)
+
+        paths_to_remove.append(h5_path)
+
+    return paths_to_remove
+
+
+# TODO(review): implement - rewrite a version's bdv xmls to reference
+# the n5 file whenever one exists at the image location
+def update_n5_xmls(version):
     pass
 
 
@@ -314,5 +350,10 @@ if __name__ == '__main__':
     # change names and xmls in the rawfolder
     # migrate_rawfolder()
 
-    version = '0.0.1'
-    migrate_version(version)
+    # version = '0.0.1'
+    # migrate_version(version)
+
+    version = '0.6.5'
+    paths_to_remove = make_n5_files(version)
+    print(paths_to_remove)
+    # update_n5_xmls(version)
-- 
GitLab