Commit 6fd7d80d authored by Constantin Pape

Merge branch 'master' of https://git.embl.de/grp-bio-it/ai4ia

parents 8ea26bba a5521c30
......@@ -9,7 +9,7 @@ Available tools:
## Data Layout
The goal of this small package is to provide an easy way to train different tools via the command line from the cell data layout.
The goal of this small package is to provide an easy way to train different tools via the command line from the same data layout.
In order to use it, you will need training data images and labels in the following layout:
```
root-folder/
......@@ -17,7 +17,11 @@ root-folder/
labels/
```
The folder `images` contains the training image data and `labels` contains the training labels, i.e. the segmented ground-truth images.
The corresponding images and labels **must have exactly the same name**.
The corresponding images and labels **must have exactly the same name** and **have exactly the same size**.
The image data can be provided as 8-bit or 16-bit integers as well as floating point.
The label images should be either 16-bit, 32-bit or 64-bit integers.
The data should be stored in tif format. For multi-channel images, we assume that they are stored channel-first, i.e. in cyx order.
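To double check that your data follows this layout, you can use a small script like the sketch below (only an illustration for tif images read with `imageio`; the root folder path is a placeholder):
```
import os
from glob import glob
import imageio

# placeholder path, replace with your actual root folder
root = "/path/to/root-folder"

image_paths = sorted(glob(os.path.join(root, "images", "*.tif")))
assert len(image_paths) > 0, "Did not find any images"
for image_path in image_paths:
    name = os.path.split(image_path)[1]
    label_path = os.path.join(root, "labels", name)
    # the label file must have exactly the same name as the image
    assert os.path.exists(label_path), f"Missing label image for {name}"
    image = imageio.imread(image_path)
    labels = imageio.imread(label_path)
    # the spatial (yx) dimensions must agree; the image may have an additional channel axis
    assert image.shape[-2:] == labels.shape[-2:], f"Shape mismatch for {name}"
```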
......@@ -80,7 +84,7 @@ TODO
- Enter your EMBL password.
- Ask for an interactive job on a GPU node:
- `srun -t 60:00 -N1 -n4 --mem 32G -p gpu -C "gpu=2080Ti|gpu=1080Ti" --gres=gpu:1 -W 0 --pty -E $SHELL`
- The time and memory needs to be adjusted (HOW?)
- The time and memory may need to be adjusted; see [the EMBL cluster wiki](https://wiki.embl.de/cluster/Main_Page) (only accessible from the EMBL intranet) or the [slurm documentation](https://slurm.schedmd.com/srun.html) for details.
- If the cluster is busy, it may take some time until you get the job...
- Store the IP address of the cluster node into a variable:
- `IP=$(hostname -i)`
......
name: cellpose-cpu
dependencies:
- python>3.4
- pip
- mkl=2019.3
- numpy
- scikit-image
- numba>=0.43.1
- pyqt
- scipy
- matplotlib
- pip:
- mxnet-mkl
- opencv_python
- pyqtgraph==0.11.0rc0
- natsort
- google-cloud-storage
- tqdm
- cellpose
name: cellpose-gpu
dependencies:
- python>3.4
- pip
- numpy
- scikit-image
- numba>=0.43.1
- pyqt
- scipy
- matplotlib
- pip:
# this needs to be adapted to your cuda version:
# for 10.0 choose 100 etc.
- mxnet-cu102
- opencv_python
- pyqtgraph==0.11.0rc0
- natsort
- google-cloud-storage
- tqdm
- cellpose
......@@ -50,6 +50,11 @@ Note that you don't need the `labels` folder for the prediction script.
The `CUDA_VISIBLE_DEVICES=0` part determines which GPU is used. If you have a machine with multiple GPUs and don't want to
use the first one, you need to change the `0` to the id of the GPU you want to use.
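The GPU can also be selected from within Python by setting this environment variable before the deep learning framework is imported; a minimal sketch (the GPU id `1` is only an example):
```
import os
# make only the second GPU visible; this has to happen before tensorflow / mxnet is imported
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
```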
To see the optional parameters of a script, print its help message, for example by running
```
train_stardist_2d --help
```
In order to run these scripts on the EMBL cluster via slurm, you can use the `submit_slurm` script from `ai4ia.utils`, e.g.
```
submit_slurm train_stardist_2d /path/to/data /path/to/model
......
......@@ -49,7 +49,7 @@ def load_training_data(root, image_folder, labels_folder, ext, multichannel):
train_images.sort()
label_pattern = os.path.join(root, labels_folder, f'*{ext}')
print("Looking for labels with the pattern", image_pattern)
print("Looking for labels with the pattern", label_pattern)
train_labels = glob(label_pattern)
assert len(train_labels) > 0, "Did not find any labels"
train_labels.sort()
......@@ -123,6 +123,10 @@ def train_model(x_train, y_train, x_val, y_val, save_path,
pretrained_model_path=None, n_rays=32):
# make the model config
# Stardist supports optional data processing on the gpu to speed up
# the training process. This can only be used if the additional module
# 'gputools' is available.
use_gpu = False and gputools_available()
save_root, save_name = os.path.split(save_path)
......@@ -177,7 +181,7 @@ def train_stardist_model(root, model_save_path, image_folder, labels_folder, ext
print("Made train validation split with validation fraction",
validation_fraction, "resulting in")
print(len(x_train), "training images")
print(len(y_train), "validation images")
print(len(x_val), "validation images")
print("Start model training ...")
print("You can connect to the tensorboard by typing 'tensorboaed --logdir=.' in the folder where the training runs")
......
......@@ -36,7 +36,7 @@ def load_training_data(root, image_folder, labels_folder, ext):
train_images.sort()
label_pattern = os.path.join(root, labels_folder, f'*{ext}')
print("Looking for labels with the pattern", image_pattern)
print("Looking for labels with the pattern", label_pattern)
train_labels = glob(label_pattern)
assert len(train_labels) > 0, "Did not find any labels"
train_labels.sort()
......@@ -110,26 +110,28 @@ def train_model(x_train, y_train,
rays = Rays_GoldenSpiral(n_rays, anisotropy=anisotropy)
# make the model config
# copied from the stardist training notebook, this is a very weird line ...
use_gpu = False and gputools_available()
# Stardist supports optional data processing on the gpu to speed up
# the training process. This can only be used if the additional module
# 'gputools' is available.
use_gpu_for_dataprocessing = gputools_available()
# predict on subsampled image for increased efficiency
grid = tuple(1 if a > 1.5 else 2 for a in anisotropy)
config = Config3D(
rays=rays,
grid=grid,
use_gpu=use_gpu,
use_gpu=use_gpu_for_dataprocessing,
n_channel_in=1,
train_patch_size=patch_size,
anisotropy=anisotropy
)
if use_gpu:
print("Using a GPU for training")
# limit gpu memory
if use_gpu_for_dataprocessing:
# limit gpu memory if we use the gpu for data preprocessing
# this is necessary because by default tensorflow allocates all gpu ram
from csbdeep.utils.tf import limit_gpu_memory
limit_gpu_memory(0.8)
else:
print("GPU not found, using the CPU for training")
save_root, save_name = os.path.split(save_path)
os.makedirs(save_root, exist_ok=True)
......@@ -160,7 +162,7 @@ def train_stardist_model(root, model_save_path, image_folder, labels_folder, ext
print("Made train validation split with validation fraction", validation_fraction, "resulting in")
print(len(x_train), "training images")
print(len(y_train), "validation images")
print(len(x_val), "validation images")
print("Start model training ...")
print("You can connect to the tensorboard by typing 'tensorboaed --logdir=.' in the folder where the training runs")
......@@ -184,12 +186,11 @@ def main():
parser.add_argument('--ext', type=str, default='.tif', help="Image file extension, default: .tif")
parser.add_argument('--validation_fraction', type=float, default=.1,
help="The fraction of available data that is used for validation, default: .1")
parser.add_argument('--patch_size', type=int, nargs=3, default=[128, 128, 128],
help="Size of the image patches used to train the network, default: 128, 128, 128")
aniso_help = """Anisotropy factor, needs to be passed as json encoded list, e.g. \"[.05,0.5,0.5]\".
psize_help = "Size of the image patches, given in ZYX axis order, used to train the network, default: 128, 128, 128."
parser.add_argument('--patch_size', type=int, nargs=3, default=[128, 128, 128], help=psize_help)
aniso_help = """Anisotropy factor, needs to be passed as json encoded list, e.g. \"[0.5,1.0,1.0]\" in ZYX axis order.
If not given, will be computed from the dimensions of the input data, default: None"""
parser.add_argument('--anisotropy', type=str, default=None,
help=aniso_help)
parser.add_argument('--anisotropy', type=str, default=None, help=aniso_help)
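    # illustrative example (placeholder values): --anisotropy "[2.0,1.0,1.0]" --patch_size 64 128 128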
args = parser.parse_args()
anisotropy = args.anisotropy
......
# EMBL-Tools
Tools for data visualisation and for submitting GPU jobs on the EMBL cluster.
## Installation
Activate your conda environment, e.g. from `../stardist/environment-gpu.yaml` and run
```
pip install -e .
```
## Usage
This will install two scripts:
```
view_data /path/to/folder
```
that can be used to visualise training data and predictions stored in our training data layout.
```
submit_slurm <SCRIPT_NAME> <SCRIPT_ARGS>
```
to run an arbitrary script on the cluster gpu queue.
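The `submit_slurm` functionality can also be used from Python; a minimal sketch (the script path and arguments are placeholders):
```
from utils_impl.submit_to_slurm import submit_slurm

# submit a training script with custom resource requirements
submit_slurm("/path/to/train_script.py", ["/path/to/data", "/path/to/model"],
             n_gpus=1, mem_limit="32G", time_limit=600)
```
The keyword arguments correspond to the optional parameters of `submit_slurm` (see `submit_to_slurm.py` below); `time_limit` is given in minutes.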
from setuptools import setup, find_packages
setup(
name="ai4ia.embl_tools",
packages=find_packages(),
version="0.0.1",
author="Constantin Pape",
url="https://git.embl.de/grp-bio-it/ai4ia",
license='MIT',
entry_points={
"console_scripts": [
"view_data = utils_impl.view_data:main",
"submit_slurm = utils_impl.submit_to_slurm:main"
]
},
)
#! /usr/bin/python3
import os
import sys
import inspect
import subprocess
from datetime import datetime
# two days in minutes
TWO_DAYS = 2 * 24 * 60
def write_slurm_template(script, out_path, env_name,
n_threads, gpu_type, n_gpus,
mem_limit, time_limit, qos,
mail_address):
slurm_template = ("#!/bin/bash\n"
"#SBATCH -N 1\n"
"#SBATCH -c %s\n"
"#SBATCH --mem %s\n"
"#SBATCH -t %i\n"
"#SBATCH --qos=%s\n"
"#SBATCH -p gpu\n"
"#SBATCH -C gpu=%s\n"
"#SBATCH --gres=gpu:%i\n") % (n_threads,
mem_limit, time_limit,
qos, gpu_type, n_gpus)
if mail_address is not None:
slurm_template += ("#SBATCH --mail-type=FAIL,BEGIN,END\n"
"#SBATCH --mail-user=%s") % mail_address
slurm_template += ("\n"
"module purge \n"
"module load GCC \n"
"source activate %s\n"
"\n"
"python %s $@ \n") % (env_name, script)
with open(out_path, 'w') as f:
f.write(slurm_template)
def submit_slurm(script, input_, n_threads=7, n_gpus=1,
gpu_type='2080Ti|1080Ti', mem_limit='64G',
time_limit=TWO_DAYS, qos='normal',
env_name=None, mail_address=None):
""" Submit python script that needs gpus with given inputs on a slurm node.
"""
tmp_folder = os.path.expanduser('~/.deep-cell/slurm')
os.makedirs(tmp_folder, exist_ok=True)
print("Submitting training script %s to cluster" % script)
print("with arguments %s" % " ".join(input_))
script_name = os.path.split(script)[1]
dt = datetime.now().strftime('%Y_%m_%d_%M')
tmp_name = os.path.splitext(script_name)[0] + dt
batch_script = os.path.join(tmp_folder, '%s.sh' % tmp_name)
log = os.path.join(tmp_folder, '%s.log' % tmp_name)
err = os.path.join(tmp_folder, '%s.err' % tmp_name)
if env_name is None:
env_name = os.environ.get('CONDA_DEFAULT_ENV', None)
if env_name is None:
raise RuntimeError("Could not find conda")
print("Batch script saved at", batch_script)
print("Log will be written to %s, error log to %s" % (log, err))
write_slurm_template(script, batch_script, env_name,
int(n_threads), gpu_type, int(n_gpus),
mem_limit, int(time_limit), qos, mail_address)
cmd = ['sbatch', '-o', log, '-e', err, '-J', script_name, batch_script]
cmd.extend(input_)
subprocess.run(cmd)
def scrape_kwargs(input_):
    """ Separate the optional keyword arguments for 'submit_slurm' (e.g. n_gpus, mem_limit)
    from the positional arguments that are passed on to the script.
    """
    # all parameters of 'submit_slurm' that have a default value can be passed as kwargs
    params = inspect.signature(submit_slurm).parameters
kwarg_names = [name for name in params
if params[name].default != inspect._empty]
kwarg_positions = [i for i, inp in enumerate(input_)
if inp in kwarg_names]
kwargs = {input_[i]: input_[i + 1] for i in kwarg_positions}
kwarg_positions += [i + 1 for i in kwarg_positions]
input_ = [inp for i, inp in enumerate(input_) if i not in kwarg_positions]
return input_, kwargs
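# Example (hypothetical call): 'submit_slurm train.py /data /model n_gpus 2 mem_limit 32G'
# passes input_ = ['/data', '/model', 'n_gpus', '2', 'mem_limit', '32G'] to scrape_kwargs,
# which returns (['/data', '/model'], {'n_gpus': '2', 'mem_limit': '32G'}).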
def main():
script = os.path.realpath(os.path.abspath(sys.argv[1]))
input_ = sys.argv[2:]
# scrape the additional arguments (n_threads, mem_limit, etc. from the input)
input_, kwargs = scrape_kwargs(input_)
submit_slurm(script, input_, **kwargs)
if __name__ == '__main__':
main()
import argparse
import os
from glob import glob
import imageio
import napari
def view_data(root, image_folder, labels_folder, prediction_folder,
ext, prediction_is_labels):
image_folder = os.path.join(root, image_folder)
assert os.path.exists(image_folder), f"Could not find {image_folder}"
if labels_folder is not None:
labels_folder = os.path.join(root, labels_folder)
assert os.path.exists(labels_folder), f"Could not find {labels_folder}"
if prediction_folder is not None:
prediction_folder = os.path.join(root, prediction_folder)
assert os.path.exists(prediction_folder), f"Could not find {prediction_folder}"
files = glob(os.path.join(image_folder, f"*{ext}"))
files.sort()
def _load(path):
try:
im = imageio.imread(path)
name = os.path.split(path)[1]
except Exception as e:
print(f"Could not open {path}")
print(f"Failed with {e}")
im, name = None, None
return im, name
# TODO instead of looping over images load them in napari with selection gui
for ff in files:
im, name = _load(ff)
if im is None:
continue
if labels_folder is not None:
label_file = os.path.join(labels_folder, name)
labels, _ = _load(label_file)
else:
labels = None
if prediction_folder is not None:
pred_file = os.path.join(prediction_folder, name)
prediction, _ = _load(pred_file)
else:
prediction = None
with napari.gui_qt():
viewer = napari.Viewer(title=name)
viewer.add_image(im)
if labels is not None:
viewer.add_labels(labels)
if prediction is not None:
if prediction_is_labels:
viewer.add_labels(prediction)
else:
viewer.add_image(prediction)
def main():
parser = argparse.ArgumentParser()
parser.add_argument('root')
parser.add_argument('--image_folder', type=str, default='images')
parser.add_argument('--labels_folder', type=str, default=None)
parser.add_argument('--prediction_folder', type=str, default=None)
parser.add_argument('--prediction_is_labels', type=int, default=1)
parser.add_argument('--ext', type=str, default='.tif')
args = parser.parse_args()
view_data(args.root, args.image_folder, args.labels_folder, args.prediction_folder,
args.ext, bool(args.prediction_is_labels))
if __name__ == '__main__':
main()