From 681609326c1b599bc0995726241dda60ec5b4072 Mon Sep 17 00:00:00 2001
From: Sascha Meiers <meiers@embl.de>
Date: Thu, 21 Jun 2018 22:11:14 +0200
Subject: [PATCH] Fix for miscommunication problem between SLURM and snakemake
 by Jelle Scholtalbers

---
 cluster_status.py                   | 30 ++++++++++++++++++++++++++++++
 run_pipeline_cluster-singularity.sh |  5 ++++-
 2 files changed, 34 insertions(+), 1 deletion(-)
 create mode 100755 cluster_status.py

diff --git a/cluster_status.py b/cluster_status.py
new file mode 100755
index 0000000..51bbd53
--- /dev/null
+++ b/cluster_status.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+
+# Code by Jelle Scholtalbers
+import subprocess
+import sys
+import re
+
+jobid = sys.argv[1]
+
+try:
+    output = str(subprocess.check_output("sacct -j %s --format State --noheader | head -1 | awk '{print $1}'" % jobid, shell=True).strip())
+except subprocess.CalledProcessError:
+    print("failed")
+    sys.exit(0)
+
+running_status=["PENDING", "CONFIGURING", "COMPLETING", "RUNNING", "SUSPENDED"]
+if "COMPLETED" in output:
+    try:
+        output = str(subprocess.check_output("grep 'slurmstepd: error: Exceeded step memory limit at some point.' slurm-%s.out" % jobid, shell=True))
+    except subprocess.CalledProcessError:
+        # grep fails to find error (or fails to find log file): success
+        print("success")
+    else:
+        print("failed")
+    sys.exit(0)
+elif any(s in output for s in running_status):
+    print("running")
+else:
+    print("failed")
+
diff --git a/run_pipeline_cluster-singularity.sh b/run_pipeline_cluster-singularity.sh
index 5cb097b..d61b6f4 100755
--- a/run_pipeline_cluster-singularity.sh
+++ b/run_pipeline_cluster-singularity.sh
@@ -4,6 +4,8 @@
 REF="/g/korbel/shared/datasets/refgenomes/human/GCA_000001405.15_GRCh38_no_alt_analysis_set.fna"
 R_REF="/g/korbel/meiers/R-lib/3.4.0-foss-2016b/BSgenome.Hsapiens.UCSC.hg38/extdata/single_sequences.2bit"
 
+mkdir slurm 2> /dev/null
+
 snakemake \
     -j 100 \
     --configfile Snake.config-singularity.json \
@@ -13,7 +15,8 @@ snakemake \
         -B ${R_REF}:/usr/local/lib/R/site-library/BSgenome.Hsapiens.UCSC.hg38/extdata/single_sequences.2bit:ro" \
     --cluster-config cluster.json \
     --local-cores 8 \
-    --cluster "sbatch -o slurm/{rule}.%j.log -e slurm/{rule}.%j.log --cpus-per-task {cluster.n} --time {cluster.time} --mem {cluster.mem}" \
+    --cluster "sbatch --parsable -o slurm/{rule}.%j.log -e slurm/{rule}.%j.log --cpus-per-task {cluster.n} --time {cluster.time} --mem {cluster.mem}" \
+    --cluster-status ./cluster_status.py \
     --latency-wait 60 \
     --timestamp \
     --keep-going \
-- 
GitLab