From fb71b261c468b419ffb3b4ee8de488e93f0c0716 Mon Sep 17 00:00:00 2001
From: Renato Alves <alves.rjc@gmail.com>
Date: Sun, 21 Jan 2018 20:50:54 +0100
Subject: [PATCH] ENH Allow limiting max-time of execution and validate number
 of cores

---
 bin/submitjob | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/bin/submitjob b/bin/submitjob
index ef7f45e..2856781 100755
--- a/bin/submitjob
+++ b/bin/submitjob
@@ -13,6 +13,7 @@ CORES="1"
 RESERVE=""
 NODES="1"
 MEM="4"  # 4 GB by default
+TIME=""
 MEMALL="$MEM"
 ACTIVE="100"
 QUEUE=""
@@ -67,6 +68,7 @@ usage() {
     echo >&2 "                     for less than 1G/job use fractionals such as 0.1 (100M/job)"
     echo >&2 "    -a --active    = limit number of simultaneously active jobs. Defaults to ${ACTIVE}"
     echo >&2 "    -q --queue     = which queue to use. Uses cluster's default if unspecified"
+    echo >&2 "  * -k --time      = maximum execution time. Unlimited if unspecified. (SGE/SLURM: hh:mm:ss, LSF: hh:mm)"
     echo >&2 "  * -p --preempt   = specify preemption level (aka QoS) (SLURM). Defaults to ${PREEMPT}"
     echo >&2 "  * -t --target    = restricts to running on the given host(s) [comma separated] (SGE)"
     echo >&2 "  * -F --features  = request specific resources to constrain your jobs (SLURM)"
@@ -101,7 +103,8 @@ usage() {
     echo >&2 ""
     echo >&2 "  In addition to standard SGE/LSF/SLURM environment variables, the following"
     echo >&2 "  can be used in the jobs file referencing the values used."
-    echo >&2 "    \${Q_NAME} \${Q_CORES} \${Q_MEM} \${Q_MEMALL} \${Q_ACTIVE} \${Q_QUEUE} \${Q_LOGFILE} \${Q_WAITFOR}"
+    echo >&2 "    \${Q_NAME} \${Q_CORES} \${Q_MEM} \${Q_MEMALL} \${Q_TIME} \${Q_ACTIVE}"
+    echo >&2 "    \${Q_QUEUE} \${Q_LOGFILE} \${Q_WAITFOR}"
     echo >&2 ""
     echo >&2 ""
 }
@@ -195,6 +198,7 @@ cat << EOF | ${LOCALCMD}
 #$ -S /bin/bash
 #$ -tc ${ACTIVE}
 ${BANG_BEGIN}
+${BANG_TIME}
 ${BANG_QUEUE}
 ${BANG_EMAIL}
 ${BANG_EMAIL_WHEN}
@@ -205,6 +209,7 @@ export Q_LOGFILE="${LOGFILE}"
 export Q_CORES="${CORES}"
 export Q_MEM="${MEM}"
 export Q_MEMALL="${MEMALL}"
+export Q_TIME="${TIME}"
 export Q_ACTIVE="${ACTIVE}"
 export Q_QUEUE="${QUEUE}"
 export Q_WAITFOR="${WAITFOR}"
@@ -240,6 +245,7 @@ cat << EOF | ${LOCALCMD}
 #SBATCH --job-name=${NAME}
 #SBATCH --qos=${PREEMPT}
 ${BANG_BEGIN}
+${BANG_TIME}
 ${BANG_QUEUE}
 ${BANG_EMAIL}
 ${BANG_EMAIL_WHEN}
@@ -251,6 +257,7 @@ export Q_LOGFILE="${LOGFILE}"
 export Q_CORES="${CORES}"
 export Q_MEM="${MEM}"
 export Q_MEMALL="${MEMALL}"
+export Q_TIME="${TIME}"
 export Q_ACTIVE="${ACTIVE}"
 export Q_QUEUE="${QUEUE}"
 export Q_WAITFOR="${WAITFOR}"
@@ -284,6 +291,7 @@ cat << EOF | ${LOCALCMD}
 #BSUB -J ${NAME}[${JOB_COUNT}]%${ACTIVE}
 #BSUB -R "select[(mem>=${MEM})] rusage[mem=${MEM}] span[hosts=${NODES}]"
 ${BANG_BEGIN}
+${BANG_TIME}
 ${BANG_QUEUE}
 ${BANG_EMAIL}
 ${BANG_WAITFOR}
@@ -293,6 +301,7 @@ export Q_LOGFILE="${LOGFILE}"
 export Q_CORES="${CORES}"
 export Q_MEM="${MEM}"
 export Q_MEMALL="${MEMALL}"
+export Q_TIME="${TIME}"
 export Q_ACTIVE="${ACTIVE}"
 export Q_QUEUE="${QUEUE}"
 export Q_WAITFOR="${WAITFOR}"
@@ -315,7 +324,7 @@ EOF
 
 }
 
-ARG_PARSE="getopt -o s:n:c:N:m:a:q:p:t:F:l:e:E:w:b:d:fh -l system:,name:,cores:,nodes:,mem:,active:,queue:,preempt:,target:,features:,logfile:,email:,emailwhen:,waitfor:,begin:,debug:,fatal,help -n $0 --"
+ARG_PARSE="getopt -o s:n:c:N:m:a:q:k:p:t:F:l:e:E:w:b:d:fh -l system:,name:,cores:,nodes:,mem:,active:,queue:,time:,preempt:,target:,features:,logfile:,email:,emailwhen:,waitfor:,begin:,debug:,fatal,help -n $0 --"
 
 # We process arguments twice to handle any argument parsing error:
 ARG_ERROR=$($ARG_PARSE "$@" 2>&1 1>/dev/null)
@@ -372,6 +381,11 @@ while true; do
             QUEUE="$1"
             shift
             ;;
+        -k|--time)
+            shift
+            TIME="$1"
+            shift
+            ;;
         -p|--preempt)
             shift
             PREEMPT="$1"
@@ -443,6 +457,7 @@ JOBFILE="$1"
 [ -n "$JOBFILE" ] || required_arg "jobs"
 [ ! -d "$JOBFILE" ] || generic_error "Path '$JOBFILE' cannot be a directory"
 ([ -e "$JOBFILE" ] && [ -r "$JOBFILE" ]) || generic_error "File '$JOBFILE' doesn't exist or is not readable"
+[ "$CORES" -gt 0 ] 2>/dev/null || generic_error "Number of cores must be an integer and greater than 0"
 
 # Special variables. FATAL sets +e which causes any error to exit the script
 if [ "$FATAL" == "fatal" ]; then
@@ -465,6 +480,7 @@ if [ "$SYSTEM" == "SGE" ]; then
     [ "$QUEUE" != "" ] && BANG_QUEUE="#$ -q ${QUEUE}"
     [ "$WAITFOR" != "" ] && BANG_WAITFOR="#$ -hold_jid ${WAITFOR}"
     [ "$BEGIN" != "" ] && BANG_BEGIN="#$ -a ${BEGIN}"
+    [ "$TIME" != "" ] && BANG_TIME="#$ -l h_rt=${TIME}"
     [ "$TARGET" != "" ] && TARGET=",h=${TARGET}"
     [ "$CORES" -gt "1" ] && RESERVE=" -R y"
     if [ "$EMAIL" != "" ]; then
@@ -487,6 +503,7 @@ elif [ "$SYSTEM" == "SLURM" ]; then
     [ "$QUEUE" != "" ] && BANG_QUEUE="#SBATCH --partition=${QUEUE}"
     [ "$WAITFOR" != "" ] && BANG_WAITFOR="#SBATCH --depend=${WAITFOR}"
     [ "$BEGIN" != "" ] && BANG_BEGIN="#SBATCH --begin=${BEGIN}"
+    [ "$TIME" != "" ] && BANG_TIME="#SBATCH --time=${TIME}"
     [ "$FEATURES" != "" ] && BANG_FEATURES="#SBATCH --constraint=${FEATURES}"
     if [ "$EMAIL" != "" ]; then
         BANG_EMAIL="#SBATCH --mail-user=${EMAIL}"
@@ -511,6 +528,7 @@ elif [ "$SYSTEM" == "LSF" ]; then
     [ "$QUEUE" != "" ] && BANG_QUEUE="#BSUB -q ${QUEUE}"
     [ "$WAITFOR" != "" ] && BANG_WAITFOR="#BSUB -w ${WAITFOR}"
     [ "$BEGIN" != "" ] && BANG_BEGIN="#BSUB -b ${BEGIN}"
+    [ "$TIME" != "" ] && BANG_TIME="#BSUB -W ${TIME}"
     if [ "$EMAIL" != "" ]; then
         BANG_EMAIL="#BSUB -u ${EMAIL}"
         # LSF doesn't have different email options
-- 
GitLab