Skip to content
Snippets Groups Projects
Verified commit fb71b261, authored by Renato Alves :seedling:
Browse files

ENH Allow limiting max-time of execution and validate number of cores

parent 07287037
No related branches found
No related tags found
No related merge requests found
......@@ -13,6 +13,7 @@ CORES="1"
RESERVE=""
NODES="1"
MEM="4" # 4 GB by default
TIME=""
MEMALL="$MEM"
ACTIVE="100"
QUEUE=""
......@@ -67,6 +68,7 @@ usage() {
echo >&2 " for less than 1G/job use fractionals such as 0.1 (100M/job)"
echo >&2 " -a --active = limit number of simultaneously active jobs. Defaults to ${ACTIVE}"
echo >&2 " -q --queue = which queue to use. Uses cluster's default if unspecified"
echo >&2 " * -k --time = maximum execution time. Unlimited if unspecified. (hh:mm[:ss])"
echo >&2 " * -p --preempt = specify preemption level (aka QoS) (SLURM). Defaults to ${PREEMPT}"
echo >&2 " * -t --target = restricts to running on the given host(s) [comma separated] (SGE)"
echo >&2 " * -F --features = request specific resources to constrain your jobs (SLURM)"
......@@ -101,7 +103,8 @@ usage() {
echo >&2 ""
echo >&2 " In addition to standard SGE/LSF/SLURM environment variables, the following"
echo >&2 " can be used in the jobs file referencing the values used."
echo >&2 " \${Q_NAME} \${Q_CORES} \${Q_MEM} \${Q_MEMALL} \${Q_ACTIVE} \${Q_QUEUE} \${Q_LOGFILE} \${Q_WAITFOR}"
echo >&2 " \${Q_NAME} \${Q_CORES} \${Q_MEM} \${Q_MEMALL} \${Q_TIME} \${Q_ACTIVE}"
echo >&2 " \${Q_QUEUE} \${Q_LOGFILE} \${Q_WAITFOR}"
echo >&2 ""
echo >&2 ""
}
......@@ -195,6 +198,7 @@ cat << EOF | ${LOCALCMD}
#$ -S /bin/bash
#$ -tc ${ACTIVE}
${BANG_BEGIN}
${BANG_TIME}
${BANG_QUEUE}
${BANG_EMAIL}
${BANG_EMAIL_WHEN}
......@@ -205,6 +209,7 @@ export Q_LOGFILE="${LOGFILE}"
export Q_CORES="${CORES}"
export Q_MEM="${MEM}"
export Q_MEMALL="${MEMALL}"
export Q_TIME="${TIME}"
export Q_ACTIVE="${ACTIVE}"
export Q_QUEUE="${QUEUE}"
export Q_WAITFOR="${WAITFOR}"
......@@ -240,6 +245,7 @@ cat << EOF | ${LOCALCMD}
#SBATCH --job-name=${NAME}
#SBATCH --qos=${PREEMPT}
${BANG_BEGIN}
${BANG_TIME}
${BANG_QUEUE}
${BANG_EMAIL}
${BANG_EMAIL_WHEN}
......@@ -251,6 +257,7 @@ export Q_LOGFILE="${LOGFILE}"
export Q_CORES="${CORES}"
export Q_MEM="${MEM}"
export Q_MEMALL="${MEMALL}"
export Q_TIME="${TIME}"
export Q_ACTIVE="${ACTIVE}"
export Q_QUEUE="${QUEUE}"
export Q_WAITFOR="${WAITFOR}"
......@@ -284,6 +291,7 @@ cat << EOF | ${LOCALCMD}
#BSUB -J ${NAME}[${JOB_COUNT}]%${ACTIVE}
#BSUB -R "select[(mem>=${MEM})] rusage[mem=${MEM}] span[hosts=${NODES}]"
${BANG_BEGIN}
${BANG_TIME}
${BANG_QUEUE}
${BANG_EMAIL}
${BANG_WAITFOR}
......@@ -293,6 +301,7 @@ export Q_LOGFILE="${LOGFILE}"
export Q_CORES="${CORES}"
export Q_MEM="${MEM}"
export Q_MEMALL="${MEMALL}"
export Q_TIME="${TIME}"
export Q_ACTIVE="${ACTIVE}"
export Q_QUEUE="${QUEUE}"
export Q_WAITFOR="${WAITFOR}"
......@@ -315,7 +324,7 @@ EOF
}
ARG_PARSE="getopt -o s:n:c:N:m:a:q:p:t:F:l:e:E:w:b:d:fh -l system:,name:,cores:,nodes:,mem:,active:,queue:,preempt:,target:,features:,logfile:,email:,emailwhen:,waitfor:,begin:,debug:,fatal,help -n $0 --"
ARG_PARSE="getopt -o s:n:c:N:m:a:q:k:p:t:F:l:e:E:w:b:d:fh -l system:,name:,cores:,nodes:,mem:,active:,queue:,time:,preempt:,target:,features:,logfile:,email:,emailwhen:,waitfor:,begin:,debug:,fatal,help -n $0 --"
# We process arguments twice to handle any argument parsing error:
ARG_ERROR=$($ARG_PARSE "$@" 2>&1 1>/dev/null)
......@@ -372,6 +381,11 @@ while true; do
QUEUE="$1"
shift
;;
-k|--time)
shift
TIME="$1"
shift
;;
-p|--preempt)
shift
PREEMPT="$1"
......@@ -443,6 +457,7 @@ JOBFILE="$1"
[ -n "$JOBFILE" ] || required_arg "jobs"
[ ! -d "$JOBFILE" ] || generic_error "Path '$JOBFILE' cannot be a directory"
([ -e "$JOBFILE" ] && [ -r "$JOBFILE" ]) || generic_error "File '$JOBFILE' doesn't exist or is not readable"
[ "$CORES" -gt 0 ] || generic_error "Number of cores must be an integer and greater than 0"
# Special variables. FATAL sets +e which causes any error to exit the script
if [ "$FATAL" == "fatal" ]; then
......@@ -465,6 +480,7 @@ if [ "$SYSTEM" == "SGE" ]; then
[ "$QUEUE" != "" ] && BANG_QUEUE="#$ -q ${QUEUE}"
[ "$WAITFOR" != "" ] && BANG_WAITFOR="#$ -hold_jid ${WAITFOR}"
[ "$BEGIN" != "" ] && BANG_BEGIN="#$ -a ${BEGIN}"
[ "$TIME" != "" ] && BANG_TIME="#$ -l h_rt=${TIME}"
[ "$TARGET" != "" ] && TARGET=",h=${TARGET}"
[ "$CORES" -gt "1" ] && RESERVE=" -R y"
if [ "$EMAIL" != "" ]; then
......@@ -487,6 +503,7 @@ elif [ "$SYSTEM" == "SLURM" ]; then
[ "$QUEUE" != "" ] && BANG_QUEUE="#SBATCH --partition=${QUEUE}"
[ "$WAITFOR" != "" ] && BANG_WAITFOR="#SBATCH --depend=${WAITFOR}"
[ "$BEGIN" != "" ] && BANG_BEGIN="#SBATCH --begin=${BEGIN}"
[ "$TIME" != "" ] && BANG_TIME="#SBATCH --time=${TIME}"
[ "$FEATURES" != "" ] && BANG_FEATURES="#SBATCH --constraint=${FEATURES}"
if [ "$EMAIL" != "" ]; then
BANG_EMAIL="#SBATCH --mail-user=${EMAIL}"
......@@ -511,6 +528,7 @@ elif [ "$SYSTEM" == "LSF" ]; then
[ "$QUEUE" != "" ] && BANG_QUEUE="#BSUB -q ${QUEUE}"
[ "$WAITFOR" != "" ] && BANG_WAITFOR="#BSUB -w ${WAITFOR}"
[ "$BEGIN" != "" ] && BANG_BEGIN="#BSUB -b ${BEGIN}"
[ "$TIME" != "" ] && BANG_TIME="#BSUB -W ${TIME}"
if [ "$EMAIL" != "" ]; then
BANG_EMAIL="#BSUB -u ${EMAIL}"
# LSF doesn't have different email options
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment