Commit 58952685 authored by Martin Schorb

Merge remote-tracking branch 'origin/gc3less' into gc3less

parents 362b7e5a 067476b6
@@ -89,7 +89,7 @@ n_cpu_standalone = 8
 # spark
 n_cpu_spark = 200
-cpu_pernode_spark = 20
+cpu_pernode_spark = 4
 mipmaps=dict()
 mipmaps['min/Gpix/CPU'] = 6
@@ -224,4 +224,4 @@ render_base_url += v_base_url
 url = render_base_url + render_version + 'owners'
 render_owners = requests.get(url).json()
-default_store['init_render']['allowners'] = render_owners
\ No newline at end of file
+default_store['init_render']['allowners'] = render_owners
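The config hunk above only lowers the per-node CPU count for Spark jobs while keeping the total budget at 200. A minimal sketch of the arithmetic this implies, assuming (the consumer is not shown in this diff) that the two values feed a node-count calculation for the Slurm allocation:

```python
# Sketch only: how n_cpu_spark and cpu_pernode_spark could determine
# the Slurm node count. With 200 total CPUs, dropping the per-node
# count from 20 to 4 grows the allocation from 10 to 50 nodes.
import math

n_cpu_spark = 200       # total Spark CPU budget (from the config above)
cpu_pernode_spark = 4   # CPUs requested per node (was 20)

n_nodes = math.ceil(n_cpu_spark / cpu_pernode_spark)
print(n_nodes)  # -> 50
```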
@@ -39,12 +39,10 @@ def args2string(args,separator='='):
 def status(run_state):
     run_state.update({'id':31801858,'type':'sparkslurm'})
-    res_status = checkstatus(run_state)
-    print(run_state)
-    print('res_status:')
-    print(res_status)
-    link=''
+    res_status,link = checkstatus(run_state)
+    # print(run_state)
+    # print('res_status:')
+    # print(res_status)
     if res_status is None:
         return 'input',link
@@ -90,17 +88,17 @@ def checkstatus(run_state):
         if p.is_running():
             if not p.status() == 'zombie':
-                return 'running'
+                return 'running',''
         if os.path.exists(run_state['logfile']+'_exit'):
-            return 'error'
+            return 'error',''
         else:
-            return 'done'
+            return 'done',''
     else:
-        return run_state['status']
+        return run_state['status'],''
 else:
     return cluster_status(run_state)
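This hunk makes every return path of checkstatus() yield a (status, link) pair, so the caller in status() above no longer needs its own link='' fallback. A condensed sketch of the standalone branch under that convention (psutil usage as in the diff; the helper name and flattened control flow are illustrative, not from the repo):

```python
import os
import psutil

def standalone_status(pid, logfile):
    # Poll a local process; an '<logfile>_exit' sentinel marks failure.
    if psutil.pid_exists(pid):
        p = psutil.Process(pid)
        if p.is_running() and p.status() != 'zombie':
            return 'running', ''
    if os.path.exists(logfile + '_exit'):
        return 'error', ''
    return 'done', ''   # second tuple element stays empty for local jobs
```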
@@ -111,7 +109,7 @@ def checkstatus(run_state):
 def cluster_status(run_state):
     my_env = os.environ.copy()
     out_stat=list()
-    link=''
+    sp_master=''
     j_id = run_state['id']
     # print('JOB-ID:')
@@ -124,13 +122,13 @@ def cluster_status(run_state):
     logfile = run_state['logfile']
     if cl_type == 'slurm':
         command = 'sacct --jobs='
-        command += j_id
+        command += str(j_id)
         command += ' --format=jobid,state --parsable'
     elif cl_type == 'sparkslurm':
         command = 'sacct --jobs='
-        command += j_id
+        command += str(j_id)
         command += ' --format=jobid,state,node --parsable'
     # commands for other cluster types go HERE
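The str(j_id) conversions are needed because the job id now arrives as an int (see the run_state.update(...) call earlier in the diff). A hedged sketch of the same sacct query built with subprocess instead of string concatenation; sacct, --jobs, --format and --parsable are standard Slurm, while the helper name is illustrative:

```python
import subprocess

def sacct_states(j_id):
    # --parsable yields pipe-delimited rows such as 'JobID|State|'.
    out = subprocess.run(
        ['sacct', '--jobs=' + str(j_id), '--format=jobid,state', '--parsable'],
        capture_output=True, text=True, check=True,
    ).stdout
    rows = [line.split('|') for line in out.splitlines() if line]
    return {r[0]: r[1] for r in rows[1:]}  # skip the header row
```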
@@ -182,14 +180,14 @@ def cluster_status(run_state):
     for job_item in stat_list[1:]:
         jobstat = job_item.split('|')
-        if jobstat[0] == j_id + '+0':
+        if jobstat[0] == str(j_id) + '+0':
             # master job
             masterhost = jobstat[2]
             slurm_stat = jobstat[1]
             if 'RUNNING' in slurm_stat:
-                sp_masterfile = os.path.join(logfile.rsplit(os.extsep)[0],'spark-master-' + j_id,'master')
+                sp_masterfile = os.path.join(logfile.rsplit(os.extsep)[0],'spark-master-' + str(j_id),'master')
                 with open(sp_masterfile) as f: sp_master=f.read().strip('\n')
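When the master job is RUNNING, the Spark master address is read back from a file the launcher wrote next to the log. A self-contained sketch of that lookup, mirroring the path construction in the hunk above:

```python
import os

def read_spark_master(logfile, j_id):
    # <logfile stem>/spark-master-<jobid>/master holds the master URL.
    logdir = logfile.rsplit(os.extsep)[0]
    sp_masterfile = os.path.join(logdir, 'spark-master-' + str(j_id), 'master')
    with open(sp_masterfile) as f:
        return f.read().strip('\n')
```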
@@ -220,20 +218,18 @@ def cluster_status(run_state):
                 else:
                     if 'FINISHED' in sp_query['completedapps'][0]['state']:
-                        drop = canceljobs('sparkslurm__'+j_id)
+                        drop = canceljobs('sparkslurm__'+str(j_id))
                         out_stat.append('done')
                     elif 'KILLED' in sp_query['completedapps'][0]['state']:
-                        drop = canceljobs('sparkslurm__'+j_id)
+                        drop = canceljobs('sparkslurm__'+str(j_id))
                         out_stat.append('Spark app was killed.')
                     else:
                         out_stat.append('running' + link)
             else:
                 out_stat.append(sp_query['activeapps'][0]['state'].lower() + link)
         elif slurm_stat=='COMPLETED':
             out_stat.append('done')
         elif 'FAILED' in slurm_stat:
@@ -245,7 +241,7 @@ def cluster_status(run_state):
         elif 'CANCELLED' in slurm_stat:
             out_stat.append('cancelled')
-    return out_stat
+    return out_stat[0],sp_master
 def canceljobs(run_state):
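cluster_status() previously returned the whole out_stat list; it now returns the first status together with the Spark master address, matching the (status, link) pairs returned by checkstatus(). A short consumer sketch; the variable names are illustrative and assume the module's own cluster_status() and run_state are in scope:

```python
# sp_master is '' for plain Slurm jobs, a spark:// URL for sparkslurm.
status_str, sp_master = cluster_status(run_state)
if sp_master:
    print('job is ' + status_str + ', Spark master at ' + sp_master)
else:
    print('job is ' + status_str)
```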
@@ -256,7 +252,7 @@ def canceljobs(run_state):
     cl_type = run_state['type']
     if 'slurm' in cl_type:
-        command = 'scancel '+j_id
+        command = 'scancel '+str(j_id)
         os.system(command)