When you are working on an HPC cluster and dealing with a large number of jobs and folders, clear and straightforward management is essential.
Folders & Files format
To improve teamwork, researchers should agree on a common naming convention for jobs and paths.
Here is an example folder layout:
Project_Folder
|-- Temperature_1/
| |-- vc_pressure_1/
| | |-- vc_pressure_1.in
| | |-- vc_pressure_1.out
| |-- vc_pressure_2/
| | |-- vc_pressure_2.in
| | |-- vc_pressure_2.out
| |-- scf_pressure_1/
| | |-- scf_pressure_1.in
| | |-- scf_pressure_1.out
| | |-- ph_pressure_1.in
| | |-- ph_pressure_1.out
| |-- scf_pressure_2/
| | |-- scf_pressure_2.in
| | |-- scf_pressure_2.out
| | |-- ph_pressure_2.in
| | |-- ph_pressure_2.out
Cancel your jobs
Create the script file
Copy the script below.
A quick way to create the file is:
touch ~/cancel_job.py
vim ~/cancel_job.py
and paste the script into ~/cancel_job.py.
Use the script to cancel jobs
Run python ~/cancel_job.py
to cancel all current jobs under your user ID.
To cancel a single job, pass its name: python ~/cancel_job.py job_name
If several jobs share the same prefix and you want to cancel all of them, run python ~/cancel_job.py job_prefix
Script:
import subprocess
import re
import os
import sys
#--------------------------------------------------#
# where you can customize #
#--------------------------------------------------#
# NOTE(review): "#USRNAME" looks like a placeholder that has to be replaced with
# the real user name before the script is used -- confirm. Left unchanged, a
# POSIX shell treats a word starting with "#" as the beginning of a comment, so
# the queue command below would lose its user argument.
fn = "/rigel/home/#USRNAME/tmp/sq.tmp" # temporary file that holds the job queue listing
order = "squeue -u #USRNAME" # command that lists all the jobs of this user
cancel = "scancel" # command that cancels a job on the HPC system in use
#--------------------------------------------------#
# color format #
#--------------------------------------------------#
class tmcolors:
    """ANSI escape sequences used to style text printed to a terminal.

    Every attribute is a raw escape code. ``ENDC`` resets the styling and is
    meant to be appended after any styled text.
    """

    HEADER = '\033[95m'     # bright magenta
    OKBLUE = '\033[94m'     # bright blue
    OKGREEN = '\033[92m'    # bright green
    WARNING = '\033[93m'    # bright yellow
    FAIL = '\033[91m'       # bright red
    ENDC = '\033[0m'        # reset all attributes
    BOLD = '\033[1m'        # bold / increased intensity
    UNDERLINE = '\033[4m'   # underline
class color:
    """Wrap a piece of text so it can be rendered with a terminal style.

    Each property returns ``content`` preceded by the matching escape code
    from ``tmcolors`` and followed by the reset code ``tmcolors.ENDC``.

    Args:
        content: the value to style; it is formatted with ``%s``.
    """

    def __init__(self, content):
        self.content = content

    def _wrap(self, code):
        """Return the content surrounded by *code* and the reset code."""
        return "%s%s%s" % (code, self.content, tmcolors.ENDC)

    @property
    def header(self):
        """Content styled with ``tmcolors.HEADER``."""
        return self._wrap(tmcolors.HEADER)

    @property
    def warning(self):
        """Content styled with ``tmcolors.WARNING``."""
        return self._wrap(tmcolors.WARNING)

    @property
    def bold(self):
        """Content styled with ``tmcolors.BOLD``."""
        return self._wrap(tmcolors.BOLD)

    @property
    def green(self):
        """Content styled with ``tmcolors.OKGREEN``."""
        return self._wrap(tmcolors.OKGREEN)

    @property
    def blue(self):
        """Content styled with ``tmcolors.OKBLUE``."""
        return self._wrap(tmcolors.OKBLUE)

    @property
    def underline(self):
        """Content styled with ``tmcolors.UNDERLINE``."""
        return self._wrap(tmcolors.UNDERLINE)
#--------------------------------------------------#
# main functions #
#--------------------------------------------------#
def get_mysq():
    """Run the configured queue command and return its output lines.

    The output of the module-level command ``order`` is written to the
    scratch file ``fn``, read back, and the scratch file is removed again.

    Returns:
        list of str: the output lines of the command, each still carrying
        its trailing newline.

    Raises:
        IOError / OSError: if the scratch file cannot be created or read
        (for example when its directory does not exist).
    """
    try:
        # Redirect through a Python file handle rather than a shell ">" so
        # the path is never re-parsed by the shell. Mode "w" also truncates
        # any stale file left behind by an earlier run.
        with open(fn, "w") as sink:
            # ``order`` is a user-configured command line, hence shell=True.
            subprocess.call(order, shell=True, stdout=sink)
        with open(fn, "r") as source:
            return source.readlines()
    finally:
        # Always remove the scratch file, even when reading it failed.
        try:
            os.remove(fn)
        except OSError:
            # Nothing to clean up: the file was never created.
            pass
def rua(argus, job_name):
    """Decide whether the job called *job_name* should be cancelled.

    Args:
        argus: the full command-line argument list (``sys.argv`` style);
            element 0 is the script path and is never used as a filter.
        job_name: the job name taken from the queue listing.

    Returns:
        bool: ``True`` when no filter was given (cancel everything) or when
        at least one filter, interpreted as a regular expression, is found
        anywhere in *job_name*; ``False`` otherwise.

    Raises:
        re.error: if a filter is not a valid regular expression.
    """
    # Skip argv[0]: matching the script path against job names could cancel
    # unrelated jobs whose names happen to contain it.
    patterns = argus[1:]
    if not patterns:
        return True
    return any(re.search(pattern, job_name) for pattern in patterns)
if __name__ == "__main__":
    lines = get_mysq()
    argu = sys.argv
    # lines[0] is skipped as the header row of the listing, so jobs exist only
    # when there is more than one line (assumes default squeue output).
    if len(lines) > 1:
        for line in lines[1:]:
            fields = line.split()
            # Skip blank or truncated lines instead of raising IndexError.
            if len(fields) < 3:
                continue
            # Columns 0 and 2 are taken as job id and job name
            # (presumably the default squeue layout -- verify for custom formats).
            job_id, job_name = fields[0], fields[2]
            if not rua(argu, job_name):
                continue
            status = subprocess.call("%s %s" % (cancel, job_id), shell=True)
            if status == 0:
                print("%s is cancelled!" % color(job_name).header)
            else:
                # Do not claim success when the cancel command reported an error.
                print(color("Failed to cancel %s (job id %s)"
                            % (job_name, job_id)).warning)
    else:
        print(color("No job now!").warning)