amirali1985 commited on
Commit
7420444
·
verified ·
1 Parent(s): e548a1f

Upload queue_status.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. queue_status.json +32 -128
queue_status.json CHANGED
@@ -1,156 +1,60 @@
1
  {
2
- "timestamp": "2026-04-24 22:14:25",
3
- "total": 12,
4
- "pending": 0,
5
- "running": 0,
6
- "done": 1,
7
- "failed": 11,
8
  "stale": 0,
9
  "retrying": 0,
10
  "jobs": [
11
  {
12
  "job_id": 0,
13
  "name": "PYTHONPATH=.",
14
- "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 0 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
15
  "gpu": 0,
16
- "status": "done",
17
- "elapsed": 555,
18
- "idle_time": 1,
19
- "exit_code": 0,
20
  "retries": 0,
21
  "log_file": "/tmp/stride_applications_queue/job_000_PYTHONPATH=._gpu0.log"
22
  },
23
  {
24
  "job_id": 1,
25
  "name": "PYTHONPATH=.",
26
- "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 1 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
27
  "gpu": 1,
28
- "status": "failed",
29
- "elapsed": 21,
30
- "idle_time": 515,
31
- "exit_code": 1,
32
- "retries": 1,
33
  "log_file": "/tmp/stride_applications_queue/job_001_PYTHONPATH=._gpu1.log"
34
  },
35
  {
36
  "job_id": 2,
37
  "name": "PYTHONPATH=.",
38
- "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 2 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
39
- "gpu": 1,
40
- "status": "failed",
41
- "elapsed": 21,
42
- "idle_time": 467,
43
- "exit_code": 1,
44
- "retries": 1,
45
- "log_file": "/tmp/stride_applications_queue/job_002_PYTHONPATH=._gpu1.log"
46
- },
47
- {
48
- "job_id": 3,
49
- "name": "PYTHONPATH=.",
50
- "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 3 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
51
- "gpu": 1,
52
- "status": "failed",
53
- "elapsed": 21,
54
- "idle_time": 426,
55
- "exit_code": 1,
56
- "retries": 1,
57
- "log_file": "/tmp/stride_applications_queue/job_003_PYTHONPATH=._gpu1.log"
58
- },
59
- {
60
- "job_id": 4,
61
- "name": "PYTHONPATH=.",
62
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 4 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
63
- "gpu": 1,
64
- "status": "failed",
65
- "elapsed": 25,
66
- "idle_time": 376,
67
- "exit_code": 1,
68
- "retries": 1,
69
- "log_file": "/tmp/stride_applications_queue/job_004_PYTHONPATH=._gpu1.log"
70
- },
71
- {
72
- "job_id": 5,
73
- "name": "PYTHONPATH=.",
74
- "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 5 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
75
- "gpu": 1,
76
- "status": "failed",
77
- "elapsed": 21,
78
- "idle_time": 291,
79
- "exit_code": 1,
80
- "retries": 1,
81
- "log_file": "/tmp/stride_applications_queue/job_005_PYTHONPATH=._gpu1.log"
82
- },
83
- {
84
- "job_id": 6,
85
- "name": "PYTHONPATH=.",
86
- "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 0 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
87
- "gpu": 1,
88
- "status": "failed",
89
- "elapsed": 19,
90
- "idle_time": 334,
91
- "exit_code": 1,
92
- "retries": 1,
93
- "log_file": "/tmp/stride_applications_queue/job_006_PYTHONPATH=._gpu1.log"
94
- },
95
- {
96
- "job_id": 7,
97
- "name": "PYTHONPATH=.",
98
- "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 1 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
99
- "gpu": 1,
100
- "status": "failed",
101
- "elapsed": 17,
102
- "idle_time": 96,
103
- "exit_code": 1,
104
- "retries": 1,
105
- "log_file": "/tmp/stride_applications_queue/job_007_PYTHONPATH=._gpu1.log"
106
- },
107
- {
108
- "job_id": 8,
109
- "name": "PYTHONPATH=.",
110
- "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 2 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
111
- "gpu": 1,
112
- "status": "failed",
113
- "elapsed": 18,
114
- "idle_time": 135,
115
- "exit_code": 1,
116
- "retries": 1,
117
- "log_file": "/tmp/stride_applications_queue/job_008_PYTHONPATH=._gpu1.log"
118
- },
119
- {
120
- "job_id": 9,
121
- "name": "PYTHONPATH=.",
122
- "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 3 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
123
- "gpu": 1,
124
- "status": "failed",
125
- "elapsed": 20,
126
- "idle_time": 251,
127
- "exit_code": 1,
128
- "retries": 1,
129
- "log_file": "/tmp/stride_applications_queue/job_009_PYTHONPATH=._gpu1.log"
130
- },
131
- {
132
- "job_id": 10,
133
- "name": "PYTHONPATH=.",
134
- "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 4 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
135
- "gpu": 1,
136
- "status": "failed",
137
- "elapsed": 18,
138
- "idle_time": 175,
139
- "exit_code": 1,
140
- "retries": 1,
141
- "log_file": "/tmp/stride_applications_queue/job_010_PYTHONPATH=._gpu1.log"
142
  },
143
  {
144
- "job_id": 11,
145
  "name": "PYTHONPATH=.",
146
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 5 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
147
- "gpu": 1,
148
- "status": "failed",
149
- "elapsed": 18,
150
- "idle_time": 215,
151
- "exit_code": 1,
152
- "retries": 1,
153
- "log_file": "/tmp/stride_applications_queue/job_011_PYTHONPATH=._gpu1.log"
154
  }
155
  ]
156
  }
 
1
  {
2
+ "timestamp": "2026-04-24 22:17:09",
3
+ "total": 4,
4
+ "pending": 2,
5
+ "running": 2,
6
+ "done": 0,
7
+ "failed": 0,
8
  "stale": 0,
9
  "retrying": 0,
10
  "jobs": [
11
  {
12
  "job_id": 0,
13
  "name": "PYTHONPATH=.",
14
+ "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 1 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
15
  "gpu": 0,
16
+ "status": "running",
17
+ "elapsed": 30,
18
+ "idle_time": 14,
19
+ "exit_code": -1,
20
  "retries": 0,
21
  "log_file": "/tmp/stride_applications_queue/job_000_PYTHONPATH=._gpu0.log"
22
  },
23
  {
24
  "job_id": 1,
25
  "name": "PYTHONPATH=.",
26
+ "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 3 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
27
  "gpu": 1,
28
+ "status": "running",
29
+ "elapsed": 30,
30
+ "idle_time": 14,
31
+ "exit_code": -1,
32
+ "retries": 0,
33
  "log_file": "/tmp/stride_applications_queue/job_001_PYTHONPATH=._gpu1.log"
34
  },
35
  {
36
  "job_id": 2,
37
  "name": "PYTHONPATH=.",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 4 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
39
+ "gpu": -1,
40
+ "status": "pending",
41
+ "elapsed": 0,
42
+ "idle_time": 0,
43
+ "exit_code": -1,
44
+ "retries": 0,
45
+ "log_file": ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  },
47
  {
48
+ "job_id": 3,
49
  "name": "PYTHONPATH=.",
50
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 5 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
51
+ "gpu": -1,
52
+ "status": "pending",
53
+ "elapsed": 0,
54
+ "idle_time": 0,
55
+ "exit_code": -1,
56
+ "retries": 0,
57
+ "log_file": ""
58
  }
59
  ]
60
  }