amirali1985 commited on
Commit
e548a1f
·
verified ·
1 Parent(s): 6b49009

Upload queue_status.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. queue_status.json +49 -49
queue_status.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "timestamp": "2026-04-24 22:14:18",
3
  "total": 12,
4
  "pending": 0,
5
  "running": 0,
6
- "done": 3,
7
- "failed": 9,
8
  "stale": 0,
9
  "retrying": 0,
10
  "jobs": [
@@ -14,8 +14,8 @@
14
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 0 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
15
  "gpu": 0,
16
  "status": "done",
17
- "elapsed": 1557,
18
- "idle_time": 586,
19
  "exit_code": 0,
20
  "retries": 0,
21
  "log_file": "/tmp/stride_applications_queue/job_000_PYTHONPATH=._gpu0.log"
@@ -25,24 +25,24 @@
25
  "name": "PYTHONPATH=.",
26
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 1 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
27
  "gpu": 1,
28
- "status": "done",
29
- "elapsed": 1579,
30
- "idle_time": 564,
31
- "exit_code": 0,
32
- "retries": 0,
33
  "log_file": "/tmp/stride_applications_queue/job_001_PYTHONPATH=._gpu1.log"
34
  },
35
  {
36
  "job_id": 2,
37
  "name": "PYTHONPATH=.",
38
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 2 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
39
- "gpu": 0,
40
  "status": "failed",
41
- "elapsed": 3,
42
- "idle_time": 559,
43
- "exit_code": -15,
44
  "retries": 1,
45
- "log_file": "/tmp/stride_applications_queue/job_002_PYTHONPATH=._gpu0.log"
46
  },
47
  {
48
  "job_id": 3,
@@ -50,9 +50,9 @@
50
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 3 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
51
  "gpu": 1,
52
  "status": "failed",
53
- "elapsed": 3,
54
- "idle_time": 559,
55
- "exit_code": -15,
56
  "retries": 1,
57
  "log_file": "/tmp/stride_applications_queue/job_003_PYTHONPATH=._gpu1.log"
58
  },
@@ -60,97 +60,97 @@
60
  "job_id": 4,
61
  "name": "PYTHONPATH=.",
62
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 4 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
63
- "gpu": 0,
64
  "status": "failed",
65
- "elapsed": 21,
66
- "idle_time": 362,
67
  "exit_code": 1,
68
  "retries": 1,
69
- "log_file": "/tmp/stride_applications_queue/job_004_PYTHONPATH=._gpu0.log"
70
  },
71
  {
72
  "job_id": 5,
73
  "name": "PYTHONPATH=.",
74
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 5 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
75
- "gpu": 0,
76
  "status": "failed",
77
  "elapsed": 21,
78
- "idle_time": 507,
79
  "exit_code": 1,
80
  "retries": 1,
81
- "log_file": "/tmp/stride_applications_queue/job_005_PYTHONPATH=._gpu0.log"
82
  },
83
  {
84
  "job_id": 6,
85
  "name": "PYTHONPATH=.",
86
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 0 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
87
- "gpu": 0,
88
  "status": "failed",
89
- "elapsed": 25,
90
- "idle_time": 404,
91
  "exit_code": 1,
92
  "retries": 1,
93
- "log_file": "/tmp/stride_applications_queue/job_006_PYTHONPATH=._gpu0.log"
94
  },
95
  {
96
  "job_id": 7,
97
  "name": "PYTHONPATH=.",
98
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 1 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
99
- "gpu": 0,
100
  "status": "failed",
101
- "elapsed": 19,
102
- "idle_time": 274,
103
  "exit_code": 1,
104
  "retries": 1,
105
- "log_file": "/tmp/stride_applications_queue/job_007_PYTHONPATH=._gpu0.log"
106
  },
107
  {
108
  "job_id": 8,
109
  "name": "PYTHONPATH=.",
110
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 2 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
111
  "gpu": 1,
112
- "status": "done",
113
- "elapsed": 552,
114
- "idle_time": 1,
115
- "exit_code": 0,
116
- "retries": 0,
117
  "log_file": "/tmp/stride_applications_queue/job_008_PYTHONPATH=._gpu1.log"
118
  },
119
  {
120
  "job_id": 9,
121
  "name": "PYTHONPATH=.",
122
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 3 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
123
- "gpu": 0,
124
  "status": "failed",
125
- "elapsed": 24,
126
- "idle_time": 457,
127
  "exit_code": 1,
128
  "retries": 1,
129
- "log_file": "/tmp/stride_applications_queue/job_009_PYTHONPATH=._gpu0.log"
130
  },
131
  {
132
  "job_id": 10,
133
  "name": "PYTHONPATH=.",
134
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 4 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
135
- "gpu": 0,
136
  "status": "failed",
137
  "elapsed": 18,
138
- "idle_time": 227,
139
  "exit_code": 1,
140
  "retries": 1,
141
- "log_file": "/tmp/stride_applications_queue/job_010_PYTHONPATH=._gpu0.log"
142
  },
143
  {
144
  "job_id": 11,
145
  "name": "PYTHONPATH=.",
146
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 5 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
147
- "gpu": 0,
148
  "status": "failed",
149
- "elapsed": 20,
150
- "idle_time": 314,
151
  "exit_code": 1,
152
  "retries": 1,
153
- "log_file": "/tmp/stride_applications_queue/job_011_PYTHONPATH=._gpu0.log"
154
  }
155
  ]
156
  }
 
1
  {
2
+ "timestamp": "2026-04-24 22:14:25",
3
  "total": 12,
4
  "pending": 0,
5
  "running": 0,
6
+ "done": 1,
7
+ "failed": 11,
8
  "stale": 0,
9
  "retrying": 0,
10
  "jobs": [
 
14
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 0 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
15
  "gpu": 0,
16
  "status": "done",
17
+ "elapsed": 555,
18
+ "idle_time": 1,
19
  "exit_code": 0,
20
  "retries": 0,
21
  "log_file": "/tmp/stride_applications_queue/job_000_PYTHONPATH=._gpu0.log"
 
25
  "name": "PYTHONPATH=.",
26
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 1 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
27
  "gpu": 1,
28
+ "status": "failed",
29
+ "elapsed": 21,
30
+ "idle_time": 515,
31
+ "exit_code": 1,
32
+ "retries": 1,
33
  "log_file": "/tmp/stride_applications_queue/job_001_PYTHONPATH=._gpu1.log"
34
  },
35
  {
36
  "job_id": 2,
37
  "name": "PYTHONPATH=.",
38
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 2 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
39
+ "gpu": 1,
40
  "status": "failed",
41
+ "elapsed": 21,
42
+ "idle_time": 467,
43
+ "exit_code": 1,
44
  "retries": 1,
45
+ "log_file": "/tmp/stride_applications_queue/job_002_PYTHONPATH=._gpu1.log"
46
  },
47
  {
48
  "job_id": 3,
 
50
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 3 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
51
  "gpu": 1,
52
  "status": "failed",
53
+ "elapsed": 21,
54
+ "idle_time": 426,
55
+ "exit_code": 1,
56
  "retries": 1,
57
  "log_file": "/tmp/stride_applications_queue/job_003_PYTHONPATH=._gpu1.log"
58
  },
 
60
  "job_id": 4,
61
  "name": "PYTHONPATH=.",
62
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 4 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
63
+ "gpu": 1,
64
  "status": "failed",
65
+ "elapsed": 25,
66
+ "idle_time": 376,
67
  "exit_code": 1,
68
  "retries": 1,
69
+ "log_file": "/tmp/stride_applications_queue/job_004_PYTHONPATH=._gpu1.log"
70
  },
71
  {
72
  "job_id": 5,
73
  "name": "PYTHONPATH=.",
74
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 5 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
75
+ "gpu": 1,
76
  "status": "failed",
77
  "elapsed": 21,
78
+ "idle_time": 291,
79
  "exit_code": 1,
80
  "retries": 1,
81
+ "log_file": "/tmp/stride_applications_queue/job_005_PYTHONPATH=._gpu1.log"
82
  },
83
  {
84
  "job_id": 6,
85
  "name": "PYTHONPATH=.",
86
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 0 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
87
+ "gpu": 1,
88
  "status": "failed",
89
+ "elapsed": 19,
90
+ "idle_time": 334,
91
  "exit_code": 1,
92
  "retries": 1,
93
+ "log_file": "/tmp/stride_applications_queue/job_006_PYTHONPATH=._gpu1.log"
94
  },
95
  {
96
  "job_id": 7,
97
  "name": "PYTHONPATH=.",
98
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 1 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
99
+ "gpu": 1,
100
  "status": "failed",
101
+ "elapsed": 17,
102
+ "idle_time": 96,
103
  "exit_code": 1,
104
  "retries": 1,
105
+ "log_file": "/tmp/stride_applications_queue/job_007_PYTHONPATH=._gpu1.log"
106
  },
107
  {
108
  "job_id": 8,
109
  "name": "PYTHONPATH=.",
110
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 2 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
111
  "gpu": 1,
112
+ "status": "failed",
113
+ "elapsed": 18,
114
+ "idle_time": 135,
115
+ "exit_code": 1,
116
+ "retries": 1,
117
  "log_file": "/tmp/stride_applications_queue/job_008_PYTHONPATH=._gpu1.log"
118
  },
119
  {
120
  "job_id": 9,
121
  "name": "PYTHONPATH=.",
122
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 3 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
123
+ "gpu": 1,
124
  "status": "failed",
125
+ "elapsed": 20,
126
+ "idle_time": 251,
127
  "exit_code": 1,
128
  "retries": 1,
129
+ "log_file": "/tmp/stride_applications_queue/job_009_PYTHONPATH=._gpu1.log"
130
  },
131
  {
132
  "job_id": 10,
133
  "name": "PYTHONPATH=.",
134
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 4 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
135
+ "gpu": 1,
136
  "status": "failed",
137
  "elapsed": 18,
138
+ "idle_time": 175,
139
  "exit_code": 1,
140
  "retries": 1,
141
+ "log_file": "/tmp/stride_applications_queue/job_010_PYTHONPATH=._gpu1.log"
142
  },
143
  {
144
  "job_id": 11,
145
  "name": "PYTHONPATH=.",
146
  "cmd": "PYTHONPATH=. python -m scripts.train_qwen --rate 0.01 --seed 5 --lr 5e-5 --proxy openwebtext/subset_20M_seed0.jsonl --re",
147
+ "gpu": 1,
148
  "status": "failed",
149
+ "elapsed": 18,
150
+ "idle_time": 215,
151
  "exit_code": 1,
152
  "retries": 1,
153
+ "log_file": "/tmp/stride_applications_queue/job_011_PYTHONPATH=._gpu1.log"
154
  }
155
  ]
156
  }