amirali1985 commited on
Commit
cc646a8
·
verified ·
1 Parent(s): 983ce29

queue status update

Browse files
Files changed (1) hide show
  1. queue_status.json +388 -388
queue_status.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "timestamp": "2026-04-12 08:07:30",
3
  "total": 120,
4
- "pending": 84,
5
- "running": 3,
6
  "done": 15,
7
- "failed": 18,
8
  "stale": 0,
9
  "retrying": 0,
10
  "jobs": [
@@ -15,7 +15,7 @@
15
  "gpu": 0,
16
  "status": "done",
17
  "elapsed": 1786,
18
- "idle_time": 20388,
19
  "exit_code": 0,
20
  "retries": 0,
21
  "log_file": "/tmp/gpu_queue/job_000_add_sub_baseline_10K_gpu0.log"
@@ -27,7 +27,7 @@
27
  "gpu": 1,
28
  "status": "done",
29
  "elapsed": 2101,
30
- "idle_time": 20073,
31
  "exit_code": 0,
32
  "retries": 0,
33
  "log_file": "/tmp/gpu_queue/job_001_add_sub_baseline_25K_gpu1.log"
@@ -39,7 +39,7 @@
39
  "gpu": 2,
40
  "status": "done",
41
  "elapsed": 4753,
42
- "idle_time": 17421,
43
  "exit_code": 0,
44
  "retries": 0,
45
  "log_file": "/tmp/gpu_queue/job_002_as_sorl_abs10_K1_25K_gpu2.log"
@@ -51,7 +51,7 @@
51
  "gpu": 0,
52
  "status": "done",
53
  "elapsed": 2366,
54
- "idle_time": 19808,
55
  "exit_code": 0,
56
  "retries": 0,
57
  "log_file": "/tmp/gpu_queue/job_003_add_sub_baseline_50K_gpu0.log"
@@ -63,7 +63,7 @@
63
  "gpu": 1,
64
  "status": "done",
65
  "elapsed": 6727,
66
- "idle_time": 15447,
67
  "exit_code": 0,
68
  "retries": 0,
69
  "log_file": "/tmp/gpu_queue/job_004_as_sorl_abs10_K1_50K_gpu1.log"
@@ -75,7 +75,7 @@
75
  "gpu": 2,
76
  "status": "done",
77
  "elapsed": 3112,
78
- "idle_time": 19062,
79
  "exit_code": 0,
80
  "retries": 0,
81
  "log_file": "/tmp/gpu_queue/job_005_add_sub_baseline_100K_gpu2.log"
@@ -87,7 +87,7 @@
87
  "gpu": 0,
88
  "status": "done",
89
  "elapsed": 8856,
90
- "idle_time": 11528,
91
  "exit_code": 0,
92
  "retries": 0,
93
  "log_file": "/tmp/gpu_queue/job_006_as_sorl_abs10_K1_100K_gpu0.log"
@@ -120,25 +120,25 @@
120
  "job_id": 9,
121
  "name": "add_sub_baseline_500K",
122
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 500000 --num_epochs 20 --pus",
123
- "gpu": -1,
124
- "status": "pending",
125
- "elapsed": 0,
126
- "idle_time": 0,
127
- "exit_code": -1,
128
- "retries": 0,
129
- "log_file": ""
130
  },
131
  {
132
  "job_id": 10,
133
  "name": "as_sorl_abs10_K1_10K",
134
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 10000 --abs_vocab 10 --K 1 --num",
135
- "gpu": -1,
136
- "status": "pending",
137
- "elapsed": 0,
138
- "idle_time": 0,
139
- "exit_code": -1,
140
- "retries": 0,
141
- "log_file": ""
142
  },
143
  {
144
  "job_id": 11,
@@ -147,7 +147,7 @@
147
  "gpu": 2,
148
  "status": "failed",
149
  "elapsed": 5,
150
- "idle_time": 1,
151
  "exit_code": -9,
152
  "retries": 1,
153
  "log_file": "/tmp/gpu_queue/job_011_as_sorl_abs2_K4_500K_gpu2.log"
@@ -156,13 +156,13 @@
156
  "job_id": 12,
157
  "name": "as_sorl_abs5_K4_500K",
158
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 5 --K 4 --num",
159
- "gpu": -1,
160
- "status": "pending",
161
- "elapsed": 0,
162
- "idle_time": 0,
163
- "exit_code": -1,
164
- "retries": 0,
165
- "log_file": ""
166
  },
167
  {
168
  "job_id": 13,
@@ -171,7 +171,7 @@
171
  "gpu": 0,
172
  "status": "failed",
173
  "elapsed": 5,
174
- "idle_time": 1035,
175
  "exit_code": -9,
176
  "retries": 1,
177
  "log_file": "/tmp/gpu_queue/job_013_as_sorl_abs10_K4_500K_gpu0.log"
@@ -180,109 +180,109 @@
180
  "job_id": 14,
181
  "name": "as_sorl_abs16_K4_500K",
182
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 16 --K 4 --nu",
183
- "gpu": -1,
184
- "status": "pending",
185
- "elapsed": 0,
186
- "idle_time": 0,
187
- "exit_code": -1,
188
- "retries": 0,
189
- "log_file": ""
190
  },
191
  {
192
  "job_id": 15,
193
  "name": "as_sorl_abs20_K4_500K",
194
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 20 --K 4 --nu",
195
- "gpu": -1,
196
- "status": "pending",
197
- "elapsed": 0,
198
- "idle_time": 0,
199
- "exit_code": -1,
200
- "retries": 0,
201
- "log_file": ""
202
  },
203
  {
204
  "job_id": 16,
205
  "name": "as_sorl_abs50_K4_500K",
206
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 50 --K 4 --nu",
207
- "gpu": -1,
208
- "status": "pending",
209
- "elapsed": 0,
210
- "idle_time": 0,
211
- "exit_code": -1,
212
- "retries": 0,
213
- "log_file": ""
214
  },
215
  {
216
  "job_id": 17,
217
  "name": "as_sorl_abs70_K4_500K",
218
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 70 --K 4 --nu",
219
- "gpu": -1,
220
- "status": "pending",
221
- "elapsed": 0,
222
- "idle_time": 0,
223
- "exit_code": -1,
224
- "retries": 0,
225
- "log_file": ""
226
  },
227
  {
228
  "job_id": 18,
229
  "name": "as_sorl_abs100_K4_500K",
230
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 100 --K 4 --n",
231
  "gpu": 2,
232
- "status": "running",
233
- "elapsed": 1,
234
- "idle_time": 1,
235
- "exit_code": -1,
236
- "retries": 0,
237
  "log_file": "/tmp/gpu_queue/job_018_as_sorl_abs100_K4_500K_gpu2.log"
238
  },
239
  {
240
  "job_id": 19,
241
  "name": "as_sorl_abs2_K1_500K",
242
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 2 --K 1 --num",
243
- "gpu": -1,
244
- "status": "pending",
245
- "elapsed": 0,
246
- "idle_time": 0,
247
  "exit_code": -1,
248
- "retries": 0,
249
- "log_file": ""
250
  },
251
  {
252
  "job_id": 20,
253
  "name": "as_sorl_abs5_K1_500K",
254
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 5 --K 1 --num",
255
- "gpu": -1,
256
- "status": "pending",
257
- "elapsed": 0,
258
- "idle_time": 0,
259
  "exit_code": -1,
260
- "retries": 0,
261
- "log_file": ""
262
  },
263
  {
264
  "job_id": 21,
265
  "name": "as_sorl_abs10_K1_500K",
266
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 10 --K 1 --nu",
267
- "gpu": -1,
268
- "status": "pending",
269
- "elapsed": 0,
270
- "idle_time": 0,
271
- "exit_code": -1,
272
- "retries": 0,
273
- "log_file": ""
274
  },
275
  {
276
  "job_id": 22,
277
  "name": "as_sorl_abs16_K1_500K",
278
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 16 --K 1 --nu",
279
- "gpu": -1,
280
- "status": "pending",
281
- "elapsed": 0,
282
- "idle_time": 0,
283
  "exit_code": -1,
284
- "retries": 0,
285
- "log_file": ""
286
  },
287
  {
288
  "job_id": 23,
@@ -300,37 +300,37 @@
300
  "job_id": 24,
301
  "name": "as_sorl_abs50_K1_500K",
302
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 50 --K 1 --nu",
303
- "gpu": -1,
304
- "status": "pending",
305
- "elapsed": 0,
306
- "idle_time": 0,
307
- "exit_code": -1,
308
- "retries": 0,
309
- "log_file": ""
310
  },
311
  {
312
  "job_id": 25,
313
  "name": "as_sorl_abs70_K1_500K",
314
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 70 --K 1 --nu",
315
- "gpu": -1,
316
- "status": "pending",
317
- "elapsed": 0,
318
- "idle_time": 0,
319
- "exit_code": -1,
320
- "retries": 0,
321
- "log_file": ""
322
  },
323
  {
324
  "job_id": 26,
325
  "name": "as_sorl_abs100_K1_500K",
326
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 100 --K 1 --n",
327
- "gpu": -1,
328
- "status": "pending",
329
- "elapsed": 0,
330
- "idle_time": 0,
331
- "exit_code": -1,
332
- "retries": 0,
333
- "log_file": ""
334
  },
335
  {
336
  "job_id": 27,
@@ -339,7 +339,7 @@
339
  "gpu": 0,
340
  "status": "failed",
341
  "elapsed": 6,
342
- "idle_time": 5,
343
  "exit_code": -9,
344
  "retries": 1,
345
  "log_file": "/tmp/gpu_queue/job_027_as_sorl_abs5_K1_25K_gpu0.log"
@@ -348,109 +348,109 @@
348
  "job_id": 28,
349
  "name": "as_sorl_abs30_K1_25K",
350
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 30 --K 1 --num",
351
- "gpu": -1,
352
- "status": "pending",
353
- "elapsed": 0,
354
- "idle_time": 0,
355
- "exit_code": -1,
356
- "retries": 0,
357
- "log_file": ""
358
  },
359
  {
360
  "job_id": 29,
361
  "name": "as_sorl_abs50_K1_25K",
362
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 50 --K 1 --num",
363
- "gpu": -1,
364
- "status": "pending",
365
- "elapsed": 0,
366
- "idle_time": 0,
367
  "exit_code": -1,
368
  "retries": 0,
369
- "log_file": ""
370
  },
371
  {
372
  "job_id": 30,
373
  "name": "as_sorl_abs5_K1_50K",
374
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 50000 --abs_vocab 5 --K 1 --num_",
375
- "gpu": -1,
376
- "status": "pending",
377
- "elapsed": 0,
378
  "idle_time": 0,
379
- "exit_code": -1,
380
- "retries": 0,
381
- "log_file": ""
382
  },
383
  {
384
  "job_id": 31,
385
  "name": "as_sorl_abs30_K1_50K",
386
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 50000 --abs_vocab 30 --K 1 --num",
387
- "gpu": -1,
388
- "status": "pending",
389
- "elapsed": 0,
390
- "idle_time": 0,
391
- "exit_code": -1,
392
- "retries": 0,
393
- "log_file": ""
394
  },
395
  {
396
  "job_id": 32,
397
  "name": "as_sorl_abs50_K1_50K",
398
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 50000 --abs_vocab 50 --K 1 --num",
399
- "gpu": -1,
400
- "status": "pending",
401
- "elapsed": 0,
402
- "idle_time": 0,
403
- "exit_code": -1,
404
- "retries": 0,
405
- "log_file": ""
406
  },
407
  {
408
  "job_id": 33,
409
  "name": "as_sorl_abs5_K1_100K",
410
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 5 --K 1 --num",
411
- "gpu": -1,
412
- "status": "pending",
413
- "elapsed": 0,
414
- "idle_time": 0,
415
- "exit_code": -1,
416
- "retries": 0,
417
- "log_file": ""
418
  },
419
  {
420
  "job_id": 34,
421
  "name": "as_sorl_abs30_K1_100K",
422
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 30 --K 1 --nu",
423
- "gpu": -1,
424
- "status": "pending",
425
- "elapsed": 0,
426
- "idle_time": 0,
427
- "exit_code": -1,
428
- "retries": 0,
429
- "log_file": ""
430
  },
431
  {
432
  "job_id": 35,
433
  "name": "as_sorl_abs50_K1_100K",
434
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 50 --K 1 --nu",
435
- "gpu": -1,
436
- "status": "pending",
437
- "elapsed": 0,
438
- "idle_time": 0,
439
- "exit_code": -1,
440
- "retries": 0,
441
- "log_file": ""
442
  },
443
  {
444
  "job_id": 36,
445
  "name": "as_sorl_abs5_K4_25K",
446
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 5 --K 4 --num_",
447
- "gpu": -1,
448
- "status": "pending",
449
- "elapsed": 0,
450
  "idle_time": 0,
451
- "exit_code": -1,
452
- "retries": 0,
453
- "log_file": ""
454
  },
455
  {
456
  "job_id": 37,
@@ -459,7 +459,7 @@
459
  "gpu": 0,
460
  "status": "done",
461
  "elapsed": 4510,
462
- "idle_time": 15293,
463
  "exit_code": 0,
464
  "retries": 0,
465
  "log_file": "/tmp/gpu_queue/job_037_as_sorl_abs10_K4_25K_gpu0.log"
@@ -468,25 +468,25 @@
468
  "job_id": 38,
469
  "name": "as_sorl_abs30_K4_25K",
470
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 30 --K 4 --num",
471
- "gpu": -1,
472
- "status": "pending",
473
- "elapsed": 0,
474
- "idle_time": 0,
475
- "exit_code": -1,
476
- "retries": 0,
477
- "log_file": ""
478
  },
479
  {
480
  "job_id": 39,
481
  "name": "as_sorl_abs50_K4_25K",
482
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 50 --K 4 --num",
483
- "gpu": -1,
484
- "status": "pending",
485
- "elapsed": 0,
486
- "idle_time": 0,
487
- "exit_code": -1,
488
- "retries": 0,
489
- "log_file": ""
490
  },
491
  {
492
  "job_id": 40,
@@ -507,7 +507,7 @@
507
  "gpu": 2,
508
  "status": "failed",
509
  "elapsed": 6,
510
- "idle_time": 5,
511
  "exit_code": -9,
512
  "retries": 1,
513
  "log_file": "/tmp/gpu_queue/job_041_as_sorl_abs10_K4_50K_gpu2.log"
@@ -531,7 +531,7 @@
531
  "gpu": 0,
532
  "status": "done",
533
  "elapsed": 5510,
534
- "idle_time": 9779,
535
  "exit_code": 0,
536
  "retries": 0,
537
  "log_file": "/tmp/gpu_queue/job_043_as_sorl_abs50_K4_50K_gpu0.log"
@@ -540,49 +540,49 @@
540
  "job_id": 44,
541
  "name": "as_sorl_abs5_K4_100K",
542
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 5 --K 4 --num",
543
- "gpu": -1,
544
- "status": "pending",
545
- "elapsed": 0,
546
- "idle_time": 0,
547
- "exit_code": -1,
548
- "retries": 0,
549
- "log_file": ""
550
  },
551
  {
552
  "job_id": 45,
553
  "name": "as_sorl_abs10_K4_100K",
554
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 10 --K 4 --nu",
555
  "gpu": 0,
556
- "status": "running",
557
- "elapsed": 1,
558
- "idle_time": 1,
559
- "exit_code": -1,
560
- "retries": 0,
561
  "log_file": "/tmp/gpu_queue/job_045_as_sorl_abs10_K4_100K_gpu0.log"
562
  },
563
  {
564
  "job_id": 46,
565
  "name": "as_sorl_abs30_K4_100K",
566
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 30 --K 4 --nu",
567
- "gpu": -1,
568
- "status": "pending",
569
- "elapsed": 0,
570
- "idle_time": 0,
571
- "exit_code": -1,
572
- "retries": 0,
573
- "log_file": ""
574
  },
575
  {
576
  "job_id": 47,
577
  "name": "as_sorl_abs50_K4_100K",
578
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 50 --K 4 --nu",
579
- "gpu": -1,
580
- "status": "pending",
581
- "elapsed": 0,
582
- "idle_time": 0,
583
- "exit_code": -1,
584
- "retries": 0,
585
- "log_file": ""
586
  },
587
  {
588
  "job_id": 48,
@@ -615,7 +615,7 @@
615
  "gpu": 2,
616
  "status": "failed",
617
  "elapsed": 6,
618
- "idle_time": 11,
619
  "exit_code": -9,
620
  "retries": 1,
621
  "log_file": "/tmp/gpu_queue/job_050_as_sorl_abs10_K1_zipf10.0_500K_gpu2.log"
@@ -636,13 +636,13 @@
636
  "job_id": 52,
637
  "name": "as_sorl_abs10_K4_zipf5.0_500K",
638
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 10 --K 4 --al",
639
- "gpu": -1,
640
- "status": "pending",
641
- "elapsed": 0,
642
- "idle_time": 0,
643
- "exit_code": -1,
644
- "retries": 0,
645
- "log_file": ""
646
  },
647
  {
648
  "job_id": 53,
@@ -651,7 +651,7 @@
651
  "gpu": 1,
652
  "status": "failed",
653
  "elapsed": 5,
654
- "idle_time": 13457,
655
  "exit_code": -9,
656
  "retries": 1,
657
  "log_file": "/tmp/gpu_queue/job_053_as_sorl_abs10_K4_zipf10.0_500K_gpu1.log"
@@ -660,13 +660,13 @@
660
  "job_id": 54,
661
  "name": "as_sorl_abs100_K1_zipf2.0_500K",
662
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 100 --K 1 --a",
663
- "gpu": -1,
664
- "status": "pending",
665
- "elapsed": 0,
666
- "idle_time": 0,
667
- "exit_code": -1,
668
- "retries": 0,
669
- "log_file": ""
670
  },
671
  {
672
  "job_id": 55,
@@ -675,7 +675,7 @@
675
  "gpu": 0,
676
  "status": "failed",
677
  "elapsed": 5,
678
- "idle_time": 6157,
679
  "exit_code": -9,
680
  "retries": 1,
681
  "log_file": "/tmp/gpu_queue/job_055_as_sorl_abs100_K1_zipf5.0_500K_gpu0.log"
@@ -696,49 +696,49 @@
696
  "job_id": 57,
697
  "name": "as_sorl_abs100_K4_zipf2.0_500K",
698
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 100 --K 4 --a",
699
- "gpu": -1,
700
- "status": "pending",
701
- "elapsed": 0,
702
- "idle_time": 0,
703
- "exit_code": -1,
704
- "retries": 0,
705
- "log_file": ""
706
  },
707
  {
708
  "job_id": 58,
709
  "name": "as_sorl_abs100_K4_zipf5.0_500K",
710
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 100 --K 4 --a",
711
  "gpu": 1,
712
- "status": "running",
713
- "elapsed": 1,
714
- "idle_time": 1,
715
- "exit_code": -1,
716
- "retries": 0,
717
  "log_file": "/tmp/gpu_queue/job_058_as_sorl_abs100_K4_zipf5.0_500K_gpu1.log"
718
  },
719
  {
720
  "job_id": 59,
721
  "name": "as_sorl_abs100_K4_zipf10.0_500K",
722
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 100 --K 4 --a",
723
- "gpu": -1,
724
- "status": "pending",
725
- "elapsed": 0,
726
- "idle_time": 0,
727
- "exit_code": -1,
728
- "retries": 0,
729
- "log_file": ""
730
  },
731
  {
732
  "job_id": 60,
733
  "name": "as_baseline_25K_1L3H510d",
734
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 25000 --num_epochs 20 --n_la",
735
- "gpu": -1,
736
- "status": "pending",
737
- "elapsed": 0,
738
- "idle_time": 0,
739
- "exit_code": -1,
740
- "retries": 0,
741
- "log_file": ""
742
  },
743
  {
744
  "job_id": 61,
@@ -759,7 +759,7 @@
759
  "gpu": 0,
760
  "status": "failed",
761
  "elapsed": 5,
762
- "idle_time": 0,
763
  "exit_code": -9,
764
  "retries": 1,
765
  "log_file": "/tmp/gpu_queue/job_062_as_baseline_50K_1L3H510d_gpu0.log"
@@ -771,7 +771,7 @@
771
  "gpu": 0,
772
  "status": "done",
773
  "elapsed": 5352,
774
- "idle_time": 6171,
775
  "exit_code": 0,
776
  "retries": 0,
777
  "log_file": "/tmp/gpu_queue/job_063_as_sorl_abs10_K1_50K_1L3H510d_gpu0.log"
@@ -783,7 +783,7 @@
783
  "gpu": 1,
784
  "status": "failed",
785
  "elapsed": 4,
786
- "idle_time": 21,
787
  "exit_code": -15,
788
  "retries": 1,
789
  "log_file": "/tmp/gpu_queue/job_064_as_baseline_100K_1L3H510d_gpu1.log"
@@ -795,7 +795,7 @@
795
  "gpu": 0,
796
  "status": "failed",
797
  "elapsed": 6,
798
- "idle_time": 12,
799
  "exit_code": -9,
800
  "retries": 1,
801
  "log_file": "/tmp/gpu_queue/job_065_as_sorl_abs10_K1_100K_1L3H510d_gpu0.log"
@@ -804,37 +804,37 @@
804
  "job_id": 66,
805
  "name": "as_baseline_250K_1L3H510d",
806
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 250000 --num_epochs 20 --n_l",
807
- "gpu": -1,
808
- "status": "pending",
809
- "elapsed": 0,
810
- "idle_time": 0,
811
- "exit_code": -1,
812
- "retries": 0,
813
- "log_file": ""
814
  },
815
  {
816
  "job_id": 67,
817
  "name": "as_sorl_abs10_K1_250K_1L3H510d",
818
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 250000 --abs_vocab 10 --K 1 --nu",
819
- "gpu": -1,
820
- "status": "pending",
821
- "elapsed": 0,
822
- "idle_time": 0,
823
- "exit_code": -1,
824
- "retries": 0,
825
- "log_file": ""
826
  },
827
  {
828
  "job_id": 68,
829
  "name": "as_baseline_500K_1L3H510d",
830
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 500000 --num_epochs 20 --n_l",
831
- "gpu": -1,
832
- "status": "pending",
833
- "elapsed": 0,
834
- "idle_time": 0,
835
- "exit_code": -1,
836
- "retries": 0,
837
- "log_file": ""
838
  },
839
  {
840
  "job_id": 69,
@@ -843,7 +843,7 @@
843
  "gpu": 2,
844
  "status": "failed",
845
  "elapsed": 2,
846
- "idle_time": 19,
847
  "exit_code": -15,
848
  "retries": 1,
849
  "log_file": "/tmp/gpu_queue/job_069_as_sorl_abs10_K1_500K_1L3H510d_gpu2.log"
@@ -855,7 +855,7 @@
855
  "gpu": 1,
856
  "status": "failed",
857
  "elapsed": 6,
858
- "idle_time": 11,
859
  "exit_code": -9,
860
  "retries": 1,
861
  "log_file": "/tmp/gpu_queue/job_070_as_baseline_25K_1L2H256d_gpu1.log"
@@ -864,13 +864,13 @@
864
  "job_id": 71,
865
  "name": "as_sorl_abs10_K1_25K_1L2H256d",
866
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 10 --K 1 --num",
867
- "gpu": -1,
868
- "status": "pending",
869
- "elapsed": 0,
870
- "idle_time": 0,
871
- "exit_code": -1,
872
- "retries": 0,
873
- "log_file": ""
874
  },
875
  {
876
  "job_id": 72,
@@ -903,7 +903,7 @@
903
  "gpu": 2,
904
  "status": "done",
905
  "elapsed": 2797,
906
- "idle_time": 14622,
907
  "exit_code": 0,
908
  "retries": 0,
909
  "log_file": "/tmp/gpu_queue/job_074_as_baseline_100K_1L2H256d_gpu2.log"
@@ -915,7 +915,7 @@
915
  "gpu": 1,
916
  "status": "failed",
917
  "elapsed": 6,
918
- "idle_time": 5,
919
  "exit_code": -9,
920
  "retries": 1,
921
  "log_file": "/tmp/gpu_queue/job_075_as_sorl_abs10_K1_100K_1L2H256d_gpu1.log"
@@ -927,7 +927,7 @@
927
  "gpu": 0,
928
  "status": "done",
929
  "elapsed": 4832,
930
- "idle_time": 1467,
931
  "exit_code": 0,
932
  "retries": 0,
933
  "log_file": "/tmp/gpu_queue/job_076_as_baseline_250K_1L2H256d_gpu0.log"
@@ -951,7 +951,7 @@
951
  "gpu": 2,
952
  "status": "done",
953
  "elapsed": 7064,
954
- "idle_time": 11995,
955
  "exit_code": 0,
956
  "retries": 0,
957
  "log_file": "/tmp/gpu_queue/job_078_as_baseline_500K_1L2H256d_gpu2.log"
@@ -960,13 +960,13 @@
960
  "job_id": 79,
961
  "name": "as_sorl_abs10_K1_500K_1L2H256d",
962
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 10 --K 1 --nu",
963
- "gpu": -1,
964
- "status": "pending",
965
- "elapsed": 0,
966
- "idle_time": 0,
967
- "exit_code": -1,
968
- "retries": 0,
969
- "log_file": ""
970
  },
971
  {
972
  "job_id": 80,
@@ -975,7 +975,7 @@
975
  "gpu": 1,
976
  "status": "done",
977
  "elapsed": 1971,
978
- "idle_time": 13473,
979
  "exit_code": 0,
980
  "retries": 0,
981
  "log_file": "/tmp/gpu_queue/job_080_as_baseline_25K_2L1H128d_gpu1.log"
@@ -984,25 +984,25 @@
984
  "job_id": 81,
985
  "name": "as_sorl_abs10_K1_25K_2L1H128d",
986
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 10 --K 1 --num",
987
- "gpu": -1,
988
- "status": "pending",
989
- "elapsed": 0,
990
- "idle_time": 0,
991
- "exit_code": -1,
992
- "retries": 0,
993
- "log_file": ""
994
  },
995
  {
996
  "job_id": 82,
997
  "name": "as_baseline_50K_2L1H128d",
998
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 50000 --num_epochs 20 --n_la",
999
- "gpu": -1,
1000
- "status": "pending",
1001
- "elapsed": 0,
1002
- "idle_time": 0,
1003
- "exit_code": -1,
1004
- "retries": 0,
1005
- "log_file": ""
1006
  },
1007
  {
1008
  "job_id": 83,
@@ -1020,37 +1020,37 @@
1020
  "job_id": 84,
1021
  "name": "as_baseline_100K_2L1H128d",
1022
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 100000 --num_epochs 20 --n_l",
1023
- "gpu": -1,
1024
- "status": "pending",
1025
- "elapsed": 0,
1026
- "idle_time": 0,
1027
- "exit_code": -1,
1028
- "retries": 0,
1029
- "log_file": ""
1030
  },
1031
  {
1032
  "job_id": 85,
1033
  "name": "as_sorl_abs10_K1_100K_2L1H128d",
1034
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 10 --K 1 --nu",
1035
- "gpu": -1,
1036
- "status": "pending",
1037
- "elapsed": 0,
1038
- "idle_time": 0,
1039
- "exit_code": -1,
1040
- "retries": 0,
1041
- "log_file": ""
1042
  },
1043
  {
1044
  "job_id": 86,
1045
  "name": "as_baseline_250K_2L1H128d",
1046
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 250000 --num_epochs 20 --n_l",
1047
- "gpu": -1,
1048
- "status": "pending",
1049
- "elapsed": 0,
1050
- "idle_time": 0,
1051
- "exit_code": -1,
1052
- "retries": 0,
1053
- "log_file": ""
1054
  },
1055
  {
1056
  "job_id": 87,
@@ -1059,7 +1059,7 @@
1059
  "gpu": 1,
1060
  "status": "failed",
1061
  "elapsed": 5,
1062
- "idle_time": 27,
1063
  "exit_code": -9,
1064
  "retries": 1,
1065
  "log_file": "/tmp/gpu_queue/job_087_as_sorl_abs10_K1_250K_2L1H128d_gpu1.log"
@@ -1083,7 +1083,7 @@
1083
  "gpu": 1,
1084
  "status": "failed",
1085
  "elapsed": 5,
1086
- "idle_time": 26,
1087
  "exit_code": -9,
1088
  "retries": 1,
1089
  "log_file": "/tmp/gpu_queue/job_089_as_sorl_abs10_K1_500K_2L1H128d_gpu1.log"
@@ -1104,13 +1104,13 @@
1104
  "job_id": 91,
1105
  "name": "as_sorl_abs10_K4_10K",
1106
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 10000 --abs_vocab 10 --K 4 --num",
1107
- "gpu": -1,
1108
- "status": "pending",
1109
- "elapsed": 0,
1110
- "idle_time": 0,
1111
- "exit_code": -1,
1112
- "retries": 0,
1113
- "log_file": ""
1114
  },
1115
  {
1116
  "job_id": 92,
@@ -1119,7 +1119,7 @@
1119
  "gpu": 0,
1120
  "status": "done",
1121
  "elapsed": 3477,
1122
- "idle_time": 6302,
1123
  "exit_code": 0,
1124
  "retries": 0,
1125
  "log_file": "/tmp/gpu_queue/job_092_as_sorl_abs30_K4_10K_gpu0.log"
@@ -1128,25 +1128,25 @@
1128
  "job_id": 93,
1129
  "name": "as_sorl_abs50_K4_10K",
1130
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 10000 --abs_vocab 50 --K 4 --num",
1131
- "gpu": -1,
1132
- "status": "pending",
1133
- "elapsed": 0,
1134
- "idle_time": 0,
1135
- "exit_code": -1,
1136
- "retries": 0,
1137
- "log_file": ""
1138
  },
1139
  {
1140
  "job_id": 94,
1141
  "name": "as_sorl_abs5_K4_250K",
1142
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 250000 --abs_vocab 5 --K 4 --num",
1143
- "gpu": -1,
1144
- "status": "pending",
1145
- "elapsed": 0,
1146
- "idle_time": 0,
1147
- "exit_code": -1,
1148
- "retries": 0,
1149
- "log_file": ""
1150
  },
1151
  {
1152
  "job_id": 95,
@@ -1155,7 +1155,7 @@
1155
  "gpu": 2,
1156
  "status": "failed",
1157
  "elapsed": 5,
1158
- "idle_time": 23,
1159
  "exit_code": -9,
1160
  "retries": 1,
1161
  "log_file": "/tmp/gpu_queue/job_095_as_sorl_abs10_K4_250K_gpu2.log"
@@ -1164,25 +1164,25 @@
1164
  "job_id": 96,
1165
  "name": "as_sorl_abs30_K4_250K",
1166
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 250000 --abs_vocab 30 --K 4 --nu",
1167
- "gpu": -1,
1168
- "status": "pending",
1169
- "elapsed": 0,
1170
- "idle_time": 0,
1171
- "exit_code": -1,
1172
- "retries": 0,
1173
- "log_file": ""
1174
  },
1175
  {
1176
  "job_id": 97,
1177
  "name": "as_sorl_abs50_K4_250K",
1178
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 250000 --abs_vocab 50 --K 4 --nu",
1179
- "gpu": -1,
1180
- "status": "pending",
1181
- "elapsed": 0,
1182
- "idle_time": 0,
1183
- "exit_code": -1,
1184
- "retries": 0,
1185
- "log_file": ""
1186
  },
1187
  {
1188
  "job_id": 98,
@@ -1191,7 +1191,7 @@
1191
  "gpu": 1,
1192
  "status": "failed",
1193
  "elapsed": 3,
1194
- "idle_time": 20,
1195
  "exit_code": -15,
1196
  "retries": 1,
1197
  "log_file": "/tmp/gpu_queue/job_098_as_sorl_abs2_K1_100K_gpu1.log"
@@ -1203,7 +1203,7 @@
1203
  "gpu": 1,
1204
  "status": "failed",
1205
  "elapsed": 5,
1206
- "idle_time": 0,
1207
  "exit_code": -9,
1208
  "retries": 1,
1209
  "log_file": "/tmp/gpu_queue/job_099_as_sorl_abs2_K4_100K_gpu1.log"
@@ -1212,13 +1212,13 @@
1212
  "job_id": 100,
1213
  "name": "as_sorl_abs16_K1_100K",
1214
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 16 --K 1 --nu",
1215
- "gpu": -1,
1216
- "status": "pending",
1217
- "elapsed": 0,
1218
- "idle_time": 0,
1219
- "exit_code": -1,
1220
- "retries": 0,
1221
- "log_file": ""
1222
  },
1223
  {
1224
  "job_id": 101,
 
1
  {
2
+ "timestamp": "2026-04-12 08:09:30",
3
  "total": 120,
4
+ "pending": 35,
5
+ "running": 4,
6
  "done": 15,
7
+ "failed": 66,
8
  "stale": 0,
9
  "retrying": 0,
10
  "jobs": [
 
15
  "gpu": 0,
16
  "status": "done",
17
  "elapsed": 1786,
18
+ "idle_time": 20508,
19
  "exit_code": 0,
20
  "retries": 0,
21
  "log_file": "/tmp/gpu_queue/job_000_add_sub_baseline_10K_gpu0.log"
 
27
  "gpu": 1,
28
  "status": "done",
29
  "elapsed": 2101,
30
+ "idle_time": 20193,
31
  "exit_code": 0,
32
  "retries": 0,
33
  "log_file": "/tmp/gpu_queue/job_001_add_sub_baseline_25K_gpu1.log"
 
39
  "gpu": 2,
40
  "status": "done",
41
  "elapsed": 4753,
42
+ "idle_time": 17541,
43
  "exit_code": 0,
44
  "retries": 0,
45
  "log_file": "/tmp/gpu_queue/job_002_as_sorl_abs10_K1_25K_gpu2.log"
 
51
  "gpu": 0,
52
  "status": "done",
53
  "elapsed": 2366,
54
+ "idle_time": 19928,
55
  "exit_code": 0,
56
  "retries": 0,
57
  "log_file": "/tmp/gpu_queue/job_003_add_sub_baseline_50K_gpu0.log"
 
63
  "gpu": 1,
64
  "status": "done",
65
  "elapsed": 6727,
66
+ "idle_time": 15567,
67
  "exit_code": 0,
68
  "retries": 0,
69
  "log_file": "/tmp/gpu_queue/job_004_as_sorl_abs10_K1_50K_gpu1.log"
 
75
  "gpu": 2,
76
  "status": "done",
77
  "elapsed": 3112,
78
+ "idle_time": 19181,
79
  "exit_code": 0,
80
  "retries": 0,
81
  "log_file": "/tmp/gpu_queue/job_005_add_sub_baseline_100K_gpu2.log"
 
87
  "gpu": 0,
88
  "status": "done",
89
  "elapsed": 8856,
90
+ "idle_time": 11648,
91
  "exit_code": 0,
92
  "retries": 0,
93
  "log_file": "/tmp/gpu_queue/job_006_as_sorl_abs10_K1_100K_gpu0.log"
 
120
  "job_id": 9,
121
  "name": "add_sub_baseline_500K",
122
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 500000 --num_epochs 20 --pus",
123
+ "gpu": 1,
124
+ "status": "failed",
125
+ "elapsed": 6,
126
+ "idle_time": 15,
127
+ "exit_code": -9,
128
+ "retries": 1,
129
+ "log_file": "/tmp/gpu_queue/job_009_add_sub_baseline_500K_gpu1.log"
130
  },
131
  {
132
  "job_id": 10,
133
  "name": "as_sorl_abs10_K1_10K",
134
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 10000 --abs_vocab 10 --K 1 --num",
135
+ "gpu": 2,
136
+ "status": "failed",
137
+ "elapsed": 6,
138
+ "idle_time": 65,
139
+ "exit_code": -9,
140
+ "retries": 1,
141
+ "log_file": "/tmp/gpu_queue/job_010_as_sorl_abs10_K1_10K_gpu2.log"
142
  },
143
  {
144
  "job_id": 11,
 
147
  "gpu": 2,
148
  "status": "failed",
149
  "elapsed": 5,
150
+ "idle_time": 120,
151
  "exit_code": -9,
152
  "retries": 1,
153
  "log_file": "/tmp/gpu_queue/job_011_as_sorl_abs2_K4_500K_gpu2.log"
 
156
  "job_id": 12,
157
  "name": "as_sorl_abs5_K4_500K",
158
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 5 --K 4 --num",
159
+ "gpu": 1,
160
+ "status": "failed",
161
+ "elapsed": 5,
162
+ "idle_time": 80,
163
+ "exit_code": -9,
164
+ "retries": 1,
165
+ "log_file": "/tmp/gpu_queue/job_012_as_sorl_abs5_K4_500K_gpu1.log"
166
  },
167
  {
168
  "job_id": 13,
 
171
  "gpu": 0,
172
  "status": "failed",
173
  "elapsed": 5,
174
+ "idle_time": 1154,
175
  "exit_code": -9,
176
  "retries": 1,
177
  "log_file": "/tmp/gpu_queue/job_013_as_sorl_abs10_K4_500K_gpu0.log"
 
180
  "job_id": 14,
181
  "name": "as_sorl_abs16_K4_500K",
182
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 16 --K 4 --nu",
183
+ "gpu": 0,
184
+ "status": "failed",
185
+ "elapsed": 6,
186
+ "idle_time": 45,
187
+ "exit_code": -9,
188
+ "retries": 1,
189
+ "log_file": "/tmp/gpu_queue/job_014_as_sorl_abs16_K4_500K_gpu0.log"
190
  },
191
  {
192
  "job_id": 15,
193
  "name": "as_sorl_abs20_K4_500K",
194
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 20 --K 4 --nu",
195
+ "gpu": 2,
196
+ "status": "failed",
197
+ "elapsed": 6,
198
+ "idle_time": 15,
199
+ "exit_code": -9,
200
+ "retries": 1,
201
+ "log_file": "/tmp/gpu_queue/job_015_as_sorl_abs20_K4_500K_gpu2.log"
202
  },
203
  {
204
  "job_id": 16,
205
  "name": "as_sorl_abs50_K4_500K",
206
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 50 --K 4 --nu",
207
+ "gpu": 1,
208
+ "status": "failed",
209
+ "elapsed": 6,
210
+ "idle_time": 50,
211
+ "exit_code": -9,
212
+ "retries": 1,
213
+ "log_file": "/tmp/gpu_queue/job_016_as_sorl_abs50_K4_500K_gpu1.log"
214
  },
215
  {
216
  "job_id": 17,
217
  "name": "as_sorl_abs70_K4_500K",
218
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 70 --K 4 --nu",
219
+ "gpu": 2,
220
+ "status": "failed",
221
+ "elapsed": 6,
222
+ "idle_time": 50,
223
+ "exit_code": -9,
224
+ "retries": 1,
225
+ "log_file": "/tmp/gpu_queue/job_017_as_sorl_abs70_K4_500K_gpu2.log"
226
  },
227
  {
228
  "job_id": 18,
229
  "name": "as_sorl_abs100_K4_500K",
230
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 100 --K 4 --n",
231
  "gpu": 2,
232
+ "status": "failed",
233
+ "elapsed": 5,
234
+ "idle_time": 110,
235
+ "exit_code": -9,
236
+ "retries": 1,
237
  "log_file": "/tmp/gpu_queue/job_018_as_sorl_abs100_K4_500K_gpu2.log"
238
  },
239
  {
240
  "job_id": 19,
241
  "name": "as_sorl_abs2_K1_500K",
242
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 2 --K 1 --num",
243
+ "gpu": 1,
244
+ "status": "running",
245
+ "elapsed": 1,
246
+ "idle_time": 1,
247
  "exit_code": -1,
248
+ "retries": 1,
249
+ "log_file": "/tmp/gpu_queue/job_019_as_sorl_abs2_K1_500K_gpu1.log"
250
  },
251
  {
252
  "job_id": 20,
253
  "name": "as_sorl_abs5_K1_500K",
254
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 5 --K 1 --num",
255
+ "gpu": 1,
256
+ "status": "running",
257
+ "elapsed": 1,
258
+ "idle_time": 1,
259
  "exit_code": -1,
260
+ "retries": 1,
261
+ "log_file": "/tmp/gpu_queue/job_020_as_sorl_abs5_K1_500K_gpu1.log"
262
  },
263
  {
264
  "job_id": 21,
265
  "name": "as_sorl_abs10_K1_500K",
266
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 10 --K 1 --nu",
267
+ "gpu": 0,
268
+ "status": "failed",
269
+ "elapsed": 6,
270
+ "idle_time": 59,
271
+ "exit_code": -9,
272
+ "retries": 1,
273
+ "log_file": "/tmp/gpu_queue/job_021_as_sorl_abs10_K1_500K_gpu0.log"
274
  },
275
  {
276
  "job_id": 22,
277
  "name": "as_sorl_abs16_K1_500K",
278
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 16 --K 1 --nu",
279
+ "gpu": 0,
280
+ "status": "running",
281
+ "elapsed": 1,
282
+ "idle_time": 1,
283
  "exit_code": -1,
284
+ "retries": 1,
285
+ "log_file": "/tmp/gpu_queue/job_022_as_sorl_abs16_K1_500K_gpu0.log"
286
  },
287
  {
288
  "job_id": 23,
 
300
  "job_id": 24,
301
  "name": "as_sorl_abs50_K1_500K",
302
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 50 --K 1 --nu",
303
+ "gpu": 1,
304
+ "status": "failed",
305
+ "elapsed": 4,
306
+ "idle_time": 41,
307
+ "exit_code": -9,
308
+ "retries": 1,
309
+ "log_file": "/tmp/gpu_queue/job_024_as_sorl_abs50_K1_500K_gpu1.log"
310
  },
311
  {
312
  "job_id": 25,
313
  "name": "as_sorl_abs70_K1_500K",
314
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 70 --K 1 --nu",
315
+ "gpu": 2,
316
+ "status": "failed",
317
+ "elapsed": 5,
318
+ "idle_time": 50,
319
+ "exit_code": -9,
320
+ "retries": 1,
321
+ "log_file": "/tmp/gpu_queue/job_025_as_sorl_abs70_K1_500K_gpu2.log"
322
  },
323
  {
324
  "job_id": 26,
325
  "name": "as_sorl_abs100_K1_500K",
326
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 100 --K 1 --n",
327
+ "gpu": 1,
328
+ "status": "failed",
329
+ "elapsed": 6,
330
+ "idle_time": 25,
331
+ "exit_code": -9,
332
+ "retries": 1,
333
+ "log_file": "/tmp/gpu_queue/job_026_as_sorl_abs100_K1_500K_gpu1.log"
334
  },
335
  {
336
  "job_id": 27,
 
339
  "gpu": 0,
340
  "status": "failed",
341
  "elapsed": 6,
342
+ "idle_time": 125,
343
  "exit_code": -9,
344
  "retries": 1,
345
  "log_file": "/tmp/gpu_queue/job_027_as_sorl_abs5_K1_25K_gpu0.log"
 
348
  "job_id": 28,
349
  "name": "as_sorl_abs30_K1_25K",
350
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 30 --K 1 --num",
351
+ "gpu": 1,
352
+ "status": "failed",
353
+ "elapsed": 5,
354
+ "idle_time": 59,
355
+ "exit_code": -9,
356
+ "retries": 1,
357
+ "log_file": "/tmp/gpu_queue/job_028_as_sorl_abs30_K1_25K_gpu1.log"
358
  },
359
  {
360
  "job_id": 29,
361
  "name": "as_sorl_abs50_K1_25K",
362
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 50 --K 1 --num",
363
+ "gpu": 0,
364
+ "status": "running",
365
+ "elapsed": 1,
366
+ "idle_time": 1,
367
  "exit_code": -1,
368
  "retries": 0,
369
+ "log_file": "/tmp/gpu_queue/job_029_as_sorl_abs50_K1_25K_gpu0.log"
370
  },
371
  {
372
  "job_id": 30,
373
  "name": "as_sorl_abs5_K1_50K",
374
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 50000 --abs_vocab 5 --K 1 --num_",
375
+ "gpu": 2,
376
+ "status": "failed",
377
+ "elapsed": 6,
378
  "idle_time": 0,
379
+ "exit_code": -9,
380
+ "retries": 1,
381
+ "log_file": "/tmp/gpu_queue/job_030_as_sorl_abs5_K1_50K_gpu2.log"
382
  },
383
  {
384
  "job_id": 31,
385
  "name": "as_sorl_abs30_K1_50K",
386
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 50000 --abs_vocab 30 --K 1 --num",
387
+ "gpu": 2,
388
+ "status": "failed",
389
+ "elapsed": 5,
390
+ "idle_time": 105,
391
+ "exit_code": -9,
392
+ "retries": 1,
393
+ "log_file": "/tmp/gpu_queue/job_031_as_sorl_abs30_K1_50K_gpu2.log"
394
  },
395
  {
396
  "job_id": 32,
397
  "name": "as_sorl_abs50_K1_50K",
398
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 50000 --abs_vocab 50 --K 1 --num",
399
+ "gpu": 0,
400
+ "status": "failed",
401
+ "elapsed": 6,
402
+ "idle_time": 25,
403
+ "exit_code": -9,
404
+ "retries": 1,
405
+ "log_file": "/tmp/gpu_queue/job_032_as_sorl_abs50_K1_50K_gpu0.log"
406
  },
407
  {
408
  "job_id": 33,
409
  "name": "as_sorl_abs5_K1_100K",
410
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 5 --K 1 --num",
411
+ "gpu": 2,
412
+ "status": "failed",
413
+ "elapsed": 7,
414
+ "idle_time": 64,
415
+ "exit_code": -9,
416
+ "retries": 1,
417
+ "log_file": "/tmp/gpu_queue/job_033_as_sorl_abs5_K1_100K_gpu2.log"
418
  },
419
  {
420
  "job_id": 34,
421
  "name": "as_sorl_abs30_K1_100K",
422
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 30 --K 1 --nu",
423
+ "gpu": 0,
424
+ "status": "failed",
425
+ "elapsed": 5,
426
+ "idle_time": 89,
427
+ "exit_code": -9,
428
+ "retries": 1,
429
+ "log_file": "/tmp/gpu_queue/job_034_as_sorl_abs30_K1_100K_gpu0.log"
430
  },
431
  {
432
  "job_id": 35,
433
  "name": "as_sorl_abs50_K1_100K",
434
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 50 --K 1 --nu",
435
+ "gpu": 1,
436
+ "status": "failed",
437
+ "elapsed": 5,
438
+ "idle_time": 105,
439
+ "exit_code": -9,
440
+ "retries": 1,
441
+ "log_file": "/tmp/gpu_queue/job_035_as_sorl_abs50_K1_100K_gpu1.log"
442
  },
443
  {
444
  "job_id": 36,
445
  "name": "as_sorl_abs5_K4_25K",
446
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 5 --K 4 --num_",
447
+ "gpu": 2,
448
+ "status": "failed",
449
+ "elapsed": 5,
450
  "idle_time": 0,
451
+ "exit_code": -9,
452
+ "retries": 1,
453
+ "log_file": "/tmp/gpu_queue/job_036_as_sorl_abs5_K4_25K_gpu2.log"
454
  },
455
  {
456
  "job_id": 37,
 
459
  "gpu": 0,
460
  "status": "done",
461
  "elapsed": 4510,
462
+ "idle_time": 15413,
463
  "exit_code": 0,
464
  "retries": 0,
465
  "log_file": "/tmp/gpu_queue/job_037_as_sorl_abs10_K4_25K_gpu0.log"
 
468
  "job_id": 38,
469
  "name": "as_sorl_abs30_K4_25K",
470
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 30 --K 4 --num",
471
+ "gpu": 1,
472
+ "status": "failed",
473
+ "elapsed": 6,
474
+ "idle_time": 89,
475
+ "exit_code": -9,
476
+ "retries": 1,
477
+ "log_file": "/tmp/gpu_queue/job_038_as_sorl_abs30_K4_25K_gpu1.log"
478
  },
479
  {
480
  "job_id": 39,
481
  "name": "as_sorl_abs50_K4_25K",
482
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 50 --K 4 --num",
483
+ "gpu": 2,
484
+ "status": "failed",
485
+ "elapsed": 4,
486
+ "idle_time": 41,
487
+ "exit_code": -9,
488
+ "retries": 1,
489
+ "log_file": "/tmp/gpu_queue/job_039_as_sorl_abs50_K4_25K_gpu2.log"
490
  },
491
  {
492
  "job_id": 40,
 
507
  "gpu": 2,
508
  "status": "failed",
509
  "elapsed": 6,
510
+ "idle_time": 125,
511
  "exit_code": -9,
512
  "retries": 1,
513
  "log_file": "/tmp/gpu_queue/job_041_as_sorl_abs10_K4_50K_gpu2.log"
 
531
  "gpu": 0,
532
  "status": "done",
533
  "elapsed": 5510,
534
+ "idle_time": 9899,
535
  "exit_code": 0,
536
  "retries": 0,
537
  "log_file": "/tmp/gpu_queue/job_043_as_sorl_abs50_K4_50K_gpu0.log"
 
540
  "job_id": 44,
541
  "name": "as_sorl_abs5_K4_100K",
542
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 5 --K 4 --num",
543
+ "gpu": 2,
544
+ "status": "failed",
545
+ "elapsed": 6,
546
+ "idle_time": 31,
547
+ "exit_code": -9,
548
+ "retries": 1,
549
+ "log_file": "/tmp/gpu_queue/job_044_as_sorl_abs5_K4_100K_gpu2.log"
550
  },
551
  {
552
  "job_id": 45,
553
  "name": "as_sorl_abs10_K4_100K",
554
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 10 --K 4 --nu",
555
  "gpu": 0,
556
+ "status": "failed",
557
+ "elapsed": 6,
558
+ "idle_time": 110,
559
+ "exit_code": -9,
560
+ "retries": 1,
561
  "log_file": "/tmp/gpu_queue/job_045_as_sorl_abs10_K4_100K_gpu0.log"
562
  },
563
  {
564
  "job_id": 46,
565
  "name": "as_sorl_abs30_K4_100K",
566
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 30 --K 4 --nu",
567
+ "gpu": 2,
568
+ "status": "failed",
569
+ "elapsed": 6,
570
+ "idle_time": 95,
571
+ "exit_code": -9,
572
+ "retries": 1,
573
+ "log_file": "/tmp/gpu_queue/job_046_as_sorl_abs30_K4_100K_gpu2.log"
574
  },
575
  {
576
  "job_id": 47,
577
  "name": "as_sorl_abs50_K4_100K",
578
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 50 --K 4 --nu",
579
+ "gpu": 0,
580
+ "status": "failed",
581
+ "elapsed": 6,
582
+ "idle_time": 75,
583
+ "exit_code": -9,
584
+ "retries": 1,
585
+ "log_file": "/tmp/gpu_queue/job_047_as_sorl_abs50_K4_100K_gpu0.log"
586
  },
587
  {
588
  "job_id": 48,
 
615
  "gpu": 2,
616
  "status": "failed",
617
  "elapsed": 6,
618
+ "idle_time": 131,
619
  "exit_code": -9,
620
  "retries": 1,
621
  "log_file": "/tmp/gpu_queue/job_050_as_sorl_abs10_K1_zipf10.0_500K_gpu2.log"
 
636
  "job_id": 52,
637
  "name": "as_sorl_abs10_K4_zipf5.0_500K",
638
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 10 --K 4 --al",
639
+ "gpu": 0,
640
+ "status": "failed",
641
+ "elapsed": 5,
642
+ "idle_time": 90,
643
+ "exit_code": -9,
644
+ "retries": 1,
645
+ "log_file": "/tmp/gpu_queue/job_052_as_sorl_abs10_K4_zipf5.0_500K_gpu0.log"
646
  },
647
  {
648
  "job_id": 53,
 
651
  "gpu": 1,
652
  "status": "failed",
653
  "elapsed": 5,
654
+ "idle_time": 13577,
655
  "exit_code": -9,
656
  "retries": 1,
657
  "log_file": "/tmp/gpu_queue/job_053_as_sorl_abs10_K4_zipf10.0_500K_gpu1.log"
 
660
  "job_id": 54,
661
  "name": "as_sorl_abs100_K1_zipf2.0_500K",
662
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 100 --K 1 --a",
663
+ "gpu": 0,
664
+ "status": "failed",
665
+ "elapsed": 6,
666
+ "idle_time": 75,
667
+ "exit_code": -9,
668
+ "retries": 1,
669
+ "log_file": "/tmp/gpu_queue/job_054_as_sorl_abs100_K1_zipf2.0_500K_gpu0.log"
670
  },
671
  {
672
  "job_id": 55,
 
675
  "gpu": 0,
676
  "status": "failed",
677
  "elapsed": 5,
678
+ "idle_time": 6277,
679
  "exit_code": -9,
680
  "retries": 1,
681
  "log_file": "/tmp/gpu_queue/job_055_as_sorl_abs100_K1_zipf5.0_500K_gpu0.log"
 
696
  "job_id": 57,
697
  "name": "as_sorl_abs100_K4_zipf2.0_500K",
698
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 100 --K 4 --a",
699
+ "gpu": 2,
700
+ "status": "failed",
701
+ "elapsed": 5,
702
+ "idle_time": 80,
703
+ "exit_code": -9,
704
+ "retries": 1,
705
+ "log_file": "/tmp/gpu_queue/job_057_as_sorl_abs100_K4_zipf2.0_500K_gpu2.log"
706
  },
707
  {
708
  "job_id": 58,
709
  "name": "as_sorl_abs100_K4_zipf5.0_500K",
710
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 100 --K 4 --a",
711
  "gpu": 1,
712
+ "status": "failed",
713
+ "elapsed": 6,
714
+ "idle_time": 110,
715
+ "exit_code": -9,
716
+ "retries": 1,
717
  "log_file": "/tmp/gpu_queue/job_058_as_sorl_abs100_K4_zipf5.0_500K_gpu1.log"
718
  },
719
  {
720
  "job_id": 59,
721
  "name": "as_sorl_abs100_K4_zipf10.0_500K",
722
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 100 --K 4 --a",
723
+ "gpu": 1,
724
+ "status": "failed",
725
+ "elapsed": 6,
726
+ "idle_time": 74,
727
+ "exit_code": -9,
728
+ "retries": 1,
729
+ "log_file": "/tmp/gpu_queue/job_059_as_sorl_abs100_K4_zipf10.0_500_gpu1.log"
730
  },
731
  {
732
  "job_id": 60,
733
  "name": "as_baseline_25K_1L3H510d",
734
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 25000 --num_epochs 20 --n_la",
735
+ "gpu": 2,
736
+ "status": "failed",
737
+ "elapsed": 5,
738
+ "idle_time": 25,
739
+ "exit_code": -9,
740
+ "retries": 1,
741
+ "log_file": "/tmp/gpu_queue/job_060_as_baseline_25K_1L3H510d_gpu2.log"
742
  },
743
  {
744
  "job_id": 61,
 
759
  "gpu": 0,
760
  "status": "failed",
761
  "elapsed": 5,
762
+ "idle_time": 120,
763
  "exit_code": -9,
764
  "retries": 1,
765
  "log_file": "/tmp/gpu_queue/job_062_as_baseline_50K_1L3H510d_gpu0.log"
 
771
  "gpu": 0,
772
  "status": "done",
773
  "elapsed": 5352,
774
+ "idle_time": 6291,
775
  "exit_code": 0,
776
  "retries": 0,
777
  "log_file": "/tmp/gpu_queue/job_063_as_sorl_abs10_K1_50K_1L3H510d_gpu0.log"
 
783
  "gpu": 1,
784
  "status": "failed",
785
  "elapsed": 4,
786
+ "idle_time": 141,
787
  "exit_code": -15,
788
  "retries": 1,
789
  "log_file": "/tmp/gpu_queue/job_064_as_baseline_100K_1L3H510d_gpu1.log"
 
795
  "gpu": 0,
796
  "status": "failed",
797
  "elapsed": 6,
798
+ "idle_time": 132,
799
  "exit_code": -9,
800
  "retries": 1,
801
  "log_file": "/tmp/gpu_queue/job_065_as_sorl_abs10_K1_100K_1L3H510d_gpu0.log"
 
804
  "job_id": 66,
805
  "name": "as_baseline_250K_1L3H510d",
806
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 250000 --num_epochs 20 --n_l",
807
+ "gpu": 0,
808
+ "status": "failed",
809
+ "elapsed": 5,
810
+ "idle_time": 105,
811
+ "exit_code": -9,
812
+ "retries": 1,
813
+ "log_file": "/tmp/gpu_queue/job_066_as_baseline_250K_1L3H510d_gpu0.log"
814
  },
815
  {
816
  "job_id": 67,
817
  "name": "as_sorl_abs10_K1_250K_1L3H510d",
818
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 250000 --abs_vocab 10 --K 1 --nu",
819
+ "gpu": 1,
820
+ "status": "failed",
821
+ "elapsed": 6,
822
+ "idle_time": 30,
823
+ "exit_code": -9,
824
+ "retries": 1,
825
+ "log_file": "/tmp/gpu_queue/job_067_as_sorl_abs10_K1_250K_1L3H510d_gpu1.log"
826
  },
827
  {
828
  "job_id": 68,
829
  "name": "as_baseline_500K_1L3H510d",
830
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 500000 --num_epochs 20 --n_l",
831
+ "gpu": 1,
832
+ "status": "failed",
833
+ "elapsed": 6,
834
+ "idle_time": 10,
835
+ "exit_code": -9,
836
+ "retries": 1,
837
+ "log_file": "/tmp/gpu_queue/job_068_as_baseline_500K_1L3H510d_gpu1.log"
838
  },
839
  {
840
  "job_id": 69,
 
843
  "gpu": 2,
844
  "status": "failed",
845
  "elapsed": 2,
846
+ "idle_time": 139,
847
  "exit_code": -15,
848
  "retries": 1,
849
  "log_file": "/tmp/gpu_queue/job_069_as_sorl_abs10_K1_500K_1L3H510d_gpu2.log"
 
855
  "gpu": 1,
856
  "status": "failed",
857
  "elapsed": 6,
858
+ "idle_time": 131,
859
  "exit_code": -9,
860
  "retries": 1,
861
  "log_file": "/tmp/gpu_queue/job_070_as_baseline_25K_1L2H256d_gpu1.log"
 
864
  "job_id": 71,
865
  "name": "as_sorl_abs10_K1_25K_1L2H256d",
866
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 10 --K 1 --num",
867
+ "gpu": 0,
868
+ "status": "failed",
869
+ "elapsed": 6,
870
+ "idle_time": 95,
871
+ "exit_code": -9,
872
+ "retries": 1,
873
+ "log_file": "/tmp/gpu_queue/job_071_as_sorl_abs10_K1_25K_1L2H256d_gpu0.log"
874
  },
875
  {
876
  "job_id": 72,
 
903
  "gpu": 2,
904
  "status": "done",
905
  "elapsed": 2797,
906
+ "idle_time": 14742,
907
  "exit_code": 0,
908
  "retries": 0,
909
  "log_file": "/tmp/gpu_queue/job_074_as_baseline_100K_1L2H256d_gpu2.log"
 
915
  "gpu": 1,
916
  "status": "failed",
917
  "elapsed": 6,
918
+ "idle_time": 125,
919
  "exit_code": -9,
920
  "retries": 1,
921
  "log_file": "/tmp/gpu_queue/job_075_as_sorl_abs10_K1_100K_1L2H256d_gpu1.log"
 
927
  "gpu": 0,
928
  "status": "done",
929
  "elapsed": 4832,
930
+ "idle_time": 1587,
931
  "exit_code": 0,
932
  "retries": 0,
933
  "log_file": "/tmp/gpu_queue/job_076_as_baseline_250K_1L2H256d_gpu0.log"
 
951
  "gpu": 2,
952
  "status": "done",
953
  "elapsed": 7064,
954
+ "idle_time": 12115,
955
  "exit_code": 0,
956
  "retries": 0,
957
  "log_file": "/tmp/gpu_queue/job_078_as_baseline_500K_1L2H256d_gpu2.log"
 
960
  "job_id": 79,
961
  "name": "as_sorl_abs10_K1_500K_1L2H256d",
962
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 10 --K 1 --nu",
963
+ "gpu": 2,
964
+ "status": "failed",
965
+ "elapsed": 4,
966
+ "idle_time": 41,
967
+ "exit_code": -9,
968
+ "retries": 1,
969
+ "log_file": "/tmp/gpu_queue/job_079_as_sorl_abs10_K1_500K_1L2H256d_gpu2.log"
970
  },
971
  {
972
  "job_id": 80,
 
975
  "gpu": 1,
976
  "status": "done",
977
  "elapsed": 1971,
978
+ "idle_time": 13593,
979
  "exit_code": 0,
980
  "retries": 0,
981
  "log_file": "/tmp/gpu_queue/job_080_as_baseline_25K_2L1H128d_gpu1.log"
 
984
  "job_id": 81,
985
  "name": "as_sorl_abs10_K1_25K_2L1H128d",
986
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 10 --K 1 --num",
987
+ "gpu": 2,
988
+ "status": "failed",
989
+ "elapsed": 6,
990
+ "idle_time": 80,
991
+ "exit_code": -9,
992
+ "retries": 1,
993
+ "log_file": "/tmp/gpu_queue/job_081_as_sorl_abs10_K1_25K_2L1H128d_gpu2.log"
994
  },
995
  {
996
  "job_id": 82,
997
  "name": "as_baseline_50K_2L1H128d",
998
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 50000 --num_epochs 20 --n_la",
999
+ "gpu": 0,
1000
+ "status": "failed",
1001
+ "elapsed": 5,
1002
+ "idle_time": 60,
1003
+ "exit_code": -9,
1004
+ "retries": 1,
1005
+ "log_file": "/tmp/gpu_queue/job_082_as_baseline_50K_2L1H128d_gpu0.log"
1006
  },
1007
  {
1008
  "job_id": 83,
 
1020
  "job_id": 84,
1021
  "name": "as_baseline_100K_2L1H128d",
1022
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 100000 --num_epochs 20 --n_l",
1023
+ "gpu": 0,
1024
+ "status": "failed",
1025
+ "elapsed": 6,
1026
+ "idle_time": 31,
1027
+ "exit_code": -9,
1028
+ "retries": 1,
1029
+ "log_file": "/tmp/gpu_queue/job_084_as_baseline_100K_2L1H128d_gpu0.log"
1030
  },
1031
  {
1032
  "job_id": 85,
1033
  "name": "as_sorl_abs10_K1_100K_2L1H128d",
1034
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 10 --K 1 --nu",
1035
+ "gpu": 0,
1036
+ "status": "failed",
1037
+ "elapsed": 6,
1038
+ "idle_time": 10,
1039
+ "exit_code": -9,
1040
+ "retries": 1,
1041
+ "log_file": "/tmp/gpu_queue/job_085_as_sorl_abs10_K1_100K_2L1H128d_gpu0.log"
1042
  },
1043
  {
1044
  "job_id": 86,
1045
  "name": "as_baseline_250K_2L1H128d",
1046
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 250000 --num_epochs 20 --n_l",
1047
+ "gpu": 1,
1048
+ "status": "failed",
1049
+ "elapsed": 6,
1050
+ "idle_time": 45,
1051
+ "exit_code": -9,
1052
+ "retries": 1,
1053
+ "log_file": "/tmp/gpu_queue/job_086_as_baseline_250K_2L1H128d_gpu1.log"
1054
  },
1055
  {
1056
  "job_id": 87,
 
1059
  "gpu": 1,
1060
  "status": "failed",
1061
  "elapsed": 5,
1062
+ "idle_time": 147,
1063
  "exit_code": -9,
1064
  "retries": 1,
1065
  "log_file": "/tmp/gpu_queue/job_087_as_sorl_abs10_K1_250K_2L1H128d_gpu1.log"
 
1083
  "gpu": 1,
1084
  "status": "failed",
1085
  "elapsed": 5,
1086
+ "idle_time": 146,
1087
  "exit_code": -9,
1088
  "retries": 1,
1089
  "log_file": "/tmp/gpu_queue/job_089_as_sorl_abs10_K1_500K_2L1H128d_gpu1.log"
 
1104
  "job_id": 91,
1105
  "name": "as_sorl_abs10_K4_10K",
1106
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 10000 --abs_vocab 10 --K 4 --num",
1107
+ "gpu": 0,
1108
+ "status": "failed",
1109
+ "elapsed": 5,
1110
+ "idle_time": 45,
1111
+ "exit_code": -9,
1112
+ "retries": 1,
1113
+ "log_file": "/tmp/gpu_queue/job_091_as_sorl_abs10_K4_10K_gpu0.log"
1114
  },
1115
  {
1116
  "job_id": 92,
 
1119
  "gpu": 0,
1120
  "status": "done",
1121
  "elapsed": 3477,
1122
+ "idle_time": 6422,
1123
  "exit_code": 0,
1124
  "retries": 0,
1125
  "log_file": "/tmp/gpu_queue/job_092_as_sorl_abs30_K4_10K_gpu0.log"
 
1128
  "job_id": 93,
1129
  "name": "as_sorl_abs50_K4_10K",
1130
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 10000 --abs_vocab 50 --K 4 --num",
1131
+ "gpu": 1,
1132
+ "status": "failed",
1133
+ "elapsed": 6,
1134
+ "idle_time": 65,
1135
+ "exit_code": -9,
1136
+ "retries": 1,
1137
+ "log_file": "/tmp/gpu_queue/job_093_as_sorl_abs50_K4_10K_gpu1.log"
1138
  },
1139
  {
1140
  "job_id": 94,
1141
  "name": "as_sorl_abs5_K4_250K",
1142
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 250000 --abs_vocab 5 --K 4 --num",
1143
+ "gpu": 0,
1144
+ "status": "failed",
1145
+ "elapsed": 6,
1146
+ "idle_time": 5,
1147
+ "exit_code": -9,
1148
+ "retries": 1,
1149
+ "log_file": "/tmp/gpu_queue/job_094_as_sorl_abs5_K4_250K_gpu0.log"
1150
  },
1151
  {
1152
  "job_id": 95,
 
1155
  "gpu": 2,
1156
  "status": "failed",
1157
  "elapsed": 5,
1158
+ "idle_time": 143,
1159
  "exit_code": -9,
1160
  "retries": 1,
1161
  "log_file": "/tmp/gpu_queue/job_095_as_sorl_abs10_K4_250K_gpu2.log"
 
1164
  "job_id": 96,
1165
  "name": "as_sorl_abs30_K4_250K",
1166
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 250000 --abs_vocab 30 --K 4 --nu",
1167
+ "gpu": 1,
1168
+ "status": "failed",
1169
+ "elapsed": 6,
1170
+ "idle_time": 10,
1171
+ "exit_code": -9,
1172
+ "retries": 1,
1173
+ "log_file": "/tmp/gpu_queue/job_096_as_sorl_abs30_K4_250K_gpu1.log"
1174
  },
1175
  {
1176
  "job_id": 97,
1177
  "name": "as_sorl_abs50_K4_250K",
1178
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 250000 --abs_vocab 50 --K 4 --nu",
1179
+ "gpu": 1,
1180
+ "status": "failed",
1181
+ "elapsed": 6,
1182
+ "idle_time": 95,
1183
+ "exit_code": -9,
1184
+ "retries": 1,
1185
+ "log_file": "/tmp/gpu_queue/job_097_as_sorl_abs50_K4_250K_gpu1.log"
1186
  },
1187
  {
1188
  "job_id": 98,
 
1191
  "gpu": 1,
1192
  "status": "failed",
1193
  "elapsed": 3,
1194
+ "idle_time": 140,
1195
  "exit_code": -15,
1196
  "retries": 1,
1197
  "log_file": "/tmp/gpu_queue/job_098_as_sorl_abs2_K1_100K_gpu1.log"
 
1203
  "gpu": 1,
1204
  "status": "failed",
1205
  "elapsed": 5,
1206
+ "idle_time": 120,
1207
  "exit_code": -9,
1208
  "retries": 1,
1209
  "log_file": "/tmp/gpu_queue/job_099_as_sorl_abs2_K4_100K_gpu1.log"
 
1212
  "job_id": 100,
1213
  "name": "as_sorl_abs16_K1_100K",
1214
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 16 --K 1 --nu",
1215
+ "gpu": 0,
1216
+ "status": "failed",
1217
+ "elapsed": 6,
1218
+ "idle_time": 19,
1219
+ "exit_code": -9,
1220
+ "retries": 1,
1221
+ "log_file": "/tmp/gpu_queue/job_100_as_sorl_abs16_K1_100K_gpu0.log"
1222
  },
1223
  {
1224
  "job_id": 101,