amirali1985 commited on
Commit
97a1a05
·
verified ·
1 Parent(s): cc646a8

queue status update

Browse files
Files changed (1) hide show
  1. queue_status.json +344 -344
queue_status.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "timestamp": "2026-04-12 08:09:30",
3
  "total": 120,
4
- "pending": 35,
5
- "running": 4,
6
  "done": 15,
7
- "failed": 66,
8
  "stale": 0,
9
  "retrying": 0,
10
  "jobs": [
@@ -15,7 +15,7 @@
15
  "gpu": 0,
16
  "status": "done",
17
  "elapsed": 1786,
18
- "idle_time": 20508,
19
  "exit_code": 0,
20
  "retries": 0,
21
  "log_file": "/tmp/gpu_queue/job_000_add_sub_baseline_10K_gpu0.log"
@@ -27,7 +27,7 @@
27
  "gpu": 1,
28
  "status": "done",
29
  "elapsed": 2101,
30
- "idle_time": 20193,
31
  "exit_code": 0,
32
  "retries": 0,
33
  "log_file": "/tmp/gpu_queue/job_001_add_sub_baseline_25K_gpu1.log"
@@ -39,7 +39,7 @@
39
  "gpu": 2,
40
  "status": "done",
41
  "elapsed": 4753,
42
- "idle_time": 17541,
43
  "exit_code": 0,
44
  "retries": 0,
45
  "log_file": "/tmp/gpu_queue/job_002_as_sorl_abs10_K1_25K_gpu2.log"
@@ -51,7 +51,7 @@
51
  "gpu": 0,
52
  "status": "done",
53
  "elapsed": 2366,
54
- "idle_time": 19928,
55
  "exit_code": 0,
56
  "retries": 0,
57
  "log_file": "/tmp/gpu_queue/job_003_add_sub_baseline_50K_gpu0.log"
@@ -63,7 +63,7 @@
63
  "gpu": 1,
64
  "status": "done",
65
  "elapsed": 6727,
66
- "idle_time": 15567,
67
  "exit_code": 0,
68
  "retries": 0,
69
  "log_file": "/tmp/gpu_queue/job_004_as_sorl_abs10_K1_50K_gpu1.log"
@@ -75,7 +75,7 @@
75
  "gpu": 2,
76
  "status": "done",
77
  "elapsed": 3112,
78
- "idle_time": 19181,
79
  "exit_code": 0,
80
  "retries": 0,
81
  "log_file": "/tmp/gpu_queue/job_005_add_sub_baseline_100K_gpu2.log"
@@ -87,7 +87,7 @@
87
  "gpu": 0,
88
  "status": "done",
89
  "elapsed": 8856,
90
- "idle_time": 11648,
91
  "exit_code": 0,
92
  "retries": 0,
93
  "log_file": "/tmp/gpu_queue/job_006_as_sorl_abs10_K1_100K_gpu0.log"
@@ -96,25 +96,25 @@
96
  "job_id": 7,
97
  "name": "add_sub_baseline_250K",
98
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 250000 --num_epochs 20 --pus",
99
- "gpu": -1,
100
- "status": "pending",
101
- "elapsed": 0,
102
- "idle_time": 0,
103
- "exit_code": -1,
104
- "retries": 0,
105
- "log_file": ""
106
  },
107
  {
108
  "job_id": 8,
109
  "name": "as_sorl_abs10_K1_250K",
110
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 250000 --abs_vocab 10 --K 1 --nu",
111
- "gpu": -1,
112
- "status": "pending",
113
- "elapsed": 0,
114
- "idle_time": 0,
115
- "exit_code": -1,
116
- "retries": 0,
117
- "log_file": ""
118
  },
119
  {
120
  "job_id": 9,
@@ -123,7 +123,7 @@
123
  "gpu": 1,
124
  "status": "failed",
125
  "elapsed": 6,
126
- "idle_time": 15,
127
  "exit_code": -9,
128
  "retries": 1,
129
  "log_file": "/tmp/gpu_queue/job_009_add_sub_baseline_500K_gpu1.log"
@@ -135,7 +135,7 @@
135
  "gpu": 2,
136
  "status": "failed",
137
  "elapsed": 6,
138
- "idle_time": 65,
139
  "exit_code": -9,
140
  "retries": 1,
141
  "log_file": "/tmp/gpu_queue/job_010_as_sorl_abs10_K1_10K_gpu2.log"
@@ -147,7 +147,7 @@
147
  "gpu": 2,
148
  "status": "failed",
149
  "elapsed": 5,
150
- "idle_time": 120,
151
  "exit_code": -9,
152
  "retries": 1,
153
  "log_file": "/tmp/gpu_queue/job_011_as_sorl_abs2_K4_500K_gpu2.log"
@@ -159,7 +159,7 @@
159
  "gpu": 1,
160
  "status": "failed",
161
  "elapsed": 5,
162
- "idle_time": 80,
163
  "exit_code": -9,
164
  "retries": 1,
165
  "log_file": "/tmp/gpu_queue/job_012_as_sorl_abs5_K4_500K_gpu1.log"
@@ -171,7 +171,7 @@
171
  "gpu": 0,
172
  "status": "failed",
173
  "elapsed": 5,
174
- "idle_time": 1154,
175
  "exit_code": -9,
176
  "retries": 1,
177
  "log_file": "/tmp/gpu_queue/job_013_as_sorl_abs10_K4_500K_gpu0.log"
@@ -183,7 +183,7 @@
183
  "gpu": 0,
184
  "status": "failed",
185
  "elapsed": 6,
186
- "idle_time": 45,
187
  "exit_code": -9,
188
  "retries": 1,
189
  "log_file": "/tmp/gpu_queue/job_014_as_sorl_abs16_K4_500K_gpu0.log"
@@ -195,7 +195,7 @@
195
  "gpu": 2,
196
  "status": "failed",
197
  "elapsed": 6,
198
- "idle_time": 15,
199
  "exit_code": -9,
200
  "retries": 1,
201
  "log_file": "/tmp/gpu_queue/job_015_as_sorl_abs20_K4_500K_gpu2.log"
@@ -207,7 +207,7 @@
207
  "gpu": 1,
208
  "status": "failed",
209
  "elapsed": 6,
210
- "idle_time": 50,
211
  "exit_code": -9,
212
  "retries": 1,
213
  "log_file": "/tmp/gpu_queue/job_016_as_sorl_abs50_K4_500K_gpu1.log"
@@ -219,7 +219,7 @@
219
  "gpu": 2,
220
  "status": "failed",
221
  "elapsed": 6,
222
- "idle_time": 50,
223
  "exit_code": -9,
224
  "retries": 1,
225
  "log_file": "/tmp/gpu_queue/job_017_as_sorl_abs70_K4_500K_gpu2.log"
@@ -231,7 +231,7 @@
231
  "gpu": 2,
232
  "status": "failed",
233
  "elapsed": 5,
234
- "idle_time": 110,
235
  "exit_code": -9,
236
  "retries": 1,
237
  "log_file": "/tmp/gpu_queue/job_018_as_sorl_abs100_K4_500K_gpu2.log"
@@ -241,10 +241,10 @@
241
  "name": "as_sorl_abs2_K1_500K",
242
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 2 --K 1 --num",
243
  "gpu": 1,
244
- "status": "running",
245
- "elapsed": 1,
246
- "idle_time": 1,
247
- "exit_code": -1,
248
  "retries": 1,
249
  "log_file": "/tmp/gpu_queue/job_019_as_sorl_abs2_K1_500K_gpu1.log"
250
  },
@@ -253,10 +253,10 @@
253
  "name": "as_sorl_abs5_K1_500K",
254
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 5 --K 1 --num",
255
  "gpu": 1,
256
- "status": "running",
257
- "elapsed": 1,
258
- "idle_time": 1,
259
- "exit_code": -1,
260
  "retries": 1,
261
  "log_file": "/tmp/gpu_queue/job_020_as_sorl_abs5_K1_500K_gpu1.log"
262
  },
@@ -267,7 +267,7 @@
267
  "gpu": 0,
268
  "status": "failed",
269
  "elapsed": 6,
270
- "idle_time": 59,
271
  "exit_code": -9,
272
  "retries": 1,
273
  "log_file": "/tmp/gpu_queue/job_021_as_sorl_abs10_K1_500K_gpu0.log"
@@ -277,10 +277,10 @@
277
  "name": "as_sorl_abs16_K1_500K",
278
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 16 --K 1 --nu",
279
  "gpu": 0,
280
- "status": "running",
281
- "elapsed": 1,
282
- "idle_time": 1,
283
- "exit_code": -1,
284
  "retries": 1,
285
  "log_file": "/tmp/gpu_queue/job_022_as_sorl_abs16_K1_500K_gpu0.log"
286
  },
@@ -288,13 +288,13 @@
288
  "job_id": 23,
289
  "name": "as_sorl_abs20_K1_500K",
290
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 20 --K 1 --nu",
291
- "gpu": -1,
292
- "status": "pending",
293
- "elapsed": 0,
294
- "idle_time": 0,
295
- "exit_code": -1,
296
- "retries": 0,
297
- "log_file": ""
298
  },
299
  {
300
  "job_id": 24,
@@ -303,7 +303,7 @@
303
  "gpu": 1,
304
  "status": "failed",
305
  "elapsed": 4,
306
- "idle_time": 41,
307
  "exit_code": -9,
308
  "retries": 1,
309
  "log_file": "/tmp/gpu_queue/job_024_as_sorl_abs50_K1_500K_gpu1.log"
@@ -315,7 +315,7 @@
315
  "gpu": 2,
316
  "status": "failed",
317
  "elapsed": 5,
318
- "idle_time": 50,
319
  "exit_code": -9,
320
  "retries": 1,
321
  "log_file": "/tmp/gpu_queue/job_025_as_sorl_abs70_K1_500K_gpu2.log"
@@ -327,7 +327,7 @@
327
  "gpu": 1,
328
  "status": "failed",
329
  "elapsed": 6,
330
- "idle_time": 25,
331
  "exit_code": -9,
332
  "retries": 1,
333
  "log_file": "/tmp/gpu_queue/job_026_as_sorl_abs100_K1_500K_gpu1.log"
@@ -339,7 +339,7 @@
339
  "gpu": 0,
340
  "status": "failed",
341
  "elapsed": 6,
342
- "idle_time": 125,
343
  "exit_code": -9,
344
  "retries": 1,
345
  "log_file": "/tmp/gpu_queue/job_027_as_sorl_abs5_K1_25K_gpu0.log"
@@ -351,7 +351,7 @@
351
  "gpu": 1,
352
  "status": "failed",
353
  "elapsed": 5,
354
- "idle_time": 59,
355
  "exit_code": -9,
356
  "retries": 1,
357
  "log_file": "/tmp/gpu_queue/job_028_as_sorl_abs30_K1_25K_gpu1.log"
@@ -361,11 +361,11 @@
361
  "name": "as_sorl_abs50_K1_25K",
362
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 50 --K 1 --num",
363
  "gpu": 0,
364
- "status": "running",
365
- "elapsed": 1,
366
- "idle_time": 1,
367
- "exit_code": -1,
368
- "retries": 0,
369
  "log_file": "/tmp/gpu_queue/job_029_as_sorl_abs50_K1_25K_gpu0.log"
370
  },
371
  {
@@ -375,7 +375,7 @@
375
  "gpu": 2,
376
  "status": "failed",
377
  "elapsed": 6,
378
- "idle_time": 0,
379
  "exit_code": -9,
380
  "retries": 1,
381
  "log_file": "/tmp/gpu_queue/job_030_as_sorl_abs5_K1_50K_gpu2.log"
@@ -387,7 +387,7 @@
387
  "gpu": 2,
388
  "status": "failed",
389
  "elapsed": 5,
390
- "idle_time": 105,
391
  "exit_code": -9,
392
  "retries": 1,
393
  "log_file": "/tmp/gpu_queue/job_031_as_sorl_abs30_K1_50K_gpu2.log"
@@ -399,7 +399,7 @@
399
  "gpu": 0,
400
  "status": "failed",
401
  "elapsed": 6,
402
- "idle_time": 25,
403
  "exit_code": -9,
404
  "retries": 1,
405
  "log_file": "/tmp/gpu_queue/job_032_as_sorl_abs50_K1_50K_gpu0.log"
@@ -411,7 +411,7 @@
411
  "gpu": 2,
412
  "status": "failed",
413
  "elapsed": 7,
414
- "idle_time": 64,
415
  "exit_code": -9,
416
  "retries": 1,
417
  "log_file": "/tmp/gpu_queue/job_033_as_sorl_abs5_K1_100K_gpu2.log"
@@ -423,7 +423,7 @@
423
  "gpu": 0,
424
  "status": "failed",
425
  "elapsed": 5,
426
- "idle_time": 89,
427
  "exit_code": -9,
428
  "retries": 1,
429
  "log_file": "/tmp/gpu_queue/job_034_as_sorl_abs30_K1_100K_gpu0.log"
@@ -435,7 +435,7 @@
435
  "gpu": 1,
436
  "status": "failed",
437
  "elapsed": 5,
438
- "idle_time": 105,
439
  "exit_code": -9,
440
  "retries": 1,
441
  "log_file": "/tmp/gpu_queue/job_035_as_sorl_abs50_K1_100K_gpu1.log"
@@ -447,7 +447,7 @@
447
  "gpu": 2,
448
  "status": "failed",
449
  "elapsed": 5,
450
- "idle_time": 0,
451
  "exit_code": -9,
452
  "retries": 1,
453
  "log_file": "/tmp/gpu_queue/job_036_as_sorl_abs5_K4_25K_gpu2.log"
@@ -459,7 +459,7 @@
459
  "gpu": 0,
460
  "status": "done",
461
  "elapsed": 4510,
462
- "idle_time": 15413,
463
  "exit_code": 0,
464
  "retries": 0,
465
  "log_file": "/tmp/gpu_queue/job_037_as_sorl_abs10_K4_25K_gpu0.log"
@@ -471,7 +471,7 @@
471
  "gpu": 1,
472
  "status": "failed",
473
  "elapsed": 6,
474
- "idle_time": 89,
475
  "exit_code": -9,
476
  "retries": 1,
477
  "log_file": "/tmp/gpu_queue/job_038_as_sorl_abs30_K4_25K_gpu1.log"
@@ -483,7 +483,7 @@
483
  "gpu": 2,
484
  "status": "failed",
485
  "elapsed": 4,
486
- "idle_time": 41,
487
  "exit_code": -9,
488
  "retries": 1,
489
  "log_file": "/tmp/gpu_queue/job_039_as_sorl_abs50_K4_25K_gpu2.log"
@@ -492,13 +492,13 @@
492
  "job_id": 40,
493
  "name": "as_sorl_abs5_K4_50K",
494
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 50000 --abs_vocab 5 --K 4 --num_",
495
- "gpu": -1,
496
- "status": "pending",
497
- "elapsed": 0,
498
- "idle_time": 0,
499
- "exit_code": -1,
500
- "retries": 0,
501
- "log_file": ""
502
  },
503
  {
504
  "job_id": 41,
@@ -507,7 +507,7 @@
507
  "gpu": 2,
508
  "status": "failed",
509
  "elapsed": 6,
510
- "idle_time": 125,
511
  "exit_code": -9,
512
  "retries": 1,
513
  "log_file": "/tmp/gpu_queue/job_041_as_sorl_abs10_K4_50K_gpu2.log"
@@ -516,13 +516,13 @@
516
  "job_id": 42,
517
  "name": "as_sorl_abs30_K4_50K",
518
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 50000 --abs_vocab 30 --K 4 --num",
519
- "gpu": -1,
520
- "status": "pending",
521
- "elapsed": 0,
522
- "idle_time": 0,
523
- "exit_code": -1,
524
- "retries": 0,
525
- "log_file": ""
526
  },
527
  {
528
  "job_id": 43,
@@ -531,7 +531,7 @@
531
  "gpu": 0,
532
  "status": "done",
533
  "elapsed": 5510,
534
- "idle_time": 9899,
535
  "exit_code": 0,
536
  "retries": 0,
537
  "log_file": "/tmp/gpu_queue/job_043_as_sorl_abs50_K4_50K_gpu0.log"
@@ -543,7 +543,7 @@
543
  "gpu": 2,
544
  "status": "failed",
545
  "elapsed": 6,
546
- "idle_time": 31,
547
  "exit_code": -9,
548
  "retries": 1,
549
  "log_file": "/tmp/gpu_queue/job_044_as_sorl_abs5_K4_100K_gpu2.log"
@@ -555,7 +555,7 @@
555
  "gpu": 0,
556
  "status": "failed",
557
  "elapsed": 6,
558
- "idle_time": 110,
559
  "exit_code": -9,
560
  "retries": 1,
561
  "log_file": "/tmp/gpu_queue/job_045_as_sorl_abs10_K4_100K_gpu0.log"
@@ -567,7 +567,7 @@
567
  "gpu": 2,
568
  "status": "failed",
569
  "elapsed": 6,
570
- "idle_time": 95,
571
  "exit_code": -9,
572
  "retries": 1,
573
  "log_file": "/tmp/gpu_queue/job_046_as_sorl_abs30_K4_100K_gpu2.log"
@@ -579,7 +579,7 @@
579
  "gpu": 0,
580
  "status": "failed",
581
  "elapsed": 6,
582
- "idle_time": 75,
583
  "exit_code": -9,
584
  "retries": 1,
585
  "log_file": "/tmp/gpu_queue/job_047_as_sorl_abs50_K4_100K_gpu0.log"
@@ -588,25 +588,25 @@
588
  "job_id": 48,
589
  "name": "as_sorl_abs10_K1_zipf2.0_500K",
590
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 10 --K 1 --al",
591
- "gpu": -1,
592
- "status": "pending",
593
- "elapsed": 0,
594
- "idle_time": 0,
595
- "exit_code": -1,
596
- "retries": 0,
597
- "log_file": ""
598
  },
599
  {
600
  "job_id": 49,
601
  "name": "as_sorl_abs10_K1_zipf5.0_500K",
602
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 10 --K 1 --al",
603
- "gpu": -1,
604
- "status": "pending",
605
- "elapsed": 0,
606
- "idle_time": 0,
607
- "exit_code": -1,
608
- "retries": 0,
609
- "log_file": ""
610
  },
611
  {
612
  "job_id": 50,
@@ -615,7 +615,7 @@
615
  "gpu": 2,
616
  "status": "failed",
617
  "elapsed": 6,
618
- "idle_time": 131,
619
  "exit_code": -9,
620
  "retries": 1,
621
  "log_file": "/tmp/gpu_queue/job_050_as_sorl_abs10_K1_zipf10.0_500K_gpu2.log"
@@ -624,13 +624,13 @@
624
  "job_id": 51,
625
  "name": "as_sorl_abs10_K4_zipf2.0_500K",
626
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 10 --K 4 --al",
627
- "gpu": -1,
628
- "status": "pending",
629
- "elapsed": 0,
630
- "idle_time": 0,
631
- "exit_code": -1,
632
- "retries": 0,
633
- "log_file": ""
634
  },
635
  {
636
  "job_id": 52,
@@ -639,7 +639,7 @@
639
  "gpu": 0,
640
  "status": "failed",
641
  "elapsed": 5,
642
- "idle_time": 90,
643
  "exit_code": -9,
644
  "retries": 1,
645
  "log_file": "/tmp/gpu_queue/job_052_as_sorl_abs10_K4_zipf5.0_500K_gpu0.log"
@@ -651,7 +651,7 @@
651
  "gpu": 1,
652
  "status": "failed",
653
  "elapsed": 5,
654
- "idle_time": 13577,
655
  "exit_code": -9,
656
  "retries": 1,
657
  "log_file": "/tmp/gpu_queue/job_053_as_sorl_abs10_K4_zipf10.0_500K_gpu1.log"
@@ -663,7 +663,7 @@
663
  "gpu": 0,
664
  "status": "failed",
665
  "elapsed": 6,
666
- "idle_time": 75,
667
  "exit_code": -9,
668
  "retries": 1,
669
  "log_file": "/tmp/gpu_queue/job_054_as_sorl_abs100_K1_zipf2.0_500K_gpu0.log"
@@ -675,7 +675,7 @@
675
  "gpu": 0,
676
  "status": "failed",
677
  "elapsed": 5,
678
- "idle_time": 6277,
679
  "exit_code": -9,
680
  "retries": 1,
681
  "log_file": "/tmp/gpu_queue/job_055_as_sorl_abs100_K1_zipf5.0_500K_gpu0.log"
@@ -684,13 +684,13 @@
684
  "job_id": 56,
685
  "name": "as_sorl_abs100_K1_zipf10.0_500K",
686
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 100 --K 1 --a",
687
- "gpu": -1,
688
- "status": "pending",
689
- "elapsed": 0,
690
- "idle_time": 0,
691
- "exit_code": -1,
692
- "retries": 0,
693
- "log_file": ""
694
  },
695
  {
696
  "job_id": 57,
@@ -699,7 +699,7 @@
699
  "gpu": 2,
700
  "status": "failed",
701
  "elapsed": 5,
702
- "idle_time": 80,
703
  "exit_code": -9,
704
  "retries": 1,
705
  "log_file": "/tmp/gpu_queue/job_057_as_sorl_abs100_K4_zipf2.0_500K_gpu2.log"
@@ -711,7 +711,7 @@
711
  "gpu": 1,
712
  "status": "failed",
713
  "elapsed": 6,
714
- "idle_time": 110,
715
  "exit_code": -9,
716
  "retries": 1,
717
  "log_file": "/tmp/gpu_queue/job_058_as_sorl_abs100_K4_zipf5.0_500K_gpu1.log"
@@ -723,7 +723,7 @@
723
  "gpu": 1,
724
  "status": "failed",
725
  "elapsed": 6,
726
- "idle_time": 74,
727
  "exit_code": -9,
728
  "retries": 1,
729
  "log_file": "/tmp/gpu_queue/job_059_as_sorl_abs100_K4_zipf10.0_500_gpu1.log"
@@ -735,7 +735,7 @@
735
  "gpu": 2,
736
  "status": "failed",
737
  "elapsed": 5,
738
- "idle_time": 25,
739
  "exit_code": -9,
740
  "retries": 1,
741
  "log_file": "/tmp/gpu_queue/job_060_as_baseline_25K_1L3H510d_gpu2.log"
@@ -744,13 +744,13 @@
744
  "job_id": 61,
745
  "name": "as_sorl_abs10_K1_25K_1L3H510d",
746
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 10 --K 1 --num",
747
- "gpu": -1,
748
- "status": "pending",
749
- "elapsed": 0,
750
- "idle_time": 0,
751
- "exit_code": -1,
752
- "retries": 0,
753
- "log_file": ""
754
  },
755
  {
756
  "job_id": 62,
@@ -759,7 +759,7 @@
759
  "gpu": 0,
760
  "status": "failed",
761
  "elapsed": 5,
762
- "idle_time": 120,
763
  "exit_code": -9,
764
  "retries": 1,
765
  "log_file": "/tmp/gpu_queue/job_062_as_baseline_50K_1L3H510d_gpu0.log"
@@ -771,7 +771,7 @@
771
  "gpu": 0,
772
  "status": "done",
773
  "elapsed": 5352,
774
- "idle_time": 6291,
775
  "exit_code": 0,
776
  "retries": 0,
777
  "log_file": "/tmp/gpu_queue/job_063_as_sorl_abs10_K1_50K_1L3H510d_gpu0.log"
@@ -783,7 +783,7 @@
783
  "gpu": 1,
784
  "status": "failed",
785
  "elapsed": 4,
786
- "idle_time": 141,
787
  "exit_code": -15,
788
  "retries": 1,
789
  "log_file": "/tmp/gpu_queue/job_064_as_baseline_100K_1L3H510d_gpu1.log"
@@ -795,7 +795,7 @@
795
  "gpu": 0,
796
  "status": "failed",
797
  "elapsed": 6,
798
- "idle_time": 132,
799
  "exit_code": -9,
800
  "retries": 1,
801
  "log_file": "/tmp/gpu_queue/job_065_as_sorl_abs10_K1_100K_1L3H510d_gpu0.log"
@@ -807,7 +807,7 @@
807
  "gpu": 0,
808
  "status": "failed",
809
  "elapsed": 5,
810
- "idle_time": 105,
811
  "exit_code": -9,
812
  "retries": 1,
813
  "log_file": "/tmp/gpu_queue/job_066_as_baseline_250K_1L3H510d_gpu0.log"
@@ -819,7 +819,7 @@
819
  "gpu": 1,
820
  "status": "failed",
821
  "elapsed": 6,
822
- "idle_time": 30,
823
  "exit_code": -9,
824
  "retries": 1,
825
  "log_file": "/tmp/gpu_queue/job_067_as_sorl_abs10_K1_250K_1L3H510d_gpu1.log"
@@ -831,7 +831,7 @@
831
  "gpu": 1,
832
  "status": "failed",
833
  "elapsed": 6,
834
- "idle_time": 10,
835
  "exit_code": -9,
836
  "retries": 1,
837
  "log_file": "/tmp/gpu_queue/job_068_as_baseline_500K_1L3H510d_gpu1.log"
@@ -843,7 +843,7 @@
843
  "gpu": 2,
844
  "status": "failed",
845
  "elapsed": 2,
846
- "idle_time": 139,
847
  "exit_code": -15,
848
  "retries": 1,
849
  "log_file": "/tmp/gpu_queue/job_069_as_sorl_abs10_K1_500K_1L3H510d_gpu2.log"
@@ -855,7 +855,7 @@
855
  "gpu": 1,
856
  "status": "failed",
857
  "elapsed": 6,
858
- "idle_time": 131,
859
  "exit_code": -9,
860
  "retries": 1,
861
  "log_file": "/tmp/gpu_queue/job_070_as_baseline_25K_1L2H256d_gpu1.log"
@@ -867,7 +867,7 @@
867
  "gpu": 0,
868
  "status": "failed",
869
  "elapsed": 6,
870
- "idle_time": 95,
871
  "exit_code": -9,
872
  "retries": 1,
873
  "log_file": "/tmp/gpu_queue/job_071_as_sorl_abs10_K1_25K_1L2H256d_gpu0.log"
@@ -876,25 +876,25 @@
876
  "job_id": 72,
877
  "name": "as_baseline_50K_1L2H256d",
878
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 50000 --num_epochs 20 --n_la",
879
- "gpu": -1,
880
- "status": "pending",
881
- "elapsed": 0,
882
- "idle_time": 0,
883
- "exit_code": -1,
884
- "retries": 0,
885
- "log_file": ""
886
  },
887
  {
888
  "job_id": 73,
889
  "name": "as_sorl_abs10_K1_50K_1L2H256d",
890
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 50000 --abs_vocab 10 --K 1 --num",
891
- "gpu": -1,
892
- "status": "pending",
893
- "elapsed": 0,
894
- "idle_time": 0,
895
- "exit_code": -1,
896
- "retries": 0,
897
- "log_file": ""
898
  },
899
  {
900
  "job_id": 74,
@@ -903,7 +903,7 @@
903
  "gpu": 2,
904
  "status": "done",
905
  "elapsed": 2797,
906
- "idle_time": 14742,
907
  "exit_code": 0,
908
  "retries": 0,
909
  "log_file": "/tmp/gpu_queue/job_074_as_baseline_100K_1L2H256d_gpu2.log"
@@ -915,7 +915,7 @@
915
  "gpu": 1,
916
  "status": "failed",
917
  "elapsed": 6,
918
- "idle_time": 125,
919
  "exit_code": -9,
920
  "retries": 1,
921
  "log_file": "/tmp/gpu_queue/job_075_as_sorl_abs10_K1_100K_1L2H256d_gpu1.log"
@@ -927,7 +927,7 @@
927
  "gpu": 0,
928
  "status": "done",
929
  "elapsed": 4832,
930
- "idle_time": 1587,
931
  "exit_code": 0,
932
  "retries": 0,
933
  "log_file": "/tmp/gpu_queue/job_076_as_baseline_250K_1L2H256d_gpu0.log"
@@ -936,13 +936,13 @@
936
  "job_id": 77,
937
  "name": "as_sorl_abs10_K1_250K_1L2H256d",
938
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 250000 --abs_vocab 10 --K 1 --nu",
939
- "gpu": -1,
940
- "status": "pending",
941
- "elapsed": 0,
942
- "idle_time": 0,
943
- "exit_code": -1,
944
- "retries": 0,
945
- "log_file": ""
946
  },
947
  {
948
  "job_id": 78,
@@ -951,7 +951,7 @@
951
  "gpu": 2,
952
  "status": "done",
953
  "elapsed": 7064,
954
- "idle_time": 12115,
955
  "exit_code": 0,
956
  "retries": 0,
957
  "log_file": "/tmp/gpu_queue/job_078_as_baseline_500K_1L2H256d_gpu2.log"
@@ -963,7 +963,7 @@
963
  "gpu": 2,
964
  "status": "failed",
965
  "elapsed": 4,
966
- "idle_time": 41,
967
  "exit_code": -9,
968
  "retries": 1,
969
  "log_file": "/tmp/gpu_queue/job_079_as_sorl_abs10_K1_500K_1L2H256d_gpu2.log"
@@ -975,7 +975,7 @@
975
  "gpu": 1,
976
  "status": "done",
977
  "elapsed": 1971,
978
- "idle_time": 13593,
979
  "exit_code": 0,
980
  "retries": 0,
981
  "log_file": "/tmp/gpu_queue/job_080_as_baseline_25K_2L1H128d_gpu1.log"
@@ -987,7 +987,7 @@
987
  "gpu": 2,
988
  "status": "failed",
989
  "elapsed": 6,
990
- "idle_time": 80,
991
  "exit_code": -9,
992
  "retries": 1,
993
  "log_file": "/tmp/gpu_queue/job_081_as_sorl_abs10_K1_25K_2L1H128d_gpu2.log"
@@ -999,7 +999,7 @@
999
  "gpu": 0,
1000
  "status": "failed",
1001
  "elapsed": 5,
1002
- "idle_time": 60,
1003
  "exit_code": -9,
1004
  "retries": 1,
1005
  "log_file": "/tmp/gpu_queue/job_082_as_baseline_50K_2L1H128d_gpu0.log"
@@ -1008,13 +1008,13 @@
1008
  "job_id": 83,
1009
  "name": "as_sorl_abs10_K1_50K_2L1H128d",
1010
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 50000 --abs_vocab 10 --K 1 --num",
1011
- "gpu": -1,
1012
- "status": "pending",
1013
- "elapsed": 0,
1014
- "idle_time": 0,
1015
- "exit_code": -1,
1016
- "retries": 0,
1017
- "log_file": ""
1018
  },
1019
  {
1020
  "job_id": 84,
@@ -1023,7 +1023,7 @@
1023
  "gpu": 0,
1024
  "status": "failed",
1025
  "elapsed": 6,
1026
- "idle_time": 31,
1027
  "exit_code": -9,
1028
  "retries": 1,
1029
  "log_file": "/tmp/gpu_queue/job_084_as_baseline_100K_2L1H128d_gpu0.log"
@@ -1035,7 +1035,7 @@
1035
  "gpu": 0,
1036
  "status": "failed",
1037
  "elapsed": 6,
1038
- "idle_time": 10,
1039
  "exit_code": -9,
1040
  "retries": 1,
1041
  "log_file": "/tmp/gpu_queue/job_085_as_sorl_abs10_K1_100K_2L1H128d_gpu0.log"
@@ -1047,7 +1047,7 @@
1047
  "gpu": 1,
1048
  "status": "failed",
1049
  "elapsed": 6,
1050
- "idle_time": 45,
1051
  "exit_code": -9,
1052
  "retries": 1,
1053
  "log_file": "/tmp/gpu_queue/job_086_as_baseline_250K_2L1H128d_gpu1.log"
@@ -1059,7 +1059,7 @@
1059
  "gpu": 1,
1060
  "status": "failed",
1061
  "elapsed": 5,
1062
- "idle_time": 147,
1063
  "exit_code": -9,
1064
  "retries": 1,
1065
  "log_file": "/tmp/gpu_queue/job_087_as_sorl_abs10_K1_250K_2L1H128d_gpu1.log"
@@ -1068,13 +1068,13 @@
1068
  "job_id": 88,
1069
  "name": "as_baseline_500K_2L1H128d",
1070
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 500000 --num_epochs 20 --n_l",
1071
- "gpu": -1,
1072
- "status": "pending",
1073
- "elapsed": 0,
1074
- "idle_time": 0,
1075
- "exit_code": -1,
1076
- "retries": 0,
1077
- "log_file": ""
1078
  },
1079
  {
1080
  "job_id": 89,
@@ -1083,7 +1083,7 @@
1083
  "gpu": 1,
1084
  "status": "failed",
1085
  "elapsed": 5,
1086
- "idle_time": 146,
1087
  "exit_code": -9,
1088
  "retries": 1,
1089
  "log_file": "/tmp/gpu_queue/job_089_as_sorl_abs10_K1_500K_2L1H128d_gpu1.log"
@@ -1092,13 +1092,13 @@
1092
  "job_id": 90,
1093
  "name": "as_sorl_abs5_K4_10K",
1094
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 10000 --abs_vocab 5 --K 4 --num_",
1095
- "gpu": -1,
1096
- "status": "pending",
1097
- "elapsed": 0,
1098
- "idle_time": 0,
1099
- "exit_code": -1,
1100
- "retries": 0,
1101
- "log_file": ""
1102
  },
1103
  {
1104
  "job_id": 91,
@@ -1107,7 +1107,7 @@
1107
  "gpu": 0,
1108
  "status": "failed",
1109
  "elapsed": 5,
1110
- "idle_time": 45,
1111
  "exit_code": -9,
1112
  "retries": 1,
1113
  "log_file": "/tmp/gpu_queue/job_091_as_sorl_abs10_K4_10K_gpu0.log"
@@ -1119,7 +1119,7 @@
1119
  "gpu": 0,
1120
  "status": "done",
1121
  "elapsed": 3477,
1122
- "idle_time": 6422,
1123
  "exit_code": 0,
1124
  "retries": 0,
1125
  "log_file": "/tmp/gpu_queue/job_092_as_sorl_abs30_K4_10K_gpu0.log"
@@ -1131,7 +1131,7 @@
1131
  "gpu": 1,
1132
  "status": "failed",
1133
  "elapsed": 6,
1134
- "idle_time": 65,
1135
  "exit_code": -9,
1136
  "retries": 1,
1137
  "log_file": "/tmp/gpu_queue/job_093_as_sorl_abs50_K4_10K_gpu1.log"
@@ -1143,7 +1143,7 @@
1143
  "gpu": 0,
1144
  "status": "failed",
1145
  "elapsed": 6,
1146
- "idle_time": 5,
1147
  "exit_code": -9,
1148
  "retries": 1,
1149
  "log_file": "/tmp/gpu_queue/job_094_as_sorl_abs5_K4_250K_gpu0.log"
@@ -1155,7 +1155,7 @@
1155
  "gpu": 2,
1156
  "status": "failed",
1157
  "elapsed": 5,
1158
- "idle_time": 143,
1159
  "exit_code": -9,
1160
  "retries": 1,
1161
  "log_file": "/tmp/gpu_queue/job_095_as_sorl_abs10_K4_250K_gpu2.log"
@@ -1167,7 +1167,7 @@
1167
  "gpu": 1,
1168
  "status": "failed",
1169
  "elapsed": 6,
1170
- "idle_time": 10,
1171
  "exit_code": -9,
1172
  "retries": 1,
1173
  "log_file": "/tmp/gpu_queue/job_096_as_sorl_abs30_K4_250K_gpu1.log"
@@ -1179,7 +1179,7 @@
1179
  "gpu": 1,
1180
  "status": "failed",
1181
  "elapsed": 6,
1182
- "idle_time": 95,
1183
  "exit_code": -9,
1184
  "retries": 1,
1185
  "log_file": "/tmp/gpu_queue/job_097_as_sorl_abs50_K4_250K_gpu1.log"
@@ -1191,7 +1191,7 @@
1191
  "gpu": 1,
1192
  "status": "failed",
1193
  "elapsed": 3,
1194
- "idle_time": 140,
1195
  "exit_code": -15,
1196
  "retries": 1,
1197
  "log_file": "/tmp/gpu_queue/job_098_as_sorl_abs2_K1_100K_gpu1.log"
@@ -1203,7 +1203,7 @@
1203
  "gpu": 1,
1204
  "status": "failed",
1205
  "elapsed": 5,
1206
- "idle_time": 120,
1207
  "exit_code": -9,
1208
  "retries": 1,
1209
  "log_file": "/tmp/gpu_queue/job_099_as_sorl_abs2_K4_100K_gpu1.log"
@@ -1215,7 +1215,7 @@
1215
  "gpu": 0,
1216
  "status": "failed",
1217
  "elapsed": 6,
1218
- "idle_time": 19,
1219
  "exit_code": -9,
1220
  "retries": 1,
1221
  "log_file": "/tmp/gpu_queue/job_100_as_sorl_abs16_K1_100K_gpu0.log"
@@ -1224,229 +1224,229 @@
1224
  "job_id": 101,
1225
  "name": "as_sorl_abs16_K4_100K",
1226
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 16 --K 4 --nu",
1227
- "gpu": -1,
1228
- "status": "pending",
1229
- "elapsed": 0,
1230
  "idle_time": 0,
1231
- "exit_code": -1,
1232
- "retries": 0,
1233
- "log_file": ""
1234
  },
1235
  {
1236
  "job_id": 102,
1237
  "name": "as_sorl_abs20_K1_100K",
1238
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 20 --K 1 --nu",
1239
- "gpu": -1,
1240
- "status": "pending",
1241
- "elapsed": 0,
1242
- "idle_time": 0,
1243
- "exit_code": -1,
1244
- "retries": 0,
1245
- "log_file": ""
1246
  },
1247
  {
1248
  "job_id": 103,
1249
  "name": "as_sorl_abs20_K4_100K",
1250
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 20 --K 4 --nu",
1251
- "gpu": -1,
1252
- "status": "pending",
1253
- "elapsed": 0,
1254
  "idle_time": 0,
1255
- "exit_code": -1,
1256
- "retries": 0,
1257
- "log_file": ""
1258
  },
1259
  {
1260
  "job_id": 104,
1261
  "name": "as_sorl_abs70_K1_100K",
1262
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 70 --K 1 --nu",
1263
- "gpu": -1,
1264
- "status": "pending",
1265
- "elapsed": 0,
1266
- "idle_time": 0,
1267
- "exit_code": -1,
1268
- "retries": 0,
1269
- "log_file": ""
1270
  },
1271
  {
1272
  "job_id": 105,
1273
  "name": "as_sorl_abs70_K4_100K",
1274
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 70 --K 4 --nu",
1275
- "gpu": -1,
1276
- "status": "pending",
1277
- "elapsed": 0,
1278
- "idle_time": 0,
1279
- "exit_code": -1,
1280
- "retries": 0,
1281
- "log_file": ""
1282
  },
1283
  {
1284
  "job_id": 106,
1285
  "name": "as_sorl_abs100_K1_100K",
1286
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 100 --K 1 --n",
1287
- "gpu": -1,
1288
- "status": "pending",
1289
- "elapsed": 0,
1290
- "idle_time": 0,
1291
- "exit_code": -1,
1292
- "retries": 0,
1293
- "log_file": ""
1294
  },
1295
  {
1296
  "job_id": 107,
1297
  "name": "as_sorl_abs100_K4_100K",
1298
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 100 --K 4 --n",
1299
- "gpu": -1,
1300
- "status": "pending",
1301
- "elapsed": 0,
1302
- "idle_time": 0,
1303
- "exit_code": -1,
1304
- "retries": 0,
1305
- "log_file": ""
1306
  },
1307
  {
1308
  "job_id": 108,
1309
  "name": "as_sorl_abs10_K1_zipf2.0_100K",
1310
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 10 --K 1 --al",
1311
- "gpu": -1,
1312
- "status": "pending",
1313
- "elapsed": 0,
1314
- "idle_time": 0,
1315
- "exit_code": -1,
1316
- "retries": 0,
1317
- "log_file": ""
1318
  },
1319
  {
1320
  "job_id": 109,
1321
  "name": "as_sorl_abs10_K4_zipf2.0_100K",
1322
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 10 --K 4 --al",
1323
- "gpu": -1,
1324
- "status": "pending",
1325
- "elapsed": 0,
1326
- "idle_time": 0,
1327
- "exit_code": -1,
1328
- "retries": 0,
1329
- "log_file": ""
1330
  },
1331
  {
1332
  "job_id": 110,
1333
  "name": "as_sorl_abs10_K1_zipf5.0_100K",
1334
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 10 --K 1 --al",
1335
- "gpu": -1,
1336
- "status": "pending",
1337
- "elapsed": 0,
1338
- "idle_time": 0,
1339
- "exit_code": -1,
1340
- "retries": 0,
1341
- "log_file": ""
1342
  },
1343
  {
1344
  "job_id": 111,
1345
  "name": "as_sorl_abs10_K4_zipf5.0_100K",
1346
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 10 --K 4 --al",
1347
- "gpu": -1,
1348
- "status": "pending",
1349
- "elapsed": 0,
1350
- "idle_time": 0,
1351
- "exit_code": -1,
1352
- "retries": 0,
1353
- "log_file": ""
1354
  },
1355
  {
1356
  "job_id": 112,
1357
  "name": "as_sorl_abs10_K1_zipf10.0_100K",
1358
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 10 --K 1 --al",
1359
- "gpu": -1,
1360
- "status": "pending",
1361
- "elapsed": 0,
1362
- "idle_time": 0,
1363
- "exit_code": -1,
1364
- "retries": 0,
1365
- "log_file": ""
1366
  },
1367
  {
1368
  "job_id": 113,
1369
  "name": "as_sorl_abs10_K4_zipf10.0_100K",
1370
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 10 --K 4 --al",
1371
- "gpu": -1,
1372
- "status": "pending",
1373
- "elapsed": 0,
1374
- "idle_time": 0,
1375
- "exit_code": -1,
1376
- "retries": 0,
1377
- "log_file": ""
1378
  },
1379
  {
1380
  "job_id": 114,
1381
  "name": "as_sorl_abs100_K1_zipf2.0_100K",
1382
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 100 --K 1 --a",
1383
- "gpu": -1,
1384
- "status": "pending",
1385
- "elapsed": 0,
1386
- "idle_time": 0,
1387
- "exit_code": -1,
1388
- "retries": 0,
1389
- "log_file": ""
1390
  },
1391
  {
1392
  "job_id": 115,
1393
  "name": "as_sorl_abs100_K4_zipf2.0_100K",
1394
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 100 --K 4 --a",
1395
- "gpu": -1,
1396
- "status": "pending",
1397
- "elapsed": 0,
1398
- "idle_time": 0,
1399
- "exit_code": -1,
1400
- "retries": 0,
1401
- "log_file": ""
1402
  },
1403
  {
1404
  "job_id": 116,
1405
  "name": "as_sorl_abs100_K1_zipf5.0_100K",
1406
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 100 --K 1 --a",
1407
- "gpu": -1,
1408
- "status": "pending",
1409
- "elapsed": 0,
1410
  "idle_time": 0,
1411
- "exit_code": -1,
1412
- "retries": 0,
1413
- "log_file": ""
1414
  },
1415
  {
1416
  "job_id": 117,
1417
  "name": "as_sorl_abs100_K4_zipf5.0_100K",
1418
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 100 --K 4 --a",
1419
- "gpu": -1,
1420
- "status": "pending",
1421
- "elapsed": 0,
1422
- "idle_time": 0,
1423
- "exit_code": -1,
1424
- "retries": 0,
1425
- "log_file": ""
1426
  },
1427
  {
1428
  "job_id": 118,
1429
  "name": "as_sorl_abs100_K1_zipf10.0_100K",
1430
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 100 --K 1 --a",
1431
- "gpu": -1,
1432
- "status": "pending",
1433
- "elapsed": 0,
1434
- "idle_time": 0,
1435
- "exit_code": -1,
1436
- "retries": 0,
1437
- "log_file": ""
1438
  },
1439
  {
1440
  "job_id": 119,
1441
  "name": "as_sorl_abs100_K4_zipf10.0_100K",
1442
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 100 --K 4 --a",
1443
- "gpu": -1,
1444
- "status": "pending",
1445
- "elapsed": 0,
1446
- "idle_time": 0,
1447
- "exit_code": -1,
1448
- "retries": 0,
1449
- "log_file": ""
1450
  }
1451
  ]
1452
  }
 
1
  {
2
+ "timestamp": "2026-04-12 08:11:05",
3
  "total": 120,
4
+ "pending": 0,
5
+ "running": 0,
6
  "done": 15,
7
+ "failed": 105,
8
  "stale": 0,
9
  "retrying": 0,
10
  "jobs": [
 
15
  "gpu": 0,
16
  "status": "done",
17
  "elapsed": 1786,
18
+ "idle_time": 20603,
19
  "exit_code": 0,
20
  "retries": 0,
21
  "log_file": "/tmp/gpu_queue/job_000_add_sub_baseline_10K_gpu0.log"
 
27
  "gpu": 1,
28
  "status": "done",
29
  "elapsed": 2101,
30
+ "idle_time": 20288,
31
  "exit_code": 0,
32
  "retries": 0,
33
  "log_file": "/tmp/gpu_queue/job_001_add_sub_baseline_25K_gpu1.log"
 
39
  "gpu": 2,
40
  "status": "done",
41
  "elapsed": 4753,
42
+ "idle_time": 17636,
43
  "exit_code": 0,
44
  "retries": 0,
45
  "log_file": "/tmp/gpu_queue/job_002_as_sorl_abs10_K1_25K_gpu2.log"
 
51
  "gpu": 0,
52
  "status": "done",
53
  "elapsed": 2366,
54
+ "idle_time": 20022,
55
  "exit_code": 0,
56
  "retries": 0,
57
  "log_file": "/tmp/gpu_queue/job_003_add_sub_baseline_50K_gpu0.log"
 
63
  "gpu": 1,
64
  "status": "done",
65
  "elapsed": 6727,
66
+ "idle_time": 15662,
67
  "exit_code": 0,
68
  "retries": 0,
69
  "log_file": "/tmp/gpu_queue/job_004_as_sorl_abs10_K1_50K_gpu1.log"
 
75
  "gpu": 2,
76
  "status": "done",
77
  "elapsed": 3112,
78
+ "idle_time": 19276,
79
  "exit_code": 0,
80
  "retries": 0,
81
  "log_file": "/tmp/gpu_queue/job_005_add_sub_baseline_100K_gpu2.log"
 
87
  "gpu": 0,
88
  "status": "done",
89
  "elapsed": 8856,
90
+ "idle_time": 11743,
91
  "exit_code": 0,
92
  "retries": 0,
93
  "log_file": "/tmp/gpu_queue/job_006_as_sorl_abs10_K1_100K_gpu0.log"
 
96
  "job_id": 7,
97
  "name": "add_sub_baseline_250K",
98
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 250000 --num_epochs 20 --pus",
99
+ "gpu": 0,
100
+ "status": "failed",
101
+ "elapsed": 5,
102
+ "idle_time": 45,
103
+ "exit_code": -9,
104
+ "retries": 1,
105
+ "log_file": "/tmp/gpu_queue/job_007_add_sub_baseline_250K_gpu0.log"
106
  },
107
  {
108
  "job_id": 8,
109
  "name": "as_sorl_abs10_K1_250K",
110
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 250000 --abs_vocab 10 --K 1 --nu",
111
+ "gpu": 0,
112
+ "status": "failed",
113
+ "elapsed": 5,
114
+ "idle_time": 75,
115
+ "exit_code": -9,
116
+ "retries": 1,
117
+ "log_file": "/tmp/gpu_queue/job_008_as_sorl_abs10_K1_250K_gpu0.log"
118
  },
119
  {
120
  "job_id": 9,
 
123
  "gpu": 1,
124
  "status": "failed",
125
  "elapsed": 6,
126
+ "idle_time": 110,
127
  "exit_code": -9,
128
  "retries": 1,
129
  "log_file": "/tmp/gpu_queue/job_009_add_sub_baseline_500K_gpu1.log"
 
135
  "gpu": 2,
136
  "status": "failed",
137
  "elapsed": 6,
138
+ "idle_time": 159,
139
  "exit_code": -9,
140
  "retries": 1,
141
  "log_file": "/tmp/gpu_queue/job_010_as_sorl_abs10_K1_10K_gpu2.log"
 
147
  "gpu": 2,
148
  "status": "failed",
149
  "elapsed": 5,
150
+ "idle_time": 215,
151
  "exit_code": -9,
152
  "retries": 1,
153
  "log_file": "/tmp/gpu_queue/job_011_as_sorl_abs2_K4_500K_gpu2.log"
 
159
  "gpu": 1,
160
  "status": "failed",
161
  "elapsed": 5,
162
+ "idle_time": 175,
163
  "exit_code": -9,
164
  "retries": 1,
165
  "log_file": "/tmp/gpu_queue/job_012_as_sorl_abs5_K4_500K_gpu1.log"
 
171
  "gpu": 0,
172
  "status": "failed",
173
  "elapsed": 5,
174
+ "idle_time": 1249,
175
  "exit_code": -9,
176
  "retries": 1,
177
  "log_file": "/tmp/gpu_queue/job_013_as_sorl_abs10_K4_500K_gpu0.log"
 
183
  "gpu": 0,
184
  "status": "failed",
185
  "elapsed": 6,
186
+ "idle_time": 140,
187
  "exit_code": -9,
188
  "retries": 1,
189
  "log_file": "/tmp/gpu_queue/job_014_as_sorl_abs16_K4_500K_gpu0.log"
 
195
  "gpu": 2,
196
  "status": "failed",
197
  "elapsed": 6,
198
+ "idle_time": 110,
199
  "exit_code": -9,
200
  "retries": 1,
201
  "log_file": "/tmp/gpu_queue/job_015_as_sorl_abs20_K4_500K_gpu2.log"
 
207
  "gpu": 1,
208
  "status": "failed",
209
  "elapsed": 6,
210
+ "idle_time": 145,
211
  "exit_code": -9,
212
  "retries": 1,
213
  "log_file": "/tmp/gpu_queue/job_016_as_sorl_abs50_K4_500K_gpu1.log"
 
219
  "gpu": 2,
220
  "status": "failed",
221
  "elapsed": 6,
222
+ "idle_time": 145,
223
  "exit_code": -9,
224
  "retries": 1,
225
  "log_file": "/tmp/gpu_queue/job_017_as_sorl_abs70_K4_500K_gpu2.log"
 
231
  "gpu": 2,
232
  "status": "failed",
233
  "elapsed": 5,
234
+ "idle_time": 205,
235
  "exit_code": -9,
236
  "retries": 1,
237
  "log_file": "/tmp/gpu_queue/job_018_as_sorl_abs100_K4_500K_gpu2.log"
 
241
  "name": "as_sorl_abs2_K1_500K",
242
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 2 --K 1 --num",
243
  "gpu": 1,
244
+ "status": "failed",
245
+ "elapsed": 6,
246
+ "idle_time": 90,
247
+ "exit_code": -9,
248
  "retries": 1,
249
  "log_file": "/tmp/gpu_queue/job_019_as_sorl_abs2_K1_500K_gpu1.log"
250
  },
 
253
  "name": "as_sorl_abs5_K1_500K",
254
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 5 --K 1 --num",
255
  "gpu": 1,
256
+ "status": "failed",
257
+ "elapsed": 6,
258
+ "idle_time": 90,
259
+ "exit_code": -9,
260
  "retries": 1,
261
  "log_file": "/tmp/gpu_queue/job_020_as_sorl_abs5_K1_500K_gpu1.log"
262
  },
 
267
  "gpu": 0,
268
  "status": "failed",
269
  "elapsed": 6,
270
+ "idle_time": 154,
271
  "exit_code": -9,
272
  "retries": 1,
273
  "log_file": "/tmp/gpu_queue/job_021_as_sorl_abs10_K1_500K_gpu0.log"
 
277
  "name": "as_sorl_abs16_K1_500K",
278
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 16 --K 1 --nu",
279
  "gpu": 0,
280
+ "status": "failed",
281
+ "elapsed": 6,
282
+ "idle_time": 90,
283
+ "exit_code": -9,
284
  "retries": 1,
285
  "log_file": "/tmp/gpu_queue/job_022_as_sorl_abs16_K1_500K_gpu0.log"
286
  },
 
288
  "job_id": 23,
289
  "name": "as_sorl_abs20_K1_500K",
290
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 20 --K 1 --nu",
291
+ "gpu": 1,
292
+ "status": "failed",
293
+ "elapsed": 6,
294
+ "idle_time": 75,
295
+ "exit_code": -9,
296
+ "retries": 1,
297
+ "log_file": "/tmp/gpu_queue/job_023_as_sorl_abs20_K1_500K_gpu1.log"
298
  },
299
  {
300
  "job_id": 24,
 
303
  "gpu": 1,
304
  "status": "failed",
305
  "elapsed": 4,
306
+ "idle_time": 136,
307
  "exit_code": -9,
308
  "retries": 1,
309
  "log_file": "/tmp/gpu_queue/job_024_as_sorl_abs50_K1_500K_gpu1.log"
 
315
  "gpu": 2,
316
  "status": "failed",
317
  "elapsed": 5,
318
+ "idle_time": 145,
319
  "exit_code": -9,
320
  "retries": 1,
321
  "log_file": "/tmp/gpu_queue/job_025_as_sorl_abs70_K1_500K_gpu2.log"
 
327
  "gpu": 1,
328
  "status": "failed",
329
  "elapsed": 6,
330
+ "idle_time": 120,
331
  "exit_code": -9,
332
  "retries": 1,
333
  "log_file": "/tmp/gpu_queue/job_026_as_sorl_abs100_K1_500K_gpu1.log"
 
339
  "gpu": 0,
340
  "status": "failed",
341
  "elapsed": 6,
342
+ "idle_time": 220,
343
  "exit_code": -9,
344
  "retries": 1,
345
  "log_file": "/tmp/gpu_queue/job_027_as_sorl_abs5_K1_25K_gpu0.log"
 
351
  "gpu": 1,
352
  "status": "failed",
353
  "elapsed": 5,
354
+ "idle_time": 154,
355
  "exit_code": -9,
356
  "retries": 1,
357
  "log_file": "/tmp/gpu_queue/job_028_as_sorl_abs30_K1_25K_gpu1.log"
 
361
  "name": "as_sorl_abs50_K1_25K",
362
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 50 --K 1 --num",
363
  "gpu": 0,
364
+ "status": "failed",
365
+ "elapsed": 5,
366
+ "idle_time": 85,
367
+ "exit_code": -9,
368
+ "retries": 1,
369
  "log_file": "/tmp/gpu_queue/job_029_as_sorl_abs50_K1_25K_gpu0.log"
370
  },
371
  {
 
375
  "gpu": 2,
376
  "status": "failed",
377
  "elapsed": 6,
378
+ "idle_time": 95,
379
  "exit_code": -9,
380
  "retries": 1,
381
  "log_file": "/tmp/gpu_queue/job_030_as_sorl_abs5_K1_50K_gpu2.log"
 
387
  "gpu": 2,
388
  "status": "failed",
389
  "elapsed": 5,
390
+ "idle_time": 200,
391
  "exit_code": -9,
392
  "retries": 1,
393
  "log_file": "/tmp/gpu_queue/job_031_as_sorl_abs30_K1_50K_gpu2.log"
 
399
  "gpu": 0,
400
  "status": "failed",
401
  "elapsed": 6,
402
+ "idle_time": 119,
403
  "exit_code": -9,
404
  "retries": 1,
405
  "log_file": "/tmp/gpu_queue/job_032_as_sorl_abs50_K1_50K_gpu0.log"
 
411
  "gpu": 2,
412
  "status": "failed",
413
  "elapsed": 7,
414
+ "idle_time": 159,
415
  "exit_code": -9,
416
  "retries": 1,
417
  "log_file": "/tmp/gpu_queue/job_033_as_sorl_abs5_K1_100K_gpu2.log"
 
423
  "gpu": 0,
424
  "status": "failed",
425
  "elapsed": 5,
426
+ "idle_time": 184,
427
  "exit_code": -9,
428
  "retries": 1,
429
  "log_file": "/tmp/gpu_queue/job_034_as_sorl_abs30_K1_100K_gpu0.log"
 
435
  "gpu": 1,
436
  "status": "failed",
437
  "elapsed": 5,
438
+ "idle_time": 200,
439
  "exit_code": -9,
440
  "retries": 1,
441
  "log_file": "/tmp/gpu_queue/job_035_as_sorl_abs50_K1_100K_gpu1.log"
 
447
  "gpu": 2,
448
  "status": "failed",
449
  "elapsed": 5,
450
+ "idle_time": 95,
451
  "exit_code": -9,
452
  "retries": 1,
453
  "log_file": "/tmp/gpu_queue/job_036_as_sorl_abs5_K4_25K_gpu2.log"
 
459
  "gpu": 0,
460
  "status": "done",
461
  "elapsed": 4510,
462
+ "idle_time": 15508,
463
  "exit_code": 0,
464
  "retries": 0,
465
  "log_file": "/tmp/gpu_queue/job_037_as_sorl_abs10_K4_25K_gpu0.log"
 
471
  "gpu": 1,
472
  "status": "failed",
473
  "elapsed": 6,
474
+ "idle_time": 184,
475
  "exit_code": -9,
476
  "retries": 1,
477
  "log_file": "/tmp/gpu_queue/job_038_as_sorl_abs30_K4_25K_gpu1.log"
 
483
  "gpu": 2,
484
  "status": "failed",
485
  "elapsed": 4,
486
+ "idle_time": 135,
487
  "exit_code": -9,
488
  "retries": 1,
489
  "log_file": "/tmp/gpu_queue/job_039_as_sorl_abs50_K4_25K_gpu2.log"
 
492
  "job_id": 40,
493
  "name": "as_sorl_abs5_K4_50K",
494
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 50000 --abs_vocab 5 --K 4 --num_",
495
+ "gpu": 2,
496
+ "status": "failed",
497
+ "elapsed": 5,
498
+ "idle_time": 70,
499
+ "exit_code": -9,
500
+ "retries": 1,
501
+ "log_file": "/tmp/gpu_queue/job_040_as_sorl_abs5_K4_50K_gpu2.log"
502
  },
503
  {
504
  "job_id": 41,
 
507
  "gpu": 2,
508
  "status": "failed",
509
  "elapsed": 6,
510
+ "idle_time": 220,
511
  "exit_code": -9,
512
  "retries": 1,
513
  "log_file": "/tmp/gpu_queue/job_041_as_sorl_abs10_K4_50K_gpu2.log"
 
516
  "job_id": 42,
517
  "name": "as_sorl_abs30_K4_50K",
518
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 50000 --abs_vocab 30 --K 4 --num",
519
+ "gpu": 0,
520
+ "status": "failed",
521
+ "elapsed": 5,
522
+ "idle_time": 60,
523
+ "exit_code": -9,
524
+ "retries": 1,
525
+ "log_file": "/tmp/gpu_queue/job_042_as_sorl_abs30_K4_50K_gpu0.log"
526
  },
527
  {
528
  "job_id": 43,
 
531
  "gpu": 0,
532
  "status": "done",
533
  "elapsed": 5510,
534
+ "idle_time": 9994,
535
  "exit_code": 0,
536
  "retries": 0,
537
  "log_file": "/tmp/gpu_queue/job_043_as_sorl_abs50_K4_50K_gpu0.log"
 
543
  "gpu": 2,
544
  "status": "failed",
545
  "elapsed": 6,
546
+ "idle_time": 125,
547
  "exit_code": -9,
548
  "retries": 1,
549
  "log_file": "/tmp/gpu_queue/job_044_as_sorl_abs5_K4_100K_gpu2.log"
 
555
  "gpu": 0,
556
  "status": "failed",
557
  "elapsed": 6,
558
+ "idle_time": 205,
559
  "exit_code": -9,
560
  "retries": 1,
561
  "log_file": "/tmp/gpu_queue/job_045_as_sorl_abs10_K4_100K_gpu0.log"
 
567
  "gpu": 2,
568
  "status": "failed",
569
  "elapsed": 6,
570
+ "idle_time": 190,
571
  "exit_code": -9,
572
  "retries": 1,
573
  "log_file": "/tmp/gpu_queue/job_046_as_sorl_abs30_K4_100K_gpu2.log"
 
579
  "gpu": 0,
580
  "status": "failed",
581
  "elapsed": 6,
582
+ "idle_time": 169,
583
  "exit_code": -9,
584
  "retries": 1,
585
  "log_file": "/tmp/gpu_queue/job_047_as_sorl_abs50_K4_100K_gpu0.log"
 
588
  "job_id": 48,
589
  "name": "as_sorl_abs10_K1_zipf2.0_500K",
590
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 10 --K 1 --al",
591
+ "gpu": 0,
592
+ "status": "failed",
593
+ "elapsed": 6,
594
+ "idle_time": 80,
595
+ "exit_code": -9,
596
+ "retries": 1,
597
+ "log_file": "/tmp/gpu_queue/job_048_as_sorl_abs10_K1_zipf2.0_500K_gpu0.log"
598
  },
599
  {
600
  "job_id": 49,
601
  "name": "as_sorl_abs10_K1_zipf5.0_500K",
602
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 10 --K 1 --al",
603
+ "gpu": 1,
604
+ "status": "failed",
605
+ "elapsed": 5,
606
+ "idle_time": 50,
607
+ "exit_code": -9,
608
+ "retries": 1,
609
+ "log_file": "/tmp/gpu_queue/job_049_as_sorl_abs10_K1_zipf5.0_500K_gpu1.log"
610
  },
611
  {
612
  "job_id": 50,
 
615
  "gpu": 2,
616
  "status": "failed",
617
  "elapsed": 6,
618
+ "idle_time": 226,
619
  "exit_code": -9,
620
  "retries": 1,
621
  "log_file": "/tmp/gpu_queue/job_050_as_sorl_abs10_K1_zipf10.0_500K_gpu2.log"
 
624
  "job_id": 51,
625
  "name": "as_sorl_abs10_K4_zipf2.0_500K",
626
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 10 --K 4 --al",
627
+ "gpu": 0,
628
+ "status": "failed",
629
+ "elapsed": 6,
630
+ "idle_time": 45,
631
+ "exit_code": -9,
632
+ "retries": 1,
633
+ "log_file": "/tmp/gpu_queue/job_051_as_sorl_abs10_K4_zipf2.0_500K_gpu0.log"
634
  },
635
  {
636
  "job_id": 52,
 
639
  "gpu": 0,
640
  "status": "failed",
641
  "elapsed": 5,
642
+ "idle_time": 185,
643
  "exit_code": -9,
644
  "retries": 1,
645
  "log_file": "/tmp/gpu_queue/job_052_as_sorl_abs10_K4_zipf5.0_500K_gpu0.log"
 
651
  "gpu": 1,
652
  "status": "failed",
653
  "elapsed": 5,
654
+ "idle_time": 13672,
655
  "exit_code": -9,
656
  "retries": 1,
657
  "log_file": "/tmp/gpu_queue/job_053_as_sorl_abs10_K4_zipf10.0_500K_gpu1.log"
 
663
  "gpu": 0,
664
  "status": "failed",
665
  "elapsed": 6,
666
+ "idle_time": 169,
667
  "exit_code": -9,
668
  "retries": 1,
669
  "log_file": "/tmp/gpu_queue/job_054_as_sorl_abs100_K1_zipf2.0_500K_gpu0.log"
 
675
  "gpu": 0,
676
  "status": "failed",
677
  "elapsed": 5,
678
+ "idle_time": 6372,
679
  "exit_code": -9,
680
  "retries": 1,
681
  "log_file": "/tmp/gpu_queue/job_055_as_sorl_abs100_K1_zipf5.0_500K_gpu0.log"
 
684
  "job_id": 56,
685
  "name": "as_sorl_abs100_K1_zipf10.0_500K",
686
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 500000 --abs_vocab 100 --K 1 --a",
687
+ "gpu": 2,
688
+ "status": "failed",
689
+ "elapsed": 5,
690
+ "idle_time": 80,
691
+ "exit_code": -9,
692
+ "retries": 1,
693
+ "log_file": "/tmp/gpu_queue/job_056_as_sorl_abs100_K1_zipf10.0_500_gpu2.log"
694
  },
695
  {
696
  "job_id": 57,
 
699
  "gpu": 2,
700
  "status": "failed",
701
  "elapsed": 5,
702
+ "idle_time": 175,
703
  "exit_code": -9,
704
  "retries": 1,
705
  "log_file": "/tmp/gpu_queue/job_057_as_sorl_abs100_K4_zipf2.0_500K_gpu2.log"
 
711
  "gpu": 1,
712
  "status": "failed",
713
  "elapsed": 6,
714
+ "idle_time": 205,
715
  "exit_code": -9,
716
  "retries": 1,
717
  "log_file": "/tmp/gpu_queue/job_058_as_sorl_abs100_K4_zipf5.0_500K_gpu1.log"
 
723
  "gpu": 1,
724
  "status": "failed",
725
  "elapsed": 6,
726
+ "idle_time": 169,
727
  "exit_code": -9,
728
  "retries": 1,
729
  "log_file": "/tmp/gpu_queue/job_059_as_sorl_abs100_K4_zipf10.0_500_gpu1.log"
 
735
  "gpu": 2,
736
  "status": "failed",
737
  "elapsed": 5,
738
+ "idle_time": 120,
739
  "exit_code": -9,
740
  "retries": 1,
741
  "log_file": "/tmp/gpu_queue/job_060_as_baseline_25K_1L3H510d_gpu2.log"
 
744
  "job_id": 61,
745
  "name": "as_sorl_abs10_K1_25K_1L3H510d",
746
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 25000 --abs_vocab 10 --K 1 --num",
747
+ "gpu": 2,
748
+ "status": "failed",
749
+ "elapsed": 5,
750
+ "idle_time": 65,
751
+ "exit_code": -9,
752
+ "retries": 1,
753
+ "log_file": "/tmp/gpu_queue/job_061_as_sorl_abs10_K1_25K_1L3H510d_gpu2.log"
754
  },
755
  {
756
  "job_id": 62,
 
759
  "gpu": 0,
760
  "status": "failed",
761
  "elapsed": 5,
762
+ "idle_time": 215,
763
  "exit_code": -9,
764
  "retries": 1,
765
  "log_file": "/tmp/gpu_queue/job_062_as_baseline_50K_1L3H510d_gpu0.log"
 
771
  "gpu": 0,
772
  "status": "done",
773
  "elapsed": 5352,
774
+ "idle_time": 6386,
775
  "exit_code": 0,
776
  "retries": 0,
777
  "log_file": "/tmp/gpu_queue/job_063_as_sorl_abs10_K1_50K_1L3H510d_gpu0.log"
 
783
  "gpu": 1,
784
  "status": "failed",
785
  "elapsed": 4,
786
+ "idle_time": 236,
787
  "exit_code": -15,
788
  "retries": 1,
789
  "log_file": "/tmp/gpu_queue/job_064_as_baseline_100K_1L3H510d_gpu1.log"
 
795
  "gpu": 0,
796
  "status": "failed",
797
  "elapsed": 6,
798
+ "idle_time": 226,
799
  "exit_code": -9,
800
  "retries": 1,
801
  "log_file": "/tmp/gpu_queue/job_065_as_sorl_abs10_K1_100K_1L3H510d_gpu0.log"
 
807
  "gpu": 0,
808
  "status": "failed",
809
  "elapsed": 5,
810
+ "idle_time": 200,
811
  "exit_code": -9,
812
  "retries": 1,
813
  "log_file": "/tmp/gpu_queue/job_066_as_baseline_250K_1L3H510d_gpu0.log"
 
819
  "gpu": 1,
820
  "status": "failed",
821
  "elapsed": 6,
822
+ "idle_time": 125,
823
  "exit_code": -9,
824
  "retries": 1,
825
  "log_file": "/tmp/gpu_queue/job_067_as_sorl_abs10_K1_250K_1L3H510d_gpu1.log"
 
831
  "gpu": 1,
832
  "status": "failed",
833
  "elapsed": 6,
834
+ "idle_time": 105,
835
  "exit_code": -9,
836
  "retries": 1,
837
  "log_file": "/tmp/gpu_queue/job_068_as_baseline_500K_1L3H510d_gpu1.log"
 
843
  "gpu": 2,
844
  "status": "failed",
845
  "elapsed": 2,
846
+ "idle_time": 234,
847
  "exit_code": -15,
848
  "retries": 1,
849
  "log_file": "/tmp/gpu_queue/job_069_as_sorl_abs10_K1_500K_1L3H510d_gpu2.log"
 
855
  "gpu": 1,
856
  "status": "failed",
857
  "elapsed": 6,
858
+ "idle_time": 226,
859
  "exit_code": -9,
860
  "retries": 1,
861
  "log_file": "/tmp/gpu_queue/job_070_as_baseline_25K_1L2H256d_gpu1.log"
 
867
  "gpu": 0,
868
  "status": "failed",
869
  "elapsed": 6,
870
+ "idle_time": 190,
871
  "exit_code": -9,
872
  "retries": 1,
873
  "log_file": "/tmp/gpu_queue/job_071_as_sorl_abs10_K1_25K_1L2H256d_gpu0.log"
 
876
  "job_id": 72,
877
  "name": "as_baseline_50K_1L2H256d",
878
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 50000 --num_epochs 20 --n_la",
879
+ "gpu": 2,
880
+ "status": "failed",
881
+ "elapsed": 6,
882
+ "idle_time": 55,
883
+ "exit_code": -9,
884
+ "retries": 1,
885
+ "log_file": "/tmp/gpu_queue/job_072_as_baseline_50K_1L2H256d_gpu2.log"
886
  },
887
  {
888
  "job_id": 73,
889
  "name": "as_sorl_abs10_K1_50K_1L2H256d",
890
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 50000 --abs_vocab 10 --K 1 --num",
891
+ "gpu": 1,
892
+ "status": "failed",
893
+ "elapsed": 5,
894
+ "idle_time": 75,
895
+ "exit_code": -9,
896
+ "retries": 1,
897
+ "log_file": "/tmp/gpu_queue/job_073_as_sorl_abs10_K1_50K_1L2H256d_gpu1.log"
898
  },
899
  {
900
  "job_id": 74,
 
903
  "gpu": 2,
904
  "status": "done",
905
  "elapsed": 2797,
906
+ "idle_time": 14836,
907
  "exit_code": 0,
908
  "retries": 0,
909
  "log_file": "/tmp/gpu_queue/job_074_as_baseline_100K_1L2H256d_gpu2.log"
 
915
  "gpu": 1,
916
  "status": "failed",
917
  "elapsed": 6,
918
+ "idle_time": 220,
919
  "exit_code": -9,
920
  "retries": 1,
921
  "log_file": "/tmp/gpu_queue/job_075_as_sorl_abs10_K1_100K_1L2H256d_gpu1.log"
 
927
  "gpu": 0,
928
  "status": "done",
929
  "elapsed": 4832,
930
+ "idle_time": 1682,
931
  "exit_code": 0,
932
  "retries": 0,
933
  "log_file": "/tmp/gpu_queue/job_076_as_baseline_250K_1L2H256d_gpu0.log"
 
936
  "job_id": 77,
937
  "name": "as_sorl_abs10_K1_250K_1L2H256d",
938
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 250000 --abs_vocab 10 --K 1 --nu",
939
+ "gpu": 2,
940
+ "status": "failed",
941
+ "elapsed": 6,
942
+ "idle_time": 49,
943
+ "exit_code": -9,
944
+ "retries": 1,
945
+ "log_file": "/tmp/gpu_queue/job_077_as_sorl_abs10_K1_250K_1L2H256d_gpu2.log"
946
  },
947
  {
948
  "job_id": 78,
 
951
  "gpu": 2,
952
  "status": "done",
953
  "elapsed": 7064,
954
+ "idle_time": 12209,
955
  "exit_code": 0,
956
  "retries": 0,
957
  "log_file": "/tmp/gpu_queue/job_078_as_baseline_500K_1L2H256d_gpu2.log"
 
963
  "gpu": 2,
964
  "status": "failed",
965
  "elapsed": 4,
966
+ "idle_time": 136,
967
  "exit_code": -9,
968
  "retries": 1,
969
  "log_file": "/tmp/gpu_queue/job_079_as_sorl_abs10_K1_500K_1L2H256d_gpu2.log"
 
975
  "gpu": 1,
976
  "status": "done",
977
  "elapsed": 1971,
978
+ "idle_time": 13688,
979
  "exit_code": 0,
980
  "retries": 0,
981
  "log_file": "/tmp/gpu_queue/job_080_as_baseline_25K_2L1H128d_gpu1.log"
 
987
  "gpu": 2,
988
  "status": "failed",
989
  "elapsed": 6,
990
+ "idle_time": 175,
991
  "exit_code": -9,
992
  "retries": 1,
993
  "log_file": "/tmp/gpu_queue/job_081_as_sorl_abs10_K1_25K_2L1H128d_gpu2.log"
 
999
  "gpu": 0,
1000
  "status": "failed",
1001
  "elapsed": 5,
1002
+ "idle_time": 155,
1003
  "exit_code": -9,
1004
  "retries": 1,
1005
  "log_file": "/tmp/gpu_queue/job_082_as_baseline_50K_2L1H128d_gpu0.log"
 
1008
  "job_id": 83,
1009
  "name": "as_sorl_abs10_K1_50K_2L1H128d",
1010
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 50000 --abs_vocab 10 --K 1 --num",
1011
+ "gpu": 0,
1012
+ "status": "failed",
1013
+ "elapsed": 6,
1014
+ "idle_time": 60,
1015
+ "exit_code": -9,
1016
+ "retries": 1,
1017
+ "log_file": "/tmp/gpu_queue/job_083_as_sorl_abs10_K1_50K_2L1H128d_gpu0.log"
1018
  },
1019
  {
1020
  "job_id": 84,
 
1023
  "gpu": 0,
1024
  "status": "failed",
1025
  "elapsed": 6,
1026
+ "idle_time": 125,
1027
  "exit_code": -9,
1028
  "retries": 1,
1029
  "log_file": "/tmp/gpu_queue/job_084_as_baseline_100K_2L1H128d_gpu0.log"
 
1035
  "gpu": 0,
1036
  "status": "failed",
1037
  "elapsed": 6,
1038
+ "idle_time": 105,
1039
  "exit_code": -9,
1040
  "retries": 1,
1041
  "log_file": "/tmp/gpu_queue/job_085_as_sorl_abs10_K1_100K_2L1H128d_gpu0.log"
 
1047
  "gpu": 1,
1048
  "status": "failed",
1049
  "elapsed": 6,
1050
+ "idle_time": 140,
1051
  "exit_code": -9,
1052
  "retries": 1,
1053
  "log_file": "/tmp/gpu_queue/job_086_as_baseline_250K_2L1H128d_gpu1.log"
 
1059
  "gpu": 1,
1060
  "status": "failed",
1061
  "elapsed": 5,
1062
+ "idle_time": 241,
1063
  "exit_code": -9,
1064
  "retries": 1,
1065
  "log_file": "/tmp/gpu_queue/job_087_as_sorl_abs10_K1_250K_2L1H128d_gpu1.log"
 
1068
  "job_id": 88,
1069
  "name": "as_baseline_500K_2L1H128d",
1070
  "cmd": "python -m arithmetic.train --mode baseline --ops add_sub --dataset_size 500000 --num_epochs 20 --n_l",
1071
+ "gpu": 1,
1072
+ "status": "failed",
1073
+ "elapsed": 6,
1074
+ "idle_time": 60,
1075
+ "exit_code": -9,
1076
+ "retries": 1,
1077
+ "log_file": "/tmp/gpu_queue/job_088_as_baseline_500K_2L1H128d_gpu1.log"
1078
  },
1079
  {
1080
  "job_id": 89,
 
1083
  "gpu": 1,
1084
  "status": "failed",
1085
  "elapsed": 5,
1086
+ "idle_time": 241,
1087
  "exit_code": -9,
1088
  "retries": 1,
1089
  "log_file": "/tmp/gpu_queue/job_089_as_sorl_abs10_K1_500K_2L1H128d_gpu1.log"
 
1092
  "job_id": 90,
1093
  "name": "as_sorl_abs5_K4_10K",
1094
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 10000 --abs_vocab 5 --K 4 --num_",
1095
+ "gpu": 0,
1096
+ "status": "failed",
1097
+ "elapsed": 5,
1098
+ "idle_time": 65,
1099
+ "exit_code": -9,
1100
+ "retries": 1,
1101
+ "log_file": "/tmp/gpu_queue/job_090_as_sorl_abs5_K4_10K_gpu0.log"
1102
  },
1103
  {
1104
  "job_id": 91,
 
1107
  "gpu": 0,
1108
  "status": "failed",
1109
  "elapsed": 5,
1110
+ "idle_time": 140,
1111
  "exit_code": -9,
1112
  "retries": 1,
1113
  "log_file": "/tmp/gpu_queue/job_091_as_sorl_abs10_K4_10K_gpu0.log"
 
1119
  "gpu": 0,
1120
  "status": "done",
1121
  "elapsed": 3477,
1122
+ "idle_time": 6517,
1123
  "exit_code": 0,
1124
  "retries": 0,
1125
  "log_file": "/tmp/gpu_queue/job_092_as_sorl_abs30_K4_10K_gpu0.log"
 
1131
  "gpu": 1,
1132
  "status": "failed",
1133
  "elapsed": 6,
1134
+ "idle_time": 159,
1135
  "exit_code": -9,
1136
  "retries": 1,
1137
  "log_file": "/tmp/gpu_queue/job_093_as_sorl_abs50_K4_10K_gpu1.log"
 
1143
  "gpu": 0,
1144
  "status": "failed",
1145
  "elapsed": 6,
1146
+ "idle_time": 100,
1147
  "exit_code": -9,
1148
  "retries": 1,
1149
  "log_file": "/tmp/gpu_queue/job_094_as_sorl_abs5_K4_250K_gpu0.log"
 
1155
  "gpu": 2,
1156
  "status": "failed",
1157
  "elapsed": 5,
1158
+ "idle_time": 237,
1159
  "exit_code": -9,
1160
  "retries": 1,
1161
  "log_file": "/tmp/gpu_queue/job_095_as_sorl_abs10_K4_250K_gpu2.log"
 
1167
  "gpu": 1,
1168
  "status": "failed",
1169
  "elapsed": 6,
1170
+ "idle_time": 104,
1171
  "exit_code": -9,
1172
  "retries": 1,
1173
  "log_file": "/tmp/gpu_queue/job_096_as_sorl_abs30_K4_250K_gpu1.log"
 
1179
  "gpu": 1,
1180
  "status": "failed",
1181
  "elapsed": 6,
1182
+ "idle_time": 190,
1183
  "exit_code": -9,
1184
  "retries": 1,
1185
  "log_file": "/tmp/gpu_queue/job_097_as_sorl_abs50_K4_250K_gpu1.log"
 
1191
  "gpu": 1,
1192
  "status": "failed",
1193
  "elapsed": 3,
1194
+ "idle_time": 235,
1195
  "exit_code": -15,
1196
  "retries": 1,
1197
  "log_file": "/tmp/gpu_queue/job_098_as_sorl_abs2_K1_100K_gpu1.log"
 
1203
  "gpu": 1,
1204
  "status": "failed",
1205
  "elapsed": 5,
1206
+ "idle_time": 215,
1207
  "exit_code": -9,
1208
  "retries": 1,
1209
  "log_file": "/tmp/gpu_queue/job_099_as_sorl_abs2_K4_100K_gpu1.log"
 
1215
  "gpu": 0,
1216
  "status": "failed",
1217
  "elapsed": 6,
1218
+ "idle_time": 114,
1219
  "exit_code": -9,
1220
  "retries": 1,
1221
  "log_file": "/tmp/gpu_queue/job_100_as_sorl_abs16_K1_100K_gpu0.log"
 
1224
  "job_id": 101,
1225
  "name": "as_sorl_abs16_K4_100K",
1226
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 16 --K 4 --nu",
1227
+ "gpu": 1,
1228
+ "status": "failed",
1229
+ "elapsed": 5,
1230
  "idle_time": 0,
1231
+ "exit_code": -9,
1232
+ "retries": 1,
1233
+ "log_file": "/tmp/gpu_queue/job_101_as_sorl_abs16_K4_100K_gpu1.log"
1234
  },
1235
  {
1236
  "job_id": 102,
1237
  "name": "as_sorl_abs20_K1_100K",
1238
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 20 --K 1 --nu",
1239
+ "gpu": 2,
1240
+ "status": "failed",
1241
+ "elapsed": 5,
1242
+ "idle_time": 40,
1243
+ "exit_code": -9,
1244
+ "retries": 1,
1245
+ "log_file": "/tmp/gpu_queue/job_102_as_sorl_abs20_K1_100K_gpu2.log"
1246
  },
1247
  {
1248
  "job_id": 103,
1249
  "name": "as_sorl_abs20_K4_100K",
1250
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 20 --K 4 --nu",
1251
+ "gpu": 0,
1252
+ "status": "failed",
1253
+ "elapsed": 5,
1254
  "idle_time": 0,
1255
+ "exit_code": -9,
1256
+ "retries": 1,
1257
+ "log_file": "/tmp/gpu_queue/job_103_as_sorl_abs20_K4_100K_gpu0.log"
1258
  },
1259
  {
1260
  "job_id": 104,
1261
  "name": "as_sorl_abs70_K1_100K",
1262
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 70 --K 1 --nu",
1263
+ "gpu": 1,
1264
+ "status": "failed",
1265
+ "elapsed": 5,
1266
+ "idle_time": 30,
1267
+ "exit_code": -9,
1268
+ "retries": 1,
1269
+ "log_file": "/tmp/gpu_queue/job_104_as_sorl_abs70_K1_100K_gpu1.log"
1270
  },
1271
  {
1272
  "job_id": 105,
1273
  "name": "as_sorl_abs70_K4_100K",
1274
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 70 --K 4 --nu",
1275
+ "gpu": 2,
1276
+ "status": "failed",
1277
+ "elapsed": 5,
1278
+ "idle_time": 35,
1279
+ "exit_code": -9,
1280
+ "retries": 1,
1281
+ "log_file": "/tmp/gpu_queue/job_105_as_sorl_abs70_K4_100K_gpu2.log"
1282
  },
1283
  {
1284
  "job_id": 106,
1285
  "name": "as_sorl_abs100_K1_100K",
1286
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 100 --K 1 --n",
1287
+ "gpu": 1,
1288
+ "status": "failed",
1289
+ "elapsed": 5,
1290
+ "idle_time": 15,
1291
+ "exit_code": -9,
1292
+ "retries": 1,
1293
+ "log_file": "/tmp/gpu_queue/job_106_as_sorl_abs100_K1_100K_gpu1.log"
1294
  },
1295
  {
1296
  "job_id": 107,
1297
  "name": "as_sorl_abs100_K4_100K",
1298
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 100 --K 4 --n",
1299
+ "gpu": 0,
1300
+ "status": "failed",
1301
+ "elapsed": 5,
1302
+ "idle_time": 24,
1303
+ "exit_code": -9,
1304
+ "retries": 1,
1305
+ "log_file": "/tmp/gpu_queue/job_107_as_sorl_abs100_K4_100K_gpu0.log"
1306
  },
1307
  {
1308
  "job_id": 108,
1309
  "name": "as_sorl_abs10_K1_zipf2.0_100K",
1310
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 10 --K 1 --al",
1311
+ "gpu": 0,
1312
+ "status": "failed",
1313
+ "elapsed": 5,
1314
+ "idle_time": 10,
1315
+ "exit_code": -9,
1316
+ "retries": 1,
1317
+ "log_file": "/tmp/gpu_queue/job_108_as_sorl_abs10_K1_zipf2.0_100K_gpu0.log"
1318
  },
1319
  {
1320
  "job_id": 109,
1321
  "name": "as_sorl_abs10_K4_zipf2.0_100K",
1322
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 10 --K 4 --al",
1323
+ "gpu": 1,
1324
+ "status": "failed",
1325
+ "elapsed": 6,
1326
+ "idle_time": 30,
1327
+ "exit_code": -9,
1328
+ "retries": 1,
1329
+ "log_file": "/tmp/gpu_queue/job_109_as_sorl_abs10_K4_zipf2.0_100K_gpu1.log"
1330
  },
1331
  {
1332
  "job_id": 110,
1333
  "name": "as_sorl_abs10_K1_zipf5.0_100K",
1334
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 10 --K 1 --al",
1335
+ "gpu": 1,
1336
+ "status": "failed",
1337
+ "elapsed": 5,
1338
+ "idle_time": 24,
1339
+ "exit_code": -9,
1340
+ "retries": 1,
1341
+ "log_file": "/tmp/gpu_queue/job_110_as_sorl_abs10_K1_zipf5.0_100K_gpu1.log"
1342
  },
1343
  {
1344
  "job_id": 111,
1345
  "name": "as_sorl_abs10_K4_zipf5.0_100K",
1346
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 10 --K 4 --al",
1347
+ "gpu": 2,
1348
+ "status": "failed",
1349
+ "elapsed": 5,
1350
+ "idle_time": 5,
1351
+ "exit_code": -9,
1352
+ "retries": 1,
1353
+ "log_file": "/tmp/gpu_queue/job_111_as_sorl_abs10_K4_zipf5.0_100K_gpu2.log"
1354
  },
1355
  {
1356
  "job_id": 112,
1357
  "name": "as_sorl_abs10_K1_zipf10.0_100K",
1358
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 10 --K 1 --al",
1359
+ "gpu": 2,
1360
+ "status": "failed",
1361
+ "elapsed": 6,
1362
+ "idle_time": 14,
1363
+ "exit_code": -9,
1364
+ "retries": 1,
1365
+ "log_file": "/tmp/gpu_queue/job_112_as_sorl_abs10_K1_zipf10.0_100K_gpu2.log"
1366
  },
1367
  {
1368
  "job_id": 113,
1369
  "name": "as_sorl_abs10_K4_zipf10.0_100K",
1370
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 10 --K 4 --al",
1371
+ "gpu": 1,
1372
+ "status": "failed",
1373
+ "elapsed": 6,
1374
+ "idle_time": 10,
1375
+ "exit_code": -9,
1376
+ "retries": 1,
1377
+ "log_file": "/tmp/gpu_queue/job_113_as_sorl_abs10_K4_zipf10.0_100K_gpu1.log"
1378
  },
1379
  {
1380
  "job_id": 114,
1381
  "name": "as_sorl_abs100_K1_zipf2.0_100K",
1382
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 100 --K 1 --a",
1383
+ "gpu": 2,
1384
+ "status": "failed",
1385
+ "elapsed": 6,
1386
+ "idle_time": 19,
1387
+ "exit_code": -9,
1388
+ "retries": 1,
1389
+ "log_file": "/tmp/gpu_queue/job_114_as_sorl_abs100_K1_zipf2.0_100K_gpu2.log"
1390
  },
1391
  {
1392
  "job_id": 115,
1393
  "name": "as_sorl_abs100_K4_zipf2.0_100K",
1394
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 100 --K 4 --a",
1395
+ "gpu": 0,
1396
+ "status": "failed",
1397
+ "elapsed": 6,
1398
+ "idle_time": 14,
1399
+ "exit_code": -9,
1400
+ "retries": 1,
1401
+ "log_file": "/tmp/gpu_queue/job_115_as_sorl_abs100_K4_zipf2.0_100K_gpu0.log"
1402
  },
1403
  {
1404
  "job_id": 116,
1405
  "name": "as_sorl_abs100_K1_zipf5.0_100K",
1406
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 100 --K 1 --a",
1407
+ "gpu": 2,
1408
+ "status": "failed",
1409
+ "elapsed": 5,
1410
  "idle_time": 0,
1411
+ "exit_code": -9,
1412
+ "retries": 1,
1413
+ "log_file": "/tmp/gpu_queue/job_116_as_sorl_abs100_K1_zipf5.0_100K_gpu2.log"
1414
  },
1415
  {
1416
  "job_id": 117,
1417
  "name": "as_sorl_abs100_K4_zipf5.0_100K",
1418
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 100 --K 4 --a",
1419
+ "gpu": 1,
1420
+ "status": "failed",
1421
+ "elapsed": 5,
1422
+ "idle_time": 44,
1423
+ "exit_code": -9,
1424
+ "retries": 1,
1425
+ "log_file": "/tmp/gpu_queue/job_117_as_sorl_abs100_K4_zipf5.0_100K_gpu1.log"
1426
  },
1427
  {
1428
  "job_id": 118,
1429
  "name": "as_sorl_abs100_K1_zipf10.0_100K",
1430
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 100 --K 1 --a",
1431
+ "gpu": 0,
1432
+ "status": "failed",
1433
+ "elapsed": 5,
1434
+ "idle_time": 36,
1435
+ "exit_code": -9,
1436
+ "retries": 1,
1437
+ "log_file": "/tmp/gpu_queue/job_118_as_sorl_abs100_K1_zipf10.0_100_gpu0.log"
1438
  },
1439
  {
1440
  "job_id": 119,
1441
  "name": "as_sorl_abs100_K4_zipf10.0_100K",
1442
  "cmd": "python -m arithmetic.train --mode sorl --ops add_sub --dataset_size 100000 --abs_vocab 100 --K 4 --a",
1443
+ "gpu": 0,
1444
+ "status": "failed",
1445
+ "elapsed": 6,
1446
+ "idle_time": 30,
1447
+ "exit_code": -9,
1448
+ "retries": 1,
1449
+ "log_file": "/tmp/gpu_queue/job_119_as_sorl_abs100_K4_zipf10.0_100_gpu0.log"
1450
  }
1451
  ]
1452
  }