JoshuaFreeman commited on
Commit
cc8e933
·
verified ·
1 Parent(s): f46523a

Upload training_log.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_log.json +2034 -954
training_log.json CHANGED
@@ -1,1442 +1,2522 @@
1
  [
2
  {
3
  "update": 5,
4
- "global_step": 10240,
5
- "num_episodes": 5,
6
- "mean_reward": -2.121724510192871,
7
- "mean_length": 337.8,
8
- "loss": 2.8168609142303467,
9
- "sps": 228.7546335682378
10
  },
11
  {
12
  "update": 10,
13
- "global_step": 20480,
14
- "num_episodes": 18,
15
- "mean_reward": -1.9787551561991374,
16
- "mean_length": 282.94444444444446,
17
- "loss": 0.21835778653621674,
18
- "sps": 1443.696434210723
19
  },
20
  {
21
  "update": 15,
22
- "global_step": 30720,
23
- "num_episodes": 18,
24
- "mean_reward": -1.9787551561991374,
25
- "mean_length": 282.94444444444446,
26
- "loss": 0.2217431664466858,
27
- "sps": 2444.104440024094
28
  },
29
  {
30
  "update": 20,
31
- "global_step": 40960,
32
- "num_episodes": 22,
33
- "mean_reward": 0.6390173651955344,
34
- "mean_length": 1597.090909090909,
35
- "loss": 0.5035408735275269,
36
- "sps": 769.1328942825502
37
  },
38
  {
39
  "update": 25,
40
- "global_step": 51200,
41
- "num_episodes": 27,
42
- "mean_reward": 0.33698498761212387,
43
- "mean_length": 1341.111111111111,
44
- "loss": 0.16991910338401794,
45
- "sps": 2449.665853907546
46
  },
47
  {
48
  "update": 30,
49
- "global_step": 61440,
50
- "num_episodes": 29,
51
- "mean_reward": 0.5858152652608937,
52
- "mean_length": 1594.9310344827586,
53
- "loss": 0.6312248110771179,
54
- "sps": 2363.5499282399865
55
  },
56
  {
57
  "update": 35,
58
- "global_step": 71680,
59
- "num_episodes": 29,
60
- "mean_reward": 0.5858152652608937,
61
- "mean_length": 1594.9310344827586,
62
- "loss": 0.4890201985836029,
63
- "sps": 2440.672480568945
64
  },
65
  {
66
  "update": 40,
67
- "global_step": 81920,
68
- "num_episodes": 31,
69
- "mean_reward": 1.2613418640628937,
70
- "mean_length": 1822.0,
71
- "loss": 0.42574575543403625,
72
- "sps": 940.0717318496648
73
  },
74
  {
75
  "update": 45,
76
- "global_step": 92160,
77
- "num_episodes": 55,
78
- "mean_reward": 2.8606436122547496,
79
- "mean_length": 1469.2727272727273,
80
- "loss": 0.5428293943405151,
81
- "sps": 1479.6348998060785
82
  },
83
  {
84
  "update": 50,
85
- "global_step": 102400,
86
- "num_episodes": 58,
87
- "mean_reward": 3.2600761939739358,
88
- "mean_length": 1568.051724137931,
89
- "loss": 0.6221064329147339,
90
- "sps": 2412.4523346060355
91
  },
92
  {
93
  "update": 55,
94
- "global_step": 112640,
95
- "num_episodes": 58,
96
- "mean_reward": 3.2600761939739358,
97
- "mean_length": 1568.051724137931,
98
- "loss": 0.794475257396698,
99
- "sps": 2405.0877911326343
100
  },
101
  {
102
  "update": 60,
103
- "global_step": 122880,
104
- "num_episodes": 58,
105
- "mean_reward": 3.2600761939739358,
106
- "mean_length": 1568.051724137931,
107
- "loss": 1.0936992168426514,
108
- "sps": 2358.874803208204
109
  },
110
  {
111
  "update": 65,
112
- "global_step": 133120,
113
- "num_episodes": 62,
114
- "mean_reward": 7.5118274227265385,
115
- "mean_length": 1954.4516129032259,
116
- "loss": 1.877166986465454,
117
- "sps": 2402.6606220225244
118
  },
119
  {
120
  "update": 70,
121
- "global_step": 143360,
122
- "num_episodes": 63,
123
- "mean_reward": 9.12682440924266,
124
- "mean_length": 2082.15873015873,
125
- "loss": 0.55415278673172,
126
- "sps": 2417.4656020731263
127
  },
128
  {
129
  "update": 75,
130
- "global_step": 153600,
131
- "num_episodes": 63,
132
- "mean_reward": 9.12682440924266,
133
- "mean_length": 2082.15873015873,
134
- "loss": 0.6123632788658142,
135
- "sps": 2415.520694055524
136
  },
137
  {
138
  "update": 80,
139
- "global_step": 163840,
140
- "num_episodes": 63,
141
- "mean_reward": 9.12682440924266,
142
- "mean_length": 2082.15873015873,
143
- "loss": 0.3689587116241455,
144
- "sps": 2464.6240808517623
145
  },
146
  {
147
  "update": 85,
148
- "global_step": 174080,
149
- "num_episodes": 70,
150
- "mean_reward": 9.247220189230783,
151
- "mean_length": 2310.557142857143,
152
- "loss": 0.3228328824043274,
153
- "sps": 2445.2962822104055
154
  },
155
  {
156
  "update": 90,
157
- "global_step": 184320,
158
- "num_episodes": 76,
159
- "mean_reward": 9.205821903128372,
160
- "mean_length": 2268.3026315789475,
161
- "loss": 0.6627960205078125,
162
- "sps": 453.4183326926133
163
  },
164
  {
165
  "update": 95,
166
- "global_step": 194560,
167
- "num_episodes": 80,
168
- "mean_reward": 9.297839653491973,
169
- "mean_length": 2165.9375,
170
- "loss": 1.1097347736358643,
171
- "sps": 2424.5065546850346
172
  },
173
  {
174
  "update": 100,
175
- "global_step": 204800,
176
- "num_episodes": 80,
177
- "mean_reward": 9.297839653491973,
178
- "mean_length": 2165.9375,
179
- "loss": 0.38644012808799744,
180
- "sps": 2386.6710469869367
181
  },
182
  {
183
  "update": 105,
184
- "global_step": 215040,
185
- "num_episodes": 87,
186
- "mean_reward": 11.394574307847297,
187
- "mean_length": 2339.057471264368,
188
- "loss": 0.7209265828132629,
189
- "sps": 2442.8262241524835
190
  },
191
  {
192
  "update": 110,
193
- "global_step": 225280,
194
- "num_episodes": 87,
195
- "mean_reward": 11.394574307847297,
196
- "mean_length": 2339.057471264368,
197
- "loss": 2.1218621730804443,
198
- "sps": 2449.9984290228053
199
  },
200
  {
201
  "update": 115,
202
- "global_step": 235520,
203
- "num_episodes": 90,
204
- "mean_reward": 12.198311275906033,
205
- "mean_length": 2375.788888888889,
206
- "loss": 0.5268573760986328,
207
- "sps": 2438.632604283566
208
  },
209
  {
210
  "update": 120,
211
- "global_step": 245760,
212
- "num_episodes": 90,
213
- "mean_reward": 12.198311275906033,
214
- "mean_length": 2375.788888888889,
215
- "loss": 2.0001754760742188,
216
- "sps": 2413.1198849334774
217
  },
218
  {
219
  "update": 125,
220
- "global_step": 256000,
221
- "num_episodes": 105,
222
- "mean_reward": 12.691367435455323,
223
- "mean_length": 2446.14,
224
- "loss": 0.6336287260055542,
225
- "sps": 619.7035033065293
226
  },
227
  {
228
  "update": 130,
229
- "global_step": 266240,
230
- "num_episodes": 105,
231
- "mean_reward": 12.691367435455323,
232
- "mean_length": 2446.14,
233
- "loss": 0.40229156613349915,
234
- "sps": 2411.1751921586524
235
  },
236
  {
237
  "update": 135,
238
- "global_step": 276480,
239
- "num_episodes": 109,
240
- "mean_reward": 13.886511907577514,
241
- "mean_length": 2526.42,
242
- "loss": 0.45471617579460144,
243
- "sps": 2450.150074118317
244
  },
245
  {
246
  "update": 140,
247
- "global_step": 286720,
248
- "num_episodes": 109,
249
- "mean_reward": 13.886511907577514,
250
- "mean_length": 2526.42,
251
- "loss": 0.4323250949382782,
252
- "sps": 2378.2401254524298
253
  },
254
  {
255
  "update": 145,
256
- "global_step": 296960,
257
- "num_episodes": 121,
258
- "mean_reward": 14.005602750778198,
259
- "mean_length": 2623.1,
260
- "loss": 0.5164788365364075,
261
- "sps": 1792.6912302713768
262
  },
263
  {
264
  "update": 150,
265
- "global_step": 307200,
266
- "num_episodes": 121,
267
- "mean_reward": 14.005602750778198,
268
- "mean_length": 2623.1,
269
- "loss": 0.8702018857002258,
270
- "sps": 2425.950637318183
271
  },
272
  {
273
  "update": 155,
274
- "global_step": 317440,
275
- "num_episodes": 129,
276
- "mean_reward": 14.87307053565979,
277
- "mean_length": 2527.32,
278
- "loss": 0.1953483670949936,
279
- "sps": 2394.950934358607
280
  },
281
  {
282
  "update": 160,
283
- "global_step": 327680,
284
  "num_episodes": 129,
285
- "mean_reward": 14.87307053565979,
286
- "mean_length": 2527.32,
287
- "loss": 0.19992592930793762,
288
- "sps": 2424.040624859784
289
  },
290
  {
291
  "update": 165,
292
- "global_step": 337920,
293
- "num_episodes": 139,
294
- "mean_reward": 16.099841012954712,
295
- "mean_length": 2525.93,
296
- "loss": 0.4464109241962433,
297
- "sps": 1319.6508401584945
298
  },
299
  {
300
  "update": 170,
301
- "global_step": 348160,
302
  "num_episodes": 139,
303
- "mean_reward": 16.099841012954712,
304
- "mean_length": 2525.93,
305
- "loss": 0.10373280942440033,
306
- "sps": 2348.0679386786046
307
  },
308
  {
309
  "update": 175,
310
- "global_step": 358400,
311
- "num_episodes": 142,
312
- "mean_reward": 17.19686735153198,
313
- "mean_length": 2624.69,
314
- "loss": 0.8095718026161194,
315
- "sps": 2386.2480483453646
316
  },
317
  {
318
  "update": 180,
319
- "global_step": 368640,
320
- "num_episodes": 142,
321
- "mean_reward": 17.19686735153198,
322
- "mean_length": 2624.69,
323
- "loss": 0.009909076616168022,
324
- "sps": 2301.590679746089
325
  },
326
  {
327
  "update": 185,
328
- "global_step": 378880,
329
- "num_episodes": 148,
330
- "mean_reward": 19.868873529434204,
331
- "mean_length": 2907.66,
332
- "loss": 1.7244997024536133,
333
- "sps": 2280.4958488818847
334
  },
335
  {
336
  "update": 190,
337
- "global_step": 389120,
338
- "num_episodes": 148,
339
- "mean_reward": 19.868873529434204,
340
- "mean_length": 2907.66,
341
- "loss": 4.516170978546143,
342
- "sps": 2419.553348740851
343
  },
344
  {
345
  "update": 195,
346
- "global_step": 399360,
347
- "num_episodes": 152,
348
- "mean_reward": 20.717632093429565,
349
- "mean_length": 3008.33,
350
- "loss": 3.401921272277832,
351
- "sps": 575.3298806579921
352
  },
353
  {
354
  "update": 200,
355
- "global_step": 409600,
356
- "num_episodes": 153,
357
- "mean_reward": 20.717632093429565,
358
- "mean_length": 3008.33,
359
- "loss": 0.8132290244102478,
360
- "sps": 2450.768029758656
361
  },
362
  {
363
  "update": 205,
364
- "global_step": 419840,
365
- "num_episodes": 162,
366
- "mean_reward": 19.608867626190186,
367
- "mean_length": 2909.79,
368
- "loss": 1.867810845375061,
369
- "sps": 621.2303877555788
370
  },
371
  {
372
  "update": 210,
373
- "global_step": 430080,
374
- "num_episodes": 167,
375
- "mean_reward": 19.028235759735107,
376
- "mean_length": 2619.36,
377
- "loss": 0.8309984803199768,
378
- "sps": 2358.6319149765754
379
  },
380
  {
381
  "update": 215,
382
- "global_step": 440320,
383
- "num_episodes": 169,
384
- "mean_reward": 19.63686834335327,
385
- "mean_length": 2715.53,
386
- "loss": 0.17013275623321533,
387
- "sps": 1381.2913503327266
388
  },
389
  {
390
  "update": 220,
391
- "global_step": 450560,
392
- "num_episodes": 170,
393
- "mean_reward": 19.778069705963134,
394
- "mean_length": 2619.36,
395
- "loss": -0.013407116755843163,
396
- "sps": 2225.527904161147
397
  },
398
  {
399
  "update": 225,
400
- "global_step": 460800,
401
- "num_episodes": 172,
402
- "mean_reward": 21.13547016143799,
403
- "mean_length": 2718.42,
404
- "loss": 0.14364227652549744,
405
- "sps": 2345.342614298781
406
  },
407
  {
408
  "update": 230,
409
- "global_step": 471040,
410
- "num_episodes": 174,
411
- "mean_reward": 22.236838788986205,
412
- "mean_length": 2819.34,
413
- "loss": 0.017919447273015976,
414
- "sps": 2408.348085488877
415
  },
416
  {
417
  "update": 235,
418
- "global_step": 481280,
419
- "num_episodes": 174,
420
- "mean_reward": 22.236838788986205,
421
- "mean_length": 2819.34,
422
- "loss": 2.6431455612182617,
423
- "sps": 2403.528538830992
424
  },
425
  {
426
  "update": 240,
427
- "global_step": 491520,
428
- "num_episodes": 178,
429
- "mean_reward": 21.810234026908873,
430
- "mean_length": 2928.26,
431
- "loss": 0.2556005120277405,
432
- "sps": 2373.833411448431
433
  },
434
  {
435
  "update": 245,
436
- "global_step": 501760,
437
  "num_episodes": 181,
438
- "mean_reward": 23.416525321006773,
439
- "mean_length": 3026.48,
440
- "loss": 3.2893059253692627,
441
- "sps": 2362.3097327902087
442
  },
443
  {
444
  "update": 250,
445
- "global_step": 512000,
446
- "num_episodes": 182,
447
- "mean_reward": 23.666512427330016,
448
- "mean_length": 3026.48,
449
- "loss": 4.634244441986084,
450
- "sps": 2410.981639631563
451
  },
452
  {
453
  "update": 255,
454
- "global_step": 522240,
455
- "num_episodes": 182,
456
- "mean_reward": 23.666512427330016,
457
- "mean_length": 3026.48,
458
- "loss": 1.1017451286315918,
459
- "sps": 2373.168403411863
460
  },
461
  {
462
  "update": 260,
463
- "global_step": 532480,
464
  "num_episodes": 188,
465
- "mean_reward": 24.153521933555602,
466
- "mean_length": 3032.56,
467
- "loss": 0.058166228234767914,
468
- "sps": 549.3090191152681
469
  },
470
  {
471
  "update": 265,
472
- "global_step": 542720,
473
- "num_episodes": 200,
474
- "mean_reward": 24.202102789878847,
475
- "mean_length": 2827.14,
476
- "loss": 3.6524577140808105,
477
- "sps": 2408.936350313052
478
  },
479
  {
480
  "update": 270,
481
- "global_step": 552960,
482
- "num_episodes": 203,
483
- "mean_reward": 25.780276198387146,
484
- "mean_length": 2924.21,
485
- "loss": -0.03986305743455887,
486
- "sps": 2384.8038232666454
487
  },
488
  {
489
  "update": 275,
490
- "global_step": 563200,
491
- "num_episodes": 203,
492
- "mean_reward": 25.780276198387146,
493
- "mean_length": 2924.21,
494
- "loss": 0.009281929582357407,
495
- "sps": 2245.2843029907467
496
  },
497
  {
498
  "update": 280,
499
- "global_step": 573440,
500
- "num_episodes": 204,
501
- "mean_reward": 26.63669138431549,
502
- "mean_length": 3023.78,
503
- "loss": 0.1620613932609558,
504
- "sps": 2155.8537501850938
505
  },
506
  {
507
  "update": 285,
508
- "global_step": 583680,
509
- "num_episodes": 208,
510
- "mean_reward": 27.24311915874481,
511
- "mean_length": 3124.7,
512
- "loss": 0.7804455161094666,
513
- "sps": 2336.540118617715
514
  },
515
  {
516
  "update": 290,
517
- "global_step": 593920,
518
- "num_episodes": 210,
519
- "mean_reward": 27.990238661766053,
520
- "mean_length": 3122.84,
521
- "loss": -0.02732567861676216,
522
- "sps": 2446.515068539969
523
  },
524
  {
525
  "update": 295,
526
- "global_step": 604160,
527
- "num_episodes": 210,
528
- "mean_reward": 27.990238661766053,
529
- "mean_length": 3122.84,
530
- "loss": 0.026210829615592957,
531
- "sps": 2401.0219632291446
532
  },
533
  {
534
  "update": 300,
535
- "global_step": 614400,
536
  "num_episodes": 212,
537
- "mean_reward": 28.8441028547287,
538
- "mean_length": 3221.9,
539
- "loss": 0.018815483897924423,
540
- "sps": 1787.985104867478
541
  },
542
  {
543
  "update": 305,
544
- "global_step": 624640,
545
- "num_episodes": 219,
546
- "mean_reward": 29.601799569129945,
547
- "mean_length": 3224.78,
548
- "loss": -0.01376257836818695,
549
- "sps": 2307.873344880848
550
  },
551
  {
552
  "update": 310,
553
- "global_step": 634880,
554
- "num_episodes": 223,
555
- "mean_reward": 30.112299256324768,
556
- "mean_length": 3222.81,
557
- "loss": -0.13672460615634918,
558
- "sps": 2376.26116166583
559
  },
560
  {
561
  "update": 315,
562
- "global_step": 645120,
563
- "num_episodes": 223,
564
- "mean_reward": 30.112299256324768,
565
- "mean_length": 3222.81,
566
- "loss": 0.24129757285118103,
567
- "sps": 2301.537029038706
568
  },
569
  {
570
  "update": 320,
571
- "global_step": 655360,
572
- "num_episodes": 224,
573
- "mean_reward": 30.965999827384948,
574
- "mean_length": 3320.52,
575
- "loss": 0.3078324496746063,
576
- "sps": 2359.266120378419
577
  },
578
  {
579
  "update": 325,
580
- "global_step": 665600,
581
- "num_episodes": 230,
582
- "mean_reward": 31.090740485191347,
583
- "mean_length": 3419.33,
584
- "loss": 0.002869449555873871,
585
- "sps": 2345.5360183144344
586
  },
587
  {
588
  "update": 330,
589
- "global_step": 675840,
590
- "num_episodes": 233,
591
- "mean_reward": 31.204454226493837,
592
- "mean_length": 3514.15,
593
- "loss": 0.8130831718444824,
594
- "sps": 2348.1224969342966
595
  },
596
  {
597
  "update": 335,
598
- "global_step": 686080,
599
- "num_episodes": 233,
600
- "mean_reward": 31.204454226493837,
601
- "mean_length": 3514.15,
602
- "loss": -0.10025477409362793,
603
- "sps": 2203.7073854518803
604
  },
605
  {
606
  "update": 340,
607
- "global_step": 696320,
608
- "num_episodes": 236,
609
- "mean_reward": 30.600520968437195,
610
- "mean_length": 3419.33,
611
- "loss": 0.11422204971313477,
612
- "sps": 1223.5079096329264
613
  },
614
  {
615
  "update": 345,
616
- "global_step": 706560,
617
- "num_episodes": 244,
618
- "mean_reward": 29.116036429405213,
619
- "mean_length": 3431.39,
620
- "loss": 0.3314732015132904,
621
- "sps": 604.6873121204968
622
  },
623
  {
624
  "update": 350,
625
- "global_step": 716800,
626
- "num_episodes": 248,
627
- "mean_reward": 27.764892373085022,
628
- "mean_length": 3338.43,
629
- "loss": -0.08348219841718674,
630
- "sps": 2334.652125215495
631
  },
632
  {
633
  "update": 355,
634
- "global_step": 727040,
635
- "num_episodes": 248,
636
- "mean_reward": 27.764892373085022,
637
- "mean_length": 3338.43,
638
- "loss": -0.011928500607609749,
639
- "sps": 2266.898774941684
640
  },
641
  {
642
  "update": 360,
643
- "global_step": 737280,
644
- "num_episodes": 251,
645
- "mean_reward": 27.271975588798522,
646
- "mean_length": 3335.03,
647
- "loss": 0.16195061802864075,
648
- "sps": 511.5670473274587
649
  },
650
  {
651
  "update": 365,
652
- "global_step": 747520,
653
  "num_episodes": 261,
654
- "mean_reward": 25.41567024707794,
655
- "mean_length": 3238.35,
656
- "loss": -0.17278775572776794,
657
- "sps": 810.6621085529184
658
  },
659
  {
660
  "update": 370,
661
- "global_step": 757760,
662
- "num_episodes": 274,
663
- "mean_reward": 21.383243069648742,
664
- "mean_length": 2943.09,
665
- "loss": -0.017205242067575455,
666
- "sps": 619.3760616179322
667
  },
668
  {
669
  "update": 375,
670
- "global_step": 768000,
671
- "num_episodes": 274,
672
- "mean_reward": 21.383243069648742,
673
- "mean_length": 2943.09,
674
- "loss": 0.08961383998394012,
675
- "sps": 2236.482803815745
676
  },
677
  {
678
  "update": 380,
679
- "global_step": 778240,
680
- "num_episodes": 274,
681
- "mean_reward": 21.383243069648742,
682
- "mean_length": 2943.09,
683
- "loss": 4.401951313018799,
684
- "sps": 2147.399362575019
685
  },
686
  {
687
  "update": 385,
688
- "global_step": 788480,
689
- "num_episodes": 278,
690
- "mean_reward": 22.28250171661377,
691
- "mean_length": 3028.1,
692
- "loss": 1.4729293584823608,
693
- "sps": 2300.4546825723296
694
  },
695
  {
696
  "update": 390,
697
- "global_step": 798720,
698
- "num_episodes": 280,
699
- "mean_reward": 21.386626567840576,
700
- "mean_length": 3127.16,
701
- "loss": 0.06943056732416153,
702
- "sps": 2341.8789187579932
703
  },
704
  {
705
  "update": 395,
706
- "global_step": 808960,
707
- "num_episodes": 280,
708
- "mean_reward": 21.386626567840576,
709
- "mean_length": 3127.16,
710
- "loss": 0.929630696773529,
711
- "sps": 2232.925422842351
712
  },
713
  {
714
  "update": 400,
715
- "global_step": 819200,
716
- "num_episodes": 280,
717
- "mean_reward": 21.386626567840576,
718
- "mean_length": 3127.16,
719
- "loss": -0.035214584320783615,
720
- "sps": 2187.4650402736006
721
  },
722
  {
723
  "update": 405,
724
- "global_step": 829440,
725
- "num_episodes": 284,
726
- "mean_reward": 21.765649213790894,
727
- "mean_length": 3031.31,
728
- "loss": 3.6039881706237793,
729
- "sps": 2344.35431842087
730
  },
731
  {
732
  "update": 410,
733
- "global_step": 839680,
734
- "num_episodes": 288,
735
- "mean_reward": 23.637630491256715,
736
- "mean_length": 3126.66,
737
- "loss": 0.8354597091674805,
738
- "sps": 1416.6940593723025
739
  },
740
  {
741
  "update": 415,
742
- "global_step": 849920,
743
- "num_episodes": 288,
744
- "mean_reward": 23.637630491256715,
745
- "mean_length": 3126.66,
746
- "loss": 1.1654760837554932,
747
- "sps": 2444.537765977125
748
  },
749
  {
750
  "update": 420,
751
- "global_step": 860160,
752
- "num_episodes": 288,
753
- "mean_reward": 23.637630491256715,
754
- "mean_length": 3126.66,
755
- "loss": 0.7069441676139832,
756
- "sps": 2402.946945101056
757
  },
758
  {
759
  "update": 425,
760
- "global_step": 870400,
761
- "num_episodes": 296,
762
- "mean_reward": 26.219414005279543,
763
- "mean_length": 3426.79,
764
- "loss": 1.0991233587265015,
765
- "sps": 1584.3855114127348
766
  },
767
  {
768
  "update": 430,
769
- "global_step": 880640,
770
- "num_episodes": 302,
771
- "mean_reward": 24.143256397247313,
772
- "mean_length": 3329.74,
773
- "loss": 0.5094761252403259,
774
- "sps": 1460.1652484484139
775
  },
776
  {
777
  "update": 435,
778
- "global_step": 890880,
779
- "num_episodes": 302,
780
- "mean_reward": 24.143256397247313,
781
- "mean_length": 3329.74,
782
- "loss": 1.6765072345733643,
783
- "sps": 2365.579425966714
784
  },
785
  {
786
  "update": 440,
787
- "global_step": 901120,
788
- "num_episodes": 302,
789
- "mean_reward": 24.143256397247313,
790
- "mean_length": 3329.74,
791
- "loss": -0.044437870383262634,
792
- "sps": 2282.7314942544617
793
  },
794
  {
795
  "update": 445,
796
- "global_step": 911360,
797
- "num_episodes": 308,
798
- "mean_reward": 23.534629821777344,
799
- "mean_length": 3238.0,
800
- "loss": 0.4068017303943634,
801
- "sps": 865.914540942848
802
  },
803
  {
804
  "update": 450,
805
- "global_step": 921600,
806
- "num_episodes": 314,
807
- "mean_reward": 23.246068153381348,
808
- "mean_length": 3138.43,
809
- "loss": 0.32406821846961975,
810
- "sps": 830.3529260738604
811
  },
812
  {
813
  "update": 455,
814
- "global_step": 931840,
815
- "num_episodes": 316,
816
- "mean_reward": 22.337194681167603,
817
- "mean_length": 3057.38,
818
- "loss": 0.21466466784477234,
819
- "sps": 2411.145412824079
820
  },
821
  {
822
  "update": 460,
823
- "global_step": 942080,
824
- "num_episodes": 316,
825
- "mean_reward": 22.337194681167603,
826
- "mean_length": 3057.38,
827
- "loss": 4.0019731521606445,
828
- "sps": 2372.8288290677337
829
  },
830
  {
831
  "update": 465,
832
- "global_step": 952320,
833
- "num_episodes": 321,
834
- "mean_reward": 22.935798473358155,
835
- "mean_length": 3155.68,
836
- "loss": 3.8567330837249756,
837
- "sps": 796.2439424516102
838
  },
839
  {
840
  "update": 470,
841
- "global_step": 962560,
842
- "num_episodes": 326,
843
- "mean_reward": 23.01468356132507,
844
- "mean_length": 3080.66,
845
- "loss": 0.04481935873627663,
846
- "sps": 2363.29177191282
847
  },
848
  {
849
  "update": 475,
850
- "global_step": 972800,
851
- "num_episodes": 329,
852
- "mean_reward": 24.00417799949646,
853
- "mean_length": 3081.1,
854
- "loss": -0.09712222218513489,
855
- "sps": 2323.0109350014427
856
  },
857
  {
858
  "update": 480,
859
- "global_step": 983040,
860
- "num_episodes": 329,
861
- "mean_reward": 24.00417799949646,
862
- "mean_length": 3081.1,
863
- "loss": 0.29562610387802124,
864
- "sps": 2249.6786763931746
865
  },
866
  {
867
  "update": 485,
868
- "global_step": 993280,
869
- "num_episodes": 330,
870
- "mean_reward": 24.60798607826233,
871
- "mean_length": 3177.27,
872
- "loss": 0.02162332460284233,
873
- "sps": 2222.1627608215495
874
  },
875
  {
876
  "update": 490,
877
- "global_step": 1003520,
878
- "num_episodes": 334,
879
- "mean_reward": 25.107961702346802,
880
- "mean_length": 3177.27,
881
- "loss": 0.30668944120407104,
882
- "sps": 2336.9965399128855
883
  },
884
  {
885
  "update": 495,
886
- "global_step": 1013760,
887
- "num_episodes": 337,
888
- "mean_reward": 25.60792893409729,
889
- "mean_length": 3175.92,
890
- "loss": 0.40231630206108093,
891
- "sps": 1175.8081438816857
892
  },
893
  {
894
  "update": 500,
895
- "global_step": 1024000,
896
- "num_episodes": 337,
897
- "mean_reward": 25.60792893409729,
898
- "mean_length": 3175.92,
899
- "loss": 0.33998745679855347,
900
- "sps": 2419.5867438311334
901
  },
902
  {
903
  "update": 505,
904
- "global_step": 1034240,
905
- "num_episodes": 340,
906
- "mean_reward": 25.602466859817504,
907
- "mean_length": 3171.49,
908
- "loss": 0.09274712204933167,
909
- "sps": 2409.938615394154
910
  },
911
  {
912
  "update": 510,
913
- "global_step": 1044480,
914
- "num_episodes": 346,
915
- "mean_reward": 25.201329984664916,
916
- "mean_length": 3263.94,
917
- "loss": 0.2879057228565216,
918
- "sps": 976.7251772331928
919
  },
920
  {
921
  "update": 515,
922
- "global_step": 1054720,
923
- "num_episodes": 347,
924
- "mean_reward": 25.555161905288696,
925
- "mean_length": 3361.65,
926
- "loss": 0.39604732394218445,
927
- "sps": 2406.938365656791
928
  },
929
  {
930
  "update": 520,
931
- "global_step": 1064960,
932
- "num_episodes": 347,
933
- "mean_reward": 25.555161905288696,
934
- "mean_length": 3361.65,
935
- "loss": 0.03350641578435898,
936
- "sps": 2384.9415454427835
937
  },
938
  {
939
  "update": 525,
940
- "global_step": 1075200,
941
- "num_episodes": 348,
942
- "mean_reward": 26.658946142196655,
943
- "mean_length": 3459.36,
944
- "loss": 0.8593693971633911,
945
- "sps": 2415.202845406159
946
  },
947
  {
948
  "update": 530,
949
- "global_step": 1085440,
950
- "num_episodes": 355,
951
- "mean_reward": 27.258036041259764,
952
- "mean_length": 3553.8,
953
- "loss": 0.4878949820995331,
954
- "sps": 710.1710154431543
955
  },
956
  {
957
  "update": 535,
958
- "global_step": 1095680,
959
- "num_episodes": 368,
960
- "mean_reward": 28.800746297836305,
961
- "mean_length": 3371.19,
962
- "loss": 0.21109388768672943,
963
- "sps": 532.1724281533873
964
  },
965
  {
966
  "update": 540,
967
- "global_step": 1105920,
968
- "num_episodes": 371,
969
- "mean_reward": 29.045905199050903,
970
- "mean_length": 3369.84,
971
- "loss": 0.14603528380393982,
972
- "sps": 2335.2550876461346
973
  },
974
  {
975
  "update": 545,
976
- "global_step": 1116160,
977
- "num_episodes": 373,
978
- "mean_reward": 29.899697828292847,
979
- "mean_length": 3466.01,
980
- "loss": -0.04600970447063446,
981
- "sps": 2305.921805837758
982
  },
983
  {
984
  "update": 550,
985
- "global_step": 1126400,
986
- "num_episodes": 374,
987
- "mean_reward": 31.003479852676392,
988
- "mean_length": 3565.07,
989
- "loss": 0.11138609051704407,
990
- "sps": 2400.3530395714765
991
  },
992
  {
993
  "update": 555,
994
- "global_step": 1136640,
995
- "num_episodes": 377,
996
- "mean_reward": 31.149640073776244,
997
- "mean_length": 3467.36,
998
- "loss": 0.35408008098602295,
999
- "sps": 1863.2471573876685
1000
  },
1001
  {
1002
  "update": 560,
1003
- "global_step": 1146880,
1004
- "num_episodes": 378,
1005
- "mean_reward": 31.50347306251526,
1006
- "mean_length": 3563.53,
1007
- "loss": 0.03238815441727638,
1008
- "sps": 2410.263871749416
1009
  },
1010
  {
1011
  "update": 565,
1012
- "global_step": 1157120,
1013
- "num_episodes": 379,
1014
- "mean_reward": 31.503473825454712,
1015
- "mean_length": 3563.53,
1016
- "loss": 1.2669872045516968,
1017
- "sps": 2435.9597448206396
1018
  },
1019
  {
1020
  "update": 570,
1021
- "global_step": 1167360,
1022
- "num_episodes": 381,
1023
- "mean_reward": 30.14463397026062,
1024
- "mean_length": 3465.82,
1025
- "loss": 1.1442028284072876,
1026
- "sps": 2364.850671069342
1027
  },
1028
  {
1029
  "update": 575,
1030
- "global_step": 1177600,
1031
- "num_episodes": 387,
1032
- "mean_reward": 27.80320453643799,
1033
- "mean_length": 3366.44,
1034
- "loss": 0.08564125001430511,
1035
- "sps": 566.4595364452883
1036
  },
1037
  {
1038
  "update": 580,
1039
- "global_step": 1187840,
1040
- "num_episodes": 389,
1041
- "mean_reward": 27.20208529472351,
1042
- "mean_length": 3266.87,
1043
- "loss": 0.14057211577892303,
1044
- "sps": 2297.6598442501095
1045
  },
1046
  {
1047
  "update": 585,
1048
- "global_step": 1198080,
1049
- "num_episodes": 396,
1050
- "mean_reward": 26.080838441848755,
1051
- "mean_length": 3169.09,
1052
- "loss": 0.0875428318977356,
1053
- "sps": 2440.645435446852
1054
  },
1055
  {
1056
  "update": 590,
1057
- "global_step": 1208320,
1058
- "num_episodes": 403,
1059
- "mean_reward": 24.67230319023132,
1060
- "mean_length": 3069.72,
1061
- "loss": -0.005318094044923782,
1062
- "sps": 2415.1756827351583
1063
  },
1064
  {
1065
  "update": 595,
1066
- "global_step": 1218560,
1067
- "num_episodes": 406,
1068
- "mean_reward": 23.778743057250978,
1069
- "mean_length": 3164.03,
1070
- "loss": 0.07221215218305588,
1071
- "sps": 1535.4284659165353
1072
  },
1073
  {
1074
  "update": 600,
1075
- "global_step": 1228800,
1076
- "num_episodes": 413,
1077
- "mean_reward": 22.0106067276001,
1078
- "mean_length": 2967.58,
1079
- "loss": 0.35672813653945923,
1080
- "sps": 2410.0731702663725
1081
  },
1082
  {
1083
  "update": 605,
1084
- "global_step": 1239040,
1085
- "num_episodes": 418,
1086
- "mean_reward": 20.706662797927855,
1087
- "mean_length": 2849.49,
1088
- "loss": 0.1961039900779724,
1089
- "sps": 2444.0300319234293
1090
  },
1091
  {
1092
  "update": 610,
1093
- "global_step": 1249280,
1094
- "num_episodes": 420,
1095
- "mean_reward": 21.058296146392824,
1096
- "mean_length": 2949.06,
1097
- "loss": 0.20069828629493713,
1098
- "sps": 2409.835173874158
1099
  },
1100
  {
1101
  "update": 615,
1102
- "global_step": 1259520,
1103
- "num_episodes": 420,
1104
- "mean_reward": 21.058296146392824,
1105
- "mean_length": 2949.06,
1106
- "loss": 0.6283493041992188,
1107
- "sps": 2351.264188304028
1108
  },
1109
  {
1110
  "update": 620,
1111
- "global_step": 1269760,
1112
- "num_episodes": 436,
1113
- "mean_reward": 16.700364379882814,
1114
- "mean_length": 2538.45,
1115
- "loss": 0.06421943008899689,
1116
- "sps": 581.6206601898414
1117
  },
1118
  {
1119
  "update": 625,
1120
- "global_step": 1280000,
1121
- "num_episodes": 437,
1122
- "mean_reward": 17.554159069061278,
1123
- "mean_length": 2634.62,
1124
- "loss": 0.025755397975444794,
1125
- "sps": 2336.2122156272303
1126
  },
1127
  {
1128
  "update": 630,
1129
- "global_step": 1290240,
1130
- "num_episodes": 438,
1131
- "mean_reward": 17.804676685333252,
1132
- "mean_length": 2634.62,
1133
- "loss": -0.039572782814502716,
1134
- "sps": 2353.5698753913625
1135
  },
1136
  {
1137
  "update": 635,
1138
- "global_step": 1300480,
1139
- "num_episodes": 438,
1140
- "mean_reward": 17.804676685333252,
1141
- "mean_length": 2634.62,
1142
- "loss": -0.0706489086151123,
1143
- "sps": 2283.6836848035246
1144
  },
1145
  {
1146
  "update": 640,
1147
- "global_step": 1310720,
1148
- "num_episodes": 441,
1149
- "mean_reward": 19.15775371551514,
1150
- "mean_length": 2733.68,
1151
- "loss": -0.09373792260885239,
1152
- "sps": 1474.6357445156093
1153
  },
1154
  {
1155
  "update": 645,
1156
- "global_step": 1320960,
1157
- "num_episodes": 442,
1158
- "mean_reward": 20.157576084136963,
1159
- "mean_length": 2733.68,
1160
- "loss": 0.11351752281188965,
1161
- "sps": 2334.5626594322757
1162
  },
1163
  {
1164
  "update": 650,
1165
- "global_step": 1331200,
1166
- "num_episodes": 444,
1167
- "mean_reward": 20.267150926589967,
1168
- "mean_length": 2833.09,
1169
- "loss": -0.18438173830509186,
1170
- "sps": 2236.677306457589
1171
  },
1172
  {
1173
  "update": 655,
1174
- "global_step": 1341440,
1175
- "num_episodes": 444,
1176
- "mean_reward": 20.267150926589967,
1177
- "mean_length": 2833.09,
1178
- "loss": -0.07986398041248322,
1179
- "sps": 2210.239531580035
1180
  },
1181
  {
1182
  "update": 660,
1183
- "global_step": 1351680,
1184
- "num_episodes": 447,
1185
- "mean_reward": 21.873516216278077,
1186
- "mean_length": 2929.77,
1187
- "loss": -0.05535223335027695,
1188
- "sps": 2385.7304437353546
1189
  },
1190
  {
1191
  "update": 665,
1192
- "global_step": 1361920,
1193
- "num_episodes": 449,
1194
- "mean_reward": 21.519568309783935,
1195
- "mean_length": 2830.71,
1196
- "loss": 0.10434924066066742,
1197
- "sps": 2330.8125431773424
1198
  },
1199
  {
1200
  "update": 670,
1201
- "global_step": 1372160,
1202
- "num_episodes": 451,
1203
- "mean_reward": 22.623257703781128,
1204
- "mean_length": 2927.07,
1205
- "loss": 0.29875361919403076,
1206
- "sps": 2388.3964178785764
1207
  },
1208
  {
1209
  "update": 675,
1210
- "global_step": 1382400,
1211
- "num_episodes": 451,
1212
- "mean_reward": 22.623257703781128,
1213
- "mean_length": 2927.07,
1214
- "loss": 0.01194491982460022,
1215
- "sps": 2360.8138719071453
1216
  },
1217
  {
1218
  "update": 680,
1219
- "global_step": 1392640,
1220
- "num_episodes": 454,
1221
- "mean_reward": 23.234625358581543,
1222
- "mean_length": 3026.64,
1223
- "loss": 0.1673501431941986,
1224
- "sps": 1557.6244924988657
1225
  },
1226
  {
1227
  "update": 685,
1228
- "global_step": 1402880,
1229
- "num_episodes": 458,
1230
- "mean_reward": 23.841174821853638,
1231
- "mean_length": 3124.86,
1232
- "loss": 0.026581741869449615,
1233
- "sps": 2296.8415092422506
1234
  },
1235
  {
1236
  "update": 690,
1237
- "global_step": 1413120,
1238
- "num_episodes": 459,
1239
- "mean_reward": 23.949846239089965,
1240
- "mean_length": 3221.03,
1241
- "loss": 1.637216329574585,
1242
- "sps": 413.9001244167228
1243
  },
1244
  {
1245
  "update": 695,
1246
- "global_step": 1423360,
1247
- "num_episodes": 465,
1248
- "mean_reward": 24.109339590072633,
1249
- "mean_length": 3246.44,
1250
- "loss": 0.4041682779788971,
1251
- "sps": 1973.9302703248002
1252
  },
1253
  {
1254
  "update": 700,
1255
- "global_step": 1433600,
1256
- "num_episodes": 466,
1257
- "mean_reward": 24.21024447441101,
1258
- "mean_length": 3344.24,
1259
- "loss": 0.13472315669059753,
1260
- "sps": 2279.0860535321326
1261
  },
1262
  {
1263
  "update": 705,
1264
- "global_step": 1443840,
1265
- "num_episodes": 468,
1266
- "mean_reward": 24.856354351043702,
1267
- "mean_length": 3440.41,
1268
- "loss": 0.6666802763938904,
1269
- "sps": 2315.9586381196755
1270
  },
1271
  {
1272
  "update": 710,
1273
- "global_step": 1454080,
1274
- "num_episodes": 468,
1275
- "mean_reward": 24.856354351043702,
1276
- "mean_length": 3440.41,
1277
- "loss": -0.15654346346855164,
1278
- "sps": 2242.2811278107365
1279
  },
1280
  {
1281
  "update": 715,
1282
- "global_step": 1464320,
1283
- "num_episodes": 477,
1284
- "mean_reward": 24.00254797935486,
1285
- "mean_length": 3342.7,
1286
- "loss": -0.14158941805362701,
1287
- "sps": 2301.1874356888225
1288
  },
1289
  {
1290
  "update": 720,
1291
- "global_step": 1474560,
1292
- "num_episodes": 480,
1293
- "mean_reward": 24.0372780418396,
1294
- "mean_length": 3146.96,
1295
- "loss": 0.10493358224630356,
1296
- "sps": 843.3732587375952
1297
  },
1298
  {
1299
  "update": 725,
1300
- "global_step": 1484800,
1301
- "num_episodes": 481,
1302
- "mean_reward": 23.896179141998292,
1303
- "mean_length": 3244.67,
1304
- "loss": 0.01014188677072525,
1305
- "sps": 2387.5931465826584
1306
  },
1307
  {
1308
  "update": 730,
1309
- "global_step": 1495040,
1310
- "num_episodes": 481,
1311
- "mean_reward": 23.896179141998292,
1312
- "mean_length": 3244.67,
1313
- "loss": 1.5800291299819946,
1314
- "sps": 2199.482155508925
1315
  },
1316
  {
1317
  "update": 735,
1318
- "global_step": 1505280,
1319
- "num_episodes": 486,
1320
- "mean_reward": 23.005251121520995,
1321
- "mean_length": 3344.42,
1322
- "loss": 2.476745128631592,
1323
- "sps": 555.9822817000502
1324
  },
1325
  {
1326
  "update": 740,
1327
- "global_step": 1515520,
1328
- "num_episodes": 487,
1329
- "mean_reward": 22.755260047912596,
1330
- "mean_length": 3344.42,
1331
- "loss": 0.043340131640434265,
1332
- "sps": 2358.713519842451
1333
  },
1334
  {
1335
  "update": 745,
1336
- "global_step": 1525760,
1337
- "num_episodes": 489,
1338
- "mean_reward": 23.35637879371643,
1339
- "mean_length": 3442.64,
1340
- "loss": 0.8548387885093689,
1341
- "sps": 2384.7462230357132
1342
  },
1343
  {
1344
  "update": 750,
1345
- "global_step": 1536000,
1346
- "num_episodes": 489,
1347
- "mean_reward": 23.35637879371643,
1348
- "mean_length": 3442.64,
1349
- "loss": 0.06128331273794174,
1350
- "sps": 2389.02547098786
1351
  },
1352
  {
1353
  "update": 755,
1354
- "global_step": 1546240,
1355
- "num_episodes": 495,
1356
- "mean_reward": 23.45271695137024,
1357
- "mean_length": 3539.52,
1358
- "loss": 0.010654259473085403,
1359
- "sps": 451.1509132304138
1360
  },
1361
  {
1362
  "update": 760,
1363
- "global_step": 1556480,
1364
- "num_episodes": 498,
1365
- "mean_reward": 24.20286971092224,
1366
- "mean_length": 3541.89,
1367
- "loss": -0.015714531764388084,
1368
- "sps": 2272.008455427166
1369
  },
1370
  {
1371
  "update": 765,
1372
- "global_step": 1566720,
1373
- "num_episodes": 503,
1374
- "mean_reward": 24.520383558273316,
1375
- "mean_length": 3642.43,
1376
- "loss": 1.2781355381011963,
1377
- "sps": 2234.132232505493
1378
  },
1379
  {
1380
  "update": 770,
1381
- "global_step": 1576960,
1382
- "num_episodes": 503,
1383
- "mean_reward": 24.520383558273316,
1384
- "mean_length": 3642.43,
1385
- "loss": 0.005558963865041733,
1386
- "sps": 2252.444167541167
1387
  },
1388
  {
1389
  "update": 775,
1390
- "global_step": 1587200,
1391
- "num_episodes": 508,
1392
- "mean_reward": 26.765025606155394,
1393
- "mean_length": 3642.43,
1394
- "loss": 1.5083853006362915,
1395
- "sps": 1001.7898928543148
1396
  },
1397
  {
1398
  "update": 780,
1399
- "global_step": 1597440,
1400
- "num_episodes": 512,
1401
- "mean_reward": 27.61695213317871,
1402
- "mean_length": 3739.96,
1403
- "loss": -0.03274097293615341,
1404
- "sps": 406.1884387297904
1405
  },
1406
  {
1407
  "update": 785,
1408
- "global_step": 1607680,
1409
- "num_episodes": 517,
1410
- "mean_reward": 29.239461097717285,
1411
- "mean_length": 3749.65,
1412
- "loss": 0.056141212582588196,
1413
- "sps": 711.222362259853
1414
  },
1415
  {
1416
  "update": 790,
1417
- "global_step": 1617920,
1418
- "num_episodes": 517,
1419
- "mean_reward": 29.239461097717285,
1420
- "mean_length": 3749.65,
1421
- "loss": 0.08803388476371765,
1422
- "sps": 2290.465987254353
1423
  },
1424
  {
1425
  "update": 795,
1426
- "global_step": 1628160,
1427
- "num_episodes": 524,
1428
- "mean_reward": 28.533955936431884,
1429
- "mean_length": 3547.65,
1430
- "loss": 5.9130449295043945,
1431
- "sps": 1206.367279137959
1432
  },
1433
  {
1434
  "update": 800,
1435
- "global_step": 1638400,
1436
- "num_episodes": 528,
1437
- "mean_reward": 29.898164215087892,
1438
- "mean_length": 3657.29,
1439
- "loss": 0.8671517372131348,
1440
- "sps": 2301.788037202966
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1441
  }
1442
  ]
 
1
  [
2
  {
3
  "update": 5,
4
+ "global_step": 20480,
5
+ "num_episodes": 12,
6
+ "mean_reward": 2.9954163233439126,
7
+ "mean_length": 301.75,
8
+ "loss": 0.4707077145576477,
9
+ "sps": 2897.2289066048047
10
  },
11
  {
12
  "update": 10,
13
+ "global_step": 40960,
14
+ "num_episodes": 12,
15
+ "mean_reward": 2.9954163233439126,
16
+ "mean_length": 301.75,
17
+ "loss": 1.0232033729553223,
18
+ "sps": 2951.2458688255706
19
  },
20
  {
21
  "update": 15,
22
+ "global_step": 61440,
23
+ "num_episodes": 12,
24
+ "mean_reward": 2.9954163233439126,
25
+ "mean_length": 301.75,
26
+ "loss": 1.3049204349517822,
27
+ "sps": 2822.044769995029
28
  },
29
  {
30
  "update": 20,
31
+ "global_step": 81920,
32
+ "num_episodes": 16,
33
+ "mean_reward": 6.446762174367905,
34
+ "mean_length": 2105.625,
35
+ "loss": 3.682100772857666,
36
+ "sps": 2013.3169607307623
37
  },
38
  {
39
  "update": 25,
40
+ "global_step": 102400,
41
+ "num_episodes": 24,
42
+ "mean_reward": 14.11154419183731,
43
+ "mean_length": 3507.7083333333335,
44
+ "loss": 0.5709385275840759,
45
+ "sps": 2704.0544416242456
46
  },
47
  {
48
  "update": 30,
49
+ "global_step": 122880,
50
+ "num_episodes": 24,
51
+ "mean_reward": 14.11154419183731,
52
+ "mean_length": 3507.7083333333335,
53
+ "loss": 0.9411047697067261,
54
+ "sps": 2703.677829940517
55
  },
56
  {
57
  "update": 35,
58
+ "global_step": 143360,
59
+ "num_episodes": 24,
60
+ "mean_reward": 14.11154419183731,
61
+ "mean_length": 3507.7083333333335,
62
+ "loss": 0.18034808337688446,
63
+ "sps": 2660.7833455377177
64
  },
65
  {
66
  "update": 40,
67
+ "global_step": 163840,
68
+ "num_episodes": 29,
69
+ "mean_reward": 33.75182229074939,
70
+ "mean_length": 4627.068965517241,
71
+ "loss": 0.449077308177948,
72
+ "sps": 781.6860032595372
73
  },
74
  {
75
  "update": 45,
76
+ "global_step": 184320,
77
+ "num_episodes": 43,
78
+ "mean_reward": 31.471195509267407,
79
+ "mean_length": 3927.2093023255816,
80
+ "loss": 2.4075186252593994,
81
+ "sps": 1640.7038600574003
82
  },
83
  {
84
  "update": 50,
85
+ "global_step": 204800,
86
+ "num_episodes": 43,
87
+ "mean_reward": 31.471195509267407,
88
+ "mean_length": 3927.2093023255816,
89
+ "loss": 0.7558565735816956,
90
+ "sps": 2296.3021079759415
91
  },
92
  {
93
  "update": 55,
94
+ "global_step": 225280,
95
+ "num_episodes": 43,
96
+ "mean_reward": 31.471195509267407,
97
+ "mean_length": 3927.2093023255816,
98
+ "loss": 0.004491906613111496,
99
+ "sps": 1918.4604744440246
100
  },
101
  {
102
  "update": 60,
103
+ "global_step": 245760,
104
+ "num_episodes": 45,
105
+ "mean_reward": 33.261741065979,
106
+ "mean_length": 4197.111111111111,
107
+ "loss": 0.35625988245010376,
108
+ "sps": 2061.6452443134217
109
  },
110
  {
111
  "update": 65,
112
+ "global_step": 266240,
113
+ "num_episodes": 56,
114
+ "mean_reward": 39.252455030168804,
115
+ "mean_length": 4465.803571428572,
116
+ "loss": 0.26919102668762207,
117
+ "sps": 2690.170793323002
118
  },
119
  {
120
  "update": 70,
121
+ "global_step": 286720,
122
+ "num_episodes": 56,
123
+ "mean_reward": 39.252455030168804,
124
+ "mean_length": 4465.803571428572,
125
+ "loss": 0.1174827367067337,
126
+ "sps": 2143.112234819028
127
  },
128
  {
129
  "update": 75,
130
+ "global_step": 307200,
131
+ "num_episodes": 56,
132
+ "mean_reward": 39.252455030168804,
133
+ "mean_length": 4465.803571428572,
134
+ "loss": -0.09781338274478912,
135
+ "sps": 2208.779203581181
136
  },
137
  {
138
  "update": 80,
139
+ "global_step": 327680,
140
+ "num_episodes": 62,
141
+ "mean_reward": 38.99283899030378,
142
+ "mean_length": 4527.532258064516,
143
+ "loss": 1.440595030784607,
144
+ "sps": 655.150017301736
145
  },
146
  {
147
  "update": 85,
148
+ "global_step": 348160,
149
+ "num_episodes": 84,
150
+ "mean_reward": 36.50451924119677,
151
+ "mean_length": 4002.7380952380954,
152
+ "loss": 1.0692782402038574,
153
+ "sps": 1057.9744994181403
154
  },
155
  {
156
  "update": 90,
157
+ "global_step": 368640,
158
+ "num_episodes": 86,
159
+ "mean_reward": 35.66498793003171,
160
+ "mean_length": 3935.0232558139537,
161
+ "loss": -0.0837952047586441,
162
+ "sps": 2123.204801099428
163
  },
164
  {
165
  "update": 95,
166
+ "global_step": 389120,
167
+ "num_episodes": 86,
168
+ "mean_reward": 35.66498793003171,
169
+ "mean_length": 3935.0232558139537,
170
+ "loss": 0.02618391066789627,
171
+ "sps": 1904.8344116389594
172
  },
173
  {
174
  "update": 100,
175
+ "global_step": 409600,
176
+ "num_episodes": 88,
177
+ "mean_reward": 35.91715085506439,
178
+ "mean_length": 4072.8636363636365,
179
+ "loss": -0.004935186356306076,
180
+ "sps": 1928.296801932442
181
  },
182
  {
183
  "update": 105,
184
+ "global_step": 430080,
185
+ "num_episodes": 100,
186
+ "mean_reward": 33.11026698112488,
187
+ "mean_length": 4118.01,
188
+ "loss": 1.716380000114441,
189
+ "sps": 755.5801175010796
190
  },
191
  {
192
  "update": 110,
193
+ "global_step": 450560,
194
+ "num_episodes": 102,
195
+ "mean_reward": 33.19641512870788,
196
+ "mean_length": 4218.5,
197
+ "loss": 0.0785534530878067,
198
+ "sps": 2175.9855223665277
199
  },
200
  {
201
  "update": 115,
202
+ "global_step": 471040,
203
+ "num_episodes": 102,
204
+ "mean_reward": 33.19641512870788,
205
+ "mean_length": 4218.5,
206
+ "loss": -0.07273395359516144,
207
+ "sps": 1787.2507488200727
208
  },
209
  {
210
  "update": 120,
211
+ "global_step": 491520,
212
+ "num_episodes": 103,
213
+ "mean_reward": 33.58080266952515,
214
+ "mean_length": 4314.65,
215
+ "loss": -0.026257965713739395,
216
+ "sps": 1805.9278386526794
217
  },
218
  {
219
  "update": 125,
220
+ "global_step": 512000,
221
+ "num_episodes": 110,
222
+ "mean_reward": 37.310110912323,
223
+ "mean_length": 4794.35,
224
+ "loss": 1.8378688097000122,
225
+ "sps": 1715.9048394677898
226
  },
227
  {
228
  "update": 130,
229
+ "global_step": 532480,
230
+ "num_episodes": 115,
231
+ "mean_reward": 38.11976575374603,
232
+ "mean_length": 4792.66,
233
+ "loss": 0.0826515257358551,
234
+ "sps": 2480.658396587235
235
  },
236
  {
237
  "update": 135,
238
+ "global_step": 552960,
239
+ "num_episodes": 115,
240
+ "mean_reward": 38.11976575374603,
241
+ "mean_length": 4792.66,
242
+ "loss": -0.058959588408470154,
243
+ "sps": 2558.7887965900827
244
  },
245
  {
246
  "update": 140,
247
+ "global_step": 573440,
248
+ "num_episodes": 117,
249
+ "mean_reward": 40.18981074810028,
250
+ "mean_length": 4792.66,
251
+ "loss": 0.01938764378428459,
252
+ "sps": 2511.6128893797145
253
  },
254
  {
255
  "update": 145,
256
+ "global_step": 593920,
257
+ "num_episodes": 124,
258
+ "mean_reward": 51.76217389583588,
259
+ "mean_length": 4803.44,
260
+ "loss": 20.378520965576172,
261
+ "sps": 909.3232748780807
262
  },
263
  {
264
  "update": 150,
265
+ "global_step": 614400,
266
+ "num_episodes": 126,
267
+ "mean_reward": 51.032718253135684,
268
+ "mean_length": 4803.44,
269
+ "loss": 0.11053133755922318,
270
+ "sps": 2492.3194028833077
271
  },
272
  {
273
  "update": 155,
274
+ "global_step": 634880,
275
+ "num_episodes": 126,
276
+ "mean_reward": 51.032718253135684,
277
+ "mean_length": 4803.44,
278
+ "loss": 0.31901270151138306,
279
+ "sps": 2322.6918424224104
280
  },
281
  {
282
  "update": 160,
283
+ "global_step": 655360,
284
  "num_episodes": 129,
285
+ "mean_reward": 49.292411608695986,
286
+ "mean_length": 4705.26,
287
+ "loss": 0.6963619589805603,
288
+ "sps": 1560.7635575105323
289
  },
290
  {
291
  "update": 165,
292
+ "global_step": 675840,
293
+ "num_episodes": 138,
294
+ "mean_reward": 51.79567018985748,
295
+ "mean_length": 5005.94,
296
+ "loss": 3.423638343811035,
297
+ "sps": 1566.1339446711113
298
  },
299
  {
300
  "update": 170,
301
+ "global_step": 696320,
302
  "num_episodes": 139,
303
+ "mean_reward": 52.29565020084381,
304
+ "mean_length": 5005.94,
305
+ "loss": 5.795687675476074,
306
+ "sps": 1548.0157210393158
307
  },
308
  {
309
  "update": 175,
310
+ "global_step": 716800,
311
+ "num_episodes": 139,
312
+ "mean_reward": 52.29565020084381,
313
+ "mean_length": 5005.94,
314
+ "loss": 11.365696907043457,
315
+ "sps": 1678.7844476146372
316
  },
317
  {
318
  "update": 180,
319
+ "global_step": 737280,
320
+ "num_episodes": 140,
321
+ "mean_reward": 53.179930477142335,
322
+ "mean_length": 5098.77,
323
+ "loss": -0.05775236710906029,
324
+ "sps": 1668.2296340431747
325
  },
326
  {
327
  "update": 185,
328
+ "global_step": 757760,
329
+ "num_episodes": 149,
330
+ "mean_reward": 70.75448143482208,
331
+ "mean_length": 5195.77,
332
+ "loss": 73.1705322265625,
333
+ "sps": 858.1752320250257
334
  },
335
  {
336
  "update": 190,
337
+ "global_step": 778240,
338
+ "num_episodes": 150,
339
+ "mean_reward": 71.51151569843292,
340
+ "mean_length": 5295.07,
341
+ "loss": 0.24444621801376343,
342
+ "sps": 1363.6491070768768
343
  },
344
  {
345
  "update": 195,
346
+ "global_step": 798720,
347
+ "num_episodes": 150,
348
+ "mean_reward": 71.51151569843292,
349
+ "mean_length": 5295.07,
350
+ "loss": 0.8110665082931519,
351
+ "sps": 1453.3363954580439
352
  },
353
  {
354
  "update": 200,
355
+ "global_step": 819200,
356
+ "num_episodes": 151,
357
+ "mean_reward": 72.04212862491607,
358
+ "mean_length": 5392.97,
359
+ "loss": 0.1485036462545395,
360
+ "sps": 1399.4723333291518
361
  },
362
  {
363
  "update": 205,
364
+ "global_step": 839680,
365
+ "num_episodes": 157,
366
+ "mean_reward": 98.28625455379486,
367
+ "mean_length": 5585.11,
368
+ "loss": 23.002685546875,
369
+ "sps": 240.5853655120997
370
  },
371
  {
372
  "update": 210,
373
+ "global_step": 860160,
374
+ "num_episodes": 160,
375
+ "mean_reward": 99.03613055229187,
376
+ "mean_length": 5599.07,
377
+ "loss": 56.14109802246094,
378
+ "sps": 963.6583162438803
379
  },
380
  {
381
  "update": 215,
382
+ "global_step": 880640,
383
+ "num_episodes": 160,
384
+ "mean_reward": 99.03613055229187,
385
+ "mean_length": 5599.07,
386
+ "loss": 0.288861483335495,
387
+ "sps": 1135.1194345027134
388
  },
389
  {
390
  "update": 220,
391
+ "global_step": 901120,
392
+ "num_episodes": 162,
393
+ "mean_reward": 100.13229321479797,
394
+ "mean_length": 5596.55,
395
+ "loss": 0.12560202181339264,
396
+ "sps": 1065.000301525082
397
  },
398
  {
399
  "update": 225,
400
+ "global_step": 921600,
401
+ "num_episodes": 173,
402
+ "mean_reward": 126.59835072040558,
403
+ "mean_length": 5771.42,
404
+ "loss": 18.769105911254883,
405
+ "sps": 484.9667540902585
406
  },
407
  {
408
  "update": 230,
409
+ "global_step": 942080,
410
+ "num_episodes": 175,
411
+ "mean_reward": 133.06522512435913,
412
+ "mean_length": 5865.35,
413
+ "loss": 2.0742053985595703,
414
+ "sps": 485.5770281538178
415
  },
416
  {
417
  "update": 235,
418
+ "global_step": 962560,
419
+ "num_episodes": 175,
420
+ "mean_reward": 133.06522512435913,
421
+ "mean_length": 5865.35,
422
+ "loss": 1.1434893608093262,
423
+ "sps": 476.861101571966
424
  },
425
  {
426
  "update": 240,
427
+ "global_step": 983040,
428
+ "num_episodes": 177,
429
+ "mean_reward": 136.24841757774354,
430
+ "mean_length": 6062.18,
431
+ "loss": 2.5528671741485596,
432
+ "sps": 519.697272037427
433
  },
434
  {
435
  "update": 245,
436
+ "global_step": 1003520,
437
  "num_episodes": 181,
438
+ "mean_reward": 162.3702562904358,
439
+ "mean_length": 6350.36,
440
+ "loss": 2.4525704383850098,
441
+ "sps": 2612.0970180967397
442
  },
443
  {
444
  "update": 250,
445
+ "global_step": 1024000,
446
+ "num_episodes": 183,
447
+ "mean_reward": 163.440665807724,
448
+ "mean_length": 6546.01,
449
+ "loss": 1.370378017425537,
450
+ "sps": 1843.3577332235466
451
  },
452
  {
453
  "update": 255,
454
+ "global_step": 1044480,
455
+ "num_episodes": 183,
456
+ "mean_reward": 163.440665807724,
457
+ "mean_length": 6546.01,
458
+ "loss": 3.996934652328491,
459
+ "sps": 1664.629871283792
460
  },
461
  {
462
  "update": 260,
463
+ "global_step": 1064960,
464
  "num_episodes": 188,
465
+ "mean_reward": 164.5874024486542,
466
+ "mean_length": 6629.5,
467
+ "loss": 38.7066535949707,
468
+ "sps": 275.2025358552949
469
  },
470
  {
471
  "update": 265,
472
+ "global_step": 1085440,
473
+ "num_episodes": 195,
474
+ "mean_reward": 178.587217502594,
475
+ "mean_length": 6716.14,
476
+ "loss": 48.47567367553711,
477
+ "sps": 202.49850145253117
478
  },
479
  {
480
  "update": 270,
481
+ "global_step": 1105920,
482
+ "num_episodes": 195,
483
+ "mean_reward": 178.587217502594,
484
+ "mean_length": 6716.14,
485
+ "loss": 65.4755859375,
486
+ "sps": 336.3470793114214
487
  },
488
  {
489
  "update": 275,
490
+ "global_step": 1126400,
491
+ "num_episodes": 195,
492
+ "mean_reward": 178.587217502594,
493
+ "mean_length": 6716.14,
494
+ "loss": 122.97542572021484,
495
+ "sps": 364.01088862747804
496
  },
497
  {
498
  "update": 280,
499
+ "global_step": 1146880,
500
+ "num_episodes": 200,
501
+ "mean_reward": 217.83971637248993,
502
+ "mean_length": 6897.39,
503
+ "loss": 16.919631958007812,
504
+ "sps": 523.4729171393346
505
  },
506
  {
507
  "update": 285,
508
+ "global_step": 1167360,
509
+ "num_episodes": 209,
510
+ "mean_reward": 269.6603216218948,
511
+ "mean_length": 6691.86,
512
+ "loss": 37.87599182128906,
513
+ "sps": 815.1007717041923
514
  },
515
  {
516
  "update": 290,
517
+ "global_step": 1187840,
518
+ "num_episodes": 209,
519
+ "mean_reward": 269.6603216218948,
520
+ "mean_length": 6691.86,
521
+ "loss": 93.80265808105469,
522
+ "sps": 677.6266786573581
523
  },
524
  {
525
  "update": 295,
526
+ "global_step": 1208320,
527
+ "num_episodes": 209,
528
+ "mean_reward": 269.6603216218948,
529
+ "mean_length": 6691.86,
530
+ "loss": 2.2752561569213867,
531
+ "sps": 796.2728392649855
532
  },
533
  {
534
  "update": 300,
535
+ "global_step": 1228800,
536
  "num_episodes": 212,
537
+ "mean_reward": 282.0548305273056,
538
+ "mean_length": 6888.72,
539
+ "loss": 44.09903335571289,
540
+ "sps": 435.2776190122807
541
  },
542
  {
543
  "update": 305,
544
+ "global_step": 1249280,
545
+ "num_episodes": 218,
546
+ "mean_reward": 326.81962947368623,
547
+ "mean_length": 6987.94,
548
+ "loss": 7.751248836517334,
549
+ "sps": 605.8631845420061
550
  },
551
  {
552
  "update": 310,
553
+ "global_step": 1269760,
554
+ "num_episodes": 218,
555
+ "mean_reward": 326.81962947368623,
556
+ "mean_length": 6987.94,
557
+ "loss": 1.2491610050201416,
558
+ "sps": 609.0143093574485
559
  },
560
  {
561
  "update": 315,
562
+ "global_step": 1290240,
563
+ "num_episodes": 218,
564
+ "mean_reward": 326.81962947368623,
565
+ "mean_length": 6987.94,
566
+ "loss": 0.18734177947044373,
567
+ "sps": 969.6236925007502
568
  },
569
  {
570
  "update": 320,
571
+ "global_step": 1310720,
572
+ "num_episodes": 222,
573
+ "mean_reward": 354.5139421653748,
574
+ "mean_length": 6981.29,
575
+ "loss": 5.763525009155273,
576
+ "sps": 471.35135085304955
577
  },
578
  {
579
  "update": 325,
580
+ "global_step": 1331200,
581
+ "num_episodes": 235,
582
+ "mean_reward": 373.75524856090544,
583
+ "mean_length": 6657.2,
584
+ "loss": 8.977479934692383,
585
+ "sps": 286.61651802589716
586
  },
587
  {
588
  "update": 330,
589
+ "global_step": 1351680,
590
+ "num_episodes": 235,
591
+ "mean_reward": 373.75524856090544,
592
+ "mean_length": 6657.2,
593
+ "loss": 4.4195051193237305,
594
+ "sps": 540.9233917238028
595
  },
596
  {
597
  "update": 335,
598
+ "global_step": 1372160,
599
+ "num_episodes": 235,
600
+ "mean_reward": 373.75524856090544,
601
+ "mean_length": 6657.2,
602
+ "loss": 1.350094199180603,
603
+ "sps": 577.9686556262599
604
  },
605
  {
606
  "update": 340,
607
+ "global_step": 1392640,
608
+ "num_episodes": 240,
609
+ "mean_reward": 407.44437908649445,
610
+ "mean_length": 6657.92,
611
+ "loss": 32.73158264160156,
612
+ "sps": 852.1673008615437
613
  },
614
  {
615
  "update": 345,
616
+ "global_step": 1413120,
617
+ "num_episodes": 250,
618
+ "mean_reward": 424.2497792673111,
619
+ "mean_length": 6448.66,
620
+ "loss": 40.15397262573242,
621
+ "sps": 149.27274368511317
622
  },
623
  {
624
  "update": 350,
625
+ "global_step": 1433600,
626
+ "num_episodes": 250,
627
+ "mean_reward": 424.2497792673111,
628
+ "mean_length": 6448.66,
629
+ "loss": 1.2196906805038452,
630
+ "sps": 385.0138381705491
631
  },
632
  {
633
  "update": 355,
634
+ "global_step": 1454080,
635
+ "num_episodes": 250,
636
+ "mean_reward": 424.2497792673111,
637
+ "mean_length": 6448.66,
638
+ "loss": 0.6893861889839172,
639
+ "sps": 364.4585616900348
640
  },
641
  {
642
  "update": 360,
643
+ "global_step": 1474560,
644
+ "num_episodes": 255,
645
+ "mean_reward": 406.7418563890457,
646
+ "mean_length": 6349.6,
647
+ "loss": 21.265850067138672,
648
+ "sps": 309.03058801935407
649
  },
650
  {
651
  "update": 365,
652
+ "global_step": 1495040,
653
  "num_episodes": 261,
654
+ "mean_reward": 472.512622590065,
655
+ "mean_length": 6333.64,
656
+ "loss": 1.368093490600586,
657
+ "sps": 1030.3650029243877
658
  },
659
  {
660
  "update": 370,
661
+ "global_step": 1515520,
662
+ "num_episodes": 261,
663
+ "mean_reward": 472.512622590065,
664
+ "mean_length": 6333.64,
665
+ "loss": 0.6811725497245789,
666
+ "sps": 988.2291227242658
667
  },
668
  {
669
  "update": 375,
670
+ "global_step": 1536000,
671
+ "num_episodes": 261,
672
+ "mean_reward": 472.512622590065,
673
+ "mean_length": 6333.64,
674
+ "loss": 0.5682379603385925,
675
+ "sps": 972.3493096974811
676
  },
677
  {
678
  "update": 380,
679
+ "global_step": 1556480,
680
+ "num_episodes": 272,
681
+ "mean_reward": 484.873091211319,
682
+ "mean_length": 6237.44,
683
+ "loss": 3.223947048187256,
684
+ "sps": 2004.102630433895
685
  },
686
  {
687
  "update": 385,
688
+ "global_step": 1576960,
689
+ "num_episodes": 277,
690
+ "mean_reward": 478.27076225280763,
691
+ "mean_length": 6236.56,
692
+ "loss": 1.7760697603225708,
693
+ "sps": 2832.6289658137994
694
  },
695
  {
696
  "update": 390,
697
+ "global_step": 1597440,
698
+ "num_episodes": 277,
699
+ "mean_reward": 478.27076225280763,
700
+ "mean_length": 6236.56,
701
+ "loss": -0.11427821964025497,
702
+ "sps": 2603.8164819807103
703
  },
704
  {
705
  "update": 395,
706
+ "global_step": 1617920,
707
+ "num_episodes": 279,
708
+ "mean_reward": 462.21016893386843,
709
+ "mean_length": 6137.5,
710
+ "loss": 0.8288934230804443,
711
+ "sps": 2392.5538344727297
712
  },
713
  {
714
  "update": 400,
715
+ "global_step": 1638400,
716
+ "num_episodes": 284,
717
+ "mean_reward": 454.1959769916534,
718
+ "mean_length": 6037.93,
719
+ "loss": 202.71636962890625,
720
+ "sps": 355.5497429467541
721
  },
722
  {
723
  "update": 405,
724
+ "global_step": 1658880,
725
+ "num_episodes": 289,
726
+ "mean_reward": 454.0530729818344,
727
+ "mean_length": 6035.23,
728
+ "loss": 0.2099057286977768,
729
+ "sps": 869.6302546973046
730
  },
731
  {
732
  "update": 410,
733
+ "global_step": 1679360,
734
+ "num_episodes": 289,
735
+ "mean_reward": 454.0530729818344,
736
+ "mean_length": 6035.23,
737
+ "loss": 0.018864750862121582,
738
+ "sps": 837.2921854213746
739
  },
740
  {
741
  "update": 415,
742
+ "global_step": 1699840,
743
+ "num_episodes": 290,
744
+ "mean_reward": 454.8029096364975,
745
+ "mean_length": 6035.23,
746
+ "loss": 0.08915512263774872,
747
+ "sps": 836.5661659973193
748
  },
749
  {
750
  "update": 420,
751
+ "global_step": 1720320,
752
+ "num_episodes": 294,
753
+ "mean_reward": 490.9899611520767,
754
+ "mean_length": 6134.91,
755
+ "loss": 9.022570610046387,
756
+ "sps": 1353.2280787984882
757
  },
758
  {
759
  "update": 425,
760
+ "global_step": 1740800,
761
+ "num_episodes": 299,
762
+ "mean_reward": 470.29952450752256,
763
+ "mean_length": 6136.24,
764
+ "loss": 1.4281823635101318,
765
+ "sps": 731.0193791394075
766
  },
767
  {
768
  "update": 430,
769
+ "global_step": 1761280,
770
+ "num_episodes": 299,
771
+ "mean_reward": 470.29952450752256,
772
+ "mean_length": 6136.24,
773
+ "loss": 0.12717652320861816,
774
+ "sps": 972.3142548127942
775
  },
776
  {
777
  "update": 435,
778
+ "global_step": 1781760,
779
+ "num_episodes": 300,
780
+ "mean_reward": 470.8291971683502,
781
+ "mean_length": 6235.51,
782
+ "loss": 1.772779107093811,
783
+ "sps": 955.1582152126839
784
  },
785
  {
786
  "update": 440,
787
+ "global_step": 1802240,
788
+ "num_episodes": 305,
789
+ "mean_reward": 467.37178787708285,
790
+ "mean_length": 6335.05,
791
+ "loss": 45.16695022583008,
792
+ "sps": 263.8764177763192
793
  },
794
  {
795
  "update": 445,
796
+ "global_step": 1822720,
797
+ "num_episodes": 310,
798
+ "mean_reward": 468.54246531009676,
799
+ "mean_length": 6353.87,
800
+ "loss": 45.204833984375,
801
+ "sps": 765.4018779843883
802
  },
803
  {
804
  "update": 450,
805
+ "global_step": 1843200,
806
+ "num_episodes": 310,
807
+ "mean_reward": 468.54246531009676,
808
+ "mean_length": 6353.87,
809
+ "loss": 17.418542861938477,
810
+ "sps": 716.3527304338312
811
  },
812
  {
813
  "update": 455,
814
+ "global_step": 1863680,
815
+ "num_episodes": 315,
816
+ "mean_reward": 448.0337993764877,
817
+ "mean_length": 6059.53,
818
+ "loss": 35.471492767333984,
819
+ "sps": 752.6285328468343
820
  },
821
  {
822
  "update": 460,
823
+ "global_step": 1884160,
824
+ "num_episodes": 323,
825
+ "mean_reward": 390.5069852733612,
826
+ "mean_length": 5868.34,
827
+ "loss": 22.768428802490234,
828
+ "sps": 1442.0900075832017
829
  },
830
  {
831
  "update": 465,
832
+ "global_step": 1904640,
833
+ "num_episodes": 325,
834
+ "mean_reward": 391.32213854789734,
835
+ "mean_length": 6066.81,
836
+ "loss": 1.743547797203064,
837
+ "sps": 2177.279445045517
838
  },
839
  {
840
  "update": 470,
841
+ "global_step": 1925120,
842
+ "num_episodes": 325,
843
+ "mean_reward": 391.32213854789734,
844
+ "mean_length": 6066.81,
845
+ "loss": -0.12528716027736664,
846
+ "sps": 2128.448401830331
847
  },
848
  {
849
  "update": 475,
850
+ "global_step": 1945600,
851
+ "num_episodes": 326,
852
+ "mean_reward": 386.4432405376434,
853
+ "mean_length": 6066.81,
854
+ "loss": -0.0739293247461319,
855
+ "sps": 2048.6650107889627
856
  },
857
  {
858
  "update": 480,
859
+ "global_step": 1966080,
860
+ "num_episodes": 331,
861
+ "mean_reward": 395.79872769355774,
862
+ "mean_length": 6362.12,
863
+ "loss": 1.8403631448745728,
864
+ "sps": 1177.4940640961918
865
  },
866
  {
867
  "update": 485,
868
+ "global_step": 1986560,
869
+ "num_episodes": 333,
870
+ "mean_reward": 396.0874492454529,
871
+ "mean_length": 6461.8,
872
+ "loss": 21.89581298828125,
873
+ "sps": 700.649536593067
874
  },
875
  {
876
  "update": 490,
877
+ "global_step": 2007040,
878
+ "num_episodes": 333,
879
+ "mean_reward": 396.0874492454529,
880
+ "mean_length": 6461.8,
881
+ "loss": 17.010061264038086,
882
+ "sps": 728.107784506257
883
  },
884
  {
885
  "update": 495,
886
+ "global_step": 2027520,
887
+ "num_episodes": 338,
888
+ "mean_reward": 396.22795082092284,
889
+ "mean_length": 6560.14,
890
+ "loss": 73.3307876586914,
891
+ "sps": 359.9910748355061
892
  },
893
  {
894
  "update": 500,
895
+ "global_step": 2048000,
896
+ "num_episodes": 347,
897
+ "mean_reward": 393.5666734457016,
898
+ "mean_length": 6464.03,
899
+ "loss": 51.052146911621094,
900
+ "sps": 186.4407003799165
901
  },
902
  {
903
  "update": 505,
904
+ "global_step": 2068480,
905
+ "num_episodes": 353,
906
+ "mean_reward": 363.4766788816452,
907
+ "mean_length": 6267.46,
908
+ "loss": 10.636628150939941,
909
+ "sps": 391.3156150286931
910
  },
911
  {
912
  "update": 510,
913
+ "global_step": 2088960,
914
+ "num_episodes": 353,
915
+ "mean_reward": 363.4766788816452,
916
+ "mean_length": 6267.46,
917
+ "loss": 5.408469200134277,
918
+ "sps": 392.291526612045
919
  },
920
  {
921
  "update": 515,
922
+ "global_step": 2109440,
923
+ "num_episodes": 356,
924
+ "mean_reward": 340.8062537956238,
925
+ "mean_length": 6159.0,
926
+ "loss": 0.881823718547821,
927
+ "sps": 887.267479612318
928
  },
929
  {
930
  "update": 520,
931
+ "global_step": 2129920,
932
+ "num_episodes": 368,
933
+ "mean_reward": 318.0571813583374,
934
+ "mean_length": 5973.21,
935
+ "loss": 29.853599548339844,
936
+ "sps": 776.9091606744039
937
  },
938
  {
939
  "update": 525,
940
+ "global_step": 2150400,
941
+ "num_episodes": 375,
942
+ "mean_reward": 277.7464536857605,
943
+ "mean_length": 5779.94,
944
+ "loss": 0.9935208559036255,
945
+ "sps": 2837.3122923609985
946
  },
947
  {
948
  "update": 530,
949
+ "global_step": 2170880,
950
+ "num_episodes": 375,
951
+ "mean_reward": 277.7464536857605,
952
+ "mean_length": 5779.94,
953
+ "loss": 0.16118717193603516,
954
+ "sps": 2674.823411889354
955
  },
956
  {
957
  "update": 535,
958
+ "global_step": 2191360,
959
+ "num_episodes": 379,
960
+ "mean_reward": 276.7646808052063,
961
+ "mean_length": 5783.77,
962
+ "loss": 0.6545084118843079,
963
+ "sps": 2718.7471360755962
964
  },
965
  {
966
  "update": 540,
967
+ "global_step": 2211840,
968
+ "num_episodes": 387,
969
+ "mean_reward": 273.41629776477816,
970
+ "mean_length": 5491.19,
971
+ "loss": 0.009996440261602402,
972
+ "sps": 2662.424914783932
973
  },
974
  {
975
  "update": 545,
976
+ "global_step": 2232320,
977
+ "num_episodes": 393,
978
+ "mean_reward": 237.44847791194917,
979
+ "mean_length": 5288.65,
980
+ "loss": -0.07593612372875214,
981
+ "sps": 2721.3806445317573
982
  },
983
  {
984
  "update": 550,
985
+ "global_step": 2252800,
986
+ "num_episodes": 393,
987
+ "mean_reward": 237.44847791194917,
988
+ "mean_length": 5288.65,
989
+ "loss": -0.013108465820550919,
990
+ "sps": 2496.4586032280413
991
  },
992
  {
993
  "update": 555,
994
+ "global_step": 2273280,
995
+ "num_episodes": 397,
996
+ "mean_reward": 214.58075400829316,
997
+ "mean_length": 5193.33,
998
+ "loss": 0.026919692754745483,
999
+ "sps": 2688.731731235585
1000
  },
1001
  {
1002
  "update": 560,
1003
+ "global_step": 2293760,
1004
+ "num_episodes": 407,
1005
+ "mean_reward": 159.23116432189943,
1006
+ "mean_length": 4793.65,
1007
+ "loss": 2.2952957153320312,
1008
+ "sps": 966.333276372859
1009
  },
1010
  {
1011
  "update": 565,
1012
+ "global_step": 2314240,
1013
+ "num_episodes": 413,
1014
+ "mean_reward": 159.70496647834779,
1015
+ "mean_length": 4791.54,
1016
+ "loss": 1.3611968755722046,
1017
+ "sps": 2550.010825605688
1018
  },
1019
  {
1020
  "update": 570,
1021
+ "global_step": 2334720,
1022
+ "num_episodes": 413,
1023
+ "mean_reward": 159.70496647834779,
1024
+ "mean_length": 4791.54,
1025
+ "loss": 1.439621090888977,
1026
+ "sps": 2484.751376024715
1027
  },
1028
  {
1029
  "update": 575,
1030
+ "global_step": 2355200,
1031
+ "num_episodes": 417,
1032
+ "mean_reward": 158.6872748184204,
1033
+ "mean_length": 4786.52,
1034
+ "loss": 12.480733871459961,
1035
+ "sps": 428.08037217860056
1036
  },
1037
  {
1038
  "update": 580,
1039
+ "global_step": 2375680,
1040
+ "num_episodes": 424,
1041
+ "mean_reward": 126.5506838798523,
1042
+ "mean_length": 4703.64,
1043
+ "loss": 12.992765426635742,
1044
+ "sps": 1524.6391897022352
1045
  },
1046
  {
1047
  "update": 585,
1048
+ "global_step": 2396160,
1049
+ "num_episodes": 427,
1050
+ "mean_reward": 131.45039747238158,
1051
+ "mean_length": 4703.64,
1052
+ "loss": 11.845376968383789,
1053
+ "sps": 487.7409629471044
1054
  },
1055
  {
1056
  "update": 590,
1057
+ "global_step": 2416640,
1058
+ "num_episodes": 427,
1059
+ "mean_reward": 131.45039747238158,
1060
+ "mean_length": 4703.64,
1061
+ "loss": 0.3306816816329956,
1062
+ "sps": 906.10311953954
1063
  },
1064
  {
1065
  "update": 595,
1066
+ "global_step": 2437120,
1067
+ "num_episodes": 429,
1068
+ "mean_reward": 123.15537308216095,
1069
+ "mean_length": 4604.79,
1070
+ "loss": 0.30884307622909546,
1071
+ "sps": 938.566230501384
1072
  },
1073
  {
1074
  "update": 600,
1075
+ "global_step": 2457600,
1076
+ "num_episodes": 437,
1077
+ "mean_reward": 133.3434053325653,
1078
+ "mean_length": 4405.56,
1079
+ "loss": 0.21275369822978973,
1080
+ "sps": 1018.040863452185
1081
  },
1082
  {
1083
  "update": 605,
1084
+ "global_step": 2478080,
1085
+ "num_episodes": 442,
1086
+ "mean_reward": 171.27508597373964,
1087
+ "mean_length": 4501.98,
1088
+ "loss": 0.47754910588264465,
1089
+ "sps": 2289.461146828872
1090
  },
1091
  {
1092
  "update": 610,
1093
+ "global_step": 2498560,
1094
+ "num_episodes": 442,
1095
+ "mean_reward": 171.27508597373964,
1096
+ "mean_length": 4501.98,
1097
+ "loss": 0.5260030627250671,
1098
+ "sps": 2191.020876073232
1099
  },
1100
  {
1101
  "update": 615,
1102
+ "global_step": 2519040,
1103
+ "num_episodes": 446,
1104
+ "mean_reward": 134.8440179824829,
1105
+ "mean_length": 4406.13,
1106
+ "loss": 0.03903695195913315,
1107
+ "sps": 2094.500891632649
1108
  },
1109
  {
1110
  "update": 620,
1111
+ "global_step": 2539520,
1112
+ "num_episodes": 462,
1113
+ "mean_reward": 108.21314854621887,
1114
+ "mean_length": 4325.47,
1115
+ "loss": 7.972282409667969,
1116
+ "sps": 1843.9384231501685
1117
  },
1118
  {
1119
  "update": 625,
1120
+ "global_step": 2560000,
1121
+ "num_episodes": 467,
1122
+ "mean_reward": 103.93524898529053,
1123
+ "mean_length": 4323.9,
1124
+ "loss": 0.2857341468334198,
1125
+ "sps": 2844.0073567316244
1126
  },
1127
  {
1128
  "update": 630,
1129
+ "global_step": 2580480,
1130
+ "num_episodes": 467,
1131
+ "mean_reward": 103.93524898529053,
1132
+ "mean_length": 4323.9,
1133
+ "loss": -0.0014126598834991455,
1134
+ "sps": 2662.6258687696713
1135
  },
1136
  {
1137
  "update": 635,
1138
+ "global_step": 2600960,
1139
+ "num_episodes": 469,
1140
+ "mean_reward": 105.16953000545502,
1141
+ "mean_length": 4425.22,
1142
+ "loss": -0.11831197887659073,
1143
+ "sps": 2712.08306677995
1144
  },
1145
  {
1146
  "update": 640,
1147
+ "global_step": 2621440,
1148
+ "num_episodes": 474,
1149
+ "mean_reward": 110.30714406490326,
1150
+ "mean_length": 4816.19,
1151
+ "loss": 1.1699683666229248,
1152
+ "sps": 2190.63868238328
1153
  },
1154
  {
1155
  "update": 645,
1156
+ "global_step": 2641920,
1157
+ "num_episodes": 478,
1158
+ "mean_reward": 115.03492943286896,
1159
+ "mean_length": 4720.8,
1160
+ "loss": 0.8407511115074158,
1161
+ "sps": 2477.6471668004287
1162
  },
1163
  {
1164
  "update": 650,
1165
+ "global_step": 2662400,
1166
+ "num_episodes": 478,
1167
+ "mean_reward": 115.03492943286896,
1168
+ "mean_length": 4720.8,
1169
+ "loss": 0.41076016426086426,
1170
+ "sps": 2375.4220186429184
1171
  },
1172
  {
1173
  "update": 655,
1174
+ "global_step": 2682880,
1175
+ "num_episodes": 480,
1176
+ "mean_reward": 113.97553071975707,
1177
+ "mean_length": 4722.63,
1178
+ "loss": 1.8437230587005615,
1179
+ "sps": 437.20362392139924
1180
  },
1181
  {
1182
  "update": 660,
1183
+ "global_step": 2703360,
1184
+ "num_episodes": 488,
1185
+ "mean_reward": 121.07888405799866,
1186
+ "mean_length": 4921.4,
1187
+ "loss": 9.341552734375,
1188
+ "sps": 433.1180294441828
1189
  },
1190
  {
1191
  "update": 665,
1192
+ "global_step": 2723840,
1193
+ "num_episodes": 493,
1194
+ "mean_reward": 122.25540927648544,
1195
+ "mean_length": 4927.03,
1196
+ "loss": 3.2276527881622314,
1197
+ "sps": 311.23534986570854
1198
  },
1199
  {
1200
  "update": 670,
1201
+ "global_step": 2744320,
1202
+ "num_episodes": 493,
1203
+ "mean_reward": 122.25540927648544,
1204
+ "mean_length": 4927.03,
1205
+ "loss": 1.626711130142212,
1206
+ "sps": 443.5605257662605
1207
  },
1208
  {
1209
  "update": 675,
1210
+ "global_step": 2764800,
1211
+ "num_episodes": 497,
1212
+ "mean_reward": 128.49539656877516,
1213
+ "mean_length": 4990.32,
1214
+ "loss": 1.4737187623977661,
1215
+ "sps": 557.1794951843984
1216
  },
1217
  {
1218
  "update": 680,
1219
+ "global_step": 2785280,
1220
+ "num_episodes": 501,
1221
+ "mean_reward": 133.27627504587173,
1222
+ "mean_length": 5189.29,
1223
+ "loss": 3.2421000003814697,
1224
+ "sps": 227.14390586968517
1225
  },
1226
  {
1227
  "update": 685,
1228
+ "global_step": 2805760,
1229
+ "num_episodes": 503,
1230
+ "mean_reward": 134.89025341749192,
1231
+ "mean_length": 5319.23,
1232
+ "loss": 19.16809844970703,
1233
+ "sps": 503.2482073037513
1234
  },
1235
  {
1236
  "update": 690,
1237
+ "global_step": 2826240,
1238
+ "num_episodes": 503,
1239
+ "mean_reward": 134.89025341749192,
1240
+ "mean_length": 5319.23,
1241
+ "loss": 0.3121195435523987,
1242
+ "sps": 1237.8635799099984
1243
  },
1244
  {
1245
  "update": 695,
1246
+ "global_step": 2846720,
1247
+ "num_episodes": 505,
1248
+ "mean_reward": 140.31052175283432,
1249
+ "mean_length": 5418.9,
1250
+ "loss": 10.290376663208008,
1251
+ "sps": 894.1160824447923
1252
  },
1253
  {
1254
  "update": 700,
1255
+ "global_step": 2867200,
1256
+ "num_episodes": 511,
1257
+ "mean_reward": 142.8947832798958,
1258
+ "mean_length": 5596.12,
1259
+ "loss": 8.96629810333252,
1260
+ "sps": 2766.323313198168
1261
  },
1262
  {
1263
  "update": 705,
1264
+ "global_step": 2887680,
1265
+ "num_episodes": 514,
1266
+ "mean_reward": 143.8507553601265,
1267
+ "mean_length": 5598.39,
1268
+ "loss": -0.03336918354034424,
1269
+ "sps": 2819.0861765762347
1270
  },
1271
  {
1272
  "update": 710,
1273
+ "global_step": 2908160,
1274
+ "num_episodes": 514,
1275
+ "mean_reward": 143.8507553601265,
1276
+ "mean_length": 5598.39,
1277
+ "loss": 1.484508991241455,
1278
+ "sps": 2804.209085249585
1279
  },
1280
  {
1281
  "update": 715,
1282
+ "global_step": 2928640,
1283
+ "num_episodes": 516,
1284
+ "mean_reward": 146.61101296186447,
1285
+ "mean_length": 5697.46,
1286
+ "loss": 0.41980481147766113,
1287
+ "sps": 2853.5154122563904
1288
  },
1289
  {
1290
  "update": 720,
1291
+ "global_step": 2949120,
1292
+ "num_episodes": 521,
1293
+ "mean_reward": 159.6500442767143,
1294
+ "mean_length": 5978.65,
1295
+ "loss": 3.3171966075897217,
1296
+ "sps": 337.7495209248038
1297
  },
1298
  {
1299
  "update": 725,
1300
+ "global_step": 2969600,
1301
+ "num_episodes": 523,
1302
+ "mean_reward": 165.79403271436692,
1303
+ "mean_length": 6078.2,
1304
+ "loss": 2.299358606338501,
1305
+ "sps": 488.4530631699151
1306
  },
1307
  {
1308
  "update": 730,
1309
+ "global_step": 2990080,
1310
+ "num_episodes": 523,
1311
+ "mean_reward": 165.79403271436692,
1312
+ "mean_length": 6078.2,
1313
+ "loss": 2.0765156745910645,
1314
+ "sps": 622.5255664645315
1315
  },
1316
  {
1317
  "update": 735,
1318
+ "global_step": 3010560,
1319
+ "num_episodes": 525,
1320
+ "mean_reward": 165.0441936945915,
1321
+ "mean_length": 6078.2,
1322
+ "loss": 0.5185770988464355,
1323
+ "sps": 912.7167424710788
1324
  },
1325
  {
1326
  "update": 740,
1327
+ "global_step": 3031040,
1328
+ "num_episodes": 529,
1329
+ "mean_reward": 169.13619287729264,
1330
+ "mean_length": 6177.05,
1331
+ "loss": 0.8914755582809448,
1332
+ "sps": 2277.7924528126773
1333
  },
1334
  {
1335
  "update": 745,
1336
+ "global_step": 3051520,
1337
+ "num_episodes": 531,
1338
+ "mean_reward": 182.76090200662614,
1339
+ "mean_length": 6177.05,
1340
+ "loss": -0.03897722810506821,
1341
+ "sps": 2938.7255967172237
1342
  },
1343
  {
1344
  "update": 750,
1345
+ "global_step": 3072000,
1346
+ "num_episodes": 531,
1347
+ "mean_reward": 182.76090200662614,
1348
+ "mean_length": 6177.05,
1349
+ "loss": -0.07192348688840866,
1350
+ "sps": 2896.8996334668986
1351
  },
1352
  {
1353
  "update": 755,
1354
+ "global_step": 3092480,
1355
+ "num_episodes": 535,
1356
+ "mean_reward": 176.57018686056136,
1357
+ "mean_length": 6276.7,
1358
+ "loss": 17.123456954956055,
1359
+ "sps": 1361.0160818187624
1360
  },
1361
  {
1362
  "update": 760,
1363
+ "global_step": 3112960,
1364
+ "num_episodes": 539,
1365
+ "mean_reward": 143.45233328580855,
1366
+ "mean_length": 6475.8,
1367
+ "loss": 1.7440789937973022,
1368
+ "sps": 431.96268578339885
1369
  },
1370
  {
1371
  "update": 765,
1372
+ "global_step": 3133440,
1373
+ "num_episodes": 541,
1374
+ "mean_reward": 143.19214475393295,
1375
+ "mean_length": 6476.28,
1376
+ "loss": 1.2793841361999512,
1377
+ "sps": 694.1149265905251
1378
  },
1379
  {
1380
  "update": 770,
1381
+ "global_step": 3153920,
1382
+ "num_episodes": 541,
1383
+ "mean_reward": 143.19214475393295,
1384
+ "mean_length": 6476.28,
1385
+ "loss": 27.601778030395508,
1386
+ "sps": 666.6380756689646
1387
  },
1388
  {
1389
  "update": 775,
1390
+ "global_step": 3174400,
1391
+ "num_episodes": 545,
1392
+ "mean_reward": 160.25242776632308,
1393
+ "mean_length": 6772.51,
1394
+ "loss": 3.569728136062622,
1395
+ "sps": 235.53664992575125
1396
  },
1397
  {
1398
  "update": 780,
1399
+ "global_step": 3194880,
1400
+ "num_episodes": 548,
1401
+ "mean_reward": 161.8272612643242,
1402
+ "mean_length": 6969.88,
1403
+ "loss": 1.578532338142395,
1404
+ "sps": 345.17499217473494
1405
  },
1406
  {
1407
  "update": 785,
1408
+ "global_step": 3215360,
1409
+ "num_episodes": 550,
1410
+ "mean_reward": 161.4852223944664,
1411
+ "mean_length": 7031.97,
1412
+ "loss": 6.10168981552124,
1413
+ "sps": 321.34142444031875
1414
  },
1415
  {
1416
  "update": 790,
1417
+ "global_step": 3235840,
1418
+ "num_episodes": 550,
1419
+ "mean_reward": 161.4852223944664,
1420
+ "mean_length": 7031.97,
1421
+ "loss": 0.8625541925430298,
1422
+ "sps": 739.6504838117347
1423
  },
1424
  {
1425
  "update": 795,
1426
+ "global_step": 3256320,
1427
+ "num_episodes": 554,
1428
+ "mean_reward": 162.4044385075569,
1429
+ "mean_length": 7219.96,
1430
+ "loss": 2.502009868621826,
1431
+ "sps": 191.1365478881192
1432
  },
1433
  {
1434
  "update": 800,
1435
+ "global_step": 3276800,
1436
+ "num_episodes": 563,
1437
+ "mean_reward": 162.7963340115547,
1438
+ "mean_length": 7220.73,
1439
+ "loss": 8.730064392089844,
1440
+ "sps": 386.9629605603807
1441
+ },
1442
+ {
1443
+ "update": 805,
1444
+ "global_step": 3297280,
1445
+ "num_episodes": 566,
1446
+ "mean_reward": 165.31176669359206,
1447
+ "mean_length": 7360.77,
1448
+ "loss": 48.92124557495117,
1449
+ "sps": 705.0215184629429
1450
+ },
1451
+ {
1452
+ "update": 810,
1453
+ "global_step": 3317760,
1454
+ "num_episodes": 566,
1455
+ "mean_reward": 165.31176669359206,
1456
+ "mean_length": 7360.77,
1457
+ "loss": 0.6651354432106018,
1458
+ "sps": 801.9215448692757
1459
+ },
1460
+ {
1461
+ "update": 815,
1462
+ "global_step": 3338240,
1463
+ "num_episodes": 569,
1464
+ "mean_reward": 180.35619497060776,
1465
+ "mean_length": 7453.65,
1466
+ "loss": 0.9045634865760803,
1467
+ "sps": 2078.869268499869
1468
+ },
1469
+ {
1470
+ "update": 820,
1471
+ "global_step": 3358720,
1472
+ "num_episodes": 575,
1473
+ "mean_reward": 193.25147196531296,
1474
+ "mean_length": 7258.9,
1475
+ "loss": 5.477941989898682,
1476
+ "sps": 1253.1294143150922
1477
+ },
1478
+ {
1479
+ "update": 825,
1480
+ "global_step": 3379200,
1481
+ "num_episodes": 581,
1482
+ "mean_reward": 185.37936263799668,
1483
+ "mean_length": 7158.59,
1484
+ "loss": 0.19656488299369812,
1485
+ "sps": 3241.297612628403
1486
+ },
1487
+ {
1488
+ "update": 830,
1489
+ "global_step": 3399680,
1490
+ "num_episodes": 581,
1491
+ "mean_reward": 185.37936263799668,
1492
+ "mean_length": 7158.59,
1493
+ "loss": 0.5765475034713745,
1494
+ "sps": 2869.6879773523438
1495
+ },
1496
+ {
1497
+ "update": 835,
1498
+ "global_step": 3420160,
1499
+ "num_episodes": 583,
1500
+ "mean_reward": 185.1090102362633,
1501
+ "mean_length": 7158.59,
1502
+ "loss": 2.0618038177490234,
1503
+ "sps": 415.0631773970818
1504
+ },
1505
+ {
1506
+ "update": 840,
1507
+ "global_step": 3440640,
1508
+ "num_episodes": 588,
1509
+ "mean_reward": 189.7356357884407,
1510
+ "mean_length": 7352.61,
1511
+ "loss": 9.016275405883789,
1512
+ "sps": 227.3230128869974
1513
+ },
1514
+ {
1515
+ "update": 845,
1516
+ "global_step": 3461120,
1517
+ "num_episodes": 591,
1518
+ "mean_reward": 189.1485662293434,
1519
+ "mean_length": 7394.25,
1520
+ "loss": 22.739429473876953,
1521
+ "sps": 314.2023925066658
1522
+ },
1523
+ {
1524
+ "update": 850,
1525
+ "global_step": 3481600,
1526
+ "num_episodes": 592,
1527
+ "mean_reward": 189.12477667808534,
1528
+ "mean_length": 7443.48,
1529
+ "loss": 1.6546752452850342,
1530
+ "sps": 957.4586504289063
1531
+ },
1532
+ {
1533
+ "update": 855,
1534
+ "global_step": 3502080,
1535
+ "num_episodes": 594,
1536
+ "mean_reward": 187.17996717453002,
1537
+ "mean_length": 7507.25,
1538
+ "loss": 6.797643184661865,
1539
+ "sps": 412.71199087606476
1540
+ },
1541
+ {
1542
+ "update": 860,
1543
+ "global_step": 3522560,
1544
+ "num_episodes": 596,
1545
+ "mean_reward": 193.70433765888214,
1546
+ "mean_length": 7638.31,
1547
+ "loss": 0.2814023196697235,
1548
+ "sps": 809.1402642140959
1549
+ },
1550
+ {
1551
+ "update": 865,
1552
+ "global_step": 3543040,
1553
+ "num_episodes": 600,
1554
+ "mean_reward": 192.16364938259125,
1555
+ "mean_length": 7538.65,
1556
+ "loss": 4.5296950340271,
1557
+ "sps": 934.0269243194301
1558
+ },
1559
+ {
1560
+ "update": 870,
1561
+ "global_step": 3563520,
1562
+ "num_episodes": 602,
1563
+ "mean_reward": 191.83141272068025,
1564
+ "mean_length": 7569.62,
1565
+ "loss": 0.12454970180988312,
1566
+ "sps": 459.3460462463923
1567
+ },
1568
+ {
1569
+ "update": 875,
1570
+ "global_step": 3584000,
1571
+ "num_episodes": 605,
1572
+ "mean_reward": 185.7972752714157,
1573
+ "mean_length": 7412.33,
1574
+ "loss": 0.010119002312421799,
1575
+ "sps": 2984.2673687681013
1576
+ },
1577
+ {
1578
+ "update": 880,
1579
+ "global_step": 3604480,
1580
+ "num_episodes": 607,
1581
+ "mean_reward": 191.80364267826081,
1582
+ "mean_length": 7423.25,
1583
+ "loss": 1.5759342908859253,
1584
+ "sps": 2498.994384783772
1585
+ },
1586
+ {
1587
+ "update": 885,
1588
+ "global_step": 3624960,
1589
+ "num_episodes": 611,
1590
+ "mean_reward": 197.66494411945342,
1591
+ "mean_length": 7523.71,
1592
+ "loss": 0.07411646842956543,
1593
+ "sps": 2398.8293936193445
1594
+ },
1595
+ {
1596
+ "update": 890,
1597
+ "global_step": 3645440,
1598
+ "num_episodes": 612,
1599
+ "mean_reward": 199.16475445270538,
1600
+ "mean_length": 7523.71,
1601
+ "loss": -0.09818007051944733,
1602
+ "sps": 2494.5939188365055
1603
+ },
1604
+ {
1605
+ "update": 895,
1606
+ "global_step": 3665920,
1607
+ "num_episodes": 616,
1608
+ "mean_reward": 198.68793545246123,
1609
+ "mean_length": 7520.61,
1610
+ "loss": 0.07828275859355927,
1611
+ "sps": 1930.4566445688752
1612
+ },
1613
+ {
1614
+ "update": 900,
1615
+ "global_step": 3686400,
1616
+ "num_episodes": 618,
1617
+ "mean_reward": 198.65358025074005,
1618
+ "mean_length": 7422.9,
1619
+ "loss": 0.6579947471618652,
1620
+ "sps": 2636.224514742254
1621
+ },
1622
+ {
1623
+ "update": 905,
1624
+ "global_step": 3706880,
1625
+ "num_episodes": 622,
1626
+ "mean_reward": 200.68803305149078,
1627
+ "mean_length": 7420.23,
1628
+ "loss": 1.256749153137207,
1629
+ "sps": 2869.3812284251953
1630
+ },
1631
+ {
1632
+ "update": 910,
1633
+ "global_step": 3727360,
1634
+ "num_episodes": 624,
1635
+ "mean_reward": 198.5566199350357,
1636
+ "mean_length": 7420.23,
1637
+ "loss": 1.2612106800079346,
1638
+ "sps": 2818.4419350605035
1639
+ },
1640
+ {
1641
+ "update": 915,
1642
+ "global_step": 3747840,
1643
+ "num_episodes": 626,
1644
+ "mean_reward": 205.93744747638704,
1645
+ "mean_length": 7420.23,
1646
+ "loss": 0.7723605036735535,
1647
+ "sps": 2519.097807723044
1648
+ },
1649
+ {
1650
+ "update": 920,
1651
+ "global_step": 3768320,
1652
+ "num_episodes": 628,
1653
+ "mean_reward": 201.4210061311722,
1654
+ "mean_length": 7420.23,
1655
+ "loss": 7.346652507781982,
1656
+ "sps": 2242.0312258079334
1657
+ },
1658
+ {
1659
+ "update": 925,
1660
+ "global_step": 3788800,
1661
+ "num_episodes": 630,
1662
+ "mean_reward": 189.0587257051468,
1663
+ "mean_length": 7420.23,
1664
+ "loss": 5.038862705230713,
1665
+ "sps": 383.270562834207
1666
+ },
1667
+ {
1668
+ "update": 930,
1669
+ "global_step": 3809280,
1670
+ "num_episodes": 633,
1671
+ "mean_reward": 189.46342292308807,
1672
+ "mean_length": 7360.68,
1673
+ "loss": 0.39050325751304626,
1674
+ "sps": 1858.9704973292514
1675
+ },
1676
+ {
1677
+ "update": 935,
1678
+ "global_step": 3829760,
1679
+ "num_episodes": 635,
1680
+ "mean_reward": 198.2463042974472,
1681
+ "mean_length": 7459.69,
1682
+ "loss": 53.42470169067383,
1683
+ "sps": 1069.4430639241739
1684
+ },
1685
+ {
1686
+ "update": 940,
1687
+ "global_step": 3850240,
1688
+ "num_episodes": 636,
1689
+ "mean_reward": 205.62649121761322,
1690
+ "mean_length": 7459.69,
1691
+ "loss": 1.097938895225525,
1692
+ "sps": 1447.2739010301416
1693
+ },
1694
+ {
1695
+ "update": 945,
1696
+ "global_step": 3870720,
1697
+ "num_episodes": 638,
1698
+ "mean_reward": 220.27144483089447,
1699
+ "mean_length": 7459.69,
1700
+ "loss": 0.30668070912361145,
1701
+ "sps": 2118.085155562087
1702
+ },
1703
+ {
1704
+ "update": 950,
1705
+ "global_step": 3891200,
1706
+ "num_episodes": 641,
1707
+ "mean_reward": 228.60497048854828,
1708
+ "mean_length": 7558.87,
1709
+ "loss": 1.160091519355774,
1710
+ "sps": 1340.3946598207713
1711
+ },
1712
+ {
1713
+ "update": 955,
1714
+ "global_step": 3911680,
1715
+ "num_episodes": 643,
1716
+ "mean_reward": 229.18769864559172,
1717
+ "mean_length": 7558.87,
1718
+ "loss": 1.3600318431854248,
1719
+ "sps": 2017.4738503217943
1720
+ },
1721
+ {
1722
+ "update": 960,
1723
+ "global_step": 3932160,
1724
+ "num_episodes": 645,
1725
+ "mean_reward": 226.2710527563095,
1726
+ "mean_length": 7558.87,
1727
+ "loss": 0.5724247097969055,
1728
+ "sps": 2190.27561016469
1729
+ },
1730
+ {
1731
+ "update": 965,
1732
+ "global_step": 3952640,
1733
+ "num_episodes": 647,
1734
+ "mean_reward": 233.28511716365813,
1735
+ "mean_length": 7558.87,
1736
+ "loss": 0.6445204019546509,
1737
+ "sps": 425.1404956260307
1738
+ },
1739
+ {
1740
+ "update": 970,
1741
+ "global_step": 3973120,
1742
+ "num_episodes": 649,
1743
+ "mean_reward": 234.76467885494233,
1744
+ "mean_length": 7558.87,
1745
+ "loss": 5.908900737762451,
1746
+ "sps": 291.87626703381966
1747
+ },
1748
+ {
1749
+ "update": 975,
1750
+ "global_step": 3993600,
1751
+ "num_episodes": 651,
1752
+ "mean_reward": 236.37012548923494,
1753
+ "mean_length": 7592.95,
1754
+ "loss": 16.730175018310547,
1755
+ "sps": 386.3945177442575
1756
+ },
1757
+ {
1758
+ "update": 980,
1759
+ "global_step": 4014080,
1760
+ "num_episodes": 655,
1761
+ "mean_reward": 240.30179923057557,
1762
+ "mean_length": 7654.13,
1763
+ "loss": 27.565311431884766,
1764
+ "sps": 653.7194172279133
1765
+ },
1766
+ {
1767
+ "update": 985,
1768
+ "global_step": 4034560,
1769
+ "num_episodes": 656,
1770
+ "mean_reward": 241.5855724811554,
1771
+ "mean_length": 7750.3,
1772
+ "loss": 1.2501119375228882,
1773
+ "sps": 1185.5585164533727
1774
+ },
1775
+ {
1776
+ "update": 990,
1777
+ "global_step": 4055040,
1778
+ "num_episodes": 657,
1779
+ "mean_reward": 242.3764306640625,
1780
+ "mean_length": 7849.96,
1781
+ "loss": 0.11368373781442642,
1782
+ "sps": 953.2503801655828
1783
+ },
1784
+ {
1785
+ "update": 995,
1786
+ "global_step": 4075520,
1787
+ "num_episodes": 660,
1788
+ "mean_reward": 266.251427526474,
1789
+ "mean_length": 8045.38,
1790
+ "loss": 5.969295978546143,
1791
+ "sps": 384.7242265220645
1792
+ },
1793
+ {
1794
+ "update": 1000,
1795
+ "global_step": 4096000,
1796
+ "num_episodes": 664,
1797
+ "mean_reward": 282.5860444736481,
1798
+ "mean_length": 8176.85,
1799
+ "loss": 27.033348083496094,
1800
+ "sps": 376.322863993888
1801
+ },
1802
+ {
1803
+ "update": 1005,
1804
+ "global_step": 4116480,
1805
+ "num_episodes": 665,
1806
+ "mean_reward": 283.86758942604064,
1807
+ "mean_length": 8232.26,
1808
+ "loss": 7.4467058181762695,
1809
+ "sps": 374.6383841753984
1810
+ },
1811
+ {
1812
+ "update": 1010,
1813
+ "global_step": 4136960,
1814
+ "num_episodes": 666,
1815
+ "mean_reward": 294.7833961582184,
1816
+ "mean_length": 8232.26,
1817
+ "loss": 17.35069465637207,
1818
+ "sps": 308.584037781751
1819
+ },
1820
+ {
1821
+ "update": 1015,
1822
+ "global_step": 4157440,
1823
+ "num_episodes": 669,
1824
+ "mean_reward": 281.23537853717806,
1825
+ "mean_length": 8289.35,
1826
+ "loss": 1.9592938423156738,
1827
+ "sps": 576.3621863402531
1828
+ },
1829
+ {
1830
+ "update": 1020,
1831
+ "global_step": 4177920,
1832
+ "num_episodes": 675,
1833
+ "mean_reward": 275.6631519651413,
1834
+ "mean_length": 8186.57,
1835
+ "loss": 23.52161407470703,
1836
+ "sps": 661.3091435538709
1837
+ },
1838
+ {
1839
+ "update": 1025,
1840
+ "global_step": 4198400,
1841
+ "num_episodes": 678,
1842
+ "mean_reward": 279.6017733335495,
1843
+ "mean_length": 8183.74,
1844
+ "loss": 29.961881637573242,
1845
+ "sps": 559.0450859302445
1846
+ },
1847
+ {
1848
+ "update": 1030,
1849
+ "global_step": 4218880,
1850
+ "num_episodes": 680,
1851
+ "mean_reward": 303.8355294418335,
1852
+ "mean_length": 8282.54,
1853
+ "loss": 0.7290852069854736,
1854
+ "sps": 1641.6323017599234
1855
+ },
1856
+ {
1857
+ "update": 1035,
1858
+ "global_step": 4239360,
1859
+ "num_episodes": 682,
1860
+ "mean_reward": 309.25065141677857,
1861
+ "mean_length": 8380.25,
1862
+ "loss": 7.585202693939209,
1863
+ "sps": 290.23415915491415
1864
+ },
1865
+ {
1866
+ "update": 1040,
1867
+ "global_step": 4259840,
1868
+ "num_episodes": 685,
1869
+ "mean_reward": 307.12660905838015,
1870
+ "mean_length": 8380.25,
1871
+ "loss": 6.446464538574219,
1872
+ "sps": 383.71739315963373
1873
+ },
1874
+ {
1875
+ "update": 1045,
1876
+ "global_step": 4280320,
1877
+ "num_episodes": 687,
1878
+ "mean_reward": 303.14754570007324,
1879
+ "mean_length": 8380.25,
1880
+ "loss": 9.89684772491455,
1881
+ "sps": 694.9770930488776
1882
+ },
1883
+ {
1884
+ "update": 1050,
1885
+ "global_step": 4300800,
1886
+ "num_episodes": 688,
1887
+ "mean_reward": 303.04791251659395,
1888
+ "mean_length": 8462.37,
1889
+ "loss": 1.8083292245864868,
1890
+ "sps": 1040.0758682399094
1891
+ },
1892
+ {
1893
+ "update": 1055,
1894
+ "global_step": 4321280,
1895
+ "num_episodes": 690,
1896
+ "mean_reward": 303.53817603588107,
1897
+ "mean_length": 8516.23,
1898
+ "loss": 1.01134192943573,
1899
+ "sps": 3137.679652522162
1900
+ },
1901
+ {
1902
+ "update": 1060,
1903
+ "global_step": 4341760,
1904
+ "num_episodes": 693,
1905
+ "mean_reward": 309.7838798046112,
1906
+ "mean_length": 8562.16,
1907
+ "loss": 14.91142749786377,
1908
+ "sps": 214.3493374321697
1909
+ },
1910
+ {
1911
+ "update": 1065,
1912
+ "global_step": 4362240,
1913
+ "num_episodes": 695,
1914
+ "mean_reward": 305.9636838245392,
1915
+ "mean_length": 8596.65,
1916
+ "loss": 25.306856155395508,
1917
+ "sps": 520.1481805059935
1918
+ },
1919
+ {
1920
+ "update": 1070,
1921
+ "global_step": 4382720,
1922
+ "num_episodes": 698,
1923
+ "mean_reward": 303.673222618103,
1924
+ "mean_length": 8440.95,
1925
+ "loss": 1.5045527219772339,
1926
+ "sps": 1217.974691455878
1927
+ },
1928
+ {
1929
+ "update": 1075,
1930
+ "global_step": 4403200,
1931
+ "num_episodes": 701,
1932
+ "mean_reward": 304.443781414032,
1933
+ "mean_length": 8557.61,
1934
+ "loss": 3.09621524810791,
1935
+ "sps": 3334.805184943337
1936
+ },
1937
+ {
1938
+ "update": 1080,
1939
+ "global_step": 4423680,
1940
+ "num_episodes": 702,
1941
+ "mean_reward": 303.1939596557617,
1942
+ "mean_length": 8557.61,
1943
+ "loss": 0.5530338883399963,
1944
+ "sps": 3357.0436968093577
1945
+ },
1946
+ {
1947
+ "update": 1085,
1948
+ "global_step": 4444160,
1949
+ "num_episodes": 706,
1950
+ "mean_reward": 297.1481484127045,
1951
+ "mean_length": 8521.02,
1952
+ "loss": 7.937706470489502,
1953
+ "sps": 1838.4046275928872
1954
+ },
1955
+ {
1956
+ "update": 1090,
1957
+ "global_step": 4464640,
1958
+ "num_episodes": 709,
1959
+ "mean_reward": 294.23652092933656,
1960
+ "mean_length": 8421.99,
1961
+ "loss": 1.2086868286132812,
1962
+ "sps": 2518.910916673558
1963
+ },
1964
+ {
1965
+ "update": 1095,
1966
+ "global_step": 4485120,
1967
+ "num_episodes": 713,
1968
+ "mean_reward": 291.4626713657379,
1969
+ "mean_length": 8418.82,
1970
+ "loss": 0.07519354671239853,
1971
+ "sps": 2391.4890715912597
1972
+ },
1973
+ {
1974
+ "update": 1100,
1975
+ "global_step": 4505600,
1976
+ "num_episodes": 714,
1977
+ "mean_reward": 295.5940579319,
1978
+ "mean_length": 8418.82,
1979
+ "loss": -0.04633036255836487,
1980
+ "sps": 2309.370211128856
1981
+ },
1982
+ {
1983
+ "update": 1105,
1984
+ "global_step": 4526080,
1985
+ "num_episodes": 717,
1986
+ "mean_reward": 308.55377667427064,
1987
+ "mean_length": 8615.8,
1988
+ "loss": 6.805793762207031,
1989
+ "sps": 1932.5391546325443
1990
+ },
1991
+ {
1992
+ "update": 1110,
1993
+ "global_step": 4546560,
1994
+ "num_episodes": 718,
1995
+ "mean_reward": 308.8043742084503,
1996
+ "mean_length": 8615.8,
1997
+ "loss": 1.5425922870635986,
1998
+ "sps": 3048.7004653828785
1999
+ },
2000
+ {
2001
+ "update": 1115,
2002
+ "global_step": 4567040,
2003
+ "num_episodes": 723,
2004
+ "mean_reward": 303.7773459148407,
2005
+ "mean_length": 8522.71,
2006
+ "loss": 0.27959662675857544,
2007
+ "sps": 2995.9554614172757
2008
+ },
2009
+ {
2010
+ "update": 1120,
2011
+ "global_step": 4587520,
2012
+ "num_episodes": 724,
2013
+ "mean_reward": 303.2645093822479,
2014
+ "mean_length": 8522.71,
2015
+ "loss": -0.0757850706577301,
2016
+ "sps": 3148.672000633405
2017
+ },
2018
+ {
2019
+ "update": 1125,
2020
+ "global_step": 4608000,
2021
+ "num_episodes": 728,
2022
+ "mean_reward": 295.8857748699188,
2023
+ "mean_length": 8522.71,
2024
+ "loss": 0.8416875004768372,
2025
+ "sps": 1310.0236701271228
2026
+ },
2027
+ {
2028
+ "update": 1130,
2029
+ "global_step": 4628480,
2030
+ "num_episodes": 728,
2031
+ "mean_reward": 295.8857748699188,
2032
+ "mean_length": 8522.71,
2033
+ "loss": 3.5039658546447754,
2034
+ "sps": 2038.3808422065272
2035
+ },
2036
+ {
2037
+ "update": 1135,
2038
+ "global_step": 4648960,
2039
+ "num_episodes": 733,
2040
+ "mean_reward": 288.808697681427,
2041
+ "mean_length": 8383.94,
2042
+ "loss": 0.9907295107841492,
2043
+ "sps": 432.1509684368163
2044
+ },
2045
+ {
2046
+ "update": 1140,
2047
+ "global_step": 4669440,
2048
+ "num_episodes": 734,
2049
+ "mean_reward": 288.7881833076477,
2050
+ "mean_length": 8383.94,
2051
+ "loss": 5.849100112915039,
2052
+ "sps": 743.4081065939373
2053
+ },
2054
+ {
2055
+ "update": 1145,
2056
+ "global_step": 4689920,
2057
+ "num_episodes": 742,
2058
+ "mean_reward": 248.96954289913177,
2059
+ "mean_length": 8035.29,
2060
+ "loss": 5.8911590576171875,
2061
+ "sps": 439.3842370241788
2062
+ },
2063
+ {
2064
+ "update": 1150,
2065
+ "global_step": 4710400,
2066
+ "num_episodes": 743,
2067
+ "mean_reward": 247.8361682987213,
2068
+ "mean_length": 7942.3,
2069
+ "loss": 2.5104424953460693,
2070
+ "sps": 2091.825912976944
2071
+ },
2072
+ {
2073
+ "update": 1155,
2074
+ "global_step": 4730880,
2075
+ "num_episodes": 746,
2076
+ "mean_reward": 247.5757204723358,
2077
+ "mean_length": 7843.08,
2078
+ "loss": 0.02520643174648285,
2079
+ "sps": 2215.1389764020146
2080
+ },
2081
+ {
2082
+ "update": 1160,
2083
+ "global_step": 4751360,
2084
+ "num_episodes": 748,
2085
+ "mean_reward": 237.4100481700897,
2086
+ "mean_length": 7843.08,
2087
+ "loss": 3.045536756515503,
2088
+ "sps": 1354.2367583329549
2089
+ },
2090
+ {
2091
+ "update": 1165,
2092
+ "global_step": 4771840,
2093
+ "num_episodes": 753,
2094
+ "mean_reward": 254.61865887641906,
2095
+ "mean_length": 7743.82,
2096
+ "loss": 18.703535079956055,
2097
+ "sps": 1982.4466952604278
2098
+ },
2099
+ {
2100
+ "update": 1170,
2101
+ "global_step": 4792320,
2102
+ "num_episodes": 753,
2103
+ "mean_reward": 254.61865887641906,
2104
+ "mean_length": 7743.82,
2105
+ "loss": 0.33046531677246094,
2106
+ "sps": 2095.2503659093586
2107
+ },
2108
+ {
2109
+ "update": 1175,
2110
+ "global_step": 4812800,
2111
+ "num_episodes": 755,
2112
+ "mean_reward": 253.8280854511261,
2113
+ "mean_length": 7787.79,
2114
+ "loss": 3.0896127223968506,
2115
+ "sps": 422.3812735467675
2116
+ },
2117
+ {
2118
+ "update": 1180,
2119
+ "global_step": 4833280,
2120
+ "num_episodes": 758,
2121
+ "mean_reward": 264.60756063461304,
2122
+ "mean_length": 7709.59,
2123
+ "loss": -0.09283498674631119,
2124
+ "sps": 2676.2813899197204
2125
+ },
2126
+ {
2127
+ "update": 1185,
2128
+ "global_step": 4853760,
2129
+ "num_episodes": 763,
2130
+ "mean_reward": 239.50925126552582,
2131
+ "mean_length": 7671.32,
2132
+ "loss": 0.09459003806114197,
2133
+ "sps": 2538.8112544970772
2134
+ },
2135
+ {
2136
+ "update": 1190,
2137
+ "global_step": 4874240,
2138
+ "num_episodes": 763,
2139
+ "mean_reward": 239.50925126552582,
2140
+ "mean_length": 7671.32,
2141
+ "loss": 0.17775702476501465,
2142
+ "sps": 2975.422989387635
2143
+ },
2144
+ {
2145
+ "update": 1195,
2146
+ "global_step": 4894720,
2147
+ "num_episodes": 767,
2148
+ "mean_reward": 218.03613798141478,
2149
+ "mean_length": 7614.22,
2150
+ "loss": 3.9349420070648193,
2151
+ "sps": 417.34421892596595
2152
+ },
2153
+ {
2154
+ "update": 1200,
2155
+ "global_step": 4915200,
2156
+ "num_episodes": 768,
2157
+ "mean_reward": 218.03613859176636,
2158
+ "mean_length": 7614.22,
2159
+ "loss": 4.028732776641846,
2160
+ "sps": 784.4513250410687
2161
+ },
2162
+ {
2163
+ "update": 1205,
2164
+ "global_step": 4935680,
2165
+ "num_episodes": 773,
2166
+ "mean_reward": 208.71825884342195,
2167
+ "mean_length": 7713.33,
2168
+ "loss": 10.78580093383789,
2169
+ "sps": 734.3705869944031
2170
+ },
2171
+ {
2172
+ "update": 1210,
2173
+ "global_step": 4956160,
2174
+ "num_episodes": 774,
2175
+ "mean_reward": 208.632105717659,
2176
+ "mean_length": 7808.29,
2177
+ "loss": 3.5958919525146484,
2178
+ "sps": 1305.6310352836174
2179
+ },
2180
+ {
2181
+ "update": 1215,
2182
+ "global_step": 4976640,
2183
+ "num_episodes": 777,
2184
+ "mean_reward": 204.77496297359465,
2185
+ "mean_length": 7805.2,
2186
+ "loss": 3.0023584365844727,
2187
+ "sps": 2482.0292324013326
2188
+ },
2189
+ {
2190
+ "update": 1220,
2191
+ "global_step": 4997120,
2192
+ "num_episodes": 779,
2193
+ "mean_reward": 196.00456192016603,
2194
+ "mean_length": 7808.44,
2195
+ "loss": 11.420112609863281,
2196
+ "sps": 2348.8046891139184
2197
+ },
2198
+ {
2199
+ "update": 1225,
2200
+ "global_step": 5017600,
2201
+ "num_episodes": 786,
2202
+ "mean_reward": 184.82681788921357,
2203
+ "mean_length": 7520.75,
2204
+ "loss": 9.389796257019043,
2205
+ "sps": 403.47715369548195
2206
+ },
2207
+ {
2208
+ "update": 1230,
2209
+ "global_step": 5038080,
2210
+ "num_episodes": 789,
2211
+ "mean_reward": 185.08837336063385,
2212
+ "mean_length": 7350.11,
2213
+ "loss": 1.2659046649932861,
2214
+ "sps": 740.0947252642347
2215
+ },
2216
+ {
2217
+ "update": 1235,
2218
+ "global_step": 5058560,
2219
+ "num_episodes": 793,
2220
+ "mean_reward": 179.18952730178833,
2221
+ "mean_length": 7160.46,
2222
+ "loss": 0.042632922530174255,
2223
+ "sps": 2798.3588240027393
2224
+ },
2225
+ {
2226
+ "update": 1240,
2227
+ "global_step": 5079040,
2228
+ "num_episodes": 794,
2229
+ "mean_reward": 184.8410611152649,
2230
+ "mean_length": 7160.46,
2231
+ "loss": 1.8178130388259888,
2232
+ "sps": 2741.015198709958
2233
+ },
2234
+ {
2235
+ "update": 1245,
2236
+ "global_step": 5099520,
2237
+ "num_episodes": 799,
2238
+ "mean_reward": 189.7154534816742,
2239
+ "mean_length": 7117.64,
2240
+ "loss": 2.4596240520477295,
2241
+ "sps": 2848.528616655762
2242
+ },
2243
+ {
2244
+ "update": 1250,
2245
+ "global_step": 5120000,
2246
+ "num_episodes": 802,
2247
+ "mean_reward": 191.06395349502563,
2248
+ "mean_length": 7035.96,
2249
+ "loss": 3.05761981010437,
2250
+ "sps": 2957.866130229398
2251
+ },
2252
+ {
2253
+ "update": 1255,
2254
+ "global_step": 5140480,
2255
+ "num_episodes": 807,
2256
+ "mean_reward": 184.37625044822693,
2257
+ "mean_length": 6937.2,
2258
+ "loss": 8.337796211242676,
2259
+ "sps": 797.4964725831112
2260
+ },
2261
+ {
2262
+ "update": 1260,
2263
+ "global_step": 5160960,
2264
+ "num_episodes": 811,
2265
+ "mean_reward": 183.53424713134766,
2266
+ "mean_length": 6839.0,
2267
+ "loss": 4.347324371337891,
2268
+ "sps": 2885.534959076487
2269
+ },
2270
+ {
2271
+ "update": 1265,
2272
+ "global_step": 5181440,
2273
+ "num_episodes": 814,
2274
+ "mean_reward": 179.14370263576507,
2275
+ "mean_length": 6841.38,
2276
+ "loss": 0.9887397289276123,
2277
+ "sps": 2726.0079616540743
2278
+ },
2279
+ {
2280
+ "update": 1270,
2281
+ "global_step": 5201920,
2282
+ "num_episodes": 816,
2283
+ "mean_reward": 182.02527619838713,
2284
+ "mean_length": 6841.38,
2285
+ "loss": 3.567499876022339,
2286
+ "sps": 2548.7726014650657
2287
+ },
2288
+ {
2289
+ "update": 1275,
2290
+ "global_step": 5222400,
2291
+ "num_episodes": 818,
2292
+ "mean_reward": 167.24890702724457,
2293
+ "mean_length": 6841.38,
2294
+ "loss": 1.4451591968536377,
2295
+ "sps": 312.6484055556043
2296
+ },
2297
+ {
2298
+ "update": 1280,
2299
+ "global_step": 5242880,
2300
+ "num_episodes": 821,
2301
+ "mean_reward": 165.29594656467438,
2302
+ "mean_length": 6837.89,
2303
+ "loss": 1.7843670845031738,
2304
+ "sps": 570.2931373528285
2305
+ },
2306
+ {
2307
+ "update": 1285,
2308
+ "global_step": 5263360,
2309
+ "num_episodes": 828,
2310
+ "mean_reward": 152.66287196159362,
2311
+ "mean_length": 6498.54,
2312
+ "loss": 5.005251407623291,
2313
+ "sps": 272.3373610296092
2314
+ },
2315
+ {
2316
+ "update": 1290,
2317
+ "global_step": 5283840,
2318
+ "num_episodes": 830,
2319
+ "mean_reward": 153.68320658683777,
2320
+ "mean_length": 6498.54,
2321
+ "loss": 8.443866729736328,
2322
+ "sps": 292.14330020270444
2323
+ },
2324
+ {
2325
+ "update": 1295,
2326
+ "global_step": 5304320,
2327
+ "num_episodes": 831,
2328
+ "mean_reward": 157.06462443351745,
2329
+ "mean_length": 6498.54,
2330
+ "loss": 2.2017781734466553,
2331
+ "sps": 265.6513386532661
2332
+ },
2333
+ {
2334
+ "update": 1300,
2335
+ "global_step": 5324800,
2336
+ "num_episodes": 834,
2337
+ "mean_reward": 158.8159614944458,
2338
+ "mean_length": 6680.51,
2339
+ "loss": 6.665990352630615,
2340
+ "sps": 563.3260777169
2341
+ },
2342
+ {
2343
+ "update": 1305,
2344
+ "global_step": 5345280,
2345
+ "num_episodes": 840,
2346
+ "mean_reward": 145.94160705566406,
2347
+ "mean_length": 6594.4,
2348
+ "loss": 10.99843978881836,
2349
+ "sps": 1955.0578156421066
2350
+ },
2351
+ {
2352
+ "update": 1310,
2353
+ "global_step": 5365760,
2354
+ "num_episodes": 843,
2355
+ "mean_reward": 145.8064595746994,
2356
+ "mean_length": 6590.29,
2357
+ "loss": 0.6882038116455078,
2358
+ "sps": 382.4471874145512
2359
+ },
2360
+ {
2361
+ "update": 1315,
2362
+ "global_step": 5386240,
2363
+ "num_episodes": 844,
2364
+ "mean_reward": 146.0564516401291,
2365
+ "mean_length": 6590.29,
2366
+ "loss": 0.8968511819839478,
2367
+ "sps": 688.8494388200007
2368
+ },
2369
+ {
2370
+ "update": 1320,
2371
+ "global_step": 5406720,
2372
+ "num_episodes": 848,
2373
+ "mean_reward": 150.32687950611114,
2374
+ "mean_length": 6662.46,
2375
+ "loss": 25.992216110229492,
2376
+ "sps": 658.8033472373343
2377
+ },
2378
+ {
2379
+ "update": 1325,
2380
+ "global_step": 5427200,
2381
+ "num_episodes": 851,
2382
+ "mean_reward": 146.97661718845367,
2383
+ "mean_length": 6761.72,
2384
+ "loss": 3.56770658493042,
2385
+ "sps": 985.132708019695
2386
+ },
2387
+ {
2388
+ "update": 1330,
2389
+ "global_step": 5447680,
2390
+ "num_episodes": 852,
2391
+ "mean_reward": 134.18026546001434,
2392
+ "mean_length": 6664.57,
2393
+ "loss": 15.875215530395508,
2394
+ "sps": 1092.9486199541875
2395
+ },
2396
+ {
2397
+ "update": 1335,
2398
+ "global_step": 5468160,
2399
+ "num_episodes": 853,
2400
+ "mean_reward": 130.54885493755341,
2401
+ "mean_length": 6664.57,
2402
+ "loss": 6.959790229797363,
2403
+ "sps": 289.8487086871761
2404
+ },
2405
+ {
2406
+ "update": 1340,
2407
+ "global_step": 5488640,
2408
+ "num_episodes": 860,
2409
+ "mean_reward": 130.95773406505586,
2410
+ "mean_length": 6485.88,
2411
+ "loss": 20.052452087402344,
2412
+ "sps": 486.39910125898484
2413
+ },
2414
+ {
2415
+ "update": 1345,
2416
+ "global_step": 5509120,
2417
+ "num_episodes": 863,
2418
+ "mean_reward": 142.73314903259276,
2419
+ "mean_length": 6488.99,
2420
+ "loss": 10.171817779541016,
2421
+ "sps": 201.9510308985079
2422
+ },
2423
+ {
2424
+ "update": 1350,
2425
+ "global_step": 5529600,
2426
+ "num_episodes": 865,
2427
+ "mean_reward": 140.62244102954864,
2428
+ "mean_length": 6427.8,
2429
+ "loss": 1.1496386528015137,
2430
+ "sps": 635.0260794777045
2431
+ },
2432
+ {
2433
+ "update": 1355,
2434
+ "global_step": 5550080,
2435
+ "num_episodes": 865,
2436
+ "mean_reward": 140.62244102954864,
2437
+ "mean_length": 6427.8,
2438
+ "loss": 4.384088039398193,
2439
+ "sps": 882.9404372993679
2440
+ },
2441
+ {
2442
+ "update": 1360,
2443
+ "global_step": 5570560,
2444
+ "num_episodes": 872,
2445
+ "mean_reward": 147.4697220182419,
2446
+ "mean_length": 6527.99,
2447
+ "loss": 10.650382041931152,
2448
+ "sps": 280.68259822115476
2449
+ },
2450
+ {
2451
+ "update": 1365,
2452
+ "global_step": 5591040,
2453
+ "num_episodes": 875,
2454
+ "mean_reward": 145.2843037557602,
2455
+ "mean_length": 6358.27,
2456
+ "loss": 38.430118560791016,
2457
+ "sps": 324.7223637427747
2458
+ },
2459
+ {
2460
+ "update": 1370,
2461
+ "global_step": 5611520,
2462
+ "num_episodes": 876,
2463
+ "mean_reward": 145.53429505825042,
2464
+ "mean_length": 6358.27,
2465
+ "loss": 38.706214904785156,
2466
+ "sps": 538.1413797129117
2467
+ },
2468
+ {
2469
+ "update": 1375,
2470
+ "global_step": 5632000,
2471
+ "num_episodes": 877,
2472
+ "mean_reward": 145.43353649616242,
2473
+ "mean_length": 6431.14,
2474
+ "loss": 16.998456954956055,
2475
+ "sps": 824.6403047601932
2476
+ },
2477
+ {
2478
+ "update": 1380,
2479
+ "global_step": 5652480,
2480
+ "num_episodes": 881,
2481
+ "mean_reward": 154.26934564590454,
2482
+ "mean_length": 6526.9,
2483
+ "loss": 3.5649774074554443,
2484
+ "sps": 844.5595148121038
2485
+ },
2486
+ {
2487
+ "update": 1385,
2488
+ "global_step": 5672960,
2489
+ "num_episodes": 884,
2490
+ "mean_reward": 161.71352871894837,
2491
+ "mean_length": 6550.38,
2492
+ "loss": 3.710602045059204,
2493
+ "sps": 908.754059815621
2494
+ },
2495
+ {
2496
+ "update": 1390,
2497
+ "global_step": 5693440,
2498
+ "num_episodes": 885,
2499
+ "mean_reward": 163.49456966400146,
2500
+ "mean_length": 6649.33,
2501
+ "loss": 0.4263116717338562,
2502
+ "sps": 2189.736809475642
2503
+ },
2504
+ {
2505
+ "update": 1395,
2506
+ "global_step": 5713920,
2507
+ "num_episodes": 886,
2508
+ "mean_reward": 176.25792336940765,
2509
+ "mean_length": 6738.86,
2510
+ "loss": 0.27514657378196716,
2511
+ "sps": 2997.673240659507
2512
+ },
2513
+ {
2514
+ "update": 1400,
2515
+ "global_step": 5734400,
2516
+ "num_episodes": 889,
2517
+ "mean_reward": 178.37687824249267,
2518
+ "mean_length": 6926.31,
2519
+ "loss": 0.08463311195373535,
2520
+ "sps": 3081.918148797146
2521
  }
2522
  ]