JoshuaFreeman commited on
Commit
939547d
·
verified ·
1 Parent(s): 490c9fe

Upload training_log.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_log.json +326 -2702
training_log.json CHANGED
@@ -3,2970 +3,594 @@
3
  "update": 5,
4
  "global_step": 20480,
5
  "num_episodes": 12,
6
- "mean_reward": 2.9954163233439126,
7
- "mean_length": 301.75,
8
- "loss": 0.4707077145576477,
9
- "sps": 2897.2289066048047
10
  },
11
  {
12
  "update": 10,
13
  "global_step": 40960,
14
  "num_episodes": 12,
15
- "mean_reward": 2.9954163233439126,
16
- "mean_length": 301.75,
17
- "loss": 1.0232033729553223,
18
- "sps": 2951.2458688255706
19
  },
20
  {
21
  "update": 15,
22
  "global_step": 61440,
23
  "num_episodes": 12,
24
- "mean_reward": 2.9954163233439126,
25
- "mean_length": 301.75,
26
- "loss": 1.3049204349517822,
27
- "sps": 2822.044769995029
28
  },
29
  {
30
  "update": 20,
31
  "global_step": 81920,
32
- "num_episodes": 16,
33
- "mean_reward": 6.446762174367905,
34
- "mean_length": 2105.625,
35
- "loss": 3.682100772857666,
36
- "sps": 2013.3169607307623
37
  },
38
  {
39
  "update": 25,
40
  "global_step": 102400,
41
- "num_episodes": 24,
42
- "mean_reward": 14.11154419183731,
43
- "mean_length": 3507.7083333333335,
44
- "loss": 0.5709385275840759,
45
- "sps": 2704.0544416242456
46
  },
47
  {
48
  "update": 30,
49
  "global_step": 122880,
50
- "num_episodes": 24,
51
- "mean_reward": 14.11154419183731,
52
- "mean_length": 3507.7083333333335,
53
- "loss": 0.9411047697067261,
54
- "sps": 2703.677829940517
55
  },
56
  {
57
  "update": 35,
58
  "global_step": 143360,
59
- "num_episodes": 24,
60
- "mean_reward": 14.11154419183731,
61
- "mean_length": 3507.7083333333335,
62
- "loss": 0.18034808337688446,
63
- "sps": 2660.7833455377177
64
  },
65
  {
66
  "update": 40,
67
  "global_step": 163840,
68
- "num_episodes": 29,
69
- "mean_reward": 33.75182229074939,
70
- "mean_length": 4627.068965517241,
71
- "loss": 0.449077308177948,
72
- "sps": 781.6860032595372
73
  },
74
  {
75
  "update": 45,
76
  "global_step": 184320,
77
- "num_episodes": 43,
78
- "mean_reward": 31.471195509267407,
79
- "mean_length": 3927.2093023255816,
80
- "loss": 2.4075186252593994,
81
- "sps": 1640.7038600574003
82
  },
83
  {
84
  "update": 50,
85
  "global_step": 204800,
86
- "num_episodes": 43,
87
- "mean_reward": 31.471195509267407,
88
- "mean_length": 3927.2093023255816,
89
- "loss": 0.7558565735816956,
90
- "sps": 2296.3021079759415
91
  },
92
  {
93
  "update": 55,
94
  "global_step": 225280,
95
- "num_episodes": 43,
96
- "mean_reward": 31.471195509267407,
97
- "mean_length": 3927.2093023255816,
98
- "loss": 0.004491906613111496,
99
- "sps": 1918.4604744440246
100
  },
101
  {
102
  "update": 60,
103
  "global_step": 245760,
104
- "num_episodes": 45,
105
- "mean_reward": 33.261741065979,
106
- "mean_length": 4197.111111111111,
107
- "loss": 0.35625988245010376,
108
- "sps": 2061.6452443134217
109
  },
110
  {
111
  "update": 65,
112
  "global_step": 266240,
113
- "num_episodes": 56,
114
- "mean_reward": 39.252455030168804,
115
- "mean_length": 4465.803571428572,
116
- "loss": 0.26919102668762207,
117
- "sps": 2690.170793323002
118
  },
119
  {
120
  "update": 70,
121
  "global_step": 286720,
122
- "num_episodes": 56,
123
- "mean_reward": 39.252455030168804,
124
- "mean_length": 4465.803571428572,
125
- "loss": 0.1174827367067337,
126
- "sps": 2143.112234819028
127
  },
128
  {
129
  "update": 75,
130
  "global_step": 307200,
131
- "num_episodes": 56,
132
- "mean_reward": 39.252455030168804,
133
- "mean_length": 4465.803571428572,
134
- "loss": -0.09781338274478912,
135
- "sps": 2208.779203581181
136
  },
137
  {
138
  "update": 80,
139
  "global_step": 327680,
140
- "num_episodes": 62,
141
- "mean_reward": 38.99283899030378,
142
- "mean_length": 4527.532258064516,
143
- "loss": 1.440595030784607,
144
- "sps": 655.150017301736
145
  },
146
  {
147
  "update": 85,
148
  "global_step": 348160,
149
- "num_episodes": 84,
150
- "mean_reward": 36.50451924119677,
151
- "mean_length": 4002.7380952380954,
152
- "loss": 1.0692782402038574,
153
- "sps": 1057.9744994181403
154
  },
155
  {
156
  "update": 90,
157
  "global_step": 368640,
158
- "num_episodes": 86,
159
- "mean_reward": 35.66498793003171,
160
- "mean_length": 3935.0232558139537,
161
- "loss": -0.0837952047586441,
162
- "sps": 2123.204801099428
163
  },
164
  {
165
  "update": 95,
166
  "global_step": 389120,
167
- "num_episodes": 86,
168
- "mean_reward": 35.66498793003171,
169
- "mean_length": 3935.0232558139537,
170
- "loss": 0.02618391066789627,
171
- "sps": 1904.8344116389594
172
  },
173
  {
174
  "update": 100,
175
  "global_step": 409600,
176
- "num_episodes": 88,
177
- "mean_reward": 35.91715085506439,
178
- "mean_length": 4072.8636363636365,
179
- "loss": -0.004935186356306076,
180
- "sps": 1928.296801932442
181
  },
182
  {
183
  "update": 105,
184
  "global_step": 430080,
185
- "num_episodes": 100,
186
- "mean_reward": 33.11026698112488,
187
- "mean_length": 4118.01,
188
- "loss": 1.716380000114441,
189
- "sps": 755.5801175010796
190
  },
191
  {
192
  "update": 110,
193
  "global_step": 450560,
194
- "num_episodes": 102,
195
- "mean_reward": 33.19641512870788,
196
- "mean_length": 4218.5,
197
- "loss": 0.0785534530878067,
198
- "sps": 2175.9855223665277
199
  },
200
  {
201
  "update": 115,
202
  "global_step": 471040,
203
- "num_episodes": 102,
204
- "mean_reward": 33.19641512870788,
205
- "mean_length": 4218.5,
206
- "loss": -0.07273395359516144,
207
- "sps": 1787.2507488200727
208
  },
209
  {
210
  "update": 120,
211
  "global_step": 491520,
212
- "num_episodes": 103,
213
- "mean_reward": 33.58080266952515,
214
- "mean_length": 4314.65,
215
- "loss": -0.026257965713739395,
216
- "sps": 1805.9278386526794
217
  },
218
  {
219
  "update": 125,
220
  "global_step": 512000,
221
- "num_episodes": 110,
222
- "mean_reward": 37.310110912323,
223
- "mean_length": 4794.35,
224
- "loss": 1.8378688097000122,
225
- "sps": 1715.9048394677898
226
  },
227
  {
228
  "update": 130,
229
  "global_step": 532480,
230
- "num_episodes": 115,
231
- "mean_reward": 38.11976575374603,
232
- "mean_length": 4792.66,
233
- "loss": 0.0826515257358551,
234
- "sps": 2480.658396587235
235
  },
236
  {
237
  "update": 135,
238
  "global_step": 552960,
239
- "num_episodes": 115,
240
- "mean_reward": 38.11976575374603,
241
- "mean_length": 4792.66,
242
- "loss": -0.058959588408470154,
243
- "sps": 2558.7887965900827
244
  },
245
  {
246
  "update": 140,
247
  "global_step": 573440,
248
- "num_episodes": 117,
249
- "mean_reward": 40.18981074810028,
250
- "mean_length": 4792.66,
251
- "loss": 0.01938764378428459,
252
- "sps": 2511.6128893797145
253
  },
254
  {
255
  "update": 145,
256
  "global_step": 593920,
257
  "num_episodes": 124,
258
- "mean_reward": 51.76217389583588,
259
- "mean_length": 4803.44,
260
- "loss": 20.378520965576172,
261
- "sps": 909.3232748780807
262
  },
263
  {
264
  "update": 150,
265
  "global_step": 614400,
266
- "num_episodes": 126,
267
- "mean_reward": 51.032718253135684,
268
- "mean_length": 4803.44,
269
- "loss": 0.11053133755922318,
270
- "sps": 2492.3194028833077
271
  },
272
  {
273
  "update": 155,
274
  "global_step": 634880,
275
- "num_episodes": 126,
276
- "mean_reward": 51.032718253135684,
277
- "mean_length": 4803.44,
278
- "loss": 0.31901270151138306,
279
- "sps": 2322.6918424224104
280
  },
281
  {
282
  "update": 160,
283
  "global_step": 655360,
284
- "num_episodes": 129,
285
- "mean_reward": 49.292411608695986,
286
- "mean_length": 4705.26,
287
- "loss": 0.6963619589805603,
288
- "sps": 1560.7635575105323
289
  },
290
  {
291
  "update": 165,
292
  "global_step": 675840,
293
- "num_episodes": 138,
294
- "mean_reward": 51.79567018985748,
295
- "mean_length": 5005.94,
296
- "loss": 3.423638343811035,
297
- "sps": 1566.1339446711113
298
  },
299
  {
300
  "update": 170,
301
  "global_step": 696320,
302
- "num_episodes": 139,
303
- "mean_reward": 52.29565020084381,
304
- "mean_length": 5005.94,
305
- "loss": 5.795687675476074,
306
- "sps": 1548.0157210393158
307
  },
308
  {
309
  "update": 175,
310
  "global_step": 716800,
311
- "num_episodes": 139,
312
- "mean_reward": 52.29565020084381,
313
- "mean_length": 5005.94,
314
- "loss": 11.365696907043457,
315
- "sps": 1678.7844476146372
316
  },
317
  {
318
  "update": 180,
319
  "global_step": 737280,
320
- "num_episodes": 140,
321
- "mean_reward": 53.179930477142335,
322
- "mean_length": 5098.77,
323
- "loss": -0.05775236710906029,
324
- "sps": 1668.2296340431747
325
  },
326
  {
327
  "update": 185,
328
  "global_step": 757760,
329
- "num_episodes": 149,
330
- "mean_reward": 70.75448143482208,
331
- "mean_length": 5195.77,
332
- "loss": 73.1705322265625,
333
- "sps": 858.1752320250257
334
  },
335
  {
336
  "update": 190,
337
  "global_step": 778240,
338
- "num_episodes": 150,
339
- "mean_reward": 71.51151569843292,
340
- "mean_length": 5295.07,
341
- "loss": 0.24444621801376343,
342
- "sps": 1363.6491070768768
343
  },
344
  {
345
  "update": 195,
346
  "global_step": 798720,
347
- "num_episodes": 150,
348
- "mean_reward": 71.51151569843292,
349
- "mean_length": 5295.07,
350
- "loss": 0.8110665082931519,
351
- "sps": 1453.3363954580439
352
  },
353
  {
354
  "update": 200,
355
  "global_step": 819200,
356
- "num_episodes": 151,
357
- "mean_reward": 72.04212862491607,
358
- "mean_length": 5392.97,
359
- "loss": 0.1485036462545395,
360
- "sps": 1399.4723333291518
361
  },
362
  {
363
  "update": 205,
364
  "global_step": 839680,
365
- "num_episodes": 157,
366
- "mean_reward": 98.28625455379486,
367
- "mean_length": 5585.11,
368
- "loss": 23.002685546875,
369
- "sps": 240.5853655120997
370
  },
371
  {
372
  "update": 210,
373
  "global_step": 860160,
374
- "num_episodes": 160,
375
- "mean_reward": 99.03613055229187,
376
- "mean_length": 5599.07,
377
- "loss": 56.14109802246094,
378
- "sps": 963.6583162438803
379
  },
380
  {
381
  "update": 215,
382
  "global_step": 880640,
383
- "num_episodes": 160,
384
- "mean_reward": 99.03613055229187,
385
- "mean_length": 5599.07,
386
- "loss": 0.288861483335495,
387
- "sps": 1135.1194345027134
388
  },
389
  {
390
  "update": 220,
391
  "global_step": 901120,
392
- "num_episodes": 162,
393
- "mean_reward": 100.13229321479797,
394
- "mean_length": 5596.55,
395
- "loss": 0.12560202181339264,
396
- "sps": 1065.000301525082
397
  },
398
  {
399
  "update": 225,
400
  "global_step": 921600,
401
- "num_episodes": 173,
402
- "mean_reward": 126.59835072040558,
403
- "mean_length": 5771.42,
404
- "loss": 18.769105911254883,
405
- "sps": 484.9667540902585
406
  },
407
  {
408
  "update": 230,
409
  "global_step": 942080,
410
- "num_episodes": 175,
411
- "mean_reward": 133.06522512435913,
412
- "mean_length": 5865.35,
413
- "loss": 2.0742053985595703,
414
- "sps": 485.5770281538178
415
  },
416
  {
417
  "update": 235,
418
  "global_step": 962560,
419
- "num_episodes": 175,
420
- "mean_reward": 133.06522512435913,
421
- "mean_length": 5865.35,
422
- "loss": 1.1434893608093262,
423
- "sps": 476.861101571966
424
  },
425
  {
426
  "update": 240,
427
  "global_step": 983040,
428
- "num_episodes": 177,
429
- "mean_reward": 136.24841757774354,
430
- "mean_length": 6062.18,
431
- "loss": 2.5528671741485596,
432
- "sps": 519.697272037427
433
  },
434
  {
435
  "update": 245,
436
  "global_step": 1003520,
437
- "num_episodes": 181,
438
- "mean_reward": 162.3702562904358,
439
- "mean_length": 6350.36,
440
- "loss": 2.4525704383850098,
441
- "sps": 2612.0970180967397
442
  },
443
  {
444
  "update": 250,
445
  "global_step": 1024000,
446
- "num_episodes": 183,
447
- "mean_reward": 163.440665807724,
448
- "mean_length": 6546.01,
449
- "loss": 1.370378017425537,
450
- "sps": 1843.3577332235466
451
  },
452
  {
453
  "update": 255,
454
  "global_step": 1044480,
455
- "num_episodes": 183,
456
- "mean_reward": 163.440665807724,
457
- "mean_length": 6546.01,
458
- "loss": 3.996934652328491,
459
- "sps": 1664.629871283792
460
  },
461
  {
462
  "update": 260,
463
  "global_step": 1064960,
464
- "num_episodes": 188,
465
- "mean_reward": 164.5874024486542,
466
- "mean_length": 6629.5,
467
- "loss": 38.7066535949707,
468
- "sps": 275.2025358552949
469
  },
470
  {
471
  "update": 265,
472
  "global_step": 1085440,
473
- "num_episodes": 195,
474
- "mean_reward": 178.587217502594,
475
- "mean_length": 6716.14,
476
- "loss": 48.47567367553711,
477
- "sps": 202.49850145253117
478
  },
479
  {
480
  "update": 270,
481
  "global_step": 1105920,
482
- "num_episodes": 195,
483
- "mean_reward": 178.587217502594,
484
- "mean_length": 6716.14,
485
- "loss": 65.4755859375,
486
- "sps": 336.3470793114214
487
  },
488
  {
489
  "update": 275,
490
  "global_step": 1126400,
491
- "num_episodes": 195,
492
- "mean_reward": 178.587217502594,
493
- "mean_length": 6716.14,
494
- "loss": 122.97542572021484,
495
- "sps": 364.01088862747804
496
  },
497
  {
498
  "update": 280,
499
  "global_step": 1146880,
500
- "num_episodes": 200,
501
- "mean_reward": 217.83971637248993,
502
- "mean_length": 6897.39,
503
- "loss": 16.919631958007812,
504
- "sps": 523.4729171393346
505
  },
506
  {
507
  "update": 285,
508
  "global_step": 1167360,
509
- "num_episodes": 209,
510
- "mean_reward": 269.6603216218948,
511
- "mean_length": 6691.86,
512
- "loss": 37.87599182128906,
513
- "sps": 815.1007717041923
514
  },
515
  {
516
  "update": 290,
517
  "global_step": 1187840,
518
- "num_episodes": 209,
519
- "mean_reward": 269.6603216218948,
520
- "mean_length": 6691.86,
521
- "loss": 93.80265808105469,
522
- "sps": 677.6266786573581
523
  },
524
  {
525
  "update": 295,
526
  "global_step": 1208320,
527
- "num_episodes": 209,
528
- "mean_reward": 269.6603216218948,
529
- "mean_length": 6691.86,
530
- "loss": 2.2752561569213867,
531
- "sps": 796.2728392649855
532
  },
533
  {
534
  "update": 300,
535
  "global_step": 1228800,
536
- "num_episodes": 212,
537
- "mean_reward": 282.0548305273056,
538
- "mean_length": 6888.72,
539
- "loss": 44.09903335571289,
540
- "sps": 435.2776190122807
541
  },
542
  {
543
  "update": 305,
544
  "global_step": 1249280,
545
- "num_episodes": 218,
546
- "mean_reward": 326.81962947368623,
547
- "mean_length": 6987.94,
548
- "loss": 7.751248836517334,
549
- "sps": 605.8631845420061
550
  },
551
  {
552
  "update": 310,
553
  "global_step": 1269760,
554
- "num_episodes": 218,
555
- "mean_reward": 326.81962947368623,
556
- "mean_length": 6987.94,
557
- "loss": 1.2491610050201416,
558
- "sps": 609.0143093574485
559
  },
560
  {
561
  "update": 315,
562
  "global_step": 1290240,
563
- "num_episodes": 218,
564
- "mean_reward": 326.81962947368623,
565
- "mean_length": 6987.94,
566
- "loss": 0.18734177947044373,
567
- "sps": 969.6236925007502
568
  },
569
  {
570
  "update": 320,
571
  "global_step": 1310720,
572
- "num_episodes": 222,
573
- "mean_reward": 354.5139421653748,
574
- "mean_length": 6981.29,
575
- "loss": 5.763525009155273,
576
- "sps": 471.35135085304955
577
  },
578
  {
579
  "update": 325,
580
  "global_step": 1331200,
581
- "num_episodes": 235,
582
- "mean_reward": 373.75524856090544,
583
- "mean_length": 6657.2,
584
- "loss": 8.977479934692383,
585
- "sps": 286.61651802589716
586
  },
587
  {
588
  "update": 330,
589
  "global_step": 1351680,
590
- "num_episodes": 235,
591
- "mean_reward": 373.75524856090544,
592
- "mean_length": 6657.2,
593
- "loss": 4.4195051193237305,
594
- "sps": 540.9233917238028
595
- },
596
- {
597
- "update": 335,
598
- "global_step": 1372160,
599
- "num_episodes": 235,
600
- "mean_reward": 373.75524856090544,
601
- "mean_length": 6657.2,
602
- "loss": 1.350094199180603,
603
- "sps": 577.9686556262599
604
- },
605
- {
606
- "update": 340,
607
- "global_step": 1392640,
608
- "num_episodes": 240,
609
- "mean_reward": 407.44437908649445,
610
- "mean_length": 6657.92,
611
- "loss": 32.73158264160156,
612
- "sps": 852.1673008615437
613
- },
614
- {
615
- "update": 345,
616
- "global_step": 1413120,
617
- "num_episodes": 250,
618
- "mean_reward": 424.2497792673111,
619
- "mean_length": 6448.66,
620
- "loss": 40.15397262573242,
621
- "sps": 149.27274368511317
622
- },
623
- {
624
- "update": 350,
625
- "global_step": 1433600,
626
- "num_episodes": 250,
627
- "mean_reward": 424.2497792673111,
628
- "mean_length": 6448.66,
629
- "loss": 1.2196906805038452,
630
- "sps": 385.0138381705491
631
- },
632
- {
633
- "update": 355,
634
- "global_step": 1454080,
635
- "num_episodes": 250,
636
- "mean_reward": 424.2497792673111,
637
- "mean_length": 6448.66,
638
- "loss": 0.6893861889839172,
639
- "sps": 364.4585616900348
640
- },
641
- {
642
- "update": 360,
643
- "global_step": 1474560,
644
- "num_episodes": 255,
645
- "mean_reward": 406.7418563890457,
646
- "mean_length": 6349.6,
647
- "loss": 21.265850067138672,
648
- "sps": 309.03058801935407
649
- },
650
- {
651
- "update": 365,
652
- "global_step": 1495040,
653
- "num_episodes": 261,
654
- "mean_reward": 472.512622590065,
655
- "mean_length": 6333.64,
656
- "loss": 1.368093490600586,
657
- "sps": 1030.3650029243877
658
- },
659
- {
660
- "update": 370,
661
- "global_step": 1515520,
662
- "num_episodes": 261,
663
- "mean_reward": 472.512622590065,
664
- "mean_length": 6333.64,
665
- "loss": 0.6811725497245789,
666
- "sps": 988.2291227242658
667
- },
668
- {
669
- "update": 375,
670
- "global_step": 1536000,
671
- "num_episodes": 261,
672
- "mean_reward": 472.512622590065,
673
- "mean_length": 6333.64,
674
- "loss": 0.5682379603385925,
675
- "sps": 972.3493096974811
676
- },
677
- {
678
- "update": 380,
679
- "global_step": 1556480,
680
- "num_episodes": 272,
681
- "mean_reward": 484.873091211319,
682
- "mean_length": 6237.44,
683
- "loss": 3.223947048187256,
684
- "sps": 2004.102630433895
685
- },
686
- {
687
- "update": 385,
688
- "global_step": 1576960,
689
- "num_episodes": 277,
690
- "mean_reward": 478.27076225280763,
691
- "mean_length": 6236.56,
692
- "loss": 1.7760697603225708,
693
- "sps": 2832.6289658137994
694
- },
695
- {
696
- "update": 390,
697
- "global_step": 1597440,
698
- "num_episodes": 277,
699
- "mean_reward": 478.27076225280763,
700
- "mean_length": 6236.56,
701
- "loss": -0.11427821964025497,
702
- "sps": 2603.8164819807103
703
- },
704
- {
705
- "update": 395,
706
- "global_step": 1617920,
707
- "num_episodes": 279,
708
- "mean_reward": 462.21016893386843,
709
- "mean_length": 6137.5,
710
- "loss": 0.8288934230804443,
711
- "sps": 2392.5538344727297
712
- },
713
- {
714
- "update": 400,
715
- "global_step": 1638400,
716
- "num_episodes": 284,
717
- "mean_reward": 454.1959769916534,
718
- "mean_length": 6037.93,
719
- "loss": 202.71636962890625,
720
- "sps": 355.5497429467541
721
- },
722
- {
723
- "update": 405,
724
- "global_step": 1658880,
725
- "num_episodes": 289,
726
- "mean_reward": 454.0530729818344,
727
- "mean_length": 6035.23,
728
- "loss": 0.2099057286977768,
729
- "sps": 869.6302546973046
730
- },
731
- {
732
- "update": 410,
733
- "global_step": 1679360,
734
- "num_episodes": 289,
735
- "mean_reward": 454.0530729818344,
736
- "mean_length": 6035.23,
737
- "loss": 0.018864750862121582,
738
- "sps": 837.2921854213746
739
- },
740
- {
741
- "update": 415,
742
- "global_step": 1699840,
743
- "num_episodes": 290,
744
- "mean_reward": 454.8029096364975,
745
- "mean_length": 6035.23,
746
- "loss": 0.08915512263774872,
747
- "sps": 836.5661659973193
748
- },
749
- {
750
- "update": 420,
751
- "global_step": 1720320,
752
- "num_episodes": 294,
753
- "mean_reward": 490.9899611520767,
754
- "mean_length": 6134.91,
755
- "loss": 9.022570610046387,
756
- "sps": 1353.2280787984882
757
- },
758
- {
759
- "update": 425,
760
- "global_step": 1740800,
761
- "num_episodes": 299,
762
- "mean_reward": 470.29952450752256,
763
- "mean_length": 6136.24,
764
- "loss": 1.4281823635101318,
765
- "sps": 731.0193791394075
766
- },
767
- {
768
- "update": 430,
769
- "global_step": 1761280,
770
- "num_episodes": 299,
771
- "mean_reward": 470.29952450752256,
772
- "mean_length": 6136.24,
773
- "loss": 0.12717652320861816,
774
- "sps": 972.3142548127942
775
- },
776
- {
777
- "update": 435,
778
- "global_step": 1781760,
779
- "num_episodes": 300,
780
- "mean_reward": 470.8291971683502,
781
- "mean_length": 6235.51,
782
- "loss": 1.772779107093811,
783
- "sps": 955.1582152126839
784
- },
785
- {
786
- "update": 440,
787
- "global_step": 1802240,
788
- "num_episodes": 305,
789
- "mean_reward": 467.37178787708285,
790
- "mean_length": 6335.05,
791
- "loss": 45.16695022583008,
792
- "sps": 263.8764177763192
793
- },
794
- {
795
- "update": 445,
796
- "global_step": 1822720,
797
- "num_episodes": 310,
798
- "mean_reward": 468.54246531009676,
799
- "mean_length": 6353.87,
800
- "loss": 45.204833984375,
801
- "sps": 765.4018779843883
802
- },
803
- {
804
- "update": 450,
805
- "global_step": 1843200,
806
- "num_episodes": 310,
807
- "mean_reward": 468.54246531009676,
808
- "mean_length": 6353.87,
809
- "loss": 17.418542861938477,
810
- "sps": 716.3527304338312
811
- },
812
- {
813
- "update": 455,
814
- "global_step": 1863680,
815
- "num_episodes": 315,
816
- "mean_reward": 448.0337993764877,
817
- "mean_length": 6059.53,
818
- "loss": 35.471492767333984,
819
- "sps": 752.6285328468343
820
- },
821
- {
822
- "update": 460,
823
- "global_step": 1884160,
824
- "num_episodes": 323,
825
- "mean_reward": 390.5069852733612,
826
- "mean_length": 5868.34,
827
- "loss": 22.768428802490234,
828
- "sps": 1442.0900075832017
829
- },
830
- {
831
- "update": 465,
832
- "global_step": 1904640,
833
- "num_episodes": 325,
834
- "mean_reward": 391.32213854789734,
835
- "mean_length": 6066.81,
836
- "loss": 1.743547797203064,
837
- "sps": 2177.279445045517
838
- },
839
- {
840
- "update": 470,
841
- "global_step": 1925120,
842
- "num_episodes": 325,
843
- "mean_reward": 391.32213854789734,
844
- "mean_length": 6066.81,
845
- "loss": -0.12528716027736664,
846
- "sps": 2128.448401830331
847
- },
848
- {
849
- "update": 475,
850
- "global_step": 1945600,
851
- "num_episodes": 326,
852
- "mean_reward": 386.4432405376434,
853
- "mean_length": 6066.81,
854
- "loss": -0.0739293247461319,
855
- "sps": 2048.6650107889627
856
- },
857
- {
858
- "update": 480,
859
- "global_step": 1966080,
860
- "num_episodes": 331,
861
- "mean_reward": 395.79872769355774,
862
- "mean_length": 6362.12,
863
- "loss": 1.8403631448745728,
864
- "sps": 1177.4940640961918
865
- },
866
- {
867
- "update": 485,
868
- "global_step": 1986560,
869
- "num_episodes": 333,
870
- "mean_reward": 396.0874492454529,
871
- "mean_length": 6461.8,
872
- "loss": 21.89581298828125,
873
- "sps": 700.649536593067
874
- },
875
- {
876
- "update": 490,
877
- "global_step": 2007040,
878
- "num_episodes": 333,
879
- "mean_reward": 396.0874492454529,
880
- "mean_length": 6461.8,
881
- "loss": 17.010061264038086,
882
- "sps": 728.107784506257
883
- },
884
- {
885
- "update": 495,
886
- "global_step": 2027520,
887
- "num_episodes": 338,
888
- "mean_reward": 396.22795082092284,
889
- "mean_length": 6560.14,
890
- "loss": 73.3307876586914,
891
- "sps": 359.9910748355061
892
- },
893
- {
894
- "update": 500,
895
- "global_step": 2048000,
896
- "num_episodes": 347,
897
- "mean_reward": 393.5666734457016,
898
- "mean_length": 6464.03,
899
- "loss": 51.052146911621094,
900
- "sps": 186.4407003799165
901
- },
902
- {
903
- "update": 505,
904
- "global_step": 2068480,
905
- "num_episodes": 353,
906
- "mean_reward": 363.4766788816452,
907
- "mean_length": 6267.46,
908
- "loss": 10.636628150939941,
909
- "sps": 391.3156150286931
910
- },
911
- {
912
- "update": 510,
913
- "global_step": 2088960,
914
- "num_episodes": 353,
915
- "mean_reward": 363.4766788816452,
916
- "mean_length": 6267.46,
917
- "loss": 5.408469200134277,
918
- "sps": 392.291526612045
919
- },
920
- {
921
- "update": 515,
922
- "global_step": 2109440,
923
- "num_episodes": 356,
924
- "mean_reward": 340.8062537956238,
925
- "mean_length": 6159.0,
926
- "loss": 0.881823718547821,
927
- "sps": 887.267479612318
928
- },
929
- {
930
- "update": 520,
931
- "global_step": 2129920,
932
- "num_episodes": 368,
933
- "mean_reward": 318.0571813583374,
934
- "mean_length": 5973.21,
935
- "loss": 29.853599548339844,
936
- "sps": 776.9091606744039
937
- },
938
- {
939
- "update": 525,
940
- "global_step": 2150400,
941
- "num_episodes": 375,
942
- "mean_reward": 277.7464536857605,
943
- "mean_length": 5779.94,
944
- "loss": 0.9935208559036255,
945
- "sps": 2837.3122923609985
946
- },
947
- {
948
- "update": 530,
949
- "global_step": 2170880,
950
- "num_episodes": 375,
951
- "mean_reward": 277.7464536857605,
952
- "mean_length": 5779.94,
953
- "loss": 0.16118717193603516,
954
- "sps": 2674.823411889354
955
- },
956
- {
957
- "update": 535,
958
- "global_step": 2191360,
959
- "num_episodes": 379,
960
- "mean_reward": 276.7646808052063,
961
- "mean_length": 5783.77,
962
- "loss": 0.6545084118843079,
963
- "sps": 2718.7471360755962
964
- },
965
- {
966
- "update": 540,
967
- "global_step": 2211840,
968
- "num_episodes": 387,
969
- "mean_reward": 273.41629776477816,
970
- "mean_length": 5491.19,
971
- "loss": 0.009996440261602402,
972
- "sps": 2662.424914783932
973
- },
974
- {
975
- "update": 545,
976
- "global_step": 2232320,
977
- "num_episodes": 393,
978
- "mean_reward": 237.44847791194917,
979
- "mean_length": 5288.65,
980
- "loss": -0.07593612372875214,
981
- "sps": 2721.3806445317573
982
- },
983
- {
984
- "update": 550,
985
- "global_step": 2252800,
986
- "num_episodes": 393,
987
- "mean_reward": 237.44847791194917,
988
- "mean_length": 5288.65,
989
- "loss": -0.013108465820550919,
990
- "sps": 2496.4586032280413
991
- },
992
- {
993
- "update": 555,
994
- "global_step": 2273280,
995
- "num_episodes": 397,
996
- "mean_reward": 214.58075400829316,
997
- "mean_length": 5193.33,
998
- "loss": 0.026919692754745483,
999
- "sps": 2688.731731235585
1000
- },
1001
- {
1002
- "update": 560,
1003
- "global_step": 2293760,
1004
- "num_episodes": 407,
1005
- "mean_reward": 159.23116432189943,
1006
- "mean_length": 4793.65,
1007
- "loss": 2.2952957153320312,
1008
- "sps": 966.333276372859
1009
- },
1010
- {
1011
- "update": 565,
1012
- "global_step": 2314240,
1013
- "num_episodes": 413,
1014
- "mean_reward": 159.70496647834779,
1015
- "mean_length": 4791.54,
1016
- "loss": 1.3611968755722046,
1017
- "sps": 2550.010825605688
1018
- },
1019
- {
1020
- "update": 570,
1021
- "global_step": 2334720,
1022
- "num_episodes": 413,
1023
- "mean_reward": 159.70496647834779,
1024
- "mean_length": 4791.54,
1025
- "loss": 1.439621090888977,
1026
- "sps": 2484.751376024715
1027
- },
1028
- {
1029
- "update": 575,
1030
- "global_step": 2355200,
1031
- "num_episodes": 417,
1032
- "mean_reward": 158.6872748184204,
1033
- "mean_length": 4786.52,
1034
- "loss": 12.480733871459961,
1035
- "sps": 428.08037217860056
1036
- },
1037
- {
1038
- "update": 580,
1039
- "global_step": 2375680,
1040
- "num_episodes": 424,
1041
- "mean_reward": 126.5506838798523,
1042
- "mean_length": 4703.64,
1043
- "loss": 12.992765426635742,
1044
- "sps": 1524.6391897022352
1045
- },
1046
- {
1047
- "update": 585,
1048
- "global_step": 2396160,
1049
- "num_episodes": 427,
1050
- "mean_reward": 131.45039747238158,
1051
- "mean_length": 4703.64,
1052
- "loss": 11.845376968383789,
1053
- "sps": 487.7409629471044
1054
- },
1055
- {
1056
- "update": 590,
1057
- "global_step": 2416640,
1058
- "num_episodes": 427,
1059
- "mean_reward": 131.45039747238158,
1060
- "mean_length": 4703.64,
1061
- "loss": 0.3306816816329956,
1062
- "sps": 906.10311953954
1063
- },
1064
- {
1065
- "update": 595,
1066
- "global_step": 2437120,
1067
- "num_episodes": 429,
1068
- "mean_reward": 123.15537308216095,
1069
- "mean_length": 4604.79,
1070
- "loss": 0.30884307622909546,
1071
- "sps": 938.566230501384
1072
- },
1073
- {
1074
- "update": 600,
1075
- "global_step": 2457600,
1076
- "num_episodes": 437,
1077
- "mean_reward": 133.3434053325653,
1078
- "mean_length": 4405.56,
1079
- "loss": 0.21275369822978973,
1080
- "sps": 1018.040863452185
1081
- },
1082
- {
1083
- "update": 605,
1084
- "global_step": 2478080,
1085
- "num_episodes": 442,
1086
- "mean_reward": 171.27508597373964,
1087
- "mean_length": 4501.98,
1088
- "loss": 0.47754910588264465,
1089
- "sps": 2289.461146828872
1090
- },
1091
- {
1092
- "update": 610,
1093
- "global_step": 2498560,
1094
- "num_episodes": 442,
1095
- "mean_reward": 171.27508597373964,
1096
- "mean_length": 4501.98,
1097
- "loss": 0.5260030627250671,
1098
- "sps": 2191.020876073232
1099
- },
1100
- {
1101
- "update": 615,
1102
- "global_step": 2519040,
1103
- "num_episodes": 446,
1104
- "mean_reward": 134.8440179824829,
1105
- "mean_length": 4406.13,
1106
- "loss": 0.03903695195913315,
1107
- "sps": 2094.500891632649
1108
- },
1109
- {
1110
- "update": 620,
1111
- "global_step": 2539520,
1112
- "num_episodes": 462,
1113
- "mean_reward": 108.21314854621887,
1114
- "mean_length": 4325.47,
1115
- "loss": 7.972282409667969,
1116
- "sps": 1843.9384231501685
1117
- },
1118
- {
1119
- "update": 625,
1120
- "global_step": 2560000,
1121
- "num_episodes": 467,
1122
- "mean_reward": 103.93524898529053,
1123
- "mean_length": 4323.9,
1124
- "loss": 0.2857341468334198,
1125
- "sps": 2844.0073567316244
1126
- },
1127
- {
1128
- "update": 630,
1129
- "global_step": 2580480,
1130
- "num_episodes": 467,
1131
- "mean_reward": 103.93524898529053,
1132
- "mean_length": 4323.9,
1133
- "loss": -0.0014126598834991455,
1134
- "sps": 2662.6258687696713
1135
- },
1136
- {
1137
- "update": 635,
1138
- "global_step": 2600960,
1139
- "num_episodes": 469,
1140
- "mean_reward": 105.16953000545502,
1141
- "mean_length": 4425.22,
1142
- "loss": -0.11831197887659073,
1143
- "sps": 2712.08306677995
1144
- },
1145
- {
1146
- "update": 640,
1147
- "global_step": 2621440,
1148
- "num_episodes": 474,
1149
- "mean_reward": 110.30714406490326,
1150
- "mean_length": 4816.19,
1151
- "loss": 1.1699683666229248,
1152
- "sps": 2190.63868238328
1153
- },
1154
- {
1155
- "update": 645,
1156
- "global_step": 2641920,
1157
- "num_episodes": 478,
1158
- "mean_reward": 115.03492943286896,
1159
- "mean_length": 4720.8,
1160
- "loss": 0.8407511115074158,
1161
- "sps": 2477.6471668004287
1162
- },
1163
- {
1164
- "update": 650,
1165
- "global_step": 2662400,
1166
- "num_episodes": 478,
1167
- "mean_reward": 115.03492943286896,
1168
- "mean_length": 4720.8,
1169
- "loss": 0.41076016426086426,
1170
- "sps": 2375.4220186429184
1171
- },
1172
- {
1173
- "update": 655,
1174
- "global_step": 2682880,
1175
- "num_episodes": 480,
1176
- "mean_reward": 113.97553071975707,
1177
- "mean_length": 4722.63,
1178
- "loss": 1.8437230587005615,
1179
- "sps": 437.20362392139924
1180
- },
1181
- {
1182
- "update": 660,
1183
- "global_step": 2703360,
1184
- "num_episodes": 488,
1185
- "mean_reward": 121.07888405799866,
1186
- "mean_length": 4921.4,
1187
- "loss": 9.341552734375,
1188
- "sps": 433.1180294441828
1189
- },
1190
- {
1191
- "update": 665,
1192
- "global_step": 2723840,
1193
- "num_episodes": 493,
1194
- "mean_reward": 122.25540927648544,
1195
- "mean_length": 4927.03,
1196
- "loss": 3.2276527881622314,
1197
- "sps": 311.23534986570854
1198
- },
1199
- {
1200
- "update": 670,
1201
- "global_step": 2744320,
1202
- "num_episodes": 493,
1203
- "mean_reward": 122.25540927648544,
1204
- "mean_length": 4927.03,
1205
- "loss": 1.626711130142212,
1206
- "sps": 443.5605257662605
1207
- },
1208
- {
1209
- "update": 675,
1210
- "global_step": 2764800,
1211
- "num_episodes": 497,
1212
- "mean_reward": 128.49539656877516,
1213
- "mean_length": 4990.32,
1214
- "loss": 1.4737187623977661,
1215
- "sps": 557.1794951843984
1216
- },
1217
- {
1218
- "update": 680,
1219
- "global_step": 2785280,
1220
- "num_episodes": 501,
1221
- "mean_reward": 133.27627504587173,
1222
- "mean_length": 5189.29,
1223
- "loss": 3.2421000003814697,
1224
- "sps": 227.14390586968517
1225
- },
1226
- {
1227
- "update": 685,
1228
- "global_step": 2805760,
1229
- "num_episodes": 503,
1230
- "mean_reward": 134.89025341749192,
1231
- "mean_length": 5319.23,
1232
- "loss": 19.16809844970703,
1233
- "sps": 503.2482073037513
1234
- },
1235
- {
1236
- "update": 690,
1237
- "global_step": 2826240,
1238
- "num_episodes": 503,
1239
- "mean_reward": 134.89025341749192,
1240
- "mean_length": 5319.23,
1241
- "loss": 0.3121195435523987,
1242
- "sps": 1237.8635799099984
1243
- },
1244
- {
1245
- "update": 695,
1246
- "global_step": 2846720,
1247
- "num_episodes": 505,
1248
- "mean_reward": 140.31052175283432,
1249
- "mean_length": 5418.9,
1250
- "loss": 10.290376663208008,
1251
- "sps": 894.1160824447923
1252
- },
1253
- {
1254
- "update": 700,
1255
- "global_step": 2867200,
1256
- "num_episodes": 511,
1257
- "mean_reward": 142.8947832798958,
1258
- "mean_length": 5596.12,
1259
- "loss": 8.96629810333252,
1260
- "sps": 2766.323313198168
1261
- },
1262
- {
1263
- "update": 705,
1264
- "global_step": 2887680,
1265
- "num_episodes": 514,
1266
- "mean_reward": 143.8507553601265,
1267
- "mean_length": 5598.39,
1268
- "loss": -0.03336918354034424,
1269
- "sps": 2819.0861765762347
1270
- },
1271
- {
1272
- "update": 710,
1273
- "global_step": 2908160,
1274
- "num_episodes": 514,
1275
- "mean_reward": 143.8507553601265,
1276
- "mean_length": 5598.39,
1277
- "loss": 1.484508991241455,
1278
- "sps": 2804.209085249585
1279
- },
1280
- {
1281
- "update": 715,
1282
- "global_step": 2928640,
1283
- "num_episodes": 516,
1284
- "mean_reward": 146.61101296186447,
1285
- "mean_length": 5697.46,
1286
- "loss": 0.41980481147766113,
1287
- "sps": 2853.5154122563904
1288
- },
1289
- {
1290
- "update": 720,
1291
- "global_step": 2949120,
1292
- "num_episodes": 521,
1293
- "mean_reward": 159.6500442767143,
1294
- "mean_length": 5978.65,
1295
- "loss": 3.3171966075897217,
1296
- "sps": 337.7495209248038
1297
- },
1298
- {
1299
- "update": 725,
1300
- "global_step": 2969600,
1301
- "num_episodes": 523,
1302
- "mean_reward": 165.79403271436692,
1303
- "mean_length": 6078.2,
1304
- "loss": 2.299358606338501,
1305
- "sps": 488.4530631699151
1306
- },
1307
- {
1308
- "update": 730,
1309
- "global_step": 2990080,
1310
- "num_episodes": 523,
1311
- "mean_reward": 165.79403271436692,
1312
- "mean_length": 6078.2,
1313
- "loss": 2.0765156745910645,
1314
- "sps": 622.5255664645315
1315
- },
1316
- {
1317
- "update": 735,
1318
- "global_step": 3010560,
1319
- "num_episodes": 525,
1320
- "mean_reward": 165.0441936945915,
1321
- "mean_length": 6078.2,
1322
- "loss": 0.5185770988464355,
1323
- "sps": 912.7167424710788
1324
- },
1325
- {
1326
- "update": 740,
1327
- "global_step": 3031040,
1328
- "num_episodes": 529,
1329
- "mean_reward": 169.13619287729264,
1330
- "mean_length": 6177.05,
1331
- "loss": 0.8914755582809448,
1332
- "sps": 2277.7924528126773
1333
- },
1334
- {
1335
- "update": 745,
1336
- "global_step": 3051520,
1337
- "num_episodes": 531,
1338
- "mean_reward": 182.76090200662614,
1339
- "mean_length": 6177.05,
1340
- "loss": -0.03897722810506821,
1341
- "sps": 2938.7255967172237
1342
- },
1343
- {
1344
- "update": 750,
1345
- "global_step": 3072000,
1346
- "num_episodes": 531,
1347
- "mean_reward": 182.76090200662614,
1348
- "mean_length": 6177.05,
1349
- "loss": -0.07192348688840866,
1350
- "sps": 2896.8996334668986
1351
- },
1352
- {
1353
- "update": 755,
1354
- "global_step": 3092480,
1355
- "num_episodes": 535,
1356
- "mean_reward": 176.57018686056136,
1357
- "mean_length": 6276.7,
1358
- "loss": 17.123456954956055,
1359
- "sps": 1361.0160818187624
1360
- },
1361
- {
1362
- "update": 760,
1363
- "global_step": 3112960,
1364
- "num_episodes": 539,
1365
- "mean_reward": 143.45233328580855,
1366
- "mean_length": 6475.8,
1367
- "loss": 1.7440789937973022,
1368
- "sps": 431.96268578339885
1369
- },
1370
- {
1371
- "update": 765,
1372
- "global_step": 3133440,
1373
- "num_episodes": 541,
1374
- "mean_reward": 143.19214475393295,
1375
- "mean_length": 6476.28,
1376
- "loss": 1.2793841361999512,
1377
- "sps": 694.1149265905251
1378
- },
1379
- {
1380
- "update": 770,
1381
- "global_step": 3153920,
1382
- "num_episodes": 541,
1383
- "mean_reward": 143.19214475393295,
1384
- "mean_length": 6476.28,
1385
- "loss": 27.601778030395508,
1386
- "sps": 666.6380756689646
1387
- },
1388
- {
1389
- "update": 775,
1390
- "global_step": 3174400,
1391
- "num_episodes": 545,
1392
- "mean_reward": 160.25242776632308,
1393
- "mean_length": 6772.51,
1394
- "loss": 3.569728136062622,
1395
- "sps": 235.53664992575125
1396
- },
1397
- {
1398
- "update": 780,
1399
- "global_step": 3194880,
1400
- "num_episodes": 548,
1401
- "mean_reward": 161.8272612643242,
1402
- "mean_length": 6969.88,
1403
- "loss": 1.578532338142395,
1404
- "sps": 345.17499217473494
1405
- },
1406
- {
1407
- "update": 785,
1408
- "global_step": 3215360,
1409
- "num_episodes": 550,
1410
- "mean_reward": 161.4852223944664,
1411
- "mean_length": 7031.97,
1412
- "loss": 6.10168981552124,
1413
- "sps": 321.34142444031875
1414
- },
1415
- {
1416
- "update": 790,
1417
- "global_step": 3235840,
1418
- "num_episodes": 550,
1419
- "mean_reward": 161.4852223944664,
1420
- "mean_length": 7031.97,
1421
- "loss": 0.8625541925430298,
1422
- "sps": 739.6504838117347
1423
- },
1424
- {
1425
- "update": 795,
1426
- "global_step": 3256320,
1427
- "num_episodes": 554,
1428
- "mean_reward": 162.4044385075569,
1429
- "mean_length": 7219.96,
1430
- "loss": 2.502009868621826,
1431
- "sps": 191.1365478881192
1432
- },
1433
- {
1434
- "update": 800,
1435
- "global_step": 3276800,
1436
- "num_episodes": 563,
1437
- "mean_reward": 162.7963340115547,
1438
- "mean_length": 7220.73,
1439
- "loss": 8.730064392089844,
1440
- "sps": 386.9629605603807
1441
- },
1442
- {
1443
- "update": 805,
1444
- "global_step": 3297280,
1445
- "num_episodes": 566,
1446
- "mean_reward": 165.31176669359206,
1447
- "mean_length": 7360.77,
1448
- "loss": 48.92124557495117,
1449
- "sps": 705.0215184629429
1450
- },
1451
- {
1452
- "update": 810,
1453
- "global_step": 3317760,
1454
- "num_episodes": 566,
1455
- "mean_reward": 165.31176669359206,
1456
- "mean_length": 7360.77,
1457
- "loss": 0.6651354432106018,
1458
- "sps": 801.9215448692757
1459
- },
1460
- {
1461
- "update": 815,
1462
- "global_step": 3338240,
1463
- "num_episodes": 569,
1464
- "mean_reward": 180.35619497060776,
1465
- "mean_length": 7453.65,
1466
- "loss": 0.9045634865760803,
1467
- "sps": 2078.869268499869
1468
- },
1469
- {
1470
- "update": 820,
1471
- "global_step": 3358720,
1472
- "num_episodes": 575,
1473
- "mean_reward": 193.25147196531296,
1474
- "mean_length": 7258.9,
1475
- "loss": 5.477941989898682,
1476
- "sps": 1253.1294143150922
1477
- },
1478
- {
1479
- "update": 825,
1480
- "global_step": 3379200,
1481
- "num_episodes": 581,
1482
- "mean_reward": 185.37936263799668,
1483
- "mean_length": 7158.59,
1484
- "loss": 0.19656488299369812,
1485
- "sps": 3241.297612628403
1486
- },
1487
- {
1488
- "update": 830,
1489
- "global_step": 3399680,
1490
- "num_episodes": 581,
1491
- "mean_reward": 185.37936263799668,
1492
- "mean_length": 7158.59,
1493
- "loss": 0.5765475034713745,
1494
- "sps": 2869.6879773523438
1495
- },
1496
- {
1497
- "update": 835,
1498
- "global_step": 3420160,
1499
- "num_episodes": 583,
1500
- "mean_reward": 185.1090102362633,
1501
- "mean_length": 7158.59,
1502
- "loss": 2.0618038177490234,
1503
- "sps": 415.0631773970818
1504
- },
1505
- {
1506
- "update": 840,
1507
- "global_step": 3440640,
1508
- "num_episodes": 588,
1509
- "mean_reward": 189.7356357884407,
1510
- "mean_length": 7352.61,
1511
- "loss": 9.016275405883789,
1512
- "sps": 227.3230128869974
1513
- },
1514
- {
1515
- "update": 845,
1516
- "global_step": 3461120,
1517
- "num_episodes": 591,
1518
- "mean_reward": 189.1485662293434,
1519
- "mean_length": 7394.25,
1520
- "loss": 22.739429473876953,
1521
- "sps": 314.2023925066658
1522
- },
1523
- {
1524
- "update": 850,
1525
- "global_step": 3481600,
1526
- "num_episodes": 592,
1527
- "mean_reward": 189.12477667808534,
1528
- "mean_length": 7443.48,
1529
- "loss": 1.6546752452850342,
1530
- "sps": 957.4586504289063
1531
- },
1532
- {
1533
- "update": 855,
1534
- "global_step": 3502080,
1535
- "num_episodes": 594,
1536
- "mean_reward": 187.17996717453002,
1537
- "mean_length": 7507.25,
1538
- "loss": 6.797643184661865,
1539
- "sps": 412.71199087606476
1540
- },
1541
- {
1542
- "update": 860,
1543
- "global_step": 3522560,
1544
- "num_episodes": 596,
1545
- "mean_reward": 193.70433765888214,
1546
- "mean_length": 7638.31,
1547
- "loss": 0.2814023196697235,
1548
- "sps": 809.1402642140959
1549
- },
1550
- {
1551
- "update": 865,
1552
- "global_step": 3543040,
1553
- "num_episodes": 600,
1554
- "mean_reward": 192.16364938259125,
1555
- "mean_length": 7538.65,
1556
- "loss": 4.5296950340271,
1557
- "sps": 934.0269243194301
1558
- },
1559
- {
1560
- "update": 870,
1561
- "global_step": 3563520,
1562
- "num_episodes": 602,
1563
- "mean_reward": 191.83141272068025,
1564
- "mean_length": 7569.62,
1565
- "loss": 0.12454970180988312,
1566
- "sps": 459.3460462463923
1567
- },
1568
- {
1569
- "update": 875,
1570
- "global_step": 3584000,
1571
- "num_episodes": 605,
1572
- "mean_reward": 185.7972752714157,
1573
- "mean_length": 7412.33,
1574
- "loss": 0.010119002312421799,
1575
- "sps": 2984.2673687681013
1576
- },
1577
- {
1578
- "update": 880,
1579
- "global_step": 3604480,
1580
- "num_episodes": 607,
1581
- "mean_reward": 191.80364267826081,
1582
- "mean_length": 7423.25,
1583
- "loss": 1.5759342908859253,
1584
- "sps": 2498.994384783772
1585
- },
1586
- {
1587
- "update": 885,
1588
- "global_step": 3624960,
1589
- "num_episodes": 611,
1590
- "mean_reward": 197.66494411945342,
1591
- "mean_length": 7523.71,
1592
- "loss": 0.07411646842956543,
1593
- "sps": 2398.8293936193445
1594
- },
1595
- {
1596
- "update": 890,
1597
- "global_step": 3645440,
1598
- "num_episodes": 612,
1599
- "mean_reward": 199.16475445270538,
1600
- "mean_length": 7523.71,
1601
- "loss": -0.09818007051944733,
1602
- "sps": 2494.5939188365055
1603
- },
1604
- {
1605
- "update": 895,
1606
- "global_step": 3665920,
1607
- "num_episodes": 616,
1608
- "mean_reward": 198.68793545246123,
1609
- "mean_length": 7520.61,
1610
- "loss": 0.07828275859355927,
1611
- "sps": 1930.4566445688752
1612
- },
1613
- {
1614
- "update": 900,
1615
- "global_step": 3686400,
1616
- "num_episodes": 618,
1617
- "mean_reward": 198.65358025074005,
1618
- "mean_length": 7422.9,
1619
- "loss": 0.6579947471618652,
1620
- "sps": 2636.224514742254
1621
- },
1622
- {
1623
- "update": 905,
1624
- "global_step": 3706880,
1625
- "num_episodes": 622,
1626
- "mean_reward": 200.68803305149078,
1627
- "mean_length": 7420.23,
1628
- "loss": 1.256749153137207,
1629
- "sps": 2869.3812284251953
1630
- },
1631
- {
1632
- "update": 910,
1633
- "global_step": 3727360,
1634
- "num_episodes": 624,
1635
- "mean_reward": 198.5566199350357,
1636
- "mean_length": 7420.23,
1637
- "loss": 1.2612106800079346,
1638
- "sps": 2818.4419350605035
1639
- },
1640
- {
1641
- "update": 915,
1642
- "global_step": 3747840,
1643
- "num_episodes": 626,
1644
- "mean_reward": 205.93744747638704,
1645
- "mean_length": 7420.23,
1646
- "loss": 0.7723605036735535,
1647
- "sps": 2519.097807723044
1648
- },
1649
- {
1650
- "update": 920,
1651
- "global_step": 3768320,
1652
- "num_episodes": 628,
1653
- "mean_reward": 201.4210061311722,
1654
- "mean_length": 7420.23,
1655
- "loss": 7.346652507781982,
1656
- "sps": 2242.0312258079334
1657
- },
1658
- {
1659
- "update": 925,
1660
- "global_step": 3788800,
1661
- "num_episodes": 630,
1662
- "mean_reward": 189.0587257051468,
1663
- "mean_length": 7420.23,
1664
- "loss": 5.038862705230713,
1665
- "sps": 383.270562834207
1666
- },
1667
- {
1668
- "update": 930,
1669
- "global_step": 3809280,
1670
- "num_episodes": 633,
1671
- "mean_reward": 189.46342292308807,
1672
- "mean_length": 7360.68,
1673
- "loss": 0.39050325751304626,
1674
- "sps": 1858.9704973292514
1675
- },
1676
- {
1677
- "update": 935,
1678
- "global_step": 3829760,
1679
- "num_episodes": 635,
1680
- "mean_reward": 198.2463042974472,
1681
- "mean_length": 7459.69,
1682
- "loss": 53.42470169067383,
1683
- "sps": 1069.4430639241739
1684
- },
1685
- {
1686
- "update": 940,
1687
- "global_step": 3850240,
1688
- "num_episodes": 636,
1689
- "mean_reward": 205.62649121761322,
1690
- "mean_length": 7459.69,
1691
- "loss": 1.097938895225525,
1692
- "sps": 1447.2739010301416
1693
- },
1694
- {
1695
- "update": 945,
1696
- "global_step": 3870720,
1697
- "num_episodes": 638,
1698
- "mean_reward": 220.27144483089447,
1699
- "mean_length": 7459.69,
1700
- "loss": 0.30668070912361145,
1701
- "sps": 2118.085155562087
1702
- },
1703
- {
1704
- "update": 950,
1705
- "global_step": 3891200,
1706
- "num_episodes": 641,
1707
- "mean_reward": 228.60497048854828,
1708
- "mean_length": 7558.87,
1709
- "loss": 1.160091519355774,
1710
- "sps": 1340.3946598207713
1711
- },
1712
- {
1713
- "update": 955,
1714
- "global_step": 3911680,
1715
- "num_episodes": 643,
1716
- "mean_reward": 229.18769864559172,
1717
- "mean_length": 7558.87,
1718
- "loss": 1.3600318431854248,
1719
- "sps": 2017.4738503217943
1720
- },
1721
- {
1722
- "update": 960,
1723
- "global_step": 3932160,
1724
- "num_episodes": 645,
1725
- "mean_reward": 226.2710527563095,
1726
- "mean_length": 7558.87,
1727
- "loss": 0.5724247097969055,
1728
- "sps": 2190.27561016469
1729
- },
1730
- {
1731
- "update": 965,
1732
- "global_step": 3952640,
1733
- "num_episodes": 647,
1734
- "mean_reward": 233.28511716365813,
1735
- "mean_length": 7558.87,
1736
- "loss": 0.6445204019546509,
1737
- "sps": 425.1404956260307
1738
- },
1739
- {
1740
- "update": 970,
1741
- "global_step": 3973120,
1742
- "num_episodes": 649,
1743
- "mean_reward": 234.76467885494233,
1744
- "mean_length": 7558.87,
1745
- "loss": 5.908900737762451,
1746
- "sps": 291.87626703381966
1747
- },
1748
- {
1749
- "update": 975,
1750
- "global_step": 3993600,
1751
- "num_episodes": 651,
1752
- "mean_reward": 236.37012548923494,
1753
- "mean_length": 7592.95,
1754
- "loss": 16.730175018310547,
1755
- "sps": 386.3945177442575
1756
- },
1757
- {
1758
- "update": 980,
1759
- "global_step": 4014080,
1760
- "num_episodes": 655,
1761
- "mean_reward": 240.30179923057557,
1762
- "mean_length": 7654.13,
1763
- "loss": 27.565311431884766,
1764
- "sps": 653.7194172279133
1765
- },
1766
- {
1767
- "update": 985,
1768
- "global_step": 4034560,
1769
- "num_episodes": 656,
1770
- "mean_reward": 241.5855724811554,
1771
- "mean_length": 7750.3,
1772
- "loss": 1.2501119375228882,
1773
- "sps": 1185.5585164533727
1774
- },
1775
- {
1776
- "update": 990,
1777
- "global_step": 4055040,
1778
- "num_episodes": 657,
1779
- "mean_reward": 242.3764306640625,
1780
- "mean_length": 7849.96,
1781
- "loss": 0.11368373781442642,
1782
- "sps": 953.2503801655828
1783
- },
1784
- {
1785
- "update": 995,
1786
- "global_step": 4075520,
1787
- "num_episodes": 660,
1788
- "mean_reward": 266.251427526474,
1789
- "mean_length": 8045.38,
1790
- "loss": 5.969295978546143,
1791
- "sps": 384.7242265220645
1792
- },
1793
- {
1794
- "update": 1000,
1795
- "global_step": 4096000,
1796
- "num_episodes": 664,
1797
- "mean_reward": 282.5860444736481,
1798
- "mean_length": 8176.85,
1799
- "loss": 27.033348083496094,
1800
- "sps": 376.322863993888
1801
- },
1802
- {
1803
- "update": 1005,
1804
- "global_step": 4116480,
1805
- "num_episodes": 665,
1806
- "mean_reward": 283.86758942604064,
1807
- "mean_length": 8232.26,
1808
- "loss": 7.4467058181762695,
1809
- "sps": 374.6383841753984
1810
- },
1811
- {
1812
- "update": 1010,
1813
- "global_step": 4136960,
1814
- "num_episodes": 666,
1815
- "mean_reward": 294.7833961582184,
1816
- "mean_length": 8232.26,
1817
- "loss": 17.35069465637207,
1818
- "sps": 308.584037781751
1819
- },
1820
- {
1821
- "update": 1015,
1822
- "global_step": 4157440,
1823
- "num_episodes": 669,
1824
- "mean_reward": 281.23537853717806,
1825
- "mean_length": 8289.35,
1826
- "loss": 1.9592938423156738,
1827
- "sps": 576.3621863402531
1828
- },
1829
- {
1830
- "update": 1020,
1831
- "global_step": 4177920,
1832
- "num_episodes": 675,
1833
- "mean_reward": 275.6631519651413,
1834
- "mean_length": 8186.57,
1835
- "loss": 23.52161407470703,
1836
- "sps": 661.3091435538709
1837
- },
1838
- {
1839
- "update": 1025,
1840
- "global_step": 4198400,
1841
- "num_episodes": 678,
1842
- "mean_reward": 279.6017733335495,
1843
- "mean_length": 8183.74,
1844
- "loss": 29.961881637573242,
1845
- "sps": 559.0450859302445
1846
- },
1847
- {
1848
- "update": 1030,
1849
- "global_step": 4218880,
1850
- "num_episodes": 680,
1851
- "mean_reward": 303.8355294418335,
1852
- "mean_length": 8282.54,
1853
- "loss": 0.7290852069854736,
1854
- "sps": 1641.6323017599234
1855
- },
1856
- {
1857
- "update": 1035,
1858
- "global_step": 4239360,
1859
- "num_episodes": 682,
1860
- "mean_reward": 309.25065141677857,
1861
- "mean_length": 8380.25,
1862
- "loss": 7.585202693939209,
1863
- "sps": 290.23415915491415
1864
- },
1865
- {
1866
- "update": 1040,
1867
- "global_step": 4259840,
1868
- "num_episodes": 685,
1869
- "mean_reward": 307.12660905838015,
1870
- "mean_length": 8380.25,
1871
- "loss": 6.446464538574219,
1872
- "sps": 383.71739315963373
1873
- },
1874
- {
1875
- "update": 1045,
1876
- "global_step": 4280320,
1877
- "num_episodes": 687,
1878
- "mean_reward": 303.14754570007324,
1879
- "mean_length": 8380.25,
1880
- "loss": 9.89684772491455,
1881
- "sps": 694.9770930488776
1882
- },
1883
- {
1884
- "update": 1050,
1885
- "global_step": 4300800,
1886
- "num_episodes": 688,
1887
- "mean_reward": 303.04791251659395,
1888
- "mean_length": 8462.37,
1889
- "loss": 1.8083292245864868,
1890
- "sps": 1040.0758682399094
1891
- },
1892
- {
1893
- "update": 1055,
1894
- "global_step": 4321280,
1895
- "num_episodes": 690,
1896
- "mean_reward": 303.53817603588107,
1897
- "mean_length": 8516.23,
1898
- "loss": 1.01134192943573,
1899
- "sps": 3137.679652522162
1900
- },
1901
- {
1902
- "update": 1060,
1903
- "global_step": 4341760,
1904
- "num_episodes": 693,
1905
- "mean_reward": 309.7838798046112,
1906
- "mean_length": 8562.16,
1907
- "loss": 14.91142749786377,
1908
- "sps": 214.3493374321697
1909
- },
1910
- {
1911
- "update": 1065,
1912
- "global_step": 4362240,
1913
- "num_episodes": 695,
1914
- "mean_reward": 305.9636838245392,
1915
- "mean_length": 8596.65,
1916
- "loss": 25.306856155395508,
1917
- "sps": 520.1481805059935
1918
- },
1919
- {
1920
- "update": 1070,
1921
- "global_step": 4382720,
1922
- "num_episodes": 698,
1923
- "mean_reward": 303.673222618103,
1924
- "mean_length": 8440.95,
1925
- "loss": 1.5045527219772339,
1926
- "sps": 1217.974691455878
1927
- },
1928
- {
1929
- "update": 1075,
1930
- "global_step": 4403200,
1931
- "num_episodes": 701,
1932
- "mean_reward": 304.443781414032,
1933
- "mean_length": 8557.61,
1934
- "loss": 3.09621524810791,
1935
- "sps": 3334.805184943337
1936
- },
1937
- {
1938
- "update": 1080,
1939
- "global_step": 4423680,
1940
- "num_episodes": 702,
1941
- "mean_reward": 303.1939596557617,
1942
- "mean_length": 8557.61,
1943
- "loss": 0.5530338883399963,
1944
- "sps": 3357.0436968093577
1945
- },
1946
- {
1947
- "update": 1085,
1948
- "global_step": 4444160,
1949
- "num_episodes": 706,
1950
- "mean_reward": 297.1481484127045,
1951
- "mean_length": 8521.02,
1952
- "loss": 7.937706470489502,
1953
- "sps": 1838.4046275928872
1954
- },
1955
- {
1956
- "update": 1090,
1957
- "global_step": 4464640,
1958
- "num_episodes": 709,
1959
- "mean_reward": 294.23652092933656,
1960
- "mean_length": 8421.99,
1961
- "loss": 1.2086868286132812,
1962
- "sps": 2518.910916673558
1963
- },
1964
- {
1965
- "update": 1095,
1966
- "global_step": 4485120,
1967
- "num_episodes": 713,
1968
- "mean_reward": 291.4626713657379,
1969
- "mean_length": 8418.82,
1970
- "loss": 0.07519354671239853,
1971
- "sps": 2391.4890715912597
1972
- },
1973
- {
1974
- "update": 1100,
1975
- "global_step": 4505600,
1976
- "num_episodes": 714,
1977
- "mean_reward": 295.5940579319,
1978
- "mean_length": 8418.82,
1979
- "loss": -0.04633036255836487,
1980
- "sps": 2309.370211128856
1981
- },
1982
- {
1983
- "update": 1105,
1984
- "global_step": 4526080,
1985
- "num_episodes": 717,
1986
- "mean_reward": 308.55377667427064,
1987
- "mean_length": 8615.8,
1988
- "loss": 6.805793762207031,
1989
- "sps": 1932.5391546325443
1990
- },
1991
- {
1992
- "update": 1110,
1993
- "global_step": 4546560,
1994
- "num_episodes": 718,
1995
- "mean_reward": 308.8043742084503,
1996
- "mean_length": 8615.8,
1997
- "loss": 1.5425922870635986,
1998
- "sps": 3048.7004653828785
1999
- },
2000
- {
2001
- "update": 1115,
2002
- "global_step": 4567040,
2003
- "num_episodes": 723,
2004
- "mean_reward": 303.7773459148407,
2005
- "mean_length": 8522.71,
2006
- "loss": 0.27959662675857544,
2007
- "sps": 2995.9554614172757
2008
- },
2009
- {
2010
- "update": 1120,
2011
- "global_step": 4587520,
2012
- "num_episodes": 724,
2013
- "mean_reward": 303.2645093822479,
2014
- "mean_length": 8522.71,
2015
- "loss": -0.0757850706577301,
2016
- "sps": 3148.672000633405
2017
- },
2018
- {
2019
- "update": 1125,
2020
- "global_step": 4608000,
2021
- "num_episodes": 728,
2022
- "mean_reward": 295.8857748699188,
2023
- "mean_length": 8522.71,
2024
- "loss": 0.8416875004768372,
2025
- "sps": 1310.0236701271228
2026
- },
2027
- {
2028
- "update": 1130,
2029
- "global_step": 4628480,
2030
- "num_episodes": 728,
2031
- "mean_reward": 295.8857748699188,
2032
- "mean_length": 8522.71,
2033
- "loss": 3.5039658546447754,
2034
- "sps": 2038.3808422065272
2035
- },
2036
- {
2037
- "update": 1135,
2038
- "global_step": 4648960,
2039
- "num_episodes": 733,
2040
- "mean_reward": 288.808697681427,
2041
- "mean_length": 8383.94,
2042
- "loss": 0.9907295107841492,
2043
- "sps": 432.1509684368163
2044
- },
2045
- {
2046
- "update": 1140,
2047
- "global_step": 4669440,
2048
- "num_episodes": 734,
2049
- "mean_reward": 288.7881833076477,
2050
- "mean_length": 8383.94,
2051
- "loss": 5.849100112915039,
2052
- "sps": 743.4081065939373
2053
- },
2054
- {
2055
- "update": 1145,
2056
- "global_step": 4689920,
2057
- "num_episodes": 742,
2058
- "mean_reward": 248.96954289913177,
2059
- "mean_length": 8035.29,
2060
- "loss": 5.8911590576171875,
2061
- "sps": 439.3842370241788
2062
- },
2063
- {
2064
- "update": 1150,
2065
- "global_step": 4710400,
2066
- "num_episodes": 743,
2067
- "mean_reward": 247.8361682987213,
2068
- "mean_length": 7942.3,
2069
- "loss": 2.5104424953460693,
2070
- "sps": 2091.825912976944
2071
- },
2072
- {
2073
- "update": 1155,
2074
- "global_step": 4730880,
2075
- "num_episodes": 746,
2076
- "mean_reward": 247.5757204723358,
2077
- "mean_length": 7843.08,
2078
- "loss": 0.02520643174648285,
2079
- "sps": 2215.1389764020146
2080
- },
2081
- {
2082
- "update": 1160,
2083
- "global_step": 4751360,
2084
- "num_episodes": 748,
2085
- "mean_reward": 237.4100481700897,
2086
- "mean_length": 7843.08,
2087
- "loss": 3.045536756515503,
2088
- "sps": 1354.2367583329549
2089
- },
2090
- {
2091
- "update": 1165,
2092
- "global_step": 4771840,
2093
- "num_episodes": 753,
2094
- "mean_reward": 254.61865887641906,
2095
- "mean_length": 7743.82,
2096
- "loss": 18.703535079956055,
2097
- "sps": 1982.4466952604278
2098
- },
2099
- {
2100
- "update": 1170,
2101
- "global_step": 4792320,
2102
- "num_episodes": 753,
2103
- "mean_reward": 254.61865887641906,
2104
- "mean_length": 7743.82,
2105
- "loss": 0.33046531677246094,
2106
- "sps": 2095.2503659093586
2107
- },
2108
- {
2109
- "update": 1175,
2110
- "global_step": 4812800,
2111
- "num_episodes": 755,
2112
- "mean_reward": 253.8280854511261,
2113
- "mean_length": 7787.79,
2114
- "loss": 3.0896127223968506,
2115
- "sps": 422.3812735467675
2116
- },
2117
- {
2118
- "update": 1180,
2119
- "global_step": 4833280,
2120
- "num_episodes": 758,
2121
- "mean_reward": 264.60756063461304,
2122
- "mean_length": 7709.59,
2123
- "loss": -0.09283498674631119,
2124
- "sps": 2676.2813899197204
2125
- },
2126
- {
2127
- "update": 1185,
2128
- "global_step": 4853760,
2129
- "num_episodes": 763,
2130
- "mean_reward": 239.50925126552582,
2131
- "mean_length": 7671.32,
2132
- "loss": 0.09459003806114197,
2133
- "sps": 2538.8112544970772
2134
- },
2135
- {
2136
- "update": 1190,
2137
- "global_step": 4874240,
2138
- "num_episodes": 763,
2139
- "mean_reward": 239.50925126552582,
2140
- "mean_length": 7671.32,
2141
- "loss": 0.17775702476501465,
2142
- "sps": 2975.422989387635
2143
- },
2144
- {
2145
- "update": 1195,
2146
- "global_step": 4894720,
2147
- "num_episodes": 767,
2148
- "mean_reward": 218.03613798141478,
2149
- "mean_length": 7614.22,
2150
- "loss": 3.9349420070648193,
2151
- "sps": 417.34421892596595
2152
- },
2153
- {
2154
- "update": 1200,
2155
- "global_step": 4915200,
2156
- "num_episodes": 768,
2157
- "mean_reward": 218.03613859176636,
2158
- "mean_length": 7614.22,
2159
- "loss": 4.028732776641846,
2160
- "sps": 784.4513250410687
2161
- },
2162
- {
2163
- "update": 1205,
2164
- "global_step": 4935680,
2165
- "num_episodes": 773,
2166
- "mean_reward": 208.71825884342195,
2167
- "mean_length": 7713.33,
2168
- "loss": 10.78580093383789,
2169
- "sps": 734.3705869944031
2170
- },
2171
- {
2172
- "update": 1210,
2173
- "global_step": 4956160,
2174
- "num_episodes": 774,
2175
- "mean_reward": 208.632105717659,
2176
- "mean_length": 7808.29,
2177
- "loss": 3.5958919525146484,
2178
- "sps": 1305.6310352836174
2179
- },
2180
- {
2181
- "update": 1215,
2182
- "global_step": 4976640,
2183
- "num_episodes": 777,
2184
- "mean_reward": 204.77496297359465,
2185
- "mean_length": 7805.2,
2186
- "loss": 3.0023584365844727,
2187
- "sps": 2482.0292324013326
2188
- },
2189
- {
2190
- "update": 1220,
2191
- "global_step": 4997120,
2192
- "num_episodes": 779,
2193
- "mean_reward": 196.00456192016603,
2194
- "mean_length": 7808.44,
2195
- "loss": 11.420112609863281,
2196
- "sps": 2348.8046891139184
2197
- },
2198
- {
2199
- "update": 1225,
2200
- "global_step": 5017600,
2201
- "num_episodes": 786,
2202
- "mean_reward": 184.82681788921357,
2203
- "mean_length": 7520.75,
2204
- "loss": 9.389796257019043,
2205
- "sps": 403.47715369548195
2206
- },
2207
- {
2208
- "update": 1230,
2209
- "global_step": 5038080,
2210
- "num_episodes": 789,
2211
- "mean_reward": 185.08837336063385,
2212
- "mean_length": 7350.11,
2213
- "loss": 1.2659046649932861,
2214
- "sps": 740.0947252642347
2215
- },
2216
- {
2217
- "update": 1235,
2218
- "global_step": 5058560,
2219
- "num_episodes": 793,
2220
- "mean_reward": 179.18952730178833,
2221
- "mean_length": 7160.46,
2222
- "loss": 0.042632922530174255,
2223
- "sps": 2798.3588240027393
2224
- },
2225
- {
2226
- "update": 1240,
2227
- "global_step": 5079040,
2228
- "num_episodes": 794,
2229
- "mean_reward": 184.8410611152649,
2230
- "mean_length": 7160.46,
2231
- "loss": 1.8178130388259888,
2232
- "sps": 2741.015198709958
2233
- },
2234
- {
2235
- "update": 1245,
2236
- "global_step": 5099520,
2237
- "num_episodes": 799,
2238
- "mean_reward": 189.7154534816742,
2239
- "mean_length": 7117.64,
2240
- "loss": 2.4596240520477295,
2241
- "sps": 2848.528616655762
2242
- },
2243
- {
2244
- "update": 1250,
2245
- "global_step": 5120000,
2246
- "num_episodes": 802,
2247
- "mean_reward": 191.06395349502563,
2248
- "mean_length": 7035.96,
2249
- "loss": 3.05761981010437,
2250
- "sps": 2957.866130229398
2251
- },
2252
- {
2253
- "update": 1255,
2254
- "global_step": 5140480,
2255
- "num_episodes": 807,
2256
- "mean_reward": 184.37625044822693,
2257
- "mean_length": 6937.2,
2258
- "loss": 8.337796211242676,
2259
- "sps": 797.4964725831112
2260
- },
2261
- {
2262
- "update": 1260,
2263
- "global_step": 5160960,
2264
- "num_episodes": 811,
2265
- "mean_reward": 183.53424713134766,
2266
- "mean_length": 6839.0,
2267
- "loss": 4.347324371337891,
2268
- "sps": 2885.534959076487
2269
- },
2270
- {
2271
- "update": 1265,
2272
- "global_step": 5181440,
2273
- "num_episodes": 814,
2274
- "mean_reward": 179.14370263576507,
2275
- "mean_length": 6841.38,
2276
- "loss": 0.9887397289276123,
2277
- "sps": 2726.0079616540743
2278
- },
2279
- {
2280
- "update": 1270,
2281
- "global_step": 5201920,
2282
- "num_episodes": 816,
2283
- "mean_reward": 182.02527619838713,
2284
- "mean_length": 6841.38,
2285
- "loss": 3.567499876022339,
2286
- "sps": 2548.7726014650657
2287
- },
2288
- {
2289
- "update": 1275,
2290
- "global_step": 5222400,
2291
- "num_episodes": 818,
2292
- "mean_reward": 167.24890702724457,
2293
- "mean_length": 6841.38,
2294
- "loss": 1.4451591968536377,
2295
- "sps": 312.6484055556043
2296
- },
2297
- {
2298
- "update": 1280,
2299
- "global_step": 5242880,
2300
- "num_episodes": 821,
2301
- "mean_reward": 165.29594656467438,
2302
- "mean_length": 6837.89,
2303
- "loss": 1.7843670845031738,
2304
- "sps": 570.2931373528285
2305
- },
2306
- {
2307
- "update": 1285,
2308
- "global_step": 5263360,
2309
- "num_episodes": 828,
2310
- "mean_reward": 152.66287196159362,
2311
- "mean_length": 6498.54,
2312
- "loss": 5.005251407623291,
2313
- "sps": 272.3373610296092
2314
- },
2315
- {
2316
- "update": 1290,
2317
- "global_step": 5283840,
2318
- "num_episodes": 830,
2319
- "mean_reward": 153.68320658683777,
2320
- "mean_length": 6498.54,
2321
- "loss": 8.443866729736328,
2322
- "sps": 292.14330020270444
2323
- },
2324
- {
2325
- "update": 1295,
2326
- "global_step": 5304320,
2327
- "num_episodes": 831,
2328
- "mean_reward": 157.06462443351745,
2329
- "mean_length": 6498.54,
2330
- "loss": 2.2017781734466553,
2331
- "sps": 265.6513386532661
2332
- },
2333
- {
2334
- "update": 1300,
2335
- "global_step": 5324800,
2336
- "num_episodes": 834,
2337
- "mean_reward": 158.8159614944458,
2338
- "mean_length": 6680.51,
2339
- "loss": 6.665990352630615,
2340
- "sps": 563.3260777169
2341
- },
2342
- {
2343
- "update": 1305,
2344
- "global_step": 5345280,
2345
- "num_episodes": 840,
2346
- "mean_reward": 145.94160705566406,
2347
- "mean_length": 6594.4,
2348
- "loss": 10.99843978881836,
2349
- "sps": 1955.0578156421066
2350
- },
2351
- {
2352
- "update": 1310,
2353
- "global_step": 5365760,
2354
- "num_episodes": 843,
2355
- "mean_reward": 145.8064595746994,
2356
- "mean_length": 6590.29,
2357
- "loss": 0.6882038116455078,
2358
- "sps": 382.4471874145512
2359
- },
2360
- {
2361
- "update": 1315,
2362
- "global_step": 5386240,
2363
- "num_episodes": 844,
2364
- "mean_reward": 146.0564516401291,
2365
- "mean_length": 6590.29,
2366
- "loss": 0.8968511819839478,
2367
- "sps": 688.8494388200007
2368
- },
2369
- {
2370
- "update": 1320,
2371
- "global_step": 5406720,
2372
- "num_episodes": 848,
2373
- "mean_reward": 150.32687950611114,
2374
- "mean_length": 6662.46,
2375
- "loss": 25.992216110229492,
2376
- "sps": 658.8033472373343
2377
- },
2378
- {
2379
- "update": 1325,
2380
- "global_step": 5427200,
2381
- "num_episodes": 851,
2382
- "mean_reward": 146.97661718845367,
2383
- "mean_length": 6761.72,
2384
- "loss": 3.56770658493042,
2385
- "sps": 985.132708019695
2386
- },
2387
- {
2388
- "update": 1330,
2389
- "global_step": 5447680,
2390
- "num_episodes": 852,
2391
- "mean_reward": 134.18026546001434,
2392
- "mean_length": 6664.57,
2393
- "loss": 15.875215530395508,
2394
- "sps": 1092.9486199541875
2395
- },
2396
- {
2397
- "update": 1335,
2398
- "global_step": 5468160,
2399
- "num_episodes": 853,
2400
- "mean_reward": 130.54885493755341,
2401
- "mean_length": 6664.57,
2402
- "loss": 6.959790229797363,
2403
- "sps": 289.8487086871761
2404
- },
2405
- {
2406
- "update": 1340,
2407
- "global_step": 5488640,
2408
- "num_episodes": 860,
2409
- "mean_reward": 130.95773406505586,
2410
- "mean_length": 6485.88,
2411
- "loss": 20.052452087402344,
2412
- "sps": 486.39910125898484
2413
- },
2414
- {
2415
- "update": 1345,
2416
- "global_step": 5509120,
2417
- "num_episodes": 863,
2418
- "mean_reward": 142.73314903259276,
2419
- "mean_length": 6488.99,
2420
- "loss": 10.171817779541016,
2421
- "sps": 201.9510308985079
2422
- },
2423
- {
2424
- "update": 1350,
2425
- "global_step": 5529600,
2426
- "num_episodes": 865,
2427
- "mean_reward": 140.62244102954864,
2428
- "mean_length": 6427.8,
2429
- "loss": 1.1496386528015137,
2430
- "sps": 635.0260794777045
2431
- },
2432
- {
2433
- "update": 1355,
2434
- "global_step": 5550080,
2435
- "num_episodes": 865,
2436
- "mean_reward": 140.62244102954864,
2437
- "mean_length": 6427.8,
2438
- "loss": 4.384088039398193,
2439
- "sps": 882.9404372993679
2440
- },
2441
- {
2442
- "update": 1360,
2443
- "global_step": 5570560,
2444
- "num_episodes": 872,
2445
- "mean_reward": 147.4697220182419,
2446
- "mean_length": 6527.99,
2447
- "loss": 10.650382041931152,
2448
- "sps": 280.68259822115476
2449
- },
2450
- {
2451
- "update": 1365,
2452
- "global_step": 5591040,
2453
- "num_episodes": 875,
2454
- "mean_reward": 145.2843037557602,
2455
- "mean_length": 6358.27,
2456
- "loss": 38.430118560791016,
2457
- "sps": 324.7223637427747
2458
- },
2459
- {
2460
- "update": 1370,
2461
- "global_step": 5611520,
2462
- "num_episodes": 876,
2463
- "mean_reward": 145.53429505825042,
2464
- "mean_length": 6358.27,
2465
- "loss": 38.706214904785156,
2466
- "sps": 538.1413797129117
2467
- },
2468
- {
2469
- "update": 1375,
2470
- "global_step": 5632000,
2471
- "num_episodes": 877,
2472
- "mean_reward": 145.43353649616242,
2473
- "mean_length": 6431.14,
2474
- "loss": 16.998456954956055,
2475
- "sps": 824.6403047601932
2476
- },
2477
- {
2478
- "update": 1380,
2479
- "global_step": 5652480,
2480
- "num_episodes": 881,
2481
- "mean_reward": 154.26934564590454,
2482
- "mean_length": 6526.9,
2483
- "loss": 3.5649774074554443,
2484
- "sps": 844.5595148121038
2485
- },
2486
- {
2487
- "update": 1385,
2488
- "global_step": 5672960,
2489
- "num_episodes": 884,
2490
- "mean_reward": 161.71352871894837,
2491
- "mean_length": 6550.38,
2492
- "loss": 3.710602045059204,
2493
- "sps": 908.754059815621
2494
- },
2495
- {
2496
- "update": 1390,
2497
- "global_step": 5693440,
2498
- "num_episodes": 885,
2499
- "mean_reward": 163.49456966400146,
2500
- "mean_length": 6649.33,
2501
- "loss": 0.4263116717338562,
2502
- "sps": 2189.736809475642
2503
- },
2504
- {
2505
- "update": 1395,
2506
- "global_step": 5713920,
2507
- "num_episodes": 886,
2508
- "mean_reward": 176.25792336940765,
2509
- "mean_length": 6738.86,
2510
- "loss": 0.27514657378196716,
2511
- "sps": 2997.673240659507
2512
- },
2513
- {
2514
- "update": 1400,
2515
- "global_step": 5734400,
2516
- "num_episodes": 889,
2517
- "mean_reward": 178.37687824249267,
2518
- "mean_length": 6926.31,
2519
- "loss": 0.08463311195373535,
2520
- "sps": 3081.918148797146
2521
- },
2522
- {
2523
- "update": 1405,
2524
- "global_step": 5754880,
2525
- "num_episodes": 893,
2526
- "mean_reward": 179.84025070667266,
2527
- "mean_length": 7044.6,
2528
- "loss": 11.629227638244629,
2529
- "sps": 191.9713547057929
2530
- },
2531
- {
2532
- "update": 1410,
2533
- "global_step": 5775360,
2534
- "num_episodes": 894,
2535
- "mean_reward": 174.45908066272736,
2536
- "mean_length": 7044.6,
2537
- "loss": 0.7166698575019836,
2538
- "sps": 487.6342667466037
2539
- },
2540
- {
2541
- "update": 1415,
2542
- "global_step": 5795840,
2543
- "num_episodes": 895,
2544
- "mean_reward": 173.22874859333038,
2545
- "mean_length": 7044.6,
2546
- "loss": 2.819256067276001,
2547
- "sps": 804.3018578156708
2548
- },
2549
- {
2550
- "update": 1420,
2551
- "global_step": 5816320,
2552
- "num_episodes": 899,
2553
- "mean_reward": 176.70803850889206,
2554
- "mean_length": 7143.29,
2555
- "loss": 8.249241828918457,
2556
- "sps": 154.735822589327
2557
- },
2558
- {
2559
- "update": 1425,
2560
- "global_step": 5836800,
2561
- "num_episodes": 906,
2562
- "mean_reward": 178.0406158566475,
2563
- "mean_length": 7045.96,
2564
- "loss": 23.5118408203125,
2565
- "sps": 174.4884682201926
2566
- },
2567
- {
2568
- "update": 1430,
2569
- "global_step": 5857280,
2570
- "num_episodes": 907,
2571
- "mean_reward": 178.821298725605,
2572
- "mean_length": 7144.72,
2573
- "loss": 30.70025634765625,
2574
- "sps": 216.094202192432
2575
- },
2576
- {
2577
- "update": 1435,
2578
- "global_step": 5877760,
2579
- "num_episodes": 913,
2580
- "mean_reward": 176.4720972943306,
2581
- "mean_length": 7145.4,
2582
- "loss": 7.781428813934326,
2583
- "sps": 1208.7161327543458
2584
- },
2585
- {
2586
- "update": 1440,
2587
- "global_step": 5898240,
2588
- "num_episodes": 914,
2589
- "mean_reward": 175.2331176495552,
2590
- "mean_length": 7145.4,
2591
- "loss": 0.31977832317352295,
2592
- "sps": 1229.1269815173093
2593
- },
2594
- {
2595
- "update": 1445,
2596
- "global_step": 5918720,
2597
- "num_episodes": 917,
2598
- "mean_reward": 194.64220715761184,
2599
- "mean_length": 7145.4,
2600
- "loss": 4.373215198516846,
2601
- "sps": 492.457155299295
2602
- },
2603
- {
2604
- "update": 1450,
2605
- "global_step": 5939200,
2606
- "num_episodes": 917,
2607
- "mean_reward": 194.64220715761184,
2608
- "mean_length": 7145.4,
2609
- "loss": 1.9693742990493774,
2610
- "sps": 298.24318986957036
2611
- },
2612
- {
2613
- "update": 1455,
2614
- "global_step": 5959680,
2615
- "num_episodes": 924,
2616
- "mean_reward": 207.0149205994606,
2617
- "mean_length": 7167.29,
2618
- "loss": 10.92724323272705,
2619
- "sps": 244.35456207965237
2620
- },
2621
- {
2622
- "update": 1460,
2623
- "global_step": 5980160,
2624
- "num_episodes": 925,
2625
- "mean_reward": 205.1948454117775,
2626
- "mean_length": 7167.29,
2627
- "loss": 1.1308797597885132,
2628
- "sps": 229.29791971281753
2629
- },
2630
- {
2631
- "update": 1465,
2632
- "global_step": 6000640,
2633
- "num_episodes": 929,
2634
- "mean_reward": 210.25953838586807,
2635
- "mean_length": 7296.31,
2636
- "loss": 3.534721612930298,
2637
- "sps": 592.749111404352
2638
- },
2639
- {
2640
- "update": 1470,
2641
- "global_step": 6021120,
2642
- "num_episodes": 930,
2643
- "mean_reward": 208.1203717112541,
2644
- "mean_length": 7266.63,
2645
- "loss": 1.5503363609313965,
2646
- "sps": 2447.564196906472
2647
- },
2648
- {
2649
- "update": 1475,
2650
- "global_step": 6041600,
2651
- "num_episodes": 933,
2652
- "mean_reward": 201.2189755320549,
2653
- "mean_length": 7266.63,
2654
- "loss": 0.6554332375526428,
2655
- "sps": 295.24147179578677
2656
- },
2657
- {
2658
- "update": 1480,
2659
- "global_step": 6062080,
2660
- "num_episodes": 934,
2661
- "mean_reward": 202.5496774840355,
2662
- "mean_length": 7282.98,
2663
- "loss": 4.126849174499512,
2664
- "sps": 646.598178325608
2665
- },
2666
- {
2667
- "update": 1485,
2668
- "global_step": 6082560,
2669
- "num_episodes": 940,
2670
- "mean_reward": 205.9382851600647,
2671
- "mean_length": 7375.73,
2672
- "loss": 0.5179982781410217,
2673
- "sps": 2704.744530210794
2674
- },
2675
- {
2676
- "update": 1490,
2677
- "global_step": 6103040,
2678
- "num_episodes": 941,
2679
- "mean_reward": 206.08987416267394,
2680
- "mean_length": 7473.15,
2681
- "loss": 3.0351297855377197,
2682
- "sps": 304.0684542240306
2683
- },
2684
- {
2685
- "update": 1495,
2686
- "global_step": 6123520,
2687
- "num_episodes": 943,
2688
- "mean_reward": 213.63924886703492,
2689
- "mean_length": 7572.02,
2690
- "loss": 2.000380754470825,
2691
- "sps": 221.62826006300585
2692
- },
2693
- {
2694
- "update": 1500,
2695
- "global_step": 6144000,
2696
- "num_episodes": 944,
2697
- "mean_reward": 212.38942768096925,
2698
- "mean_length": 7572.02,
2699
- "loss": 5.654113292694092,
2700
- "sps": 427.42645749613314
2701
- },
2702
- {
2703
- "update": 1505,
2704
- "global_step": 6164480,
2705
- "num_episodes": 951,
2706
- "mean_reward": 202.7977014017105,
2707
- "mean_length": 7366.39,
2708
- "loss": 6.195871829986572,
2709
- "sps": 214.22522946745585
2710
- },
2711
- {
2712
- "update": 1510,
2713
- "global_step": 6184960,
2714
- "num_episodes": 954,
2715
- "mean_reward": 201.14605865955352,
2716
- "mean_length": 7236.97,
2717
- "loss": 5.950435638427734,
2718
- "sps": 217.84006537381956
2719
- },
2720
- {
2721
- "update": 1515,
2722
- "global_step": 6205440,
2723
- "num_episodes": 960,
2724
- "mean_reward": 185.95387471675872,
2725
- "mean_length": 7023.6,
2726
- "loss": 7.220864772796631,
2727
- "sps": 516.9270272810767
2728
- },
2729
- {
2730
- "update": 1520,
2731
- "global_step": 6225920,
2732
- "num_episodes": 964,
2733
- "mean_reward": 171.7426621770859,
2734
- "mean_length": 6906.26,
2735
- "loss": 1.038556694984436,
2736
- "sps": 1541.5344112491593
2737
- },
2738
- {
2739
- "update": 1525,
2740
- "global_step": 6246400,
2741
- "num_episodes": 969,
2742
- "mean_reward": 184.1564519548416,
2743
- "mean_length": 6966.53,
2744
- "loss": 0.6786921620368958,
2745
- "sps": 2829.1006312828786
2746
- },
2747
- {
2748
- "update": 1530,
2749
- "global_step": 6266880,
2750
- "num_episodes": 969,
2751
- "mean_reward": 184.1564519548416,
2752
- "mean_length": 6966.53,
2753
- "loss": 1.1222167015075684,
2754
- "sps": 2615.546877868864
2755
- },
2756
- {
2757
- "update": 1535,
2758
- "global_step": 6287360,
2759
- "num_episodes": 973,
2760
- "mean_reward": 181.29534606933595,
2761
- "mean_length": 6864.66,
2762
- "loss": 0.8305673599243164,
2763
- "sps": 165.56638087166775
2764
- },
2765
- {
2766
- "update": 1540,
2767
- "global_step": 6307840,
2768
- "num_episodes": 975,
2769
- "mean_reward": 182.1956338787079,
2770
- "mean_length": 6941.24,
2771
- "loss": 7.593855857849121,
2772
- "sps": 345.2319256598677
2773
- },
2774
- {
2775
- "update": 1545,
2776
- "global_step": 6328320,
2777
- "num_episodes": 985,
2778
- "mean_reward": 163.70111170768737,
2779
- "mean_length": 6506.28,
2780
- "loss": 3.5074303150177,
2781
- "sps": 607.6658652702555
2782
- },
2783
- {
2784
- "update": 1550,
2785
- "global_step": 6348800,
2786
- "num_episodes": 986,
2787
- "mean_reward": 150.90680050373078,
2788
- "mean_length": 6485.25,
2789
- "loss": -0.0576794408261776,
2790
- "sps": 2294.650789556882
2791
- },
2792
- {
2793
- "update": 1555,
2794
- "global_step": 6369280,
2795
- "num_episodes": 986,
2796
- "mean_reward": 150.90680050373078,
2797
- "mean_length": 6485.25,
2798
- "loss": -0.06482464075088501,
2799
- "sps": 2276.202621733714
2800
- },
2801
- {
2802
- "update": 1560,
2803
- "global_step": 6389760,
2804
- "num_episodes": 990,
2805
- "mean_reward": 148.66352381229402,
2806
- "mean_length": 6385.85,
2807
- "loss": 1.5638670921325684,
2808
- "sps": 1452.822050860829
2809
- },
2810
- {
2811
- "update": 1565,
2812
- "global_step": 6410240,
2813
- "num_episodes": 997,
2814
- "mean_reward": 142.64638622045516,
2815
- "mean_length": 6262.48,
2816
- "loss": 10.640795707702637,
2817
- "sps": 831.3271413345293
2818
- },
2819
- {
2820
- "update": 1570,
2821
- "global_step": 6430720,
2822
- "num_episodes": 998,
2823
- "mean_reward": 147.54794536352156,
2824
- "mean_length": 6262.48,
2825
- "loss": 0.4938640296459198,
2826
- "sps": 1275.2283874235693
2827
- },
2828
- {
2829
- "update": 1575,
2830
- "global_step": 6451200,
2831
- "num_episodes": 999,
2832
- "mean_reward": 147.54794582128525,
2833
- "mean_length": 6262.48,
2834
- "loss": 0.6068828105926514,
2835
- "sps": 1238.905594097976
2836
- },
2837
- {
2838
- "update": 1580,
2839
- "global_step": 6471680,
2840
- "num_episodes": 1001,
2841
- "mean_reward": 157.1391297507286,
2842
- "mean_length": 6262.48,
2843
- "loss": 2.0471107959747314,
2844
- "sps": 791.2682917819899
2845
- },
2846
- {
2847
- "update": 1585,
2848
- "global_step": 6492160,
2849
- "num_episodes": 1007,
2850
- "mean_reward": 181.8120165514946,
2851
- "mean_length": 6453.69,
2852
- "loss": 2.0775344371795654,
2853
- "sps": 158.97643330398787
2854
- },
2855
- {
2856
- "update": 1590,
2857
- "global_step": 6512640,
2858
- "num_episodes": 1009,
2859
- "mean_reward": 182.60089690208434,
2860
- "mean_length": 6440.96,
2861
- "loss": 0.2661677896976471,
2862
- "sps": 516.263511797793
2863
- },
2864
- {
2865
- "update": 1595,
2866
- "global_step": 6533120,
2867
- "num_episodes": 1010,
2868
- "mean_reward": 184.2022120523453,
2869
- "mean_length": 6462.06,
2870
- "loss": 8.214560508728027,
2871
- "sps": 505.3776855932051
2872
- },
2873
- {
2874
- "update": 1600,
2875
- "global_step": 6553600,
2876
- "num_episodes": 1014,
2877
- "mean_reward": 198.42190223693848,
2878
- "mean_length": 6548.4,
2879
- "loss": 7.701492786407471,
2880
- "sps": 276.9719165588502
2881
- },
2882
- {
2883
- "update": 1605,
2884
- "global_step": 6574080,
2885
- "num_episodes": 1016,
2886
- "mean_reward": 196.53872619628908,
2887
- "mean_length": 6548.4,
2888
- "loss": 8.10093879699707,
2889
- "sps": 183.92737027550902
2890
- },
2891
- {
2892
- "update": 1610,
2893
- "global_step": 6594560,
2894
- "num_episodes": 1021,
2895
- "mean_reward": 172.8975705099106,
2896
- "mean_length": 6434.35,
2897
- "loss": 7.104397296905518,
2898
- "sps": 1251.426107351179
2899
- },
2900
- {
2901
- "update": 1615,
2902
- "global_step": 6615040,
2903
- "num_episodes": 1022,
2904
- "mean_reward": 178.2296389913559,
2905
- "mean_length": 6533.6,
2906
- "loss": 9.21767520904541,
2907
- "sps": 348.4657145461528
2908
- },
2909
- {
2910
- "update": 1620,
2911
- "global_step": 6635520,
2912
- "num_episodes": 1024,
2913
- "mean_reward": 190.50105198383332,
2914
- "mean_length": 6609.82,
2915
- "loss": 18.506481170654297,
2916
- "sps": 335.4327754993037
2917
- },
2918
- {
2919
- "update": 1625,
2920
- "global_step": 6656000,
2921
- "num_episodes": 1025,
2922
- "mean_reward": 203.21644562244416,
2923
- "mean_length": 6609.82,
2924
- "loss": 2.3920085430145264,
2925
- "sps": 595.2335834592267
2926
- },
2927
- {
2928
- "update": 1630,
2929
- "global_step": 6676480,
2930
- "num_episodes": 1029,
2931
- "mean_reward": 207.58161754131316,
2932
- "mean_length": 6719.36,
2933
- "loss": 31.76652717590332,
2934
- "sps": 203.75921758331614
2935
- },
2936
- {
2937
- "update": 1635,
2938
- "global_step": 6696960,
2939
- "num_episodes": 1032,
2940
- "mean_reward": 226.69792892456056,
2941
- "mean_length": 6678.85,
2942
- "loss": 19.729021072387695,
2943
- "sps": 726.913341411244
2944
- },
2945
- {
2946
- "update": 1640,
2947
- "global_step": 6717440,
2948
- "num_episodes": 1034,
2949
- "mean_reward": 227.13838208675384,
2950
- "mean_length": 6579.05,
2951
- "loss": 0.6724852323532104,
2952
- "sps": 1094.927298568171
2953
- },
2954
- {
2955
- "update": 1645,
2956
- "global_step": 6737920,
2957
- "num_episodes": 1036,
2958
- "mean_reward": 224.31010818958282,
2959
- "mean_length": 6678.82,
2960
- "loss": 11.505419731140137,
2961
- "sps": 1080.2913604455412
2962
- },
2963
- {
2964
- "update": 1650,
2965
- "global_step": 6758400,
2966
- "num_episodes": 1038,
2967
- "mean_reward": 231.36178754091262,
2968
- "mean_length": 6722.13,
2969
- "loss": 1.217943549156189,
2970
- "sps": 1558.2708562037928
2971
  }
2972
  ]
 
3
  "update": 5,
4
  "global_step": 20480,
5
  "num_episodes": 12,
6
+ "mean_reward": 5.371756076812744,
7
+ "mean_length": 259.75,
8
+ "loss": -0.07715612649917603,
9
+ "sps": 3197.8944336854393
10
  },
11
  {
12
  "update": 10,
13
  "global_step": 40960,
14
  "num_episodes": 12,
15
+ "mean_reward": 5.371756076812744,
16
+ "mean_length": 259.75,
17
+ "loss": -0.05466647446155548,
18
+ "sps": 3473.7621158486245
19
  },
20
  {
21
  "update": 15,
22
  "global_step": 61440,
23
  "num_episodes": 12,
24
+ "mean_reward": 5.371756076812744,
25
+ "mean_length": 259.75,
26
+ "loss": -0.12925735116004944,
27
+ "sps": 3498.48333768441
28
  },
29
  {
30
  "update": 20,
31
  "global_step": 81920,
32
+ "num_episodes": 17,
33
+ "mean_reward": 23.03933811187744,
34
+ "mean_length": 2539.294117647059,
35
+ "loss": 0.018057599663734436,
36
+ "sps": 1650.518983995032
37
  },
38
  {
39
  "update": 25,
40
  "global_step": 102400,
41
+ "num_episodes": 36,
42
+ "mean_reward": 22.05249885718028,
43
+ "mean_length": 2419.527777777778,
44
+ "loss": 1.0680813789367676,
45
+ "sps": 2943.2734013432255
46
  },
47
  {
48
  "update": 30,
49
  "global_step": 122880,
50
+ "num_episodes": 36,
51
+ "mean_reward": 22.05249885718028,
52
+ "mean_length": 2419.527777777778,
53
+ "loss": -0.11745010316371918,
54
+ "sps": 3506.3540771064972
55
  },
56
  {
57
  "update": 35,
58
  "global_step": 143360,
59
+ "num_episodes": 36,
60
+ "mean_reward": 22.05249885718028,
61
+ "mean_length": 2419.527777777778,
62
+ "loss": 0.011805668473243713,
63
+ "sps": 3463.1128670942708
64
  },
65
  {
66
  "update": 40,
67
  "global_step": 163840,
68
+ "num_episodes": 39,
69
+ "mean_reward": 28.794600401169216,
70
+ "mean_length": 3002.641025641026,
71
+ "loss": 0.10533764958381653,
72
+ "sps": 1986.6795686952885
73
  },
74
  {
75
  "update": 45,
76
  "global_step": 184320,
77
+ "num_episodes": 50,
78
+ "mean_reward": 34.32792649269104,
79
+ "mean_length": 3376.94,
80
+ "loss": 1.039185881614685,
81
+ "sps": 3203.1897121265147
82
  },
83
  {
84
  "update": 50,
85
  "global_step": 204800,
86
+ "num_episodes": 50,
87
+ "mean_reward": 34.32792649269104,
88
+ "mean_length": 3376.94,
89
+ "loss": 0.3282562494277954,
90
+ "sps": 3259.1573788510946
91
  },
92
  {
93
  "update": 55,
94
  "global_step": 225280,
95
+ "num_episodes": 50,
96
+ "mean_reward": 34.32792649269104,
97
+ "mean_length": 3376.94,
98
+ "loss": 0.04704876244068146,
99
+ "sps": 3216.591971265392
100
  },
101
  {
102
  "update": 60,
103
  "global_step": 245760,
104
+ "num_episodes": 54,
105
+ "mean_reward": 37.31432532381128,
106
+ "mean_length": 3500.074074074074,
107
+ "loss": 1.8284252882003784,
108
+ "sps": 2234.903286880901
109
  },
110
  {
111
  "update": 65,
112
  "global_step": 266240,
113
+ "num_episodes": 65,
114
+ "mean_reward": 49.254399849818306,
115
+ "mean_length": 3849.723076923077,
116
+ "loss": 0.3701796531677246,
117
+ "sps": 2919.281950407853
118
  },
119
  {
120
  "update": 70,
121
  "global_step": 286720,
122
+ "num_episodes": 65,
123
+ "mean_reward": 49.254399849818306,
124
+ "mean_length": 3849.723076923077,
125
+ "loss": 0.19717253744602203,
126
+ "sps": 2983.3501026207314
127
  },
128
  {
129
  "update": 75,
130
  "global_step": 307200,
131
+ "num_episodes": 65,
132
+ "mean_reward": 49.254399849818306,
133
+ "mean_length": 3849.723076923077,
134
+ "loss": 1.6765296459197998,
135
+ "sps": 2929.146040695321
136
  },
137
  {
138
  "update": 80,
139
  "global_step": 327680,
140
+ "num_episodes": 68,
141
+ "mean_reward": 53.68283286515404,
142
+ "mean_length": 4121.058823529412,
143
+ "loss": 0.2395431399345398,
144
+ "sps": 2769.2257028011463
145
  },
146
  {
147
  "update": 85,
148
  "global_step": 348160,
149
+ "num_episodes": 80,
150
+ "mean_reward": 54.429239320755,
151
+ "mean_length": 4161.5125,
152
+ "loss": 21.42174530029297,
153
+ "sps": 2001.0349067939262
154
  },
155
  {
156
  "update": 90,
157
  "global_step": 368640,
158
+ "num_episodes": 80,
159
+ "mean_reward": 54.429239320755,
160
+ "mean_length": 4161.5125,
161
+ "loss": 1.0561028718948364,
162
+ "sps": 2419.50496157211
163
  },
164
  {
165
  "update": 95,
166
  "global_step": 389120,
167
+ "num_episodes": 80,
168
+ "mean_reward": 54.429239320755,
169
+ "mean_length": 4161.5125,
170
+ "loss": 1.28178870677948,
171
+ "sps": 2387.100829239012
172
  },
173
  {
174
  "update": 100,
175
  "global_step": 409600,
176
+ "num_episodes": 83,
177
+ "mean_reward": 69.2969753311341,
178
+ "mean_length": 4253.614457831325,
179
+ "loss": 1.1579307317733765,
180
+ "sps": 1711.4232678830151
181
  },
182
  {
183
  "update": 105,
184
  "global_step": 430080,
185
+ "num_episodes": 98,
186
+ "mean_reward": 67.86165303600077,
187
+ "mean_length": 4242.530612244898,
188
+ "loss": 2.247659683227539,
189
+ "sps": 3247.5957464867383
190
  },
191
  {
192
  "update": 110,
193
  "global_step": 450560,
194
+ "num_episodes": 98,
195
+ "mean_reward": 67.86165303600077,
196
+ "mean_length": 4242.530612244898,
197
+ "loss": 0.582718014717102,
198
+ "sps": 3180.2988030100887
199
  },
200
  {
201
  "update": 115,
202
  "global_step": 471040,
203
+ "num_episodes": 98,
204
+ "mean_reward": 67.86165303600077,
205
+ "mean_length": 4242.530612244898,
206
+ "loss": -0.026346325874328613,
207
+ "sps": 2964.689585145653
208
  },
209
  {
210
  "update": 120,
211
  "global_step": 491520,
212
+ "num_episodes": 99,
213
+ "mean_reward": 68.72626714995413,
214
+ "mean_length": 4300.686868686868,
215
+ "loss": -0.047779276967048645,
216
+ "sps": 3043.7824660495194
217
  },
218
  {
219
  "update": 125,
220
  "global_step": 512000,
221
+ "num_episodes": 112,
222
+ "mean_reward": 79.7761773943901,
223
+ "mean_length": 4945.35,
224
+ "loss": 5.148533821105957,
225
+ "sps": 1330.97401420016
226
  },
227
  {
228
  "update": 130,
229
  "global_step": 532480,
230
+ "num_episodes": 112,
231
+ "mean_reward": 79.7761773943901,
232
+ "mean_length": 4945.35,
233
+ "loss": -0.18798421323299408,
234
+ "sps": 3502.5924445773726
235
  },
236
  {
237
  "update": 135,
238
  "global_step": 552960,
239
+ "num_episodes": 112,
240
+ "mean_reward": 79.7761773943901,
241
+ "mean_length": 4945.35,
242
+ "loss": -0.08565455675125122,
243
+ "sps": 3461.274367919746
244
  },
245
  {
246
  "update": 140,
247
  "global_step": 573440,
248
+ "num_episodes": 116,
249
+ "mean_reward": 84.52449138879776,
250
+ "mean_length": 4847.52,
251
+ "loss": 2.0761420726776123,
252
+ "sps": 2729.742934114658
253
  },
254
  {
255
  "update": 145,
256
  "global_step": 593920,
257
  "num_episodes": 124,
258
+ "mean_reward": 93.81721450567245,
259
+ "mean_length": 5141.93,
260
+ "loss": 2.071385622024536,
261
+ "sps": 3031.550454700739
262
  },
263
  {
264
  "update": 150,
265
  "global_step": 614400,
266
+ "num_episodes": 124,
267
+ "mean_reward": 93.81721450567245,
268
+ "mean_length": 5141.93,
269
+ "loss": 3.72536039352417,
270
+ "sps": 3220.2233787373293
271
  },
272
  {
273
  "update": 155,
274
  "global_step": 634880,
275
+ "num_episodes": 124,
276
+ "mean_reward": 93.81721450567245,
277
+ "mean_length": 5141.93,
278
+ "loss": 0.9640051126480103,
279
+ "sps": 3180.6185154137133
280
  },
281
  {
282
  "update": 160,
283
  "global_step": 655360,
284
+ "num_episodes": 128,
285
+ "mean_reward": 96.66174763917923,
286
+ "mean_length": 5340.2,
287
+ "loss": 0.27916210889816284,
288
+ "sps": 3097.2108275565165
289
  },
290
  {
291
  "update": 165,
292
  "global_step": 675840,
293
+ "num_episodes": 145,
294
+ "mean_reward": 96.82534897089005,
295
+ "mean_length": 5248.29,
296
+ "loss": 7.688024520874023,
297
+ "sps": 965.9878695065277
298
  },
299
  {
300
  "update": 170,
301
  "global_step": 696320,
302
+ "num_episodes": 147,
303
+ "mean_reward": 96.301647002697,
304
+ "mean_length": 5159.77,
305
+ "loss": 0.46070918440818787,
306
+ "sps": 2784.851134794954
307
  },
308
  {
309
  "update": 175,
310
  "global_step": 716800,
311
+ "num_episodes": 147,
312
+ "mean_reward": 96.301647002697,
313
+ "mean_length": 5159.77,
314
+ "loss": -0.03485479950904846,
315
+ "sps": 2660.7421365023765
316
  },
317
  {
318
  "update": 180,
319
  "global_step": 737280,
320
+ "num_episodes": 152,
321
+ "mean_reward": 100.57385118722915,
322
+ "mean_length": 5155.4,
323
+ "loss": 1.1904330253601074,
324
+ "sps": 2392.635470993461
325
  },
326
  {
327
  "update": 185,
328
  "global_step": 757760,
329
+ "num_episodes": 159,
330
+ "mean_reward": 95.51411318063737,
331
+ "mean_length": 5157.77,
332
+ "loss": 0.7197635173797607,
333
+ "sps": 2797.608756484379
334
  },
335
  {
336
  "update": 190,
337
  "global_step": 778240,
338
+ "num_episodes": 160,
339
+ "mean_reward": 94.83179949045181,
340
+ "mean_length": 5157.77,
341
+ "loss": 0.5011193156242371,
342
+ "sps": 2885.175874304794
343
  },
344
  {
345
  "update": 195,
346
  "global_step": 798720,
347
+ "num_episodes": 160,
348
+ "mean_reward": 94.83179949045181,
349
+ "mean_length": 5157.77,
350
+ "loss": 0.7710214853286743,
351
+ "sps": 2916.656200352378
352
  },
353
  {
354
  "update": 200,
355
  "global_step": 819200,
356
+ "num_episodes": 164,
357
+ "mean_reward": 95.26549025774003,
358
+ "mean_length": 5161.02,
359
+ "loss": 3.085784435272217,
360
+ "sps": 2609.085698942629
361
  },
362
  {
363
  "update": 205,
364
  "global_step": 839680,
365
+ "num_episodes": 180,
366
+ "mean_reward": 90.60398989439011,
367
+ "mean_length": 4882.37,
368
+ "loss": 25.020034790039062,
369
+ "sps": 278.88355834494627
370
  },
371
  {
372
  "update": 210,
373
  "global_step": 860160,
374
+ "num_episodes": 188,
375
+ "mean_reward": 79.05637022733688,
376
+ "mean_length": 4494.02,
377
+ "loss": 15.180160522460938,
378
+ "sps": 944.5016492069974
379
  },
380
  {
381
  "update": 215,
382
  "global_step": 880640,
383
+ "num_episodes": 190,
384
+ "mean_reward": 79.10023557424546,
385
+ "mean_length": 4536.35,
386
+ "loss": 9.965888977050781,
387
+ "sps": 364.6418338592982
388
  },
389
  {
390
  "update": 220,
391
  "global_step": 901120,
392
+ "num_episodes": 197,
393
+ "mean_reward": 77.15829209089279,
394
+ "mean_length": 4451.16,
395
+ "loss": 5.519225597381592,
396
+ "sps": 452.03522870499233
397
  },
398
  {
399
  "update": 225,
400
  "global_step": 921600,
401
+ "num_episodes": 202,
402
+ "mean_reward": 75.52227295637131,
403
+ "mean_length": 4354.33,
404
+ "loss": 64.15535736083984,
405
+ "sps": 367.65986127646954
406
  },
407
  {
408
  "update": 230,
409
  "global_step": 942080,
410
+ "num_episodes": 208,
411
+ "mean_reward": 77.0743759560585,
412
+ "mean_length": 4288.63,
413
+ "loss": 136.16909790039062,
414
+ "sps": 352.9326136603177
415
  },
416
  {
417
  "update": 235,
418
  "global_step": 962560,
419
+ "num_episodes": 214,
420
+ "mean_reward": 74.43292628288269,
421
+ "mean_length": 4152.63,
422
+ "loss": 21.60038185119629,
423
+ "sps": 347.70005658972826
424
  },
425
  {
426
  "update": 240,
427
  "global_step": 983040,
428
+ "num_episodes": 227,
429
+ "mean_reward": 56.747190070152286,
430
+ "mean_length": 3476.26,
431
+ "loss": 5247.01953125,
432
+ "sps": 338.3544128752909
433
  },
434
  {
435
  "update": 245,
436
  "global_step": 1003520,
437
+ "num_episodes": 246,
438
+ "mean_reward": 83.43761008739472,
439
+ "mean_length": 3093.44,
440
+ "loss": 24.65706443786621,
441
+ "sps": 502.2840263546167
442
  },
443
  {
444
  "update": 250,
445
  "global_step": 1024000,
446
+ "num_episodes": 258,
447
+ "mean_reward": 78.04051938056946,
448
+ "mean_length": 2602.94,
449
+ "loss": 243.8249053955078,
450
+ "sps": 330.56640317617087
451
  },
452
  {
453
  "update": 255,
454
  "global_step": 1044480,
455
+ "num_episodes": 264,
456
+ "mean_reward": 81.05206553459168,
457
+ "mean_length": 2411.21,
458
+ "loss": 39.96983337402344,
459
+ "sps": 549.1892263469624
460
  },
461
  {
462
  "update": 260,
463
  "global_step": 1064960,
464
+ "num_episodes": 268,
465
+ "mean_reward": 219.54257692813874,
466
+ "mean_length": 2413.93,
467
+ "loss": 1803.801025390625,
468
+ "sps": 598.9593989246749
469
  },
470
  {
471
  "update": 265,
472
  "global_step": 1085440,
473
+ "num_episodes": 270,
474
+ "mean_reward": 218.29985638141633,
475
+ "mean_length": 2316.4,
476
+ "loss": 542.6195678710938,
477
+ "sps": 576.2196944536587
478
  },
479
  {
480
  "update": 270,
481
  "global_step": 1105920,
482
+ "num_episodes": 274,
483
+ "mean_reward": 310.3814339208603,
484
+ "mean_length": 2500.61,
485
+ "loss": 36.787967681884766,
486
+ "sps": 476.229446655146
487
  },
488
  {
489
  "update": 275,
490
  "global_step": 1126400,
491
+ "num_episodes": 289,
492
+ "mean_reward": 305.01237434387207,
493
+ "mean_length": 2535.94,
494
+ "loss": 19.507030487060547,
495
+ "sps": 400.9073826886594
496
  },
497
  {
498
  "update": 280,
499
  "global_step": 1146880,
500
+ "num_episodes": 298,
501
+ "mean_reward": 401.39536855220797,
502
+ "mean_length": 2565.42,
503
+ "loss": 52.059120178222656,
504
+ "sps": 702.3737668350727
505
  },
506
  {
507
  "update": 285,
508
  "global_step": 1167360,
509
+ "num_episodes": 309,
510
+ "mean_reward": 391.02530930519106,
511
+ "mean_length": 2143.77,
512
+ "loss": 22.919591903686523,
513
+ "sps": 275.1379100679109
514
  },
515
  {
516
  "update": 290,
517
  "global_step": 1187840,
518
+ "num_episodes": 316,
519
+ "mean_reward": 489.53185575008393,
520
+ "mean_length": 2277.6,
521
+ "loss": 31.41555404663086,
522
+ "sps": 350.6416338473395
523
  },
524
  {
525
  "update": 295,
526
  "global_step": 1208320,
527
+ "num_episodes": 320,
528
+ "mean_reward": 489.0599168300629,
529
+ "mean_length": 2303.45,
530
+ "loss": 52.70932388305664,
531
+ "sps": 859.4427758832318
532
  },
533
  {
534
  "update": 300,
535
  "global_step": 1228800,
536
+ "num_episodes": 326,
537
+ "mean_reward": 506.6310522270203,
538
+ "mean_length": 2500.66,
539
+ "loss": 36.654197692871094,
540
+ "sps": 378.8344670632786
541
  },
542
  {
543
  "update": 305,
544
  "global_step": 1249280,
545
+ "num_episodes": 331,
546
+ "mean_reward": 490.07581648349765,
547
+ "mean_length": 2551.54,
548
+ "loss": 13.381026268005371,
549
+ "sps": 324.31118759980126
550
  },
551
  {
552
  "update": 310,
553
  "global_step": 1269760,
554
+ "num_episodes": 335,
555
+ "mean_reward": 490.83249175071717,
556
+ "mean_length": 2754.61,
557
+ "loss": 13.67751693725586,
558
+ "sps": 601.9785828545383
559
  },
560
  {
561
  "update": 315,
562
  "global_step": 1290240,
563
+ "num_episodes": 342,
564
+ "mean_reward": 558.9792760944366,
565
+ "mean_length": 2851.49,
566
+ "loss": 32.91801834106445,
567
+ "sps": 321.4874507503393
568
  },
569
  {
570
  "update": 320,
571
  "global_step": 1310720,
572
+ "num_episodes": 352,
573
+ "mean_reward": 553.7738126516342,
574
+ "mean_length": 2853.21,
575
+ "loss": 18.369260787963867,
576
+ "sps": 621.5240850375873
577
  },
578
  {
579
  "update": 325,
580
  "global_step": 1331200,
581
+ "num_episodes": 358,
582
+ "mean_reward": 590.7320046901702,
583
+ "mean_length": 2950.34,
584
+ "loss": 5.919332504272461,
585
+ "sps": 437.5267343221937
586
  },
587
  {
588
  "update": 330,
589
  "global_step": 1351680,
590
+ "num_episodes": 364,
591
+ "mean_reward": 591.3189961528778,
592
+ "mean_length": 3142.3,
593
+ "loss": 1779.034423828125,
594
+ "sps": 167.8109154720703
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
595
  }
596
  ]