[ { "update": 5, "global_step": 20480, "num_episodes": 5, "mean_reward": -0.4786701202392578, "mean_length": 232.8, "survival_pct": 0.023280000000000002, "max_steps": 10000, "loss": 0.12873922288417816, "sps": 2902.7821104130076 }, { "update": 10, "global_step": 40960, "num_episodes": 5, "mean_reward": -0.4786701202392578, "mean_length": 232.8, "survival_pct": 0.023280000000000002, "max_steps": 10000, "loss": 1.048768401145935, "sps": 2989.934249487722 }, { "update": 15, "global_step": 61440, "num_episodes": 5, "mean_reward": -0.4786701202392578, "mean_length": 232.8, "survival_pct": 0.023280000000000002, "max_steps": 10000, "loss": 0.4728538990020752, "sps": 2892.3409931007545 }, { "update": 20, "global_step": 81920, "num_episodes": 13, "mean_reward": 70.43313554617075, "mean_length": 4721.461538461538, "survival_pct": 0.4721461538461538, "max_steps": 10000, "loss": 0.8401235342025757, "sps": 1363.4104809447254 }, { "update": 25, "global_step": 102400, "num_episodes": 25, "mean_reward": 49.477407665252684, "mean_length": 3493.36, "survival_pct": 0.34933600000000004, "max_steps": 10000, "loss": 2.6180195808410645, "sps": 1433.9218398141863 }, { "update": 30, "global_step": 122880, "num_episodes": 25, "mean_reward": 49.477407665252684, "mean_length": 3493.36, "survival_pct": 0.34933600000000004, "max_steps": 10000, "loss": -0.17261816561222076, "sps": 2402.5672121197713 }, { "update": 35, "global_step": 143360, "num_episodes": 25, "mean_reward": 49.477407665252684, "mean_length": 3493.36, "survival_pct": 0.34933600000000004, "max_steps": 10000, "loss": -0.09139963984489441, "sps": 2342.5616398383595 }, { "update": 40, "global_step": 163840, "num_episodes": 29, "mean_reward": 63.332810089505955, "mean_length": 4053.3793103448274, "survival_pct": 0.40533793103448273, "max_steps": 10000, "loss": 2.368999719619751, "sps": 803.2009899550957 }, { "update": 45, "global_step": 184320, "num_episodes": 48, "mean_reward": 61.90170055627823, "mean_length": 3634.7916666666665, "survival_pct": 0.36347916666666663, "max_steps": 10000, "loss": 25.510446548461914, "sps": 876.6005063299407 }, { "update": 50, "global_step": 204800, "num_episodes": 51, "mean_reward": 58.35679834964228, "mean_length": 3464.156862745098, "survival_pct": 0.3464156862745098, "max_steps": 10000, "loss": 1.516045331954956, "sps": 2241.345890706289 }, { "update": 55, "global_step": 225280, "num_episodes": 51, "mean_reward": 58.35679834964228, "mean_length": 3464.156862745098, "survival_pct": 0.3464156862745098, "max_steps": 10000, "loss": -0.1645357310771942, "sps": 2287.8772324784622 }, { "update": 60, "global_step": 245760, "num_episodes": 55, "mean_reward": 61.13233257640492, "mean_length": 3583.0363636363636, "survival_pct": 0.3583036363636364, "max_steps": 10000, "loss": 1.9156525135040283, "sps": 1777.0489161780952 }, { "update": 65, "global_step": 266240, "num_episodes": 65, "mean_reward": 70.67225723266601, "mean_length": 3672.6153846153848, "survival_pct": 0.3672615384615385, "max_steps": 10000, "loss": 0.939544141292572, "sps": 2089.239566218028 }, { "update": 70, "global_step": 286720, "num_episodes": 71, "mean_reward": 69.33942423404103, "mean_length": 3655.2535211267605, "survival_pct": 0.36552535211267606, "max_steps": 10000, "loss": 1.1759222745895386, "sps": 2632.098762980766 }, { "update": 75, "global_step": 307200, "num_episodes": 71, "mean_reward": 69.33942423404103, "mean_length": 3655.2535211267605, "survival_pct": 0.36552535211267606, "max_steps": 10000, "loss": 1.520628571510315, "sps": 2482.8506672459066 }, { "update": 80, "global_step": 327680, "num_episodes": 73, "mean_reward": 73.5122941879377, "mean_length": 3829.082191780822, "survival_pct": 0.3829082191780822, "max_steps": 10000, "loss": 0.8090716004371643, "sps": 1082.2416143589735 }, { "update": 85, "global_step": 348160, "num_episodes": 83, "mean_reward": 73.65270042993936, "mean_length": 3811.9518072289156, "survival_pct": 0.38119518072289155, "max_steps": 10000, "loss": 6.820394515991211, "sps": 823.4620989576031 }, { "update": 90, "global_step": 368640, "num_episodes": 91, "mean_reward": 75.30516777981768, "mean_length": 3846.164835164835, "survival_pct": 0.38461648351648353, "max_steps": 10000, "loss": 2.671729803085327, "sps": 1723.468236780256 }, { "update": 95, "global_step": 389120, "num_episodes": 91, "mean_reward": 75.30516777981768, "mean_length": 3846.164835164835, "survival_pct": 0.38461648351648353, "max_steps": 10000, "loss": 15.690156936645508, "sps": 1463.8510964098139 }, { "update": 100, "global_step": 409600, "num_episodes": 91, "mean_reward": 75.30516777981768, "mean_length": 3846.164835164835, "survival_pct": 0.38461648351648353, "max_steps": 10000, "loss": 0.16147078573703766, "sps": 1544.6687513559787 }, { "update": 105, "global_step": 430080, "num_episodes": 95, "mean_reward": 91.60696769011648, "mean_length": 4105.273684210526, "survival_pct": 0.4105273684210526, "max_steps": 10000, "loss": 0.28715068101882935, "sps": 1491.869871137239 }, { "update": 110, "global_step": 450560, "num_episodes": 103, "mean_reward": 112.06914116382599, "mean_length": 4315.97, "survival_pct": 0.431597, "max_steps": 10000, "loss": 0.6831140518188477, "sps": 1664.9507448966885 }, { "update": 115, "global_step": 471040, "num_episodes": 103, "mean_reward": 112.06914116382599, "mean_length": 4315.97, "survival_pct": 0.431597, "max_steps": 10000, "loss": -0.07581882178783417, "sps": 2436.273752467313 }, { "update": 120, "global_step": 491520, "num_episodes": 103, "mean_reward": 112.06914116382599, "mean_length": 4315.97, "survival_pct": 0.431597, "max_steps": 10000, "loss": -0.12825502455234528, "sps": 2290.0122957801336 }, { "update": 125, "global_step": 512000, "num_episodes": 109, "mean_reward": 120.03564118385314, "mean_length": 4411.64, "survival_pct": 0.44116400000000006, "max_steps": 10000, "loss": 0.5430532097816467, "sps": 2629.093479829129 }, { "update": 130, "global_step": 532480, "num_episodes": 115, "mean_reward": 123.0210286808014, "mean_length": 4319.07, "survival_pct": 0.431907, "max_steps": 10000, "loss": 6.376620769500732, "sps": 960.5544143123027 }, { "update": 135, "global_step": 552960, "num_episodes": 116, "mean_reward": 123.100150847435, "mean_length": 4326.72, "survival_pct": 0.432672, "max_steps": 10000, "loss": 0.21949227154254913, "sps": 2445.2169291143578 }, { "update": 140, "global_step": 573440, "num_episodes": 116, "mean_reward": 123.100150847435, "mean_length": 4326.72, "survival_pct": 0.432672, "max_steps": 10000, "loss": -0.2501995265483856, "sps": 2494.697519833577 }, { "update": 145, "global_step": 593920, "num_episodes": 123, "mean_reward": 134.99672790527345, "mean_length": 4791.57, "survival_pct": 0.47915699999999994, "max_steps": 10000, "loss": 31.24036407470703, "sps": 657.6329559456061 }, { "update": 150, "global_step": 614400, "num_episodes": 129, "mean_reward": 136.9602340936661, "mean_length": 4811.37, "survival_pct": 0.481137, "max_steps": 10000, "loss": 0.5206527709960938, "sps": 2480.224703717002 }, { "update": 155, "global_step": 634880, "num_episodes": 132, "mean_reward": 133.16078406333924, "mean_length": 4618.29, "survival_pct": 0.461829, "max_steps": 10000, "loss": 0.5726419687271118, "sps": 2234.639911831208 }, { "update": 160, "global_step": 655360, "num_episodes": 132, "mean_reward": 133.16078406333924, "mean_length": 4618.29, "survival_pct": 0.461829, "max_steps": 10000, "loss": -0.06972374022006989, "sps": 2060.961397107338 }, { "update": 165, "global_step": 675840, "num_episodes": 139, "mean_reward": 134.1688402891159, "mean_length": 4694.54, "survival_pct": 0.469454, "max_steps": 10000, "loss": 1.1694589853286743, "sps": 2018.626635358524 }, { "update": 170, "global_step": 696320, "num_episodes": 144, "mean_reward": 142.00250946283342, "mean_length": 4984.25, "survival_pct": 0.498425, "max_steps": 10000, "loss": 2.366715669631958, "sps": 1592.928048802198 }, { "update": 175, "global_step": 716800, "num_episodes": 146, "mean_reward": 144.95669583559035, "mean_length": 5095.59, "survival_pct": 0.509559, "max_steps": 10000, "loss": 21.034530639648438, "sps": 1872.9182768017681 }, { "update": 180, "global_step": 737280, "num_episodes": 146, "mean_reward": 144.95669583559035, "mean_length": 5095.59, "survival_pct": 0.509559, "max_steps": 10000, "loss": 0.10471776127815247, "sps": 2076.429778308613 }, { "update": 185, "global_step": 757760, "num_episodes": 152, "mean_reward": 152.9122651386261, "mean_length": 5266.79, "survival_pct": 0.526679, "max_steps": 10000, "loss": 28.784088134765625, "sps": 1428.3776386526256 }, { "update": 190, "global_step": 778240, "num_episodes": 155, "mean_reward": 159.41734125614167, "mean_length": 5462.84, "survival_pct": 0.546284, "max_steps": 10000, "loss": 0.5585659742355347, "sps": 1503.145884656211 }, { "update": 195, "global_step": 798720, "num_episodes": 158, "mean_reward": 176.945558218956, "mean_length": 5559.17, "survival_pct": 0.555917, "max_steps": 10000, "loss": 48.71400451660156, "sps": 522.6239066604847 }, { "update": 200, "global_step": 819200, "num_episodes": 159, "mean_reward": 170.58766025066376, "mean_length": 5475.19, "survival_pct": 0.547519, "max_steps": 10000, "loss": 15.522979736328125, "sps": 671.7228145587918 }, { "update": 205, "global_step": 839680, "num_episodes": 159, "mean_reward": 170.58766025066376, "mean_length": 5475.19, "survival_pct": 0.36501266666666665, "max_steps": 15000, "loss": 4.25346040725708, "sps": 708.7725246983115 }, { "update": 210, "global_step": 860160, "num_episodes": 159, "mean_reward": 170.58766025066376, "mean_length": 5475.19, "survival_pct": 0.36501266666666665, "max_steps": 15000, "loss": 0.7638179659843445, "sps": 658.5550006478268 }, { "update": 215, "global_step": 880640, "num_episodes": 163, "mean_reward": 192.89881912708282, "mean_length": 5817.41, "survival_pct": 0.3878273333333333, "max_steps": 15000, "loss": 12.150495529174805, "sps": 353.88007707970496 }, { "update": 220, "global_step": 901120, "num_episodes": 166, "mean_reward": 207.2202451276779, "mean_length": 6161.78, "survival_pct": 0.41078533333333334, "max_steps": 15000, "loss": 39.93988037109375, "sps": 320.11899245227085 }, { "update": 225, "global_step": 921600, "num_episodes": 170, "mean_reward": 259.2956739234924, "mean_length": 6429.44, "survival_pct": 0.4286293333333333, "max_steps": 15000, "loss": 86.16336822509766, "sps": 269.09665591603243 }, { "update": 230, "global_step": 942080, "num_episodes": 174, "mean_reward": 253.5440968155861, "mean_length": 6199.25, "survival_pct": 0.41328333333333334, "max_steps": 15000, "loss": 95.76709747314453, "sps": 432.8286117914245 }, { "update": 235, "global_step": 962560, "num_episodes": 175, "mean_reward": 251.54071150064468, "mean_length": 6138.37, "survival_pct": 0.4092246666666667, "max_steps": 15000, "loss": 0.6741273999214172, "sps": 2603.814903422436 }, { "update": 240, "global_step": 983040, "num_episodes": 176, "mean_reward": 253.30119943857193, "mean_length": 6284.35, "survival_pct": 0.4189566666666667, "max_steps": 15000, "loss": 0.1569591909646988, "sps": 2278.9720754815094 }, { "update": 245, "global_step": 1003520, "num_episodes": 177, "mean_reward": 256.35702211141586, "mean_length": 6425.14, "survival_pct": 0.4283426666666667, "max_steps": 15000, "loss": -0.17721155285835266, "sps": 2303.773051684429 }, { "update": 250, "global_step": 1024000, "num_episodes": 179, "mean_reward": 255.5962089705467, "mean_length": 6525.14, "survival_pct": 0.43500933333333336, "max_steps": 15000, "loss": 0.23589976131916046, "sps": 2206.427532109994 }, { "update": 255, "global_step": 1044480, "num_episodes": 180, "mean_reward": 257.4188562893867, "mean_length": 6668.15, "survival_pct": 0.4445433333333333, "max_steps": 15000, "loss": 0.549183189868927, "sps": 2211.682715091644 }, { "update": 260, "global_step": 1064960, "num_episodes": 183, "mean_reward": 269.09694628953935, "mean_length": 7078.44, "survival_pct": 0.471896, "max_steps": 15000, "loss": 1.171219825744629, "sps": 2017.4015932661957 }, { "update": 265, "global_step": 1085440, "num_episodes": 183, "mean_reward": 269.09694628953935, "mean_length": 7078.44, "survival_pct": 0.471896, "max_steps": 15000, "loss": 0.1275859922170639, "sps": 1835.885855759612 }, { "update": 270, "global_step": 1105920, "num_episodes": 184, "mean_reward": 270.90041587114337, "mean_length": 7220.17, "survival_pct": 0.4813446666666667, "max_steps": 15000, "loss": 0.09780505299568176, "sps": 1811.136951881794 }, { "update": 275, "global_step": 1126400, "num_episodes": 186, "mean_reward": 279.4690273213387, "mean_length": 7320.17, "survival_pct": 0.48801133333333335, "max_steps": 15000, "loss": 15.602858543395996, "sps": 588.0868439042007 }, { "update": 280, "global_step": 1146880, "num_episodes": 188, "mean_reward": 288.84978276491165, "mean_length": 7391.33, "survival_pct": 0.4927553333333333, "max_steps": 15000, "loss": 0.023340240120887756, "sps": 1795.065224055729 }, { "update": 285, "global_step": 1167360, "num_episodes": 190, "mean_reward": 304.26770622015, "mean_length": 7681.49, "survival_pct": 0.5120993333333334, "max_steps": 15000, "loss": -0.13154439628124237, "sps": 2105.6127488836673 }, { "update": 290, "global_step": 1187840, "num_episodes": 193, "mean_reward": 299.99041105508803, "mean_length": 7774.97, "survival_pct": 0.5183313333333334, "max_steps": 15000, "loss": 27.63266944885254, "sps": 1435.2377599727854 }, { "update": 295, "global_step": 1208320, "num_episodes": 193, "mean_reward": 299.99041105508803, "mean_length": 7774.97, "survival_pct": 0.5183313333333334, "max_steps": 15000, "loss": 0.08694343268871307, "sps": 1579.9578506707744 }, { "update": 300, "global_step": 1228800, "num_episodes": 194, "mean_reward": 299.24194776773453, "mean_length": 7824.97, "survival_pct": 0.5216646666666667, "max_steps": 15000, "loss": -0.08208262920379639, "sps": 1713.483711856869 }, { "update": 305, "global_step": 1249280, "num_episodes": 199, "mean_reward": 291.81668387651445, "mean_length": 7825.67, "survival_pct": 0.5217113333333333, "max_steps": 15000, "loss": 5.8720316886901855, "sps": 1446.1967920114107 }, { "update": 310, "global_step": 1269760, "num_episodes": 201, "mean_reward": 274.33396270036695, "mean_length": 7777.49, "survival_pct": 0.5184993333333333, "max_steps": 15000, "loss": 25.04059600830078, "sps": 611.9018372332281 }, { "update": 315, "global_step": 1290240, "num_episodes": 205, "mean_reward": 272.47521389484405, "mean_length": 7893.17, "survival_pct": 0.5262113333333334, "max_steps": 15000, "loss": 35.360877990722656, "sps": 393.4470006251 }, { "update": 320, "global_step": 1310720, "num_episodes": 212, "mean_reward": 274.2415503978729, "mean_length": 7724.58, "survival_pct": 0.514972, "max_steps": 15000, "loss": 17.640727996826172, "sps": 576.0717870047208 }, { "update": 325, "global_step": 1331200, "num_episodes": 212, "mean_reward": 274.2415503978729, "mean_length": 7724.58, "survival_pct": 0.514972, "max_steps": 15000, "loss": 4.197415351867676, "sps": 763.5424454388547 }, { "update": 330, "global_step": 1351680, "num_episodes": 213, "mean_reward": 276.0169840526581, "mean_length": 7871.27, "survival_pct": 0.5247513333333333, "max_steps": 15000, "loss": 3.047353982925415, "sps": 822.0506273160125 }, { "update": 335, "global_step": 1372160, "num_episodes": 214, "mean_reward": 277.7569498729706, "mean_length": 8018.5, "survival_pct": 0.5345666666666666, "max_steps": 15000, "loss": 5.528885364532471, "sps": 753.0351947572923 }, { "update": 340, "global_step": 1392640, "num_episodes": 215, "mean_reward": 278.02211222648623, "mean_length": 8068.5, "survival_pct": 0.5379, "max_steps": 15000, "loss": 1.2490136623382568, "sps": 815.783836720429 }, { "update": 345, "global_step": 1413120, "num_episodes": 217, "mean_reward": 314.82606459617614, "mean_length": 8257.34, "survival_pct": 0.5504893333333334, "max_steps": 15000, "loss": 0.16502337157726288, "sps": 2263.8703548795343 }, { "update": 350, "global_step": 1433600, "num_episodes": 220, "mean_reward": 337.1490696239471, "mean_length": 8503.93, "survival_pct": 0.5669286666666666, "max_steps": 15000, "loss": -0.23407027125358582, "sps": 2114.7366860740576 }, { "update": 355, "global_step": 1454080, "num_episodes": 220, "mean_reward": 337.1490696239471, "mean_length": 8503.93, "survival_pct": 0.5669286666666666, "max_steps": 15000, "loss": -0.08559620380401611, "sps": 2031.0806393625717 }, { "update": 360, "global_step": 1474560, "num_episodes": 221, "mean_reward": 339.0253634929657, "mean_length": 8652.63, "survival_pct": 0.576842, "max_steps": 15000, "loss": -0.06816114485263824, "sps": 2006.9790581005002 }, { "update": 365, "global_step": 1495040, "num_episodes": 222, "mean_reward": 343.7125220012665, "mean_length": 8702.63, "survival_pct": 0.5801753333333333, "max_steps": 15000, "loss": -0.10728916525840759, "sps": 2019.2784003150005 }, { "update": 370, "global_step": 1515520, "num_episodes": 223, "mean_reward": 347.4228830242157, "mean_length": 8752.63, "survival_pct": 0.5835086666666666, "max_steps": 15000, "loss": -0.20886194705963135, "sps": 1983.558184566299 }, { "update": 375, "global_step": 1536000, "num_episodes": 229, "mean_reward": 347.23676467895507, "mean_length": 8733.48, "survival_pct": 0.582232, "max_steps": 15000, "loss": 8.476018905639648, "sps": 771.0850934786216 }, { "update": 380, "global_step": 1556480, "num_episodes": 234, "mean_reward": 365.97415913581847, "mean_length": 8974.59, "survival_pct": 0.598306, "max_steps": 15000, "loss": 12.912026405334473, "sps": 410.00426579514084 }, { "update": 385, "global_step": 1576960, "num_episodes": 235, "mean_reward": 364.00330050468443, "mean_length": 8891.43, "survival_pct": 0.592762, "max_steps": 15000, "loss": -0.11877703666687012, "sps": 2193.265879978675 }, { "update": 390, "global_step": 1597440, "num_episodes": 236, "mean_reward": 367.3724857187271, "mean_length": 9039.65, "survival_pct": 0.6026433333333333, "max_steps": 15000, "loss": -0.11603386700153351, "sps": 2297.7713967547143 }, { "update": 395, "global_step": 1617920, "num_episodes": 238, "mean_reward": 372.63827639579773, "mean_length": 9336.77, "survival_pct": 0.6224513333333334, "max_steps": 15000, "loss": -0.1684369295835495, "sps": 2389.134775345221 }, { "update": 400, "global_step": 1638400, "num_episodes": 238, "mean_reward": 372.63827639579773, "mean_length": 9336.77, "survival_pct": 0.6224513333333334, "max_steps": 15000, "loss": -0.02054491639137268, "sps": 2482.540323380072 }, { "update": 405, "global_step": 1658880, "num_episodes": 238, "mean_reward": 372.63827639579773, "mean_length": 9336.77, "survival_pct": 0.46683850000000005, "max_steps": 20000, "loss": -0.06290135532617569, "sps": 2344.9434191419 }, { "update": 410, "global_step": 1679360, "num_episodes": 238, "mean_reward": 372.63827639579773, "mean_length": 9336.77, "survival_pct": 0.46683850000000005, "max_steps": 20000, "loss": -0.250944048166275, "sps": 2189.1388557903356 }, { "update": 415, "global_step": 1699840, "num_episodes": 244, "mean_reward": 370.749574341774, "mean_length": 9542.11, "survival_pct": 0.4771055, "max_steps": 20000, "loss": 8.776338577270508, "sps": 833.8081182273202 }, { "update": 420, "global_step": 1720320, "num_episodes": 247, "mean_reward": 366.3301660585403, "mean_length": 9730.75, "survival_pct": 0.4865375, "max_steps": 20000, "loss": 258.7391052246094, "sps": 608.4010837129435 }, { "update": 425, "global_step": 1740800, "num_episodes": 247, "mean_reward": 366.3301660585403, "mean_length": 9730.75, "survival_pct": 0.4865375, "max_steps": 20000, "loss": 16.064197540283203, "sps": 685.7919661773449 }, { "update": 430, "global_step": 1761280, "num_episodes": 249, "mean_reward": 369.06732979297635, "mean_length": 9930.93, "survival_pct": 0.4965465, "max_steps": 20000, "loss": 11.074816703796387, "sps": 775.2822777334978 }, { "update": 435, "global_step": 1781760, "num_episodes": 252, "mean_reward": 369.86892349243163, "mean_length": 10135.24, "survival_pct": 0.5067619999999999, "max_steps": 20000, "loss": 8.431387901306152, "sps": 1088.379163272055 }, { "update": 440, "global_step": 1802240, "num_episodes": 252, "mean_reward": 369.86892349243163, "mean_length": 10135.24, "survival_pct": 0.5067619999999999, "max_steps": 20000, "loss": 4.96181583404541, "sps": 1059.8013951608161 }, { "update": 445, "global_step": 1822720, "num_episodes": 252, "mean_reward": 369.86892349243163, "mean_length": 10135.24, "survival_pct": 0.5067619999999999, "max_steps": 20000, "loss": 3.2583541870117188, "sps": 1049.9750481736253 }, { "update": 450, "global_step": 1843200, "num_episodes": 252, "mean_reward": 369.86892349243163, "mean_length": 10135.24, "survival_pct": 0.5067619999999999, "max_steps": 20000, "loss": 1.682092308998108, "sps": 1042.7284091211875 }, { "update": 455, "global_step": 1863680, "num_episodes": 255, "mean_reward": 370.1215773010254, "mean_length": 10435.24, "survival_pct": 0.521762, "max_steps": 20000, "loss": 5.13987398147583, "sps": 399.81189577443547 }, { "update": 460, "global_step": 1884160, "num_episodes": 257, "mean_reward": 501.74020595550536, "mean_length": 10734.18, "survival_pct": 0.536709, "max_steps": 20000, "loss": 19.97075843811035, "sps": 377.7733693034371 }, { "update": 465, "global_step": 1904640, "num_episodes": 259, "mean_reward": 499.11380367279054, "mean_length": 10838.47, "survival_pct": 0.5419235, "max_steps": 20000, "loss": 6.397243022918701, "sps": 542.2637076266058 }, { "update": 470, "global_step": 1925120, "num_episodes": 260, "mean_reward": 485.6533655166626, "mean_length": 10696.07, "survival_pct": 0.5348035, "max_steps": 20000, "loss": 0.6303369998931885, "sps": 817.7485527810969 }, { "update": 475, "global_step": 1945600, "num_episodes": 263, "mean_reward": 484.7361448955536, "mean_length": 10797.13, "survival_pct": 0.5398565, "max_steps": 20000, "loss": 0.7859541773796082, "sps": 844.7722671839322 }, { "update": 480, "global_step": 1966080, "num_episodes": 263, "mean_reward": 484.7361448955536, "mean_length": 10797.13, "survival_pct": 0.5398565, "max_steps": 20000, "loss": 0.6309153437614441, "sps": 838.773769030313 }, { "update": 485, "global_step": 1986560, "num_episodes": 263, "mean_reward": 484.7361448955536, "mean_length": 10797.13, "survival_pct": 0.5398565, "max_steps": 20000, "loss": 0.18543700873851776, "sps": 820.910360397683 }, { "update": 490, "global_step": 2007040, "num_episodes": 263, "mean_reward": 484.7361448955536, "mean_length": 10797.13, "survival_pct": 0.5398565, "max_steps": 20000, "loss": 0.3058473467826843, "sps": 842.5485957089527 }, { "update": 495, "global_step": 2027520, "num_episodes": 269, "mean_reward": 425.1001238536835, "mean_length": 10806.87, "survival_pct": 0.5403435000000001, "max_steps": 20000, "loss": 2.353271245956421, "sps": 454.6827555011673 }, { "update": 500, "global_step": 2048000, "num_episodes": 269, "mean_reward": 425.1001238536835, "mean_length": 10806.87, "survival_pct": 0.5403435000000001, "max_steps": 20000, "loss": 1.0133743286132812, "sps": 475.15957681047615 }, { "update": 505, "global_step": 2068480, "num_episodes": 270, "mean_reward": 437.5397934818268, "mean_length": 10983.17, "survival_pct": 0.5491585, "max_steps": 20000, "loss": 0.6723721027374268, "sps": 601.483216529865 }, { "update": 510, "global_step": 2088960, "num_episodes": 271, "mean_reward": 439.91473383665084, "mean_length": 11180.4, "survival_pct": 0.55902, "max_steps": 20000, "loss": -0.033539168536663055, "sps": 736.1812387453508 }, { "update": 515, "global_step": 2109440, "num_episodes": 275, "mean_reward": 531.3333820033073, "mean_length": 11478.88, "survival_pct": 0.573944, "max_steps": 20000, "loss": 0.6327868700027466, "sps": 564.0725158498853 }, { "update": 520, "global_step": 2129920, "num_episodes": 275, "mean_reward": 531.3333820033073, "mean_length": 11478.88, "survival_pct": 0.573944, "max_steps": 20000, "loss": 0.49002259969711304, "sps": 658.4478053303787 }, { "update": 525, "global_step": 2150400, "num_episodes": 275, "mean_reward": 531.3333820033073, "mean_length": 11478.88, "survival_pct": 0.573944, "max_steps": 20000, "loss": 0.24342404305934906, "sps": 891.360496999003 }, { "update": 530, "global_step": 2170880, "num_episodes": 275, "mean_reward": 531.3333820033073, "mean_length": 11478.88, "survival_pct": 0.573944, "max_steps": 20000, "loss": 0.16686059534549713, "sps": 1010.9528682298816 }, { "update": 535, "global_step": 2191360, "num_episodes": 284, "mean_reward": 521.5117145895958, "mean_length": 10942.83, "survival_pct": 0.5471415, "max_steps": 20000, "loss": 0.2558882236480713, "sps": 469.7280569365049 }, { "update": 540, "global_step": 2211840, "num_episodes": 284, "mean_reward": 521.5117145895958, "mean_length": 10942.83, "survival_pct": 0.5471415, "max_steps": 20000, "loss": 0.1111864298582077, "sps": 509.6937089521736 }, { "update": 545, "global_step": 2232320, "num_episodes": 286, "mean_reward": 512.1946889853477, "mean_length": 11042.83, "survival_pct": 0.5521415, "max_steps": 20000, "loss": 3.9095962047576904, "sps": 511.8590018524491 }, { "update": 550, "global_step": 2252800, "num_episodes": 287, "mean_reward": 501.32572416067126, "mean_length": 10911.73, "survival_pct": 0.5455865, "max_steps": 20000, "loss": -0.03819906711578369, "sps": 632.8032522356763 }, { "update": 555, "global_step": 2273280, "num_episodes": 292, "mean_reward": 503.7082317852974, "mean_length": 10844.31, "survival_pct": 0.5422155, "max_steps": 20000, "loss": 10.365863800048828, "sps": 429.53248817964095 }, { "update": 560, "global_step": 2293760, "num_episodes": 292, "mean_reward": 503.7082317852974, "mean_length": 10844.31, "survival_pct": 0.5422155, "max_steps": 20000, "loss": 2.5824058055877686, "sps": 835.0918364484975 }, { "update": 565, "global_step": 2314240, "num_episodes": 292, "mean_reward": 503.7082317852974, "mean_length": 10844.31, "survival_pct": 0.5422155, "max_steps": 20000, "loss": -0.015099406242370605, "sps": 823.3139156925881 }, { "update": 570, "global_step": 2334720, "num_episodes": 292, "mean_reward": 503.7082317852974, "mean_length": 10844.31, "survival_pct": 0.5422155, "max_steps": 20000, "loss": 0.004335612058639526, "sps": 847.512127655439 }, { "update": 575, "global_step": 2355200, "num_episodes": 296, "mean_reward": 506.11397255182266, "mean_length": 11193.09, "survival_pct": 0.5596545, "max_steps": 20000, "loss": 1.3773071765899658, "sps": 538.0889941452151 }, { "update": 580, "global_step": 2375680, "num_episodes": 296, "mean_reward": 506.11397255182266, "mean_length": 11193.09, "survival_pct": 0.5596545, "max_steps": 20000, "loss": 0.6394574046134949, "sps": 502.97316152248266 }, { "update": 585, "global_step": 2396160, "num_episodes": 297, "mean_reward": 508.4939224600792, "mean_length": 11391.04, "survival_pct": 0.5695520000000001, "max_steps": 20000, "loss": 36.28646469116211, "sps": 439.92401762976914 }, { "update": 590, "global_step": 2416640, "num_episodes": 298, "mean_reward": 510.8762067055702, "mean_length": 11589.3, "survival_pct": 0.579465, "max_steps": 20000, "loss": 13.898605346679688, "sps": 282.2760969771352 }, { "update": 595, "global_step": 2437120, "num_episodes": 301, "mean_reward": 526.9176897263527, "mean_length": 11688.03, "survival_pct": 0.5844015, "max_steps": 20000, "loss": 38.274803161621094, "sps": 332.3030413271302 }, { "update": 600, "global_step": 2457600, "num_episodes": 303, "mean_reward": 525.6999688172341, "mean_length": 11569.45, "survival_pct": 0.5784725000000001, "max_steps": 20000, "loss": 2.9384140968322754, "sps": 364.8644804633228 }, { "update": 605, "global_step": 2478080, "num_episodes": 304, "mean_reward": 524.2420133042335, "mean_length": 11426.62, "survival_pct": 0.45706480000000005, "max_steps": 25000, "loss": 438.5118408203125, "sps": 420.0558278495418 }, { "update": 610, "global_step": 2498560, "num_episodes": 304, "mean_reward": 524.2420133042335, "mean_length": 11426.62, "survival_pct": 0.45706480000000005, "max_steps": 25000, "loss": 6.759511947631836, "sps": 435.9725569680644 }, { "update": 615, "global_step": 2519040, "num_episodes": 304, "mean_reward": 524.2420133042335, "mean_length": 11426.62, "survival_pct": 0.45706480000000005, "max_steps": 25000, "loss": 2.7961549758911133, "sps": 483.2696887623864 }, { "update": 620, "global_step": 2539520, "num_episodes": 304, "mean_reward": 524.2420133042335, "mean_length": 11426.62, "survival_pct": 0.45706480000000005, "max_steps": 25000, "loss": 2.2368013858795166, "sps": 488.6334457862468 }, { "update": 625, "global_step": 2560000, "num_episodes": 309, "mean_reward": 559.3683041572571, "mean_length": 12243.92, "survival_pct": 0.4897568, "max_steps": 25000, "loss": 706.3812255859375, "sps": 240.30170576065763 }, { "update": 630, "global_step": 2580480, "num_episodes": 311, "mean_reward": 572.5723537635803, "mean_length": 12360.26, "survival_pct": 0.4944104, "max_steps": 25000, "loss": 13.363809585571289, "sps": 266.8083792943256 }, { "update": 635, "global_step": 2600960, "num_episodes": 316, "mean_reward": 564.724127240181, "mean_length": 11785.88, "survival_pct": 0.47143519999999994, "max_steps": 25000, "loss": 6.69994592666626, "sps": 303.78529409460765 }, { "update": 640, "global_step": 2621440, "num_episodes": 316, "mean_reward": 564.724127240181, "mean_length": 11785.88, "survival_pct": 0.47143519999999994, "max_steps": 25000, "loss": 629.8490600585938, "sps": 347.05583673981266 }, { "update": 645, "global_step": 2641920, "num_episodes": 325, "mean_reward": 599.5753115653991, "mean_length": 11091.83, "survival_pct": 0.4436732, "max_steps": 25000, "loss": 14.624711990356445, "sps": 202.6999148028545 }, { "update": 650, "global_step": 2662400, "num_episodes": 326, "mean_reward": 681.9963491630555, "mean_length": 11191.83, "survival_pct": 0.4476732, "max_steps": 25000, "loss": 14.961444854736328, "sps": 324.3719674553881 }, { "update": 655, "global_step": 2682880, "num_episodes": 329, "mean_reward": 681.6811992406845, "mean_length": 11184.4, "survival_pct": 0.447376, "max_steps": 25000, "loss": 1098.8870849609375, "sps": 282.5805927303007 }, { "update": 660, "global_step": 2703360, "num_episodes": 329, "mean_reward": 681.6811992406845, "mean_length": 11184.4, "survival_pct": 0.447376, "max_steps": 25000, "loss": 18.618370056152344, "sps": 328.8874894706864 }, { "update": 665, "global_step": 2723840, "num_episodes": 330, "mean_reward": 669.1296841955185, "mean_length": 11166.48, "survival_pct": 0.4466592, "max_steps": 25000, "loss": 0.3438085615634918, "sps": 350.7381600879147 }, { "update": 670, "global_step": 2744320, "num_episodes": 333, "mean_reward": 663.7387618637085, "mean_length": 11120.64, "survival_pct": 0.4448256, "max_steps": 25000, "loss": 1.9492148160934448, "sps": 336.44611386700046 }, { "update": 675, "global_step": 2764800, "num_episodes": 340, "mean_reward": 764.0141823387146, "mean_length": 11231.97, "survival_pct": 0.4492788, "max_steps": 25000, "loss": 11.20479679107666, "sps": 269.88464376416727 }, { "update": 680, "global_step": 2785280, "num_episodes": 341, "mean_reward": 763.7054350566864, "mean_length": 11237.76, "survival_pct": 0.44951040000000003, "max_steps": 25000, "loss": 2.3942978382110596, "sps": 368.845880053541 }, { "update": 685, "global_step": 2805760, "num_episodes": 341, "mean_reward": 763.7054350566864, "mean_length": 11237.76, "survival_pct": 0.44951040000000003, "max_steps": 25000, "loss": 0.6632025241851807, "sps": 437.7624284451732 }, { "update": 690, "global_step": 2826240, "num_episodes": 341, "mean_reward": 763.7054350566864, "mean_length": 11237.76, "survival_pct": 0.44951040000000003, "max_steps": 25000, "loss": 0.12569601833820343, "sps": 456.53233478658586 }, { "update": 695, "global_step": 2846720, "num_episodes": 343, "mean_reward": 799.47291888237, "mean_length": 11532.37, "survival_pct": 0.4612948, "max_steps": 25000, "loss": 32.66535949707031, "sps": 482.45747353519477 }, { "update": 700, "global_step": 2867200, "num_episodes": 344, "mean_reward": 900.5657841777802, "mean_length": 11582.37, "survival_pct": 0.4632948, "max_steps": 25000, "loss": 4.394363880157471, "sps": 602.707199771208 }, { "update": 705, "global_step": 2887680, "num_episodes": 344, "mean_reward": 900.5657841777802, "mean_length": 11582.37, "survival_pct": 0.4632948, "max_steps": 25000, "loss": 1.1503143310546875, "sps": 592.2907627828818 }, { "update": 710, "global_step": 2908160, "num_episodes": 345, "mean_reward": 917.1899351406097, "mean_length": 11829.79, "survival_pct": 0.47319160000000005, "max_steps": 25000, "loss": 20.407194137573242, "sps": 459.7672415701048 }, { "update": 715, "global_step": 2928640, "num_episodes": 346, "mean_reward": 915.0635627651214, "mean_length": 11657.27, "survival_pct": 0.4662908, "max_steps": 25000, "loss": 51.53656768798828, "sps": 484.2493919142758 }, { "update": 720, "global_step": 2949120, "num_episodes": 347, "mean_reward": 915.567684469223, "mean_length": 11707.27, "survival_pct": 0.4682908, "max_steps": 25000, "loss": 2.852640151977539, "sps": 564.1661889935658 }, { "update": 725, "global_step": 2969600, "num_episodes": 351, "mean_reward": 933.7657800292968, "mean_length": 11805.02, "survival_pct": 0.47220080000000003, "max_steps": 25000, "loss": 37.6703987121582, "sps": 269.4710162736009 }, { "update": 730, "global_step": 2990080, "num_episodes": 356, "mean_reward": 790.1477946281433, "mean_length": 11063.86, "survival_pct": 0.4425544, "max_steps": 25000, "loss": 92.7292709350586, "sps": 249.14561378417451 }, { "update": 735, "global_step": 3010560, "num_episodes": 356, "mean_reward": 790.1477946281433, "mean_length": 11063.86, "survival_pct": 0.4425544, "max_steps": 25000, "loss": 3.52268648147583, "sps": 336.33639220363955 }, { "update": 740, "global_step": 3031040, "num_episodes": 356, "mean_reward": 790.1477946281433, "mean_length": 11063.86, "survival_pct": 0.4425544, "max_steps": 25000, "loss": 1.0818921327590942, "sps": 322.89272707726104 }, { "update": 745, "global_step": 3051520, "num_episodes": 363, "mean_reward": 836.2594112110138, "mean_length": 10753.72, "survival_pct": 0.4301488, "max_steps": 25000, "loss": 6.418513774871826, "sps": 303.95242510324334 }, { "update": 750, "global_step": 3072000, "num_episodes": 369, "mean_reward": 865.2751739215851, "mean_length": 10221.97, "survival_pct": 0.4088788, "max_steps": 25000, "loss": 4.221797943115234, "sps": 550.4274763185425 }, { "update": 755, "global_step": 3092480, "num_episodes": 369, "mean_reward": 865.2751739215851, "mean_length": 10221.97, "survival_pct": 0.4088788, "max_steps": 25000, "loss": 1.761741280555725, "sps": 498.20561049907593 }, { "update": 760, "global_step": 3112960, "num_episodes": 369, "mean_reward": 865.2751739215851, "mean_length": 10221.97, "survival_pct": 0.4088788, "max_steps": 25000, "loss": 58.8026008605957, "sps": 521.3350619859364 }, { "update": 765, "global_step": 3133440, "num_episodes": 370, "mean_reward": 860.6973748493194, "mean_length": 10271.97, "survival_pct": 0.4108788, "max_steps": 25000, "loss": 1.9308984279632568, "sps": 438.4037568068571 }, { "update": 770, "global_step": 3153920, "num_episodes": 373, "mean_reward": 770.2798270845414, "mean_length": 9927.86, "survival_pct": 0.39711440000000003, "max_steps": 25000, "loss": 2.7318155765533447, "sps": 348.45193944086344 }, { "update": 775, "global_step": 3174400, "num_episodes": 377, "mean_reward": 806.3474672365188, "mean_length": 10020.65, "survival_pct": 0.40082599999999996, "max_steps": 25000, "loss": 0.32981979846954346, "sps": 555.3298506322647 }, { "update": 780, "global_step": 3194880, "num_episodes": 383, "mean_reward": 853.3938677740097, "mean_length": 10073.78, "survival_pct": 0.4029512, "max_steps": 25000, "loss": 0.16146810352802277, "sps": 1099.1025857576844 }, { "update": 785, "global_step": 3215360, "num_episodes": 383, "mean_reward": 853.3938677740097, "mean_length": 10073.78, "survival_pct": 0.4029512, "max_steps": 25000, "loss": -0.09470260143280029, "sps": 1762.63191717742 }, { "update": 790, "global_step": 3235840, "num_episodes": 384, "mean_reward": 853.8934272527695, "mean_length": 10123.78, "survival_pct": 0.4049512, "max_steps": 25000, "loss": -0.06440502405166626, "sps": 1195.8323452559932 }, { "update": 795, "global_step": 3256320, "num_episodes": 384, "mean_reward": 853.8934272527695, "mean_length": 10123.78, "survival_pct": 0.4049512, "max_steps": 25000, "loss": -0.15972009301185608, "sps": 888.4678009939021 }, { "update": 800, "global_step": 3276800, "num_episodes": 388, "mean_reward": 838.5986885023117, "mean_length": 10010.11, "survival_pct": 0.40040440000000005, "max_steps": 25000, "loss": 0.09807762503623962, "sps": 471.1430031097595 }, { "update": 805, "global_step": 3297280, "num_episodes": 388, "mean_reward": 838.5986885023117, "mean_length": 10010.11, "survival_pct": 0.33367033333333335, "max_steps": 30000, "loss": -0.0691152960062027, "sps": 1265.7956126806866 }, { "update": 810, "global_step": 3317760, "num_episodes": 388, "mean_reward": 838.5986885023117, "mean_length": 10010.11, "survival_pct": 0.33367033333333335, "max_steps": 30000, "loss": -0.1727883517742157, "sps": 1231.865460337574 }, { "update": 815, "global_step": 3338240, "num_episodes": 388, "mean_reward": 838.5986885023117, "mean_length": 10010.11, "survival_pct": 0.33367033333333335, "max_steps": 30000, "loss": -0.11673803627490997, "sps": 1256.6835286888843 }, { "update": 820, "global_step": 3358720, "num_episodes": 388, "mean_reward": 838.5986885023117, "mean_length": 10010.11, "survival_pct": 0.33367033333333335, "max_steps": 30000, "loss": -0.2521955370903015, "sps": 1137.9313207515677 }, { "update": 825, "global_step": 3379200, "num_episodes": 391, "mean_reward": 839.7568208217621, "mean_length": 10100.5, "survival_pct": 0.33668333333333333, "max_steps": 30000, "loss": -0.10739608108997345, "sps": 1220.8177867257723 }, { "update": 830, "global_step": 3399680, "num_episodes": 395, "mean_reward": 839.0241325330734, "mean_length": 10101.66, "survival_pct": 0.336722, "max_steps": 30000, "loss": 18.019046783447266, "sps": 656.125042039298 }, { "update": 835, "global_step": 3420160, "num_episodes": 402, "mean_reward": 813.9229806566238, "mean_length": 9366.46, "survival_pct": 0.3122153333333333, "max_steps": 30000, "loss": 3.2142348289489746, "sps": 348.9181765210399 }, { "update": 840, "global_step": 3440640, "num_episodes": 408, "mean_reward": 770.4422649216652, "mean_length": 8932.2, "survival_pct": 0.29774, "max_steps": 30000, "loss": 29.716121673583984, "sps": 258.20931803063246 }, { "update": 845, "global_step": 3461120, "num_episodes": 409, "mean_reward": 767.6314169716835, "mean_length": 8691.39, "survival_pct": 0.289713, "max_steps": 30000, "loss": 0.5027515888214111, "sps": 910.4499991149808 }, { "update": 850, "global_step": 3481600, "num_episodes": 412, "mean_reward": 755.9645525097847, "mean_length": 8707.21, "survival_pct": 0.2902403333333333, "max_steps": 30000, "loss": 2.889087438583374, "sps": 492.2319923662775 }, { "update": 855, "global_step": 3502080, "num_episodes": 415, "mean_reward": 755.2824851679802, "mean_length": 8717.11, "survival_pct": 0.2905703333333334, "max_steps": 30000, "loss": -0.22625428438186646, "sps": 1029.9811438873032 }, { "update": 860, "global_step": 3522560, "num_episodes": 420, "mean_reward": 659.1974053931236, "mean_length": 9077.37, "survival_pct": 0.30257900000000004, "max_steps": 30000, "loss": 2.288820505142212, "sps": 1016.411393455018 }, { "update": 865, "global_step": 3543040, "num_episodes": 420, "mean_reward": 659.1974053931236, "mean_length": 9077.37, "survival_pct": 0.30257900000000004, "max_steps": 30000, "loss": 0.2482612133026123, "sps": 1109.4319732205106 }, { "update": 870, "global_step": 3563520, "num_episodes": 420, "mean_reward": 659.1974053931236, "mean_length": 9077.37, "survival_pct": 0.30257900000000004, "max_steps": 30000, "loss": 0.09613563120365143, "sps": 1196.7584168005542 }, { "update": 875, "global_step": 3584000, "num_episodes": 420, "mean_reward": 659.1974053931236, "mean_length": 9077.37, "survival_pct": 0.30257900000000004, "max_steps": 30000, "loss": 0.2699776887893677, "sps": 1128.2554610475702 }, { "update": 880, "global_step": 3604480, "num_episodes": 420, "mean_reward": 659.1974053931236, "mean_length": 9077.37, "survival_pct": 0.30257900000000004, "max_steps": 30000, "loss": 0.020249858498573303, "sps": 1162.0742075936244 }, { "update": 885, "global_step": 3624960, "num_episodes": 421, "mean_reward": 662.2708982825279, "mean_length": 9375.82, "survival_pct": 0.3125273333333333, "max_steps": 30000, "loss": 0.4013591408729553, "sps": 1113.5340322378984 }, { "update": 890, "global_step": 3645440, "num_episodes": 422, "mean_reward": 665.2683095526695, "mean_length": 9673.23, "survival_pct": 0.322441, "max_steps": 30000, "loss": 2.35748028755188, "sps": 803.983802083326 }, { "update": 895, "global_step": 3665920, "num_episodes": 425, "mean_reward": 654.4653234362602, "mean_length": 9739.55, "survival_pct": 0.3246516666666666, "max_steps": 30000, "loss": 19.605010986328125, "sps": 421.5912921054319 }, { "update": 900, "global_step": 3686400, "num_episodes": 428, "mean_reward": 590.155419728756, "mean_length": 10093.48, "survival_pct": 0.3364493333333333, "max_steps": 30000, "loss": 38.713653564453125, "sps": 564.4312985780526 }, { "update": 905, "global_step": 3706880, "num_episodes": 428, "mean_reward": 590.155419728756, "mean_length": 10093.48, "survival_pct": 0.3364493333333333, "max_steps": 30000, "loss": 136.64389038085938, "sps": 613.1553074617179 }, { "update": 910, "global_step": 3727360, "num_episodes": 429, "mean_reward": 590.9192177844047, "mean_length": 10167.56, "survival_pct": 0.33891866666666665, "max_steps": 30000, "loss": 8.999781608581543, "sps": 511.63656492456863 }, { "update": 915, "global_step": 3747840, "num_episodes": 431, "mean_reward": 590.5451223254204, "mean_length": 10116.26, "survival_pct": 0.33720866666666666, "max_steps": 30000, "loss": 49.023075103759766, "sps": 506.30244315776105 }, { "update": 920, "global_step": 3768320, "num_episodes": 434, "mean_reward": 599.4833398604393, "mean_length": 10470.43, "survival_pct": 0.3490143333333333, "max_steps": 30000, "loss": 33.072776794433594, "sps": 346.8145463929963 }, { "update": 925, "global_step": 3788800, "num_episodes": 436, "mean_reward": 494.2830310034752, "mean_length": 10279.47, "survival_pct": 0.342649, "max_steps": 30000, "loss": 0.7968235015869141, "sps": 973.2809780471914 }, { "update": 930, "global_step": 3809280, "num_episodes": 436, "mean_reward": 494.2830310034752, "mean_length": 10279.47, "survival_pct": 0.342649, "max_steps": 30000, "loss": -0.05825723707675934, "sps": 966.3113177290679 }, { "update": 935, "global_step": 3829760, "num_episodes": 436, "mean_reward": 494.2830310034752, "mean_length": 10279.47, "survival_pct": 0.342649, "max_steps": 30000, "loss": -0.10611464828252792, "sps": 1015.3673402421392 }, { "update": 940, "global_step": 3850240, "num_episodes": 437, "mean_reward": 502.9565402960777, "mean_length": 10567.82, "survival_pct": 0.35226066666666667, "max_steps": 30000, "loss": -0.08356830477714539, "sps": 944.3927727566447 }, { "update": 945, "global_step": 3870720, "num_episodes": 439, "mean_reward": 500.08963894605637, "mean_length": 10318.88, "survival_pct": 0.34396266666666664, "max_steps": 30000, "loss": 5.465578556060791, "sps": 529.1770253242089 }, { "update": 950, "global_step": 3891200, "num_episodes": 440, "mean_reward": 503.0822184062004, "mean_length": 10614.79, "survival_pct": 0.35382633333333335, "max_steps": 30000, "loss": 3.838916301727295, "sps": 760.7930670056104 }, { "update": 955, "global_step": 3911680, "num_episodes": 442, "mean_reward": 467.75900787115097, "mean_length": 10391.71, "survival_pct": 0.3463903333333333, "max_steps": 30000, "loss": -0.04477877914905548, "sps": 965.6263182051849 }, { "update": 960, "global_step": 3932160, "num_episodes": 445, "mean_reward": 385.69126527786256, "mean_length": 10244.33, "survival_pct": 0.3414776666666667, "max_steps": 30000, "loss": 0.345672070980072, "sps": 1268.264040705237 }, { "update": 965, "global_step": 3952640, "num_episodes": 445, "mean_reward": 385.69126527786256, "mean_length": 10244.33, "survival_pct": 0.3414776666666667, "max_steps": 30000, "loss": -0.09349031746387482, "sps": 1253.8499203200772 }, { "update": 970, "global_step": 3973120, "num_episodes": 446, "mean_reward": 388.8185606575012, "mean_length": 10516.85, "survival_pct": 0.35056166666666666, "max_steps": 30000, "loss": 9.252518653869629, "sps": 855.3500954683526 }, { "update": 975, "global_step": 3993600, "num_episodes": 447, "mean_reward": 386.00149038314817, "mean_length": 10275.2, "survival_pct": 0.3425066666666667, "max_steps": 30000, "loss": 21.241113662719727, "sps": 950.7120230957267 }, { "update": 980, "global_step": 4014080, "num_episodes": 453, "mean_reward": 415.23604825496676, "mean_length": 10424.75, "survival_pct": 0.34749166666666664, "max_steps": 30000, "loss": 6.508986473083496, "sps": 715.3695692293137 }, { "update": 985, "global_step": 4034560, "num_episodes": 458, "mean_reward": 411.35455381393433, "mean_length": 10193.61, "survival_pct": 0.339787, "max_steps": 30000, "loss": 14.101386070251465, "sps": 436.54294748826067 }, { "update": 990, "global_step": 4055040, "num_episodes": 460, "mean_reward": 352.4448234796524, "mean_length": 9962.59, "survival_pct": 0.3320863333333333, "max_steps": 30000, "loss": 5.007307529449463, "sps": 657.8546600358904 }, { "update": 995, "global_step": 4075520, "num_episodes": 461, "mean_reward": 352.60661952495576, "mean_length": 9978.71, "survival_pct": 0.33262366666666665, "max_steps": 30000, "loss": -0.05159700661897659, "sps": 731.8566720639022 }, { "update": 1000, "global_step": 4096000, "num_episodes": 461, "mean_reward": 352.60661952495576, "mean_length": 9978.71, "survival_pct": 0.33262366666666665, "max_steps": 30000, "loss": -0.05063310265541077, "sps": 1192.9198062233531 }, { "update": 1005, "global_step": 4116480, "num_episodes": 461, "mean_reward": 352.60661952495576, "mean_length": 9978.71, "survival_pct": 0.33262366666666665, "max_steps": 30000, "loss": -0.12464120984077454, "sps": 1133.1887649612727 }, { "update": 1010, "global_step": 4136960, "num_episodes": 465, "mean_reward": 371.26235566139223, "mean_length": 10558.13, "survival_pct": 0.35193766666666665, "max_steps": 30000, "loss": 7.669186115264893, "sps": 701.8889529681794 }, { "update": 1015, "global_step": 4157440, "num_episodes": 466, "mean_reward": 375.76679421424865, "mean_length": 10855.0, "survival_pct": 0.36183333333333334, "max_steps": 30000, "loss": 12.382041931152344, "sps": 489.4857959286267 }, { "update": 1020, "global_step": 4177920, "num_episodes": 469, "mean_reward": 341.31604763984683, "mean_length": 10927.42, "survival_pct": 0.3642473333333333, "max_steps": 30000, "loss": 19.83820915222168, "sps": 590.4760026903639 }, { "update": 1025, "global_step": 4198400, "num_episodes": 473, "mean_reward": 331.36366960048673, "mean_length": 10432.91, "survival_pct": 0.34776366666666664, "max_steps": 30000, "loss": 183.8989715576172, "sps": 434.18935040659056 }, { "update": 1030, "global_step": 4218880, "num_episodes": 475, "mean_reward": 303.6989562559128, "mean_length": 10480.3, "survival_pct": 0.3493433333333333, "max_steps": 30000, "loss": 139.46194458007812, "sps": 590.2434550847422 }, { "update": 1035, "global_step": 4239360, "num_episodes": 475, "mean_reward": 303.6989562559128, "mean_length": 10480.3, "survival_pct": 0.3493433333333333, "max_steps": 30000, "loss": 4.460475444793701, "sps": 539.7108366907394 }, { "update": 1040, "global_step": 4259840, "num_episodes": 477, "mean_reward": 304.03712359905245, "mean_length": 10529.43, "survival_pct": 0.350981, "max_steps": 30000, "loss": 41.53654098510742, "sps": 570.0864654535922 }, { "update": 1045, "global_step": 4280320, "num_episodes": 482, "mean_reward": 257.8429533290863, "mean_length": 10591.03, "survival_pct": 0.35303433333333334, "max_steps": 30000, "loss": 12.68658447265625, "sps": 306.11899074758105 }, { "update": 1050, "global_step": 4300800, "num_episodes": 484, "mean_reward": 258.245273809433, "mean_length": 10666.92, "survival_pct": 0.355564, "max_steps": 30000, "loss": 34.416778564453125, "sps": 343.9275197061659 }, { "update": 1055, "global_step": 4321280, "num_episodes": 484, "mean_reward": 258.245273809433, "mean_length": 10666.92, "survival_pct": 0.355564, "max_steps": 30000, "loss": 0.8467625975608826, "sps": 529.8299357550097 }, { "update": 1060, "global_step": 4341760, "num_episodes": 484, "mean_reward": 258.245273809433, "mean_length": 10666.92, "survival_pct": 0.355564, "max_steps": 30000, "loss": 0.5682471990585327, "sps": 729.0597707102293 }, { "update": 1065, "global_step": 4362240, "num_episodes": 484, "mean_reward": 258.245273809433, "mean_length": 10666.92, "survival_pct": 0.355564, "max_steps": 30000, "loss": 0.36413297057151794, "sps": 703.8506900902968 }, { "update": 1070, "global_step": 4382720, "num_episodes": 491, "mean_reward": 255.56333970069886, "mean_length": 10467.79, "survival_pct": 0.34892633333333334, "max_steps": 30000, "loss": 30.412242889404297, "sps": 257.45270078331555 }, { "update": 1075, "global_step": 4403200, "num_episodes": 491, "mean_reward": 255.56333970069886, "mean_length": 10467.79, "survival_pct": 0.34892633333333334, "max_steps": 30000, "loss": 0.4469672739505768, "sps": 393.6182671059828 }, { "update": 1080, "global_step": 4423680, "num_episodes": 493, "mean_reward": 270.52365421295167, "mean_length": 10466.3, "survival_pct": 0.34887666666666667, "max_steps": 30000, "loss": 1.5025949478149414, "sps": 297.01417371765183 }, { "update": 1085, "global_step": 4444160, "num_episodes": 494, "mean_reward": 351.42790958404544, "mean_length": 10466.3, "survival_pct": 0.34887666666666667, "max_steps": 30000, "loss": 713.0115356445312, "sps": 337.53102630233496 }, { "update": 1090, "global_step": 4464640, "num_episodes": 501, "mean_reward": 389.6291408967972, "mean_length": 10484.48, "survival_pct": 0.34948266666666666, "max_steps": 30000, "loss": 376.2599182128906, "sps": 322.42891915898485 }, { "update": 1095, "global_step": 4485120, "num_episodes": 503, "mean_reward": 389.9803589296341, "mean_length": 10507.49, "survival_pct": 0.3502496666666667, "max_steps": 30000, "loss": 1246.39453125, "sps": 366.59931296887805 }, { "update": 1100, "global_step": 4505600, "num_episodes": 503, "mean_reward": 389.9803589296341, "mean_length": 10507.49, "survival_pct": 0.3502496666666667, "max_steps": 30000, "loss": 0.49186083674430847, "sps": 434.33848528937693 }, { "update": 1105, "global_step": 4526080, "num_episodes": 505, "mean_reward": 402.4173453474045, "mean_length": 10496.98, "survival_pct": 0.34989933333333334, "max_steps": 30000, "loss": 17.757164001464844, "sps": 288.5970778332255 }, { "update": 1110, "global_step": 4546560, "num_episodes": 508, "mean_reward": 443.8068909239769, "mean_length": 11091.98, "survival_pct": 0.36973266666666665, "max_steps": 30000, "loss": 69.42852783203125, "sps": 284.1876581819077 }, { "update": 1115, "global_step": 4567040, "num_episodes": 508, "mean_reward": 443.8068909239769, "mean_length": 11091.98, "survival_pct": 0.36973266666666665, "max_steps": 30000, "loss": 1.8417774438858032, "sps": 318.68963440900745 }, { "update": 1120, "global_step": 4587520, "num_episodes": 511, "mean_reward": 441.4550348258019, "mean_length": 10837.05, "survival_pct": 0.361235, "max_steps": 30000, "loss": 255.734619140625, "sps": 297.64729649475885 }, { "update": 1125, "global_step": 4608000, "num_episodes": 511, "mean_reward": 441.4550348258019, "mean_length": 10837.05, "survival_pct": 0.361235, "max_steps": 30000, "loss": 1.7074986696243286, "sps": 348.6616523408526 }, { "update": 1130, "global_step": 4628480, "num_episodes": 514, "mean_reward": 492.0692998147011, "mean_length": 11430.47, "survival_pct": 0.38101566666666664, "max_steps": 30000, "loss": 2.311823844909668, "sps": 406.9030800405532 }, { "update": 1135, "global_step": 4648960, "num_episodes": 515, "mean_reward": 563.7019203495979, "mean_length": 11721.54, "survival_pct": 0.390718, "max_steps": 30000, "loss": 1047.7276611328125, "sps": 314.10929898406806 }, { "update": 1140, "global_step": 4669440, "num_episodes": 516, "mean_reward": 560.5978558659554, "mean_length": 11438.6, "survival_pct": 0.38128666666666666, "max_steps": 30000, "loss": 9.904751777648926, "sps": 403.9643544594583 }, { "update": 1145, "global_step": 4689920, "num_episodes": 516, "mean_reward": 560.5978558659554, "mean_length": 11438.6, "survival_pct": 0.38128666666666666, "max_steps": 30000, "loss": 0.4376460909843445, "sps": 554.0697388186311 }, { "update": 1150, "global_step": 4710400, "num_episodes": 518, "mean_reward": 564.0319487595558, "mean_length": 11749.21, "survival_pct": 0.3916403333333333, "max_steps": 30000, "loss": 0.26738616824150085, "sps": 579.422583932766 }, { "update": 1155, "global_step": 4730880, "num_episodes": 519, "mean_reward": 564.0318553757668, "mean_length": 11749.21, "survival_pct": 0.3916403333333333, "max_steps": 30000, "loss": 2.65568208694458, "sps": 435.32093187971185 }, { "update": 1160, "global_step": 4751360, "num_episodes": 520, "mean_reward": 606.8646422314644, "mean_length": 12037.17, "survival_pct": 0.401239, "max_steps": 30000, "loss": 0.08337657153606415, "sps": 480.58739931645295 }, { "update": 1165, "global_step": 4771840, "num_episodes": 523, "mean_reward": 600.3835196709633, "mean_length": 11435.34, "survival_pct": 0.381178, "max_steps": 30000, "loss": 8.820674896240234, "sps": 284.63083015732 }, { "update": 1170, "global_step": 4792320, "num_episodes": 524, "mean_reward": 612.3443924736977, "mean_length": 11435.34, "survival_pct": 0.381178, "max_steps": 30000, "loss": 34.61191940307617, "sps": 250.67135617300423 }, { "update": 1175, "global_step": 4812800, "num_episodes": 525, "mean_reward": 612.3594747567176, "mean_length": 11474.33, "survival_pct": 0.38247766666666666, "max_steps": 30000, "loss": 1.2452768087387085, "sps": 607.4645366539573 }, { "update": 1180, "global_step": 4833280, "num_episodes": 527, "mean_reward": 692.7924189066887, "mean_length": 11466.93, "survival_pct": 0.382231, "max_steps": 30000, "loss": 0.4541545510292053, "sps": 527.819759792867 }, { "update": 1185, "global_step": 4853760, "num_episodes": 527, "mean_reward": 692.7924189066887, "mean_length": 11466.93, "survival_pct": 0.382231, "max_steps": 30000, "loss": 0.30217307806015015, "sps": 510.7606822708712 }, { "update": 1190, "global_step": 4874240, "num_episodes": 531, "mean_reward": 708.1065727066994, "mean_length": 11363.17, "survival_pct": 0.3787723333333333, "max_steps": 30000, "loss": 0.9803248643875122, "sps": 410.8110604092221 }, { "update": 1195, "global_step": 4894720, "num_episodes": 531, "mean_reward": 708.1065727066994, "mean_length": 11363.17, "survival_pct": 0.3787723333333333, "max_steps": 30000, "loss": 0.13086289167404175, "sps": 615.3777693911919 }, { "update": 1200, "global_step": 4915200, "num_episodes": 534, "mean_reward": 699.0353853631019, "mean_length": 11058.72, "survival_pct": 0.36862399999999995, "max_steps": 30000, "loss": 712.1206665039062, "sps": 388.8735852210615 }, { "update": 1205, "global_step": 4935680, "num_episodes": 535, "mean_reward": 698.9299844956398, "mean_length": 11044.65, "survival_pct": 0.27611625, "max_steps": 40000, "loss": 0.9771831035614014, "sps": 687.4149876254356 }, { "update": 1210, "global_step": 4956160, "num_episodes": 535, "mean_reward": 698.9299844956398, "mean_length": 11044.65, "survival_pct": 0.27611625, "max_steps": 40000, "loss": 0.8561661243438721, "sps": 675.4253456273739 }, { "update": 1215, "global_step": 4976640, "num_episodes": 535, "mean_reward": 698.9299844956398, "mean_length": 11044.65, "survival_pct": 0.27611625, "max_steps": 40000, "loss": 0.5291672945022583, "sps": 747.9105659323011 }, { "update": 1220, "global_step": 4997120, "num_episodes": 535, "mean_reward": 698.9299844956398, "mean_length": 11044.65, "survival_pct": 0.27611625, "max_steps": 40000, "loss": 0.3995021879673004, "sps": 710.3102224548963 }, { "update": 1225, "global_step": 5017600, "num_episodes": 535, "mean_reward": 698.9299844956398, "mean_length": 11044.65, "survival_pct": 0.27611625, "max_steps": 40000, "loss": 0.19824837148189545, "sps": 714.1081614413822 }, { "update": 1230, "global_step": 5038080, "num_episodes": 537, "mean_reward": 694.6386151909828, "mean_length": 11117.3, "survival_pct": 0.27793249999999997, "max_steps": 40000, "loss": 0.6759960651397705, "sps": 620.8841198425342 }, { "update": 1235, "global_step": 5058560, "num_episodes": 543, "mean_reward": 676.4015409398079, "mean_length": 10875.12, "survival_pct": 0.271878, "max_steps": 40000, "loss": 8.773111343383789, "sps": 504.81741951884067 }, { "update": 1240, "global_step": 5079040, "num_episodes": 544, "mean_reward": 655.3968575978279, "mean_length": 10576.9, "survival_pct": 0.2644225, "max_steps": 40000, "loss": 0.13716170191764832, "sps": 568.9063484934894 }, { "update": 1245, "global_step": 5099520, "num_episodes": 544, "mean_reward": 655.3968575978279, "mean_length": 10576.9, "survival_pct": 0.2644225, "max_steps": 40000, "loss": -0.016505300998687744, "sps": 639.2019983734826 }, { "update": 1250, "global_step": 5120000, "num_episodes": 546, "mean_reward": 697.8135006427765, "mean_length": 11074.28, "survival_pct": 0.276857, "max_steps": 40000, "loss": 44.272918701171875, "sps": 571.4595742161331 }, { "update": 1255, "global_step": 5140480, "num_episodes": 546, "mean_reward": 697.8135006427765, "mean_length": 11074.28, "survival_pct": 0.276857, "max_steps": 40000, "loss": 402.4531555175781, "sps": 630.3028273736365 }, { "update": 1260, "global_step": 5160960, "num_episodes": 551, "mean_reward": 682.566458747387, "mean_length": 11177.02, "survival_pct": 0.2794255, "max_steps": 40000, "loss": 59.98308181762695, "sps": 566.8093758374978 }, { "update": 1265, "global_step": 5181440, "num_episodes": 553, "mean_reward": 696.4643092989921, "mean_length": 10984.46, "survival_pct": 0.27461149999999995, "max_steps": 40000, "loss": 1.1551482677459717, "sps": 808.9894187333804 }, { "update": 1270, "global_step": 5201920, "num_episodes": 556, "mean_reward": 701.1732182240486, "mean_length": 11378.53, "survival_pct": 0.28446325, "max_steps": 40000, "loss": 2.668344020843506, "sps": 746.5326784132175 }, { "update": 1275, "global_step": 5222400, "num_episodes": 556, "mean_reward": 701.1732182240486, "mean_length": 11378.53, "survival_pct": 0.28446325, "max_steps": 40000, "loss": 0.8112964630126953, "sps": 834.4341431680355 }, { "update": 1280, "global_step": 5242880, "num_episodes": 559, "mean_reward": 707.131958372593, "mean_length": 11746.87, "survival_pct": 0.29367175, "max_steps": 40000, "loss": 8.622824668884277, "sps": 541.8830935414046 }, { "update": 1285, "global_step": 5263360, "num_episodes": 560, "mean_reward": 707.348245446682, "mean_length": 11769.75, "survival_pct": 0.29424375, "max_steps": 40000, "loss": 6.280955791473389, "sps": 788.0408244951375 }, { "update": 1290, "global_step": 5283840, "num_episodes": 560, "mean_reward": 707.348245446682, "mean_length": 11769.75, "survival_pct": 0.29424375, "max_steps": 40000, "loss": 0.17306624352931976, "sps": 826.4544197536233 }, { "update": 1295, "global_step": 5304320, "num_episodes": 560, "mean_reward": 707.348245446682, "mean_length": 11769.75, "survival_pct": 0.29424375, "max_steps": 40000, "loss": -0.04585009068250656, "sps": 839.4415230223722 }, { "update": 1300, "global_step": 5324800, "num_episodes": 560, "mean_reward": 707.348245446682, "mean_length": 11769.75, "survival_pct": 0.29424375, "max_steps": 40000, "loss": -0.1055583506822586, "sps": 831.0648597366617 }, { "update": 1305, "global_step": 5345280, "num_episodes": 560, "mean_reward": 707.348245446682, "mean_length": 11769.75, "survival_pct": 0.29424375, "max_steps": 40000, "loss": 0.10219299793243408, "sps": 850.5287420009836 }, { "update": 1310, "global_step": 5365760, "num_episodes": 561, "mean_reward": 711.5321604895591, "mean_length": 12147.49, "survival_pct": 0.30368725, "max_steps": 40000, "loss": 1.737269639968872, "sps": 713.2928399136136 }, { "update": 1315, "global_step": 5386240, "num_episodes": 563, "mean_reward": 705.7887069511413, "mean_length": 12247.84, "survival_pct": 0.306196, "max_steps": 40000, "loss": 146.1320037841797, "sps": 515.8274568639007 }, { "update": 1320, "global_step": 5406720, "num_episodes": 563, "mean_reward": 705.7887069511413, "mean_length": 12247.84, "survival_pct": 0.306196, "max_steps": 40000, "loss": 0.19398686289787292, "sps": 800.8556253753457 }, { "update": 1325, "global_step": 5427200, "num_episodes": 564, "mean_reward": 722.067927532196, "mean_length": 12347.84, "survival_pct": 0.308696, "max_steps": 40000, "loss": 72.42268371582031, "sps": 507.17764221119916 }, { "update": 1330, "global_step": 5447680, "num_episodes": 567, "mean_reward": 809.5898822021485, "mean_length": 12150.75, "survival_pct": 0.30376875, "max_steps": 40000, "loss": 0.290306031703949, "sps": 764.8144386668907 }, { "update": 1335, "global_step": 5468160, "num_episodes": 567, "mean_reward": 809.5898822021485, "mean_length": 12150.75, "survival_pct": 0.30376875, "max_steps": 40000, "loss": 0.8032262325286865, "sps": 799.1206249673232 }, { "update": 1340, "global_step": 5488640, "num_episodes": 568, "mean_reward": 813.5851877593994, "mean_length": 12546.64, "survival_pct": 0.313666, "max_steps": 40000, "loss": 60.33620071411133, "sps": 555.4755949712716 }, { "update": 1345, "global_step": 5509120, "num_episodes": 570, "mean_reward": 897.157964668274, "mean_length": 13314.53, "survival_pct": 0.33286325, "max_steps": 40000, "loss": 794.7894897460938, "sps": 305.32909580240744 }, { "update": 1350, "global_step": 5529600, "num_episodes": 571, "mean_reward": 897.837958946228, "mean_length": 13330.75, "survival_pct": 0.33326875, "max_steps": 40000, "loss": -0.03776288032531738, "sps": 738.9189775623264 }, { "update": 1355, "global_step": 5550080, "num_episodes": 571, "mean_reward": 897.837958946228, "mean_length": 13330.75, "survival_pct": 0.33326875, "max_steps": 40000, "loss": 0.003855481743812561, "sps": 837.3194861665531 }, { "update": 1360, "global_step": 5570560, "num_episodes": 571, "mean_reward": 897.837958946228, "mean_length": 13330.75, "survival_pct": 0.33326875, "max_steps": 40000, "loss": -0.0835946649312973, "sps": 852.7604200993885 }, { "update": 1365, "global_step": 5591040, "num_episodes": 573, "mean_reward": 901.498504357338, "mean_length": 13730.89, "survival_pct": 0.34327225, "max_steps": 40000, "loss": 0.6666049957275391, "sps": 798.1656538062524 }, { "update": 1370, "global_step": 5611520, "num_episodes": 573, "mean_reward": 901.498504357338, "mean_length": 13730.89, "survival_pct": 0.34327225, "max_steps": 40000, "loss": -0.11962562799453735, "sps": 837.7882020187046 }, { "update": 1375, "global_step": 5632000, "num_episodes": 573, "mean_reward": 901.498504357338, "mean_length": 13730.89, "survival_pct": 0.34327225, "max_steps": 40000, "loss": 0.004459500312805176, "sps": 823.1821543483084 }, { "update": 1380, "global_step": 5652480, "num_episodes": 573, "mean_reward": 901.498504357338, "mean_length": 13730.89, "survival_pct": 0.34327225, "max_steps": 40000, "loss": -0.17405246198177338, "sps": 832.9659390943079 }, { "update": 1385, "global_step": 5672960, "num_episodes": 575, "mean_reward": 940.8680406999588, "mean_length": 13834.99, "survival_pct": 0.34587475, "max_steps": 40000, "loss": 24.183181762695312, "sps": 560.738665689664 }, { "update": 1390, "global_step": 5693440, "num_episodes": 575, "mean_reward": 940.8680406999588, "mean_length": 13834.99, "survival_pct": 0.34587475, "max_steps": 40000, "loss": 0.48853129148483276, "sps": 896.2171670714368 }, { "update": 1395, "global_step": 5713920, "num_episodes": 576, "mean_reward": 982.5036016130448, "mean_length": 13934.99, "survival_pct": 0.34837475, "max_steps": 40000, "loss": 8.266661643981934, "sps": 749.770689049747 }, { "update": 1400, "global_step": 5734400, "num_episodes": 576, "mean_reward": 982.5036016130448, "mean_length": 13934.99, "survival_pct": 0.34837475, "max_steps": 40000, "loss": -0.013010233640670776, "sps": 869.1728201417309 }, { "update": 1405, "global_step": 5754880, "num_episodes": 579, "mean_reward": 1029.220560479164, "mean_length": 14034.37, "survival_pct": 0.35085925, "max_steps": 40000, "loss": 29.333189010620117, "sps": 539.5250549272407 }, { "update": 1410, "global_step": 5775360, "num_episodes": 585, "mean_reward": 1027.995834054947, "mean_length": 13820.46, "survival_pct": 0.34551149999999997, "max_steps": 40000, "loss": 18.8126220703125, "sps": 439.6503781444294 }, { "update": 1415, "global_step": 5795840, "num_episodes": 585, "mean_reward": 1027.995834054947, "mean_length": 13820.46, "survival_pct": 0.34551149999999997, "max_steps": 40000, "loss": 0.8571314811706543, "sps": 639.3946228140344 }, { "update": 1420, "global_step": 5816320, "num_episodes": 586, "mean_reward": 1043.9769775485993, "mean_length": 13920.46, "survival_pct": 0.3480115, "max_steps": 40000, "loss": 17.923433303833008, "sps": 384.01858038202676 }, { "update": 1425, "global_step": 5836800, "num_episodes": 587, "mean_reward": 1106.1342781925202, "mean_length": 14317.99, "survival_pct": 0.35794975, "max_steps": 40000, "loss": 9.14981460571289, "sps": 439.45182965039373 }, { "update": 1430, "global_step": 5857280, "num_episodes": 591, "mean_reward": 1116.412140932083, "mean_length": 14771.51, "survival_pct": 0.36928775, "max_steps": 40000, "loss": 1.7514079809188843, "sps": 662.6907944527344 }, { "update": 1435, "global_step": 5877760, "num_episodes": 591, "mean_reward": 1116.412140932083, "mean_length": 14771.51, "survival_pct": 0.36928775, "max_steps": 40000, "loss": 0.37396010756492615, "sps": 903.1148649948647 }, { "update": 1440, "global_step": 5898240, "num_episodes": 591, "mean_reward": 1116.412140932083, "mean_length": 14771.51, "survival_pct": 0.36928775, "max_steps": 40000, "loss": 0.057983383536338806, "sps": 850.523983894577 }, { "update": 1445, "global_step": 5918720, "num_episodes": 593, "mean_reward": 1102.7952147102355, "mean_length": 14889.53, "survival_pct": 0.37223825, "max_steps": 40000, "loss": 83.34320831298828, "sps": 526.5779746081431 }, { "update": 1450, "global_step": 5939200, "num_episodes": 595, "mean_reward": 1018.396473865509, "mean_length": 14585.34, "survival_pct": 0.3646335, "max_steps": 40000, "loss": 6.169382572174072, "sps": 451.6525927001659 }, { "update": 1455, "global_step": 5959680, "num_episodes": 596, "mean_reward": 1018.2012248802185, "mean_length": 14603.1, "survival_pct": 0.3650775, "max_steps": 40000, "loss": 1.4308723211288452, "sps": 619.623956950883 }, { "update": 1460, "global_step": 5980160, "num_episodes": 596, "mean_reward": 1018.2012248802185, "mean_length": 14603.1, "survival_pct": 0.3650775, "max_steps": 40000, "loss": 0.18915529549121857, "sps": 800.6110593469816 }, { "update": 1465, "global_step": 6000640, "num_episodes": 597, "mean_reward": 1022.199059085846, "mean_length": 14999.43, "survival_pct": 0.37498575, "max_steps": 40000, "loss": -0.0486217737197876, "sps": 813.4318788464009 }, { "update": 1470, "global_step": 6021120, "num_episodes": 597, "mean_reward": 1022.199059085846, "mean_length": 14999.43, "survival_pct": 0.37498575, "max_steps": 40000, "loss": 0.03587697446346283, "sps": 825.3421945718204 }, { "update": 1475, "global_step": 6041600, "num_episodes": 598, "mean_reward": 1041.5796708869934, "mean_length": 15399.05, "survival_pct": 0.38497624999999996, "max_steps": 40000, "loss": 0.06574638187885284, "sps": 875.8211361821493 }, { "update": 1480, "global_step": 6062080, "num_episodes": 598, "mean_reward": 1041.5796708869934, "mean_length": 15399.05, "survival_pct": 0.38497624999999996, "max_steps": 40000, "loss": -0.11098746955394745, "sps": 970.9604971491241 }, { "update": 1485, "global_step": 6082560, "num_episodes": 598, "mean_reward": 1041.5796708869934, "mean_length": 15399.05, "survival_pct": 0.38497624999999996, "max_steps": 40000, "loss": -0.16564123332500458, "sps": 941.8806003191344 }, { "update": 1490, "global_step": 6103040, "num_episodes": 601, "mean_reward": 1025.6624535942078, "mean_length": 15891.22, "survival_pct": 0.3972805, "max_steps": 40000, "loss": 257.9145202636719, "sps": 562.7793013045049 }, { "update": 1495, "global_step": 6123520, "num_episodes": 602, "mean_reward": 1025.6871674919128, "mean_length": 15904.06, "survival_pct": 0.3976015, "max_steps": 40000, "loss": 8.345800399780273, "sps": 713.7930964796354 }, { "update": 1500, "global_step": 6144000, "num_episodes": 604, "mean_reward": 1014.5235131645203, "mean_length": 16004.56, "survival_pct": 0.40011399999999997, "max_steps": 40000, "loss": 6.72609806060791, "sps": 535.8752056296396 }, { "update": 1505, "global_step": 6164480, "num_episodes": 606, "mean_reward": 1021.3157058906555, "mean_length": 16107.38, "survival_pct": 0.4026845, "max_steps": 40000, "loss": 10.722652435302734, "sps": 630.4906569968188 }, { "update": 1510, "global_step": 6184960, "num_episodes": 607, "mean_reward": 1004.4655008125305, "mean_length": 16207.38, "survival_pct": 0.4051845, "max_steps": 40000, "loss": 2.4828217029571533, "sps": 838.6584245149437 }, { "update": 1515, "global_step": 6205440, "num_episodes": 607, "mean_reward": 1004.4655008125305, "mean_length": 16207.38, "survival_pct": 0.4051845, "max_steps": 40000, "loss": 1.270835280418396, "sps": 822.396289748599 }, { "update": 1520, "global_step": 6225920, "num_episodes": 607, "mean_reward": 1004.4655008125305, "mean_length": 16207.38, "survival_pct": 0.4051845, "max_steps": 40000, "loss": 1.3082380294799805, "sps": 817.9383133650281 }, { "update": 1525, "global_step": 6246400, "num_episodes": 607, "mean_reward": 1004.4655008125305, "mean_length": 16207.38, "survival_pct": 0.3241476, "max_steps": 50000, "loss": 0.6200645565986633, "sps": 810.1343000695222 }, { "update": 1530, "global_step": 6266880, "num_episodes": 607, "mean_reward": 1004.4655008125305, "mean_length": 16207.38, "survival_pct": 0.3241476, "max_steps": 50000, "loss": 0.4365927278995514, "sps": 815.0941587526099 }, { "update": 1535, "global_step": 6287360, "num_episodes": 607, "mean_reward": 1004.4655008125305, "mean_length": 16207.38, "survival_pct": 0.3241476, "max_steps": 50000, "loss": 0.3482319116592407, "sps": 778.7197208191486 }, { "update": 1540, "global_step": 6307840, "num_episodes": 607, "mean_reward": 1004.4655008125305, "mean_length": 16207.38, "survival_pct": 0.3241476, "max_steps": 50000, "loss": 0.30282458662986755, "sps": 856.1091418900583 }, { "update": 1545, "global_step": 6328320, "num_episodes": 607, "mean_reward": 1004.4655008125305, "mean_length": 16207.38, "survival_pct": 0.3241476, "max_steps": 50000, "loss": 0.3217318058013916, "sps": 780.544661851666 }, { "update": 1550, "global_step": 6348800, "num_episodes": 607, "mean_reward": 1004.4655008125305, "mean_length": 16207.38, "survival_pct": 0.3241476, "max_steps": 50000, "loss": 0.012079894542694092, "sps": 780.2026645895013 }, { "update": 1555, "global_step": 6369280, "num_episodes": 609, "mean_reward": 1009.2189987373353, "mean_length": 16649.64, "survival_pct": 0.3329928, "max_steps": 50000, "loss": 0.06771233677864075, "sps": 827.805833049566 }, { "update": 1560, "global_step": 6389760, "num_episodes": 609, "mean_reward": 1009.2189987373353, "mean_length": 16649.64, "survival_pct": 0.3329928, "max_steps": 50000, "loss": 0.09607579559087753, "sps": 794.12397331076 }, { "update": 1565, "global_step": 6410240, "num_episodes": 612, "mean_reward": 972.2015235805511, "mean_length": 16863.26, "survival_pct": 0.3372652, "max_steps": 50000, "loss": 31.345748901367188, "sps": 575.1634041964817 }, { "update": 1570, "global_step": 6430720, "num_episodes": 613, "mean_reward": 977.1921964931488, "mean_length": 17359.13, "survival_pct": 0.3471826, "max_steps": 50000, "loss": 62.981990814208984, "sps": 467.3951752643231 }, { "update": 1575, "global_step": 6451200, "num_episodes": 613, "mean_reward": 977.1921964931488, "mean_length": 17359.13, "survival_pct": 0.3471826, "max_steps": 50000, "loss": 79.36112976074219, "sps": 427.6025784582037 }, { "update": 1580, "global_step": 6471680, "num_episodes": 613, "mean_reward": 977.1921964931488, "mean_length": 17359.13, "survival_pct": 0.3471826, "max_steps": 50000, "loss": 0.3020017147064209, "sps": 667.9687302715897 }, { "update": 1585, "global_step": 6492160, "num_episodes": 614, "mean_reward": 1108.4623094081878, "mean_length": 17559.13, "survival_pct": 0.3511826, "max_steps": 50000, "loss": 30.998226165771484, "sps": 597.6820954824248 }, { "update": 1590, "global_step": 6512640, "num_episodes": 614, "mean_reward": 1108.4623094081878, "mean_length": 17559.13, "survival_pct": 0.3511826, "max_steps": 50000, "loss": 0.0028700977563858032, "sps": 696.7986031474608 }, { "update": 1595, "global_step": 6533120, "num_episodes": 615, "mean_reward": 1119.9435447597505, "mean_length": 17759.13, "survival_pct": 0.3551826, "max_steps": 50000, "loss": 7.728819847106934, "sps": 618.6918228629329 }, { "update": 1600, "global_step": 6553600, "num_episodes": 617, "mean_reward": 1121.774534635544, "mean_length": 17943.08, "survival_pct": 0.35886160000000006, "max_steps": 50000, "loss": 1.3815250396728516, "sps": 548.2307499735696 }, { "update": 1605, "global_step": 6574080, "num_episodes": 620, "mean_reward": 1160.0132136058808, "mean_length": 18332.17, "survival_pct": 0.36664339999999995, "max_steps": 50000, "loss": 53.58154296875, "sps": 218.35284346944567 }, { "update": 1610, "global_step": 6594560, "num_episodes": 620, "mean_reward": 1160.0132136058808, "mean_length": 18332.17, "survival_pct": 0.36664339999999995, "max_steps": 50000, "loss": 198.85508728027344, "sps": 414.0528781364895 }, { "update": 1615, "global_step": 6615040, "num_episodes": 620, "mean_reward": 1160.0132136058808, "mean_length": 18332.17, "survival_pct": 0.36664339999999995, "max_steps": 50000, "loss": 0.6458793878555298, "sps": 468.4840673845562 }, { "update": 1620, "global_step": 6635520, "num_episodes": 620, "mean_reward": 1160.0132136058808, "mean_length": 18332.17, "survival_pct": 0.36664339999999995, "max_steps": 50000, "loss": 4.100710868835449, "sps": 453.53569760987244 }, { "update": 1625, "global_step": 6656000, "num_episodes": 620, "mean_reward": 1160.0132136058808, "mean_length": 18332.17, "survival_pct": 0.36664339999999995, "max_steps": 50000, "loss": 8.949458122253418, "sps": 445.9539348920468 }, { "update": 1630, "global_step": 6676480, "num_episodes": 620, "mean_reward": 1160.0132136058808, "mean_length": 18332.17, "survival_pct": 0.36664339999999995, "max_steps": 50000, "loss": 12.222755432128906, "sps": 454.9897483249271 }, { "update": 1635, "global_step": 6696960, "num_episodes": 622, "mean_reward": 1161.4788415908813, "mean_length": 18470.78, "survival_pct": 0.36941559999999996, "max_steps": 50000, "loss": 0.15580856800079346, "sps": 433.21338637539577 }, { "update": 1640, "global_step": 6717440, "num_episodes": 622, "mean_reward": 1161.4788415908813, "mean_length": 18470.78, "survival_pct": 0.36941559999999996, "max_steps": 50000, "loss": -0.03489271551370621, "sps": 424.3238296174212 }, { "update": 1645, "global_step": 6737920, "num_episodes": 622, "mean_reward": 1161.4788415908813, "mean_length": 18470.78, "survival_pct": 0.36941559999999996, "max_steps": 50000, "loss": -0.022046178579330444, "sps": 411.0249793122232 }, { "update": 1650, "global_step": 6758400, "num_episodes": 623, "mean_reward": 1166.8129835891723, "mean_length": 18963.37, "survival_pct": 0.3792674, "max_steps": 50000, "loss": 147.39236450195312, "sps": 388.6099849730296 }, { "update": 1655, "global_step": 6778880, "num_episodes": 623, "mean_reward": 1166.8129835891723, "mean_length": 18963.37, "survival_pct": 0.3792674, "max_steps": 50000, "loss": 0.3184221386909485, "sps": 559.058329817769 }, { "update": 1660, "global_step": 6799360, "num_episodes": 623, "mean_reward": 1166.8129835891723, "mean_length": 18963.37, "survival_pct": 0.3792674, "max_steps": 50000, "loss": 0.13589444756507874, "sps": 524.1350206501007 }, { "update": 1665, "global_step": 6819840, "num_episodes": 624, "mean_reward": 1224.4580519485473, "mean_length": 19163.37, "survival_pct": 0.3832674, "max_steps": 50000, "loss": 0.28983187675476074, "sps": 520.1970206412298 }, { "update": 1670, "global_step": 6840320, "num_episodes": 625, "mean_reward": 1325.5696249008179, "mean_length": 19621.96, "survival_pct": 0.3924392, "max_steps": 50000, "loss": -0.04390272498130798, "sps": 600.9722666327863 }, { "update": 1675, "global_step": 6860800, "num_episodes": 625, "mean_reward": 1325.5696249008179, "mean_length": 19621.96, "survival_pct": 0.3924392, "max_steps": 50000, "loss": 0.029237419366836548, "sps": 619.2145461020093 }, { "update": 1680, "global_step": 6881280, "num_episodes": 625, "mean_reward": 1325.5696249008179, "mean_length": 19621.96, "survival_pct": 0.3924392, "max_steps": 50000, "loss": 0.28174102306365967, "sps": 619.1091990093024 }, { "update": 1685, "global_step": 6901760, "num_episodes": 626, "mean_reward": 1263.1981332015991, "mean_length": 19821.96, "survival_pct": 0.3964392, "max_steps": 50000, "loss": 0.4367483854293823, "sps": 626.916632808775 }, { "update": 1690, "global_step": 6922240, "num_episodes": 626, "mean_reward": 1263.1981332015991, "mean_length": 19821.96, "survival_pct": 0.3964392, "max_steps": 50000, "loss": 0.1575974076986313, "sps": 618.4480121190815 }, { "update": 1695, "global_step": 6942720, "num_episodes": 627, "mean_reward": 1294.6462133026123, "mean_length": 20320.93, "survival_pct": 0.4064186, "max_steps": 50000, "loss": 9.566240310668945, "sps": 406.6387424437816 }, { "update": 1700, "global_step": 6963200, "num_episodes": 628, "mean_reward": 1280.1067363739014, "mean_length": 20520.93, "survival_pct": 0.4104186, "max_steps": 50000, "loss": 0.709872841835022, "sps": 442.501654087899 }, { "update": 1705, "global_step": 6983680, "num_episodes": 631, "mean_reward": 1386.179150118828, "mean_length": 20721.63, "survival_pct": 0.41443260000000004, "max_steps": 50000, "loss": 19.365644454956055, "sps": 477.75886736803943 }, { "update": 1710, "global_step": 7004160, "num_episodes": 631, "mean_reward": 1386.179150118828, "mean_length": 20721.63, "survival_pct": 0.41443260000000004, "max_steps": 50000, "loss": 108.79446411132812, "sps": 612.149128502162 }, { "update": 1715, "global_step": 7024640, "num_episodes": 631, "mean_reward": 1386.179150118828, "mean_length": 20721.63, "survival_pct": 0.41443260000000004, "max_steps": 50000, "loss": 0.14560824632644653, "sps": 695.5005180060909 }, { "update": 1720, "global_step": 7045120, "num_episodes": 631, "mean_reward": 1386.179150118828, "mean_length": 20721.63, "survival_pct": 0.41443260000000004, "max_steps": 50000, "loss": -0.01733715832233429, "sps": 691.5803550233451 }, { "update": 1725, "global_step": 7065600, "num_episodes": 631, "mean_reward": 1386.179150118828, "mean_length": 20721.63, "survival_pct": 0.41443260000000004, "max_steps": 50000, "loss": 0.15833212435245514, "sps": 667.4815281045276 }, { "update": 1730, "global_step": 7086080, "num_episodes": 633, "mean_reward": 1468.0781693506242, "mean_length": 20922.8, "survival_pct": 0.418456, "max_steps": 50000, "loss": 11.883382797241211, "sps": 581.8314657800905 }, { "update": 1735, "global_step": 7106560, "num_episodes": 633, "mean_reward": 1468.0781693506242, "mean_length": 20922.8, "survival_pct": 0.418456, "max_steps": 50000, "loss": 1.1643352508544922, "sps": 681.4017891653039 }, { "update": 1740, "global_step": 7127040, "num_episodes": 633, "mean_reward": 1468.0781693506242, "mean_length": 20922.8, "survival_pct": 0.418456, "max_steps": 50000, "loss": -0.016278870403766632, "sps": 777.0781892710015 }, { "update": 1745, "global_step": 7147520, "num_episodes": 633, "mean_reward": 1468.0781693506242, "mean_length": 20922.8, "survival_pct": 0.418456, "max_steps": 50000, "loss": -0.061865031719207764, "sps": 774.9591726476483 }, { "update": 1750, "global_step": 7168000, "num_episodes": 638, "mean_reward": 1506.0245201086998, "mean_length": 20610.96, "survival_pct": 0.4122192, "max_steps": 50000, "loss": 2.459982395172119, "sps": 545.323321230957 }, { "update": 1755, "global_step": 7188480, "num_episodes": 638, "mean_reward": 1506.0245201086998, "mean_length": 20610.96, "survival_pct": 0.4122192, "max_steps": 50000, "loss": 0.11832943558692932, "sps": 817.3067294637037 }, { "update": 1760, "global_step": 7208960, "num_episodes": 640, "mean_reward": 1516.9915149474143, "mean_length": 21111.92, "survival_pct": 0.42223839999999996, "max_steps": 50000, "loss": 17.96449851989746, "sps": 621.1327524456565 }, { "update": 1765, "global_step": 7229440, "num_episodes": 642, "mean_reward": 1570.5918082213402, "mean_length": 21605.81, "survival_pct": 0.4321162, "max_steps": 50000, "loss": 886.5294189453125, "sps": 239.42082440770906 }, { "update": 1770, "global_step": 7249920, "num_episodes": 647, "mean_reward": 1521.5539734148979, "mean_length": 20475.3, "survival_pct": 0.409506, "max_steps": 50000, "loss": 179.85931396484375, "sps": 298.4259022731802 }, { "update": 1775, "global_step": 7270400, "num_episodes": 647, "mean_reward": 1521.5539734148979, "mean_length": 20475.3, "survival_pct": 0.409506, "max_steps": 50000, "loss": 966.6091918945312, "sps": 537.2474721109993 }, { "update": 1780, "global_step": 7290880, "num_episodes": 648, "mean_reward": 1564.6685786104201, "mean_length": 20973.13, "survival_pct": 0.4194626, "max_steps": 50000, "loss": 62.704280853271484, "sps": 509.433704200731 }, { "update": 1785, "global_step": 7311360, "num_episodes": 648, "mean_reward": 1564.6685786104201, "mean_length": 20973.13, "survival_pct": 0.4194626, "max_steps": 50000, "loss": -0.05052866041660309, "sps": 655.9797605411271 }, { "update": 1790, "global_step": 7331840, "num_episodes": 649, "mean_reward": 1654.446047320366, "mean_length": 21472.42, "survival_pct": 0.42944839999999995, "max_steps": 50000, "loss": 107.669677734375, "sps": 447.78139010542594 }, { "update": 1795, "global_step": 7352320, "num_episodes": 650, "mean_reward": 1661.2120521116258, "mean_length": 21959.9, "survival_pct": 0.43919800000000003, "max_steps": 50000, "loss": 116.32649993896484, "sps": 581.3285585073751 }, { "update": 1800, "global_step": 7372800, "num_episodes": 650, "mean_reward": 1661.2120521116258, "mean_length": 21959.9, "survival_pct": 0.43919800000000003, "max_steps": 50000, "loss": -0.1900792270898819, "sps": 738.826346232058 }, { "update": 1805, "global_step": 7393280, "num_episodes": 650, "mean_reward": 1661.2120521116258, "mean_length": 21959.9, "survival_pct": 0.36599833333333337, "max_steps": 60000, "loss": -0.12798017263412476, "sps": 735.1636673358219 }, { "update": 1810, "global_step": 7413760, "num_episodes": 650, "mean_reward": 1661.2120521116258, "mean_length": 21959.9, "survival_pct": 0.36599833333333337, "max_steps": 60000, "loss": -0.17347615957260132, "sps": 710.1339399692507 }, { "update": 1815, "global_step": 7434240, "num_episodes": 650, "mean_reward": 1661.2120521116258, "mean_length": 21959.9, "survival_pct": 0.36599833333333337, "max_steps": 60000, "loss": -0.04106990993022919, "sps": 703.2321739929172 }, { "update": 1820, "global_step": 7454720, "num_episodes": 651, "mean_reward": 1667.2202544736863, "mean_length": 22557.46, "survival_pct": 0.37595766666666663, "max_steps": 60000, "loss": 1.203838586807251, "sps": 608.5787796882346 }, { "update": 1825, "global_step": 7475200, "num_episodes": 651, "mean_reward": 1667.2202544736863, "mean_length": 22557.46, "survival_pct": 0.37595766666666663, "max_steps": 60000, "loss": -0.09379199892282486, "sps": 656.6381880745341 }, { "update": 1830, "global_step": 7495680, "num_episodes": 651, "mean_reward": 1667.2202544736863, "mean_length": 22557.46, "survival_pct": 0.37595766666666663, "max_steps": 60000, "loss": 0.08595463633537292, "sps": 696.5027182462075 }, { "update": 1835, "global_step": 7516160, "num_episodes": 651, "mean_reward": 1667.2202544736863, "mean_length": 22557.46, "survival_pct": 0.37595766666666663, "max_steps": 60000, "loss": -0.18902313709259033, "sps": 686.4702117808695 }, { "update": 1840, "global_step": 7536640, "num_episodes": 651, "mean_reward": 1667.2202544736863, "mean_length": 22557.46, "survival_pct": 0.37595766666666663, "max_steps": 60000, "loss": -0.18000459671020508, "sps": 663.4220467173932 }, { "update": 1845, "global_step": 7557120, "num_episodes": 651, "mean_reward": 1667.2202544736863, "mean_length": 22557.46, "survival_pct": 0.37595766666666663, "max_steps": 60000, "loss": -0.09385547041893005, "sps": 687.3361936552064 }, { "update": 1850, "global_step": 7577600, "num_episodes": 655, "mean_reward": 1615.1273900747299, "mean_length": 22353.29, "survival_pct": 0.37255483333333334, "max_steps": 60000, "loss": 0.5962892174720764, "sps": 577.3992316901965 }, { "update": 1855, "global_step": 7598080, "num_episodes": 655, "mean_reward": 1615.1273900747299, "mean_length": 22353.29, "survival_pct": 0.37255483333333334, "max_steps": 60000, "loss": 30.23639488220215, "sps": 604.6227237743161 }, { "update": 1860, "global_step": 7618560, "num_episodes": 655, "mean_reward": 1615.1273900747299, "mean_length": 22353.29, "survival_pct": 0.37255483333333334, "max_steps": 60000, "loss": -0.19098417460918427, "sps": 683.5282670532649 }, { "update": 1865, "global_step": 7639040, "num_episodes": 655, "mean_reward": 1615.1273900747299, "mean_length": 22353.29, "survival_pct": 0.37255483333333334, "max_steps": 60000, "loss": -0.1761397421360016, "sps": 638.4061361181431 }, { "update": 1870, "global_step": 7659520, "num_episodes": 656, "mean_reward": 1621.1403862142563, "mean_length": 22950.87, "survival_pct": 0.3825145, "max_steps": 60000, "loss": 1.2790898084640503, "sps": 692.9472830895132 }, { "update": 1875, "global_step": 7680000, "num_episodes": 656, "mean_reward": 1621.1403862142563, "mean_length": 22950.87, "survival_pct": 0.3825145, "max_steps": 60000, "loss": 0.5525964498519897, "sps": 708.1301830175981 }, { "update": 1880, "global_step": 7700480, "num_episodes": 656, "mean_reward": 1621.1403862142563, "mean_length": 22950.87, "survival_pct": 0.3825145, "max_steps": 60000, "loss": 0.05596184730529785, "sps": 688.5831713801172 }, { "update": 1885, "global_step": 7720960, "num_episodes": 656, "mean_reward": 1621.1403862142563, "mean_length": 22950.87, "survival_pct": 0.3825145, "max_steps": 60000, "loss": -0.039150021970272064, "sps": 707.8312184620268 }, { "update": 1890, "global_step": 7741440, "num_episodes": 658, "mean_reward": 1769.352957472801, "mean_length": 23749.89, "survival_pct": 0.3958315, "max_steps": 60000, "loss": 1.1895029544830322, "sps": 799.8935632475255 }, { "update": 1895, "global_step": 7761920, "num_episodes": 658, "mean_reward": 1769.352957472801, "mean_length": 23749.89, "survival_pct": 0.3958315, "max_steps": 60000, "loss": 1.1538077592849731, "sps": 762.3956777604958 }, { "update": 1900, "global_step": 7782400, "num_episodes": 659, "mean_reward": 1825.4085091924667, "mean_length": 24347.31, "survival_pct": 0.4057885, "max_steps": 60000, "loss": 0.6419044137001038, "sps": 703.4263875262799 }, { "update": 1905, "global_step": 7802880, "num_episodes": 659, "mean_reward": 1825.4085091924667, "mean_length": 24347.31, "survival_pct": 0.4057885, "max_steps": 60000, "loss": 0.15757112205028534, "sps": 674.3401183927315 }, { "update": 1910, "global_step": 7823360, "num_episodes": 660, "mean_reward": 1898.589024977684, "mean_length": 24923.49, "survival_pct": 0.4153915, "max_steps": 60000, "loss": -0.06820769608020782, "sps": 831.2280725759563 }, { "update": 1915, "global_step": 7843840, "num_episodes": 664, "mean_reward": 1901.6304970765113, "mean_length": 24331.52, "survival_pct": 0.40552533333333335, "max_steps": 60000, "loss": -0.050920240581035614, "sps": 814.7046480629325 }, { "update": 1920, "global_step": 7864320, "num_episodes": 664, "mean_reward": 1901.6304970765113, "mean_length": 24331.52, "survival_pct": 0.40552533333333335, "max_steps": 60000, "loss": -0.14338093996047974, "sps": 799.537306564484 }, { "update": 1925, "global_step": 7884800, "num_episodes": 664, "mean_reward": 1901.6304970765113, "mean_length": 24331.52, "survival_pct": 0.40552533333333335, "max_steps": 60000, "loss": -0.11479577422142029, "sps": 803.4181345293225 }, { "update": 1930, "global_step": 7905280, "num_episodes": 664, "mean_reward": 1901.6304970765113, "mean_length": 24331.52, "survival_pct": 0.40552533333333335, "max_steps": 60000, "loss": -0.061470456421375275, "sps": 778.5318076168905 }, { "update": 1935, "global_step": 7925760, "num_episodes": 664, "mean_reward": 1901.6304970765113, "mean_length": 24331.52, "survival_pct": 0.40552533333333335, "max_steps": 60000, "loss": -0.17063158750534058, "sps": 797.1980171427649 }, { "update": 1940, "global_step": 7946240, "num_episodes": 665, "mean_reward": 1851.0390957093239, "mean_length": 24531.52, "survival_pct": 0.40885866666666665, "max_steps": 60000, "loss": 0.14837267994880676, "sps": 741.7946196855224 }, { "update": 1945, "global_step": 7966720, "num_episodes": 665, "mean_reward": 1851.0390957093239, "mean_length": 24531.52, "survival_pct": 0.40885866666666665, "max_steps": 60000, "loss": 0.07289181649684906, "sps": 799.7640530972891 }, { "update": 1950, "global_step": 7987200, "num_episodes": 665, "mean_reward": 1851.0390957093239, "mean_length": 24531.52, "survival_pct": 0.40885866666666665, "max_steps": 60000, "loss": -0.03141336888074875, "sps": 777.4373642953062 }, { "update": 1955, "global_step": 8007680, "num_episodes": 665, "mean_reward": 1851.0390957093239, "mean_length": 24531.52, "survival_pct": 0.40885866666666665, "max_steps": 60000, "loss": -0.1878751814365387, "sps": 787.4362527685952 }, { "update": 1960, "global_step": 8028160, "num_episodes": 665, "mean_reward": 1851.0390957093239, "mean_length": 24531.52, "survival_pct": 0.40885866666666665, "max_steps": 60000, "loss": 0.2337094247341156, "sps": 836.4509391010391 }, { "update": 1965, "global_step": 8048640, "num_episodes": 665, "mean_reward": 1851.0390957093239, "mean_length": 24531.52, "survival_pct": 0.40885866666666665, "max_steps": 60000, "loss": -0.1974252164363861, "sps": 825.3140040567519 }, { "update": 1970, "global_step": 8069120, "num_episodes": 666, "mean_reward": 1869.434642584324, "mean_length": 25128.01, "survival_pct": 0.41880016666666664, "max_steps": 60000, "loss": -0.07606863975524902, "sps": 811.4969102185196 }, { "update": 1975, "global_step": 8089600, "num_episodes": 666, "mean_reward": 1869.434642584324, "mean_length": 25128.01, "survival_pct": 0.41880016666666664, "max_steps": 60000, "loss": -0.14107482135295868, "sps": 865.2539173939119 }, { "update": 1980, "global_step": 8110080, "num_episodes": 666, "mean_reward": 1869.434642584324, "mean_length": 25128.01, "survival_pct": 0.41880016666666664, "max_steps": 60000, "loss": 0.04902653396129608, "sps": 835.4859345260967 }, { "update": 1985, "global_step": 8130560, "num_episodes": 667, "mean_reward": 1875.4432358384133, "mean_length": 25725.3, "survival_pct": 0.428755, "max_steps": 60000, "loss": 235.1592254638672, "sps": 462.53669720868015 }, { "update": 1990, "global_step": 8151040, "num_episodes": 667, "mean_reward": 1875.4432358384133, "mean_length": 25725.3, "survival_pct": 0.428755, "max_steps": 60000, "loss": 4.359109401702881, "sps": 570.3135079716526 }, { "update": 1995, "global_step": 8171520, "num_episodes": 667, "mean_reward": 1875.4432358384133, "mean_length": 25725.3, "survival_pct": 0.428755, "max_steps": 60000, "loss": 0.6510128378868103, "sps": 774.3925100979434 }, { "update": 2000, "global_step": 8192000, "num_episodes": 667, "mean_reward": 1875.4432358384133, "mean_length": 25725.3, "survival_pct": 0.428755, "max_steps": 60000, "loss": 0.2337443083524704, "sps": 781.7803733771449 }, { "update": 2005, "global_step": 8212480, "num_episodes": 667, "mean_reward": 1875.4432358384133, "mean_length": 25725.3, "survival_pct": 0.32156625, "max_steps": 80000, "loss": 0.05558648705482483, "sps": 760.4769751448292 }, { "update": 2010, "global_step": 8232960, "num_episodes": 667, "mean_reward": 1875.4432358384133, "mean_length": 25725.3, "survival_pct": 0.32156625, "max_steps": 80000, "loss": -0.010526187717914581, "sps": 782.7780404574484 }, { "update": 2015, "global_step": 8253440, "num_episodes": 667, "mean_reward": 1875.4432358384133, "mean_length": 25725.3, "survival_pct": 0.32156625, "max_steps": 80000, "loss": -0.0630020946264267, "sps": 770.7413751342134 }, { "update": 2020, "global_step": 8273920, "num_episodes": 667, "mean_reward": 1875.4432358384133, "mean_length": 25725.3, "survival_pct": 0.32156625, "max_steps": 80000, "loss": 0.22121278941631317, "sps": 791.5506888179068 }, { "update": 2025, "global_step": 8294400, "num_episodes": 667, "mean_reward": 1875.4432358384133, "mean_length": 25725.3, "survival_pct": 0.32156625, "max_steps": 80000, "loss": -0.09089811146259308, "sps": 778.5213294943511 }, { "update": 2030, "global_step": 8314880, "num_episodes": 667, "mean_reward": 1875.4432358384133, "mean_length": 25725.3, "survival_pct": 0.32156625, "max_steps": 80000, "loss": -0.16075977683067322, "sps": 755.6295349772931 }, { "update": 2035, "global_step": 8335360, "num_episodes": 667, "mean_reward": 1875.4432358384133, "mean_length": 25725.3, "survival_pct": 0.32156625, "max_steps": 80000, "loss": -0.158203586935997, "sps": 754.4182628574841 }, { "update": 2040, "global_step": 8355840, "num_episodes": 667, "mean_reward": 1875.4432358384133, "mean_length": 25725.3, "survival_pct": 0.32156625, "max_steps": 80000, "loss": -0.21690981090068817, "sps": 717.6735596436803 }, { "update": 2045, "global_step": 8376320, "num_episodes": 670, "mean_reward": 1922.8468935227395, "mean_length": 26134.49, "survival_pct": 0.326681125, "max_steps": 80000, "loss": 156.1393585205078, "sps": 323.2563608041114 }, { "update": 2050, "global_step": 8396800, "num_episodes": 670, "mean_reward": 1922.8468935227395, "mean_length": 26134.49, "survival_pct": 0.326681125, "max_steps": 80000, "loss": 0.16352523863315582, "sps": 454.8234843620012 }, { "update": 2055, "global_step": 8417280, "num_episodes": 671, "mean_reward": 1930.1808717942238, "mean_length": 26915.84, "survival_pct": 0.336448, "max_steps": 80000, "loss": -0.08801546692848206, "sps": 801.0593266816616 }, { "update": 2060, "global_step": 8437760, "num_episodes": 671, "mean_reward": 1930.1808717942238, "mean_length": 26915.84, "survival_pct": 0.336448, "max_steps": 80000, "loss": -0.13592661917209625, "sps": 779.2904032377721 }, { "update": 2065, "global_step": 8458240, "num_episodes": 672, "mean_reward": 1975.5371123337745, "mean_length": 27315.84, "survival_pct": 0.34144800000000003, "max_steps": 80000, "loss": -0.17474070191383362, "sps": 545.5720531350578 }, { "update": 2070, "global_step": 8478720, "num_episodes": 673, "mean_reward": 2013.7483372473716, "mean_length": 28111.01, "survival_pct": 0.351387625, "max_steps": 80000, "loss": 12.530708312988281, "sps": 499.4637464037537 }, { "update": 2075, "global_step": 8499200, "num_episodes": 673, "mean_reward": 2013.7483372473716, "mean_length": 28111.01, "survival_pct": 0.351387625, "max_steps": 80000, "loss": 0.8680031299591064, "sps": 530.5638999597164 }, { "update": 2080, "global_step": 8519680, "num_episodes": 673, "mean_reward": 2013.7483372473716, "mean_length": 28111.01, "survival_pct": 0.351387625, "max_steps": 80000, "loss": 19.798368453979492, "sps": 656.9070684694709 }, { "update": 2085, "global_step": 8540160, "num_episodes": 673, "mean_reward": 2013.7483372473716, "mean_length": 28111.01, "survival_pct": 0.351387625, "max_steps": 80000, "loss": -0.19701889157295227, "sps": 698.9096675564882 }, { "update": 2090, "global_step": 8560640, "num_episodes": 673, "mean_reward": 2013.7483372473716, "mean_length": 28111.01, "survival_pct": 0.351387625, "max_steps": 80000, "loss": -0.0173691064119339, "sps": 728.1424091253976 }, { "update": 2095, "global_step": 8581120, "num_episodes": 674, "mean_reward": 1971.775068204403, "mean_length": 28511.01, "survival_pct": 0.356387625, "max_steps": 80000, "loss": 2452.706787109375, "sps": 356.8059399544614 }, { "update": 2100, "global_step": 8601600, "num_episodes": 674, "mean_reward": 1971.775068204403, "mean_length": 28511.01, "survival_pct": 0.356387625, "max_steps": 80000, "loss": 14.414767265319824, "sps": 585.3881767861222 }, { "update": 2105, "global_step": 8622080, "num_episodes": 674, "mean_reward": 1971.775068204403, "mean_length": 28511.01, "survival_pct": 0.356387625, "max_steps": 80000, "loss": 0.022661790251731873, "sps": 678.3736040372619 }, { "update": 2110, "global_step": 8642560, "num_episodes": 674, "mean_reward": 1971.775068204403, "mean_length": 28511.01, "survival_pct": 0.356387625, "max_steps": 80000, "loss": -0.20638275146484375, "sps": 665.6932482303778 }, { "update": 2115, "global_step": 8663040, "num_episodes": 674, "mean_reward": 1971.775068204403, "mean_length": 28511.01, "survival_pct": 0.356387625, "max_steps": 80000, "loss": -0.1961488574743271, "sps": 661.1262168442004 }, { "update": 2120, "global_step": 8683520, "num_episodes": 674, "mean_reward": 1971.775068204403, "mean_length": 28511.01, "survival_pct": 0.356387625, "max_steps": 80000, "loss": -0.16852501034736633, "sps": 666.07765454515 }, { "update": 2125, "global_step": 8704000, "num_episodes": 675, "mean_reward": 1988.447893064022, "mean_length": 29306.48, "survival_pct": 0.366331, "max_steps": 80000, "loss": 0.14007116854190826, "sps": 633.2168784322698 }, { "update": 2130, "global_step": 8724480, "num_episodes": 675, "mean_reward": 1988.447893064022, "mean_length": 29306.48, "survival_pct": 0.366331, "max_steps": 80000, "loss": 0.08940272033214569, "sps": 621.5424559601437 }, { "update": 2135, "global_step": 8744960, "num_episodes": 675, "mean_reward": 1988.447893064022, "mean_length": 29306.48, "survival_pct": 0.366331, "max_steps": 80000, "loss": -0.16373726725578308, "sps": 689.1369758818673 }, { "update": 2140, "global_step": 8765440, "num_episodes": 678, "mean_reward": 2002.1588407492638, "mean_length": 29307.47, "survival_pct": 0.366343375, "max_steps": 80000, "loss": 9.728021621704102, "sps": 466.25151106127714 }, { "update": 2145, "global_step": 8785920, "num_episodes": 681, "mean_reward": 1996.5775413012504, "mean_length": 28885.87, "survival_pct": 0.361073375, "max_steps": 80000, "loss": 0.4610579311847687, "sps": 551.8281626137556 }, { "update": 2150, "global_step": 8806400, "num_episodes": 681, "mean_reward": 1996.5775413012504, "mean_length": 28885.87, "survival_pct": 0.361073375, "max_steps": 80000, "loss": 0.20792128145694733, "sps": 708.9972554003779 }, { "update": 2155, "global_step": 8826880, "num_episodes": 681, "mean_reward": 1996.5775413012504, "mean_length": 28885.87, "survival_pct": 0.361073375, "max_steps": 80000, "loss": -0.21552087366580963, "sps": 688.7493020847775 }, { "update": 2160, "global_step": 8847360, "num_episodes": 681, "mean_reward": 1996.5775413012504, "mean_length": 28885.87, "survival_pct": 0.361073375, "max_steps": 80000, "loss": -0.16776643693447113, "sps": 693.0826430290605 }, { "update": 2165, "global_step": 8867840, "num_episodes": 681, "mean_reward": 1996.5775413012504, "mean_length": 28885.87, "survival_pct": 0.361073375, "max_steps": 80000, "loss": 0.11363191902637482, "sps": 709.5181085639529 }, { "update": 2170, "global_step": 8888320, "num_episodes": 681, "mean_reward": 1996.5775413012504, "mean_length": 28885.87, "survival_pct": 0.361073375, "max_steps": 80000, "loss": 0.0609743595123291, "sps": 712.5366166562799 }, { "update": 2175, "global_step": 8908800, "num_episodes": 681, "mean_reward": 1996.5775413012504, "mean_length": 28885.87, "survival_pct": 0.361073375, "max_steps": 80000, "loss": 0.7529717087745667, "sps": 721.4643529853796 }, { "update": 2180, "global_step": 8929280, "num_episodes": 681, "mean_reward": 1996.5775413012504, "mean_length": 28885.87, "survival_pct": 0.361073375, "max_steps": 80000, "loss": 0.46089768409729004, "sps": 729.282506794838 }, { "update": 2185, "global_step": 8949760, "num_episodes": 681, "mean_reward": 1996.5775413012504, "mean_length": 28885.87, "survival_pct": 0.361073375, "max_steps": 80000, "loss": -0.04369640350341797, "sps": 731.0711424448074 }, { "update": 2190, "global_step": 8970240, "num_episodes": 681, "mean_reward": 1996.5775413012504, "mean_length": 28885.87, "survival_pct": 0.361073375, "max_steps": 80000, "loss": -0.2550358772277832, "sps": 741.8354272639722 }, { "update": 2195, "global_step": 8990720, "num_episodes": 681, "mean_reward": 1996.5775413012504, "mean_length": 28885.87, "survival_pct": 0.361073375, "max_steps": 80000, "loss": -0.1644577533006668, "sps": 712.1224425521518 }, { "update": 2200, "global_step": 9011200, "num_episodes": 682, "mean_reward": 2004.891281106472, "mean_length": 29684.96, "survival_pct": 0.371062, "max_steps": 80000, "loss": 19.648794174194336, "sps": 465.00511290277484 }, { "update": 2205, "global_step": 9031680, "num_episodes": 686, "mean_reward": 2088.85676854372, "mean_length": 30082.53, "survival_pct": 0.376031625, "max_steps": 80000, "loss": 164.72410583496094, "sps": 526.5227813400744 }, { "update": 2210, "global_step": 9052160, "num_episodes": 687, "mean_reward": 2035.9988553357125, "mean_length": 30482.53, "survival_pct": 0.381031625, "max_steps": 80000, "loss": 0.7927183508872986, "sps": 546.9580142534905 }, { "update": 2215, "global_step": 9072640, "num_episodes": 687, "mean_reward": 2035.9988553357125, "mean_length": 30482.53, "survival_pct": 0.381031625, "max_steps": 80000, "loss": 0.38622233271598816, "sps": 654.3004159066745 }, { "update": 2220, "global_step": 9093120, "num_episodes": 688, "mean_reward": 2065.251064198017, "mean_length": 30882.53, "survival_pct": 0.386031625, "max_steps": 80000, "loss": 0.734276533126831, "sps": 253.13496237874534 }, { "update": 2225, "global_step": 9113600, "num_episodes": 689, "mean_reward": 2089.462522289753, "mean_length": 31681.58, "survival_pct": 0.39601975, "max_steps": 80000, "loss": 5.1840643882751465, "sps": 407.23576672305194 }, { "update": 2230, "global_step": 9134080, "num_episodes": 689, "mean_reward": 2089.462522289753, "mean_length": 31681.58, "survival_pct": 0.39601975, "max_steps": 80000, "loss": 3.3036017417907715, "sps": 334.98140076678567 }, { "update": 2235, "global_step": 9154560, "num_episodes": 689, "mean_reward": 2089.462522289753, "mean_length": 31681.58, "survival_pct": 0.39601975, "max_steps": 80000, "loss": 12.958847045898438, "sps": 413.60512125042015 }, { "update": 2240, "global_step": 9175040, "num_episodes": 689, "mean_reward": 2089.462522289753, "mean_length": 31681.58, "survival_pct": 0.39601975, "max_steps": 80000, "loss": 11.410442352294922, "sps": 429.40010384163304 }, { "update": 2245, "global_step": 9195520, "num_episodes": 689, "mean_reward": 2089.462522289753, "mean_length": 31681.58, "survival_pct": 0.39601975, "max_steps": 80000, "loss": 0.4679904580116272, "sps": 442.22049673117283 }, { "update": 2250, "global_step": 9216000, "num_episodes": 690, "mean_reward": 2216.5914781308174, "mean_length": 32423.39, "survival_pct": 0.405292375, "max_steps": 80000, "loss": 13.81257152557373, "sps": 503.0567425680048 }, { "update": 2255, "global_step": 9236480, "num_episodes": 690, "mean_reward": 2216.5914781308174, "mean_length": 32423.39, "survival_pct": 0.405292375, "max_steps": 80000, "loss": 0.347695916891098, "sps": 582.6999735714041 }, { "update": 2260, "global_step": 9256960, "num_episodes": 690, "mean_reward": 2216.5914781308174, "mean_length": 32423.39, "survival_pct": 0.405292375, "max_steps": 80000, "loss": 0.276647686958313, "sps": 530.4694388918329 }, { "update": 2265, "global_step": 9277440, "num_episodes": 690, "mean_reward": 2216.5914781308174, "mean_length": 32423.39, "survival_pct": 0.405292375, "max_steps": 80000, "loss": 0.12728533148765564, "sps": 524.460969045791 }, { "update": 2270, "global_step": 9297920, "num_episodes": 690, "mean_reward": 2216.5914781308174, "mean_length": 32423.39, "survival_pct": 0.405292375, "max_steps": 80000, "loss": 0.27348071336746216, "sps": 540.9874205296653 }, { "update": 2275, "global_step": 9318400, "num_episodes": 690, "mean_reward": 2216.5914781308174, "mean_length": 32423.39, "survival_pct": 0.405292375, "max_steps": 80000, "loss": 0.07365398108959198, "sps": 560.7709890786243 }, { "update": 2280, "global_step": 9338880, "num_episodes": 692, "mean_reward": 2220.5268848729133, "mean_length": 32823.83, "survival_pct": 0.41029787500000003, "max_steps": 80000, "loss": 0.7585805654525757, "sps": 369.35670437401643 }, { "update": 2285, "global_step": 9359360, "num_episodes": 692, "mean_reward": 2220.5268848729133, "mean_length": 32823.83, "survival_pct": 0.41029787500000003, "max_steps": 80000, "loss": 84.29670715332031, "sps": 372.3955428292709 }, { "update": 2290, "global_step": 9379840, "num_episodes": 692, "mean_reward": 2220.5268848729133, "mean_length": 32823.83, "survival_pct": 0.41029787500000003, "max_steps": 80000, "loss": 0.22566762566566467, "sps": 391.1882509653406 }, { "update": 2295, "global_step": 9400320, "num_episodes": 692, "mean_reward": 2220.5268848729133, "mean_length": 32823.83, "survival_pct": 0.41029787500000003, "max_steps": 80000, "loss": 0.11116530001163483, "sps": 404.8368327596648 }, { "update": 2300, "global_step": 9420800, "num_episodes": 693, "mean_reward": 2245.6658289647103, "mean_length": 33605.52, "survival_pct": 0.42006899999999997, "max_steps": 80000, "loss": 14.594441413879395, "sps": 319.4636076671403 }, { "update": 2305, "global_step": 9441280, "num_episodes": 693, "mean_reward": 2245.6658289647103, "mean_length": 33605.52, "survival_pct": 0.42006899999999997, "max_steps": 80000, "loss": 4.601461410522461, "sps": 358.98227257652104 }, { "update": 2310, "global_step": 9461760, "num_episodes": 693, "mean_reward": 2245.6658289647103, "mean_length": 33605.52, "survival_pct": 0.42006899999999997, "max_steps": 80000, "loss": -0.08196337521076202, "sps": 397.59174172984365 }, { "update": 2315, "global_step": 9482240, "num_episodes": 693, "mean_reward": 2245.6658289647103, "mean_length": 33605.52, "survival_pct": 0.42006899999999997, "max_steps": 80000, "loss": -0.038676969707012177, "sps": 390.256940583996 }, { "update": 2320, "global_step": 9502720, "num_episodes": 693, "mean_reward": 2245.6658289647103, "mean_length": 33605.52, "survival_pct": 0.42006899999999997, "max_steps": 80000, "loss": -0.11568231880664825, "sps": 386.22805447511666 }, { "update": 2325, "global_step": 9523200, "num_episodes": 693, "mean_reward": 2245.6658289647103, "mean_length": 33605.52, "survival_pct": 0.42006899999999997, "max_steps": 80000, "loss": -0.09200012683868408, "sps": 393.62586076527043 }, { "update": 2330, "global_step": 9543680, "num_episodes": 693, "mean_reward": 2245.6658289647103, "mean_length": 33605.52, "survival_pct": 0.42006899999999997, "max_steps": 80000, "loss": -0.14359304308891296, "sps": 390.66784252973497 }, { "update": 2335, "global_step": 9564160, "num_episodes": 693, "mean_reward": 2245.6658289647103, "mean_length": 33605.52, "survival_pct": 0.42006899999999997, "max_steps": 80000, "loss": 0.18879307806491852, "sps": 395.8483711314571 }, { "update": 2340, "global_step": 9584640, "num_episodes": 693, "mean_reward": 2245.6658289647103, "mean_length": 33605.52, "survival_pct": 0.42006899999999997, "max_steps": 80000, "loss": -0.1260424256324768, "sps": 392.3815903936216 }, { "update": 2345, "global_step": 9605120, "num_episodes": 693, "mean_reward": 2245.6658289647103, "mean_length": 33605.52, "survival_pct": 0.42006899999999997, "max_steps": 80000, "loss": -0.09693819284439087, "sps": 394.7211181932461 }, { "update": 2350, "global_step": 9625600, "num_episodes": 693, "mean_reward": 2245.6658289647103, "mean_length": 33605.52, "survival_pct": 0.42006899999999997, "max_steps": 80000, "loss": -0.11026182770729065, "sps": 396.72162235901294 }, { "update": 2355, "global_step": 9646080, "num_episodes": 693, "mean_reward": 2245.6658289647103, "mean_length": 33605.52, "survival_pct": 0.42006899999999997, "max_steps": 80000, "loss": -0.1978517770767212, "sps": 389.2953386649375 }, { "update": 2360, "global_step": 9666560, "num_episodes": 697, "mean_reward": 2322.5986268925667, "mean_length": 34769.99, "survival_pct": 0.43462487499999997, "max_steps": 80000, "loss": 683.0784912109375, "sps": 168.6861821242579 }, { "update": 2365, "global_step": 9687040, "num_episodes": 698, "mean_reward": 2311.5085823369027, "mean_length": 35169.99, "survival_pct": 0.43962487499999997, "max_steps": 80000, "loss": 3170.56884765625, "sps": 235.4733420148456 }, { "update": 2370, "global_step": 9707520, "num_episodes": 699, "mean_reward": 2308.040635712147, "mean_length": 34804.58, "survival_pct": 0.43505725, "max_steps": 80000, "loss": 1.0950156450271606, "sps": 311.37399850720584 }, { "update": 2375, "global_step": 9728000, "num_episodes": 700, "mean_reward": 2335.846759979725, "mean_length": 35204.58, "survival_pct": 0.44005725, "max_steps": 80000, "loss": 1.3819221258163452, "sps": 322.7681723058591 }, { "update": 2380, "global_step": 9748480, "num_episodes": 700, "mean_reward": 2335.846759979725, "mean_length": 35204.58, "survival_pct": 0.44005725, "max_steps": 80000, "loss": 0.11061151325702667, "sps": 371.7560051115758 }, { "update": 2385, "global_step": 9768960, "num_episodes": 701, "mean_reward": 2415.715403892994, "mean_length": 36004.33, "survival_pct": 0.450054125, "max_steps": 80000, "loss": 0.04716165363788605, "sps": 404.5228919670066 }, { "update": 2390, "global_step": 9789440, "num_episodes": 701, "mean_reward": 2415.715403892994, "mean_length": 36004.33, "survival_pct": 0.450054125, "max_steps": 80000, "loss": 0.673524022102356, "sps": 438.07322870511223 }, { "update": 2395, "global_step": 9809920, "num_episodes": 701, "mean_reward": 2415.715403892994, "mean_length": 36004.33, "survival_pct": 0.450054125, "max_steps": 80000, "loss": 0.04936300218105316, "sps": 447.99792386063456 }, { "update": 2400, "global_step": 9830400, "num_episodes": 701, "mean_reward": 2415.715403892994, "mean_length": 36004.33, "survival_pct": 0.450054125, "max_steps": 80000, "loss": -0.05190306156873703, "sps": 451.9291955717441 }, { "update": 2405, "global_step": 9850880, "num_episodes": 701, "mean_reward": 2415.715403892994, "mean_length": 36004.33, "survival_pct": 0.450054125, "max_steps": 80000, "loss": -0.01377500593662262, "sps": 456.3292063646881 }, { "update": 2410, "global_step": 9871360, "num_episodes": 705, "mean_reward": 2415.0658296895026, "mean_length": 35966.34, "survival_pct": 0.44957924999999993, "max_steps": 80000, "loss": 7.559074401855469, "sps": 317.32403267612773 }, { "update": 2415, "global_step": 9891840, "num_episodes": 705, "mean_reward": 2415.0658296895026, "mean_length": 35966.34, "survival_pct": 0.44957924999999993, "max_steps": 80000, "loss": 1.5048213005065918, "sps": 332.4634487520527 }, { "update": 2420, "global_step": 9912320, "num_episodes": 705, "mean_reward": 2415.0658296895026, "mean_length": 35966.34, "survival_pct": 0.44957924999999993, "max_steps": 80000, "loss": 0.2138240784406662, "sps": 432.40076557163655 }, { "update": 2425, "global_step": 9932800, "num_episodes": 705, "mean_reward": 2415.0658296895026, "mean_length": 35966.34, "survival_pct": 0.44957924999999993, "max_steps": 80000, "loss": 0.21876683831214905, "sps": 448.3816453184398 }, { "update": 2430, "global_step": 9953280, "num_episodes": 705, "mean_reward": 2415.0658296895026, "mean_length": 35966.34, "survival_pct": 0.44957924999999993, "max_steps": 80000, "loss": -0.08095875382423401, "sps": 427.35281803394867 }, { "update": 2435, "global_step": 9973760, "num_episodes": 706, "mean_reward": 2496.027885582447, "mean_length": 36761.21, "survival_pct": 0.45951512499999997, "max_steps": 80000, "loss": 1.3263893127441406, "sps": 489.32343069605076 }, { "update": 2440, "global_step": 9994240, "num_episodes": 706, "mean_reward": 2496.027885582447, "mean_length": 36761.21, "survival_pct": 0.45951512499999997, "max_steps": 80000, "loss": 0.8851792812347412, "sps": 507.1439259148593 }, { "update": 2445, "global_step": 10014720, "num_episodes": 706, "mean_reward": 2496.027885582447, "mean_length": 36761.21, "survival_pct": 0.45951512499999997, "max_steps": 80000, "loss": -0.022784769535064697, "sps": 514.2103065530886 }, { "update": 2450, "global_step": 10035200, "num_episodes": 706, "mean_reward": 2496.027885582447, "mean_length": 36761.21, "survival_pct": 0.45951512499999997, "max_steps": 80000, "loss": 0.03437136113643646, "sps": 498.98534084342003 }, { "update": 2455, "global_step": 10055680, "num_episodes": 708, "mean_reward": 2566.8018279957773, "mean_length": 36664.09, "survival_pct": 0.458301125, "max_steps": 80000, "loss": 42.47873306274414, "sps": 329.38816457132594 }, { "update": 2460, "global_step": 10076160, "num_episodes": 708, "mean_reward": 2566.8018279957773, "mean_length": 36664.09, "survival_pct": 0.458301125, "max_steps": 80000, "loss": -0.1173701360821724, "sps": 513.6460372429699 }, { "update": 2465, "global_step": 10096640, "num_episodes": 708, "mean_reward": 2566.8018279957773, "mean_length": 36664.09, "survival_pct": 0.458301125, "max_steps": 80000, "loss": -0.008197635412216187, "sps": 529.8279586174691 }, { "update": 2470, "global_step": 10117120, "num_episodes": 708, "mean_reward": 2566.8018279957773, "mean_length": 36664.09, "survival_pct": 0.458301125, "max_steps": 80000, "loss": -0.09317698329687119, "sps": 533.5564592749838 }, { "update": 2475, "global_step": 10137600, "num_episodes": 708, "mean_reward": 2566.8018279957773, "mean_length": 36664.09, "survival_pct": 0.458301125, "max_steps": 80000, "loss": -0.21753042936325073, "sps": 523.3857154825558 }, { "update": 2480, "global_step": 10158080, "num_episodes": 708, "mean_reward": 2566.8018279957773, "mean_length": 36664.09, "survival_pct": 0.458301125, "max_steps": 80000, "loss": -0.038853421807289124, "sps": 532.2887541618683 }, { "update": 2485, "global_step": 10178560, "num_episodes": 708, "mean_reward": 2566.8018279957773, "mean_length": 36664.09, "survival_pct": 0.458301125, "max_steps": 80000, "loss": -0.13897421956062317, "sps": 529.4232504246512 }, { "update": 2490, "global_step": 10199040, "num_episodes": 708, "mean_reward": 2566.8018279957773, "mean_length": 36664.09, "survival_pct": 0.458301125, "max_steps": 80000, "loss": -0.09001248329877853, "sps": 527.0291369571413 }, { "update": 2495, "global_step": 10219520, "num_episodes": 708, "mean_reward": 2566.8018279957773, "mean_length": 36664.09, "survival_pct": 0.458301125, "max_steps": 80000, "loss": -0.07257804274559021, "sps": 502.41772632138844 }, { "update": 2500, "global_step": 10240000, "num_episodes": 708, "mean_reward": 2566.8018279957773, "mean_length": 36664.09, "survival_pct": 0.458301125, "max_steps": 80000, "loss": -0.09625621140003204, "sps": 503.481571804067 }, { "update": 2505, "global_step": 10260480, "num_episodes": 708, "mean_reward": 2566.8018279957773, "mean_length": 36664.09, "survival_pct": 0.458301125, "max_steps": 80000, "loss": 0.5746378898620605, "sps": 511.2198490027179 }, { "update": 2510, "global_step": 10280960, "num_episodes": 708, "mean_reward": 2566.8018279957773, "mean_length": 36664.09, "survival_pct": 0.458301125, "max_steps": 80000, "loss": 0.2466697096824646, "sps": 502.732132447925 }, { "update": 2515, "global_step": 10301440, "num_episodes": 711, "mean_reward": 2659.348615782261, "mean_length": 36969.03, "survival_pct": 0.462112875, "max_steps": 80000, "loss": 18.992151260375977, "sps": 423.86791208132917 }, { "update": 2520, "global_step": 10321920, "num_episodes": 711, "mean_reward": 2659.348615782261, "mean_length": 36969.03, "survival_pct": 0.462112875, "max_steps": 80000, "loss": 1.12687087059021, "sps": 561.3134646893078 }, { "update": 2525, "global_step": 10342400, "num_episodes": 713, "mean_reward": 2761.6309762120245, "mean_length": 38056.0, "survival_pct": 0.4757, "max_steps": 80000, "loss": 2.7139060497283936, "sps": 570.6222365340182 }, { "update": 2530, "global_step": 10362880, "num_episodes": 713, "mean_reward": 2761.6309762120245, "mean_length": 38056.0, "survival_pct": 0.4757, "max_steps": 80000, "loss": 0.7590304613113403, "sps": 590.034373507516 }, { "update": 2535, "global_step": 10383360, "num_episodes": 720, "mean_reward": 2516.4986815714838, "mean_length": 36373.73, "survival_pct": 0.454671625, "max_steps": 80000, "loss": 6.213076591491699, "sps": 402.7930412963076 }, { "update": 2540, "global_step": 10403840, "num_episodes": 723, "mean_reward": 2544.001566827297, "mean_length": 36533.04, "survival_pct": 0.456663, "max_steps": 80000, "loss": 0.7334296703338623, "sps": 460.5953936249251 }, { "update": 2545, "global_step": 10424320, "num_episodes": 723, "mean_reward": 2544.001566827297, "mean_length": 36533.04, "survival_pct": 0.456663, "max_steps": 80000, "loss": 0.2690082788467407, "sps": 536.0100630883657 }, { "update": 2550, "global_step": 10444800, "num_episodes": 723, "mean_reward": 2544.001566827297, "mean_length": 36533.04, "survival_pct": 0.456663, "max_steps": 80000, "loss": -0.026896320283412933, "sps": 716.9425490948719 }, { "update": 2555, "global_step": 10465280, "num_episodes": 723, "mean_reward": 2544.001566827297, "mean_length": 36533.04, "survival_pct": 0.456663, "max_steps": 80000, "loss": -0.2345649003982544, "sps": 709.2426825661537 }, { "update": 2560, "global_step": 10485760, "num_episodes": 723, "mean_reward": 2544.001566827297, "mean_length": 36533.04, "survival_pct": 0.456663, "max_steps": 80000, "loss": -0.09797754883766174, "sps": 722.0229903452855 }, { "update": 2565, "global_step": 10506240, "num_episodes": 725, "mean_reward": 2377.9404914259912, "mean_length": 36339.3, "survival_pct": 0.45424125000000004, "max_steps": 80000, "loss": 8.174196243286133, "sps": 380.177542139671 }, { "update": 2570, "global_step": 10526720, "num_episodes": 729, "mean_reward": 2195.4616722035407, "mean_length": 34356.45, "survival_pct": 0.42945562499999995, "max_steps": 80000, "loss": -0.04960069805383682, "sps": 528.86594743839 }, { "update": 2575, "global_step": 10547200, "num_episodes": 729, "mean_reward": 2195.4616722035407, "mean_length": 34356.45, "survival_pct": 0.42945562499999995, "max_steps": 80000, "loss": -0.13887368142604828, "sps": 724.4350769243744 }, { "update": 2580, "global_step": 10567680, "num_episodes": 729, "mean_reward": 2195.4616722035407, "mean_length": 34356.45, "survival_pct": 0.42945562499999995, "max_steps": 80000, "loss": -0.08070817589759827, "sps": 710.3014121255162 }, { "update": 2585, "global_step": 10588160, "num_episodes": 729, "mean_reward": 2195.4616722035407, "mean_length": 34356.45, "survival_pct": 0.42945562499999995, "max_steps": 80000, "loss": 0.2522279620170593, "sps": 722.3160261788157 }, { "update": 2590, "global_step": 10608640, "num_episodes": 729, "mean_reward": 2195.4616722035407, "mean_length": 34356.45, "survival_pct": 0.42945562499999995, "max_steps": 80000, "loss": -0.10373173654079437, "sps": 669.264371724261 }, { "update": 2595, "global_step": 10629120, "num_episodes": 737, "mean_reward": 2092.385828053951, "mean_length": 34157.73, "survival_pct": 0.426971625, "max_steps": 80000, "loss": -0.1631798893213272, "sps": 758.9302511283099 }, { "update": 2600, "global_step": 10649600, "num_episodes": 737, "mean_reward": 2092.385828053951, "mean_length": 34157.73, "survival_pct": 0.426971625, "max_steps": 80000, "loss": -0.20927369594573975, "sps": 740.8227163674813 }, { "update": 2605, "global_step": 10670080, "num_episodes": 737, "mean_reward": 2092.385828053951, "mean_length": 34157.73, "survival_pct": 0.34157730000000003, "max_steps": 100000, "loss": -0.19633692502975464, "sps": 755.8358488017623 }, { "update": 2610, "global_step": 10690560, "num_episodes": 737, "mean_reward": 2092.385828053951, "mean_length": 34157.73, "survival_pct": 0.34157730000000003, "max_steps": 100000, "loss": -0.177586629986763, "sps": 741.6510280859512 }, { "update": 2615, "global_step": 10711040, "num_episodes": 737, "mean_reward": 2092.385828053951, "mean_length": 34157.73, "survival_pct": 0.34157730000000003, "max_steps": 100000, "loss": -0.21765384078025818, "sps": 727.5310686928377 }, { "update": 2620, "global_step": 10731520, "num_episodes": 737, "mean_reward": 2092.385828053951, "mean_length": 34157.73, "survival_pct": 0.34157730000000003, "max_steps": 100000, "loss": -0.0905933529138565, "sps": 741.0285660060694 }, { "update": 2625, "global_step": 10752000, "num_episodes": 737, "mean_reward": 2092.385828053951, "mean_length": 34157.73, "survival_pct": 0.34157730000000003, "max_steps": 100000, "loss": -0.0920332819223404, "sps": 730.4494102532749 }, { "update": 2630, "global_step": 10772480, "num_episodes": 737, "mean_reward": 2092.385828053951, "mean_length": 34157.73, "survival_pct": 0.34157730000000003, "max_steps": 100000, "loss": -0.16122817993164062, "sps": 720.0710211707521 }, { "update": 2635, "global_step": 10792960, "num_episodes": 737, "mean_reward": 2092.385828053951, "mean_length": 34157.73, "survival_pct": 0.34157730000000003, "max_steps": 100000, "loss": -0.11723538488149643, "sps": 703.7882071317721 }, { "update": 2640, "global_step": 10813440, "num_episodes": 737, "mean_reward": 2092.385828053951, "mean_length": 34157.73, "survival_pct": 0.34157730000000003, "max_steps": 100000, "loss": -0.18088921904563904, "sps": 692.1664885002592 }, { "update": 2645, "global_step": 10833920, "num_episodes": 737, "mean_reward": 2092.385828053951, "mean_length": 34157.73, "survival_pct": 0.34157730000000003, "max_steps": 100000, "loss": -0.17876286804676056, "sps": 684.4397604362919 }, { "update": 2650, "global_step": 10854400, "num_episodes": 738, "mean_reward": 2173.3602819800376, "mean_length": 35156.66, "survival_pct": 0.35156660000000006, "max_steps": 100000, "loss": 0.5544903874397278, "sps": 410.99817441737457 }, { "update": 2655, "global_step": 10874880, "num_episodes": 741, "mean_reward": 2107.5635332036018, "mean_length": 34163.47, "survival_pct": 0.3416347, "max_steps": 100000, "loss": 544.5499267578125, "sps": 460.02549342528056 }, { "update": 2660, "global_step": 10895360, "num_episodes": 741, "mean_reward": 2107.5635332036018, "mean_length": 34163.47, "survival_pct": 0.3416347, "max_steps": 100000, "loss": 0.021736368536949158, "sps": 548.2332167398129 }, { "update": 2665, "global_step": 10915840, "num_episodes": 741, "mean_reward": 2107.5635332036018, "mean_length": 34163.47, "survival_pct": 0.3416347, "max_steps": 100000, "loss": -0.050015464425086975, "sps": 527.6499326243032 }, { "update": 2670, "global_step": 10936320, "num_episodes": 741, "mean_reward": 2107.5635332036018, "mean_length": 34163.47, "survival_pct": 0.3416347, "max_steps": 100000, "loss": -0.1121983677148819, "sps": 510.22633473983234 }, { "update": 2675, "global_step": 10956800, "num_episodes": 741, "mean_reward": 2107.5635332036018, "mean_length": 34163.47, "survival_pct": 0.3416347, "max_steps": 100000, "loss": 0.46661272644996643, "sps": 565.3571380995694 }, { "update": 2680, "global_step": 10977280, "num_episodes": 741, "mean_reward": 2107.5635332036018, "mean_length": 34163.47, "survival_pct": 0.3416347, "max_steps": 100000, "loss": 0.4648454785346985, "sps": 572.308466885207 }, { "update": 2685, "global_step": 10997760, "num_episodes": 741, "mean_reward": 2107.5635332036018, "mean_length": 34163.47, "survival_pct": 0.3416347, "max_steps": 100000, "loss": 0.2607373595237732, "sps": 570.2676571961572 }, { "update": 2690, "global_step": 11018240, "num_episodes": 741, "mean_reward": 2107.5635332036018, "mean_length": 34163.47, "survival_pct": 0.3416347, "max_steps": 100000, "loss": 0.125162735581398, "sps": 566.7273297363138 }, { "update": 2695, "global_step": 11038720, "num_episodes": 741, "mean_reward": 2107.5635332036018, "mean_length": 34163.47, "survival_pct": 0.3416347, "max_steps": 100000, "loss": 0.059064000844955444, "sps": 552.6096670437072 }, { "update": 2700, "global_step": 11059200, "num_episodes": 741, "mean_reward": 2107.5635332036018, "mean_length": 34163.47, "survival_pct": 0.3416347, "max_steps": 100000, "loss": -0.03592519462108612, "sps": 605.1661001263743 }, { "update": 2705, "global_step": 11079680, "num_episodes": 741, "mean_reward": 2107.5635332036018, "mean_length": 34163.47, "survival_pct": 0.3416347, "max_steps": 100000, "loss": -0.05099225044250488, "sps": 607.4455279859145 }, { "update": 2710, "global_step": 11100160, "num_episodes": 742, "mean_reward": 2117.9644485020635, "mean_length": 35162.27, "survival_pct": 0.35162269999999995, "max_steps": 100000, "loss": -0.046167902648448944, "sps": 590.2299902607874 }, { "update": 2715, "global_step": 11120640, "num_episodes": 746, "mean_reward": 2117.223214428425, "mean_length": 35106.36, "survival_pct": 0.35106360000000003, "max_steps": 100000, "loss": 11.869409561157227, "sps": 396.9261046886479 }, { "update": 2720, "global_step": 11141120, "num_episodes": 749, "mean_reward": 2004.7705671191216, "mean_length": 36107.98, "survival_pct": 0.3610798, "max_steps": 100000, "loss": 156.38876342773438, "sps": 334.0244695201906 }, { "update": 2725, "global_step": 11161600, "num_episodes": 749, "mean_reward": 2004.7705671191216, "mean_length": 36107.98, "survival_pct": 0.3610798, "max_steps": 100000, "loss": 2.8302462100982666, "sps": 414.56266910509237 }, { "update": 2730, "global_step": 11182080, "num_episodes": 749, "mean_reward": 2004.7705671191216, "mean_length": 36107.98, "survival_pct": 0.3610798, "max_steps": 100000, "loss": 2.787081003189087, "sps": 444.05090888582356 }, { "update": 2735, "global_step": 11202560, "num_episodes": 757, "mean_reward": 1866.676198823452, "mean_length": 35228.65, "survival_pct": 0.3522865, "max_steps": 100000, "loss": 20.099119186401367, "sps": 219.7197521516745 }, { "update": 2740, "global_step": 11223040, "num_episodes": 757, "mean_reward": 1866.676198823452, "mean_length": 35228.65, "survival_pct": 0.3522865, "max_steps": 100000, "loss": 24.235774993896484, "sps": 364.1255592485544 }, { "update": 2745, "global_step": 11243520, "num_episodes": 757, "mean_reward": 1866.676198823452, "mean_length": 35228.65, "survival_pct": 0.3522865, "max_steps": 100000, "loss": 10.114006996154785, "sps": 485.1483243292639 }, { "update": 2750, "global_step": 11264000, "num_episodes": 757, "mean_reward": 1866.676198823452, "mean_length": 35228.65, "survival_pct": 0.3522865, "max_steps": 100000, "loss": 17.78866958618164, "sps": 492.4805186782399 } ]