diff --git "a/training_log.json" "b/training_log.json" --- "a/training_log.json" +++ "b/training_log.json" @@ -2,4465 +2,6051 @@ { "update": 5, "global_step": 20480, - "num_episodes": 12, - "mean_reward": 5.371756076812744, - "mean_length": 259.75, - "loss": -0.07715612649917603, - "sps": 3197.8944336854393 + "num_episodes": 5, + "mean_reward": -0.4786701202392578, + "mean_length": 232.8, + "survival_pct": 0.023280000000000002, + "max_steps": 10000, + "loss": 0.12873922288417816, + "sps": 2902.7821104130076 }, { "update": 10, "global_step": 40960, - "num_episodes": 12, - "mean_reward": 5.371756076812744, - "mean_length": 259.75, - "loss": -0.05466647446155548, - "sps": 3473.7621158486245 + "num_episodes": 5, + "mean_reward": -0.4786701202392578, + "mean_length": 232.8, + "survival_pct": 0.023280000000000002, + "max_steps": 10000, + "loss": 1.048768401145935, + "sps": 2989.934249487722 }, { "update": 15, "global_step": 61440, - "num_episodes": 12, - "mean_reward": 5.371756076812744, - "mean_length": 259.75, - "loss": -0.12925735116004944, - "sps": 3498.48333768441 + "num_episodes": 5, + "mean_reward": -0.4786701202392578, + "mean_length": 232.8, + "survival_pct": 0.023280000000000002, + "max_steps": 10000, + "loss": 0.4728538990020752, + "sps": 2892.3409931007545 }, { "update": 20, "global_step": 81920, - "num_episodes": 17, - "mean_reward": 23.03933811187744, - "mean_length": 2539.294117647059, - "loss": 0.018057599663734436, - "sps": 1650.518983995032 + "num_episodes": 13, + "mean_reward": 70.43313554617075, + "mean_length": 4721.461538461538, + "survival_pct": 0.4721461538461538, + "max_steps": 10000, + "loss": 0.8401235342025757, + "sps": 1363.4104809447254 }, { "update": 25, "global_step": 102400, - "num_episodes": 36, - "mean_reward": 22.05249885718028, - "mean_length": 2419.527777777778, - "loss": 1.0680813789367676, - "sps": 2943.2734013432255 + "num_episodes": 25, + "mean_reward": 49.477407665252684, + "mean_length": 3493.36, + "survival_pct": 0.34933600000000004, + "max_steps": 10000, + "loss": 2.6180195808410645, + "sps": 1433.9218398141863 }, { "update": 30, "global_step": 122880, - "num_episodes": 36, - "mean_reward": 22.05249885718028, - "mean_length": 2419.527777777778, - "loss": -0.11745010316371918, - "sps": 3506.3540771064972 + "num_episodes": 25, + "mean_reward": 49.477407665252684, + "mean_length": 3493.36, + "survival_pct": 0.34933600000000004, + "max_steps": 10000, + "loss": -0.17261816561222076, + "sps": 2402.5672121197713 }, { "update": 35, "global_step": 143360, - "num_episodes": 36, - "mean_reward": 22.05249885718028, - "mean_length": 2419.527777777778, - "loss": 0.011805668473243713, - "sps": 3463.1128670942708 + "num_episodes": 25, + "mean_reward": 49.477407665252684, + "mean_length": 3493.36, + "survival_pct": 0.34933600000000004, + "max_steps": 10000, + "loss": -0.09139963984489441, + "sps": 2342.5616398383595 }, { "update": 40, "global_step": 163840, - "num_episodes": 39, - "mean_reward": 28.794600401169216, - "mean_length": 3002.641025641026, - "loss": 0.10533764958381653, - "sps": 1986.6795686952885 + "num_episodes": 29, + "mean_reward": 63.332810089505955, + "mean_length": 4053.3793103448274, + "survival_pct": 0.40533793103448273, + "max_steps": 10000, + "loss": 2.368999719619751, + "sps": 803.2009899550957 }, { "update": 45, "global_step": 184320, - "num_episodes": 50, - "mean_reward": 34.32792649269104, - "mean_length": 3376.94, - "loss": 1.039185881614685, - "sps": 3203.1897121265147 + "num_episodes": 48, + "mean_reward": 61.90170055627823, + "mean_length": 3634.7916666666665, + "survival_pct": 0.36347916666666663, + "max_steps": 10000, + "loss": 25.510446548461914, + "sps": 876.6005063299407 }, { "update": 50, "global_step": 204800, - "num_episodes": 50, - "mean_reward": 34.32792649269104, - "mean_length": 3376.94, - "loss": 0.3282562494277954, - "sps": 3259.1573788510946 + "num_episodes": 51, + "mean_reward": 58.35679834964228, + "mean_length": 3464.156862745098, + "survival_pct": 0.3464156862745098, + "max_steps": 10000, + "loss": 1.516045331954956, + "sps": 2241.345890706289 }, { "update": 55, "global_step": 225280, - "num_episodes": 50, - "mean_reward": 34.32792649269104, - "mean_length": 3376.94, - "loss": 0.04704876244068146, - "sps": 3216.591971265392 + "num_episodes": 51, + "mean_reward": 58.35679834964228, + "mean_length": 3464.156862745098, + "survival_pct": 0.3464156862745098, + "max_steps": 10000, + "loss": -0.1645357310771942, + "sps": 2287.8772324784622 }, { "update": 60, "global_step": 245760, - "num_episodes": 54, - "mean_reward": 37.31432532381128, - "mean_length": 3500.074074074074, - "loss": 1.8284252882003784, - "sps": 2234.903286880901 + "num_episodes": 55, + "mean_reward": 61.13233257640492, + "mean_length": 3583.0363636363636, + "survival_pct": 0.3583036363636364, + "max_steps": 10000, + "loss": 1.9156525135040283, + "sps": 1777.0489161780952 }, { "update": 65, "global_step": 266240, "num_episodes": 65, - "mean_reward": 49.254399849818306, - "mean_length": 3849.723076923077, - "loss": 0.3701796531677246, - "sps": 2919.281950407853 + "mean_reward": 70.67225723266601, + "mean_length": 3672.6153846153848, + "survival_pct": 0.3672615384615385, + "max_steps": 10000, + "loss": 0.939544141292572, + "sps": 2089.239566218028 }, { "update": 70, "global_step": 286720, - "num_episodes": 65, - "mean_reward": 49.254399849818306, - "mean_length": 3849.723076923077, - "loss": 0.19717253744602203, - "sps": 2983.3501026207314 + "num_episodes": 71, + "mean_reward": 69.33942423404103, + "mean_length": 3655.2535211267605, + "survival_pct": 0.36552535211267606, + "max_steps": 10000, + "loss": 1.1759222745895386, + "sps": 2632.098762980766 }, { "update": 75, "global_step": 307200, - "num_episodes": 65, - "mean_reward": 49.254399849818306, - "mean_length": 3849.723076923077, - "loss": 1.6765296459197998, - "sps": 2929.146040695321 + "num_episodes": 71, + "mean_reward": 69.33942423404103, + "mean_length": 3655.2535211267605, + "survival_pct": 0.36552535211267606, + "max_steps": 10000, + "loss": 1.520628571510315, + "sps": 2482.8506672459066 }, { "update": 80, "global_step": 327680, - "num_episodes": 68, - "mean_reward": 53.68283286515404, - "mean_length": 4121.058823529412, - "loss": 0.2395431399345398, - "sps": 2769.2257028011463 + "num_episodes": 73, + "mean_reward": 73.5122941879377, + "mean_length": 3829.082191780822, + "survival_pct": 0.3829082191780822, + "max_steps": 10000, + "loss": 0.8090716004371643, + "sps": 1082.2416143589735 }, { "update": 85, "global_step": 348160, - "num_episodes": 80, - "mean_reward": 54.429239320755, - "mean_length": 4161.5125, - "loss": 21.42174530029297, - "sps": 2001.0349067939262 + "num_episodes": 83, + "mean_reward": 73.65270042993936, + "mean_length": 3811.9518072289156, + "survival_pct": 0.38119518072289155, + "max_steps": 10000, + "loss": 6.820394515991211, + "sps": 823.4620989576031 }, { "update": 90, "global_step": 368640, - "num_episodes": 80, - "mean_reward": 54.429239320755, - "mean_length": 4161.5125, - "loss": 1.0561028718948364, - "sps": 2419.50496157211 + "num_episodes": 91, + "mean_reward": 75.30516777981768, + "mean_length": 3846.164835164835, + "survival_pct": 0.38461648351648353, + "max_steps": 10000, + "loss": 2.671729803085327, + "sps": 1723.468236780256 }, { "update": 95, "global_step": 389120, - "num_episodes": 80, - "mean_reward": 54.429239320755, - "mean_length": 4161.5125, - "loss": 1.28178870677948, - "sps": 2387.100829239012 + "num_episodes": 91, + "mean_reward": 75.30516777981768, + "mean_length": 3846.164835164835, + "survival_pct": 0.38461648351648353, + "max_steps": 10000, + "loss": 15.690156936645508, + "sps": 1463.8510964098139 }, { "update": 100, "global_step": 409600, - "num_episodes": 83, - "mean_reward": 69.2969753311341, - "mean_length": 4253.614457831325, - "loss": 1.1579307317733765, - "sps": 1711.4232678830151 + "num_episodes": 91, + "mean_reward": 75.30516777981768, + "mean_length": 3846.164835164835, + "survival_pct": 0.38461648351648353, + "max_steps": 10000, + "loss": 0.16147078573703766, + "sps": 1544.6687513559787 }, { "update": 105, "global_step": 430080, - "num_episodes": 98, - "mean_reward": 67.86165303600077, - "mean_length": 4242.530612244898, - "loss": 2.247659683227539, - "sps": 3247.5957464867383 + "num_episodes": 95, + "mean_reward": 91.60696769011648, + "mean_length": 4105.273684210526, + "survival_pct": 0.4105273684210526, + "max_steps": 10000, + "loss": 0.28715068101882935, + "sps": 1491.869871137239 }, { "update": 110, "global_step": 450560, - "num_episodes": 98, - "mean_reward": 67.86165303600077, - "mean_length": 4242.530612244898, - "loss": 0.582718014717102, - "sps": 3180.2988030100887 + "num_episodes": 103, + "mean_reward": 112.06914116382599, + "mean_length": 4315.97, + "survival_pct": 0.431597, + "max_steps": 10000, + "loss": 0.6831140518188477, + "sps": 1664.9507448966885 }, { "update": 115, "global_step": 471040, - "num_episodes": 98, - "mean_reward": 67.86165303600077, - "mean_length": 4242.530612244898, - "loss": -0.026346325874328613, - "sps": 2964.689585145653 + "num_episodes": 103, + "mean_reward": 112.06914116382599, + "mean_length": 4315.97, + "survival_pct": 0.431597, + "max_steps": 10000, + "loss": -0.07581882178783417, + "sps": 2436.273752467313 }, { "update": 120, "global_step": 491520, - "num_episodes": 99, - "mean_reward": 68.72626714995413, - "mean_length": 4300.686868686868, - "loss": -0.047779276967048645, - "sps": 3043.7824660495194 + "num_episodes": 103, + "mean_reward": 112.06914116382599, + "mean_length": 4315.97, + "survival_pct": 0.431597, + "max_steps": 10000, + "loss": -0.12825502455234528, + "sps": 2290.0122957801336 }, { "update": 125, "global_step": 512000, - "num_episodes": 112, - "mean_reward": 79.7761773943901, - "mean_length": 4945.35, - "loss": 5.148533821105957, - "sps": 1330.97401420016 + "num_episodes": 109, + "mean_reward": 120.03564118385314, + "mean_length": 4411.64, + "survival_pct": 0.44116400000000006, + "max_steps": 10000, + "loss": 0.5430532097816467, + "sps": 2629.093479829129 }, { "update": 130, "global_step": 532480, - "num_episodes": 112, - "mean_reward": 79.7761773943901, - "mean_length": 4945.35, - "loss": -0.18798421323299408, - "sps": 3502.5924445773726 + "num_episodes": 115, + "mean_reward": 123.0210286808014, + "mean_length": 4319.07, + "survival_pct": 0.431907, + "max_steps": 10000, + "loss": 6.376620769500732, + "sps": 960.5544143123027 }, { "update": 135, "global_step": 552960, - "num_episodes": 112, - "mean_reward": 79.7761773943901, - "mean_length": 4945.35, - "loss": -0.08565455675125122, - "sps": 3461.274367919746 + "num_episodes": 116, + "mean_reward": 123.100150847435, + "mean_length": 4326.72, + "survival_pct": 0.432672, + "max_steps": 10000, + "loss": 0.21949227154254913, + "sps": 2445.2169291143578 }, { "update": 140, "global_step": 573440, "num_episodes": 116, - "mean_reward": 84.52449138879776, - "mean_length": 4847.52, - "loss": 2.0761420726776123, - "sps": 2729.742934114658 + "mean_reward": 123.100150847435, + "mean_length": 4326.72, + "survival_pct": 0.432672, + "max_steps": 10000, + "loss": -0.2501995265483856, + "sps": 2494.697519833577 }, { "update": 145, "global_step": 593920, - "num_episodes": 124, - "mean_reward": 93.81721450567245, - "mean_length": 5141.93, - "loss": 2.071385622024536, - "sps": 3031.550454700739 + "num_episodes": 123, + "mean_reward": 134.99672790527345, + "mean_length": 4791.57, + "survival_pct": 0.47915699999999994, + "max_steps": 10000, + "loss": 31.24036407470703, + "sps": 657.6329559456061 }, { "update": 150, "global_step": 614400, - "num_episodes": 124, - "mean_reward": 93.81721450567245, - "mean_length": 5141.93, - "loss": 3.72536039352417, - "sps": 3220.2233787373293 + "num_episodes": 129, + "mean_reward": 136.9602340936661, + "mean_length": 4811.37, + "survival_pct": 0.481137, + "max_steps": 10000, + "loss": 0.5206527709960938, + "sps": 2480.224703717002 }, { "update": 155, "global_step": 634880, - "num_episodes": 124, - "mean_reward": 93.81721450567245, - "mean_length": 5141.93, - "loss": 0.9640051126480103, - "sps": 3180.6185154137133 + "num_episodes": 132, + "mean_reward": 133.16078406333924, + "mean_length": 4618.29, + "survival_pct": 0.461829, + "max_steps": 10000, + "loss": 0.5726419687271118, + "sps": 2234.639911831208 }, { "update": 160, "global_step": 655360, - "num_episodes": 128, - "mean_reward": 96.66174763917923, - "mean_length": 5340.2, - "loss": 0.27916210889816284, - "sps": 3097.2108275565165 + "num_episodes": 132, + "mean_reward": 133.16078406333924, + "mean_length": 4618.29, + "survival_pct": 0.461829, + "max_steps": 10000, + "loss": -0.06972374022006989, + "sps": 2060.961397107338 }, { "update": 165, "global_step": 675840, - "num_episodes": 145, - "mean_reward": 96.82534897089005, - "mean_length": 5248.29, - "loss": 7.688024520874023, - "sps": 965.9878695065277 + "num_episodes": 139, + "mean_reward": 134.1688402891159, + "mean_length": 4694.54, + "survival_pct": 0.469454, + "max_steps": 10000, + "loss": 1.1694589853286743, + "sps": 2018.626635358524 }, { "update": 170, "global_step": 696320, - "num_episodes": 147, - "mean_reward": 96.301647002697, - "mean_length": 5159.77, - "loss": 0.46070918440818787, - "sps": 2784.851134794954 + "num_episodes": 144, + "mean_reward": 142.00250946283342, + "mean_length": 4984.25, + "survival_pct": 0.498425, + "max_steps": 10000, + "loss": 2.366715669631958, + "sps": 1592.928048802198 }, { "update": 175, "global_step": 716800, - "num_episodes": 147, - "mean_reward": 96.301647002697, - "mean_length": 5159.77, - "loss": -0.03485479950904846, - "sps": 2660.7421365023765 + "num_episodes": 146, + "mean_reward": 144.95669583559035, + "mean_length": 5095.59, + "survival_pct": 0.509559, + "max_steps": 10000, + "loss": 21.034530639648438, + "sps": 1872.9182768017681 }, { "update": 180, "global_step": 737280, - "num_episodes": 152, - "mean_reward": 100.57385118722915, - "mean_length": 5155.4, - "loss": 1.1904330253601074, - "sps": 2392.635470993461 + "num_episodes": 146, + "mean_reward": 144.95669583559035, + "mean_length": 5095.59, + "survival_pct": 0.509559, + "max_steps": 10000, + "loss": 0.10471776127815247, + "sps": 2076.429778308613 }, { "update": 185, "global_step": 757760, - "num_episodes": 159, - "mean_reward": 95.51411318063737, - "mean_length": 5157.77, - "loss": 0.7197635173797607, - "sps": 2797.608756484379 + "num_episodes": 152, + "mean_reward": 152.9122651386261, + "mean_length": 5266.79, + "survival_pct": 0.526679, + "max_steps": 10000, + "loss": 28.784088134765625, + "sps": 1428.3776386526256 }, { "update": 190, "global_step": 778240, - "num_episodes": 160, - "mean_reward": 94.83179949045181, - "mean_length": 5157.77, - "loss": 0.5011193156242371, - "sps": 2885.175874304794 + "num_episodes": 155, + "mean_reward": 159.41734125614167, + "mean_length": 5462.84, + "survival_pct": 0.546284, + "max_steps": 10000, + "loss": 0.5585659742355347, + "sps": 1503.145884656211 }, { "update": 195, "global_step": 798720, - "num_episodes": 160, - "mean_reward": 94.83179949045181, - "mean_length": 5157.77, - "loss": 0.7710214853286743, - "sps": 2916.656200352378 + "num_episodes": 158, + "mean_reward": 176.945558218956, + "mean_length": 5559.17, + "survival_pct": 0.555917, + "max_steps": 10000, + "loss": 48.71400451660156, + "sps": 522.6239066604847 }, { "update": 200, "global_step": 819200, - "num_episodes": 164, - "mean_reward": 95.26549025774003, - "mean_length": 5161.02, - "loss": 3.085784435272217, - "sps": 2609.085698942629 + "num_episodes": 159, + "mean_reward": 170.58766025066376, + "mean_length": 5475.19, + "survival_pct": 0.547519, + "max_steps": 10000, + "loss": 15.522979736328125, + "sps": 671.7228145587918 }, { "update": 205, "global_step": 839680, - "num_episodes": 180, - "mean_reward": 90.60398989439011, - "mean_length": 4882.37, - "loss": 25.020034790039062, - "sps": 278.88355834494627 + "num_episodes": 159, + "mean_reward": 170.58766025066376, + "mean_length": 5475.19, + "survival_pct": 0.36501266666666665, + "max_steps": 15000, + "loss": 4.25346040725708, + "sps": 708.7725246983115 }, { "update": 210, "global_step": 860160, - "num_episodes": 188, - "mean_reward": 79.05637022733688, - "mean_length": 4494.02, - "loss": 15.180160522460938, - "sps": 944.5016492069974 + "num_episodes": 159, + "mean_reward": 170.58766025066376, + "mean_length": 5475.19, + "survival_pct": 0.36501266666666665, + "max_steps": 15000, + "loss": 0.7638179659843445, + "sps": 658.5550006478268 }, { "update": 215, "global_step": 880640, - "num_episodes": 190, - "mean_reward": 79.10023557424546, - "mean_length": 4536.35, - "loss": 9.965888977050781, - "sps": 364.6418338592982 + "num_episodes": 163, + "mean_reward": 192.89881912708282, + "mean_length": 5817.41, + "survival_pct": 0.3878273333333333, + "max_steps": 15000, + "loss": 12.150495529174805, + "sps": 353.88007707970496 }, { "update": 220, "global_step": 901120, - "num_episodes": 197, - "mean_reward": 77.15829209089279, - "mean_length": 4451.16, - "loss": 5.519225597381592, - "sps": 452.03522870499233 + "num_episodes": 166, + "mean_reward": 207.2202451276779, + "mean_length": 6161.78, + "survival_pct": 0.41078533333333334, + "max_steps": 15000, + "loss": 39.93988037109375, + "sps": 320.11899245227085 }, { "update": 225, "global_step": 921600, - "num_episodes": 202, - "mean_reward": 75.52227295637131, - "mean_length": 4354.33, - "loss": 64.15535736083984, - "sps": 367.65986127646954 + "num_episodes": 170, + "mean_reward": 259.2956739234924, + "mean_length": 6429.44, + "survival_pct": 0.4286293333333333, + "max_steps": 15000, + "loss": 86.16336822509766, + "sps": 269.09665591603243 }, { "update": 230, "global_step": 942080, - "num_episodes": 208, - "mean_reward": 77.0743759560585, - "mean_length": 4288.63, - "loss": 136.16909790039062, - "sps": 352.9326136603177 + "num_episodes": 174, + "mean_reward": 253.5440968155861, + "mean_length": 6199.25, + "survival_pct": 0.41328333333333334, + "max_steps": 15000, + "loss": 95.76709747314453, + "sps": 432.8286117914245 }, { "update": 235, "global_step": 962560, - "num_episodes": 214, - "mean_reward": 74.43292628288269, - "mean_length": 4152.63, - "loss": 21.60038185119629, - "sps": 347.70005658972826 + "num_episodes": 175, + "mean_reward": 251.54071150064468, + "mean_length": 6138.37, + "survival_pct": 0.4092246666666667, + "max_steps": 15000, + "loss": 0.6741273999214172, + "sps": 2603.814903422436 }, { "update": 240, "global_step": 983040, - "num_episodes": 227, - "mean_reward": 56.747190070152286, - "mean_length": 3476.26, - "loss": 5247.01953125, - "sps": 338.3544128752909 + "num_episodes": 176, + "mean_reward": 253.30119943857193, + "mean_length": 6284.35, + "survival_pct": 0.4189566666666667, + "max_steps": 15000, + "loss": 0.1569591909646988, + "sps": 2278.9720754815094 }, { "update": 245, "global_step": 1003520, - "num_episodes": 246, - "mean_reward": 83.43761008739472, - "mean_length": 3093.44, - "loss": 24.65706443786621, - "sps": 502.2840263546167 + "num_episodes": 177, + "mean_reward": 256.35702211141586, + "mean_length": 6425.14, + "survival_pct": 0.4283426666666667, + "max_steps": 15000, + "loss": -0.17721155285835266, + "sps": 2303.773051684429 }, { "update": 250, "global_step": 1024000, - "num_episodes": 258, - "mean_reward": 78.04051938056946, - "mean_length": 2602.94, - "loss": 243.8249053955078, - "sps": 330.56640317617087 + "num_episodes": 179, + "mean_reward": 255.5962089705467, + "mean_length": 6525.14, + "survival_pct": 0.43500933333333336, + "max_steps": 15000, + "loss": 0.23589976131916046, + "sps": 2206.427532109994 }, { "update": 255, "global_step": 1044480, - "num_episodes": 264, - "mean_reward": 81.05206553459168, - "mean_length": 2411.21, - "loss": 39.96983337402344, - "sps": 549.1892263469624 + "num_episodes": 180, + "mean_reward": 257.4188562893867, + "mean_length": 6668.15, + "survival_pct": 0.4445433333333333, + "max_steps": 15000, + "loss": 0.549183189868927, + "sps": 2211.682715091644 }, { "update": 260, "global_step": 1064960, - "num_episodes": 268, - "mean_reward": 219.54257692813874, - "mean_length": 2413.93, - "loss": 1803.801025390625, - "sps": 598.9593989246749 + "num_episodes": 183, + "mean_reward": 269.09694628953935, + "mean_length": 7078.44, + "survival_pct": 0.471896, + "max_steps": 15000, + "loss": 1.171219825744629, + "sps": 2017.4015932661957 }, { "update": 265, "global_step": 1085440, - "num_episodes": 270, - "mean_reward": 218.29985638141633, - "mean_length": 2316.4, - "loss": 542.6195678710938, - "sps": 576.2196944536587 + "num_episodes": 183, + "mean_reward": 269.09694628953935, + "mean_length": 7078.44, + "survival_pct": 0.471896, + "max_steps": 15000, + "loss": 0.1275859922170639, + "sps": 1835.885855759612 }, { "update": 270, "global_step": 1105920, - "num_episodes": 274, - "mean_reward": 310.3814339208603, - "mean_length": 2500.61, - "loss": 36.787967681884766, - "sps": 476.229446655146 + "num_episodes": 184, + "mean_reward": 270.90041587114337, + "mean_length": 7220.17, + "survival_pct": 0.4813446666666667, + "max_steps": 15000, + "loss": 0.09780505299568176, + "sps": 1811.136951881794 }, { "update": 275, "global_step": 1126400, - "num_episodes": 289, - "mean_reward": 305.01237434387207, - "mean_length": 2535.94, - "loss": 19.507030487060547, - "sps": 400.9073826886594 + "num_episodes": 186, + "mean_reward": 279.4690273213387, + "mean_length": 7320.17, + "survival_pct": 0.48801133333333335, + "max_steps": 15000, + "loss": 15.602858543395996, + "sps": 588.0868439042007 }, { "update": 280, "global_step": 1146880, - "num_episodes": 298, - "mean_reward": 401.39536855220797, - "mean_length": 2565.42, - "loss": 52.059120178222656, - "sps": 702.3737668350727 + "num_episodes": 188, + "mean_reward": 288.84978276491165, + "mean_length": 7391.33, + "survival_pct": 0.4927553333333333, + "max_steps": 15000, + "loss": 0.023340240120887756, + "sps": 1795.065224055729 }, { "update": 285, "global_step": 1167360, - "num_episodes": 309, - "mean_reward": 391.02530930519106, - "mean_length": 2143.77, - "loss": 22.919591903686523, - "sps": 275.1379100679109 + "num_episodes": 190, + "mean_reward": 304.26770622015, + "mean_length": 7681.49, + "survival_pct": 0.5120993333333334, + "max_steps": 15000, + "loss": -0.13154439628124237, + "sps": 2105.6127488836673 }, { "update": 290, "global_step": 1187840, - "num_episodes": 316, - "mean_reward": 489.53185575008393, - "mean_length": 2277.6, - "loss": 31.41555404663086, - "sps": 350.6416338473395 + "num_episodes": 193, + "mean_reward": 299.99041105508803, + "mean_length": 7774.97, + "survival_pct": 0.5183313333333334, + "max_steps": 15000, + "loss": 27.63266944885254, + "sps": 1435.2377599727854 }, { "update": 295, "global_step": 1208320, - "num_episodes": 320, - "mean_reward": 489.0599168300629, - "mean_length": 2303.45, - "loss": 52.70932388305664, - "sps": 859.4427758832318 + "num_episodes": 193, + "mean_reward": 299.99041105508803, + "mean_length": 7774.97, + "survival_pct": 0.5183313333333334, + "max_steps": 15000, + "loss": 0.08694343268871307, + "sps": 1579.9578506707744 }, { "update": 300, "global_step": 1228800, - "num_episodes": 326, - "mean_reward": 506.6310522270203, - "mean_length": 2500.66, - "loss": 36.654197692871094, - "sps": 378.8344670632786 + "num_episodes": 194, + "mean_reward": 299.24194776773453, + "mean_length": 7824.97, + "survival_pct": 0.5216646666666667, + "max_steps": 15000, + "loss": -0.08208262920379639, + "sps": 1713.483711856869 }, { "update": 305, "global_step": 1249280, - "num_episodes": 331, - "mean_reward": 490.07581648349765, - "mean_length": 2551.54, - "loss": 13.381026268005371, - "sps": 324.31118759980126 + "num_episodes": 199, + "mean_reward": 291.81668387651445, + "mean_length": 7825.67, + "survival_pct": 0.5217113333333333, + "max_steps": 15000, + "loss": 5.8720316886901855, + "sps": 1446.1967920114107 }, { "update": 310, "global_step": 1269760, - "num_episodes": 335, - "mean_reward": 490.83249175071717, - "mean_length": 2754.61, - "loss": 13.67751693725586, - "sps": 601.9785828545383 + "num_episodes": 201, + "mean_reward": 274.33396270036695, + "mean_length": 7777.49, + "survival_pct": 0.5184993333333333, + "max_steps": 15000, + "loss": 25.04059600830078, + "sps": 611.9018372332281 }, { "update": 315, "global_step": 1290240, - "num_episodes": 342, - "mean_reward": 558.9792760944366, - "mean_length": 2851.49, - "loss": 32.91801834106445, - "sps": 321.4874507503393 + "num_episodes": 205, + "mean_reward": 272.47521389484405, + "mean_length": 7893.17, + "survival_pct": 0.5262113333333334, + "max_steps": 15000, + "loss": 35.360877990722656, + "sps": 393.4470006251 }, { "update": 320, "global_step": 1310720, - "num_episodes": 352, - "mean_reward": 553.7738126516342, - "mean_length": 2853.21, - "loss": 18.369260787963867, - "sps": 621.5240850375873 + "num_episodes": 212, + "mean_reward": 274.2415503978729, + "mean_length": 7724.58, + "survival_pct": 0.514972, + "max_steps": 15000, + "loss": 17.640727996826172, + "sps": 576.0717870047208 }, { "update": 325, "global_step": 1331200, - "num_episodes": 358, - "mean_reward": 590.7320046901702, - "mean_length": 2950.34, - "loss": 5.919332504272461, - "sps": 437.5267343221937 + "num_episodes": 212, + "mean_reward": 274.2415503978729, + "mean_length": 7724.58, + "survival_pct": 0.514972, + "max_steps": 15000, + "loss": 4.197415351867676, + "sps": 763.5424454388547 }, { "update": 330, "global_step": 1351680, - "num_episodes": 364, - "mean_reward": 591.3189961528778, - "mean_length": 3142.3, - "loss": 1779.034423828125, - "sps": 167.8109154720703 + "num_episodes": 213, + "mean_reward": 276.0169840526581, + "mean_length": 7871.27, + "survival_pct": 0.5247513333333333, + "max_steps": 15000, + "loss": 3.047353982925415, + "sps": 822.0506273160125 }, { "update": 335, "global_step": 1372160, - "num_episodes": 378, - "mean_reward": 357.36821466445923, - "mean_length": 2510.76, - "loss": 33.58185958862305, - "sps": 451.94218989282257 + "num_episodes": 214, + "mean_reward": 277.7569498729706, + "mean_length": 8018.5, + "survival_pct": 0.5345666666666666, + "max_steps": 15000, + "loss": 5.528885364532471, + "sps": 753.0351947572923 }, { "update": 340, "global_step": 1392640, - "num_episodes": 388, - "mean_reward": 455.7884948730469, - "mean_length": 2739.63, - "loss": 16.64685821533203, - "sps": 273.0263210661101 + "num_episodes": 215, + "mean_reward": 278.02211222648623, + "mean_length": 8068.5, + "survival_pct": 0.5379, + "max_steps": 15000, + "loss": 1.2490136623382568, + "sps": 815.783836720429 }, { "update": 345, "global_step": 1413120, - "num_episodes": 390, - "mean_reward": 456.36600038528445, - "mean_length": 2831.63, - "loss": 53.800376892089844, - "sps": 262.8572795282316 + "num_episodes": 217, + "mean_reward": 314.82606459617614, + "mean_length": 8257.34, + "survival_pct": 0.5504893333333334, + "max_steps": 15000, + "loss": 0.16502337157726288, + "sps": 2263.8703548795343 }, { "update": 350, "global_step": 1433600, - "num_episodes": 392, - "mean_reward": 358.3022264003754, - "mean_length": 2855.26, - "loss": 275.63385009765625, - "sps": 446.63750460322206 + "num_episodes": 220, + "mean_reward": 337.1490696239471, + "mean_length": 8503.93, + "survival_pct": 0.5669286666666666, + "max_steps": 15000, + "loss": -0.23407027125358582, + "sps": 2114.7366860740576 }, { "update": 355, "global_step": 1454080, - "num_episodes": 407, - "mean_reward": 375.7198329591751, - "mean_length": 2911.33, - "loss": 8.481382369995117, - "sps": 367.7815116282032 + "num_episodes": 220, + "mean_reward": 337.1490696239471, + "mean_length": 8503.93, + "survival_pct": 0.5669286666666666, + "max_steps": 15000, + "loss": -0.08559620380401611, + "sps": 2031.0806393625717 }, { "update": 360, "global_step": 1474560, - "num_episodes": 417, - "mean_reward": 297.66134167194366, - "mean_length": 2867.69, - "loss": 23.24544334411621, - "sps": 538.1701556626739 + "num_episodes": 221, + "mean_reward": 339.0253634929657, + "mean_length": 8652.63, + "survival_pct": 0.576842, + "max_steps": 15000, + "loss": -0.06816114485263824, + "sps": 2006.9790581005002 }, { "update": 365, "global_step": 1495040, - "num_episodes": 423, - "mean_reward": 297.34756595134735, - "mean_length": 2864.35, - "loss": 4.300067901611328, - "sps": 967.5617761077667 + "num_episodes": 222, + "mean_reward": 343.7125220012665, + "mean_length": 8702.63, + "survival_pct": 0.5801753333333333, + "max_steps": 15000, + "loss": -0.10728916525840759, + "sps": 2019.2784003150005 }, { "update": 370, "global_step": 1515520, - "num_episodes": 426, - "mean_reward": 420.4760777759552, - "mean_length": 2769.38, - "loss": 0.5866292715072632, - "sps": 666.1817947182782 + "num_episodes": 223, + "mean_reward": 347.4228830242157, + "mean_length": 8752.63, + "survival_pct": 0.5835086666666666, + "max_steps": 15000, + "loss": -0.20886194705963135, + "sps": 1983.558184566299 }, { "update": 375, "global_step": 1536000, - "num_episodes": 429, - "mean_reward": 420.90687354564665, - "mean_length": 2720.24, - "loss": 1.5303698778152466, - "sps": 387.94660115994657 + "num_episodes": 229, + "mean_reward": 347.23676467895507, + "mean_length": 8733.48, + "survival_pct": 0.582232, + "max_steps": 15000, + "loss": 8.476018905639648, + "sps": 771.0850934786216 }, { "update": 380, "global_step": 1556480, - "num_episodes": 447, - "mean_reward": 356.93565448284147, - "mean_length": 2600.92, - "loss": 1.487197995185852, - "sps": 563.1387661188227 + "num_episodes": 234, + "mean_reward": 365.97415913581847, + "mean_length": 8974.59, + "survival_pct": 0.598306, + "max_steps": 15000, + "loss": 12.912026405334473, + "sps": 410.00426579514084 }, { "update": 385, "global_step": 1576960, - "num_episodes": 452, - "mean_reward": 357.39739652633665, - "mean_length": 2840.12, - "loss": 139.934326171875, - "sps": 419.07382914990956 + "num_episodes": 235, + "mean_reward": 364.00330050468443, + "mean_length": 8891.43, + "survival_pct": 0.592762, + "max_steps": 15000, + "loss": -0.11877703666687012, + "sps": 2193.265879978675 }, { "update": 390, "global_step": 1597440, - "num_episodes": 456, - "mean_reward": 320.4454140949249, - "mean_length": 2739.7, - "loss": 35.25250244140625, - "sps": 1220.6982535284524 + "num_episodes": 236, + "mean_reward": 367.3724857187271, + "mean_length": 9039.65, + "survival_pct": 0.6026433333333333, + "max_steps": 15000, + "loss": -0.11603386700153351, + "sps": 2297.7713967547143 }, { "update": 395, "global_step": 1617920, - "num_episodes": 456, - "mean_reward": 320.4454140949249, - "mean_length": 2739.7, - "loss": 3.3828349113464355, - "sps": 1342.5750178490377 + "num_episodes": 238, + "mean_reward": 372.63827639579773, + "mean_length": 9336.77, + "survival_pct": 0.6224513333333334, + "max_steps": 15000, + "loss": -0.1684369295835495, + "sps": 2389.134775345221 }, { "update": 400, "global_step": 1638400, - "num_episodes": 464, - "mean_reward": 328.27054366588595, - "mean_length": 2746.2, - "loss": 9.8130521774292, - "sps": 1796.7926258567122 + "num_episodes": 238, + "mean_reward": 372.63827639579773, + "mean_length": 9336.77, + "survival_pct": 0.6224513333333334, + "max_steps": 15000, + "loss": -0.02054491639137268, + "sps": 2482.540323380072 }, { "update": 405, "global_step": 1658880, - "num_episodes": 470, - "mean_reward": 469.22999305725097, - "mean_length": 2992.85, - "loss": 1.3559972047805786, - "sps": 692.6458346671639 + "num_episodes": 238, + "mean_reward": 372.63827639579773, + "mean_length": 9336.77, + "survival_pct": 0.46683850000000005, + "max_steps": 20000, + "loss": -0.06290135532617569, + "sps": 2344.9434191419 }, { "update": 410, "global_step": 1679360, - "num_episodes": 490, - "mean_reward": 372.7216357469559, - "mean_length": 2776.16, - "loss": 1.9204107522964478, - "sps": 654.3363015594651 + "num_episodes": 238, + "mean_reward": 372.63827639579773, + "mean_length": 9336.77, + "survival_pct": 0.46683850000000005, + "max_steps": 20000, + "loss": -0.250944048166275, + "sps": 2189.1388557903356 }, { "update": 415, "global_step": 1699840, - "num_episodes": 498, - "mean_reward": 355.0806073760986, - "mean_length": 2364.43, - "loss": 0.20851367712020874, - "sps": 1574.1015970289384 + "num_episodes": 244, + "mean_reward": 370.749574341774, + "mean_length": 9542.11, + "survival_pct": 0.4771055, + "max_steps": 20000, + "loss": 8.776338577270508, + "sps": 833.8081182273202 }, { "update": 420, "global_step": 1720320, - "num_episodes": 505, - "mean_reward": 357.1250058794022, - "mean_length": 2659.22, - "loss": 0.8760831356048584, - "sps": 725.5072374754453 + "num_episodes": 247, + "mean_reward": 366.3301660585403, + "mean_length": 9730.75, + "survival_pct": 0.4865375, + "max_steps": 20000, + "loss": 258.7391052246094, + "sps": 608.4010837129435 }, { "update": 425, "global_step": 1740800, - "num_episodes": 519, - "mean_reward": 338.12661921024323, - "mean_length": 2572.09, - "loss": 0.7201396822929382, - "sps": 774.136034580983 + "num_episodes": 247, + "mean_reward": 366.3301660585403, + "mean_length": 9730.75, + "survival_pct": 0.4865375, + "max_steps": 20000, + "loss": 16.064197540283203, + "sps": 685.7919661773449 }, { "update": 430, "global_step": 1761280, - "num_episodes": 520, - "mean_reward": 338.7818141031265, - "mean_length": 2670.69, - "loss": 0.2984998822212219, - "sps": 593.9586130082629 + "num_episodes": 249, + "mean_reward": 369.06732979297635, + "mean_length": 9930.93, + "survival_pct": 0.4965465, + "max_steps": 20000, + "loss": 11.074816703796387, + "sps": 775.2822777334978 }, { "update": 435, "global_step": 1781760, - "num_episodes": 524, - "mean_reward": 200.51355738162994, - "mean_length": 2677.33, - "loss": 0.6558288931846619, - "sps": 563.7237889903446 + "num_episodes": 252, + "mean_reward": 369.86892349243163, + "mean_length": 10135.24, + "survival_pct": 0.5067619999999999, + "max_steps": 20000, + "loss": 8.431387901306152, + "sps": 1088.379163272055 }, { "update": 440, "global_step": 1802240, - "num_episodes": 531, - "mean_reward": 195.75862418174745, - "mean_length": 2684.96, - "loss": 0.6485836505889893, - "sps": 801.5054039510692 + "num_episodes": 252, + "mean_reward": 369.86892349243163, + "mean_length": 10135.24, + "survival_pct": 0.5067619999999999, + "max_steps": 20000, + "loss": 4.96181583404541, + "sps": 1059.8013951608161 }, { "update": 445, "global_step": 1822720, - "num_episodes": 541, - "mean_reward": 182.37921036720275, - "mean_length": 2787.68, - "loss": 0.9738303422927856, - "sps": 552.0177086572941 + "num_episodes": 252, + "mean_reward": 369.86892349243163, + "mean_length": 10135.24, + "survival_pct": 0.5067619999999999, + "max_steps": 20000, + "loss": 3.2583541870117188, + "sps": 1049.9750481736253 }, { "update": 450, "global_step": 1843200, - "num_episodes": 544, - "mean_reward": 182.38307575702666, - "mean_length": 2886.96, - "loss": 0.35677605867385864, - "sps": 1078.6474451651457 + "num_episodes": 252, + "mean_reward": 369.86892349243163, + "mean_length": 10135.24, + "survival_pct": 0.5067619999999999, + "max_steps": 20000, + "loss": 1.682092308998108, + "sps": 1042.7284091211875 }, { "update": 455, "global_step": 1863680, - "num_episodes": 545, - "mean_reward": 183.31554008960723, - "mean_length": 2983.98, - "loss": 2.5785653591156006, - "sps": 2238.2674979327135 + "num_episodes": 255, + "mean_reward": 370.1215773010254, + "mean_length": 10435.24, + "survival_pct": 0.521762, + "max_steps": 20000, + "loss": 5.13987398147583, + "sps": 399.81189577443547 }, { "update": 460, "global_step": 1884160, - "num_episodes": 552, - "mean_reward": 184.02062775611878, - "mean_length": 2848.2, - "loss": 2.8595657348632812, - "sps": 788.1771239252804 + "num_episodes": 257, + "mean_reward": 501.74020595550536, + "mean_length": 10734.18, + "survival_pct": 0.536709, + "max_steps": 20000, + "loss": 19.97075843811035, + "sps": 377.7733693034371 }, { "update": 465, "global_step": 1904640, - "num_episodes": 556, - "mean_reward": 185.80799278259278, - "mean_length": 3043.7, - "loss": 1.9609827995300293, - "sps": 577.1914698783215 + "num_episodes": 259, + "mean_reward": 499.11380367279054, + "mean_length": 10838.47, + "survival_pct": 0.5419235, + "max_steps": 20000, + "loss": 6.397243022918701, + "sps": 542.2637076266058 }, { "update": 470, "global_step": 1925120, - "num_episodes": 562, - "mean_reward": 187.01453431129457, - "mean_length": 3143.24, - "loss": 3.339183807373047, - "sps": 1007.594504286708 + "num_episodes": 260, + "mean_reward": 485.6533655166626, + "mean_length": 10696.07, + "survival_pct": 0.5348035, + "max_steps": 20000, + "loss": 0.6303369998931885, + "sps": 817.7485527810969 }, { "update": 475, "global_step": 1945600, - "num_episodes": 566, - "mean_reward": 173.54088757038116, - "mean_length": 2963.92, - "loss": 0.6618616580963135, - "sps": 901.0594653949358 + "num_episodes": 263, + "mean_reward": 484.7361448955536, + "mean_length": 10797.13, + "survival_pct": 0.5398565, + "max_steps": 20000, + "loss": 0.7859541773796082, + "sps": 844.7722671839322 }, { "update": 480, "global_step": 1966080, - "num_episodes": 575, - "mean_reward": 33.9250515460968, - "mean_length": 2871.77, - "loss": 2.1673176288604736, - "sps": 637.9211744730668 + "num_episodes": 263, + "mean_reward": 484.7361448955536, + "mean_length": 10797.13, + "survival_pct": 0.5398565, + "max_steps": 20000, + "loss": 0.6309153437614441, + "sps": 838.773769030313 }, { "update": 485, "global_step": 1986560, - "num_episodes": 586, - "mean_reward": 34.479522528648374, - "mean_length": 3064.33, - "loss": 0.5433505773544312, - "sps": 434.7028394813384 + "num_episodes": 263, + "mean_reward": 484.7361448955536, + "mean_length": 10797.13, + "survival_pct": 0.5398565, + "max_steps": 20000, + "loss": 0.18543700873851776, + "sps": 820.910360397683 }, { "update": 490, "global_step": 2007040, - "num_episodes": 596, - "mean_reward": 35.79800989627838, - "mean_length": 3282.82, - "loss": 0.78948974609375, - "sps": 336.9612684589691 + "num_episodes": 263, + "mean_reward": 484.7361448955536, + "mean_length": 10797.13, + "survival_pct": 0.5398565, + "max_steps": 20000, + "loss": 0.3058473467826843, + "sps": 842.5485957089527 }, { "update": 495, "global_step": 2027520, - "num_episodes": 604, - "mean_reward": 33.232103657722476, - "mean_length": 2993.3, - "loss": 1.0133846998214722, - "sps": 825.0153267212829 + "num_episodes": 269, + "mean_reward": 425.1001238536835, + "mean_length": 10806.87, + "survival_pct": 0.5403435000000001, + "max_steps": 20000, + "loss": 2.353271245956421, + "sps": 454.6827555011673 }, { "update": 500, "global_step": 2048000, - "num_episodes": 608, - "mean_reward": 35.19134169101715, - "mean_length": 3091.99, - "loss": 2.315326690673828, - "sps": 1734.6708087716852 + "num_episodes": 269, + "mean_reward": 425.1001238536835, + "mean_length": 10806.87, + "survival_pct": 0.5403435000000001, + "max_steps": 20000, + "loss": 1.0133743286132812, + "sps": 475.15957681047615 }, { "update": 505, "global_step": 2068480, - "num_episodes": 626, - "mean_reward": 34.19606147289276, - "mean_length": 2992.5, - "loss": 0.5885242223739624, - "sps": 326.9154194294925 + "num_episodes": 270, + "mean_reward": 437.5397934818268, + "mean_length": 10983.17, + "survival_pct": 0.5491585, + "max_steps": 20000, + "loss": 0.6723721027374268, + "sps": 601.483216529865 }, { "update": 510, "global_step": 2088960, - "num_episodes": 635, - "mean_reward": 34.653258776664735, - "mean_length": 2903.6, - "loss": 0.5012519359588623, - "sps": 618.1195825840496 + "num_episodes": 271, + "mean_reward": 439.91473383665084, + "mean_length": 11180.4, + "survival_pct": 0.55902, + "max_steps": 20000, + "loss": -0.033539168536663055, + "sps": 736.1812387453508 }, { "update": 515, "global_step": 2109440, - "num_episodes": 648, - "mean_reward": 31.289368829727174, - "mean_length": 2418.77, - "loss": 3.98177433013916, - "sps": 646.2346813472644 + "num_episodes": 275, + "mean_reward": 531.3333820033073, + "mean_length": 11478.88, + "survival_pct": 0.573944, + "max_steps": 20000, + "loss": 0.6327868700027466, + "sps": 564.0725158498853 }, { "update": 520, "global_step": 2129920, - "num_episodes": 655, - "mean_reward": 31.192284369468688, - "mean_length": 2226.18, - "loss": 0.6265271306037903, - "sps": 1099.3023206141552 + "num_episodes": 275, + "mean_reward": 531.3333820033073, + "mean_length": 11478.88, + "survival_pct": 0.573944, + "max_steps": 20000, + "loss": 0.49002259969711304, + "sps": 658.4478053303787 }, { "update": 525, "global_step": 2150400, - "num_episodes": 660, - "mean_reward": 31.34406463623047, - "mean_length": 2219.04, - "loss": 0.10834737122058868, - "sps": 1013.9364910908357 + "num_episodes": 275, + "mean_reward": 531.3333820033073, + "mean_length": 11478.88, + "survival_pct": 0.573944, + "max_steps": 20000, + "loss": 0.24342404305934906, + "sps": 891.360496999003 }, { "update": 530, "global_step": 2170880, - "num_episodes": 663, - "mean_reward": 32.43585729598999, - "mean_length": 2310.25, - "loss": 0.16828738152980804, - "sps": 2044.258556649656 + "num_episodes": 275, + "mean_reward": 531.3333820033073, + "mean_length": 11478.88, + "survival_pct": 0.573944, + "max_steps": 20000, + "loss": 0.16686059534549713, + "sps": 1010.9528682298816 }, { "update": 535, "global_step": 2191360, - "num_episodes": 673, - "mean_reward": 31.12276816368103, - "mean_length": 2188.44, - "loss": 0.30583497881889343, - "sps": 669.970940042682 + "num_episodes": 284, + "mean_reward": 521.5117145895958, + "mean_length": 10942.83, + "survival_pct": 0.5471415, + "max_steps": 20000, + "loss": 0.2558882236480713, + "sps": 469.7280569365049 }, { "update": 540, "global_step": 2211840, - "num_episodes": 688, - "mean_reward": 31.249693727493288, - "mean_length": 2081.05, - "loss": 0.17176635563373566, - "sps": 590.5779813457752 + "num_episodes": 284, + "mean_reward": 521.5117145895958, + "mean_length": 10942.83, + "survival_pct": 0.5471415, + "max_steps": 20000, + "loss": 0.1111864298582077, + "sps": 509.6937089521736 }, { "update": 545, "global_step": 2232320, - "num_episodes": 694, - "mean_reward": 33.27913472175598, - "mean_length": 2258.75, - "loss": 0.7694998979568481, - "sps": 738.4276136206116 + "num_episodes": 286, + "mean_reward": 512.1946889853477, + "mean_length": 11042.83, + "survival_pct": 0.5521415, + "max_steps": 20000, + "loss": 3.9095962047576904, + "sps": 511.8590018524491 }, { "update": 550, "global_step": 2252800, - "num_episodes": 698, - "mean_reward": 33.9422331905365, - "mean_length": 2351.64, - "loss": 0.15182125568389893, - "sps": 762.723220730318 + "num_episodes": 287, + "mean_reward": 501.32572416067126, + "mean_length": 10911.73, + "survival_pct": 0.5455865, + "max_steps": 20000, + "loss": -0.03819906711578369, + "sps": 632.8032522356763 }, { "update": 555, "global_step": 2273280, - "num_episodes": 700, - "mean_reward": 34.04119193077087, - "mean_length": 2350.76, - "loss": 0.032472074031829834, - "sps": 1245.5515732240913 + "num_episodes": 292, + "mean_reward": 503.7082317852974, + "mean_length": 10844.31, + "survival_pct": 0.5422155, + "max_steps": 20000, + "loss": 10.365863800048828, + "sps": 429.53248817964095 }, { "update": 560, "global_step": 2293760, - "num_episodes": 712, - "mean_reward": 32.31116131782532, - "mean_length": 2355.04, - "loss": 0.5084341168403625, - "sps": 356.9310128125497 + "num_episodes": 292, + "mean_reward": 503.7082317852974, + "mean_length": 10844.31, + "survival_pct": 0.5422155, + "max_steps": 20000, + "loss": 2.5824058055877686, + "sps": 835.0918364484975 }, { "update": 565, "global_step": 2314240, - "num_episodes": 719, - "mean_reward": 32.47976410865784, - "mean_length": 2441.54, - "loss": 0.21268926560878754, - "sps": 649.2244348171059 + "num_episodes": 292, + "mean_reward": 503.7082317852974, + "mean_length": 10844.31, + "survival_pct": 0.5422155, + "max_steps": 20000, + "loss": -0.015099406242370605, + "sps": 823.3139156925881 }, { "update": 570, "global_step": 2334720, - "num_episodes": 728, - "mean_reward": 33.27659282684326, - "mean_length": 2530.94, - "loss": 0.9577451348304749, - "sps": 768.1072804561215 + "num_episodes": 292, + "mean_reward": 503.7082317852974, + "mean_length": 10844.31, + "survival_pct": 0.5422155, + "max_steps": 20000, + "loss": 0.004335612058639526, + "sps": 847.512127655439 }, { "update": 575, "global_step": 2355200, - "num_episodes": 734, - "mean_reward": 31.69405174255371, - "mean_length": 2412.76, - "loss": 1.0152896642684937, - "sps": 1202.6180581266553 + "num_episodes": 296, + "mean_reward": 506.11397255182266, + "mean_length": 11193.09, + "survival_pct": 0.5596545, + "max_steps": 20000, + "loss": 1.3773071765899658, + "sps": 538.0889941452151 }, { "update": 580, "global_step": 2375680, - "num_episodes": 751, - "mean_reward": 32.80648866415024, - "mean_length": 2678.86, - "loss": 1.551302433013916, - "sps": 620.7046830826843 + "num_episodes": 296, + "mean_reward": 506.11397255182266, + "mean_length": 11193.09, + "survival_pct": 0.5596545, + "max_steps": 20000, + "loss": 0.6394574046134949, + "sps": 502.97316152248266 }, { "update": 585, "global_step": 2396160, - "num_episodes": 757, - "mean_reward": 29.564244215488433, - "mean_length": 2585.36, - "loss": 3.1353652477264404, - "sps": 488.49247923421996 + "num_episodes": 297, + "mean_reward": 508.4939224600792, + "mean_length": 11391.04, + "survival_pct": 0.5695520000000001, + "max_steps": 20000, + "loss": 36.28646469116211, + "sps": 439.92401762976914 }, { "update": 590, "global_step": 2416640, - "num_episodes": 758, - "mean_reward": 30.12968365430832, - "mean_length": 2685.11, - "loss": 1.9931682348251343, - "sps": 1344.82690419507 + "num_episodes": 298, + "mean_reward": 510.8762067055702, + "mean_length": 11589.3, + "survival_pct": 0.579465, + "max_steps": 20000, + "loss": 13.898605346679688, + "sps": 282.2760969771352 }, { "update": 595, "global_step": 2437120, - "num_episodes": 765, - "mean_reward": 28.753595340251923, - "mean_length": 2505.15, - "loss": 1.9217519760131836, - "sps": 447.9409443511792 + "num_episodes": 301, + "mean_reward": 526.9176897263527, + "mean_length": 11688.03, + "survival_pct": 0.5844015, + "max_steps": 20000, + "loss": 38.274803161621094, + "sps": 332.3030413271302 }, { "update": 600, "global_step": 2457600, - "num_episodes": 777, - "mean_reward": 30.216613895893097, - "mean_length": 2711.46, - "loss": 18.632978439331055, - "sps": 404.03595919196334 + "num_episodes": 303, + "mean_reward": 525.6999688172341, + "mean_length": 11569.45, + "survival_pct": 0.5784725000000001, + "max_steps": 20000, + "loss": 2.9384140968322754, + "sps": 364.8644804633228 }, { "update": 605, "global_step": 2478080, - "num_episodes": 795, - "mean_reward": 25.763007862567903, - "mean_length": 2472.96, - "loss": 0.08484360575675964, - "sps": 273.80019869630905 + "num_episodes": 304, + "mean_reward": 524.2420133042335, + "mean_length": 11426.62, + "survival_pct": 0.45706480000000005, + "max_steps": 25000, + "loss": 438.5118408203125, + "sps": 420.0558278495418 }, { "update": 610, "global_step": 2498560, - "num_episodes": 800, - "mean_reward": 25.233678991794587, - "mean_length": 2472.55, - "loss": 1.4983996152877808, - "sps": 842.5169450671826 + "num_episodes": 304, + "mean_reward": 524.2420133042335, + "mean_length": 11426.62, + "survival_pct": 0.45706480000000005, + "max_steps": 25000, + "loss": 6.759511947631836, + "sps": 435.9725569680644 }, { "update": 615, "global_step": 2519040, - "num_episodes": 800, - "mean_reward": 25.233678991794587, - "mean_length": 2472.55, - "loss": 2.3043322563171387, - "sps": 1633.9246020797414 + "num_episodes": 304, + "mean_reward": 524.2420133042335, + "mean_length": 11426.62, + "survival_pct": 0.45706480000000005, + "max_steps": 25000, + "loss": 2.7961549758911133, + "sps": 483.2696887623864 }, { "update": 620, "global_step": 2539520, - "num_episodes": 810, - "mean_reward": 25.826587975025177, - "mean_length": 2510.48, - "loss": 0.21852731704711914, - "sps": 518.4307357532686 + "num_episodes": 304, + "mean_reward": 524.2420133042335, + "mean_length": 11426.62, + "survival_pct": 0.45706480000000005, + "max_steps": 25000, + "loss": 2.2368013858795166, + "sps": 488.6334457862468 }, { "update": 625, "global_step": 2560000, - "num_episodes": 814, - "mean_reward": 25.645928237438202, - "mean_length": 2512.63, - "loss": 1.0583765506744385, - "sps": 581.0047205132955 + "num_episodes": 309, + "mean_reward": 559.3683041572571, + "mean_length": 12243.92, + "survival_pct": 0.4897568, + "max_steps": 25000, + "loss": 706.3812255859375, + "sps": 240.30170576065763 }, { "update": 630, "global_step": 2580480, - "num_episodes": 823, - "mean_reward": 25.49915248632431, - "mean_length": 2527.36, - "loss": 1.795029878616333, - "sps": 537.504310438823 + "num_episodes": 311, + "mean_reward": 572.5723537635803, + "mean_length": 12360.26, + "survival_pct": 0.4944104, + "max_steps": 25000, + "loss": 13.363809585571289, + "sps": 266.8083792943256 }, { "update": 635, "global_step": 2600960, - "num_episodes": 825, - "mean_reward": 26.937079560756683, - "mean_length": 2620.74, - "loss": 0.2923164963722229, - "sps": 1350.1934205312384 + "num_episodes": 316, + "mean_reward": 564.724127240181, + "mean_length": 11785.88, + "survival_pct": 0.47143519999999994, + "max_steps": 25000, + "loss": 6.69994592666626, + "sps": 303.78529409460765 }, { "update": 640, "global_step": 2621440, - "num_episodes": 834, - "mean_reward": 29.924575612545013, - "mean_length": 2905.58, - "loss": 0.43789708614349365, - "sps": 280.4967413720513 + "num_episodes": 316, + "mean_reward": 564.724127240181, + "mean_length": 11785.88, + "survival_pct": 0.47143519999999994, + "max_steps": 25000, + "loss": 629.8490600585938, + "sps": 347.05583673981266 }, { "update": 645, "global_step": 2641920, - "num_episodes": 836, - "mean_reward": 28.948164422512054, - "mean_length": 2820.95, - "loss": 1.5149576663970947, - "sps": 640.5808040483822 + "num_episodes": 325, + "mean_reward": 599.5753115653991, + "mean_length": 11091.83, + "survival_pct": 0.4436732, + "max_steps": 25000, + "loss": 14.624711990356445, + "sps": 202.6999148028545 }, { "update": 650, "global_step": 2662400, - "num_episodes": 843, - "mean_reward": 35.76819623708725, - "mean_length": 3020.26, - "loss": 1.5319448709487915, - "sps": 573.556787061315 + "num_episodes": 326, + "mean_reward": 681.9963491630555, + "mean_length": 11191.83, + "survival_pct": 0.4476732, + "max_steps": 25000, + "loss": 14.961444854736328, + "sps": 324.3719674553881 }, { "update": 655, "global_step": 2682880, - "num_episodes": 851, - "mean_reward": 35.12200962543488, - "mean_length": 2929.69, - "loss": 0.4334838390350342, - "sps": 721.898659363473 + "num_episodes": 329, + "mean_reward": 681.6811992406845, + "mean_length": 11184.4, + "survival_pct": 0.447376, + "max_steps": 25000, + "loss": 1098.8870849609375, + "sps": 282.5805927303007 }, { "update": 660, "global_step": 2703360, - "num_episodes": 863, - "mean_reward": 34.17141466140747, - "mean_length": 2837.57, - "loss": 15.400986671447754, - "sps": 531.226152703821 + "num_episodes": 329, + "mean_reward": 681.6811992406845, + "mean_length": 11184.4, + "survival_pct": 0.447376, + "max_steps": 25000, + "loss": 18.618370056152344, + "sps": 328.8874894706864 }, { "update": 665, "global_step": 2723840, - "num_episodes": 871, - "mean_reward": 34.406737427711484, - "mean_length": 2847.12, - "loss": -0.022274762392044067, - "sps": 1102.2418013565505 + "num_episodes": 330, + "mean_reward": 669.1296841955185, + "mean_length": 11166.48, + "survival_pct": 0.4466592, + "max_steps": 25000, + "loss": 0.3438085615634918, + "sps": 350.7381600879147 }, { "update": 670, "global_step": 2744320, - "num_episodes": 880, - "mean_reward": 33.42318947315216, - "mean_length": 2744.42, - "loss": 0.21278248727321625, - "sps": 932.5471353242045 + "num_episodes": 333, + "mean_reward": 663.7387618637085, + "mean_length": 11120.64, + "survival_pct": 0.4448256, + "max_steps": 25000, + "loss": 1.9492148160934448, + "sps": 336.44611386700046 }, { "update": 675, "global_step": 2764800, - "num_episodes": 881, - "mean_reward": 35.02722130298614, - "mean_length": 2793.45, - "loss": 0.48318442702293396, - "sps": 849.2902333548969 + "num_episodes": 340, + "mean_reward": 764.0141823387146, + "mean_length": 11231.97, + "survival_pct": 0.4492788, + "max_steps": 25000, + "loss": 11.20479679107666, + "sps": 269.88464376416727 }, { "update": 680, "global_step": 2785280, - "num_episodes": 894, - "mean_reward": 42.5996656703949, - "mean_length": 2993.97, - "loss": 0.9185795187950134, - "sps": 476.82647813725623 + "num_episodes": 341, + "mean_reward": 763.7054350566864, + "mean_length": 11237.76, + "survival_pct": 0.44951040000000003, + "max_steps": 25000, + "loss": 2.3942978382110596, + "sps": 368.845880053541 }, { "update": 685, "global_step": 2805760, - "num_episodes": 914, - "mean_reward": 40.827002143859865, - "mean_length": 2561.69, - "loss": 1.2765486240386963, - "sps": 255.71578525021312 + "num_episodes": 341, + "mean_reward": 763.7054350566864, + "mean_length": 11237.76, + "survival_pct": 0.44951040000000003, + "max_steps": 25000, + "loss": 0.6632025241851807, + "sps": 437.7624284451732 }, { "update": 690, "global_step": 2826240, - "num_episodes": 925, - "mean_reward": 45.62547200202942, - "mean_length": 2469.27, - "loss": 2.3010942935943604, - "sps": 570.2368795975491 + "num_episodes": 341, + "mean_reward": 763.7054350566864, + "mean_length": 11237.76, + "survival_pct": 0.44951040000000003, + "max_steps": 25000, + "loss": 0.12569601833820343, + "sps": 456.53233478658586 }, { "update": 695, "global_step": 2846720, - "num_episodes": 931, - "mean_reward": 43.9872697687149, - "mean_length": 2287.32, - "loss": 1.1779371500015259, - "sps": 532.4176047731756 + "num_episodes": 343, + "mean_reward": 799.47291888237, + "mean_length": 11532.37, + "survival_pct": 0.4612948, + "max_steps": 25000, + "loss": 32.66535949707031, + "sps": 482.45747353519477 }, { "update": 700, "global_step": 2867200, - "num_episodes": 943, - "mean_reward": 36.38923056125641, - "mean_length": 1897.85, - "loss": 0.6398267149925232, - "sps": 501.06106420522286 + "num_episodes": 344, + "mean_reward": 900.5657841777802, + "mean_length": 11582.37, + "survival_pct": 0.4632948, + "max_steps": 25000, + "loss": 4.394363880157471, + "sps": 602.707199771208 }, { "update": 705, "global_step": 2887680, - "num_episodes": 952, - "mean_reward": 38.85928546905517, - "mean_length": 2189.91, - "loss": 0.7464686632156372, - "sps": 358.482733937848 + "num_episodes": 344, + "mean_reward": 900.5657841777802, + "mean_length": 11582.37, + "survival_pct": 0.4632948, + "max_steps": 25000, + "loss": 1.1503143310546875, + "sps": 592.2907627828818 }, { "update": 710, "global_step": 2908160, - "num_episodes": 960, - "mean_reward": 38.50984364032745, - "mean_length": 2181.81, - "loss": 0.819063663482666, - "sps": 729.0695475685854 + "num_episodes": 345, + "mean_reward": 917.1899351406097, + "mean_length": 11829.79, + "survival_pct": 0.47319160000000005, + "max_steps": 25000, + "loss": 20.407194137573242, + "sps": 459.7672415701048 }, { "update": 715, "global_step": 2928640, - "num_episodes": 960, - "mean_reward": 38.50984364032745, - "mean_length": 2181.81, - "loss": 1.3829450607299805, - "sps": 2103.44152639025 + "num_episodes": 346, + "mean_reward": 915.0635627651214, + "mean_length": 11657.27, + "survival_pct": 0.4662908, + "max_steps": 25000, + "loss": 51.53656768798828, + "sps": 484.2493919142758 }, { "update": 720, "global_step": 2949120, - "num_episodes": 961, - "mean_reward": 38.30655426502228, - "mean_length": 2181.81, - "loss": 1.3242486715316772, - "sps": 2180.924028748301 + "num_episodes": 347, + "mean_reward": 915.567684469223, + "mean_length": 11707.27, + "survival_pct": 0.4682908, + "max_steps": 25000, + "loss": 2.852640151977539, + "sps": 564.1661889935658 }, { "update": 725, "global_step": 2969600, - "num_episodes": 979, - "mean_reward": 37.64626069068909, - "mean_length": 2258.86, - "loss": 0.6890235543251038, - "sps": 512.7806225711571 + "num_episodes": 351, + "mean_reward": 933.7657800292968, + "mean_length": 11805.02, + "survival_pct": 0.47220080000000003, + "max_steps": 25000, + "loss": 37.6703987121582, + "sps": 269.4710162736009 }, { "update": 730, "global_step": 2990080, - "num_episodes": 993, - "mean_reward": 29.446645002365113, - "mean_length": 2178.52, - "loss": 1.424858570098877, - "sps": 397.19464657996275 + "num_episodes": 356, + "mean_reward": 790.1477946281433, + "mean_length": 11063.86, + "survival_pct": 0.4425544, + "max_steps": 25000, + "loss": 92.7292709350586, + "sps": 249.14561378417451 }, { "update": 735, "global_step": 3010560, - "num_episodes": 994, - "mean_reward": 29.469437551498412, - "mean_length": 2182.57, - "loss": 1.9823501110076904, - "sps": 742.698176618862 + "num_episodes": 356, + "mean_reward": 790.1477946281433, + "mean_length": 11063.86, + "survival_pct": 0.4425544, + "max_steps": 25000, + "loss": 3.52268648147583, + "sps": 336.33639220363955 }, { "update": 740, "global_step": 3031040, - "num_episodes": 995, - "mean_reward": 29.75782982826233, - "mean_length": 2282.33, - "loss": 0.30816513299942017, - "sps": 1825.6853767545124 + "num_episodes": 356, + "mean_reward": 790.1477946281433, + "mean_length": 11063.86, + "survival_pct": 0.4425544, + "max_steps": 25000, + "loss": 1.0818921327590942, + "sps": 322.89272707726104 }, { "update": 745, "global_step": 3051520, - "num_episodes": 998, - "mean_reward": 30.453793020248412, - "mean_length": 2473.99, - "loss": 3.092510938644409, - "sps": 1555.5380368163708 + "num_episodes": 363, + "mean_reward": 836.2594112110138, + "mean_length": 10753.72, + "survival_pct": 0.4301488, + "max_steps": 25000, + "loss": 6.418513774871826, + "sps": 303.95242510324334 }, { "update": 750, "global_step": 3072000, - "num_episodes": 1010, - "mean_reward": 31.862056040763854, - "mean_length": 2872.71, - "loss": 3.4220612049102783, - "sps": 266.50791667174065 + "num_episodes": 369, + "mean_reward": 865.2751739215851, + "mean_length": 10221.97, + "survival_pct": 0.4088788, + "max_steps": 25000, + "loss": 4.221797943115234, + "sps": 550.4274763185425 }, { "update": 755, "global_step": 3092480, - "num_episodes": 1019, - "mean_reward": 28.9614812707901, - "mean_length": 2699.59, - "loss": 1.2745823860168457, - "sps": 753.5067688712668 + "num_episodes": 369, + "mean_reward": 865.2751739215851, + "mean_length": 10221.97, + "survival_pct": 0.4088788, + "max_steps": 25000, + "loss": 1.761741280555725, + "sps": 498.20561049907593 }, { "update": 760, "global_step": 3112960, - "num_episodes": 1020, - "mean_reward": 23.99162916660309, - "mean_length": 2699.59, - "loss": 0.8926928043365479, - "sps": 1444.3258733361963 + "num_episodes": 369, + "mean_reward": 865.2751739215851, + "mean_length": 10221.97, + "survival_pct": 0.4088788, + "max_steps": 25000, + "loss": 58.8026008605957, + "sps": 521.3350619859364 }, { "update": 765, "global_step": 3133440, - "num_episodes": 1024, - "mean_reward": 30.025504064559936, - "mean_length": 2889.38, - "loss": 1.112790822982788, - "sps": 756.883715254125 + "num_episodes": 370, + "mean_reward": 860.6973748493194, + "mean_length": 10271.97, + "survival_pct": 0.4108788, + "max_steps": 25000, + "loss": 1.9308984279632568, + "sps": 438.4037568068571 }, { "update": 770, "global_step": 3153920, - "num_episodes": 1034, - "mean_reward": 36.13991012096405, - "mean_length": 3187.15, - "loss": 0.43345746397972107, - "sps": 419.40821109391015 + "num_episodes": 373, + "mean_reward": 770.2798270845414, + "mean_length": 9927.86, + "survival_pct": 0.39711440000000003, + "max_steps": 25000, + "loss": 2.7318155765533447, + "sps": 348.45193944086344 }, { "update": 775, "global_step": 3174400, - "num_episodes": 1050, - "mean_reward": 32.24160755157471, - "mean_length": 2985.91, - "loss": 0.13569332659244537, - "sps": 317.4046328348688 + "num_episodes": 377, + "mean_reward": 806.3474672365188, + "mean_length": 10020.65, + "survival_pct": 0.40082599999999996, + "max_steps": 25000, + "loss": 0.32981979846954346, + "sps": 555.3298506322647 }, { "update": 780, "global_step": 3194880, - "num_episodes": 1061, - "mean_reward": 30.67223885536194, - "mean_length": 2711.19, - "loss": 0.2064545899629593, - "sps": 303.1813456850613 + "num_episodes": 383, + "mean_reward": 853.3938677740097, + "mean_length": 10073.78, + "survival_pct": 0.4029512, + "max_steps": 25000, + "loss": 0.16146810352802277, + "sps": 1099.1025857576844 }, { "update": 785, "global_step": 3215360, - "num_episodes": 1072, - "mean_reward": 29.705427560806275, - "mean_length": 2524.86, - "loss": 4.488283634185791, - "sps": 913.592632749929 + "num_episodes": 383, + "mean_reward": 853.3938677740097, + "mean_length": 10073.78, + "survival_pct": 0.4029512, + "max_steps": 25000, + "loss": -0.09470260143280029, + "sps": 1762.63191717742 }, { "update": 790, "global_step": 3235840, - "num_episodes": 1085, - "mean_reward": 32.955479860305786, - "mean_length": 2618.62, - "loss": 0.05505555868148804, - "sps": 607.269974208857 + "num_episodes": 384, + "mean_reward": 853.8934272527695, + "mean_length": 10123.78, + "survival_pct": 0.4049512, + "max_steps": 25000, + "loss": -0.06440502405166626, + "sps": 1195.8323452559932 }, { "update": 795, "global_step": 3256320, - "num_episodes": 1097, - "mean_reward": 31.909876976013184, - "mean_length": 2214.73, - "loss": 1.9556808471679688, - "sps": 747.2261381867389 + "num_episodes": 384, + "mean_reward": 853.8934272527695, + "mean_length": 10123.78, + "survival_pct": 0.4049512, + "max_steps": 25000, + "loss": -0.15972009301185608, + "sps": 888.4678009939021 }, { "update": 800, "global_step": 3276800, - "num_episodes": 1106, - "mean_reward": 29.67423951148987, - "mean_length": 1915.87, - "loss": 2.1206791400909424, - "sps": 701.3988516159834 + "num_episodes": 388, + "mean_reward": 838.5986885023117, + "mean_length": 10010.11, + "survival_pct": 0.40040440000000005, + "max_steps": 25000, + "loss": 0.09807762503623962, + "sps": 471.1430031097595 }, { "update": 805, "global_step": 3297280, - "num_episodes": 1110, - "mean_reward": 30.086654043197633, - "mean_length": 2005.75, - "loss": 0.5833985805511475, - "sps": 660.5544733393301 + "num_episodes": 388, + "mean_reward": 838.5986885023117, + "mean_length": 10010.11, + "survival_pct": 0.33367033333333335, + "max_steps": 30000, + "loss": -0.0691152960062027, + "sps": 1265.7956126806866 }, { "update": 810, "global_step": 3317760, - "num_episodes": 1115, - "mean_reward": 32.238635430336, - "mean_length": 2171.27, - "loss": 0.4146590232849121, - "sps": 1094.4092641494085 + "num_episodes": 388, + "mean_reward": 838.5986885023117, + "mean_length": 10010.11, + "survival_pct": 0.33367033333333335, + "max_steps": 30000, + "loss": -0.1727883517742157, + "sps": 1231.865460337574 }, { "update": 815, "global_step": 3338240, - "num_episodes": 1120, - "mean_reward": 33.369399318695066, - "mean_length": 2262.48, - "loss": 2.585177183151245, - "sps": 901.8559204378539 + "num_episodes": 388, + "mean_reward": 838.5986885023117, + "mean_length": 10010.11, + "survival_pct": 0.33367033333333335, + "max_steps": 30000, + "loss": -0.11673803627490997, + "sps": 1256.6835286888843 }, { "update": 820, "global_step": 3358720, - "num_episodes": 1130, - "mean_reward": 27.537890434265137, - "mean_length": 2050.2, - "loss": 0.5106171369552612, - "sps": 838.9275291477041 + "num_episodes": 388, + "mean_reward": 838.5986885023117, + "mean_length": 10010.11, + "survival_pct": 0.33367033333333335, + "max_steps": 30000, + "loss": -0.2521955370903015, + "sps": 1137.9313207515677 }, { "update": 825, "global_step": 3379200, - "num_episodes": 1136, - "mean_reward": 23.029513311386108, - "mean_length": 2161.34, - "loss": 4.071342468261719, - "sps": 1617.4028966230667 + "num_episodes": 391, + "mean_reward": 839.7568208217621, + "mean_length": 10100.5, + "survival_pct": 0.33668333333333333, + "max_steps": 30000, + "loss": -0.10739608108997345, + "sps": 1220.8177867257723 }, { "update": 830, "global_step": 3399680, - "num_episodes": 1138, - "mean_reward": 23.361328144073486, - "mean_length": 2259.74, - "loss": 0.30649739503860474, - "sps": 489.67958732294835 + "num_episodes": 395, + "mean_reward": 839.0241325330734, + "mean_length": 10101.66, + "survival_pct": 0.336722, + "max_steps": 30000, + "loss": 18.019046783447266, + "sps": 656.125042039298 }, { "update": 835, "global_step": 3420160, - "num_episodes": 1146, - "mean_reward": 29.256564235687257, - "mean_length": 2379.17, - "loss": 0.2716136574745178, - "sps": 789.1092351378779 + "num_episodes": 402, + "mean_reward": 813.9229806566238, + "mean_length": 9366.46, + "survival_pct": 0.3122153333333333, + "max_steps": 30000, + "loss": 3.2142348289489746, + "sps": 348.9181765210399 }, { "update": 840, "global_step": 3440640, - "num_episodes": 1152, - "mean_reward": 30.681161608695984, - "mean_length": 2564.53, - "loss": 3.510643720626831, - "sps": 465.91684356996774 + "num_episodes": 408, + "mean_reward": 770.4422649216652, + "mean_length": 8932.2, + "survival_pct": 0.29774, + "max_steps": 30000, + "loss": 29.716121673583984, + "sps": 258.20931803063246 }, { "update": 845, "global_step": 3461120, - "num_episodes": 1157, - "mean_reward": 31.814847540855407, - "mean_length": 2661.46, - "loss": 0.2368174046278, - "sps": 970.5356676833453 + "num_episodes": 409, + "mean_reward": 767.6314169716835, + "mean_length": 8691.39, + "survival_pct": 0.289713, + "max_steps": 30000, + "loss": 0.5027515888214111, + "sps": 910.4499991149808 }, { "update": 850, "global_step": 3481600, - "num_episodes": 1158, - "mean_reward": 32.19375528335571, - "mean_length": 2760.37, - "loss": 1.0439393520355225, - "sps": 2118.0253570743016 + "num_episodes": 412, + "mean_reward": 755.9645525097847, + "mean_length": 8707.21, + "survival_pct": 0.2902403333333333, + "max_steps": 30000, + "loss": 2.889087438583374, + "sps": 492.2319923662775 }, { "update": 855, "global_step": 3502080, - "num_episodes": 1167, - "mean_reward": 32.78835594654083, - "mean_length": 2934.75, - "loss": 0.11038707196712494, - "sps": 373.62636209376416 + "num_episodes": 415, + "mean_reward": 755.2824851679802, + "mean_length": 8717.11, + "survival_pct": 0.2905703333333334, + "max_steps": 30000, + "loss": -0.22625428438186646, + "sps": 1029.9811438873032 }, { "update": 860, "global_step": 3522560, - "num_episodes": 1171, - "mean_reward": 38.54492960453033, - "mean_length": 3055.33, - "loss": 0.27012330293655396, - "sps": 811.5381951612115 + "num_episodes": 420, + "mean_reward": 659.1974053931236, + "mean_length": 9077.37, + "survival_pct": 0.30257900000000004, + "max_steps": 30000, + "loss": 2.288820505142212, + "sps": 1016.411393455018 }, { "update": 865, "global_step": 3543040, - "num_episodes": 1182, - "mean_reward": 38.79421305179596, - "mean_length": 3135.05, - "loss": 0.8694961071014404, - "sps": 643.9358039841488 + "num_episodes": 420, + "mean_reward": 659.1974053931236, + "mean_length": 9077.37, + "survival_pct": 0.30257900000000004, + "max_steps": 30000, + "loss": 0.2482612133026123, + "sps": 1109.4319732205106 }, { "update": 870, "global_step": 3563520, - "num_episodes": 1187, - "mean_reward": 37.14653766155243, - "mean_length": 3053.45, - "loss": 1.2393195629119873, - "sps": 871.9844580529647 + "num_episodes": 420, + "mean_reward": 659.1974053931236, + "mean_length": 9077.37, + "survival_pct": 0.30257900000000004, + "max_steps": 30000, + "loss": 0.09613563120365143, + "sps": 1196.7584168005542 }, { "update": 875, "global_step": 3584000, - "num_episodes": 1194, - "mean_reward": 40.78861089706421, - "mean_length": 3345.54, - "loss": 0.6096416115760803, - "sps": 304.93330977862365 + "num_episodes": 420, + "mean_reward": 659.1974053931236, + "mean_length": 9077.37, + "survival_pct": 0.30257900000000004, + "max_steps": 30000, + "loss": 0.2699776887893677, + "sps": 1128.2554610475702 }, { "update": 880, "global_step": 3604480, - "num_episodes": 1201, - "mean_reward": 39.81052246570587, - "mean_length": 3252.28, - "loss": 0.6608580946922302, - "sps": 677.495925390962 + "num_episodes": 420, + "mean_reward": 659.1974053931236, + "mean_length": 9077.37, + "survival_pct": 0.30257900000000004, + "max_steps": 30000, + "loss": 0.020249858498573303, + "sps": 1162.0742075936244 }, { "update": 885, "global_step": 3624960, - "num_episodes": 1215, - "mean_reward": 38.70529013156891, - "mean_length": 3165.08, - "loss": 1.2741384506225586, - "sps": 854.5639976310788 + "num_episodes": 421, + "mean_reward": 662.2708982825279, + "mean_length": 9375.82, + "survival_pct": 0.3125273333333333, + "max_steps": 30000, + "loss": 0.4013591408729553, + "sps": 1113.5340322378984 }, { "update": 890, "global_step": 3645440, - "num_episodes": 1215, - "mean_reward": 38.70529013156891, - "mean_length": 3165.08, - "loss": 0.5457536578178406, - "sps": 1511.837849491964 + "num_episodes": 422, + "mean_reward": 665.2683095526695, + "mean_length": 9673.23, + "survival_pct": 0.322441, + "max_steps": 30000, + "loss": 2.35748028755188, + "sps": 803.983802083326 }, { "update": 895, "global_step": 3665920, - "num_episodes": 1220, - "mean_reward": 39.244035544395445, - "mean_length": 3264.42, - "loss": 0.6924870014190674, - "sps": 556.2715786447075 + "num_episodes": 425, + "mean_reward": 654.4653234362602, + "mean_length": 9739.55, + "survival_pct": 0.3246516666666666, + "max_steps": 30000, + "loss": 19.605010986328125, + "sps": 421.5912921054319 }, { "update": 900, "global_step": 3686400, - "num_episodes": 1232, - "mean_reward": 46.50887234210968, - "mean_length": 3173.4, - "loss": 1.7522594928741455, - "sps": 699.9425408800316 + "num_episodes": 428, + "mean_reward": 590.155419728756, + "mean_length": 10093.48, + "survival_pct": 0.3364493333333333, + "max_steps": 30000, + "loss": 38.713653564453125, + "sps": 564.4312985780526 }, { "update": 905, "global_step": 3706880, - "num_episodes": 1245, - "mean_reward": 41.68342576980591, - "mean_length": 2950.87, - "loss": 1.1043925285339355, - "sps": 375.42107758469984 + "num_episodes": 428, + "mean_reward": 590.155419728756, + "mean_length": 10093.48, + "survival_pct": 0.3364493333333333, + "max_steps": 30000, + "loss": 136.64389038085938, + "sps": 613.1553074617179 }, { "update": 910, "global_step": 3727360, - "num_episodes": 1257, - "mean_reward": 38.028645734786984, - "mean_length": 2550.61, - "loss": 0.11873626708984375, - "sps": 856.8550488972196 + "num_episodes": 429, + "mean_reward": 590.9192177844047, + "mean_length": 10167.56, + "survival_pct": 0.33891866666666665, + "max_steps": 30000, + "loss": 8.999781608581543, + "sps": 511.63656492456863 }, { "update": 915, "global_step": 3747840, - "num_episodes": 1265, - "mean_reward": 38.954245281219485, - "mean_length": 2549.56, - "loss": 0.31198206543922424, - "sps": 690.3971885269259 + "num_episodes": 431, + "mean_reward": 590.5451223254204, + "mean_length": 10116.26, + "survival_pct": 0.33720866666666666, + "max_steps": 30000, + "loss": 49.023075103759766, + "sps": 506.30244315776105 }, { "update": 920, "global_step": 3768320, - "num_episodes": 1269, - "mean_reward": 34.42387727260589, - "mean_length": 2518.34, - "loss": 1.7575799226760864, - "sps": 1152.958968508612 + "num_episodes": 434, + "mean_reward": 599.4833398604393, + "mean_length": 10470.43, + "survival_pct": 0.3490143333333333, + "max_steps": 30000, + "loss": 33.072776794433594, + "sps": 346.8145463929963 }, { "update": 925, "global_step": 3788800, - "num_episodes": 1274, - "mean_reward": 34.58799042224884, - "mean_length": 2520.98, - "loss": 0.3646126985549927, - "sps": 1100.5795244909316 + "num_episodes": 436, + "mean_reward": 494.2830310034752, + "mean_length": 10279.47, + "survival_pct": 0.342649, + "max_steps": 30000, + "loss": 0.7968235015869141, + "sps": 973.2809780471914 }, { "update": 930, "global_step": 3809280, - "num_episodes": 1276, - "mean_reward": 36.83985797405243, - "mean_length": 2620.63, - "loss": 0.5593718886375427, - "sps": 999.9135216052517 + "num_episodes": 436, + "mean_reward": 494.2830310034752, + "mean_length": 10279.47, + "survival_pct": 0.342649, + "max_steps": 30000, + "loss": -0.05825723707675934, + "sps": 966.3113177290679 }, { "update": 935, "global_step": 3829760, - "num_episodes": 1292, - "mean_reward": 36.57479173660278, - "mean_length": 2520.6, - "loss": 0.009059503674507141, - "sps": 1097.323441932561 + "num_episodes": 436, + "mean_reward": 494.2830310034752, + "mean_length": 10279.47, + "survival_pct": 0.342649, + "max_steps": 30000, + "loss": -0.10611464828252792, + "sps": 1015.3673402421392 }, { "update": 940, "global_step": 3850240, - "num_episodes": 1297, - "mean_reward": 37.501826615333556, - "mean_length": 2614.74, - "loss": 0.686941385269165, - "sps": 1048.1224362748299 + "num_episodes": 437, + "mean_reward": 502.9565402960777, + "mean_length": 10567.82, + "survival_pct": 0.35226066666666667, + "max_steps": 30000, + "loss": -0.08356830477714539, + "sps": 944.3927727566447 }, { "update": 945, "global_step": 3870720, - "num_episodes": 1302, - "mean_reward": 38.2632510137558, - "mean_length": 2706.87, - "loss": 1.56045663356781, - "sps": 984.6772159375175 + "num_episodes": 439, + "mean_reward": 500.08963894605637, + "mean_length": 10318.88, + "survival_pct": 0.34396266666666664, + "max_steps": 30000, + "loss": 5.465578556060791, + "sps": 529.1770253242089 }, { "update": 950, "global_step": 3891200, - "num_episodes": 1303, - "mean_reward": 37.39876995563507, - "mean_length": 2611.05, - "loss": 1.817370891571045, - "sps": 662.4908033005096 + "num_episodes": 440, + "mean_reward": 503.0822184062004, + "mean_length": 10614.79, + "survival_pct": 0.35382633333333335, + "max_steps": 30000, + "loss": 3.838916301727295, + "sps": 760.7930670056104 }, { "update": 955, "global_step": 3911680, - "num_episodes": 1312, - "mean_reward": 38.347609777450565, - "mean_length": 2799.94, - "loss": 1.7936173677444458, - "sps": 498.5900701113524 + "num_episodes": 442, + "mean_reward": 467.75900787115097, + "mean_length": 10391.71, + "survival_pct": 0.3463903333333333, + "max_steps": 30000, + "loss": -0.04477877914905548, + "sps": 965.6263182051849 }, { "update": 960, "global_step": 3932160, - "num_episodes": 1319, - "mean_reward": 38.531728854179384, - "mean_length": 2800.38, - "loss": 0.8617715835571289, - "sps": 288.6257663929259 + "num_episodes": 445, + "mean_reward": 385.69126527786256, + "mean_length": 10244.33, + "survival_pct": 0.3414776666666667, + "max_steps": 30000, + "loss": 0.345672070980072, + "sps": 1268.264040705237 }, { "update": 965, "global_step": 3952640, - "num_episodes": 1325, - "mean_reward": 32.38980568408966, - "mean_length": 2706.94, - "loss": 0.410921573638916, - "sps": 729.5250188646075 + "num_episodes": 445, + "mean_reward": 385.69126527786256, + "mean_length": 10244.33, + "survival_pct": 0.3414776666666667, + "max_steps": 30000, + "loss": -0.09349031746387482, + "sps": 1253.8499203200772 }, { "update": 970, "global_step": 3973120, - "num_episodes": 1330, - "mean_reward": 33.02072194576263, - "mean_length": 2800.87, - "loss": 0.6215076446533203, - "sps": 913.5447323136685 + "num_episodes": 446, + "mean_reward": 388.8185606575012, + "mean_length": 10516.85, + "survival_pct": 0.35056166666666666, + "max_steps": 30000, + "loss": 9.252518653869629, + "sps": 855.3500954683526 }, { "update": 975, "global_step": 3993600, - "num_episodes": 1333, - "mean_reward": 34.48237750530243, - "mean_length": 2899.46, - "loss": 1.4754104614257812, - "sps": 857.6512350171981 + "num_episodes": 447, + "mean_reward": 386.00149038314817, + "mean_length": 10275.2, + "survival_pct": 0.3425066666666667, + "max_steps": 30000, + "loss": 21.241113662719727, + "sps": 950.7120230957267 }, { "update": 980, "global_step": 4014080, - "num_episodes": 1346, - "mean_reward": 35.15436544418335, - "mean_length": 3006.69, - "loss": 1.3220990896224976, - "sps": 275.0666378661058 + "num_episodes": 453, + "mean_reward": 415.23604825496676, + "mean_length": 10424.75, + "survival_pct": 0.34749166666666664, + "max_steps": 30000, + "loss": 6.508986473083496, + "sps": 715.3695692293137 }, { "update": 985, "global_step": 4034560, - "num_episodes": 1359, - "mean_reward": 36.966177105903625, - "mean_length": 3140.04, - "loss": 0.31860384345054626, - "sps": 723.735690218882 + "num_episodes": 458, + "mean_reward": 411.35455381393433, + "mean_length": 10193.61, + "survival_pct": 0.339787, + "max_steps": 30000, + "loss": 14.101386070251465, + "sps": 436.54294748826067 }, { "update": 990, "global_step": 4055040, - "num_episodes": 1368, - "mean_reward": 36.728344497680666, - "mean_length": 2959.73, - "loss": 5.1887078285217285, - "sps": 773.332744431613 + "num_episodes": 460, + "mean_reward": 352.4448234796524, + "mean_length": 9962.59, + "survival_pct": 0.3320863333333333, + "max_steps": 30000, + "loss": 5.007307529449463, + "sps": 657.8546600358904 }, { "update": 995, "global_step": 4075520, - "num_episodes": 1369, - "mean_reward": 35.78023895263672, - "mean_length": 2959.73, - "loss": 1.6374891996383667, - "sps": 2350.737845032927 + "num_episodes": 461, + "mean_reward": 352.60661952495576, + "mean_length": 9978.71, + "survival_pct": 0.33262366666666665, + "max_steps": 30000, + "loss": -0.05159700661897659, + "sps": 731.8566720639022 }, { "update": 1000, "global_step": 4096000, - "num_episodes": 1370, - "mean_reward": 35.719558029174806, - "mean_length": 2959.73, - "loss": 1.2606910467147827, - "sps": 2287.2771680470537 + "num_episodes": 461, + "mean_reward": 352.60661952495576, + "mean_length": 9978.71, + "survival_pct": 0.33262366666666665, + "max_steps": 30000, + "loss": -0.05063310265541077, + "sps": 1192.9198062233531 }, { "update": 1005, "global_step": 4116480, - "num_episodes": 1381, - "mean_reward": 37.67828846931457, - "mean_length": 2963.83, - "loss": 0.20636317133903503, - "sps": 1606.0294791853535 + "num_episodes": 461, + "mean_reward": 352.60661952495576, + "mean_length": 9978.71, + "survival_pct": 0.33262366666666665, + "max_steps": 30000, + "loss": -0.12464120984077454, + "sps": 1133.1887649612727 }, { "update": 1010, "global_step": 4136960, - "num_episodes": 1391, - "mean_reward": 39.33717452049255, - "mean_length": 3249.05, - "loss": 3.9951727390289307, - "sps": 418.63976772772594 + "num_episodes": 465, + "mean_reward": 371.26235566139223, + "mean_length": 10558.13, + "survival_pct": 0.35193766666666665, + "max_steps": 30000, + "loss": 7.669186115264893, + "sps": 701.8889529681794 }, { "update": 1015, "global_step": 4157440, - "num_episodes": 1399, - "mean_reward": 36.405463542938236, - "mean_length": 2962.77, - "loss": 1.837266445159912, - "sps": 683.5481201646958 + "num_episodes": 466, + "mean_reward": 375.76679421424865, + "mean_length": 10855.0, + "survival_pct": 0.36183333333333334, + "max_steps": 30000, + "loss": 12.382041931152344, + "sps": 489.4857959286267 }, { "update": 1020, "global_step": 4177920, - "num_episodes": 1401, - "mean_reward": 35.86304131507873, - "mean_length": 2879.03, - "loss": 2.9102792739868164, - "sps": 1089.8691629385837 + "num_episodes": 469, + "mean_reward": 341.31604763984683, + "mean_length": 10927.42, + "survival_pct": 0.3642473333333333, + "max_steps": 30000, + "loss": 19.83820915222168, + "sps": 590.4760026903639 }, { "update": 1025, "global_step": 4198400, - "num_episodes": 1416, - "mean_reward": 34.70472603797913, - "mean_length": 2801.68, - "loss": 0.9925887584686279, - "sps": 255.81352094643265 + "num_episodes": 473, + "mean_reward": 331.36366960048673, + "mean_length": 10432.91, + "survival_pct": 0.34776366666666664, + "max_steps": 30000, + "loss": 183.8989715576172, + "sps": 434.18935040659056 }, { "update": 1030, "global_step": 4218880, - "num_episodes": 1425, - "mean_reward": 33.617551488876344, - "mean_length": 2725.68, - "loss": 1.9214203357696533, - "sps": 737.2169737476238 + "num_episodes": 475, + "mean_reward": 303.6989562559128, + "mean_length": 10480.3, + "survival_pct": 0.3493433333333333, + "max_steps": 30000, + "loss": 139.46194458007812, + "sps": 590.2434550847422 }, { "update": 1035, "global_step": 4239360, - "num_episodes": 1434, - "mean_reward": 34.879857649803164, - "mean_length": 2533.35, - "loss": 0.2052566111087799, - "sps": 579.7787814604097 + "num_episodes": 475, + "mean_reward": 303.6989562559128, + "mean_length": 10480.3, + "survival_pct": 0.3493433333333333, + "max_steps": 30000, + "loss": 4.460475444793701, + "sps": 539.7108366907394 }, { "update": 1040, "global_step": 4259840, - "num_episodes": 1434, - "mean_reward": 34.879857649803164, - "mean_length": 2533.35, - "loss": 1.6612746715545654, - "sps": 1248.33251762393 + "num_episodes": 477, + "mean_reward": 304.03712359905245, + "mean_length": 10529.43, + "survival_pct": 0.350981, + "max_steps": 30000, + "loss": 41.53654098510742, + "sps": 570.0864654535922 }, { "update": 1045, "global_step": 4280320, - "num_episodes": 1445, - "mean_reward": 39.72180326938629, - "mean_length": 2610.11, - "loss": 4.8466691970825195, - "sps": 656.9837380754918 + "num_episodes": 482, + "mean_reward": 257.8429533290863, + "mean_length": 10591.03, + "survival_pct": 0.35303433333333334, + "max_steps": 30000, + "loss": 12.68658447265625, + "sps": 306.11899074758105 }, { "update": 1050, "global_step": 4300800, - "num_episodes": 1450, - "mean_reward": 42.0785810136795, - "mean_length": 2811.63, - "loss": 2.377190351486206, - "sps": 683.0332370956824 + "num_episodes": 484, + "mean_reward": 258.245273809433, + "mean_length": 10666.92, + "survival_pct": 0.355564, + "max_steps": 30000, + "loss": 34.416778564453125, + "sps": 343.9275197061659 }, { "update": 1055, "global_step": 4321280, - "num_episodes": 1461, - "mean_reward": 39.593364839553836, - "mean_length": 2808.07, - "loss": 0.6159077882766724, - "sps": 526.5738589213908 + "num_episodes": 484, + "mean_reward": 258.245273809433, + "mean_length": 10666.92, + "survival_pct": 0.355564, + "max_steps": 30000, + "loss": 0.8467625975608826, + "sps": 529.8299357550097 }, { "update": 1060, "global_step": 4341760, - "num_episodes": 1461, - "mean_reward": 39.593364839553836, - "mean_length": 2808.07, - "loss": 0.8465878963470459, - "sps": 2184.9711988588206 + "num_episodes": 484, + "mean_reward": 258.245273809433, + "mean_length": 10666.92, + "survival_pct": 0.355564, + "max_steps": 30000, + "loss": 0.5682471990585327, + "sps": 729.0597707102293 }, { "update": 1065, "global_step": 4362240, - "num_episodes": 1468, - "mean_reward": 41.406082754135134, - "mean_length": 2993.5, - "loss": 2.0758755207061768, - "sps": 756.2821057624998 + "num_episodes": 484, + "mean_reward": 258.245273809433, + "mean_length": 10666.92, + "survival_pct": 0.355564, + "max_steps": 30000, + "loss": 0.36413297057151794, + "sps": 703.8506900902968 }, { "update": 1070, "global_step": 4382720, - "num_episodes": 1475, - "mean_reward": 35.40320841789246, - "mean_length": 2801.81, - "loss": 9.254124641418457, - "sps": 1308.8266389540704 + "num_episodes": 491, + "mean_reward": 255.56333970069886, + "mean_length": 10467.79, + "survival_pct": 0.34892633333333334, + "max_steps": 30000, + "loss": 30.412242889404297, + "sps": 257.45270078331555 }, { "update": 1075, "global_step": 4403200, - "num_episodes": 1485, - "mean_reward": 40.63698256492615, - "mean_length": 2897.46, - "loss": 0.5495067834854126, - "sps": 799.4087672915922 + "num_episodes": 491, + "mean_reward": 255.56333970069886, + "mean_length": 10467.79, + "survival_pct": 0.34892633333333334, + "max_steps": 30000, + "loss": 0.4469672739505768, + "sps": 393.6182671059828 }, { "update": 1080, "global_step": 4423680, - "num_episodes": 1485, - "mean_reward": 40.63698256492615, - "mean_length": 2897.46, - "loss": 0.5127925872802734, - "sps": 1340.6154629856217 + "num_episodes": 493, + "mean_reward": 270.52365421295167, + "mean_length": 10466.3, + "survival_pct": 0.34887666666666667, + "max_steps": 30000, + "loss": 1.5025949478149414, + "sps": 297.01417371765183 }, { "update": 1085, "global_step": 4444160, - "num_episodes": 1486, - "mean_reward": 41.25436541080475, - "mean_length": 2996.57, - "loss": 0.2694500684738159, - "sps": 1837.4922707085245 + "num_episodes": 494, + "mean_reward": 351.42790958404544, + "mean_length": 10466.3, + "survival_pct": 0.34887666666666667, + "max_steps": 30000, + "loss": 713.0115356445312, + "sps": 337.53102630233496 }, { "update": 1090, "global_step": 4464640, - "num_episodes": 1495, - "mean_reward": 46.52874216079712, - "mean_length": 3122.1, - "loss": 10.304019927978516, - "sps": 1046.1438942302716 + "num_episodes": 501, + "mean_reward": 389.6291408967972, + "mean_length": 10484.48, + "survival_pct": 0.34948266666666666, + "max_steps": 30000, + "loss": 376.2599182128906, + "sps": 322.42891915898485 }, { "update": 1095, "global_step": 4485120, - "num_episodes": 1509, - "mean_reward": 47.41412097454071, - "mean_length": 3088.91, - "loss": 4.403233051300049, - "sps": 705.1397561929946 + "num_episodes": 503, + "mean_reward": 389.9803589296341, + "mean_length": 10507.49, + "survival_pct": 0.3502496666666667, + "max_steps": 30000, + "loss": 1246.39453125, + "sps": 366.59931296887805 }, { "update": 1100, "global_step": 4505600, - "num_episodes": 1509, - "mean_reward": 47.41412097454071, - "mean_length": 3088.91, - "loss": 5.204225540161133, - "sps": 956.5984516573334 + "num_episodes": 503, + "mean_reward": 389.9803589296341, + "mean_length": 10507.49, + "survival_pct": 0.3502496666666667, + "max_steps": 30000, + "loss": 0.49186083674430847, + "sps": 434.33848528937693 }, { "update": 1105, "global_step": 4526080, - "num_episodes": 1514, - "mean_reward": 46.740014991760255, - "mean_length": 3048.45, - "loss": 1.3485753536224365, - "sps": 2885.439969795025 + "num_episodes": 505, + "mean_reward": 402.4173453474045, + "mean_length": 10496.98, + "survival_pct": 0.34989933333333334, + "max_steps": 30000, + "loss": 17.757164001464844, + "sps": 288.5970778332255 }, { "update": 1110, "global_step": 4546560, - "num_episodes": 1520, - "mean_reward": 55.2595415019989, - "mean_length": 3313.8, - "loss": 2.248853921890259, - "sps": 307.51773062645026 + "num_episodes": 508, + "mean_reward": 443.8068909239769, + "mean_length": 11091.98, + "survival_pct": 0.36973266666666665, + "max_steps": 30000, + "loss": 69.42852783203125, + "sps": 284.1876581819077 }, { "update": 1115, "global_step": 4567040, - "num_episodes": 1535, - "mean_reward": 45.75344041824341, - "mean_length": 3241.81, - "loss": 1.3050477504730225, - "sps": 349.05706197896666 + "num_episodes": 508, + "mean_reward": 443.8068909239769, + "mean_length": 11091.98, + "survival_pct": 0.36973266666666665, + "max_steps": 30000, + "loss": 1.8417774438858032, + "sps": 318.68963440900745 }, { "update": 1120, "global_step": 4587520, - "num_episodes": 1538, - "mean_reward": 48.896285104751584, - "mean_length": 3154.78, - "loss": 1.0057706832885742, - "sps": 741.8276433683515 + "num_episodes": 511, + "mean_reward": 441.4550348258019, + "mean_length": 10837.05, + "survival_pct": 0.361235, + "max_steps": 30000, + "loss": 255.734619140625, + "sps": 297.64729649475885 }, { "update": 1125, "global_step": 4608000, - "num_episodes": 1543, - "mean_reward": 49.45138314723968, - "mean_length": 3312.76, - "loss": 3.0420098304748535, - "sps": 267.5648126417643 + "num_episodes": 511, + "mean_reward": 441.4550348258019, + "mean_length": 10837.05, + "survival_pct": 0.361235, + "max_steps": 30000, + "loss": 1.7074986696243286, + "sps": 348.6616523408526 }, { "update": 1130, "global_step": 4628480, - "num_episodes": 1554, - "mean_reward": 45.64340697288513, - "mean_length": 2967.91, - "loss": 0.534530758857727, - "sps": 595.2909007117904 + "num_episodes": 514, + "mean_reward": 492.0692998147011, + "mean_length": 11430.47, + "survival_pct": 0.38101566666666664, + "max_steps": 30000, + "loss": 2.311823844909668, + "sps": 406.9030800405532 }, { "update": 1135, "global_step": 4648960, - "num_episodes": 1566, - "mean_reward": 46.33692901611328, - "mean_length": 3175.93, - "loss": 12.374032974243164, - "sps": 273.2022330683734 + "num_episodes": 515, + "mean_reward": 563.7019203495979, + "mean_length": 11721.54, + "survival_pct": 0.390718, + "max_steps": 30000, + "loss": 1047.7276611328125, + "sps": 314.10929898406806 }, { "update": 1140, "global_step": 4669440, - "num_episodes": 1574, - "mean_reward": 45.48817714929581, - "mean_length": 3007.35, - "loss": 9.789308547973633, - "sps": 468.42362264683504 + "num_episodes": 516, + "mean_reward": 560.5978558659554, + "mean_length": 11438.6, + "survival_pct": 0.38128666666666666, + "max_steps": 30000, + "loss": 9.904751777648926, + "sps": 403.9643544594583 }, { "update": 1145, "global_step": 4689920, - "num_episodes": 1579, - "mean_reward": 39.85454882383347, - "mean_length": 2862.76, - "loss": 14.043472290039062, - "sps": 685.1921972366682 + "num_episodes": 516, + "mean_reward": 560.5978558659554, + "mean_length": 11438.6, + "survival_pct": 0.38128666666666666, + "max_steps": 30000, + "loss": 0.4376460909843445, + "sps": 554.0697388186311 }, { "update": 1150, "global_step": 4710400, - "num_episodes": 1592, - "mean_reward": 33.700903475284576, - "mean_length": 2480.53, - "loss": 8.177810668945312, - "sps": 333.4487379606718 + "num_episodes": 518, + "mean_reward": 564.0319487595558, + "mean_length": 11749.21, + "survival_pct": 0.3916403333333333, + "max_steps": 30000, + "loss": 0.26738616824150085, + "sps": 579.422583932766 }, { "update": 1155, "global_step": 4730880, - "num_episodes": 1607, - "mean_reward": 32.822717969417575, - "mean_length": 2485.88, - "loss": 58.6836051940918, - "sps": 320.2091534142309 + "num_episodes": 519, + "mean_reward": 564.0318553757668, + "mean_length": 11749.21, + "survival_pct": 0.3916403333333333, + "max_steps": 30000, + "loss": 2.65568208694458, + "sps": 435.32093187971185 }, { "update": 1160, "global_step": 4751360, - "num_episodes": 1613, - "mean_reward": 32.850986263751984, - "mean_length": 2471.11, - "loss": 4.779659748077393, - "sps": 886.848895232356 + "num_episodes": 520, + "mean_reward": 606.8646422314644, + "mean_length": 12037.17, + "survival_pct": 0.401239, + "max_steps": 30000, + "loss": 0.08337657153606415, + "sps": 480.58739931645295 }, { "update": 1165, "global_step": 4771840, - "num_episodes": 1616, - "mean_reward": 30.757194340229034, - "mean_length": 2379.02, - "loss": 0.5000092387199402, - "sps": 1201.649782869044 + "num_episodes": 523, + "mean_reward": 600.3835196709633, + "mean_length": 11435.34, + "survival_pct": 0.381178, + "max_steps": 30000, + "loss": 8.820674896240234, + "sps": 284.63083015732 }, { "update": 1170, "global_step": 4792320, - "num_episodes": 1618, - "mean_reward": 31.608184831142424, - "mean_length": 2481.05, - "loss": 0.5737665891647339, - "sps": 999.5698665674614 + "num_episodes": 524, + "mean_reward": 612.3443924736977, + "mean_length": 11435.34, + "survival_pct": 0.381178, + "max_steps": 30000, + "loss": 34.61191940307617, + "sps": 250.67135617300423 }, { "update": 1175, "global_step": 4812800, - "num_episodes": 1623, - "mean_reward": 32.650640833377835, - "mean_length": 2658.87, - "loss": 3.0942459106445312, - "sps": 616.1316245054412 + "num_episodes": 525, + "mean_reward": 612.3594747567176, + "mean_length": 11474.33, + "survival_pct": 0.38247766666666666, + "max_steps": 30000, + "loss": 1.2452768087387085, + "sps": 607.4645366539573 }, { "update": 1180, "global_step": 4833280, - "num_episodes": 1625, - "mean_reward": 37.96194513559342, - "mean_length": 2682.02, - "loss": 9.840081214904785, - "sps": 698.9043221988104 + "num_episodes": 527, + "mean_reward": 692.7924189066887, + "mean_length": 11466.93, + "survival_pct": 0.382231, + "max_steps": 30000, + "loss": 0.4541545510292053, + "sps": 527.819759792867 }, { "update": 1185, "global_step": 4853760, - "num_episodes": 1630, - "mean_reward": 42.92790410280227, - "mean_length": 2817.92, - "loss": 1.0771808624267578, - "sps": 677.5959966787289 + "num_episodes": 527, + "mean_reward": 692.7924189066887, + "mean_length": 11466.93, + "survival_pct": 0.382231, + "max_steps": 30000, + "loss": 0.30217307806015015, + "sps": 510.7606822708712 }, { "update": 1190, "global_step": 4874240, - "num_episodes": 1631, - "mean_reward": 43.29853859186173, - "mean_length": 2913.27, - "loss": 0.15924929082393646, - "sps": 2128.8886043740195 + "num_episodes": 531, + "mean_reward": 708.1065727066994, + "mean_length": 11363.17, + "survival_pct": 0.3787723333333333, + "max_steps": 30000, + "loss": 0.9803248643875122, + "sps": 410.8110604092221 }, { "update": 1195, "global_step": 4894720, - "num_episodes": 1635, - "mean_reward": 44.563561956882474, - "mean_length": 3101.73, - "loss": 24.4128475189209, - "sps": 768.3773024267982 + "num_episodes": 531, + "mean_reward": 708.1065727066994, + "mean_length": 11363.17, + "survival_pct": 0.3787723333333333, + "max_steps": 30000, + "loss": 0.13086289167404175, + "sps": 615.3777693911919 }, { "update": 1200, "global_step": 4915200, - "num_episodes": 1642, - "mean_reward": 45.34813198566437, - "mean_length": 3112.72, - "loss": 8.374368667602539, - "sps": 976.3571543939305 + "num_episodes": 534, + "mean_reward": 699.0353853631019, + "mean_length": 11058.72, + "survival_pct": 0.36862399999999995, + "max_steps": 30000, + "loss": 712.1206665039062, + "sps": 388.8735852210615 }, { "update": 1205, "global_step": 4935680, - "num_episodes": 1649, - "mean_reward": 46.91295046806336, - "mean_length": 3360.16, - "loss": 13.855979919433594, - "sps": 264.6689562194928 + "num_episodes": 535, + "mean_reward": 698.9299844956398, + "mean_length": 11044.65, + "survival_pct": 0.27611625, + "max_steps": 40000, + "loss": 0.9771831035614014, + "sps": 687.4149876254356 }, { "update": 1210, "global_step": 4956160, - "num_episodes": 1661, - "mean_reward": 46.23843379974365, - "mean_length": 3281.89, - "loss": 95.5245590209961, - "sps": 248.04865845009095 + "num_episodes": 535, + "mean_reward": 698.9299844956398, + "mean_length": 11044.65, + "survival_pct": 0.27611625, + "max_steps": 40000, + "loss": 0.8561661243438721, + "sps": 675.4253456273739 }, { "update": 1215, "global_step": 4976640, - "num_episodes": 1669, - "mean_reward": 49.53000379562378, - "mean_length": 3152.88, - "loss": 92.37181091308594, - "sps": 241.45699781157901 + "num_episodes": 535, + "mean_reward": 698.9299844956398, + "mean_length": 11044.65, + "survival_pct": 0.27611625, + "max_steps": 40000, + "loss": 0.5291672945022583, + "sps": 747.9105659323011 }, { "update": 1220, "global_step": 4997120, - "num_episodes": 1675, - "mean_reward": 49.94117986917496, - "mean_length": 3244.89, - "loss": 36.136539459228516, - "sps": 332.50957910682797 + "num_episodes": 535, + "mean_reward": 698.9299844956398, + "mean_length": 11044.65, + "survival_pct": 0.27611625, + "max_steps": 40000, + "loss": 0.3995021879673004, + "sps": 710.3102224548963 }, { "update": 1225, "global_step": 5017600, - "num_episodes": 1681, - "mean_reward": 49.826350367069246, - "mean_length": 3322.41, - "loss": 4.504380702972412, - "sps": 317.38761005771676 + "num_episodes": 535, + "mean_reward": 698.9299844956398, + "mean_length": 11044.65, + "survival_pct": 0.27611625, + "max_steps": 40000, + "loss": 0.19824837148189545, + "sps": 714.1081614413822 }, { "update": 1230, "global_step": 5038080, - "num_episodes": 1686, - "mean_reward": 50.16007830858231, - "mean_length": 3383.77, - "loss": 11.077418327331543, - "sps": 614.5178546338719 + "num_episodes": 537, + "mean_reward": 694.6386151909828, + "mean_length": 11117.3, + "survival_pct": 0.27793249999999997, + "max_steps": 40000, + "loss": 0.6759960651397705, + "sps": 620.8841198425342 }, { "update": 1235, "global_step": 5058560, - "num_episodes": 1689, - "mean_reward": 50.77651453256607, - "mean_length": 3495.66, - "loss": 12.558845520019531, - "sps": 574.6067562429008 + "num_episodes": 543, + "mean_reward": 676.4015409398079, + "mean_length": 10875.12, + "survival_pct": 0.271878, + "max_steps": 40000, + "loss": 8.773111343383789, + "sps": 504.81741951884067 }, { "update": 1240, "global_step": 5079040, - "num_episodes": 1694, - "mean_reward": 51.09682610750198, - "mean_length": 3719.18, - "loss": 21.6842098236084, - "sps": 1195.0937330699562 + "num_episodes": 544, + "mean_reward": 655.3968575978279, + "mean_length": 10576.9, + "survival_pct": 0.2644225, + "max_steps": 40000, + "loss": 0.13716170191764832, + "sps": 568.9063484934894 }, { "update": 1245, "global_step": 5099520, - "num_episodes": 1698, - "mean_reward": 50.89644577741623, - "mean_length": 3737.48, - "loss": 65.59449768066406, - "sps": 836.6498465651391 + "num_episodes": 544, + "mean_reward": 655.3968575978279, + "mean_length": 10576.9, + "survival_pct": 0.2644225, + "max_steps": 40000, + "loss": -0.016505300998687744, + "sps": 639.2019983734826 }, { "update": 1250, "global_step": 5120000, - "num_episodes": 1701, - "mean_reward": 51.103903777599335, - "mean_length": 3865.3, - "loss": 20.571155548095703, - "sps": 835.7550802547468 + "num_episodes": 546, + "mean_reward": 697.8135006427765, + "mean_length": 11074.28, + "survival_pct": 0.276857, + "max_steps": 40000, + "loss": 44.272918701171875, + "sps": 571.4595742161331 }, { "update": 1255, "global_step": 5140480, - "num_episodes": 1705, - "mean_reward": 56.42267068624496, - "mean_length": 4036.09, - "loss": 65.08052825927734, - "sps": 391.16626869711587 + "num_episodes": 546, + "mean_reward": 697.8135006427765, + "mean_length": 11074.28, + "survival_pct": 0.276857, + "max_steps": 40000, + "loss": 402.4531555175781, + "sps": 630.3028273736365 }, { "update": 1260, "global_step": 5160960, - "num_episodes": 1709, - "mean_reward": 57.39474864244461, - "mean_length": 4054.32, - "loss": 9.735504150390625, - "sps": 279.94177596903813 + "num_episodes": 551, + "mean_reward": 682.566458747387, + "mean_length": 11177.02, + "survival_pct": 0.2794255, + "max_steps": 40000, + "loss": 59.98308181762695, + "sps": 566.8093758374978 }, { "update": 1265, "global_step": 5181440, - "num_episodes": 1715, - "mean_reward": 63.1245819735527, - "mean_length": 4262.64, - "loss": 473.1329040527344, - "sps": 195.8880726253604 + "num_episodes": 553, + "mean_reward": 696.4643092989921, + "mean_length": 10984.46, + "survival_pct": 0.27461149999999995, + "max_steps": 40000, + "loss": 1.1551482677459717, + "sps": 808.9894187333804 }, { "update": 1270, "global_step": 5201920, - "num_episodes": 1719, - "mean_reward": 56.77260554075241, - "mean_length": 4121.23, - "loss": 263.35589599609375, - "sps": 350.0223876268303 + "num_episodes": 556, + "mean_reward": 701.1732182240486, + "mean_length": 11378.53, + "survival_pct": 0.28446325, + "max_steps": 40000, + "loss": 2.668344020843506, + "sps": 746.5326784132175 }, { "update": 1275, "global_step": 5222400, - "num_episodes": 1726, - "mean_reward": 57.57951702833176, - "mean_length": 3960.67, - "loss": 59.782310485839844, - "sps": 412.90096002367255 + "num_episodes": 556, + "mean_reward": 701.1732182240486, + "mean_length": 11378.53, + "survival_pct": 0.28446325, + "max_steps": 40000, + "loss": 0.8112964630126953, + "sps": 834.4341431680355 }, { "update": 1280, "global_step": 5242880, - "num_episodes": 1736, - "mean_reward": 56.08834368228912, - "mean_length": 3644.63, - "loss": 52.62528991699219, - "sps": 343.349341248895 + "num_episodes": 559, + "mean_reward": 707.131958372593, + "mean_length": 11746.87, + "survival_pct": 0.29367175, + "max_steps": 40000, + "loss": 8.622824668884277, + "sps": 541.8830935414046 }, { "update": 1285, "global_step": 5263360, - "num_episodes": 1737, - "mean_reward": 55.905996737480166, - "mean_length": 3640.62, - "loss": 104.78556060791016, - "sps": 397.8813187415849 + "num_episodes": 560, + "mean_reward": 707.348245446682, + "mean_length": 11769.75, + "survival_pct": 0.29424375, + "max_steps": 40000, + "loss": 6.280955791473389, + "sps": 788.0408244951375 }, { "update": 1290, "global_step": 5283840, - "num_episodes": 1746, - "mean_reward": 59.82821818828583, - "mean_length": 3347.95, - "loss": 935.470947265625, - "sps": 355.2664513297179 + "num_episodes": 560, + "mean_reward": 707.348245446682, + "mean_length": 11769.75, + "survival_pct": 0.29424375, + "max_steps": 40000, + "loss": 0.17306624352931976, + "sps": 826.4544197536233 }, { "update": 1295, "global_step": 5304320, - "num_episodes": 1753, - "mean_reward": 86.4339063167572, - "mean_length": 3459.02, - "loss": 795.3289794921875, - "sps": 309.51331334453033 + "num_episodes": 560, + "mean_reward": 707.348245446682, + "mean_length": 11769.75, + "survival_pct": 0.29424375, + "max_steps": 40000, + "loss": -0.04585009068250656, + "sps": 839.4415230223722 }, { "update": 1300, "global_step": 5324800, - "num_episodes": 1762, - "mean_reward": 306.1682329654694, - "mean_length": 3702.99, - "loss": 621.1657104492188, - "sps": 312.8076116030221 + "num_episodes": 560, + "mean_reward": 707.348245446682, + "mean_length": 11769.75, + "survival_pct": 0.29424375, + "max_steps": 40000, + "loss": -0.1055583506822586, + "sps": 831.0648597366617 }, { "update": 1305, "global_step": 5345280, - "num_episodes": 1763, - "mean_reward": 306.26519484996794, - "mean_length": 3714.28, - "loss": 445.75, - "sps": 390.84962605893924 + "num_episodes": 560, + "mean_reward": 707.348245446682, + "mean_length": 11769.75, + "survival_pct": 0.29424375, + "max_steps": 40000, + "loss": 0.10219299793243408, + "sps": 850.5287420009836 }, { "update": 1310, "global_step": 5365760, - "num_episodes": 1772, - "mean_reward": 357.3265011548996, - "mean_length": 3776.52, - "loss": 1374.097412109375, - "sps": 385.8458185201923 + "num_episodes": 561, + "mean_reward": 711.5321604895591, + "mean_length": 12147.49, + "survival_pct": 0.30368725, + "max_steps": 40000, + "loss": 1.737269639968872, + "sps": 713.2928399136136 }, { "update": 1315, "global_step": 5386240, - "num_episodes": 1773, - "mean_reward": 357.70143792629244, - "mean_length": 3875.1, - "loss": 208.59083557128906, - "sps": 555.0694538042676 + "num_episodes": 563, + "mean_reward": 705.7887069511413, + "mean_length": 12247.84, + "survival_pct": 0.306196, + "max_steps": 40000, + "loss": 146.1320037841797, + "sps": 515.8274568639007 }, { "update": 1320, "global_step": 5406720, - "num_episodes": 1786, - "mean_reward": 362.7652148580551, - "mean_length": 3706.89, - "loss": 1506.414306640625, - "sps": 246.7002255816469 + "num_episodes": 563, + "mean_reward": 705.7887069511413, + "mean_length": 12247.84, + "survival_pct": 0.306196, + "max_steps": 40000, + "loss": 0.19398686289787292, + "sps": 800.8556253753457 }, { "update": 1325, "global_step": 5427200, - "num_episodes": 1786, - "mean_reward": 362.7652148580551, - "mean_length": 3706.89, - "loss": 46.25688934326172, - "sps": 409.5117865802824 + "num_episodes": 564, + "mean_reward": 722.067927532196, + "mean_length": 12347.84, + "survival_pct": 0.308696, + "max_steps": 40000, + "loss": 72.42268371582031, + "sps": 507.17764221119916 }, { "update": 1330, "global_step": 5447680, - "num_episodes": 1791, - "mean_reward": 501.35476023197174, - "mean_length": 3744.98, - "loss": 371.16973876953125, - "sps": 278.5499936271881 + "num_episodes": 567, + "mean_reward": 809.5898822021485, + "mean_length": 12150.75, + "survival_pct": 0.30376875, + "max_steps": 40000, + "loss": 0.290306031703949, + "sps": 764.8144386668907 }, { "update": 1335, "global_step": 5468160, - "num_episodes": 1793, - "mean_reward": 607.4379647493363, - "mean_length": 3921.02, - "loss": 73.33538818359375, - "sps": 388.619707385681 + "num_episodes": 567, + "mean_reward": 809.5898822021485, + "mean_length": 12150.75, + "survival_pct": 0.30376875, + "max_steps": 40000, + "loss": 0.8032262325286865, + "sps": 799.1206249673232 }, { "update": 1340, "global_step": 5488640, - "num_episodes": 1796, - "mean_reward": 756.3367283582687, - "mean_length": 4020.31, - "loss": 462.8284606933594, - "sps": 342.5094025627594 + "num_episodes": 568, + "mean_reward": 813.5851877593994, + "mean_length": 12546.64, + "survival_pct": 0.313666, + "max_steps": 40000, + "loss": 60.33620071411133, + "sps": 555.4755949712716 }, { "update": 1345, "global_step": 5509120, - "num_episodes": 1796, - "mean_reward": 756.3367283582687, - "mean_length": 4020.31, - "loss": 67.39126586914062, - "sps": 385.48904917796193 + "num_episodes": 570, + "mean_reward": 897.157964668274, + "mean_length": 13314.53, + "survival_pct": 0.33286325, + "max_steps": 40000, + "loss": 794.7894897460938, + "sps": 305.32909580240744 }, { "update": 1350, "global_step": 5529600, - "num_episodes": 1802, - "mean_reward": 869.4895800352097, - "mean_length": 3967.21, - "loss": 12.0632905960083, - "sps": 668.8793003173347 + "num_episodes": 571, + "mean_reward": 897.837958946228, + "mean_length": 13330.75, + "survival_pct": 0.33326875, + "max_steps": 40000, + "loss": -0.03776288032531738, + "sps": 738.9189775623264 }, { "update": 1355, "global_step": 5550080, - "num_episodes": 1804, - "mean_reward": 989.9085026121139, - "mean_length": 4063.23, - "loss": 86.83934783935547, - "sps": 418.3353031686608 + "num_episodes": 571, + "mean_reward": 897.837958946228, + "mean_length": 13330.75, + "survival_pct": 0.33326875, + "max_steps": 40000, + "loss": 0.003855481743812561, + "sps": 837.3194861665531 }, { "update": 1360, "global_step": 5570560, - "num_episodes": 1815, - "mean_reward": 1078.798788728714, - "mean_length": 3886.21, - "loss": 666.8345336914062, - "sps": 256.51104316818095 + "num_episodes": 571, + "mean_reward": 897.837958946228, + "mean_length": 13330.75, + "survival_pct": 0.33326875, + "max_steps": 40000, + "loss": -0.0835946649312973, + "sps": 852.7604200993885 }, { "update": 1365, "global_step": 5591040, - "num_episodes": 1815, - "mean_reward": 1078.798788728714, - "mean_length": 3886.21, - "loss": 900.6356811523438, - "sps": 495.47242571130397 + "num_episodes": 573, + "mean_reward": 901.498504357338, + "mean_length": 13730.89, + "survival_pct": 0.34327225, + "max_steps": 40000, + "loss": 0.6666049957275391, + "sps": 798.1656538062524 }, { "update": 1370, "global_step": 5611520, - "num_episodes": 1824, - "mean_reward": 1200.6963546466827, - "mean_length": 4027.44, - "loss": 20.821474075317383, - "sps": 506.7840907522114 + "num_episodes": 573, + "mean_reward": 901.498504357338, + "mean_length": 13730.89, + "survival_pct": 0.34327225, + "max_steps": 40000, + "loss": -0.11962562799453735, + "sps": 837.7882020187046 }, { "update": 1375, "global_step": 5632000, - "num_episodes": 1826, - "mean_reward": 1261.5812614822387, - "mean_length": 4015.04, - "loss": 4563.7763671875, - "sps": 675.537157736375 + "num_episodes": 573, + "mean_reward": 901.498504357338, + "mean_length": 13730.89, + "survival_pct": 0.34327225, + "max_steps": 40000, + "loss": 0.004459500312805176, + "sps": 823.1821543483084 }, { "update": 1380, "global_step": 5652480, - "num_episodes": 1836, - "mean_reward": 1445.384720082283, - "mean_length": 4027.89, - "loss": 459.9952697753906, - "sps": 341.6139254031597 + "num_episodes": 573, + "mean_reward": 901.498504357338, + "mean_length": 13730.89, + "survival_pct": 0.34327225, + "max_steps": 40000, + "loss": -0.17405246198177338, + "sps": 832.9659390943079 }, { "update": 1385, "global_step": 5672960, - "num_episodes": 1839, - "mean_reward": 1445.7170259952545, - "mean_length": 4039.25, - "loss": 1.4308533668518066, - "sps": 1488.9461962855103 + "num_episodes": 575, + "mean_reward": 940.8680406999588, + "mean_length": 13834.99, + "survival_pct": 0.34587475, + "max_steps": 40000, + "loss": 24.183181762695312, + "sps": 560.738665689664 }, { "update": 1390, "global_step": 5693440, - "num_episodes": 1856, - "mean_reward": 1429.1698562908173, - "mean_length": 3901.28, - "loss": 13.810539245605469, - "sps": 583.7912153704766 + "num_episodes": 575, + "mean_reward": 940.8680406999588, + "mean_length": 13834.99, + "survival_pct": 0.34587475, + "max_steps": 40000, + "loss": 0.48853129148483276, + "sps": 896.2171670714368 }, { "update": 1395, "global_step": 5713920, - "num_episodes": 1863, - "mean_reward": 1209.2014095544814, - "mean_length": 3639.94, - "loss": 273.29278564453125, - "sps": 645.3489718832685 + "num_episodes": 576, + "mean_reward": 982.5036016130448, + "mean_length": 13934.99, + "survival_pct": 0.34837475, + "max_steps": 40000, + "loss": 8.266661643981934, + "sps": 749.770689049747 }, { "update": 1400, "global_step": 5734400, - "num_episodes": 1871, - "mean_reward": 1319.8262105464935, - "mean_length": 3745.57, - "loss": 16.14458656311035, - "sps": 324.31654457005965 + "num_episodes": 576, + "mean_reward": 982.5036016130448, + "mean_length": 13934.99, + "survival_pct": 0.34837475, + "max_steps": 40000, + "loss": -0.013010233640670776, + "sps": 869.1728201417309 }, { "update": 1405, "global_step": 5754880, - "num_episodes": 1887, - "mean_reward": 1180.7784528923034, - "mean_length": 3295.57, - "loss": 3.5345799922943115, - "sps": 635.0555390978727 + "num_episodes": 579, + "mean_reward": 1029.220560479164, + "mean_length": 14034.37, + "survival_pct": 0.35085925, + "max_steps": 40000, + "loss": 29.333189010620117, + "sps": 539.5250549272407 }, { "update": 1410, "global_step": 5775360, - "num_episodes": 1897, - "mean_reward": 920.7941575908661, - "mean_length": 2829.74, - "loss": 29.106164932250977, - "sps": 283.5497895518039 + "num_episodes": 585, + "mean_reward": 1027.995834054947, + "mean_length": 13820.46, + "survival_pct": 0.34551149999999997, + "max_steps": 40000, + "loss": 18.8126220703125, + "sps": 439.6503781444294 }, { "update": 1415, "global_step": 5795840, - "num_episodes": 1899, - "mean_reward": 1015.2013060569764, - "mean_length": 2877.24, - "loss": 134.17953491210938, - "sps": 514.4558919437959 + "num_episodes": 585, + "mean_reward": 1027.995834054947, + "mean_length": 13820.46, + "survival_pct": 0.34551149999999997, + "max_steps": 40000, + "loss": 0.8571314811706543, + "sps": 639.3946228140344 }, { "update": 1420, "global_step": 5816320, - "num_episodes": 1905, - "mean_reward": 759.3930412435532, - "mean_length": 2493.3, - "loss": 71.0991439819336, - "sps": 407.36350916126725 + "num_episodes": 586, + "mean_reward": 1043.9769775485993, + "mean_length": 13920.46, + "survival_pct": 0.3480115, + "max_steps": 40000, + "loss": 17.923433303833008, + "sps": 384.01858038202676 }, { "update": 1425, "global_step": 5836800, - "num_episodes": 1923, - "mean_reward": 591.994899687767, - "mean_length": 2306.11, - "loss": 665.6765747070312, - "sps": 292.25754672826326 + "num_episodes": 587, + "mean_reward": 1106.1342781925202, + "mean_length": 14317.99, + "survival_pct": 0.35794975, + "max_steps": 40000, + "loss": 9.14981460571289, + "sps": 439.45182965039373 }, { "update": 1430, "global_step": 5857280, - "num_episodes": 1926, - "mean_reward": 491.9243581056595, - "mean_length": 2308.91, - "loss": 97.95196533203125, - "sps": 398.43841528471495 + "num_episodes": 591, + "mean_reward": 1116.412140932083, + "mean_length": 14771.51, + "survival_pct": 0.36928775, + "max_steps": 40000, + "loss": 1.7514079809188843, + "sps": 662.6907944527344 }, { "update": 1435, "global_step": 5877760, - "num_episodes": 1934, - "mean_reward": 396.85663065433505, - "mean_length": 2257.74, - "loss": 1123.9271240234375, - "sps": 467.5370774012981 + "num_episodes": 591, + "mean_reward": 1116.412140932083, + "mean_length": 14771.51, + "survival_pct": 0.36928775, + "max_steps": 40000, + "loss": 0.37396010756492615, + "sps": 903.1148649948647 }, { "update": 1440, "global_step": 5898240, - "num_episodes": 1939, - "mean_reward": 444.68491759777066, - "mean_length": 2252.07, - "loss": 149.9423065185547, - "sps": 397.96294200167154 + "num_episodes": 591, + "mean_reward": 1116.412140932083, + "mean_length": 14771.51, + "survival_pct": 0.36928775, + "max_steps": 40000, + "loss": 0.057983383536338806, + "sps": 850.523983894577 }, { "update": 1445, "global_step": 5918720, - "num_episodes": 1952, - "mean_reward": 490.6093361234665, - "mean_length": 2347.41, - "loss": 188.90318298339844, - "sps": 433.4020614000179 + "num_episodes": 593, + "mean_reward": 1102.7952147102355, + "mean_length": 14889.53, + "survival_pct": 0.37223825, + "max_steps": 40000, + "loss": 83.34320831298828, + "sps": 526.5779746081431 }, { "update": 1450, "global_step": 5939200, - "num_episodes": 1968, - "mean_reward": 341.27103984832763, - "mean_length": 2088.38, - "loss": 49.23589324951172, - "sps": 192.3194868778508 + "num_episodes": 595, + "mean_reward": 1018.396473865509, + "mean_length": 14585.34, + "survival_pct": 0.3646335, + "max_steps": 40000, + "loss": 6.169382572174072, + "sps": 451.6525927001659 }, { "update": 1455, "global_step": 5959680, - "num_episodes": 1980, - "mean_reward": 340.5469175004959, - "mean_length": 2102.8, - "loss": 53.03819274902344, - "sps": 401.45828106312945 + "num_episodes": 596, + "mean_reward": 1018.2012248802185, + "mean_length": 14603.1, + "survival_pct": 0.3650775, + "max_steps": 40000, + "loss": 1.4308723211288452, + "sps": 619.623956950883 }, { "update": 1460, "global_step": 5980160, - "num_episodes": 1987, - "mean_reward": 341.23766746520994, - "mean_length": 2290.56, - "loss": 12.17508316040039, - "sps": 321.48619942614465 + "num_episodes": 596, + "mean_reward": 1018.2012248802185, + "mean_length": 14603.1, + "survival_pct": 0.3650775, + "max_steps": 40000, + "loss": 0.18915529549121857, + "sps": 800.6110593469816 }, { "update": 1465, "global_step": 6000640, - "num_episodes": 1993, - "mean_reward": 340.6004219532013, - "mean_length": 2289.4, - "loss": 4.770975589752197, - "sps": 400.6137968711218 + "num_episodes": 597, + "mean_reward": 1022.199059085846, + "mean_length": 14999.43, + "survival_pct": 0.37498575, + "max_steps": 40000, + "loss": -0.0486217737197876, + "sps": 813.4318788464009 }, { "update": 1470, "global_step": 6021120, - "num_episodes": 2004, - "mean_reward": 321.17223945140836, - "mean_length": 2148.56, - "loss": 29.761117935180664, - "sps": 472.58742855602395 + "num_episodes": 597, + "mean_reward": 1022.199059085846, + "mean_length": 14999.43, + "survival_pct": 0.37498575, + "max_steps": 40000, + "loss": 0.03587697446346283, + "sps": 825.3421945718204 }, { "update": 1475, "global_step": 6041600, - "num_episodes": 2026, - "mean_reward": 336.3226027822495, - "mean_length": 1952.34, - "loss": 203.31748962402344, - "sps": 357.29518794795473 + "num_episodes": 598, + "mean_reward": 1041.5796708869934, + "mean_length": 15399.05, + "survival_pct": 0.38497624999999996, + "max_steps": 40000, + "loss": 0.06574638187885284, + "sps": 875.8211361821493 }, { "update": 1480, "global_step": 6062080, - "num_episodes": 2033, - "mean_reward": 313.32783755540845, - "mean_length": 1958.32, - "loss": 508.1851501464844, - "sps": 510.89708007010705 + "num_episodes": 598, + "mean_reward": 1041.5796708869934, + "mean_length": 15399.05, + "survival_pct": 0.38497624999999996, + "max_steps": 40000, + "loss": -0.11098746955394745, + "sps": 970.9604971491241 }, { "update": 1485, "global_step": 6082560, - "num_episodes": 2046, - "mean_reward": 194.68075824975966, - "mean_length": 1639.76, - "loss": 7.842682361602783, - "sps": 546.9033062498472 + "num_episodes": 598, + "mean_reward": 1041.5796708869934, + "mean_length": 15399.05, + "survival_pct": 0.38497624999999996, + "max_steps": 40000, + "loss": -0.16564123332500458, + "sps": 941.8806003191344 }, { "update": 1490, "global_step": 6103040, - "num_episodes": 2050, - "mean_reward": 143.90823694944382, - "mean_length": 1738.66, - "loss": 46.426143646240234, - "sps": 306.85539615127885 + "num_episodes": 601, + "mean_reward": 1025.6624535942078, + "mean_length": 15891.22, + "survival_pct": 0.3972805, + "max_steps": 40000, + "loss": 257.9145202636719, + "sps": 562.7793013045049 }, { "update": 1495, "global_step": 6123520, - "num_episodes": 2060, - "mean_reward": 334.49683523893356, - "mean_length": 1898.53, - "loss": 468.38525390625, - "sps": 245.82053873712613 + "num_episodes": 602, + "mean_reward": 1025.6871674919128, + "mean_length": 15904.06, + "survival_pct": 0.3976015, + "max_steps": 40000, + "loss": 8.345800399780273, + "sps": 713.7930964796354 }, { "update": 1500, "global_step": 6144000, - "num_episodes": 2069, - "mean_reward": 334.39896435022354, - "mean_length": 2044.37, - "loss": 85.75762939453125, - "sps": 334.1080989119973 + "num_episodes": 604, + "mean_reward": 1014.5235131645203, + "mean_length": 16004.56, + "survival_pct": 0.40011399999999997, + "max_steps": 40000, + "loss": 6.72609806060791, + "sps": 535.8752056296396 }, { "update": 1505, "global_step": 6164480, - "num_episodes": 2074, - "mean_reward": 333.67030994653703, - "mean_length": 1990.3, - "loss": 295.0758361816406, - "sps": 345.61576592839117 + "num_episodes": 606, + "mean_reward": 1021.3157058906555, + "mean_length": 16107.38, + "survival_pct": 0.4026845, + "max_steps": 40000, + "loss": 10.722652435302734, + "sps": 630.4906569968188 }, { "update": 1510, "global_step": 6184960, - "num_episodes": 2080, - "mean_reward": 334.66018194913863, - "mean_length": 2150.39, - "loss": 10.077195167541504, - "sps": 420.2391860174057 + "num_episodes": 607, + "mean_reward": 1004.4655008125305, + "mean_length": 16207.38, + "survival_pct": 0.4051845, + "max_steps": 40000, + "loss": 2.4828217029571533, + "sps": 838.6584245149437 }, { "update": 1515, "global_step": 6205440, - "num_episodes": 2097, - "mean_reward": 258.6993135094643, - "mean_length": 2109.74, - "loss": 122.61629486083984, - "sps": 229.56375224355847 + "num_episodes": 607, + "mean_reward": 1004.4655008125305, + "mean_length": 16207.38, + "survival_pct": 0.4051845, + "max_steps": 40000, + "loss": 1.270835280418396, + "sps": 822.396289748599 }, { "update": 1520, "global_step": 6225920, - "num_episodes": 2107, - "mean_reward": 258.4274232983589, - "mean_length": 2108.42, - "loss": 361.1533203125, - "sps": 361.8525022786034 + "num_episodes": 607, + "mean_reward": 1004.4655008125305, + "mean_length": 16207.38, + "survival_pct": 0.4051845, + "max_steps": 40000, + "loss": 1.3082380294799805, + "sps": 817.9383133650281 }, { "update": 1525, "global_step": 6246400, - "num_episodes": 2125, - "mean_reward": 239.45979365587235, - "mean_length": 1883.93, - "loss": 25.989086151123047, - "sps": 275.38397015212564 + "num_episodes": 607, + "mean_reward": 1004.4655008125305, + "mean_length": 16207.38, + "survival_pct": 0.3241476, + "max_steps": 50000, + "loss": 0.6200645565986633, + "sps": 810.1343000695222 }, { "update": 1530, "global_step": 6266880, - "num_episodes": 2131, - "mean_reward": 239.8594556736946, - "mean_length": 1981.69, - "loss": 404.8509521484375, - "sps": 405.6766540706019 + "num_episodes": 607, + "mean_reward": 1004.4655008125305, + "mean_length": 16207.38, + "survival_pct": 0.3241476, + "max_steps": 50000, + "loss": 0.4365927278995514, + "sps": 815.0941587526099 }, { "update": 1535, "global_step": 6287360, - "num_episodes": 2146, - "mean_reward": 303.8745777082443, - "mean_length": 2254.68, - "loss": 1170.5218505859375, - "sps": 251.0949486970236 + "num_episodes": 607, + "mean_reward": 1004.4655008125305, + "mean_length": 16207.38, + "survival_pct": 0.3241476, + "max_steps": 50000, + "loss": 0.3482319116592407, + "sps": 778.7197208191486 }, { "update": 1540, "global_step": 6307840, - "num_episodes": 2147, - "mean_reward": 304.0846936607361, - "mean_length": 2341.33, - "loss": 8.5966157913208, - "sps": 446.2390266117631 + "num_episodes": 607, + "mean_reward": 1004.4655008125305, + "mean_length": 16207.38, + "survival_pct": 0.3241476, + "max_steps": 50000, + "loss": 0.30282458662986755, + "sps": 856.1091418900583 }, { "update": 1545, "global_step": 6328320, - "num_episodes": 2154, - "mean_reward": 437.3326745700836, - "mean_length": 2280.45, - "loss": 1299.16552734375, - "sps": 259.202559576872 + "num_episodes": 607, + "mean_reward": 1004.4655008125305, + "mean_length": 16207.38, + "survival_pct": 0.3241476, + "max_steps": 50000, + "loss": 0.3217318058013916, + "sps": 780.544661851666 }, { "update": 1550, "global_step": 6348800, - "num_episodes": 2158, - "mean_reward": 303.4427977991104, - "mean_length": 2211.31, - "loss": 1.1450996398925781, - "sps": 549.5109518078744 + "num_episodes": 607, + "mean_reward": 1004.4655008125305, + "mean_length": 16207.38, + "survival_pct": 0.3241476, + "max_steps": 50000, + "loss": 0.012079894542694092, + "sps": 780.2026645895013 }, { "update": 1555, "global_step": 6369280, - "num_episodes": 2172, - "mean_reward": 304.4946458005905, - "mean_length": 2210.89, - "loss": 59.038814544677734, - "sps": 341.25380949315337 + "num_episodes": 609, + "mean_reward": 1009.2189987373353, + "mean_length": 16649.64, + "survival_pct": 0.3329928, + "max_steps": 50000, + "loss": 0.06771233677864075, + "sps": 827.805833049566 }, { "update": 1560, "global_step": 6389760, - "num_episodes": 2175, - "mean_reward": 304.3582187414169, - "mean_length": 2211.87, - "loss": 13.61323070526123, - "sps": 642.8393173099237 + "num_episodes": 609, + "mean_reward": 1009.2189987373353, + "mean_length": 16649.64, + "survival_pct": 0.3329928, + "max_steps": 50000, + "loss": 0.09607579559087753, + "sps": 794.12397331076 }, { "update": 1565, "global_step": 6410240, - "num_episodes": 2180, - "mean_reward": 305.09624064445495, - "mean_length": 2307.71, - "loss": 1913.05126953125, - "sps": 391.8241680446758 + "num_episodes": 612, + "mean_reward": 972.2015235805511, + "mean_length": 16863.26, + "survival_pct": 0.3372652, + "max_steps": 50000, + "loss": 31.345748901367188, + "sps": 575.1634041964817 }, { "update": 1570, "global_step": 6430720, - "num_episodes": 2183, - "mean_reward": 305.77074447154996, - "mean_length": 2405.35, - "loss": 136.88858032226562, - "sps": 394.7672396028363 + "num_episodes": 613, + "mean_reward": 977.1921964931488, + "mean_length": 17359.13, + "survival_pct": 0.3471826, + "max_steps": 50000, + "loss": 62.981990814208984, + "sps": 467.3951752643231 }, { "update": 1575, "global_step": 6451200, - "num_episodes": 2189, - "mean_reward": 405.41952825069427, - "mean_length": 2593.85, - "loss": 83.12802124023438, - "sps": 299.9621165564932 + "num_episodes": 613, + "mean_reward": 977.1921964931488, + "mean_length": 17359.13, + "survival_pct": 0.3471826, + "max_steps": 50000, + "loss": 79.36112976074219, + "sps": 427.6025784582037 }, { "update": 1580, "global_step": 6471680, - "num_episodes": 2207, - "mean_reward": 405.9634767389297, - "mean_length": 2524.02, - "loss": 13081.1357421875, - "sps": 223.53810669328672 + "num_episodes": 613, + "mean_reward": 977.1921964931488, + "mean_length": 17359.13, + "survival_pct": 0.3471826, + "max_steps": 50000, + "loss": 0.3020017147064209, + "sps": 667.9687302715897 }, { "update": 1585, "global_step": 6492160, - "num_episodes": 2211, - "mean_reward": 499.87862377643586, - "mean_length": 2495.58, - "loss": 46.772918701171875, - "sps": 453.36473969791376 + "num_episodes": 614, + "mean_reward": 1108.4623094081878, + "mean_length": 17559.13, + "survival_pct": 0.3511826, + "max_steps": 50000, + "loss": 30.998226165771484, + "sps": 597.6820954824248 }, { "update": 1590, "global_step": 6512640, - "num_episodes": 2221, - "mean_reward": 670.4232928800583, - "mean_length": 2706.97, - "loss": 368.63763427734375, - "sps": 429.39001544641116 + "num_episodes": 614, + "mean_reward": 1108.4623094081878, + "mean_length": 17559.13, + "survival_pct": 0.3511826, + "max_steps": 50000, + "loss": 0.0028700977563858032, + "sps": 696.7986031474608 }, { "update": 1595, "global_step": 6533120, - "num_episodes": 2227, - "mean_reward": 803.7308926296234, - "mean_length": 2725.72, - "loss": 90.26509094238281, - "sps": 413.30646156887036 + "num_episodes": 615, + "mean_reward": 1119.9435447597505, + "mean_length": 17759.13, + "survival_pct": 0.3551826, + "max_steps": 50000, + "loss": 7.728819847106934, + "sps": 618.6918228629329 }, { "update": 1600, "global_step": 6553600, - "num_episodes": 2232, - "mean_reward": 933.0243373584748, - "mean_length": 2915.95, - "loss": 68.28546142578125, - "sps": 374.56623534825485 + "num_episodes": 617, + "mean_reward": 1121.774534635544, + "mean_length": 17943.08, + "survival_pct": 0.35886160000000006, + "max_steps": 50000, + "loss": 1.3815250396728516, + "sps": 548.2307499735696 }, { "update": 1605, "global_step": 6574080, - "num_episodes": 2235, - "mean_reward": 933.269734544754, - "mean_length": 2912.74, - "loss": 865.8887939453125, - "sps": 445.657946827829 + "num_episodes": 620, + "mean_reward": 1160.0132136058808, + "mean_length": 18332.17, + "survival_pct": 0.36664339999999995, + "max_steps": 50000, + "loss": 53.58154296875, + "sps": 218.35284346944567 }, { "update": 1610, "global_step": 6594560, - "num_episodes": 2237, - "mean_reward": 933.7870253133774, - "mean_length": 3011.88, - "loss": 871.9052734375, - "sps": 387.41989720305287 + "num_episodes": 620, + "mean_reward": 1160.0132136058808, + "mean_length": 18332.17, + "survival_pct": 0.36664339999999995, + "max_steps": 50000, + "loss": 198.85508728027344, + "sps": 414.0528781364895 }, { "update": 1615, "global_step": 6615040, - "num_episodes": 2248, - "mean_reward": 1077.8849126005173, - "mean_length": 3056.53, - "loss": 54.994510650634766, - "sps": 423.33621401282124 + "num_episodes": 620, + "mean_reward": 1160.0132136058808, + "mean_length": 18332.17, + "survival_pct": 0.36664339999999995, + "max_steps": 50000, + "loss": 0.6458793878555298, + "sps": 468.4840673845562 }, { "update": 1620, "global_step": 6635520, - "num_episodes": 2258, - "mean_reward": 943.9673701143265, - "mean_length": 3023.43, - "loss": 137.88092041015625, - "sps": 374.57440205325895 + "num_episodes": 620, + "mean_reward": 1160.0132136058808, + "mean_length": 18332.17, + "survival_pct": 0.36664339999999995, + "max_steps": 50000, + "loss": 4.100710868835449, + "sps": 453.53569760987244 }, { "update": 1625, "global_step": 6656000, - "num_episodes": 2263, - "mean_reward": 856.1039279556275, - "mean_length": 2924.57, - "loss": 348.59234619140625, - "sps": 377.6081162361363 + "num_episodes": 620, + "mean_reward": 1160.0132136058808, + "mean_length": 18332.17, + "survival_pct": 0.36664339999999995, + "max_steps": 50000, + "loss": 8.949458122253418, + "sps": 445.9539348920468 }, { "update": 1630, "global_step": 6676480, - "num_episodes": 2267, - "mean_reward": 931.3061418437958, - "mean_length": 2926.62, - "loss": 9.557199478149414, - "sps": 460.1107258733158 + "num_episodes": 620, + "mean_reward": 1160.0132136058808, + "mean_length": 18332.17, + "survival_pct": 0.36664339999999995, + "max_steps": 50000, + "loss": 12.222755432128906, + "sps": 454.9897483249271 }, { "update": 1635, "global_step": 6696960, - "num_episodes": 2273, - "mean_reward": 931.8358981752395, - "mean_length": 3120.06, - "loss": 514.8411254882812, - "sps": 523.5469529045816 + "num_episodes": 622, + "mean_reward": 1161.4788415908813, + "mean_length": 18470.78, + "survival_pct": 0.36941559999999996, + "max_steps": 50000, + "loss": 0.15580856800079346, + "sps": 433.21338637539577 }, { "update": 1640, "global_step": 6717440, - "num_episodes": 2276, - "mean_reward": 1109.3497059488298, - "mean_length": 3219.09, - "loss": 31.73206329345703, - "sps": 412.7885853043598 + "num_episodes": 622, + "mean_reward": 1161.4788415908813, + "mean_length": 18470.78, + "survival_pct": 0.36941559999999996, + "max_steps": 50000, + "loss": -0.03489271551370621, + "sps": 424.3238296174212 }, { "update": 1645, "global_step": 6737920, - "num_episodes": 2282, - "mean_reward": 1197.9563966131211, - "mean_length": 3220.5, - "loss": 35.86986541748047, - "sps": 466.28907053794126 + "num_episodes": 622, + "mean_reward": 1161.4788415908813, + "mean_length": 18470.78, + "survival_pct": 0.36941559999999996, + "max_steps": 50000, + "loss": -0.022046178579330444, + "sps": 411.0249793122232 }, { "update": 1650, "global_step": 6758400, - "num_episodes": 2285, - "mean_reward": 1119.968164858818, - "mean_length": 3067.54, - "loss": 279.3809509277344, - "sps": 415.9298164658184 + "num_episodes": 623, + "mean_reward": 1166.8129835891723, + "mean_length": 18963.37, + "survival_pct": 0.3792674, + "max_steps": 50000, + "loss": 147.39236450195312, + "sps": 388.6099849730296 }, { "update": 1655, "global_step": 6778880, - "num_episodes": 2299, - "mean_reward": 1277.0260197210312, - "mean_length": 3026.13, - "loss": 44.909996032714844, - "sps": 289.10855632711406 + "num_episodes": 623, + "mean_reward": 1166.8129835891723, + "mean_length": 18963.37, + "survival_pct": 0.3792674, + "max_steps": 50000, + "loss": 0.3184221386909485, + "sps": 559.058329817769 }, { "update": 1660, "global_step": 6799360, - "num_episodes": 2310, - "mean_reward": 1212.9116531801224, - "mean_length": 3157.69, - "loss": 19.89190673828125, - "sps": 442.19954156582503 + "num_episodes": 623, + "mean_reward": 1166.8129835891723, + "mean_length": 18963.37, + "survival_pct": 0.3792674, + "max_steps": 50000, + "loss": 0.13589444756507874, + "sps": 524.1350206501007 }, { "update": 1665, "global_step": 6819840, - "num_episodes": 2313, - "mean_reward": 1237.1113989305495, - "mean_length": 3267.66, - "loss": 49.466552734375, - "sps": 941.6208281097553 + "num_episodes": 624, + "mean_reward": 1224.4580519485473, + "mean_length": 19163.37, + "survival_pct": 0.3832674, + "max_steps": 50000, + "loss": 0.28983187675476074, + "sps": 520.1970206412298 }, { "update": 1670, "global_step": 6840320, - "num_episodes": 2320, - "mean_reward": 1185.366361656189, - "mean_length": 3170.05, - "loss": 226.9459686279297, - "sps": 560.8755073464478 + "num_episodes": 625, + "mean_reward": 1325.5696249008179, + "mean_length": 19621.96, + "survival_pct": 0.3924392, + "max_steps": 50000, + "loss": -0.04390272498130798, + "sps": 600.9722666327863 }, { "update": 1675, "global_step": 6860800, - "num_episodes": 2327, - "mean_reward": 1171.2066772079468, - "mean_length": 3355.09, - "loss": 10.15625, - "sps": 335.8002299447698 + "num_episodes": 625, + "mean_reward": 1325.5696249008179, + "mean_length": 19621.96, + "survival_pct": 0.3924392, + "max_steps": 50000, + "loss": 0.029237419366836548, + "sps": 619.2145461020093 }, { "update": 1680, "global_step": 6881280, - "num_episodes": 2333, - "mean_reward": 1042.0874049663544, - "mean_length": 3211.85, - "loss": 82.60301971435547, - "sps": 302.2235800221304 + "num_episodes": 625, + "mean_reward": 1325.5696249008179, + "mean_length": 19621.96, + "survival_pct": 0.3924392, + "max_steps": 50000, + "loss": 0.28174102306365967, + "sps": 619.1091990093024 }, { "update": 1685, "global_step": 6901760, - "num_episodes": 2339, - "mean_reward": 972.5269057083129, - "mean_length": 3193.44, - "loss": 120.32151794433594, - "sps": 664.7036679347333 + "num_episodes": 626, + "mean_reward": 1263.1981332015991, + "mean_length": 19821.96, + "survival_pct": 0.3964392, + "max_steps": 50000, + "loss": 0.4367483854293823, + "sps": 626.916632808775 }, { "update": 1690, "global_step": 6922240, - "num_episodes": 2339, - "mean_reward": 972.5269057083129, - "mean_length": 3193.44, - "loss": 1.7924470901489258, - "sps": 1063.7604928109242 + "num_episodes": 626, + "mean_reward": 1263.1981332015991, + "mean_length": 19821.96, + "survival_pct": 0.3964392, + "max_steps": 50000, + "loss": 0.1575974076986313, + "sps": 618.4480121190815 }, { "update": 1695, "global_step": 6942720, - "num_episodes": 2342, - "mean_reward": 942.9001926231384, - "mean_length": 3342.44, - "loss": 6.127931594848633, - "sps": 592.3562356846871 + "num_episodes": 627, + "mean_reward": 1294.6462133026123, + "mean_length": 20320.93, + "survival_pct": 0.4064186, + "max_steps": 50000, + "loss": 9.566240310668945, + "sps": 406.6387424437816 }, { "update": 1700, "global_step": 6963200, - "num_episodes": 2345, - "mean_reward": 943.6058731651306, - "mean_length": 3474.03, - "loss": 44.08333206176758, - "sps": 297.44937442353597 + "num_episodes": 628, + "mean_reward": 1280.1067363739014, + "mean_length": 20520.93, + "survival_pct": 0.4104186, + "max_steps": 50000, + "loss": 0.709872841835022, + "sps": 442.501654087899 }, { "update": 1705, "global_step": 6983680, - "num_episodes": 2350, - "mean_reward": 891.2378608131409, - "mean_length": 3613.22, - "loss": 139.50340270996094, - "sps": 321.276001001162 + "num_episodes": 631, + "mean_reward": 1386.179150118828, + "mean_length": 20721.63, + "survival_pct": 0.41443260000000004, + "max_steps": 50000, + "loss": 19.365644454956055, + "sps": 477.75886736803943 }, { "update": 1710, "global_step": 7004160, - "num_episodes": 2351, - "mean_reward": 891.2994220161438, - "mean_length": 3670.58, - "loss": 80.7029800415039, - "sps": 602.1302177845877 + "num_episodes": 631, + "mean_reward": 1386.179150118828, + "mean_length": 20721.63, + "survival_pct": 0.41443260000000004, + "max_steps": 50000, + "loss": 108.79446411132812, + "sps": 612.149128502162 }, { "update": 1715, "global_step": 7024640, - "num_episodes": 2354, - "mean_reward": 891.3987969732284, - "mean_length": 3707.87, - "loss": 43.55419921875, - "sps": 609.1286236648131 + "num_episodes": 631, + "mean_reward": 1386.179150118828, + "mean_length": 20721.63, + "survival_pct": 0.41443260000000004, + "max_steps": 50000, + "loss": 0.14560824632644653, + "sps": 695.5005180060909 }, { "update": 1720, "global_step": 7045120, - "num_episodes": 2356, - "mean_reward": 908.0675931024551, - "mean_length": 3894.63, - "loss": 1924.8736572265625, - "sps": 521.5708115336275 + "num_episodes": 631, + "mean_reward": 1386.179150118828, + "mean_length": 20721.63, + "survival_pct": 0.41443260000000004, + "max_steps": 50000, + "loss": -0.01733715832233429, + "sps": 691.5803550233451 }, { "update": 1725, "global_step": 7065600, - "num_episodes": 2368, - "mean_reward": 877.4067792797089, - "mean_length": 4024.38, - "loss": 1546.024658203125, - "sps": 144.1450251623922 + "num_episodes": 631, + "mean_reward": 1386.179150118828, + "mean_length": 20721.63, + "survival_pct": 0.41443260000000004, + "max_steps": 50000, + "loss": 0.15833212435245514, + "sps": 667.4815281045276 }, { "update": 1730, "global_step": 7086080, - "num_episodes": 2371, - "mean_reward": 876.8285467529297, - "mean_length": 4001.29, - "loss": 140.04344177246094, - "sps": 219.3887746950412 + "num_episodes": 633, + "mean_reward": 1468.0781693506242, + "mean_length": 20922.8, + "survival_pct": 0.418456, + "max_steps": 50000, + "loss": 11.883382797241211, + "sps": 581.8314657800905 }, { "update": 1735, "global_step": 7106560, - "num_episodes": 2379, - "mean_reward": 712.0810299873352, - "mean_length": 3765.17, - "loss": 592.1142578125, - "sps": 154.4538739502226 + "num_episodes": 633, + "mean_reward": 1468.0781693506242, + "mean_length": 20922.8, + "survival_pct": 0.418456, + "max_steps": 50000, + "loss": 1.1643352508544922, + "sps": 681.4017891653039 }, { "update": 1740, "global_step": 7127040, - "num_episodes": 2380, - "mean_reward": 711.973635725975, - "mean_length": 3803.94, - "loss": 94.82659149169922, - "sps": 312.59798550353105 + "num_episodes": 633, + "mean_reward": 1468.0781693506242, + "mean_length": 20922.8, + "survival_pct": 0.418456, + "max_steps": 50000, + "loss": -0.016278870403766632, + "sps": 777.0781892710015 }, { "update": 1745, "global_step": 7147520, - "num_episodes": 2386, - "mean_reward": 721.5674746751786, - "mean_length": 4056.06, - "loss": 62.10371398925781, - "sps": 320.26559324554347 + "num_episodes": 633, + "mean_reward": 1468.0781693506242, + "mean_length": 20922.8, + "survival_pct": 0.418456, + "max_steps": 50000, + "loss": -0.061865031719207764, + "sps": 774.9591726476483 }, { "update": 1750, "global_step": 7168000, - "num_episodes": 2388, - "mean_reward": 722.019714179039, - "mean_length": 4119.0, - "loss": 154.1345977783203, - "sps": 490.86442866974096 + "num_episodes": 638, + "mean_reward": 1506.0245201086998, + "mean_length": 20610.96, + "survival_pct": 0.4122192, + "max_steps": 50000, + "loss": 2.459982395172119, + "sps": 545.323321230957 }, { "update": 1755, "global_step": 7188480, - "num_episodes": 2393, - "mean_reward": 641.6925968694687, - "mean_length": 4085.19, - "loss": 3164.498779296875, - "sps": 258.3838207181636 + "num_episodes": 638, + "mean_reward": 1506.0245201086998, + "mean_length": 20610.96, + "survival_pct": 0.4122192, + "max_steps": 50000, + "loss": 0.11832943558692932, + "sps": 817.3067294637037 }, { "update": 1760, "global_step": 7208960, - "num_episodes": 2395, - "mean_reward": 686.4729778194428, - "mean_length": 4253.36, - "loss": 425.0414733886719, - "sps": 362.7607269459368 + "num_episodes": 640, + "mean_reward": 1516.9915149474143, + "mean_length": 21111.92, + "survival_pct": 0.42223839999999996, + "max_steps": 50000, + "loss": 17.96449851989746, + "sps": 621.1327524456565 }, { "update": 1765, "global_step": 7229440, - "num_episodes": 2398, - "mean_reward": 781.3392351341248, - "mean_length": 4537.14, - "loss": 31.12351417541504, - "sps": 385.67594003613885 + "num_episodes": 642, + "mean_reward": 1570.5918082213402, + "mean_length": 21605.81, + "survival_pct": 0.4321162, + "max_steps": 50000, + "loss": 886.5294189453125, + "sps": 239.42082440770906 }, { "update": 1770, "global_step": 7249920, - "num_episodes": 2401, - "mean_reward": 780.9093813705445, - "mean_length": 4438.34, - "loss": 154.23956298828125, - "sps": 350.9404403984712 + "num_episodes": 647, + "mean_reward": 1521.5539734148979, + "mean_length": 20475.3, + "survival_pct": 0.409506, + "max_steps": 50000, + "loss": 179.85931396484375, + "sps": 298.4259022731802 }, { "update": 1775, "global_step": 7270400, - "num_episodes": 2409, - "mean_reward": 920.4024870014191, - "mean_length": 4671.4, - "loss": 68.82001495361328, - "sps": 373.0457413366687 + "num_episodes": 647, + "mean_reward": 1521.5539734148979, + "mean_length": 20475.3, + "survival_pct": 0.409506, + "max_steps": 50000, + "loss": 966.6091918945312, + "sps": 537.2474721109993 }, { "update": 1780, "global_step": 7290880, - "num_episodes": 2431, - "mean_reward": 661.6825140190125, - "mean_length": 4178.67, - "loss": 1634.844482421875, - "sps": 334.82655519105265 + "num_episodes": 648, + "mean_reward": 1564.6685786104201, + "mean_length": 20973.13, + "survival_pct": 0.4194626, + "max_steps": 50000, + "loss": 62.704280853271484, + "sps": 509.433704200731 }, { "update": 1785, "global_step": 7311360, - "num_episodes": 2436, - "mean_reward": 703.8663965082169, - "mean_length": 4154.8, - "loss": 36.222721099853516, - "sps": 690.9562224821975 + "num_episodes": 648, + "mean_reward": 1564.6685786104201, + "mean_length": 20973.13, + "survival_pct": 0.4194626, + "max_steps": 50000, + "loss": -0.05052866041660309, + "sps": 655.9797605411271 }, { "update": 1790, "global_step": 7331840, - "num_episodes": 2442, - "mean_reward": 682.8625202751159, - "mean_length": 3792.16, - "loss": 7.088906288146973, - "sps": 546.4602476565249 + "num_episodes": 649, + "mean_reward": 1654.446047320366, + "mean_length": 21472.42, + "survival_pct": 0.42944839999999995, + "max_steps": 50000, + "loss": 107.669677734375, + "sps": 447.78139010542594 }, { "update": 1795, "global_step": 7352320, - "num_episodes": 2452, - "mean_reward": 668.2820160484314, - "mean_length": 3464.88, - "loss": 15.93359375, - "sps": 338.3906013362985 + "num_episodes": 650, + "mean_reward": 1661.2120521116258, + "mean_length": 21959.9, + "survival_pct": 0.43919800000000003, + "max_steps": 50000, + "loss": 116.32649993896484, + "sps": 581.3285585073751 }, { "update": 1800, "global_step": 7372800, - "num_episodes": 2479, - "mean_reward": 503.18441142082213, - "mean_length": 2745.39, - "loss": 4.317780494689941, - "sps": 247.07750153960802 + "num_episodes": 650, + "mean_reward": 1661.2120521116258, + "mean_length": 21959.9, + "survival_pct": 0.43919800000000003, + "max_steps": 50000, + "loss": -0.1900792270898819, + "sps": 738.826346232058 }, { "update": 1805, "global_step": 7393280, - "num_episodes": 2499, - "mean_reward": 254.19922243118287, - "mean_length": 1727.29, - "loss": 2.4029455184936523, - "sps": 343.1557628026188 + "num_episodes": 650, + "mean_reward": 1661.2120521116258, + "mean_length": 21959.9, + "survival_pct": 0.36599833333333337, + "max_steps": 60000, + "loss": -0.12798017263412476, + "sps": 735.1636673358219 }, { "update": 1810, "global_step": 7413760, - "num_episodes": 2505, - "mean_reward": 156.29735308647156, - "mean_length": 1608.82, - "loss": 50.26245880126953, - "sps": 545.8711155437793 + "num_episodes": 650, + "mean_reward": 1661.2120521116258, + "mean_length": 21959.9, + "survival_pct": 0.36599833333333337, + "max_steps": 60000, + "loss": -0.17347615957260132, + "sps": 710.1339399692507 }, { "update": 1815, "global_step": 7434240, - "num_episodes": 2511, - "mean_reward": 86.22271653652192, - "mean_length": 1490.02, - "loss": 0.49680042266845703, - "sps": 765.2176779813885 + "num_episodes": 650, + "mean_reward": 1661.2120521116258, + "mean_length": 21959.9, + "survival_pct": 0.36599833333333337, + "max_steps": 60000, + "loss": -0.04106990993022919, + "sps": 703.2321739929172 }, { "update": 1820, "global_step": 7454720, - "num_episodes": 2517, - "mean_reward": 87.76825302124024, - "mean_length": 1667.16, - "loss": 0.9337818622589111, - "sps": 741.1448982958542 + "num_episodes": 651, + "mean_reward": 1667.2202544736863, + "mean_length": 22557.46, + "survival_pct": 0.37595766666666663, + "max_steps": 60000, + "loss": 1.203838586807251, + "sps": 608.5787796882346 }, { "update": 1825, "global_step": 7475200, - "num_episodes": 2525, - "mean_reward": 83.8934311246872, - "mean_length": 1771.63, - "loss": 1.8736273050308228, - "sps": 346.82608481484516 + "num_episodes": 651, + "mean_reward": 1667.2202544736863, + "mean_length": 22557.46, + "survival_pct": 0.37595766666666663, + "max_steps": 60000, + "loss": -0.09379199892282486, + "sps": 656.6381880745341 }, { "update": 1830, "global_step": 7495680, - "num_episodes": 2536, - "mean_reward": 42.52432149887085, - "mean_length": 1779.55, - "loss": 43.508872985839844, - "sps": 538.257412633311 + "num_episodes": 651, + "mean_reward": 1667.2202544736863, + "mean_length": 22557.46, + "survival_pct": 0.37595766666666663, + "max_steps": 60000, + "loss": 0.08595463633537292, + "sps": 696.5027182462075 }, { "update": 1835, "global_step": 7516160, - "num_episodes": 2559, - "mean_reward": 22.3750723361969, - "mean_length": 1585.34, - "loss": 11.682701110839844, - "sps": 283.17378891804617 + "num_episodes": 651, + "mean_reward": 1667.2202544736863, + "mean_length": 22557.46, + "survival_pct": 0.37595766666666663, + "max_steps": 60000, + "loss": -0.18902313709259033, + "sps": 686.4702117808695 }, { "update": 1840, "global_step": 7536640, - "num_episodes": 2572, - "mean_reward": 23.224842309951782, - "mean_length": 1714.64, - "loss": 0.64460289478302, - "sps": 945.2402518055575 + "num_episodes": 651, + "mean_reward": 1667.2202544736863, + "mean_length": 22557.46, + "survival_pct": 0.37595766666666663, + "max_steps": 60000, + "loss": -0.18000459671020508, + "sps": 663.4220467173932 }, { "update": 1845, "global_step": 7557120, - "num_episodes": 2575, - "mean_reward": 23.735417137145998, - "mean_length": 1716.6, - "loss": 1.5831042528152466, - "sps": 1337.9087240256438 + "num_episodes": 651, + "mean_reward": 1667.2202544736863, + "mean_length": 22557.46, + "survival_pct": 0.37595766666666663, + "max_steps": 60000, + "loss": -0.09385547041893005, + "sps": 687.3361936552064 }, { "update": 1850, "global_step": 7577600, - "num_episodes": 2575, - "mean_reward": 23.735417137145998, - "mean_length": 1716.6, - "loss": 0.5824832320213318, - "sps": 1306.8006548657663 + "num_episodes": 655, + "mean_reward": 1615.1273900747299, + "mean_length": 22353.29, + "survival_pct": 0.37255483333333334, + "max_steps": 60000, + "loss": 0.5962892174720764, + "sps": 577.3992316901965 }, { "update": 1855, "global_step": 7598080, - "num_episodes": 2579, - "mean_reward": 25.101176109313965, - "mean_length": 2001.32, - "loss": 3.1498076915740967, - "sps": 383.12937784399907 + "num_episodes": 655, + "mean_reward": 1615.1273900747299, + "mean_length": 22353.29, + "survival_pct": 0.37255483333333334, + "max_steps": 60000, + "loss": 30.23639488220215, + "sps": 604.6227237743161 }, { "update": 1860, "global_step": 7618560, - "num_episodes": 2607, - "mean_reward": 27.537090787887575, - "mean_length": 2252.34, - "loss": 2.7028920650482178, - "sps": 276.49082168209895 + "num_episodes": 655, + "mean_reward": 1615.1273900747299, + "mean_length": 22353.29, + "survival_pct": 0.37255483333333334, + "max_steps": 60000, + "loss": -0.19098417460918427, + "sps": 683.5282670532649 }, { "update": 1865, "global_step": 7639040, - "num_episodes": 2609, - "mean_reward": 27.678814001083374, - "mean_length": 2266.75, - "loss": 1.0194354057312012, - "sps": 829.6019480631423 + "num_episodes": 655, + "mean_reward": 1615.1273900747299, + "mean_length": 22353.29, + "survival_pct": 0.37255483333333334, + "max_steps": 60000, + "loss": -0.1761397421360016, + "sps": 638.4061361181431 }, { "update": 1870, "global_step": 7659520, - "num_episodes": 2609, - "mean_reward": 27.678814001083374, - "mean_length": 2266.75, - "loss": 0.69590824842453, - "sps": 1339.889941319705 + "num_episodes": 656, + "mean_reward": 1621.1403862142563, + "mean_length": 22950.87, + "survival_pct": 0.3825145, + "max_steps": 60000, + "loss": 1.2790898084640503, + "sps": 692.9472830895132 }, { "update": 1875, "global_step": 7680000, - "num_episodes": 2610, - "mean_reward": 27.743932600021363, - "mean_length": 2363.84, - "loss": 4.22328519821167, - "sps": 1047.687217978352 + "num_episodes": 656, + "mean_reward": 1621.1403862142563, + "mean_length": 22950.87, + "survival_pct": 0.3825145, + "max_steps": 60000, + "loss": 0.5525964498519897, + "sps": 708.1301830175981 }, { "update": 1880, "global_step": 7700480, - "num_episodes": 2623, - "mean_reward": 28.715706405639647, - "mean_length": 2373.15, - "loss": 15.052788734436035, - "sps": 180.92434386649956 + "num_episodes": 656, + "mean_reward": 1621.1403862142563, + "mean_length": 22950.87, + "survival_pct": 0.3825145, + "max_steps": 60000, + "loss": 0.05596184730529785, + "sps": 688.5831713801172 }, { "update": 1885, "global_step": 7720960, - "num_episodes": 2650, - "mean_reward": 28.79868775844574, - "mean_length": 2376.77, - "loss": 141.97862243652344, - "sps": 189.85363801957897 + "num_episodes": 656, + "mean_reward": 1621.1403862142563, + "mean_length": 22950.87, + "survival_pct": 0.3825145, + "max_steps": 60000, + "loss": -0.039150021970272064, + "sps": 707.8312184620268 }, { "update": 1890, "global_step": 7741440, - "num_episodes": 2663, - "mean_reward": 29.01506271839142, - "mean_length": 2187.12, - "loss": 2.7120704650878906, - "sps": 442.6728082704278 + "num_episodes": 658, + "mean_reward": 1769.352957472801, + "mean_length": 23749.89, + "survival_pct": 0.3958315, + "max_steps": 60000, + "loss": 1.1895029544830322, + "sps": 799.8935632475255 }, { "update": 1895, "global_step": 7761920, - "num_episodes": 2664, - "mean_reward": 28.84576090812683, - "mean_length": 2212.69, - "loss": 0.6469894647598267, - "sps": 1603.7569257295258 + "num_episodes": 658, + "mean_reward": 1769.352957472801, + "mean_length": 23749.89, + "survival_pct": 0.3958315, + "max_steps": 60000, + "loss": 1.1538077592849731, + "sps": 762.3956777604958 }, { "update": 1900, "global_step": 7782400, - "num_episodes": 2665, - "mean_reward": 29.100953969955444, - "mean_length": 2212.69, - "loss": 5.927608489990234, - "sps": 1560.5345956258918 + "num_episodes": 659, + "mean_reward": 1825.4085091924667, + "mean_length": 24347.31, + "survival_pct": 0.4057885, + "max_steps": 60000, + "loss": 0.6419044137001038, + "sps": 703.4263875262799 }, { "update": 1905, "global_step": 7802880, - "num_episodes": 2677, - "mean_reward": 29.594479999542237, - "mean_length": 2279.92, - "loss": 3.0546085834503174, - "sps": 239.632742367245 + "num_episodes": 659, + "mean_reward": 1825.4085091924667, + "mean_length": 24347.31, + "survival_pct": 0.4057885, + "max_steps": 60000, + "loss": 0.15757112205028534, + "sps": 674.3401183927315 }, { "update": 1910, "global_step": 7823360, - "num_episodes": 2696, - "mean_reward": 29.67495318889618, - "mean_length": 2013.09, - "loss": 0.40485668182373047, - "sps": 483.9873532409158 + "num_episodes": 660, + "mean_reward": 1898.589024977684, + "mean_length": 24923.49, + "survival_pct": 0.4153915, + "max_steps": 60000, + "loss": -0.06820769608020782, + "sps": 831.2280725759563 }, { "update": 1915, "global_step": 7843840, - "num_episodes": 2706, - "mean_reward": 30.284353771209716, - "mean_length": 2114.96, - "loss": 1.9687494039535522, - "sps": 668.5066115534931 + "num_episodes": 664, + "mean_reward": 1901.6304970765113, + "mean_length": 24331.52, + "survival_pct": 0.40552533333333335, + "max_steps": 60000, + "loss": -0.050920240581035614, + "sps": 814.7046480629325 }, { "update": 1920, "global_step": 7864320, - "num_episodes": 2713, - "mean_reward": 28.00047863960266, - "mean_length": 1718.87, - "loss": 0.2566870450973511, - "sps": 774.1152448288775 + "num_episodes": 664, + "mean_reward": 1901.6304970765113, + "mean_length": 24331.52, + "survival_pct": 0.40552533333333335, + "max_steps": 60000, + "loss": -0.14338093996047974, + "sps": 799.537306564484 }, { "update": 1925, "global_step": 7884800, - "num_episodes": 2728, - "mean_reward": 28.49471785068512, - "mean_length": 1709.19, - "loss": 0.5462437272071838, - "sps": 335.32752289720815 + "num_episodes": 664, + "mean_reward": 1901.6304970765113, + "mean_length": 24331.52, + "survival_pct": 0.40552533333333335, + "max_steps": 60000, + "loss": -0.11479577422142029, + "sps": 803.4181345293225 }, { "update": 1930, "global_step": 7905280, - "num_episodes": 2749, - "mean_reward": 29.774335384368896, - "mean_length": 1793.46, - "loss": 1.6195909976959229, - "sps": 151.6658643145884 + "num_episodes": 664, + "mean_reward": 1901.6304970765113, + "mean_length": 24331.52, + "survival_pct": 0.40552533333333335, + "max_steps": 60000, + "loss": -0.061470456421375275, + "sps": 778.5318076168905 }, { "update": 1935, "global_step": 7925760, - "num_episodes": 2763, - "mean_reward": 29.902848501205444, - "mean_length": 1903.12, - "loss": 3.346653461456299, - "sps": 299.9200663991073 + "num_episodes": 664, + "mean_reward": 1901.6304970765113, + "mean_length": 24331.52, + "survival_pct": 0.40552533333333335, + "max_steps": 60000, + "loss": -0.17063158750534058, + "sps": 797.1980171427649 }, { "update": 1940, "global_step": 7946240, - "num_episodes": 2766, - "mean_reward": 29.951358599662782, - "mean_length": 1817.08, - "loss": 2.196648120880127, - "sps": 612.8349441003236 + "num_episodes": 665, + "mean_reward": 1851.0390957093239, + "mean_length": 24531.52, + "survival_pct": 0.40885866666666665, + "max_steps": 60000, + "loss": 0.14837267994880676, + "sps": 741.7946196855224 }, { "update": 1945, "global_step": 7966720, - "num_episodes": 2775, - "mean_reward": 27.90867582321167, - "mean_length": 1530.25, - "loss": 5.727337837219238, - "sps": 686.4829120395315 + "num_episodes": 665, + "mean_reward": 1851.0390957093239, + "mean_length": 24531.52, + "survival_pct": 0.40885866666666665, + "max_steps": 60000, + "loss": 0.07289181649684906, + "sps": 799.7640530972891 }, { "update": 1950, "global_step": 7987200, - "num_episodes": 2791, - "mean_reward": 27.395657448768617, - "mean_length": 1515.36, - "loss": 0.2642287015914917, - "sps": 427.4124315071339 + "num_episodes": 665, + "mean_reward": 1851.0390957093239, + "mean_length": 24531.52, + "survival_pct": 0.40885866666666665, + "max_steps": 60000, + "loss": -0.03141336888074875, + "sps": 777.4373642953062 }, { "update": 1955, "global_step": 8007680, - "num_episodes": 2811, - "mean_reward": 29.46188481807709, - "mean_length": 1804.43, - "loss": 1.1504522562026978, - "sps": 440.35140268496957 + "num_episodes": 665, + "mean_reward": 1851.0390957093239, + "mean_length": 24531.52, + "survival_pct": 0.40885866666666665, + "max_steps": 60000, + "loss": -0.1878751814365387, + "sps": 787.4362527685952 }, { "update": 1960, "global_step": 8028160, - "num_episodes": 2811, - "mean_reward": 29.46188481807709, - "mean_length": 1804.43, - "loss": 0.6443239450454712, - "sps": 897.6638295466576 + "num_episodes": 665, + "mean_reward": 1851.0390957093239, + "mean_length": 24531.52, + "survival_pct": 0.40885866666666665, + "max_steps": 60000, + "loss": 0.2337094247341156, + "sps": 836.4509391010391 }, { "update": 1965, "global_step": 8048640, - "num_episodes": 2811, - "mean_reward": 29.46188481807709, - "mean_length": 1804.43, - "loss": 0.9464260339736938, - "sps": 1231.9540613883512 + "num_episodes": 665, + "mean_reward": 1851.0390957093239, + "mean_length": 24531.52, + "survival_pct": 0.40885866666666665, + "max_steps": 60000, + "loss": -0.1974252164363861, + "sps": 825.3140040567519 }, { "update": 1970, "global_step": 8069120, - "num_episodes": 2816, - "mean_reward": 30.10758180618286, - "mean_length": 1900.22, - "loss": 13.55959701538086, - "sps": 762.2875965279533 + "num_episodes": 666, + "mean_reward": 1869.434642584324, + "mean_length": 25128.01, + "survival_pct": 0.41880016666666664, + "max_steps": 60000, + "loss": -0.07606863975524902, + "sps": 811.4969102185196 }, { "update": 1975, "global_step": 8089600, - "num_episodes": 2836, - "mean_reward": 33.175699801445006, - "mean_length": 2303.58, - "loss": 7.275441646575928, - "sps": 227.75311612847298 + "num_episodes": 666, + "mean_reward": 1869.434642584324, + "mean_length": 25128.01, + "survival_pct": 0.41880016666666664, + "max_steps": 60000, + "loss": -0.14107482135295868, + "sps": 865.2539173939119 }, { "update": 1980, "global_step": 8110080, - "num_episodes": 2856, - "mean_reward": 29.203288111686707, - "mean_length": 2029.45, - "loss": 39.95777130126953, - "sps": 325.31625618278383 + "num_episodes": 666, + "mean_reward": 1869.434642584324, + "mean_length": 25128.01, + "survival_pct": 0.41880016666666664, + "max_steps": 60000, + "loss": 0.04902653396129608, + "sps": 835.4859345260967 }, { "update": 1985, "global_step": 8130560, - "num_episodes": 2877, - "mean_reward": 26.940120091438292, - "mean_length": 1738.19, - "loss": 2.9013671875, - "sps": 477.60825044232615 + "num_episodes": 667, + "mean_reward": 1875.4432358384133, + "mean_length": 25725.3, + "survival_pct": 0.428755, + "max_steps": 60000, + "loss": 235.1592254638672, + "sps": 462.53669720868015 }, { "update": 1990, "global_step": 8151040, - "num_episodes": 2881, - "mean_reward": 26.892020201683046, - "mean_length": 1748.27, - "loss": 1.5360685586929321, - "sps": 779.3787507434323 + "num_episodes": 667, + "mean_reward": 1875.4432358384133, + "mean_length": 25725.3, + "survival_pct": 0.428755, + "max_steps": 60000, + "loss": 4.359109401702881, + "sps": 570.3135079716526 }, { "update": 1995, "global_step": 8171520, - "num_episodes": 2887, - "mean_reward": 28.383681316375732, - "mean_length": 2035.38, - "loss": 0.28828829526901245, - "sps": 528.7312594886902 + "num_episodes": 667, + "mean_reward": 1875.4432358384133, + "mean_length": 25725.3, + "survival_pct": 0.428755, + "max_steps": 60000, + "loss": 0.6510128378868103, + "sps": 774.3925100979434 }, { "update": 2000, "global_step": 8192000, - "num_episodes": 2894, - "mean_reward": 27.269041929244995, - "mean_length": 2017.08, - "loss": 92.5503158569336, - "sps": 423.8490051832835 + "num_episodes": 667, + "mean_reward": 1875.4432358384133, + "mean_length": 25725.3, + "survival_pct": 0.428755, + "max_steps": 60000, + "loss": 0.2337443083524704, + "sps": 781.7803733771449 }, { "update": 2005, "global_step": 8212480, - "num_episodes": 2910, - "mean_reward": 29.26204339504242, - "mean_length": 2043.4, - "loss": 1.737417221069336, - "sps": 319.480134990497 + "num_episodes": 667, + "mean_reward": 1875.4432358384133, + "mean_length": 25725.3, + "survival_pct": 0.32156625, + "max_steps": 80000, + "loss": 0.05558648705482483, + "sps": 760.4769751448292 }, { "update": 2010, "global_step": 8232960, - "num_episodes": 2914, - "mean_reward": 27.918287878036498, - "mean_length": 1848.0, - "loss": 9.918323516845703, - "sps": 520.2825326468419 + "num_episodes": 667, + "mean_reward": 1875.4432358384133, + "mean_length": 25725.3, + "survival_pct": 0.32156625, + "max_steps": 80000, + "loss": -0.010526187717914581, + "sps": 782.7780404574484 }, { "update": 2015, "global_step": 8253440, - "num_episodes": 2923, - "mean_reward": 25.578041405677794, - "mean_length": 1641.01, - "loss": 101.00448608398438, - "sps": 412.83971034964594 + "num_episodes": 667, + "mean_reward": 1875.4432358384133, + "mean_length": 25725.3, + "survival_pct": 0.32156625, + "max_steps": 80000, + "loss": -0.0630020946264267, + "sps": 770.7413751342134 }, { "update": 2020, "global_step": 8273920, - "num_episodes": 2929, - "mean_reward": 24.985388655662536, - "mean_length": 1728.38, - "loss": 33.80594253540039, - "sps": 407.32375547775194 + "num_episodes": 667, + "mean_reward": 1875.4432358384133, + "mean_length": 25725.3, + "survival_pct": 0.32156625, + "max_steps": 80000, + "loss": 0.22121278941631317, + "sps": 791.5506888179068 }, { "update": 2025, "global_step": 8294400, - "num_episodes": 2941, - "mean_reward": 54.48088625907898, - "mean_length": 1727.06, - "loss": 57.52255630493164, - "sps": 251.07848653412563 + "num_episodes": 667, + "mean_reward": 1875.4432358384133, + "mean_length": 25725.3, + "survival_pct": 0.32156625, + "max_steps": 80000, + "loss": -0.09089811146259308, + "sps": 778.5213294943511 }, { "update": 2030, "global_step": 8314880, - "num_episodes": 2952, - "mean_reward": 62.7720309972763, - "mean_length": 1877.51, - "loss": 18.3802547454834, - "sps": 355.5720033718748 + "num_episodes": 667, + "mean_reward": 1875.4432358384133, + "mean_length": 25725.3, + "survival_pct": 0.32156625, + "max_steps": 80000, + "loss": -0.16075977683067322, + "sps": 755.6295349772931 }, { "update": 2035, "global_step": 8335360, - "num_episodes": 2957, - "mean_reward": 62.98542010307312, - "mean_length": 1979.5, - "loss": 1.3044229745864868, - "sps": 646.2262220511143 + "num_episodes": 667, + "mean_reward": 1875.4432358384133, + "mean_length": 25725.3, + "survival_pct": 0.32156625, + "max_steps": 80000, + "loss": -0.158203586935997, + "sps": 754.4182628574841 }, { "update": 2040, "global_step": 8355840, - "num_episodes": 2961, - "mean_reward": 63.64336392879486, - "mean_length": 2180.16, - "loss": 92.43401336669922, - "sps": 659.5681486510539 + "num_episodes": 667, + "mean_reward": 1875.4432358384133, + "mean_length": 25725.3, + "survival_pct": 0.32156625, + "max_steps": 80000, + "loss": -0.21690981090068817, + "sps": 717.6735596436803 }, { "update": 2045, "global_step": 8376320, - "num_episodes": 2962, - "mean_reward": 63.40275900363922, - "mean_length": 2203.23, - "loss": 1.014630675315857, - "sps": 1446.1879050125103 + "num_episodes": 670, + "mean_reward": 1922.8468935227395, + "mean_length": 26134.49, + "survival_pct": 0.326681125, + "max_steps": 80000, + "loss": 156.1393585205078, + "sps": 323.2563608041114 }, { "update": 2050, "global_step": 8396800, - "num_episodes": 2986, - "mean_reward": 79.26061116695404, - "mean_length": 2214.81, - "loss": 29.98980712890625, - "sps": 205.60997792847448 + "num_episodes": 670, + "mean_reward": 1922.8468935227395, + "mean_length": 26134.49, + "survival_pct": 0.326681125, + "max_steps": 80000, + "loss": 0.16352523863315582, + "sps": 454.8234843620012 }, { "update": 2055, "global_step": 8417280, - "num_episodes": 3001, - "mean_reward": 82.85091641426087, - "mean_length": 2227.86, - "loss": 32.844966888427734, - "sps": 159.2487235394598 + "num_episodes": 671, + "mean_reward": 1930.1808717942238, + "mean_length": 26915.84, + "survival_pct": 0.336448, + "max_steps": 80000, + "loss": -0.08801546692848206, + "sps": 801.0593266816616 }, { "update": 2060, "global_step": 8437760, - "num_episodes": 3027, - "mean_reward": 78.89424407482147, - "mean_length": 1849.23, - "loss": 103.22246551513672, - "sps": 213.8993421623802 + "num_episodes": 671, + "mean_reward": 1930.1808717942238, + "mean_length": 26915.84, + "survival_pct": 0.336448, + "max_steps": 80000, + "loss": -0.13592661917209625, + "sps": 779.2904032377721 }, { "update": 2065, "global_step": 8458240, - "num_episodes": 3033, - "mean_reward": 78.97739854335785, - "mean_length": 1848.21, - "loss": 29.219009399414062, - "sps": 303.98893287836097 + "num_episodes": 672, + "mean_reward": 1975.5371123337745, + "mean_length": 27315.84, + "survival_pct": 0.34144800000000003, + "max_steps": 80000, + "loss": -0.17474070191383362, + "sps": 545.5720531350578 }, { "update": 2070, "global_step": 8478720, - "num_episodes": 3038, - "mean_reward": 78.37423317909241, - "mean_length": 1807.15, - "loss": 34.15611267089844, - "sps": 219.78900023670354 + "num_episodes": 673, + "mean_reward": 2013.7483372473716, + "mean_length": 28111.01, + "survival_pct": 0.351387625, + "max_steps": 80000, + "loss": 12.530708312988281, + "sps": 499.4637464037537 }, { "update": 2075, "global_step": 8499200, - "num_episodes": 3052, - "mean_reward": 75.33004099369049, - "mean_length": 1865.63, - "loss": 26.766494750976562, - "sps": 339.7451627672537 + "num_episodes": 673, + "mean_reward": 2013.7483372473716, + "mean_length": 28111.01, + "survival_pct": 0.351387625, + "max_steps": 80000, + "loss": 0.8680031299591064, + "sps": 530.5638999597164 }, { "update": 2080, "global_step": 8519680, - "num_episodes": 3056, - "mean_reward": 75.64053434848785, - "mean_length": 1968.04, - "loss": 176.32748413085938, - "sps": 334.0194819221175 + "num_episodes": 673, + "mean_reward": 2013.7483372473716, + "mean_length": 28111.01, + "survival_pct": 0.351387625, + "max_steps": 80000, + "loss": 19.798368453979492, + "sps": 656.9070684694709 }, { "update": 2085, "global_step": 8540160, - "num_episodes": 3066, - "mean_reward": 74.47905586719513, - "mean_length": 1544.14, - "loss": 167.32608032226562, - "sps": 393.50550599249544 + "num_episodes": 673, + "mean_reward": 2013.7483372473716, + "mean_length": 28111.01, + "survival_pct": 0.351387625, + "max_steps": 80000, + "loss": -0.19701889157295227, + "sps": 698.9096675564882 }, { "update": 2090, "global_step": 8560640, - "num_episodes": 3072, - "mean_reward": 75.01324489593506, - "mean_length": 1620.95, - "loss": 40.1760139465332, - "sps": 272.5369799883836 + "num_episodes": 673, + "mean_reward": 2013.7483372473716, + "mean_length": 28111.01, + "survival_pct": 0.351387625, + "max_steps": 80000, + "loss": -0.0173691064119339, + "sps": 728.1424091253976 }, { "update": 2095, "global_step": 8581120, - "num_episodes": 3084, - "mean_reward": 80.59875289440156, - "mean_length": 1815.35, - "loss": 23.691143035888672, - "sps": 460.4555876617225 + "num_episodes": 674, + "mean_reward": 1971.775068204403, + "mean_length": 28511.01, + "survival_pct": 0.356387625, + "max_steps": 80000, + "loss": 2452.706787109375, + "sps": 356.8059399544614 }, { "update": 2100, "global_step": 8601600, - "num_episodes": 3092, - "mean_reward": 151.1377474308014, - "mean_length": 2023.52, - "loss": 119.31157684326172, - "sps": 230.98807371797147 + "num_episodes": 674, + "mean_reward": 1971.775068204403, + "mean_length": 28511.01, + "survival_pct": 0.356387625, + "max_steps": 80000, + "loss": 14.414767265319824, + "sps": 585.3881767861222 }, { "update": 2105, "global_step": 8622080, - "num_episodes": 3104, - "mean_reward": 145.0986036682129, - "mean_length": 1964.44, - "loss": 17.907983779907227, - "sps": 136.7176138415176 + "num_episodes": 674, + "mean_reward": 1971.775068204403, + "mean_length": 28511.01, + "survival_pct": 0.356387625, + "max_steps": 80000, + "loss": 0.022661790251731873, + "sps": 678.3736040372619 }, { "update": 2110, "global_step": 8642560, - "num_episodes": 3113, - "mean_reward": 145.05979912757874, - "mean_length": 2015.23, - "loss": 238.5266876220703, - "sps": 191.61578614248398 + "num_episodes": 674, + "mean_reward": 1971.775068204403, + "mean_length": 28511.01, + "survival_pct": 0.356387625, + "max_steps": 80000, + "loss": -0.20638275146484375, + "sps": 665.6932482303778 }, { "update": 2115, "global_step": 8663040, - "num_episodes": 3127, - "mean_reward": 145.9841158914566, - "mean_length": 2223.16, - "loss": 279.5526428222656, - "sps": 178.22145258830818 + "num_episodes": 674, + "mean_reward": 1971.775068204403, + "mean_length": 28511.01, + "survival_pct": 0.356387625, + "max_steps": 80000, + "loss": -0.1961488574743271, + "sps": 661.1262168442004 }, { "update": 2120, "global_step": 8683520, - "num_episodes": 3129, - "mean_reward": 161.8294042444229, - "mean_length": 2317.37, - "loss": 4402.13232421875, - "sps": 187.96868067815723 + "num_episodes": 674, + "mean_reward": 1971.775068204403, + "mean_length": 28511.01, + "survival_pct": 0.356387625, + "max_steps": 80000, + "loss": -0.16852501034736633, + "sps": 666.07765454515 }, { "update": 2125, "global_step": 8704000, - "num_episodes": 3131, - "mean_reward": 254.4432011270523, - "mean_length": 2415.71, - "loss": 253.15243530273438, - "sps": 389.3006404112279 + "num_episodes": 675, + "mean_reward": 1988.447893064022, + "mean_length": 29306.48, + "survival_pct": 0.366331, + "max_steps": 80000, + "loss": 0.14007116854190826, + "sps": 633.2168784322698 }, { "update": 2130, "global_step": 8724480, - "num_episodes": 3136, - "mean_reward": 308.86108244895934, - "mean_length": 2455.38, - "loss": 42.87656784057617, - "sps": 289.06916804822475 + "num_episodes": 675, + "mean_reward": 1988.447893064022, + "mean_length": 29306.48, + "survival_pct": 0.366331, + "max_steps": 80000, + "loss": 0.08940272033214569, + "sps": 621.5424559601437 }, { "update": 2135, "global_step": 8744960, - "num_episodes": 3151, - "mean_reward": 442.90291794776914, - "mean_length": 2544.55, - "loss": 52.280975341796875, - "sps": 168.18531939110147 + "num_episodes": 675, + "mean_reward": 1988.447893064022, + "mean_length": 29306.48, + "survival_pct": 0.366331, + "max_steps": 80000, + "loss": -0.16373726725578308, + "sps": 689.1369758818673 }, { "update": 2140, "global_step": 8765440, - "num_episodes": 3153, - "mean_reward": 443.3635806703567, - "mean_length": 2596.27, - "loss": 123.76752471923828, - "sps": 250.64368624913917 + "num_episodes": 678, + "mean_reward": 2002.1588407492638, + "mean_length": 29307.47, + "survival_pct": 0.366343375, + "max_steps": 80000, + "loss": 9.728021621704102, + "sps": 466.25151106127714 }, { "update": 2145, "global_step": 8785920, - "num_episodes": 3154, - "mean_reward": 443.81920017719267, - "mean_length": 2693.35, - "loss": 8.35338020324707, - "sps": 403.9011407049708 + "num_episodes": 681, + "mean_reward": 1996.5775413012504, + "mean_length": 28885.87, + "survival_pct": 0.361073375, + "max_steps": 80000, + "loss": 0.4610579311847687, + "sps": 551.8281626137556 }, { "update": 2150, "global_step": 8806400, - "num_episodes": 3158, - "mean_reward": 578.3950145959855, - "mean_length": 2690.97, - "loss": 117.6234359741211, - "sps": 427.3976727795614 + "num_episodes": 681, + "mean_reward": 1996.5775413012504, + "mean_length": 28885.87, + "survival_pct": 0.361073375, + "max_steps": 80000, + "loss": 0.20792128145694733, + "sps": 708.9972554003779 }, { "update": 2155, "global_step": 8826880, - "num_episodes": 3168, - "mean_reward": 593.7985137224198, - "mean_length": 2964.72, - "loss": 27.907575607299805, - "sps": 408.92434877647025 + "num_episodes": 681, + "mean_reward": 1996.5775413012504, + "mean_length": 28885.87, + "survival_pct": 0.361073375, + "max_steps": 80000, + "loss": -0.21552087366580963, + "sps": 688.7493020847775 }, { "update": 2160, "global_step": 8847360, - "num_episodes": 3176, - "mean_reward": 696.11108481884, - "mean_length": 2789.14, - "loss": 37.44743347167969, - "sps": 405.43106143344477 + "num_episodes": 681, + "mean_reward": 1996.5775413012504, + "mean_length": 28885.87, + "survival_pct": 0.361073375, + "max_steps": 80000, + "loss": -0.16776643693447113, + "sps": 693.0826430290605 }, { "update": 2165, "global_step": 8867840, - "num_episodes": 3181, - "mean_reward": 691.3086440181733, - "mean_length": 2841.13, - "loss": 444.12481689453125, - "sps": 410.99575565900966 + "num_episodes": 681, + "mean_reward": 1996.5775413012504, + "mean_length": 28885.87, + "survival_pct": 0.361073375, + "max_steps": 80000, + "loss": 0.11363191902637482, + "sps": 709.5181085639529 }, { "update": 2170, "global_step": 8888320, - "num_episodes": 3186, - "mean_reward": 696.4550316858291, - "mean_length": 2910.25, - "loss": 221.63902282714844, - "sps": 355.0140974319845 + "num_episodes": 681, + "mean_reward": 1996.5775413012504, + "mean_length": 28885.87, + "survival_pct": 0.361073375, + "max_steps": 80000, + "loss": 0.0609743595123291, + "sps": 712.5366166562799 }, { "update": 2175, "global_step": 8908800, - "num_episodes": 3191, - "mean_reward": 696.0580731773376, - "mean_length": 3004.52, - "loss": 762.4512939453125, - "sps": 303.24516246589195 + "num_episodes": 681, + "mean_reward": 1996.5775413012504, + "mean_length": 28885.87, + "survival_pct": 0.361073375, + "max_steps": 80000, + "loss": 0.7529717087745667, + "sps": 721.4643529853796 }, { "update": 2180, "global_step": 8929280, - "num_episodes": 3197, - "mean_reward": 814.6755250024795, - "mean_length": 3002.62, - "loss": 409.33624267578125, - "sps": 427.0124459168339 + "num_episodes": 681, + "mean_reward": 1996.5775413012504, + "mean_length": 28885.87, + "survival_pct": 0.361073375, + "max_steps": 80000, + "loss": 0.46089768409729004, + "sps": 729.282506794838 }, { "update": 2185, "global_step": 8949760, - "num_episodes": 3210, - "mean_reward": 934.5317647647857, - "mean_length": 3211.49, - "loss": 95.55060577392578, - "sps": 125.75423605418091 + "num_episodes": 681, + "mean_reward": 1996.5775413012504, + "mean_length": 28885.87, + "survival_pct": 0.361073375, + "max_steps": 80000, + "loss": -0.04369640350341797, + "sps": 731.0711424448074 }, { "update": 2190, "global_step": 8970240, - "num_episodes": 3214, - "mean_reward": 1002.574015007019, - "mean_length": 3132.07, - "loss": 68.63960266113281, - "sps": 206.72189870239387 + "num_episodes": 681, + "mean_reward": 1996.5775413012504, + "mean_length": 28885.87, + "survival_pct": 0.361073375, + "max_steps": 80000, + "loss": -0.2550358772277832, + "sps": 741.8354272639722 }, { "update": 2195, "global_step": 8990720, - "num_episodes": 3225, - "mean_reward": 1036.229081878662, - "mean_length": 3290.05, - "loss": 450.1454772949219, - "sps": 308.8153724070321 + "num_episodes": 681, + "mean_reward": 1996.5775413012504, + "mean_length": 28885.87, + "survival_pct": 0.361073375, + "max_steps": 80000, + "loss": -0.1644577533006668, + "sps": 712.1224425521518 }, { "update": 2200, "global_step": 9011200, - "num_episodes": 3226, - "mean_reward": 1175.8488982009887, - "mean_length": 3386.93, - "loss": 11.516603469848633, - "sps": 505.34211227316496 + "num_episodes": 682, + "mean_reward": 2004.891281106472, + "mean_length": 29684.96, + "survival_pct": 0.371062, + "max_steps": 80000, + "loss": 19.648794174194336, + "sps": 465.00511290277484 }, { "update": 2205, "global_step": 9031680, - "num_episodes": 3228, - "mean_reward": 1241.2947177028657, - "mean_length": 3452.12, - "loss": 998.0031127929688, - "sps": 380.7435127282772 + "num_episodes": 686, + "mean_reward": 2088.85676854372, + "mean_length": 30082.53, + "survival_pct": 0.376031625, + "max_steps": 80000, + "loss": 164.72410583496094, + "sps": 526.5227813400744 }, { "update": 2210, "global_step": 9052160, - "num_episodes": 3234, - "mean_reward": 1148.464052476883, - "mean_length": 3444.2, - "loss": 0.2201317399740219, - "sps": 447.01108197826755 + "num_episodes": 687, + "mean_reward": 2035.9988553357125, + "mean_length": 30482.53, + "survival_pct": 0.381031625, + "max_steps": 80000, + "loss": 0.7927183508872986, + "sps": 546.9580142534905 }, { "update": 2215, "global_step": 9072640, - "num_episodes": 3251, - "mean_reward": 983.5756113386154, - "mean_length": 3185.62, - "loss": 10.140728950500488, - "sps": 285.88922550188585 + "num_episodes": 687, + "mean_reward": 2035.9988553357125, + "mean_length": 30482.53, + "survival_pct": 0.381031625, + "max_steps": 80000, + "loss": 0.38622233271598816, + "sps": 654.3004159066745 }, { "update": 2220, "global_step": 9093120, - "num_episodes": 3267, - "mean_reward": 830.9440940666199, - "mean_length": 2711.54, - "loss": 23.857481002807617, - "sps": 225.0139483384211 + "num_episodes": 688, + "mean_reward": 2065.251064198017, + "mean_length": 30882.53, + "survival_pct": 0.386031625, + "max_steps": 80000, + "loss": 0.734276533126831, + "sps": 253.13496237874534 }, { "update": 2225, "global_step": 9113600, - "num_episodes": 3285, - "mean_reward": 755.7127734375, - "mean_length": 2369.97, - "loss": 263.3361511230469, - "sps": 274.6876121563017 + "num_episodes": 689, + "mean_reward": 2089.462522289753, + "mean_length": 31681.58, + "survival_pct": 0.39601975, + "max_steps": 80000, + "loss": 5.1840643882751465, + "sps": 407.23576672305194 }, { "update": 2230, "global_step": 9134080, - "num_episodes": 3291, - "mean_reward": 754.6721585416794, - "mean_length": 2306.49, - "loss": 4289.046875, - "sps": 174.4840112420559 + "num_episodes": 689, + "mean_reward": 2089.462522289753, + "mean_length": 31681.58, + "survival_pct": 0.39601975, + "max_steps": 80000, + "loss": 3.3036017417907715, + "sps": 334.98140076678567 }, { "update": 2235, "global_step": 9154560, - "num_episodes": 3298, - "mean_reward": 730.0353905773163, - "mean_length": 2354.08, - "loss": 84.65528869628906, - "sps": 265.5093767251286 + "num_episodes": 689, + "mean_reward": 2089.462522289753, + "mean_length": 31681.58, + "survival_pct": 0.39601975, + "max_steps": 80000, + "loss": 12.958847045898438, + "sps": 413.60512125042015 }, { "update": 2240, "global_step": 9175040, - "num_episodes": 3300, - "mean_reward": 660.2916961956024, - "mean_length": 2256.21, - "loss": 388.5572814941406, - "sps": 407.65937488684284 + "num_episodes": 689, + "mean_reward": 2089.462522289753, + "mean_length": 31681.58, + "survival_pct": 0.39601975, + "max_steps": 80000, + "loss": 11.410442352294922, + "sps": 429.40010384163304 }, { "update": 2245, "global_step": 9195520, - "num_episodes": 3309, - "mean_reward": 687.2124669837951, - "mean_length": 2477.43, - "loss": 172.86532592773438, - "sps": 298.38812190862865 + "num_episodes": 689, + "mean_reward": 2089.462522289753, + "mean_length": 31681.58, + "survival_pct": 0.39601975, + "max_steps": 80000, + "loss": 0.4679904580116272, + "sps": 442.22049673117283 }, { "update": 2250, "global_step": 9216000, - "num_episodes": 3310, - "mean_reward": 687.4369230651855, - "mean_length": 2481.91, - "loss": 14.442253112792969, - "sps": 667.1891280431901 + "num_episodes": 690, + "mean_reward": 2216.5914781308174, + "mean_length": 32423.39, + "survival_pct": 0.405292375, + "max_steps": 80000, + "loss": 13.81257152557373, + "sps": 503.0567425680048 }, { "update": 2255, "global_step": 9236480, - "num_episodes": 3320, - "mean_reward": 686.6187604236603, - "mean_length": 2631.57, - "loss": 8.769046783447266, - "sps": 248.0906360841076 + "num_episodes": 690, + "mean_reward": 2216.5914781308174, + "mean_length": 32423.39, + "survival_pct": 0.405292375, + "max_steps": 80000, + "loss": 0.347695916891098, + "sps": 582.6999735714041 }, { "update": 2260, "global_step": 9256960, - "num_episodes": 3324, - "mean_reward": 714.0761754512787, - "mean_length": 2596.79, - "loss": 2.7038824558258057, - "sps": 611.7833643654915 + "num_episodes": 690, + "mean_reward": 2216.5914781308174, + "mean_length": 32423.39, + "survival_pct": 0.405292375, + "max_steps": 80000, + "loss": 0.276647686958313, + "sps": 530.4694388918329 }, { "update": 2265, "global_step": 9277440, - "num_episodes": 3333, - "mean_reward": 514.9936645078659, - "mean_length": 2480.39, - "loss": 107.10433959960938, - "sps": 352.06042337330143 + "num_episodes": 690, + "mean_reward": 2216.5914781308174, + "mean_length": 32423.39, + "survival_pct": 0.405292375, + "max_steps": 80000, + "loss": 0.12728533148765564, + "sps": 524.460969045791 }, { "update": 2270, "global_step": 9297920, - "num_episodes": 3333, - "mean_reward": 514.9936645078659, - "mean_length": 2480.39, - "loss": 205.3878936767578, - "sps": 557.4872251219026 + "num_episodes": 690, + "mean_reward": 2216.5914781308174, + "mean_length": 32423.39, + "survival_pct": 0.405292375, + "max_steps": 80000, + "loss": 0.27348071336746216, + "sps": 540.9874205296653 }, { "update": 2275, "global_step": 9318400, - "num_episodes": 3341, - "mean_reward": 516.5512644720078, - "mean_length": 2668.78, - "loss": 10.443692207336426, - "sps": 381.5541543957095 + "num_episodes": 690, + "mean_reward": 2216.5914781308174, + "mean_length": 32423.39, + "survival_pct": 0.405292375, + "max_steps": 80000, + "loss": 0.07365398108959198, + "sps": 560.7709890786243 }, { "update": 2280, "global_step": 9338880, - "num_episodes": 3349, - "mean_reward": 462.85379876613615, - "mean_length": 2668.78, - "loss": 442.84942626953125, - "sps": 279.7896158892528 + "num_episodes": 692, + "mean_reward": 2220.5268848729133, + "mean_length": 32823.83, + "survival_pct": 0.41029787500000003, + "max_steps": 80000, + "loss": 0.7585805654525757, + "sps": 369.35670437401643 }, { "update": 2285, "global_step": 9359360, - "num_episodes": 3360, - "mean_reward": 507.16173345565795, - "mean_length": 2757.46, - "loss": 65.15730285644531, - "sps": 265.8172803900086 + "num_episodes": 692, + "mean_reward": 2220.5268848729133, + "mean_length": 32823.83, + "survival_pct": 0.41029787500000003, + "max_steps": 80000, + "loss": 84.29670715332031, + "sps": 372.3955428292709 }, { "update": 2290, "global_step": 9379840, - "num_episodes": 3363, - "mean_reward": 507.4162758731842, - "mean_length": 2818.63, - "loss": 70.9657211303711, - "sps": 353.8588588797036 + "num_episodes": 692, + "mean_reward": 2220.5268848729133, + "mean_length": 32823.83, + "survival_pct": 0.41029787500000003, + "max_steps": 80000, + "loss": 0.22566762566566467, + "sps": 391.1882509653406 }, { "update": 2295, "global_step": 9400320, - "num_episodes": 3368, - "mean_reward": 551.9985440444947, - "mean_length": 2959.32, - "loss": 5.74236536026001, - "sps": 340.8187159543217 + "num_episodes": 692, + "mean_reward": 2220.5268848729133, + "mean_length": 32823.83, + "survival_pct": 0.41029787500000003, + "max_steps": 80000, + "loss": 0.11116530001163483, + "sps": 404.8368327596648 }, { "update": 2300, "global_step": 9420800, - "num_episodes": 3377, - "mean_reward": 564.7782349967956, - "mean_length": 3130.89, - "loss": 5.68098783493042, - "sps": 383.2875448626093 + "num_episodes": 693, + "mean_reward": 2245.6658289647103, + "mean_length": 33605.52, + "survival_pct": 0.42006899999999997, + "max_steps": 80000, + "loss": 14.594441413879395, + "sps": 319.4636076671403 }, { "update": 2305, "global_step": 9441280, - "num_episodes": 3390, - "mean_reward": 594.4548657274246, - "mean_length": 3136.03, - "loss": 178.03961181640625, - "sps": 278.82986749605175 + "num_episodes": 693, + "mean_reward": 2245.6658289647103, + "mean_length": 33605.52, + "survival_pct": 0.42006899999999997, + "max_steps": 80000, + "loss": 4.601461410522461, + "sps": 358.98227257652104 }, { "update": 2310, "global_step": 9461760, - "num_episodes": 3391, - "mean_reward": 595.1103666639328, - "mean_length": 3234.65, - "loss": 73.16796112060547, - "sps": 386.1064879014476 + "num_episodes": 693, + "mean_reward": 2245.6658289647103, + "mean_length": 33605.52, + "survival_pct": 0.42006899999999997, + "max_steps": 80000, + "loss": -0.08196337521076202, + "sps": 397.59174172984365 }, { "update": 2315, "global_step": 9482240, - "num_episodes": 3397, - "mean_reward": 560.5055163288116, - "mean_length": 3213.23, - "loss": 1437.6278076171875, - "sps": 198.45749967938127 + "num_episodes": 693, + "mean_reward": 2245.6658289647103, + "mean_length": 33605.52, + "survival_pct": 0.42006899999999997, + "max_steps": 80000, + "loss": -0.038676969707012177, + "sps": 390.256940583996 }, { "update": 2320, "global_step": 9502720, - "num_episodes": 3411, - "mean_reward": 401.03073542118074, - "mean_length": 2986.15, - "loss": 3.3738672733306885, - "sps": 1083.0313220969133 + "num_episodes": 693, + "mean_reward": 2245.6658289647103, + "mean_length": 33605.52, + "survival_pct": 0.42006899999999997, + "max_steps": 80000, + "loss": -0.11568231880664825, + "sps": 386.22805447511666 }, { "update": 2325, "global_step": 9523200, - "num_episodes": 3412, - "mean_reward": 413.53732123851773, - "mean_length": 3083.93, - "loss": 21.815982818603516, - "sps": 659.8576311368523 + "num_episodes": 693, + "mean_reward": 2245.6658289647103, + "mean_length": 33605.52, + "survival_pct": 0.42006899999999997, + "max_steps": 80000, + "loss": -0.09200012683868408, + "sps": 393.62586076527043 }, { "update": 2330, "global_step": 9543680, - "num_episodes": 3413, - "mean_reward": 407.9750119304657, - "mean_length": 3002.15, - "loss": 0.47333452105522156, - "sps": 726.1803705441753 + "num_episodes": 693, + "mean_reward": 2245.6658289647103, + "mean_length": 33605.52, + "survival_pct": 0.42006899999999997, + "max_steps": 80000, + "loss": -0.14359304308891296, + "sps": 390.66784252973497 }, { "update": 2335, "global_step": 9564160, - "num_episodes": 3420, - "mean_reward": 454.97729597568514, - "mean_length": 3001.83, - "loss": 0.5463234186172485, - "sps": 582.7651222934119 + "num_episodes": 693, + "mean_reward": 2245.6658289647103, + "mean_length": 33605.52, + "survival_pct": 0.42006899999999997, + "max_steps": 80000, + "loss": 0.18879307806491852, + "sps": 395.8483711314571 }, { "update": 2340, "global_step": 9584640, - "num_episodes": 3454, - "mean_reward": 401.5923653173447, - "mean_length": 2474.53, - "loss": 0.9732112884521484, - "sps": 190.63133488811127 + "num_episodes": 693, + "mean_reward": 2245.6658289647103, + "mean_length": 33605.52, + "survival_pct": 0.42006899999999997, + "max_steps": 80000, + "loss": -0.1260424256324768, + "sps": 392.3815903936216 }, { "update": 2345, "global_step": 9605120, - "num_episodes": 3475, - "mean_reward": 180.83635659217833, - "mean_length": 1855.64, - "loss": 0.047050729393959045, - "sps": 497.0424644143239 + "num_episodes": 693, + "mean_reward": 2245.6658289647103, + "mean_length": 33605.52, + "survival_pct": 0.42006899999999997, + "max_steps": 80000, + "loss": -0.09693819284439087, + "sps": 394.7211181932461 }, { "update": 2350, "global_step": 9625600, - "num_episodes": 3482, - "mean_reward": 181.7909957742691, - "mean_length": 1925.71, - "loss": 1.1374897956848145, - "sps": 853.7958489004412 + "num_episodes": 693, + "mean_reward": 2245.6658289647103, + "mean_length": 33605.52, + "survival_pct": 0.42006899999999997, + "max_steps": 80000, + "loss": -0.11026182770729065, + "sps": 396.72162235901294 }, { "update": 2355, "global_step": 9646080, - "num_episodes": 3489, - "mean_reward": 158.73119886875153, - "mean_length": 1914.03, - "loss": 1.258785605430603, - "sps": 551.9084865795908 + "num_episodes": 693, + "mean_reward": 2245.6658289647103, + "mean_length": 33605.52, + "survival_pct": 0.42006899999999997, + "max_steps": 80000, + "loss": -0.1978517770767212, + "sps": 389.2953386649375 }, { "update": 2360, "global_step": 9666560, - "num_episodes": 3502, - "mean_reward": 98.6479220199585, - "mean_length": 1635.16, - "loss": 0.43499165773391724, - "sps": 419.3776501549414 + "num_episodes": 697, + "mean_reward": 2322.5986268925667, + "mean_length": 34769.99, + "survival_pct": 0.43462487499999997, + "max_steps": 80000, + "loss": 683.0784912109375, + "sps": 168.6861821242579 }, { "update": 2365, "global_step": 9687040, - "num_episodes": 3515, - "mean_reward": 66.50108590602875, - "mean_length": 1518.13, - "loss": 166.75491333007812, - "sps": 281.0402931910567 + "num_episodes": 698, + "mean_reward": 2311.5085823369027, + "mean_length": 35169.99, + "survival_pct": 0.43962487499999997, + "max_steps": 80000, + "loss": 3170.56884765625, + "sps": 235.4733420148456 }, { "update": 2370, "global_step": 9707520, - "num_episodes": 3528, - "mean_reward": 33.59190697193146, - "mean_length": 1331.87, - "loss": 274.23516845703125, - "sps": 287.97774743130026 + "num_episodes": 699, + "mean_reward": 2308.040635712147, + "mean_length": 34804.58, + "survival_pct": 0.43505725, + "max_steps": 80000, + "loss": 1.0950156450271606, + "sps": 311.37399850720584 }, { "update": 2375, "global_step": 9728000, - "num_episodes": 3538, - "mean_reward": 23.649227323532106, - "mean_length": 1328.19, - "loss": 24.9585018157959, - "sps": 656.234816203577 + "num_episodes": 700, + "mean_reward": 2335.846759979725, + "mean_length": 35204.58, + "survival_pct": 0.44005725, + "max_steps": 80000, + "loss": 1.3819221258163452, + "sps": 322.7681723058591 }, { "update": 2380, "global_step": 9748480, - "num_episodes": 3539, - "mean_reward": 24.15320231437683, - "mean_length": 1427.05, - "loss": 13.851099967956543, - "sps": 840.3197888208443 + "num_episodes": 700, + "mean_reward": 2335.846759979725, + "mean_length": 35204.58, + "survival_pct": 0.44005725, + "max_steps": 80000, + "loss": 0.11061151325702667, + "sps": 371.7560051115758 }, { "update": 2385, "global_step": 9768960, - "num_episodes": 3553, - "mean_reward": 25.30362526893616, - "mean_length": 1552.4, - "loss": -0.07959967106580734, - "sps": 348.5790453337399 + "num_episodes": 701, + "mean_reward": 2415.715403892994, + "mean_length": 36004.33, + "survival_pct": 0.450054125, + "max_steps": 80000, + "loss": 0.04716165363788605, + "sps": 404.5228919670066 }, { "update": 2390, "global_step": 9789440, - "num_episodes": 3575, - "mean_reward": 43.762970843315124, - "mean_length": 1834.94, - "loss": 256.5601806640625, - "sps": 314.0190386184578 + "num_episodes": 701, + "mean_reward": 2415.715403892994, + "mean_length": 36004.33, + "survival_pct": 0.450054125, + "max_steps": 80000, + "loss": 0.673524022102356, + "sps": 438.07322870511223 }, { "update": 2395, "global_step": 9809920, - "num_episodes": 3584, - "mean_reward": 41.22667598724365, - "mean_length": 1832.99, - "loss": 37.159915924072266, - "sps": 261.015177859497 + "num_episodes": 701, + "mean_reward": 2415.715403892994, + "mean_length": 36004.33, + "survival_pct": 0.450054125, + "max_steps": 80000, + "loss": 0.04936300218105316, + "sps": 447.99792386063456 }, { "update": 2400, "global_step": 9830400, - "num_episodes": 3593, - "mean_reward": 56.500927686691284, - "mean_length": 1809.72, - "loss": 949.2464599609375, - "sps": 236.54852291533763 + "num_episodes": 701, + "mean_reward": 2415.715403892994, + "mean_length": 36004.33, + "survival_pct": 0.450054125, + "max_steps": 80000, + "loss": -0.05190306156873703, + "sps": 451.9291955717441 }, { "update": 2405, "global_step": 9850880, - "num_episodes": 3599, - "mean_reward": 105.25954935073852, - "mean_length": 1813.21, - "loss": 1035.45751953125, - "sps": 654.1456293848414 + "num_episodes": 701, + "mean_reward": 2415.715403892994, + "mean_length": 36004.33, + "survival_pct": 0.450054125, + "max_steps": 80000, + "loss": -0.01377500593662262, + "sps": 456.3292063646881 }, { "update": 2410, "global_step": 9871360, - "num_episodes": 3608, - "mean_reward": 174.8251446390152, - "mean_length": 1969.27, - "loss": 12.137419700622559, - "sps": 374.5700328217855 + "num_episodes": 705, + "mean_reward": 2415.0658296895026, + "mean_length": 35966.34, + "survival_pct": 0.44957924999999993, + "max_steps": 80000, + "loss": 7.559074401855469, + "sps": 317.32403267612773 }, { "update": 2415, "global_step": 9891840, - "num_episodes": 3613, - "mean_reward": 174.6033950281143, - "mean_length": 2093.28, - "loss": 470.9022216796875, - "sps": 829.3315456709553 + "num_episodes": 705, + "mean_reward": 2415.0658296895026, + "mean_length": 35966.34, + "survival_pct": 0.44957924999999993, + "max_steps": 80000, + "loss": 1.5048213005065918, + "sps": 332.4634487520527 }, { "update": 2420, "global_step": 9912320, - "num_episodes": 3614, - "mean_reward": 175.19530655384062, - "mean_length": 2188.0, - "loss": 7.125182628631592, - "sps": 1392.3792980312514 + "num_episodes": 705, + "mean_reward": 2415.0658296895026, + "mean_length": 35966.34, + "survival_pct": 0.44957924999999993, + "max_steps": 80000, + "loss": 0.2138240784406662, + "sps": 432.40076557163655 }, { "update": 2425, "global_step": 9932800, - "num_episodes": 3616, - "mean_reward": 175.76038108348845, - "mean_length": 2285.88, - "loss": 11.183570861816406, - "sps": 303.2125684013846 + "num_episodes": 705, + "mean_reward": 2415.0658296895026, + "mean_length": 35966.34, + "survival_pct": 0.44957924999999993, + "max_steps": 80000, + "loss": 0.21876683831214905, + "sps": 448.3816453184398 }, { "update": 2430, "global_step": 9953280, - "num_episodes": 3625, - "mean_reward": 245.62015884399415, - "mean_length": 2481.25, - "loss": 72.24958038330078, - "sps": 223.82439359999108 + "num_episodes": 705, + "mean_reward": 2415.0658296895026, + "mean_length": 35966.34, + "survival_pct": 0.44957924999999993, + "max_steps": 80000, + "loss": -0.08095875382423401, + "sps": 427.35281803394867 }, { "update": 2435, "global_step": 9973760, - "num_episodes": 3640, - "mean_reward": 12.841221110026042, - "mean_length": 242.8, - "loss": 39.742530822753906, - "sps": 444.8952550886448 + "num_episodes": 706, + "mean_reward": 2496.027885582447, + "mean_length": 36761.21, + "survival_pct": 0.45951512499999997, + "max_steps": 80000, + "loss": 1.3263893127441406, + "sps": 489.32343069605076 }, { "update": 2440, "global_step": 9994240, - "num_episodes": 3640, - "mean_reward": 12.841221110026042, - "mean_length": 242.8, - "loss": 0.29037195444107056, - "sps": 1069.6427524147307 + "num_episodes": 706, + "mean_reward": 2496.027885582447, + "mean_length": 36761.21, + "survival_pct": 0.45951512499999997, + "max_steps": 80000, + "loss": 0.8851792812347412, + "sps": 507.1439259148593 }, { "update": 2445, "global_step": 10014720, - "num_episodes": 3640, - "mean_reward": 12.841221110026042, - "mean_length": 242.8, - "loss": 3.3830156326293945, - "sps": 1318.7931203919134 + "num_episodes": 706, + "mean_reward": 2496.027885582447, + "mean_length": 36761.21, + "survival_pct": 0.45951512499999997, + "max_steps": 80000, + "loss": -0.022784769535064697, + "sps": 514.2103065530886 }, { "update": 2450, "global_step": 10035200, - "num_episodes": 3644, - "mean_reward": 91.08290822882401, - "mean_length": 2296.9473684210525, - "loss": 20.516225814819336, - "sps": 481.54488129169596 + "num_episodes": 706, + "mean_reward": 2496.027885582447, + "mean_length": 36761.21, + "survival_pct": 0.45951512499999997, + "max_steps": 80000, + "loss": 0.03437136113643646, + "sps": 498.98534084342003 }, { "update": 2455, "global_step": 10055680, - "num_episodes": 3658, - "mean_reward": 124.92099246111783, - "mean_length": 2621.4848484848485, - "loss": 150.3678436279297, - "sps": 253.02333051932328 + "num_episodes": 708, + "mean_reward": 2566.8018279957773, + "mean_length": 36664.09, + "survival_pct": 0.458301125, + "max_steps": 80000, + "loss": 42.47873306274414, + "sps": 329.38816457132594 }, { "update": 2460, "global_step": 10076160, - "num_episodes": 3665, - "mean_reward": 104.92156012058258, - "mean_length": 2402.575, - "loss": 43.09859085083008, - "sps": 444.9419436729771 + "num_episodes": 708, + "mean_reward": 2566.8018279957773, + "mean_length": 36664.09, + "survival_pct": 0.458301125, + "max_steps": 80000, + "loss": -0.1173701360821724, + "sps": 513.6460372429699 }, { "update": 2465, "global_step": 10096640, - "num_episodes": 3668, - "mean_reward": 99.1124037143796, - "mean_length": 2358.6744186046512, - "loss": 30.914440155029297, - "sps": 564.239378309882 + "num_episodes": 708, + "mean_reward": 2566.8018279957773, + "mean_length": 36664.09, + "survival_pct": 0.458301125, + "max_steps": 80000, + "loss": -0.008197635412216187, + "sps": 529.8279586174691 }, { "update": 2470, "global_step": 10117120, - "num_episodes": 3674, - "mean_reward": 212.01101287530392, - "mean_length": 2689.183673469388, - "loss": 519.401611328125, - "sps": 400.7113960553138 + "num_episodes": 708, + "mean_reward": 2566.8018279957773, + "mean_length": 36664.09, + "survival_pct": 0.458301125, + "max_steps": 80000, + "loss": -0.09317698329687119, + "sps": 533.5564592749838 }, { "update": 2475, "global_step": 10137600, - "num_episodes": 3685, - "mean_reward": 246.73736356099445, - "mean_length": 2406.016666666667, - "loss": 18.674598693847656, - "sps": 645.3387661404473 + "num_episodes": 708, + "mean_reward": 2566.8018279957773, + "mean_length": 36664.09, + "survival_pct": 0.458301125, + "max_steps": 80000, + "loss": -0.21753042936325073, + "sps": 523.3857154825558 }, { "update": 2480, "global_step": 10158080, - "num_episodes": 3690, - "mean_reward": 254.607934988462, - "mean_length": 2687.8153846153846, - "loss": 227.44029235839844, - "sps": 409.4558222523011 + "num_episodes": 708, + "mean_reward": 2566.8018279957773, + "mean_length": 36664.09, + "survival_pct": 0.458301125, + "max_steps": 80000, + "loss": -0.038853421807289124, + "sps": 532.2887541618683 + }, + { + "update": 2485, + "global_step": 10178560, + "num_episodes": 708, + "mean_reward": 2566.8018279957773, + "mean_length": 36664.09, + "survival_pct": 0.458301125, + "max_steps": 80000, + "loss": -0.13897421956062317, + "sps": 529.4232504246512 + }, + { + "update": 2490, + "global_step": 10199040, + "num_episodes": 708, + "mean_reward": 2566.8018279957773, + "mean_length": 36664.09, + "survival_pct": 0.458301125, + "max_steps": 80000, + "loss": -0.09001248329877853, + "sps": 527.0291369571413 + }, + { + "update": 2495, + "global_step": 10219520, + "num_episodes": 708, + "mean_reward": 2566.8018279957773, + "mean_length": 36664.09, + "survival_pct": 0.458301125, + "max_steps": 80000, + "loss": -0.07257804274559021, + "sps": 502.41772632138844 + }, + { + "update": 2500, + "global_step": 10240000, + "num_episodes": 708, + "mean_reward": 2566.8018279957773, + "mean_length": 36664.09, + "survival_pct": 0.458301125, + "max_steps": 80000, + "loss": -0.09625621140003204, + "sps": 503.481571804067 + }, + { + "update": 2505, + "global_step": 10260480, + "num_episodes": 708, + "mean_reward": 2566.8018279957773, + "mean_length": 36664.09, + "survival_pct": 0.458301125, + "max_steps": 80000, + "loss": 0.5746378898620605, + "sps": 511.2198490027179 + }, + { + "update": 2510, + "global_step": 10280960, + "num_episodes": 708, + "mean_reward": 2566.8018279957773, + "mean_length": 36664.09, + "survival_pct": 0.458301125, + "max_steps": 80000, + "loss": 0.2466697096824646, + "sps": 502.732132447925 + }, + { + "update": 2515, + "global_step": 10301440, + "num_episodes": 711, + "mean_reward": 2659.348615782261, + "mean_length": 36969.03, + "survival_pct": 0.462112875, + "max_steps": 80000, + "loss": 18.992151260375977, + "sps": 423.86791208132917 + }, + { + "update": 2520, + "global_step": 10321920, + "num_episodes": 711, + "mean_reward": 2659.348615782261, + "mean_length": 36969.03, + "survival_pct": 0.462112875, + "max_steps": 80000, + "loss": 1.12687087059021, + "sps": 561.3134646893078 + }, + { + "update": 2525, + "global_step": 10342400, + "num_episodes": 713, + "mean_reward": 2761.6309762120245, + "mean_length": 38056.0, + "survival_pct": 0.4757, + "max_steps": 80000, + "loss": 2.7139060497283936, + "sps": 570.6222365340182 + }, + { + "update": 2530, + "global_step": 10362880, + "num_episodes": 713, + "mean_reward": 2761.6309762120245, + "mean_length": 38056.0, + "survival_pct": 0.4757, + "max_steps": 80000, + "loss": 0.7590304613113403, + "sps": 590.034373507516 + }, + { + "update": 2535, + "global_step": 10383360, + "num_episodes": 720, + "mean_reward": 2516.4986815714838, + "mean_length": 36373.73, + "survival_pct": 0.454671625, + "max_steps": 80000, + "loss": 6.213076591491699, + "sps": 402.7930412963076 + }, + { + "update": 2540, + "global_step": 10403840, + "num_episodes": 723, + "mean_reward": 2544.001566827297, + "mean_length": 36533.04, + "survival_pct": 0.456663, + "max_steps": 80000, + "loss": 0.7334296703338623, + "sps": 460.5953936249251 + }, + { + "update": 2545, + "global_step": 10424320, + "num_episodes": 723, + "mean_reward": 2544.001566827297, + "mean_length": 36533.04, + "survival_pct": 0.456663, + "max_steps": 80000, + "loss": 0.2690082788467407, + "sps": 536.0100630883657 + }, + { + "update": 2550, + "global_step": 10444800, + "num_episodes": 723, + "mean_reward": 2544.001566827297, + "mean_length": 36533.04, + "survival_pct": 0.456663, + "max_steps": 80000, + "loss": -0.026896320283412933, + "sps": 716.9425490948719 + }, + { + "update": 2555, + "global_step": 10465280, + "num_episodes": 723, + "mean_reward": 2544.001566827297, + "mean_length": 36533.04, + "survival_pct": 0.456663, + "max_steps": 80000, + "loss": -0.2345649003982544, + "sps": 709.2426825661537 + }, + { + "update": 2560, + "global_step": 10485760, + "num_episodes": 723, + "mean_reward": 2544.001566827297, + "mean_length": 36533.04, + "survival_pct": 0.456663, + "max_steps": 80000, + "loss": -0.09797754883766174, + "sps": 722.0229903452855 + }, + { + "update": 2565, + "global_step": 10506240, + "num_episodes": 725, + "mean_reward": 2377.9404914259912, + "mean_length": 36339.3, + "survival_pct": 0.45424125000000004, + "max_steps": 80000, + "loss": 8.174196243286133, + "sps": 380.177542139671 + }, + { + "update": 2570, + "global_step": 10526720, + "num_episodes": 729, + "mean_reward": 2195.4616722035407, + "mean_length": 34356.45, + "survival_pct": 0.42945562499999995, + "max_steps": 80000, + "loss": -0.04960069805383682, + "sps": 528.86594743839 + }, + { + "update": 2575, + "global_step": 10547200, + "num_episodes": 729, + "mean_reward": 2195.4616722035407, + "mean_length": 34356.45, + "survival_pct": 0.42945562499999995, + "max_steps": 80000, + "loss": -0.13887368142604828, + "sps": 724.4350769243744 + }, + { + "update": 2580, + "global_step": 10567680, + "num_episodes": 729, + "mean_reward": 2195.4616722035407, + "mean_length": 34356.45, + "survival_pct": 0.42945562499999995, + "max_steps": 80000, + "loss": -0.08070817589759827, + "sps": 710.3014121255162 + }, + { + "update": 2585, + "global_step": 10588160, + "num_episodes": 729, + "mean_reward": 2195.4616722035407, + "mean_length": 34356.45, + "survival_pct": 0.42945562499999995, + "max_steps": 80000, + "loss": 0.2522279620170593, + "sps": 722.3160261788157 + }, + { + "update": 2590, + "global_step": 10608640, + "num_episodes": 729, + "mean_reward": 2195.4616722035407, + "mean_length": 34356.45, + "survival_pct": 0.42945562499999995, + "max_steps": 80000, + "loss": -0.10373173654079437, + "sps": 669.264371724261 + }, + { + "update": 2595, + "global_step": 10629120, + "num_episodes": 737, + "mean_reward": 2092.385828053951, + "mean_length": 34157.73, + "survival_pct": 0.426971625, + "max_steps": 80000, + "loss": -0.1631798893213272, + "sps": 758.9302511283099 + }, + { + "update": 2600, + "global_step": 10649600, + "num_episodes": 737, + "mean_reward": 2092.385828053951, + "mean_length": 34157.73, + "survival_pct": 0.426971625, + "max_steps": 80000, + "loss": -0.20927369594573975, + "sps": 740.8227163674813 + }, + { + "update": 2605, + "global_step": 10670080, + "num_episodes": 737, + "mean_reward": 2092.385828053951, + "mean_length": 34157.73, + "survival_pct": 0.34157730000000003, + "max_steps": 100000, + "loss": -0.19633692502975464, + "sps": 755.8358488017623 + }, + { + "update": 2610, + "global_step": 10690560, + "num_episodes": 737, + "mean_reward": 2092.385828053951, + "mean_length": 34157.73, + "survival_pct": 0.34157730000000003, + "max_steps": 100000, + "loss": -0.177586629986763, + "sps": 741.6510280859512 + }, + { + "update": 2615, + "global_step": 10711040, + "num_episodes": 737, + "mean_reward": 2092.385828053951, + "mean_length": 34157.73, + "survival_pct": 0.34157730000000003, + "max_steps": 100000, + "loss": -0.21765384078025818, + "sps": 727.5310686928377 + }, + { + "update": 2620, + "global_step": 10731520, + "num_episodes": 737, + "mean_reward": 2092.385828053951, + "mean_length": 34157.73, + "survival_pct": 0.34157730000000003, + "max_steps": 100000, + "loss": -0.0905933529138565, + "sps": 741.0285660060694 + }, + { + "update": 2625, + "global_step": 10752000, + "num_episodes": 737, + "mean_reward": 2092.385828053951, + "mean_length": 34157.73, + "survival_pct": 0.34157730000000003, + "max_steps": 100000, + "loss": -0.0920332819223404, + "sps": 730.4494102532749 + }, + { + "update": 2630, + "global_step": 10772480, + "num_episodes": 737, + "mean_reward": 2092.385828053951, + "mean_length": 34157.73, + "survival_pct": 0.34157730000000003, + "max_steps": 100000, + "loss": -0.16122817993164062, + "sps": 720.0710211707521 + }, + { + "update": 2635, + "global_step": 10792960, + "num_episodes": 737, + "mean_reward": 2092.385828053951, + "mean_length": 34157.73, + "survival_pct": 0.34157730000000003, + "max_steps": 100000, + "loss": -0.11723538488149643, + "sps": 703.7882071317721 + }, + { + "update": 2640, + "global_step": 10813440, + "num_episodes": 737, + "mean_reward": 2092.385828053951, + "mean_length": 34157.73, + "survival_pct": 0.34157730000000003, + "max_steps": 100000, + "loss": -0.18088921904563904, + "sps": 692.1664885002592 + }, + { + "update": 2645, + "global_step": 10833920, + "num_episodes": 737, + "mean_reward": 2092.385828053951, + "mean_length": 34157.73, + "survival_pct": 0.34157730000000003, + "max_steps": 100000, + "loss": -0.17876286804676056, + "sps": 684.4397604362919 + }, + { + "update": 2650, + "global_step": 10854400, + "num_episodes": 738, + "mean_reward": 2173.3602819800376, + "mean_length": 35156.66, + "survival_pct": 0.35156660000000006, + "max_steps": 100000, + "loss": 0.5544903874397278, + "sps": 410.99817441737457 + }, + { + "update": 2655, + "global_step": 10874880, + "num_episodes": 741, + "mean_reward": 2107.5635332036018, + "mean_length": 34163.47, + "survival_pct": 0.3416347, + "max_steps": 100000, + "loss": 544.5499267578125, + "sps": 460.02549342528056 + }, + { + "update": 2660, + "global_step": 10895360, + "num_episodes": 741, + "mean_reward": 2107.5635332036018, + "mean_length": 34163.47, + "survival_pct": 0.3416347, + "max_steps": 100000, + "loss": 0.021736368536949158, + "sps": 548.2332167398129 + }, + { + "update": 2665, + "global_step": 10915840, + "num_episodes": 741, + "mean_reward": 2107.5635332036018, + "mean_length": 34163.47, + "survival_pct": 0.3416347, + "max_steps": 100000, + "loss": -0.050015464425086975, + "sps": 527.6499326243032 + }, + { + "update": 2670, + "global_step": 10936320, + "num_episodes": 741, + "mean_reward": 2107.5635332036018, + "mean_length": 34163.47, + "survival_pct": 0.3416347, + "max_steps": 100000, + "loss": -0.1121983677148819, + "sps": 510.22633473983234 + }, + { + "update": 2675, + "global_step": 10956800, + "num_episodes": 741, + "mean_reward": 2107.5635332036018, + "mean_length": 34163.47, + "survival_pct": 0.3416347, + "max_steps": 100000, + "loss": 0.46661272644996643, + "sps": 565.3571380995694 + }, + { + "update": 2680, + "global_step": 10977280, + "num_episodes": 741, + "mean_reward": 2107.5635332036018, + "mean_length": 34163.47, + "survival_pct": 0.3416347, + "max_steps": 100000, + "loss": 0.4648454785346985, + "sps": 572.308466885207 + }, + { + "update": 2685, + "global_step": 10997760, + "num_episodes": 741, + "mean_reward": 2107.5635332036018, + "mean_length": 34163.47, + "survival_pct": 0.3416347, + "max_steps": 100000, + "loss": 0.2607373595237732, + "sps": 570.2676571961572 + }, + { + "update": 2690, + "global_step": 11018240, + "num_episodes": 741, + "mean_reward": 2107.5635332036018, + "mean_length": 34163.47, + "survival_pct": 0.3416347, + "max_steps": 100000, + "loss": 0.125162735581398, + "sps": 566.7273297363138 + }, + { + "update": 2695, + "global_step": 11038720, + "num_episodes": 741, + "mean_reward": 2107.5635332036018, + "mean_length": 34163.47, + "survival_pct": 0.3416347, + "max_steps": 100000, + "loss": 0.059064000844955444, + "sps": 552.6096670437072 + }, + { + "update": 2700, + "global_step": 11059200, + "num_episodes": 741, + "mean_reward": 2107.5635332036018, + "mean_length": 34163.47, + "survival_pct": 0.3416347, + "max_steps": 100000, + "loss": -0.03592519462108612, + "sps": 605.1661001263743 + }, + { + "update": 2705, + "global_step": 11079680, + "num_episodes": 741, + "mean_reward": 2107.5635332036018, + "mean_length": 34163.47, + "survival_pct": 0.3416347, + "max_steps": 100000, + "loss": -0.05099225044250488, + "sps": 607.4455279859145 + }, + { + "update": 2710, + "global_step": 11100160, + "num_episodes": 742, + "mean_reward": 2117.9644485020635, + "mean_length": 35162.27, + "survival_pct": 0.35162269999999995, + "max_steps": 100000, + "loss": -0.046167902648448944, + "sps": 590.2299902607874 + }, + { + "update": 2715, + "global_step": 11120640, + "num_episodes": 746, + "mean_reward": 2117.223214428425, + "mean_length": 35106.36, + "survival_pct": 0.35106360000000003, + "max_steps": 100000, + "loss": 11.869409561157227, + "sps": 396.9261046886479 + }, + { + "update": 2720, + "global_step": 11141120, + "num_episodes": 749, + "mean_reward": 2004.7705671191216, + "mean_length": 36107.98, + "survival_pct": 0.3610798, + "max_steps": 100000, + "loss": 156.38876342773438, + "sps": 334.0244695201906 + }, + { + "update": 2725, + "global_step": 11161600, + "num_episodes": 749, + "mean_reward": 2004.7705671191216, + "mean_length": 36107.98, + "survival_pct": 0.3610798, + "max_steps": 100000, + "loss": 2.8302462100982666, + "sps": 414.56266910509237 + }, + { + "update": 2730, + "global_step": 11182080, + "num_episodes": 749, + "mean_reward": 2004.7705671191216, + "mean_length": 36107.98, + "survival_pct": 0.3610798, + "max_steps": 100000, + "loss": 2.787081003189087, + "sps": 444.05090888582356 + }, + { + "update": 2735, + "global_step": 11202560, + "num_episodes": 757, + "mean_reward": 1866.676198823452, + "mean_length": 35228.65, + "survival_pct": 0.3522865, + "max_steps": 100000, + "loss": 20.099119186401367, + "sps": 219.7197521516745 + }, + { + "update": 2740, + "global_step": 11223040, + "num_episodes": 757, + "mean_reward": 1866.676198823452, + "mean_length": 35228.65, + "survival_pct": 0.3522865, + "max_steps": 100000, + "loss": 24.235774993896484, + "sps": 364.1255592485544 + }, + { + "update": 2745, + "global_step": 11243520, + "num_episodes": 757, + "mean_reward": 1866.676198823452, + "mean_length": 35228.65, + "survival_pct": 0.3522865, + "max_steps": 100000, + "loss": 10.114006996154785, + "sps": 485.1483243292639 + }, + { + "update": 2750, + "global_step": 11264000, + "num_episodes": 757, + "mean_reward": 1866.676198823452, + "mean_length": 35228.65, + "survival_pct": 0.3522865, + "max_steps": 100000, + "loss": 17.78866958618164, + "sps": 492.4805186782399 } ] \ No newline at end of file