| ep=0001 [easy ] steps=030 reward= +17.290 evac=1 hp=100.0 s30=1.00 t=0s |
| ep=0002 [easy ] steps=200 reward= -9.295 evac=0 hp= 98.5 s30=0.50 t=1s |
| ep=0003 [easy ] steps=200 reward= -9.090 evac=0 hp=100.0 s30=0.33 t=2s |
| ep=0004 [easy ] steps=200 reward= -16.130 evac=0 hp= 62.0 s30=0.25 t=3s |
| ep=0005 [easy ] steps=200 reward= -12.080 evac=0 hp=100.0 s30=0.20 t=3s |
| ep=0006 [easy ] steps=200 reward= -12.320 evac=0 hp=100.0 s30=0.17 t=4s |
| ep=0007 [easy ] steps=200 reward= -14.560 evac=0 hp=100.0 s30=0.14 t=6s |
| ep=0008 [easy ] steps=200 reward= -9.890 evac=0 hp=100.0 s30=0.12 t=7s |
| >> PPO update pi_loss=-0.0065 v_loss=4.3169 entropy=2.0778 kl=0.0066 lr=2.96e-04 |
| ep=0009 [easy ] steps=141 reward= +10.370 evac=1 hp= 98.0 s30=0.22 t=8s |
| ep=0010 [easy ] steps=200 reward= -17.620 evac=0 hp=100.0 s30=0.20 t=9s |
| ep=0011 [easy ] steps=049 reward= +15.740 evac=1 hp=100.0 s30=0.27 t=9s |
| ep=0012 [easy ] steps=200 reward= -9.980 evac=0 hp=100.0 s30=0.25 t=10s |
| ep=0013 [easy ] steps=200 reward= -3.220 evac=0 hp=100.0 s30=0.23 t=11s |
| ep=0014 [easy ] steps=123 reward= +11.430 evac=1 hp=100.0 s30=0.29 t=12s |
| ep=0015 [easy ] steps=200 reward= -5.360 evac=0 hp=100.0 s30=0.27 t=13s |
| ep=0016 [easy ] steps=127 reward= +10.970 evac=1 hp=100.0 s30=0.31 t=13s |
| >> PPO update pi_loss=-0.0035 v_loss=6.9670 entropy=2.0211 kl=0.0010 lr=2.93e-04 |
| ep=0017 [easy ] steps=156 reward= +10.050 evac=1 hp= 98.0 s30=0.35 t=15s |
| ep=0018 [easy ] steps=088 reward= +14.830 evac=1 hp=100.0 s30=0.39 t=15s |
| ep=0019 [easy ] steps=200 reward= -17.770 evac=0 hp=100.0 s30=0.37 t=16s |
| ep=0020 [easy ] steps=176 reward= +9.470 evac=1 hp=100.0 s30=0.40 t=17s |
| ep=0021 [easy ] steps=200 reward= -6.850 evac=0 hp= 99.0 s30=0.38 t=18s |
| ep=0022 [easy ] steps=021 reward= +17.650 evac=1 hp=100.0 s30=0.41 t=18s |
| ep=0023 [easy ] steps=096 reward= +11.280 evac=1 hp=100.0 s30=0.43 t=18s |
| ep=0024 [easy ] steps=200 reward= -5.890 evac=0 hp= 95.0 s30=0.42 t=19s |
| >> PPO update pi_loss=-0.0139 v_loss=8.8112 entropy=2.1823 kl=0.0050 lr=2.89e-04 |
| ep=0025 [easy ] steps=139 reward= +11.360 evac=1 hp=100.0 s30=0.44 t=21s |
| ** EVAL [hard] reward=-10.124 success=0.00 |
| ep=0026 [easy ] steps=200 reward= -19.000 evac=0 hp=100.0 s30=0.42 t=23s |
| ep=0027 [easy ] steps=063 reward= +14.240 evac=1 hp=100.0 s30=0.44 t=23s |
| ep=0028 [easy ] steps=200 reward= -15.310 evac=0 hp=100.0 s30=0.43 t=24s |
| ep=0029 [easy ] steps=200 reward= -11.940 evac=0 hp=100.0 s30=0.41 t=25s |
| ep=0030 [easy ] steps=200 reward= -12.810 evac=0 hp=100.0 s30=0.40 t=26s |
| ep=0031 [easy ] steps=031 reward= +16.630 evac=1 hp=100.0 s30=0.40 t=26s |
| ep=0032 [easy ] steps=200 reward= -9.350 evac=0 hp=100.0 s30=0.40 t=27s |
| >> PPO update pi_loss=-0.0033 v_loss=6.9677 entropy=1.7971 kl=0.0012 lr=2.86e-04 |
| ep=0033 [easy ] steps=200 reward= -12.530 evac=0 hp=100.0 s30=0.40 t=29s |
| ep=0034 [easy ] steps=043 reward= +15.460 evac=1 hp=100.0 s30=0.43 t=29s |
| ep=0035 [easy ] steps=009 reward= +17.210 evac=1 hp=100.0 s30=0.47 t=29s |
| ep=0036 [easy ] steps=200 reward= -9.600 evac=0 hp=100.0 s30=0.47 t=30s |
| ep=0037 [easy ] steps=200 reward= -13.100 evac=0 hp=100.0 s30=0.47 t=31s |
| ep=0038 [easy ] steps=088 reward= +14.520 evac=1 hp=100.0 s30=0.50 t=31s |
| ep=0039 [easy ] steps=200 reward= -8.090 evac=0 hp=100.0 s30=0.47 t=32s |
| ep=0040 [easy ] steps=058 reward= +15.970 evac=1 hp=100.0 s30=0.50 t=32s |
| >> PPO update pi_loss=-0.0045 v_loss=10.7413 entropy=2.1999 kl=0.0039 lr=2.82e-04 |
| ep=0041 [easy ] steps=047 reward= +15.940 evac=1 hp=100.0 s30=0.50 t=33s |
| ep=0042 [easy ] steps=200 reward= -8.200 evac=0 hp=100.0 s30=0.50 t=34s |
| ep=0043 [easy ] steps=187 reward= +9.560 evac=1 hp=100.0 s30=0.53 t=35s |
| ep=0044 [easy ] steps=130 reward= +10.540 evac=1 hp=100.0 s30=0.53 t=36s |
| ep=0045 [easy ] steps=200 reward= -16.440 evac=0 hp=100.0 s30=0.53 t=36s |
| ep=0046 [easy ] steps=048 reward= +16.420 evac=1 hp=100.0 s30=0.53 t=37s |
| ep=0047 [easy ] steps=064 reward= +14.910 evac=1 hp=100.0 s30=0.53 t=37s |
| ep=0048 [easy ] steps=048 reward= -14.320 evac=0 hp= 0.0 s30=0.50 t=37s |
| >> PPO update pi_loss=-0.0083 v_loss=11.0590 entropy=2.0080 kl=0.0078 lr=2.78e-04 |
| ep=0049 [easy ] steps=140 reward= +11.540 evac=1 hp=100.0 s30=0.53 t=38s |
| ep=0050 [easy ] steps=013 reward= +19.200 evac=1 hp=100.0 s30=0.53 t=38s |
| ** EVAL [hard] reward=-11.184 success=0.00 |
| ep=0051 [easy ] steps=200 reward= -11.910 evac=0 hp=100.0 s30=0.53 t=41s |
| ep=0052 [easy ] steps=080 reward= +15.090 evac=1 hp=100.0 s30=0.53 t=41s |
| ep=0053 [easy ] steps=088 reward= +14.720 evac=1 hp=100.0 s30=0.53 t=42s |
| ep=0054 [easy ] steps=004 reward= +17.580 evac=1 hp=100.0 s30=0.57 t=42s |
| ep=0055 [easy ] steps=200 reward= -13.970 evac=0 hp=100.0 s30=0.53 t=43s |
| ep=0056 [easy ] steps=062 reward= +15.320 evac=1 hp=100.0 s30=0.57 t=43s |
| >> PPO update pi_loss=-0.0230 v_loss=13.0751 entropy=2.0431 kl=0.0081 lr=2.75e-04 |
| ep=0057 [easy ] steps=021 reward= +18.980 evac=1 hp=100.0 s30=0.57 t=44s |
| ep=0058 [easy ] steps=019 reward= +17.800 evac=1 hp=100.0 s30=0.60 t=44s |
| ep=0059 [easy ] steps=012 reward= +18.630 evac=1 hp=100.0 s30=0.63 t=44s |
| ep=0060 [easy ] steps=067 reward= +14.700 evac=1 hp=100.0 s30=0.67 t=44s |
| ep=0061 [easy ] steps=129 reward= +11.070 evac=1 hp=100.0 s30=0.67 t=45s |
| ep=0062 [easy ] steps=045 reward= +17.620 evac=1 hp=100.0 s30=0.70 t=45s |
| ep=0063 [easy ] steps=040 reward= +14.960 evac=1 hp=100.0 s30=0.73 t=45s |
| ep=0064 [easy ] steps=041 reward= +16.660 evac=1 hp=100.0 s30=0.73 t=45s |
| >> PPO update pi_loss=-0.0191 v_loss=44.0687 entropy=1.8650 kl=0.0033 lr=2.71e-04 |
| ep=0065 [easy ] steps=082 reward= +14.330 evac=1 hp=100.0 s30=0.73 t=46s |
| ep=0066 [easy ] steps=015 reward= +17.400 evac=1 hp=100.0 s30=0.77 t=46s |
| ep=0067 [easy ] steps=018 reward= +17.970 evac=1 hp=100.0 s30=0.80 t=46s |
| ep=0068 [easy ] steps=200 reward= -16.405 evac=0 hp= 9.5 s30=0.77 t=47s |
| ep=0069 [easy ] steps=005 reward= +17.930 evac=1 hp=100.0 s30=0.80 t=47s |
| ep=0070 [easy ] steps=044 reward= +16.420 evac=1 hp=100.0 s30=0.80 t=47s |
| ep=0071 [easy ] steps=200 reward= -12.220 evac=0 hp=100.0 s30=0.77 t=48s |
| ep=0072 [easy ] steps=151 reward= +2.060 evac=1 hp=100.0 s30=0.80 t=49s |
| >> PPO update pi_loss=-0.0054 v_loss=13.8242 entropy=2.0114 kl=0.0043 lr=2.68e-04 |
| ep=0073 [easy ] steps=005 reward= +17.990 evac=1 hp=100.0 s30=0.80 t=49s |
| ep=0074 [easy ] steps=200 reward= -12.780 evac=0 hp=100.0 s30=0.77 t=50s |
| ep=0075 [easy ] steps=034 reward= +16.900 evac=1 hp=100.0 s30=0.80 t=50s |
| ** EVAL [hard] reward=-11.468 success=0.00 |
| ep=0076 [easy ] steps=017 reward= +19.290 evac=1 hp=100.0 s30=0.80 t=51s |
| ep=0077 [easy ] steps=022 reward= +17.490 evac=1 hp=100.0 s30=0.80 t=51s |
| ep=0078 [easy ] steps=005 reward= +17.050 evac=1 hp=100.0 s30=0.83 t=51s |
| ep=0079 [easy ] steps=017 reward= +18.580 evac=1 hp=100.0 s30=0.83 t=51s |
| ep=0080 [easy ] steps=030 reward= +16.785 evac=1 hp= 99.0 s30=0.83 t=51s |
| >> PPO update pi_loss=-0.0150 v_loss=35.1268 entropy=2.0081 kl=0.0006 lr=2.64e-04 |
| [curriculum] Advanced to 'medium' (suc30=0.87) |
| ep=0081 [easy ] steps=176 reward= +8.340 evac=1 hp=100.0 s30=0.87 t=52s |
| ep=0082 [medium] steps=050 reward= -19.900 evac=0 hp= 0.0 s30=0.83 t=53s |
| ep=0083 [medium] steps=029 reward= -14.630 evac=0 hp= 0.0 s30=0.80 t=53s |
| ep=0084 [medium] steps=025 reward= +15.600 evac=1 hp=100.0 s30=0.80 t=53s |
| ep=0085 [medium] steps=080 reward= -24.320 evac=0 hp= 0.0 s30=0.80 t=53s |
| ep=0086 [medium] steps=043 reward= +8.883 evac=1 hp= 45.5 s30=0.80 t=53s |
| ep=0087 [medium] steps=040 reward= -15.070 evac=0 hp= 0.0 s30=0.77 t=54s |
| ep=0088 [medium] steps=014 reward= -15.090 evac=0 hp= 0.0 s30=0.73 t=54s |
| >> PPO update pi_loss=-0.0087 v_loss=30.8968 entropy=1.8425 kl=0.0018 lr=2.60e-04 |
| ep=0089 [medium] steps=014 reward= +16.320 evac=1 hp=100.0 s30=0.73 t=54s |
| ep=0090 [medium] steps=023 reward= +16.400 evac=1 hp=100.0 s30=0.73 t=54s |
| ep=0091 [medium] steps=026 reward= -14.010 evac=0 hp= 0.0 s30=0.70 t=54s |
| ep=0092 [medium] steps=017 reward= +15.510 evac=1 hp=100.0 s30=0.70 t=54s |
| ep=0093 [medium] steps=011 reward= +15.170 evac=1 hp=100.0 s30=0.70 t=54s |
| ep=0094 [medium] steps=036 reward= -20.530 evac=0 hp= 0.0 s30=0.67 t=54s |
| ep=0095 [medium] steps=150 reward= -14.250 evac=0 hp=100.0 s30=0.63 t=55s |
| ep=0096 [medium] steps=029 reward= -9.600 evac=0 hp= 0.0 s30=0.60 t=55s |
| >> PPO update pi_loss=-0.0083 v_loss=34.3949 entropy=1.8332 kl=0.0026 lr=2.57e-04 |
| ep=0097 [medium] steps=016 reward= +16.800 evac=1 hp=100.0 s30=0.60 t=56s |
| ep=0098 [medium] steps=150 reward= -13.210 evac=0 hp= 99.0 s30=0.60 t=56s |
| ep=0099 [medium] steps=005 reward= +15.000 evac=1 hp=100.0 s30=0.60 t=56s |
| ep=0100 [medium] steps=150 reward= -17.065 evac=0 hp= 1.5 s30=0.57 t=57s |
| ** EVAL [hard] reward=-9.827 success=0.00 |
| ep=0101 [medium] steps=019 reward= +12.548 evac=1 hp= 70.5 s30=0.60 t=59s |
| ep=0102 [medium] steps=056 reward= -19.370 evac=0 hp= 0.0 s30=0.57 t=59s |
| ep=0103 [medium] steps=059 reward= +13.980 evac=1 hp=100.0 s30=0.57 t=59s |
| ep=0104 [medium] steps=150 reward= -26.195 evac=0 hp= 50.5 s30=0.57 t=60s |
| >> PPO update pi_loss=-0.0004 v_loss=14.6058 entropy=1.3296 kl=0.0008 lr=2.53e-04 |
| ep=0105 [medium] steps=045 reward= -12.460 evac=0 hp= 0.0 s30=0.53 t=61s |
| ep=0106 [medium] steps=040 reward= +15.800 evac=1 hp=100.0 s30=0.53 t=61s |
| ep=0107 [medium] steps=150 reward= -25.180 evac=0 hp=100.0 s30=0.50 t=62s |
| ep=0108 [medium] steps=150 reward= -9.150 evac=0 hp= 97.0 s30=0.47 t=62s |
| ep=0109 [medium] steps=023 reward= +15.840 evac=1 hp=100.0 s30=0.47 t=62s |
| ep=0110 [medium] steps=130 reward= +2.095 evac=1 hp= 87.0 s30=0.47 t=63s |
| ep=0111 [medium] steps=150 reward= -12.520 evac=0 hp= 97.0 s30=0.43 t=64s |
| ep=0112 [medium] steps=058 reward= +13.820 evac=1 hp=100.0 s30=0.47 t=64s |
| >> PPO update pi_loss=-0.0084 v_loss=11.2875 entropy=1.7357 kl=0.0044 lr=2.50e-04 |
| ep=0113 [medium] steps=001 reward= +14.260 evac=1 hp=100.0 s30=0.50 t=64s |
| ep=0114 [medium] steps=012 reward= +16.180 evac=1 hp=100.0 s30=0.50 t=65s |
| ep=0115 [medium] steps=042 reward= +10.953 evac=1 hp= 71.5 s30=0.53 t=65s |
| ep=0116 [medium] steps=116 reward= +4.300 evac=1 hp=100.0 s30=0.53 t=65s |
| ep=0117 [medium] steps=150 reward= -12.700 evac=0 hp=100.0 s30=0.53 t=66s |
| ep=0118 [medium] steps=083 reward= +12.210 evac=1 hp=100.0 s30=0.57 t=66s |
| ep=0119 [medium] steps=022 reward= -15.580 evac=0 hp= 0.0 s30=0.53 t=66s |
| ep=0120 [medium] steps=023 reward= +15.445 evac=1 hp= 99.0 s30=0.53 t=66s |
| >> PPO update pi_loss=-0.0151 v_loss=18.8505 entropy=1.7544 kl=0.0059 lr=2.46e-04 |
| ep=0121 [medium] steps=005 reward= +15.020 evac=1 hp=100.0 s30=0.57 t=67s |
| ep=0122 [medium] steps=065 reward= +11.930 evac=1 hp=100.0 s30=0.57 t=67s |
| ep=0123 [medium] steps=010 reward= +11.925 evac=1 hp= 55.0 s30=0.57 t=67s |
| ep=0124 [medium] steps=044 reward= +15.040 evac=1 hp=100.0 s30=0.60 t=67s |
| ep=0125 [medium] steps=093 reward= -24.720 evac=0 hp= 0.0 s30=0.60 t=68s |
| ** EVAL [hard] reward=-7.792 success=0.20 |
| ep=0126 [medium] steps=010 reward= +14.650 evac=1 hp=100.0 s30=0.63 t=69s |
| ep=0127 [medium] steps=019 reward= -13.110 evac=0 hp= 0.0 s30=0.60 t=69s |
| ep=0128 [medium] steps=018 reward= +14.210 evac=1 hp=100.0 s30=0.63 t=69s |
| >> PPO update pi_loss=+0.0149 v_loss=31.2597 entropy=1.5042 kl=0.0039 lr=2.42e-04 |
| ep=0129 [medium] steps=150 reward= -18.700 evac=0 hp= 95.0 s30=0.60 t=70s |
| ep=0130 [medium] steps=039 reward= +14.670 evac=1 hp=100.0 s30=0.63 t=70s |
| ep=0131 [medium] steps=034 reward= -10.750 evac=0 hp= 0.0 s30=0.60 t=70s |
| ep=0132 [medium] steps=045 reward= -23.760 evac=0 hp= 0.0 s30=0.60 t=70s |
| ep=0133 [medium] steps=150 reward= -17.000 evac=0 hp=100.0 s30=0.57 t=71s |
| ep=0134 [medium] steps=029 reward= +15.530 evac=1 hp=100.0 s30=0.60 t=71s |
| ep=0135 [medium] steps=150 reward= -27.040 evac=0 hp= 96.0 s30=0.60 t=72s |
| ep=0136 [medium] steps=016 reward= +15.660 evac=1 hp=100.0 s30=0.60 t=72s |
| >> PPO update pi_loss=-0.0045 v_loss=18.2808 entropy=1.4920 kl=0.0046 lr=2.39e-04 |
| ep=0137 [medium] steps=016 reward= -14.740 evac=0 hp= 0.0 s30=0.60 t=72s |
| ep=0138 [medium] steps=012 reward= +16.740 evac=1 hp=100.0 s30=0.63 t=72s |
| ep=0139 [medium] steps=013 reward= +16.620 evac=1 hp=100.0 s30=0.63 t=73s |
| ep=0140 [medium] steps=008 reward= +14.720 evac=1 hp=100.0 s30=0.63 t=73s |
| ep=0141 [medium] steps=061 reward= -18.720 evac=0 hp= 0.0 s30=0.63 t=73s |
| ep=0142 [medium] steps=005 reward= +14.570 evac=1 hp=100.0 s30=0.63 t=73s |
| ep=0143 [medium] steps=027 reward= +12.642 evac=1 hp= 59.5 s30=0.63 t=73s |
| ep=0144 [medium] steps=060 reward= +13.820 evac=1 hp=100.0 s30=0.63 t=73s |
| >> PPO update pi_loss=-0.0005 v_loss=64.7183 entropy=1.4458 kl=0.0003 lr=2.35e-04 |
| ep=0145 [medium] steps=150 reward= -16.260 evac=0 hp=100.0 s30=0.60 t=74s |
| ep=0146 [medium] steps=029 reward= +12.297 evac=1 hp= 94.5 s30=0.60 t=74s |
| ep=0147 [medium] steps=055 reward= +11.020 evac=1 hp=100.0 s30=0.63 t=74s |
| ep=0148 [medium] steps=015 reward= +16.030 evac=1 hp=100.0 s30=0.63 t=75s |
| ep=0149 [medium] steps=029 reward= +15.030 evac=1 hp=100.0 s30=0.67 t=75s |
| ep=0150 [medium] steps=050 reward= +7.125 evac=1 hp= 47.0 s30=0.67 t=75s |
| ** EVAL [hard] reward=-4.237 success=0.40 |
| ep=0151 [medium] steps=048 reward= -0.080 evac=1 hp= 14.0 s30=0.67 t=76s |
| ep=0152 [medium] steps=022 reward= +14.553 evac=1 hp= 81.5 s30=0.67 t=76s |
| >> PPO update pi_loss=-0.0157 v_loss=36.3181 entropy=1.6305 kl=0.0048 lr=2.32e-04 |
| ep=0153 [medium] steps=022 reward= -7.700 evac=0 hp= 0.0 s30=0.63 t=77s |
| ep=0154 [medium] steps=150 reward= -29.100 evac=0 hp=100.0 s30=0.60 t=77s |
| ep=0155 [medium] steps=028 reward= -16.030 evac=0 hp= 0.0 s30=0.60 t=77s |
| ep=0156 [medium] steps=019 reward= +15.060 evac=1 hp=100.0 s30=0.60 t=77s |
| ep=0157 [medium] steps=004 reward= +15.550 evac=1 hp=100.0 s30=0.63 t=77s |
| ep=0158 [medium] steps=023 reward= +16.510 evac=1 hp=100.0 s30=0.63 t=78s |
| ep=0159 [medium] steps=020 reward= +16.350 evac=1 hp=100.0 s30=0.67 t=78s |
| ep=0160 [medium] steps=025 reward= -12.920 evac=0 hp= 0.0 s30=0.63 t=78s |
| >> PPO update pi_loss=-0.0265 v_loss=29.0037 entropy=1.5078 kl=0.0049 lr=2.28e-04 |
| ep=0161 [medium] steps=038 reward= -12.270 evac=0 hp= 0.0 s30=0.63 t=78s |
| ep=0162 [medium] steps=035 reward= -13.590 evac=0 hp= 0.0 s30=0.63 t=79s |
| ep=0163 [medium] steps=037 reward= -16.930 evac=0 hp= 0.0 s30=0.63 t=79s |
| ep=0164 [medium] steps=150 reward= -11.790 evac=0 hp=100.0 s30=0.60 t=79s |
| ep=0165 [medium] steps=030 reward= -15.790 evac=0 hp= 0.0 s30=0.60 t=80s |
| ep=0166 [medium] steps=150 reward= -9.340 evac=0 hp=100.0 s30=0.57 t=80s |
| ep=0167 [medium] steps=094 reward= +10.390 evac=1 hp=100.0 s30=0.60 t=81s |
| ep=0168 [medium] steps=021 reward= +15.260 evac=1 hp=100.0 s30=0.60 t=81s |
| >> PPO update pi_loss=+0.0017 v_loss=17.6311 entropy=1.7762 kl=0.0062 lr=2.24e-04 |
| ep=0169 [medium] steps=064 reward= -26.290 evac=0 hp= 0.0 s30=0.57 t=82s |
| ep=0170 [medium] steps=021 reward= -11.750 evac=0 hp= 0.0 s30=0.53 t=82s |
| ep=0171 [medium] steps=017 reward= +16.280 evac=1 hp=100.0 s30=0.57 t=82s |
| ep=0172 [medium] steps=028 reward= +15.830 evac=1 hp=100.0 s30=0.57 t=82s |
| ep=0173 [medium] steps=052 reward= +14.260 evac=1 hp=100.0 s30=0.57 t=82s |
| ep=0174 [medium] steps=018 reward= -12.960 evac=0 hp= 0.0 s30=0.53 t=82s |
| ep=0175 [medium] steps=040 reward= -19.070 evac=0 hp= 0.0 s30=0.53 t=82s |
| ** EVAL [hard] reward=-6.674 success=0.20 |
| ep=0176 [medium] steps=150 reward= -30.735 evac=0 hp= 36.5 s30=0.50 t=84s |
| >> PPO update pi_loss=-0.0060 v_loss=30.8760 entropy=1.4049 kl=0.0037 lr=2.21e-04 |
| ep=0177 [medium] steps=008 reward= +15.130 evac=1 hp=100.0 s30=0.50 t=84s |
| ep=0178 [medium] steps=031 reward= +16.050 evac=1 hp=100.0 s30=0.50 t=84s |
| ep=0179 [medium] steps=009 reward= +15.070 evac=1 hp=100.0 s30=0.50 t=85s |
| ep=0180 [medium] steps=150 reward= -15.990 evac=0 hp= 6.0 s30=0.47 t=85s |
| ep=0181 [medium] steps=039 reward= -14.280 evac=0 hp= 0.0 s30=0.43 t=85s |
| ep=0182 [medium] steps=013 reward= +17.160 evac=1 hp=100.0 s30=0.43 t=85s |
| ep=0183 [medium] steps=026 reward= +14.380 evac=1 hp=100.0 s30=0.47 t=86s |
| ep=0184 [medium] steps=150 reward= -8.320 evac=0 hp=100.0 s30=0.47 t=86s |
| >> PPO update pi_loss=-0.0047 v_loss=30.2118 entropy=1.9547 kl=0.0032 lr=2.17e-04 |
| ep=0185 [medium] steps=021 reward= +16.540 evac=1 hp=100.0 s30=0.50 t=87s |
| ep=0186 [medium] steps=081 reward= +11.440 evac=1 hp=100.0 s30=0.50 t=87s |
| ep=0187 [medium] steps=019 reward= +13.470 evac=1 hp= 84.0 s30=0.50 t=87s |
| ep=0188 [medium] steps=002 reward= +14.730 evac=1 hp=100.0 s30=0.50 t=87s |
| ep=0189 [medium] steps=150 reward= -12.590 evac=0 hp= 92.0 s30=0.47 t=88s |
| ep=0190 [medium] steps=028 reward= +16.133 evac=1 hp= 95.5 s30=0.50 t=88s |
| ep=0191 [medium] steps=038 reward= -20.930 evac=0 hp= 0.0 s30=0.50 t=89s |
| ep=0192 [medium] steps=034 reward= +14.270 evac=1 hp=100.0 s30=0.53 t=89s |
| >> PPO update pi_loss=-0.0183 v_loss=29.7914 entropy=1.7819 kl=0.0039 lr=2.14e-04 |
| ep=0193 [medium] steps=037 reward= +11.290 evac=1 hp= 34.0 s30=0.57 t=89s |
| ep=0194 [medium] steps=020 reward= -16.220 evac=0 hp= 0.0 s30=0.57 t=89s |
| ep=0195 [medium] steps=017 reward= +17.320 evac=1 hp=100.0 s30=0.60 t=89s |
| ep=0196 [medium] steps=027 reward= +10.703 evac=1 hp= 35.5 s30=0.63 t=89s |
| ep=0197 [medium] steps=150 reward= -32.225 evac=0 hp= 1.5 s30=0.60 t=90s |
| ep=0198 [medium] steps=050 reward= -15.130 evac=0 hp= 0.0 s30=0.57 t=90s |
| ep=0199 [medium] steps=017 reward= +16.950 evac=1 hp=100.0 s30=0.60 t=90s |
| ep=0200 [medium] steps=150 reward= -26.000 evac=0 hp= 17.0 s30=0.60 t=91s |
| >> PPO update pi_loss=-0.0057 v_loss=18.1479 entropy=1.1786 kl=0.0061 lr=2.10e-04 |
| ** EVAL [hard] reward=-12.304 success=0.00 |
| ep=0201 [medium] steps=150 reward= -16.065 evac=0 hp= 93.5 s30=0.57 t=94s |
| ep=0202 [medium] steps=021 reward= +16.650 evac=1 hp=100.0 s30=0.57 t=95s |
| ep=0203 [medium] steps=078 reward= -9.000 evac=0 hp= 0.0 s30=0.53 t=95s |
| ep=0204 [medium] steps=023 reward= -18.280 evac=0 hp= 0.0 s30=0.53 t=95s |
| ep=0205 [medium] steps=002 reward= +14.730 evac=1 hp=100.0 s30=0.57 t=95s |
| ep=0206 [medium] steps=044 reward= -9.060 evac=0 hp= 0.0 s30=0.57 t=95s |
| ep=0207 [medium] steps=033 reward= -17.560 evac=0 hp= 0.0 s30=0.53 t=95s |
| ep=0208 [medium] steps=018 reward= -16.030 evac=0 hp= 0.0 s30=0.50 t=96s |
| >> PPO update pi_loss=-0.0094 v_loss=20.9687 entropy=1.5537 kl=0.0047 lr=2.06e-04 |
| ep=0209 [medium] steps=029 reward= +11.915 evac=1 hp= 63.0 s30=0.50 t=96s |
| ep=0210 [medium] steps=150 reward= -19.650 evac=0 hp=100.0 s30=0.50 t=97s |
| ep=0211 [medium] steps=013 reward= +16.290 evac=1 hp=100.0 s30=0.53 t=97s |
| ep=0212 [medium] steps=150 reward= -11.440 evac=0 hp= 88.0 s30=0.50 t=97s |
| ep=0213 [medium] steps=150 reward= -18.155 evac=0 hp= 99.5 s30=0.47 t=98s |
| ep=0214 [medium] steps=002 reward= +14.730 evac=1 hp=100.0 s30=0.50 t=98s |
| ep=0215 [medium] steps=140 reward= +9.375 evac=1 hp= 97.0 s30=0.50 t=99s |
| ep=0216 [medium] steps=150 reward= -20.495 evac=0 hp= 67.5 s30=0.47 t=99s |
| >> PPO update pi_loss=-0.0179 v_loss=3.1620 entropy=1.5723 kl=0.0041 lr=2.03e-04 |
| ep=0217 [medium] steps=019 reward= +13.883 evac=1 hp= 89.5 s30=0.47 t=100s |
| ep=0218 [medium] steps=045 reward= -10.200 evac=0 hp= 0.0 s30=0.43 t=100s |
| ep=0219 [medium] steps=007 reward= +15.170 evac=1 hp=100.0 s30=0.47 t=100s |
| ep=0220 [medium] steps=019 reward= -11.850 evac=0 hp= 0.0 s30=0.43 t=100s |
| ep=0221 [medium] steps=010 reward= +15.740 evac=1 hp=100.0 s30=0.47 t=100s |
| ep=0222 [medium] steps=034 reward= +14.350 evac=1 hp=100.0 s30=0.47 t=100s |
| ep=0223 [medium] steps=150 reward= -10.765 evac=0 hp= 12.5 s30=0.43 t=101s |
| ep=0224 [medium] steps=150 reward= -8.865 evac=0 hp= 86.5 s30=0.43 t=102s |
| >> PPO update pi_loss=-0.0082 v_loss=28.3779 entropy=1.9019 kl=0.0052 lr=1.99e-04 |
| ep=0225 [medium] steps=005 reward= +15.900 evac=1 hp=100.0 s30=0.43 t=102s |
| ** EVAL [hard] reward=-11.080 success=0.00 |
| ep=0226 [medium] steps=150 reward= -11.360 evac=0 hp=100.0 s30=0.40 t=105s |
| ep=0227 [medium] steps=011 reward= +15.840 evac=1 hp=100.0 s30=0.43 t=105s |
| ep=0228 [medium] steps=025 reward= +15.300 evac=1 hp=100.0 s30=0.47 t=105s |
| ep=0229 [medium] steps=068 reward= -22.710 evac=0 hp= 0.0 s30=0.43 t=106s |
| ep=0230 [medium] steps=015 reward= -12.360 evac=0 hp= 0.0 s30=0.43 t=106s |
| ep=0231 [medium] steps=044 reward= -11.060 evac=0 hp= 0.0 s30=0.43 t=106s |
| ep=0232 [medium] steps=015 reward= +17.160 evac=1 hp=100.0 s30=0.43 t=106s |
| >> PPO update pi_loss=+0.0029 v_loss=33.0012 entropy=1.4043 kl=0.0086 lr=1.96e-04 |
| ep=0233 [medium] steps=013 reward= +16.830 evac=1 hp=100.0 s30=0.47 t=106s |
| ep=0234 [medium] steps=099 reward= +9.075 evac=1 hp= 59.0 s30=0.50 t=107s |
| ep=0235 [medium] steps=150 reward= -18.945 evac=0 hp= 66.5 s30=0.47 t=108s |
| ep=0236 [medium] steps=009 reward= +16.310 evac=1 hp=100.0 s30=0.50 t=108s |
| ep=0237 [medium] steps=023 reward= -14.880 evac=0 hp= 0.0 s30=0.50 t=108s |
| ep=0238 [medium] steps=150 reward= -19.820 evac=0 hp=100.0 s30=0.50 t=108s |
| ep=0239 [medium] steps=150 reward= -14.000 evac=0 hp=100.0 s30=0.47 t=109s |
| ep=0240 [medium] steps=053 reward= +10.170 evac=1 hp= 32.0 s30=0.50 t=109s |
| >> PPO update pi_loss=-0.0042 v_loss=13.0209 entropy=1.5150 kl=0.0011 lr=1.92e-04 |
| ep=0241 [medium] steps=150 reward= -19.830 evac=0 hp=100.0 s30=0.47 t=110s |
| ep=0242 [medium] steps=150 reward= -11.270 evac=0 hp= 74.0 s30=0.47 t=111s |
| ep=0243 [medium] steps=047 reward= -10.300 evac=0 hp= 0.0 s30=0.47 t=111s |
| ep=0244 [medium] steps=046 reward= -21.860 evac=0 hp= 0.0 s30=0.43 t=111s |
| ep=0245 [medium] steps=150 reward= -12.495 evac=0 hp= 93.5 s30=0.40 t=112s |
| ep=0246 [medium] steps=030 reward= -10.980 evac=0 hp= 0.0 s30=0.40 t=112s |
| ep=0247 [medium] steps=031 reward= +9.955 evac=1 hp= 65.0 s30=0.40 t=112s |
| ep=0248 [medium] steps=050 reward= +14.810 evac=1 hp=100.0 s30=0.43 t=112s |
| >> PPO update pi_loss=-0.0064 v_loss=11.4714 entropy=1.6570 kl=0.0017 lr=1.88e-04 |
| ep=0249 [medium] steps=033 reward= +15.620 evac=1 hp=100.0 s30=0.43 t=113s |
| ep=0250 [medium] steps=042 reward= -13.750 evac=0 hp= 0.0 s30=0.43 t=113s |
| ** EVAL [hard] reward=-5.648 success=0.20 |
| ep=0251 [medium] steps=150 reward= -21.140 evac=0 hp=100.0 s30=0.40 t=115s |
| ep=0252 [medium] steps=017 reward= +14.880 evac=1 hp=100.0 s30=0.40 t=115s |
| ep=0253 [medium] steps=025 reward= +16.060 evac=1 hp=100.0 s30=0.43 t=115s |
| ep=0254 [medium] steps=041 reward= +14.490 evac=1 hp=100.0 s30=0.47 t=115s |
| ep=0255 [medium] steps=150 reward= -10.270 evac=0 hp=100.0 s30=0.43 t=116s |
| ep=0256 [medium] steps=047 reward= +14.500 evac=1 hp=100.0 s30=0.47 t=116s |
| >> PPO update pi_loss=-0.0026 v_loss=19.7825 entropy=1.6007 kl=0.0029 lr=1.85e-04 |
| ep=0257 [medium] steps=085 reward= -22.280 evac=0 hp= 0.0 s30=0.43 t=117s |
| ep=0258 [medium] steps=014 reward= +15.860 evac=1 hp=100.0 s30=0.43 t=117s |
| ep=0259 [medium] steps=150 reward= -10.100 evac=0 hp=100.0 s30=0.43 t=118s |
| ep=0260 [medium] steps=099 reward= -20.870 evac=0 hp= 0.0 s30=0.43 t=118s |
| ep=0261 [medium] steps=010 reward= +16.800 evac=1 hp=100.0 s30=0.47 t=118s |
| ep=0262 [medium] steps=043 reward= +15.430 evac=1 hp=100.0 s30=0.47 t=118s |
| ep=0263 [medium] steps=042 reward= +15.385 evac=1 hp= 97.0 s30=0.47 t=118s |
| ep=0264 [medium] steps=058 reward= -15.040 evac=0 hp= 0.0 s30=0.43 t=119s |
| >> PPO update pi_loss=+0.0024 v_loss=18.7074 entropy=1.6059 kl=0.0008 lr=1.81e-04 |
| ep=0265 [medium] steps=028 reward= +15.670 evac=1 hp=100.0 s30=0.47 t=119s |
| ep=0266 [medium] steps=037 reward= -20.460 evac=0 hp= 0.0 s30=0.43 t=119s |
| ep=0267 [medium] steps=023 reward= +12.072 evac=1 hp= 39.5 s30=0.47 t=119s |
| ep=0268 [medium] steps=024 reward= +7.133 evac=1 hp= 21.5 s30=0.50 t=119s |
| ep=0269 [medium] steps=014 reward= +16.080 evac=1 hp=100.0 s30=0.53 t=119s |
| ep=0270 [medium] steps=013 reward= +10.907 evac=1 hp= 30.5 s30=0.53 t=120s |
| ep=0271 [medium] steps=068 reward= +13.167 evac=1 hp= 98.5 s30=0.57 t=120s |
| ep=0272 [medium] steps=026 reward= +9.660 evac=1 hp= 50.0 s30=0.60 t=120s |
| >> PPO update pi_loss=-0.0026 v_loss=51.5106 entropy=1.6209 kl=0.0003 lr=1.78e-04 |
| ep=0273 [medium] steps=150 reward= -15.250 evac=0 hp= 18.0 s30=0.60 t=121s |
| ep=0274 [medium] steps=046 reward= +15.253 evac=1 hp= 99.5 s30=0.63 t=121s |
| ep=0275 [medium] steps=050 reward= -20.940 evac=0 hp= 0.0 s30=0.63 t=121s |
| ** EVAL [hard] reward=-10.368 success=0.00 |
| ep=0276 [medium] steps=016 reward= +16.070 evac=1 hp=100.0 s30=0.67 t=123s |
| ep=0277 [medium] steps=005 reward= +15.490 evac=1 hp=100.0 s30=0.67 t=123s |
| ep=0278 [medium] steps=032 reward= -12.180 evac=0 hp= 0.0 s30=0.63 t=123s |
| ep=0279 [medium] steps=027 reward= +14.932 evac=1 hp= 99.5 s30=0.63 t=123s |
| ep=0280 [medium] steps=007 reward= +15.780 evac=1 hp=100.0 s30=0.67 t=123s |
| >> PPO update pi_loss=-0.0248 v_loss=32.8994 entropy=1.7511 kl=0.0017 lr=1.74e-04 |
| ep=0281 [medium] steps=008 reward= +14.720 evac=1 hp=100.0 s30=0.70 t=124s |
| ep=0282 [medium] steps=044 reward= +13.120 evac=1 hp= 98.0 s30=0.70 t=124s |
| ep=0283 [medium] steps=007 reward= +15.780 evac=1 hp=100.0 s30=0.70 t=124s |
| ep=0284 [medium] steps=025 reward= -13.390 evac=0 hp= 0.0 s30=0.67 t=124s |
| ep=0285 [medium] steps=059 reward= -18.330 evac=0 hp= 0.0 s30=0.67 t=124s |
| ep=0286 [medium] steps=014 reward= +13.222 evac=1 hp= 87.5 s30=0.67 t=124s |
| ep=0287 [medium] steps=033 reward= +14.592 evac=1 hp= 93.5 s30=0.70 t=125s |
| ep=0288 [medium] steps=003 reward= +15.160 evac=1 hp=100.0 s30=0.70 t=125s |
| >> PPO update pi_loss=-0.0022 v_loss=36.9067 entropy=1.3689 kl=0.0008 lr=1.70e-04 |
| ep=0289 [medium] steps=017 reward= +15.430 evac=1 hp=100.0 s30=0.73 t=125s |
| ep=0290 [medium] steps=036 reward= -8.750 evac=0 hp= 0.0 s30=0.73 t=125s |
| ep=0291 [medium] steps=021 reward= +14.890 evac=1 hp=100.0 s30=0.73 t=125s |
| ep=0292 [medium] steps=003 reward= +15.160 evac=1 hp=100.0 s30=0.73 t=125s |
| ep=0293 [medium] steps=048 reward= -10.520 evac=0 hp= 0.0 s30=0.70 t=125s |
| ep=0294 [medium] steps=027 reward= +16.330 evac=1 hp=100.0 s30=0.73 t=125s |
| ep=0295 [medium] steps=011 reward= +16.130 evac=1 hp=100.0 s30=0.73 t=125s |
| ep=0296 [medium] steps=042 reward= -12.320 evac=0 hp= 0.0 s30=0.73 t=126s |
| >> PPO update pi_loss=-0.0016 v_loss=49.3725 entropy=1.6299 kl=0.0004 lr=1.67e-04 |
| ep=0297 [medium] steps=150 reward= -30.925 evac=0 hp= 66.5 s30=0.70 t=126s |
| ep=0298 [medium] steps=008 reward= +15.670 evac=1 hp=100.0 s30=0.70 t=126s |
| ep=0299 [medium] steps=030 reward= +15.950 evac=1 hp=100.0 s30=0.70 t=127s |
| ep=0300 [medium] steps=019 reward= -19.910 evac=0 hp= 0.0 s30=0.67 t=127s |
| ** EVAL [hard] reward=-4.421 success=0.20 |
| ep=0301 [medium] steps=009 reward= +16.700 evac=1 hp=100.0 s30=0.67 t=129s |
| ep=0302 [medium] steps=014 reward= +15.740 evac=1 hp=100.0 s30=0.67 t=129s |
| ep=0303 [medium] steps=019 reward= +15.830 evac=1 hp=100.0 s30=0.70 t=129s |
| ep=0304 [medium] steps=008 reward= +16.290 evac=1 hp=100.0 s30=0.70 t=129s |
| >> PPO update pi_loss=-0.0023 v_loss=32.9150 entropy=1.0633 kl=0.0003 lr=1.63e-04 |
| ep=0305 [medium] steps=018 reward= +11.265 evac=1 hp= 57.0 s30=0.73 t=129s |
| ep=0306 [medium] steps=006 reward= +15.840 evac=1 hp=100.0 s30=0.73 t=129s |
| ep=0307 [medium] steps=004 reward= +15.080 evac=1 hp=100.0 s30=0.73 t=129s |
| ep=0308 [medium] steps=150 reward= -14.790 evac=0 hp=100.0 s30=0.73 t=130s |
| ep=0309 [medium] steps=027 reward= +15.510 evac=1 hp=100.0 s30=0.73 t=130s |
| ep=0310 [medium] steps=027 reward= +16.010 evac=1 hp=100.0 s30=0.73 t=130s |
| ep=0311 [medium] steps=150 reward= -15.270 evac=0 hp=100.0 s30=0.70 t=131s |
| ep=0312 [medium] steps=047 reward= +16.010 evac=1 hp=100.0 s30=0.70 t=131s |
| >> PPO update pi_loss=-0.0012 v_loss=20.0507 entropy=1.7914 kl=0.0008 lr=1.60e-04 |
| ep=0313 [medium] steps=030 reward= -11.460 evac=0 hp= 0.0 s30=0.67 t=131s |
| ep=0314 [medium] steps=010 reward= +16.820 evac=1 hp=100.0 s30=0.70 t=131s |
| ep=0315 [medium] steps=150 reward= -10.870 evac=0 hp=100.0 s30=0.70 t=132s |
| ep=0316 [medium] steps=150 reward= -24.845 evac=0 hp= 71.5 s30=0.67 t=133s |
| ep=0317 [medium] steps=005 reward= +14.940 evac=1 hp=100.0 s30=0.67 t=133s |
| ep=0318 [medium] steps=032 reward= -10.190 evac=0 hp= 0.0 s30=0.63 t=133s |
| ep=0319 [medium] steps=005 reward= +15.900 evac=1 hp=100.0 s30=0.63 t=133s |
| ep=0320 [medium] steps=002 reward= +14.730 evac=1 hp=100.0 s30=0.67 t=133s |
| >> PPO update pi_loss=+0.0007 v_loss=20.7397 entropy=1.4529 kl=0.0018 lr=1.56e-04 |
| ep=0321 [medium] steps=004 reward= +15.550 evac=1 hp=100.0 s30=0.67 t=133s |
| ep=0322 [medium] steps=007 reward= +15.360 evac=1 hp=100.0 s30=0.67 t=133s |
| ep=0323 [medium] steps=023 reward= -8.390 evac=0 hp= 0.0 s30=0.67 t=133s |
| ep=0324 [medium] steps=016 reward= +16.550 evac=1 hp=100.0 s30=0.67 t=133s |
| ep=0325 [medium] steps=028 reward= +15.430 evac=1 hp=100.0 s30=0.67 t=134s |
| ** EVAL [hard] reward=-11.180 success=0.00 |
| ep=0326 [medium] steps=017 reward= +11.340 evac=1 hp= 72.0 s30=0.70 t=135s |
| ep=0327 [medium] steps=150 reward= -14.040 evac=0 hp=100.0 s30=0.70 t=136s |
| ep=0328 [medium] steps=020 reward= +16.560 evac=1 hp=100.0 s30=0.70 t=136s |
| >> PPO update pi_loss=-0.1127 v_loss=34.7078 entropy=1.6140 kl=0.0021 lr=1.52e-04 |
| ep=0329 [medium] steps=001 reward= +14.260 evac=1 hp=100.0 s30=0.70 t=136s |
| ep=0330 [medium] steps=015 reward= +16.492 evac=1 hp= 99.5 s30=0.73 t=136s |
| ep=0331 [medium] steps=026 reward= -10.810 evac=0 hp= 0.0 s30=0.70 t=136s |
| ep=0332 [medium] steps=035 reward= -16.300 evac=0 hp= 0.0 s30=0.67 t=136s |
| ep=0333 [medium] steps=024 reward= +12.070 evac=1 hp= 66.0 s30=0.67 t=136s |
| ep=0334 [medium] steps=021 reward= +15.380 evac=1 hp=100.0 s30=0.67 t=136s |
| ep=0335 [medium] steps=025 reward= +13.668 evac=1 hp= 96.5 s30=0.67 t=137s |
| ep=0336 [medium] steps=001 reward= +14.260 evac=1 hp=100.0 s30=0.67 t=137s |
| >> PPO update pi_loss=-0.0010 v_loss=51.7629 entropy=1.4248 kl=0.0002 lr=1.49e-04 |
| ep=0337 [medium] steps=150 reward= -8.340 evac=0 hp=100.0 s30=0.63 t=137s |
| ep=0338 [medium] steps=045 reward= -22.490 evac=0 hp= 0.0 s30=0.63 t=138s |
| ep=0339 [medium] steps=011 reward= +15.610 evac=1 hp=100.0 s30=0.63 t=138s |
| ep=0340 [medium] steps=020 reward= +15.480 evac=1 hp= 98.0 s30=0.63 t=138s |
| ep=0341 [medium] steps=033 reward= -13.980 evac=0 hp= 0.0 s30=0.63 t=138s |
| ep=0342 [medium] steps=026 reward= +16.010 evac=1 hp=100.0 s30=0.63 t=138s |
| ep=0343 [medium] steps=010 reward= +16.640 evac=1 hp=100.0 s30=0.67 t=138s |
| ep=0344 [medium] steps=017 reward= +9.383 evac=1 hp= 37.5 s30=0.67 t=138s |
| >> PPO update pi_loss=-0.0115 v_loss=29.0228 entropy=1.6334 kl=0.0006 lr=1.45e-04 |
| ep=0345 [medium] steps=037 reward= +15.953 evac=1 hp= 97.5 s30=0.70 t=139s |
| ep=0346 [medium] steps=018 reward= +16.840 evac=1 hp=100.0 s30=0.73 t=139s |
| ep=0347 [medium] steps=014 reward= -14.940 evac=0 hp= 0.0 s30=0.70 t=139s |
| ep=0348 [medium] steps=009 reward= +16.250 evac=1 hp=100.0 s30=0.73 t=139s |
| ep=0349 [medium] steps=042 reward= +15.600 evac=1 hp=100.0 s30=0.73 t=139s |
| ep=0350 [medium] steps=015 reward= +16.420 evac=1 hp=100.0 s30=0.73 t=139s |
| ** EVAL [hard] reward=-9.845 success=0.00 |
| ep=0351 [medium] steps=009 reward= +15.030 evac=1 hp=100.0 s30=0.73 t=141s |
| ep=0352 [medium] steps=015 reward= +16.290 evac=1 hp=100.0 s30=0.73 t=141s |
| >> PPO update pi_loss=-0.0015 v_loss=44.9861 entropy=1.4760 kl=0.0002 lr=1.42e-04 |
| ep=0353 [medium] steps=011 reward= +16.240 evac=1 hp=100.0 s30=0.77 t=141s |
| ep=0354 [medium] steps=150 reward= -25.450 evac=0 hp= 56.0 s30=0.73 t=142s |
| ep=0355 [medium] steps=150 reward= -12.510 evac=0 hp=100.0 s30=0.70 t=142s |
| ep=0356 [medium] steps=016 reward= +15.078 evac=1 hp= 98.5 s30=0.70 t=142s |
| ep=0357 [medium] steps=016 reward= +16.920 evac=1 hp=100.0 s30=0.73 t=143s |
| ep=0358 [medium] steps=013 reward= +16.350 evac=1 hp=100.0 s30=0.73 t=143s |
| ep=0359 [medium] steps=015 reward= +15.070 evac=1 hp=100.0 s30=0.73 t=143s |
| ep=0360 [medium] steps=016 reward= +17.010 evac=1 hp=100.0 s30=0.73 t=143s |
| >> PPO update pi_loss=+0.0065 v_loss=18.1264 entropy=1.2002 kl=0.0020 lr=1.38e-04 |
| ep=0361 [medium] steps=014 reward= +16.450 evac=1 hp=100.0 s30=0.77 t=143s |
| ep=0362 [medium] steps=054 reward= -6.740 evac=0 hp= 0.0 s30=0.77 t=143s |
| ep=0363 [medium] steps=060 reward= -19.560 evac=0 hp= 0.0 s30=0.73 t=144s |
| [curriculum] Advanced to 'hard' (suc30=0.73) |
| ep=0364 [medium] steps=017 reward= +15.980 evac=1 hp=100.0 s30=0.73 t=144s |
| ep=0365 [hard ] steps=031 reward= -11.430 evac=0 hp= 0.0 s30=0.70 t=144s |
| ep=0366 [easy ] steps=200 reward= -17.860 evac=0 hp=100.0 s30=0.67 t=145s |
| ep=0367 [medium] steps=016 reward= +12.742 evac=1 hp= 91.5 s30=0.70 t=145s |
| ep=0368 [medium] steps=038 reward= -7.300 evac=0 hp= 0.0 s30=0.70 t=145s |
| >> PPO update pi_loss=-0.0051 v_loss=15.8902 entropy=1.2523 kl=0.0026 lr=1.34e-04 |
| ep=0369 [hard ] steps=038 reward= -13.110 evac=0 hp= 0.0 s30=0.67 t=145s |
| ep=0370 [hard ] steps=018 reward= -12.610 evac=0 hp= 0.0 s30=0.63 t=146s |
| ep=0371 [hard ] steps=100 reward= -8.120 evac=0 hp=100.0 s30=0.63 t=146s |
| ep=0372 [easy ] steps=030 reward= +18.690 evac=1 hp=100.0 s30=0.63 t=146s |
| ep=0373 [medium] steps=010 reward= +16.470 evac=1 hp=100.0 s30=0.63 t=146s |
| ep=0374 [medium] steps=037 reward= +14.580 evac=1 hp=100.0 s30=0.63 t=146s |
| ep=0375 [hard ] steps=031 reward= -13.080 evac=0 hp= 0.0 s30=0.60 t=147s |
| ** EVAL [hard] reward=-11.320 success=0.00 |
| ep=0376 [easy ] steps=008 reward= +18.940 evac=1 hp=100.0 s30=0.60 t=147s |
| >> PPO update pi_loss=+0.0186 v_loss=42.2980 entropy=1.9139 kl=0.0080 lr=1.31e-04 |
| ep=0377 [easy ] steps=022 reward= +17.990 evac=1 hp=100.0 s30=0.63 t=148s |
| ep=0378 [hard ] steps=042 reward= -11.380 evac=0 hp= 0.0 s30=0.60 t=148s |
| ep=0379 [hard ] steps=045 reward= -12.320 evac=0 hp= 0.0 s30=0.57 t=148s |
| ep=0380 [hard ] steps=024 reward= -10.440 evac=0 hp= 0.0 s30=0.53 t=148s |
| ep=0381 [hard ] steps=032 reward= -9.660 evac=0 hp= 0.0 s30=0.50 t=148s |
| ep=0382 [medium] steps=007 reward= +16.480 evac=1 hp=100.0 s30=0.50 t=148s |
| ep=0383 [medium] steps=150 reward= -13.880 evac=0 hp=100.0 s30=0.47 t=149s |
| ep=0384 [hard ] steps=032 reward= -10.670 evac=0 hp= 0.0 s30=0.47 t=149s |
| >> PPO update pi_loss=-0.0179 v_loss=15.9173 entropy=2.0312 kl=0.0022 lr=1.27e-04 |
| ep=0385 [medium] steps=013 reward= +15.470 evac=1 hp=100.0 s30=0.50 t=150s |
| ep=0386 [hard ] steps=040 reward= -12.560 evac=0 hp= 0.0 s30=0.47 t=150s |
| ep=0387 [hard ] steps=015 reward= -11.420 evac=0 hp= 0.0 s30=0.43 t=150s |
| ep=0388 [hard ] steps=100 reward= -7.770 evac=0 hp=100.0 s30=0.40 t=150s |
| ep=0389 [medium] steps=150 reward= -15.300 evac=0 hp= 93.0 s30=0.37 t=151s |
| ep=0390 [medium] steps=150 reward= -10.360 evac=0 hp=100.0 s30=0.33 t=152s |
| ep=0391 [hard ] steps=100 reward= -9.240 evac=0 hp=100.0 s30=0.30 t=152s |
| ep=0392 [medium] steps=010 reward= +16.320 evac=1 hp=100.0 s30=0.33 t=152s |
| >> PPO update pi_loss=-0.0158 v_loss=8.8704 entropy=1.8786 kl=0.0033 lr=1.24e-04 |
| ep=0393 [medium] steps=063 reward= -10.280 evac=0 hp= 0.0 s30=0.33 t=153s |
| ep=0394 [hard ] steps=100 reward= -10.105 evac=0 hp= 72.5 s30=0.30 t=154s |
| ep=0395 [medium] steps=002 reward= +14.730 evac=1 hp=100.0 s30=0.33 t=154s |
| ep=0396 [hard ] steps=019 reward= -12.630 evac=0 hp= 0.0 s30=0.33 t=154s |
| ep=0397 [hard ] steps=027 reward= -15.040 evac=0 hp= 0.0 s30=0.30 t=154s |
| ep=0398 [easy ] steps=011 reward= +18.760 evac=1 hp=100.0 s30=0.33 t=154s |
| ep=0399 [easy ] steps=163 reward= +10.190 evac=1 hp=100.0 s30=0.37 t=154s |
| ep=0400 [easy ] steps=014 reward= +18.110 evac=1 hp=100.0 s30=0.40 t=155s |
| >> PPO update pi_loss=+0.0007 v_loss=10.2287 entropy=1.9814 kl=0.0007 lr=1.20e-04 |
| ** EVAL [hard] reward=-12.256 success=0.00 |
| ep=0401 [hard ] steps=030 reward= -11.450 evac=0 hp= 0.0 s30=0.40 t=156s |
| ep=0402 [hard ] steps=100 reward= -8.010 evac=0 hp=100.0 s30=0.37 t=157s |
| ep=0403 [medium] steps=027 reward= -10.860 evac=0 hp= 0.0 s30=0.33 t=157s |
| ep=0404 [medium] steps=004 reward= +15.550 evac=1 hp=100.0 s30=0.33 t=157s |
| ep=0405 [hard ] steps=100 reward= -8.615 evac=0 hp= 93.5 s30=0.33 t=157s |
| ep=0406 [medium] steps=018 reward= -15.140 evac=0 hp= 0.0 s30=0.30 t=157s |
| ep=0407 [hard ] steps=038 reward= +13.605 evac=1 hp= 97.0 s30=0.30 t=157s |
| ep=0408 [easy ] steps=026 reward= +17.300 evac=1 hp=100.0 s30=0.33 t=158s |
| >> PPO update pi_loss=-0.0036 v_loss=28.6206 entropy=1.8868 kl=0.0006 lr=1.16e-04 |
| ep=0409 [hard ] steps=100 reward= -9.880 evac=0 hp=100.0 s30=0.33 t=158s |
| ep=0410 [medium] steps=049 reward= +13.980 evac=1 hp=100.0 s30=0.37 t=159s |
| ep=0411 [hard ] steps=054 reward= -11.150 evac=0 hp= 0.0 s30=0.37 t=159s |
| ep=0412 [medium] steps=150 reward= -10.620 evac=0 hp=100.0 s30=0.33 t=160s |
| ep=0413 [medium] steps=010 reward= +16.230 evac=1 hp=100.0 s30=0.37 t=160s |
| ep=0414 [hard ] steps=029 reward= +13.570 evac=1 hp=100.0 s30=0.40 t=160s |
| ep=0415 [easy ] steps=026 reward= +15.887 evac=1 hp= 84.5 s30=0.40 t=160s |
| ep=0416 [medium] steps=072 reward= -20.080 evac=0 hp= 0.0 s30=0.40 t=160s |
| >> PPO update pi_loss=-0.0059 v_loss=18.3843 entropy=1.7402 kl=0.0007 lr=1.13e-04 |
| ep=0417 [easy ] steps=200 reward= -15.510 evac=0 hp=100.0 s30=0.40 t=161s |
| ep=0418 [easy ] steps=200 reward= -10.150 evac=0 hp=100.0 s30=0.40 t=162s |
| ep=0419 [medium] steps=019 reward= -20.390 evac=0 hp= 0.0 s30=0.40 t=162s |
| ep=0420 [easy ] steps=012 reward= +17.270 evac=1 hp=100.0 s30=0.43 t=162s |
| ep=0421 [hard ] steps=029 reward= -12.360 evac=0 hp= 0.0 s30=0.43 t=163s |
| ep=0422 [hard ] steps=056 reward= -17.780 evac=0 hp= 0.0 s30=0.40 t=163s |
| ep=0423 [hard ] steps=020 reward= -13.260 evac=0 hp= 0.0 s30=0.40 t=163s |
| ep=0424 [hard ] steps=041 reward= -12.590 evac=0 hp= 0.0 s30=0.40 t=163s |
| >> PPO update pi_loss=-0.0197 v_loss=16.7425 entropy=1.9853 kl=0.0074 lr=1.09e-04 |
| ep=0425 [hard ] steps=038 reward= +13.130 evac=1 hp=100.0 s30=0.40 t=164s |
| ** EVAL [hard] reward=-7.024 success=0.20 |
| ep=0426 [hard ] steps=027 reward= -11.930 evac=0 hp= 0.0 s30=0.40 t=165s |
| ep=0427 [easy ] steps=001 reward= +16.760 evac=1 hp=100.0 s30=0.43 t=165s |
| ep=0428 [hard ] steps=100 reward= -9.300 evac=0 hp=100.0 s30=0.40 t=165s |
| ep=0429 [hard ] steps=016 reward= -11.390 evac=0 hp= 0.0 s30=0.37 t=165s |
| ep=0430 [medium] steps=019 reward= -13.260 evac=0 hp= 0.0 s30=0.33 t=166s |
| ep=0431 [hard ] steps=100 reward= -7.910 evac=0 hp=100.0 s30=0.33 t=166s |
| ep=0432 [easy ] steps=010 reward= +18.790 evac=1 hp=100.0 s30=0.37 t=166s |
| >> PPO update pi_loss=+0.0241 v_loss=23.2591 entropy=2.0669 kl=0.0023 lr=1.06e-04 |
| ep=0433 [hard ] steps=020 reward= -13.380 evac=0 hp= 0.0 s30=0.37 t=167s |
| ep=0434 [easy ] steps=011 reward= +18.740 evac=1 hp=100.0 s30=0.37 t=167s |
| ep=0435 [medium] steps=023 reward= +13.832 evac=1 hp= 99.5 s30=0.40 t=167s |
| ep=0436 [hard ] steps=013 reward= +13.010 evac=1 hp=100.0 s30=0.43 t=167s |
| ep=0437 [hard ] steps=037 reward= -17.250 evac=0 hp= 0.0 s30=0.40 t=167s |
| ep=0438 [easy ] steps=014 reward= +18.400 evac=1 hp=100.0 s30=0.40 t=167s |
| ep=0439 [medium] steps=003 reward= +14.670 evac=1 hp=100.0 s30=0.43 t=167s |
| ep=0440 [medium] steps=034 reward= +14.730 evac=1 hp= 94.0 s30=0.43 t=167s |
| >> PPO update pi_loss=+0.0003 v_loss=67.2617 entropy=1.6175 kl=0.0005 lr=1.02e-04 |
| ep=0441 [medium] steps=015 reward= +16.330 evac=1 hp=100.0 s30=0.47 t=167s |
| ep=0442 [hard ] steps=021 reward= -9.930 evac=0 hp= 0.0 s30=0.47 t=168s |
| ep=0443 [hard ] steps=100 reward= -7.920 evac=0 hp=100.0 s30=0.43 t=168s |
| ep=0444 [hard ] steps=027 reward= -10.220 evac=0 hp= 0.0 s30=0.40 t=168s |
| ep=0445 [easy ] steps=200 reward= -13.360 evac=0 hp=100.0 s30=0.37 t=169s |
| ep=0446 [medium] steps=026 reward= +14.897 evac=1 hp= 96.5 s30=0.40 t=169s |
| ep=0447 [hard ] steps=100 reward= -8.950 evac=0 hp=100.0 s30=0.40 t=170s |
| ep=0448 [hard ] steps=028 reward= -11.460 evac=0 hp= 0.0 s30=0.40 t=170s |
| >> PPO update pi_loss=-0.0618 v_loss=6.9971 entropy=1.9275 kl=0.0016 lr=9.84e-05 |
| ep=0449 [hard ] steps=100 reward= -7.500 evac=0 hp=100.0 s30=0.40 t=171s |
| ep=0450 [hard ] steps=057 reward= +13.620 evac=1 hp=100.0 s30=0.40 t=171s |
| ** EVAL [hard] reward=-10.726 success=0.00 |
| ep=0451 [medium] steps=007 reward= +16.480 evac=1 hp=100.0 s30=0.43 t=173s |
| ep=0452 [medium] steps=014 reward= +15.950 evac=1 hp=100.0 s30=0.47 t=173s |
| ep=0453 [easy ] steps=016 reward= +18.320 evac=1 hp=100.0 s30=0.50 t=173s |
| ep=0454 [medium] steps=084 reward= -19.540 evac=0 hp= 0.0 s30=0.50 t=173s |
| ep=0455 [hard ] steps=100 reward= -8.100 evac=0 hp=100.0 s30=0.47 t=173s |
| ep=0456 [medium] steps=017 reward= +14.253 evac=1 hp= 99.5 s30=0.50 t=174s |
| >> PPO update pi_loss=+0.0019 v_loss=18.8275 entropy=1.9587 kl=0.0010 lr=9.48e-05 |
| ep=0457 [medium] steps=012 reward= +15.530 evac=1 hp=100.0 s30=0.50 t=174s |
| ep=0458 [medium] steps=017 reward= -13.910 evac=0 hp= 0.0 s30=0.50 t=174s |
| ep=0459 [medium] steps=009 reward= +16.680 evac=1 hp=100.0 s30=0.53 t=174s |
| ep=0460 [medium] steps=150 reward= -18.775 evac=0 hp= 43.5 s30=0.53 t=175s |
| ep=0461 [medium] steps=150 reward= -14.020 evac=0 hp= 97.0 s30=0.53 t=175s |
| ep=0462 [medium] steps=150 reward= -11.730 evac=0 hp=100.0 s30=0.50 t=176s |
| ep=0463 [hard ] steps=024 reward= -10.500 evac=0 hp= 0.0 s30=0.50 t=176s |
| ep=0464 [hard ] steps=032 reward= -11.090 evac=0 hp= 0.0 s30=0.47 t=176s |
| >> PPO update pi_loss=-0.0086 v_loss=13.1403 entropy=1.5553 kl=0.0028 lr=9.12e-05 |
| ep=0465 [hard ] steps=024 reward= -11.580 evac=0 hp= 0.0 s30=0.43 t=177s |
| ep=0466 [medium] steps=018 reward= +16.110 evac=1 hp=100.0 s30=0.43 t=177s |
| ep=0467 [hard ] steps=100 reward= -9.110 evac=0 hp=100.0 s30=0.43 t=177s |
| ep=0468 [medium] steps=019 reward= +15.550 evac=1 hp=100.0 s30=0.43 t=178s |
| ep=0469 [easy ] steps=200 reward= -23.130 evac=0 hp=100.0 s30=0.40 t=178s |
| ep=0470 [hard ] steps=025 reward= -14.400 evac=0 hp= 0.0 s30=0.37 t=179s |
| ep=0471 [medium] steps=062 reward= -13.210 evac=0 hp= 0.0 s30=0.33 t=179s |
| ep=0472 [medium] steps=150 reward= -23.735 evac=0 hp= 96.5 s30=0.33 t=179s |
| >> PPO update pi_loss=-0.0109 v_loss=10.0870 entropy=1.3562 kl=0.0016 lr=8.76e-05 |
| ep=0473 [hard ] steps=023 reward= -13.950 evac=0 hp= 0.0 s30=0.33 t=180s |
| ep=0474 [hard ] steps=100 reward= -10.855 evac=0 hp= 98.5 s30=0.33 t=181s |
| ep=0475 [hard ] steps=100 reward= -9.080 evac=0 hp=100.0 s30=0.33 t=181s |
| ** EVAL [hard] reward=-9.072 success=0.00 |
| ep=0476 [hard ] steps=100 reward= -9.125 evac=0 hp= 74.5 s30=0.30 t=184s |
| ep=0477 [easy ] steps=011 reward= +18.740 evac=1 hp=100.0 s30=0.33 t=184s |
| ep=0478 [easy ] steps=016 reward= +17.720 evac=1 hp=100.0 s30=0.37 t=184s |
| ep=0479 [medium] steps=150 reward= -17.390 evac=0 hp=100.0 s30=0.37 t=185s |
| ep=0480 [easy ] steps=004 reward= +18.050 evac=1 hp=100.0 s30=0.37 t=185s |
| >> PPO update pi_loss=-0.0001 v_loss=7.6346 entropy=1.6924 kl=0.0008 lr=8.40e-05 |
| ep=0481 [easy ] steps=123 reward= +12.600 evac=1 hp=100.0 s30=0.37 t=186s |
| ep=0482 [hard ] steps=100 reward= -10.260 evac=0 hp=100.0 s30=0.33 t=186s |
| ep=0483 [easy ] steps=007 reward= +18.650 evac=1 hp=100.0 s30=0.33 t=186s |
| ep=0484 [medium] steps=016 reward= +16.180 evac=1 hp=100.0 s30=0.37 t=186s |
| ep=0485 [easy ] steps=200 reward= -21.930 evac=0 hp=100.0 s30=0.37 t=187s |
| ep=0486 [easy ] steps=017 reward= +17.980 evac=1 hp=100.0 s30=0.37 t=187s |
| ep=0487 [hard ] steps=100 reward= -7.490 evac=0 hp=100.0 s30=0.33 t=188s |
| ep=0488 [hard ] steps=043 reward= -12.790 evac=0 hp= 0.0 s30=0.33 t=188s |
| >> PPO update pi_loss=-0.0032 v_loss=7.0721 entropy=1.6047 kl=0.0014 lr=8.04e-05 |
| ep=0489 [hard ] steps=100 reward= -8.410 evac=0 hp=100.0 s30=0.30 t=189s |
| ep=0490 [medium] steps=015 reward= +16.340 evac=1 hp=100.0 s30=0.33 t=189s |
| ep=0491 [easy ] steps=029 reward= +18.520 evac=1 hp=100.0 s30=0.37 t=189s |
| ep=0492 [easy ] steps=012 reward= +18.370 evac=1 hp=100.0 s30=0.40 t=189s |
| ep=0493 [hard ] steps=036 reward= -13.860 evac=0 hp= 0.0 s30=0.40 t=190s |
| ep=0494 [medium] steps=150 reward= -17.640 evac=0 hp= 78.0 s30=0.40 t=190s |
| ep=0495 [medium] steps=003 reward= +15.160 evac=1 hp=100.0 s30=0.43 t=190s |
| ep=0496 [hard ] steps=025 reward= -9.910 evac=0 hp= 0.0 s30=0.40 t=190s |
| >> PPO update pi_loss=+0.0086 v_loss=13.3058 entropy=1.0254 kl=0.0005 lr=7.68e-05 |
| ep=0497 [hard ] steps=032 reward= -12.570 evac=0 hp= 0.0 s30=0.40 t=191s |
| ep=0498 [hard ] steps=100 reward= -9.605 evac=0 hp= 56.5 s30=0.37 t=191s |
| ep=0499 [easy ] steps=009 reward= +18.100 evac=1 hp=100.0 s30=0.40 t=191s |
| ep=0500 [hard ] steps=078 reward= +11.160 evac=1 hp=100.0 s30=0.43 t=192s |
| ** EVAL [hard] reward=-12.050 success=0.00 |
| ep=0501 [medium] steps=026 reward= +6.730 evac=1 hp= 14.0 s30=0.47 t=194s |
| ep=0502 [medium] steps=017 reward= +15.100 evac=1 hp=100.0 s30=0.50 t=194s |
| ep=0503 [hard ] steps=037 reward= -12.610 evac=0 hp= 0.0 s30=0.50 t=194s |
| ep=0504 [easy ] steps=200 reward= -17.930 evac=0 hp=100.0 s30=0.50 t=195s |
| >> PPO update pi_loss=+0.0009 v_loss=15.9530 entropy=1.5948 kl=0.0006 lr=7.32e-05 |
| ep=0505 [easy ] steps=200 reward= -15.700 evac=0 hp=100.0 s30=0.50 t=196s |
| ep=0506 [hard ] steps=100 reward= -10.160 evac=0 hp=100.0 s30=0.50 t=197s |
| ep=0507 [medium] steps=008 reward= +16.740 evac=1 hp=100.0 s30=0.50 t=197s |
| ep=0508 [hard ] steps=100 reward= -8.520 evac=0 hp=100.0 s30=0.47 t=197s |
| ep=0509 [hard ] steps=028 reward= -11.370 evac=0 hp= 0.0 s30=0.47 t=197s |
| ep=0510 [hard ] steps=032 reward= -9.960 evac=0 hp= 0.0 s30=0.43 t=198s |
| ep=0511 [medium] steps=017 reward= +15.520 evac=1 hp=100.0 s30=0.43 t=198s |
| ep=0512 [medium] steps=013 reward= +16.330 evac=1 hp=100.0 s30=0.47 t=198s |
| >> PPO update pi_loss=-0.0003 v_loss=10.8486 entropy=2.0690 kl=0.0005 lr=6.96e-05 |
| ep=0513 [hard ] steps=040 reward= -10.930 evac=0 hp= 0.0 s30=0.43 t=198s |
| ep=0514 [hard ] steps=100 reward= -9.770 evac=0 hp=100.0 s30=0.40 t=199s |
| ep=0515 [easy ] steps=003 reward= +17.170 evac=1 hp=100.0 s30=0.43 t=199s |
| ep=0516 [hard ] steps=026 reward= +13.380 evac=1 hp=100.0 s30=0.43 t=199s |
| ep=0517 [hard ] steps=035 reward= -11.330 evac=0 hp= 0.0 s30=0.43 t=199s |
| ep=0518 [medium] steps=020 reward= +13.890 evac=1 hp= 86.0 s30=0.47 t=199s |
| ep=0519 [easy ] steps=019 reward= +19.480 evac=1 hp=100.0 s30=0.50 t=199s |
| ep=0520 [hard ] steps=033 reward= -11.450 evac=0 hp= 0.0 s30=0.47 t=200s |
| >> PPO update pi_loss=-0.0634 v_loss=27.4050 entropy=1.8160 kl=0.0016 lr=6.60e-05 |
| ep=0521 [medium] steps=009 reward= +16.700 evac=1 hp=100.0 s30=0.47 t=200s |
| ep=0522 [medium] steps=017 reward= -13.670 evac=0 hp= 0.0 s30=0.43 t=200s |
| ep=0523 [hard ] steps=014 reward= -13.050 evac=0 hp= 0.0 s30=0.43 t=200s |
| ep=0524 [medium] steps=150 reward= -6.580 evac=0 hp=100.0 s30=0.43 t=201s |
| ep=0525 [hard ] steps=100 reward= -10.760 evac=0 hp=100.0 s30=0.40 t=201s |
| ** EVAL [hard] reward=-5.528 success=0.20 |
| ep=0526 [medium] steps=080 reward= -25.070 evac=0 hp= 0.0 s30=0.40 t=203s |
| ep=0527 [medium] steps=150 reward= -13.725 evac=0 hp= 91.5 s30=0.40 t=203s |
| ep=0528 [medium] steps=068 reward= -10.380 evac=0 hp= 0.0 s30=0.40 t=204s |
| >> PPO update pi_loss=+0.0074 v_loss=18.0052 entropy=1.7626 kl=0.0021 lr=6.24e-05 |
| ep=0529 [hard ] steps=100 reward= -8.910 evac=0 hp=100.0 s30=0.37 t=205s |
| ep=0530 [easy ] steps=200 reward= -11.215 evac=0 hp= 95.5 s30=0.33 t=206s |
| ep=0531 [hard ] steps=047 reward= +14.700 evac=1 hp=100.0 s30=0.33 t=206s |
| ep=0532 [hard ] steps=037 reward= -11.340 evac=0 hp= 0.0 s30=0.30 t=206s |
| ep=0533 [hard ] steps=028 reward= -10.550 evac=0 hp= 0.0 s30=0.30 t=206s |
| ep=0534 [medium] steps=014 reward= +16.560 evac=1 hp=100.0 s30=0.33 t=206s |
| ep=0535 [medium] steps=067 reward= -21.630 evac=0 hp= 0.0 s30=0.33 t=207s |
| ep=0536 [hard ] steps=053 reward= -10.830 evac=0 hp= 0.0 s30=0.33 t=207s |
| >> PPO update pi_loss=-0.0165 v_loss=15.0212 entropy=1.7101 kl=0.0018 lr=5.88e-05 |
| ep=0537 [medium] steps=150 reward= -6.565 evac=0 hp= 63.5 s30=0.30 t=208s |
| ep=0538 [hard ] steps=100 reward= -8.425 evac=0 hp= 95.5 s30=0.30 t=209s |
| ep=0539 [medium] steps=015 reward= +16.190 evac=1 hp=100.0 s30=0.33 t=209s |
| ep=0540 [hard ] steps=100 reward= -9.880 evac=0 hp=100.0 s30=0.33 t=209s |
| ep=0541 [medium] steps=014 reward= +17.190 evac=1 hp=100.0 s30=0.33 t=209s |
| ep=0542 [hard ] steps=044 reward= -10.020 evac=0 hp= 0.0 s30=0.30 t=209s |
| ep=0543 [easy ] steps=072 reward= +15.770 evac=1 hp=100.0 s30=0.33 t=210s |
| ep=0544 [hard ] steps=100 reward= -8.800 evac=0 hp=100.0 s30=0.33 t=210s |
| >> PPO update pi_loss=+0.0102 v_loss=7.6336 entropy=1.9990 kl=0.0004 lr=5.52e-05 |
| ep=0545 [hard ] steps=024 reward= +14.900 evac=1 hp=100.0 s30=0.33 t=211s |
| ep=0546 [hard ] steps=027 reward= -14.450 evac=0 hp= 0.0 s30=0.30 t=211s |
| ep=0547 [easy ] steps=011 reward= +17.590 evac=1 hp=100.0 s30=0.33 t=211s |
| ep=0548 [easy ] steps=017 reward= +18.350 evac=1 hp=100.0 s30=0.33 t=211s |
| ep=0549 [hard ] steps=022 reward= -14.160 evac=0 hp= 0.0 s30=0.30 t=211s |
| ep=0550 [medium] steps=019 reward= +9.498 evac=1 hp= 64.5 s30=0.33 t=211s |
| ** EVAL [hard] reward=-11.274 success=0.00 |
| ep=0551 [easy ] steps=033 reward= +17.740 evac=1 hp=100.0 s30=0.33 t=213s |
| ep=0552 [medium] steps=003 reward= +14.670 evac=1 hp=100.0 s30=0.37 t=213s |
| >> PPO update pi_loss=-0.0022 v_loss=59.5561 entropy=1.5268 kl=0.0003 lr=5.16e-05 |
| ep=0553 [medium] steps=014 reward= +16.810 evac=1 hp=100.0 s30=0.40 t=213s |
| ep=0554 [hard ] steps=022 reward= +14.210 evac=1 hp=100.0 s30=0.43 t=213s |
| ep=0555 [hard ] steps=035 reward= -13.820 evac=0 hp= 0.0 s30=0.43 t=213s |
| ep=0556 [easy ] steps=047 reward= +17.360 evac=1 hp=100.0 s30=0.47 t=214s |
| ep=0557 [easy ] steps=048 reward= +13.830 evac=1 hp=100.0 s30=0.50 t=214s |
| ep=0558 [medium] steps=008 reward= +16.440 evac=1 hp=100.0 s30=0.53 t=214s |
| ep=0559 [hard ] steps=100 reward= -7.800 evac=0 hp=100.0 s30=0.53 t=214s |
| ep=0560 [hard ] steps=025 reward= -11.750 evac=0 hp= 0.0 s30=0.53 t=215s |
| >> PPO update pi_loss=-0.0207 v_loss=16.8330 entropy=1.8783 kl=0.0006 lr=4.80e-05 |
| ep=0561 [medium] steps=008 reward= +16.440 evac=1 hp=100.0 s30=0.53 t=215s |
| ep=0562 [easy ] steps=112 reward= +10.037 evac=1 hp= 80.5 s30=0.57 t=215s |
| ep=0563 [easy ] steps=016 reward= +18.610 evac=1 hp=100.0 s30=0.60 t=216s |
| ep=0564 [medium] steps=004 reward= +15.600 evac=1 hp=100.0 s30=0.60 t=216s |
| ep=0565 [medium] steps=031 reward= -9.960 evac=0 hp= 0.0 s30=0.60 t=216s |
| ep=0566 [hard ] steps=014 reward= -11.270 evac=0 hp= 0.0 s30=0.60 t=216s |
| ep=0567 [hard ] steps=100 reward= -5.240 evac=0 hp=100.0 s30=0.60 t=216s |
| ep=0568 [easy ] steps=045 reward= +16.020 evac=1 hp=100.0 s30=0.63 t=216s |
| >> PPO update pi_loss=-0.0140 v_loss=16.1354 entropy=1.8723 kl=0.0002 lr=4.44e-05 |
| ep=0569 [medium] steps=004 reward= +15.550 evac=1 hp=100.0 s30=0.63 t=217s |
| ep=0570 [hard ] steps=021 reward= -9.080 evac=0 hp= 0.0 s30=0.63 t=217s |
| ep=0571 [hard ] steps=100 reward= -10.860 evac=0 hp=100.0 s30=0.60 t=217s |
| ep=0572 [medium] steps=015 reward= -19.150 evac=0 hp= 0.0 s30=0.60 t=217s |
| ep=0573 [hard ] steps=025 reward= +14.630 evac=1 hp=100.0 s30=0.60 t=218s |
| ep=0574 [hard ] steps=034 reward= -11.920 evac=0 hp= 0.0 s30=0.60 t=218s |
| ep=0575 [medium] steps=079 reward= -19.430 evac=0 hp= 0.0 s30=0.57 t=218s |
| ** EVAL [hard] reward=-10.578 success=0.00 |
| ep=0576 [medium] steps=013 reward= +16.620 evac=1 hp=100.0 s30=0.60 t=219s |
| >> PPO update pi_loss=+0.0070 v_loss=26.4067 entropy=1.4878 kl=0.0001 lr=4.08e-05 |
| ep=0577 [medium] steps=001 reward= +14.260 evac=1 hp=100.0 s30=0.60 t=220s |
| ep=0578 [hard ] steps=030 reward= -12.950 evac=0 hp= 0.0 s30=0.57 t=220s |
| ep=0579 [medium] steps=150 reward= -25.410 evac=0 hp= 57.0 s30=0.57 t=221s |
| ep=0580 [hard ] steps=100 reward= -8.330 evac=0 hp=100.0 s30=0.53 t=221s |
| ep=0581 [hard ] steps=021 reward= -11.530 evac=0 hp= 0.0 s30=0.50 t=221s |
| ep=0582 [medium] steps=011 reward= +15.930 evac=1 hp=100.0 s30=0.50 t=221s |
| ep=0583 [medium] steps=014 reward= +12.270 evac=1 hp= 76.0 s30=0.50 t=221s |
| ep=0584 [easy ] steps=200 reward= -11.730 evac=0 hp= 67.0 s30=0.47 t=222s |
| >> PPO update pi_loss=-0.0108 v_loss=10.7300 entropy=1.5916 kl=0.0001 lr=3.72e-05 |
| ep=0585 [medium] steps=016 reward= +12.483 evac=1 hp= 67.5 s30=0.50 t=223s |
| ep=0586 [medium] steps=031 reward= +14.330 evac=1 hp=100.0 s30=0.50 t=223s |
| ep=0587 [hard ] steps=027 reward= -9.930 evac=0 hp= 0.0 s30=0.47 t=223s |
| ep=0588 [hard ] steps=100 reward= -8.820 evac=0 hp= 99.0 s30=0.43 t=223s |
| ep=0589 [hard ] steps=100 reward= -8.980 evac=0 hp=100.0 s30=0.43 t=224s |
| ep=0590 [hard ] steps=100 reward= -10.670 evac=0 hp=100.0 s30=0.43 t=225s |
| ep=0591 [easy ] steps=095 reward= +11.080 evac=1 hp=100.0 s30=0.43 t=225s |
| ep=0592 [hard ] steps=021 reward= -10.660 evac=0 hp= 0.0 s30=0.40 t=225s |
| >> PPO update pi_loss=-0.0007 v_loss=5.2941 entropy=1.8152 kl=0.0001 lr=3.36e-05 |
| ep=0593 [easy ] steps=012 reward= +18.350 evac=1 hp=100.0 s30=0.40 t=226s |
| ep=0594 [medium] steps=021 reward= +10.757 evac=1 hp= 74.5 s30=0.40 t=226s |
| ep=0595 [medium] steps=012 reward= +15.830 evac=1 hp=100.0 s30=0.43 t=226s |
| ep=0596 [medium] steps=028 reward= -10.200 evac=0 hp= 0.0 s30=0.43 t=226s |
| ep=0597 [medium] steps=019 reward= +13.750 evac=1 hp= 88.0 s30=0.47 t=226s |
| ep=0598 [medium] steps=009 reward= +16.700 evac=1 hp=100.0 s30=0.47 t=226s |
| ep=0599 [hard ] steps=100 reward= -8.560 evac=0 hp=100.0 s30=0.43 t=226s |
| ep=0600 [medium] steps=044 reward= +13.770 evac=1 hp=100.0 s30=0.47 t=227s |
| >> PPO update pi_loss=-0.0007 v_loss=29.9181 entropy=1.5398 kl=0.0001 lr=3.00e-05 |
| ** EVAL [hard] reward=-12.068 success=0.00 |
|
|