diff --git "a/training.log" "b/training.log" --- "a/training.log" +++ "b/training.log" @@ -1,9136 +1,701 @@ -[log] Writing console output to artifacts/pyre_ppo_fixed_training.log -[server] Connecting to http://localhost:8000 ... OK - -[config] server=http://localhost:8000 -[config] device=cpu episodes=200 batch=5 eps -[config] curriculum: easy,medium -[config] PPO clip_eps=0.2 entropy=0.03 lr=0.0003 - -[network] Parameters: 12,065,650 -[network] Input dim: 23,140 (encoder.base_dim=5785 x 4 frames) -[network] Action dim: 41 (4 move + 4 look + 1 wait + 16 open + 16 close) - -[curriculum] static: easy,medium - step=001 action=door(target_id='door_5', door_state='open') hp=100.0 - step=002 action=move(direction='east') hp=100.0 - step=003 action=door(target_id='door_5', door_state='open') hp=100.0 - step=004 action=move(direction='east') hp=100.0 - step=005 action=move(direction='east') hp=100.0 - step=006 action=door(target_id='door_6', door_state='close') hp=100.0 - step=007 action=door(target_id='door_6', door_state='close') hp=100.0 - step=008 action=move(direction='south') hp=100.0 - step=009 action=door(target_id='door_2', door_state='open') hp=100.0 - step=010 action=door(target_id='door_6', door_state='close') hp=100.0 - step=011 action=move(direction='north') hp= 99.5 - step=012 action=wait() hp= 84.5 - step=013 action=move(direction='south') hp= 69.5 - step=014 action=wait() hp= 67.5 - step=015 action=move(direction='south') hp= 65.5 - step=016 action=wait() hp= 63.5 - step=017 action=wait() hp= 61.5 - step=018 action=move(direction='east') hp= 59.5 - step=019 action=move(direction='west') hp= 57.5 - step=020 action=door(target_id='door_6', door_state='close') hp= 55.5 - step=021 action=move(direction='west') hp= 53.5 - step=022 action=move(direction='west') hp= 51.5 - step=023 action=door(target_id='door_5', door_state='open') hp= 49.5 - step=024 action=move(direction='east') hp= 47.5 - step=025 action=move(direction='west') hp= 45.5 - step=026 action=move(direction='north') hp= 43.5 - step=027 action=move(direction='west') hp= 41.5 - step=028 action=door(target_id='door_5', door_state='open') hp= 39.5 - step=029 action=door(target_id='door_5', door_state='open') hp= 37.5 - step=030 action=move(direction='south') hp= 35.5 - step=031 action=move(direction='south') hp= 33.5 - step=032 action=door(target_id='door_5', door_state='open') hp= 33.0 - step=033 action=move(direction='east') hp= 32.5 - step=034 action=door(target_id='door_6', door_state='close') hp= 32.0 - step=035 action=move(direction='west') hp= 31.5 - step=036 action=move(direction='south') hp= 31.0 - step=037 action=move(direction='south') hp= 30.5 - step=038 action=move(direction='north') hp= 30.5 - step=039 action=move(direction='south') hp= 30.0 - step=040 action=move(direction='west') hp= 30.0 - step=041 action=move(direction='east') hp= 30.0 - step=042 action=move(direction='east') hp= 30.0 - step=043 action=move(direction='south') hp= 30.0 - step=044 action=move(direction='north') hp= 30.0 - step=045 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=046 action=wait() hp= 30.0 - step=047 action=move(direction='west') hp= 30.0 - step=048 action=move(direction='west') hp= 30.0 - step=049 action=move(direction='east') hp= 30.0 - step=050 action=move(direction='south') hp= 30.0 - step=051 action=move(direction='east') hp= 30.0 - step=052 action=wait() hp= 30.0 - step=053 action=wait() hp= 30.0 - step=054 action=move(direction='south') hp= 30.0 - step=055 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=056 action=move(direction='south') hp= 30.0 - step=057 action=move(direction='west') hp= 30.0 - step=058 action=wait() hp= 30.0 - step=059 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=060 action=move(direction='east') hp= 30.0 - step=061 action=move(direction='north') hp= 30.0 - step=062 action=move(direction='north') hp= 30.0 - step=063 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=064 action=move(direction='south') hp= 30.0 - step=065 action=move(direction='west') hp= 30.0 - step=066 action=move(direction='west') hp= 30.0 - step=067 action=wait() hp= 30.0 - step=068 action=move(direction='north') hp= 30.0 - step=069 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=070 action=move(direction='south') hp= 30.0 - step=071 action=wait() hp= 30.0 - step=072 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=073 action=move(direction='east') hp= 30.0 - step=074 action=move(direction='north') hp= 30.0 - step=075 action=move(direction='east') hp= 30.0 - step=076 action=move(direction='south') hp= 30.0 - step=077 action=move(direction='north') hp= 30.0 - step=078 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=079 action=move(direction='west') hp= 30.0 - step=080 action=move(direction='west') hp= 30.0 - step=081 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=082 action=move(direction='east') hp= 30.0 - step=083 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=084 action=move(direction='east') hp= 30.0 - step=085 action=wait() hp= 30.0 - step=086 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=087 action=move(direction='south') hp= 30.0 - step=088 action=wait() hp= 30.0 - step=089 action=move(direction='north') hp= 30.0 - step=090 action=move(direction='south') hp= 30.0 - step=091 action=move(direction='north') hp= 30.0 - step=092 action=move(direction='west') hp= 30.0 - step=093 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=094 action=move(direction='north') hp= 30.0 - step=095 action=move(direction='east') hp= 30.0 - step=096 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=097 action=move(direction='south') hp= 30.0 - step=098 action=move(direction='north') hp= 30.0 - step=099 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=100 action=move(direction='west') hp= 30.0 - step=101 action=wait() hp= 30.0 - step=102 action=move(direction='south') hp= 30.0 - step=103 action=wait() hp= 30.0 - step=104 action=move(direction='east') hp= 30.0 - step=105 action=move(direction='north') hp= 30.0 - step=106 action=wait() hp= 30.0 - step=107 action=move(direction='west') hp= 30.0 - step=108 action=move(direction='south') hp= 30.0 - step=109 action=move(direction='west') hp= 30.0 - step=110 action=move(direction='south') hp= 30.0 - step=111 action=move(direction='east') hp= 30.0 - step=112 action=move(direction='west') hp= 30.0 - step=113 action=move(direction='east') hp= 30.0 - step=114 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=115 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=116 action=wait() hp= 30.0 - step=117 action=move(direction='west') hp= 30.0 - step=118 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=119 action=move(direction='north') hp= 30.0 - step=120 action=move(direction='south') hp= 30.0 - step=121 action=wait() hp= 30.0 - step=122 action=move(direction='south') hp= 30.0 - step=123 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=124 action=move(direction='north') hp= 30.0 - step=125 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=126 action=wait() hp= 30.0 - step=127 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=128 action=wait() hp= 30.0 - step=129 action=wait() hp= 30.0 - step=130 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=131 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=132 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=133 action=wait() hp= 30.0 - step=134 action=move(direction='north') hp= 30.0 - step=135 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=136 action=move(direction='north') hp= 30.0 - step=137 action=move(direction='east') hp= 30.0 - step=138 action=move(direction='west') hp= 30.0 - step=139 action=move(direction='east') hp= 30.0 - step=140 action=move(direction='west') hp= 30.0 - step=141 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=142 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=143 action=move(direction='east') hp= 30.0 - step=144 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=145 action=wait() hp= 30.0 - step=146 action=move(direction='south') hp= 30.0 - step=147 action=move(direction='east') hp= 30.0 - step=148 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=149 action=move(direction='west') hp= 30.0 - step=150 action=move(direction='west') hp= 30.0 - step=151 action=wait() hp= 30.0 - step=152 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=153 action=wait() hp= 30.0 - step=154 action=move(direction='east') hp= 30.0 - step=155 action=wait() hp= 30.0 - step=156 action=move(direction='east') hp= 30.0 - step=157 action=move(direction='south') hp= 30.0 - step=158 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=159 action=wait() hp= 30.0 - step=160 action=wait() hp= 30.0 - step=161 action=move(direction='north') hp= 30.0 - step=162 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=163 action=move(direction='south') hp= 30.0 - step=164 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=165 action=move(direction='south') hp= 30.0 - step=166 action=wait() hp= 30.0 - step=167 action=move(direction='north') hp= 30.0 - step=168 action=move(direction='south') hp= 30.0 - step=169 action=move(direction='north') hp= 30.0 - step=170 action=move(direction='north') hp= 30.0 - step=171 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=172 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=173 action=move(direction='north') hp= 30.0 - step=174 action=move(direction='west') hp= 30.0 - step=175 action=move(direction='north') hp= 30.0 - step=176 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=177 action=wait() hp= 30.0 - step=178 action=move(direction='south') hp= 30.0 - step=179 action=move(direction='west') hp= 30.0 - step=180 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=181 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=182 action=wait() hp= 30.0 - step=183 action=wait() hp= 30.0 - step=184 action=wait() hp= 30.0 - step=185 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=186 action=move(direction='east') hp= 30.0 - step=187 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=188 action=wait() hp= 30.0 - step=189 action=move(direction='east') hp= 30.0 - step=190 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=191 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=192 action=door(target_id='door_5', door_state='open') hp= 30.0 - step=193 action=wait() hp= 30.0 - step=194 action=move(direction='south') hp= 30.0 - step=195 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=196 action=move(direction='north') hp= 30.0 - step=197 action=move(direction='south') hp= 30.0 - step=198 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=199 action=door(target_id='door_5', door_state='close') hp= 30.0 - step=200 action=move(direction='north') hp= 30.0 -ep=0001 [easy ] steps=200 reward= -22.960 evac=0 hp= 30.0 suc30=0.00 r30= -22.96 t=0s - step=001 action=move(direction='west') hp=100.0 -ep=0002 [easy ] steps=001 reward= +16.760 evac=1 hp=100.0 suc30=0.50 r30= -3.10 t=0s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='south') hp=100.0 - step=003 action=door(target_id='door_3', door_state='close') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=door(target_id='door_2', door_state='open') hp=100.0 - step=007 action=door(target_id='door_2', door_state='open') hp=100.0 - step=008 action=door(target_id='door_2', door_state='open') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=move(direction='east') hp=100.0 - step=012 action=move(direction='east') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=door(target_id='door_2', door_state='open') hp=100.0 - step=015 action=move(direction='east') hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='south') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=move(direction='south') hp=100.0 - step=021 action=move(direction='south') hp=100.0 - step=022 action=wait() hp=100.0 - step=023 action=move(direction='south') hp=100.0 - step=024 action=wait() hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=wait() hp=100.0 - step=027 action=move(direction='north') hp=100.0 - step=028 action=wait() hp=100.0 - step=029 action=wait() hp=100.0 - step=030 action=move(direction='north') hp=100.0 - step=031 action=move(direction='north') hp=100.0 - step=032 action=move(direction='south') hp=100.0 - step=033 action=door(target_id='door_3', door_state='close') hp=100.0 - step=034 action=move(direction='south') hp=100.0 - step=035 action=wait() hp=100.0 - step=036 action=move(direction='south') hp=100.0 - step=037 action=move(direction='north') hp=100.0 - step=038 action=wait() hp=100.0 - step=039 action=move(direction='north') hp=100.0 - step=040 action=move(direction='east') hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=move(direction='west') hp=100.0 - step=043 action=wait() hp=100.0 - step=044 action=move(direction='south') hp=100.0 - step=045 action=wait() hp=100.0 - step=046 action=move(direction='north') hp=100.0 - step=047 action=door(target_id='door_3', door_state='close') hp=100.0 - step=048 action=move(direction='east') hp=100.0 - step=049 action=door(target_id='door_3', door_state='close') hp=100.0 - step=050 action=door(target_id='door_3', door_state='close') hp=100.0 - step=051 action=move(direction='east') hp=100.0 - step=052 action=move(direction='south') hp=100.0 - step=053 action=wait() hp=100.0 - step=054 action=door(target_id='door_3', door_state='close') hp=100.0 - step=055 action=move(direction='south') hp=100.0 - step=056 action=wait() hp=100.0 - step=057 action=wait() hp=100.0 - step=058 action=wait() hp=100.0 - step=059 action=door(target_id='door_3', door_state='open') hp=100.0 - step=060 action=wait() hp=100.0 - step=061 action=move(direction='north') hp=100.0 - step=062 action=move(direction='east') hp=100.0 - step=063 action=door(target_id='door_3', door_state='close') hp=100.0 - step=064 action=wait() hp=100.0 - step=065 action=move(direction='west') hp=100.0 - step=066 action=wait() hp=100.0 - step=067 action=wait() hp=100.0 - step=068 action=door(target_id='door_3', door_state='close') hp=100.0 - step=069 action=wait() hp=100.0 - step=070 action=move(direction='south') hp=100.0 - step=071 action=wait() hp=100.0 - step=072 action=move(direction='north') hp=100.0 - step=073 action=move(direction='north') hp=100.0 - step=074 action=move(direction='west') hp=100.0 - step=075 action=wait() hp=100.0 - step=076 action=move(direction='west') hp=100.0 - step=077 action=door(target_id='door_3', door_state='open') hp=100.0 - step=078 action=door(target_id='door_3', door_state='open') hp=100.0 - step=079 action=wait() hp=100.0 - step=080 action=door(target_id='door_3', door_state='open') hp=100.0 - step=081 action=wait() hp=100.0 - step=082 action=door(target_id='door_3', door_state='open') hp=100.0 - step=083 action=door(target_id='door_3', door_state='open') hp=100.0 - step=084 action=door(target_id='door_2', door_state='open') hp=100.0 - step=085 action=move(direction='west') hp=100.0 - step=086 action=move(direction='west') hp=100.0 - step=087 action=move(direction='south') hp=100.0 - step=088 action=move(direction='west') hp=100.0 - step=089 action=wait() hp=100.0 - step=090 action=move(direction='north') hp=100.0 - step=091 action=move(direction='south') hp=100.0 - step=092 action=move(direction='north') hp=100.0 - step=093 action=door(target_id='door_2', door_state='open') hp=100.0 - step=094 action=move(direction='south') hp=100.0 - step=095 action=wait() hp=100.0 - step=096 action=move(direction='north') hp=100.0 - step=097 action=wait() hp=100.0 - step=098 action=move(direction='south') hp=100.0 - step=099 action=move(direction='east') hp=100.0 - step=100 action=move(direction='north') hp=100.0 - step=101 action=move(direction='west') hp=100.0 - step=102 action=move(direction='west') hp=100.0 - step=103 action=move(direction='east') hp=100.0 - step=104 action=move(direction='south') hp=100.0 - step=105 action=move(direction='east') hp=100.0 - step=106 action=door(target_id='door_2', door_state='open') hp=100.0 - step=107 action=door(target_id='door_2', door_state='close') hp=100.0 - step=108 action=move(direction='north') hp=100.0 - step=109 action=move(direction='south') hp=100.0 - step=110 action=move(direction='south') hp=100.0 - step=111 action=move(direction='west') hp=100.0 - step=112 action=move(direction='east') hp=100.0 - step=113 action=wait() hp=100.0 - step=114 action=move(direction='north') hp=100.0 - step=115 action=wait() hp=100.0 - step=116 action=move(direction='west') hp=100.0 - step=117 action=move(direction='north') hp=100.0 - step=118 action=door(target_id='door_2', door_state='open') hp=100.0 - step=119 action=door(target_id='door_2', door_state='open') hp=100.0 - step=120 action=move(direction='east') hp=100.0 - step=121 action=move(direction='south') hp=100.0 - step=122 action=move(direction='south') hp=100.0 - step=123 action=door(target_id='door_2', door_state='open') hp=100.0 - step=124 action=move(direction='west') hp=100.0 - step=125 action=door(target_id='door_2', door_state='close') hp=100.0 - step=126 action=wait() hp=100.0 - step=127 action=move(direction='north') hp=100.0 - step=128 action=move(direction='north') hp=100.0 - step=129 action=wait() hp=100.0 - step=130 action=door(target_id='door_1', door_state='close') hp=100.0 - step=131 action=move(direction='west') hp=100.0 - step=132 action=move(direction='east') hp=100.0 - step=133 action=move(direction='south') hp=100.0 - step=134 action=move(direction='north') hp=100.0 - step=135 action=door(target_id='door_1', door_state='close') hp=100.0 - step=136 action=move(direction='south') hp=100.0 - step=137 action=move(direction='north') hp=100.0 - step=138 action=door(target_id='door_2', door_state='open') hp=100.0 - step=139 action=move(direction='south') hp=100.0 - step=140 action=move(direction='east') hp=100.0 - step=141 action=move(direction='south') hp=100.0 - step=142 action=move(direction='west') hp=100.0 - step=143 action=door(target_id='door_2', door_state='open') hp=100.0 - step=144 action=door(target_id='door_2', door_state='close') hp=100.0 - step=145 action=wait() hp=100.0 - step=146 action=wait() hp=100.0 - step=147 action=move(direction='east') hp=100.0 - step=148 action=door(target_id='door_2', door_state='open') hp=100.0 - step=149 action=wait() hp=100.0 - step=150 action=move(direction='west') hp=100.0 - step=151 action=move(direction='north') hp=100.0 - step=152 action=move(direction='north') hp=100.0 - step=153 action=move(direction='west') hp=100.0 - step=154 action=door(target_id='door_1', door_state='close') hp=100.0 - step=155 action=wait() hp=100.0 - step=156 action=move(direction='east') hp=100.0 - step=157 action=move(direction='east') hp=100.0 - step=158 action=move(direction='south') hp=100.0 - step=159 action=wait() hp=100.0 - step=160 action=move(direction='south') hp=100.0 - step=161 action=door(target_id='door_2', door_state='close') hp=100.0 - step=162 action=door(target_id='door_2', door_state='open') hp=100.0 - step=163 action=move(direction='north') hp=100.0 - step=164 action=move(direction='west') hp=100.0 - step=165 action=move(direction='north') hp=100.0 - step=166 action=move(direction='south') hp=100.0 - step=167 action=door(target_id='door_2', door_state='close') hp=100.0 - step=168 action=wait() hp=100.0 - step=169 action=move(direction='south') hp=100.0 - step=170 action=wait() hp=100.0 - step=171 action=wait() hp=100.0 - step=172 action=move(direction='north') hp=100.0 - step=173 action=move(direction='south') hp=100.0 - step=174 action=move(direction='east') hp=100.0 - step=175 action=wait() hp=100.0 - step=176 action=move(direction='north') hp=100.0 - step=177 action=move(direction='north') hp=100.0 - step=178 action=move(direction='east') hp=100.0 - step=179 action=wait() hp=100.0 - step=180 action=move(direction='east') hp=100.0 - step=181 action=door(target_id='door_3', door_state='open') hp=100.0 - step=182 action=move(direction='east') hp=100.0 - step=183 action=move(direction='east') hp=100.0 - step=184 action=move(direction='west') hp=100.0 - step=185 action=door(target_id='door_3', door_state='open') hp=100.0 - step=186 action=door(target_id='door_3', door_state='open') hp=100.0 - step=187 action=wait() hp=100.0 - step=188 action=wait() hp=100.0 - step=189 action=move(direction='east') hp=100.0 - step=190 action=move(direction='south') hp=100.0 - step=191 action=wait() hp=100.0 - step=192 action=move(direction='north') hp=100.0 - step=193 action=move(direction='west') hp=100.0 - step=194 action=move(direction='west') hp=100.0 - step=195 action=move(direction='north') hp=100.0 - step=196 action=move(direction='north') hp=100.0 - step=197 action=wait() hp=100.0 - step=198 action=move(direction='north') hp=100.0 - step=199 action=move(direction='south') hp=100.0 - step=200 action=move(direction='north') hp=100.0 -ep=0003 [easy ] steps=200 reward= -16.890 evac=0 hp=100.0 suc30=0.33 r30= -7.70 t=1s - step=001 action=move(direction='south') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=door(target_id='door_2', door_state='close') hp=100.0 - step=005 action=door(target_id='door_2', door_state='close') hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=door(target_id='door_5', door_state='close') hp=100.0 - step=009 action=move(direction='east') hp=100.0 - step=010 action=door(target_id='door_6', door_state='close') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='east') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=door(target_id='door_2', door_state='close') hp=100.0 - step=017 action=move(direction='east') hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=door(target_id='door_3', door_state='close') hp=100.0 - step=020 action=move(direction='south') hp=100.0 - step=021 action=move(direction='south') hp=100.0 - step=022 action=move(direction='east') hp=100.0 - step=023 action=move(direction='south') hp=100.0 - step=024 action=move(direction='north') hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=move(direction='north') hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=move(direction='east') hp=100.0 - step=029 action=door(target_id='door_7', door_state='close') hp=100.0 - step=030 action=door(target_id='door_7', door_state='close') hp=100.0 - step=031 action=door(target_id='door_7', door_state='close') hp=100.0 - step=032 action=move(direction='west') hp=100.0 - step=033 action=door(target_id='door_3', door_state='close') hp=100.0 - step=034 action=door(target_id='door_2', door_state='close') hp=100.0 - step=035 action=wait() hp=100.0 - step=036 action=door(target_id='door_3', door_state='close') hp=100.0 - step=037 action=move(direction='south') hp=100.0 - step=038 action=wait() hp=100.0 - step=039 action=move(direction='west') hp=100.0 - step=040 action=move(direction='south') hp=100.0 - step=041 action=move(direction='north') hp=100.0 - step=042 action=door(target_id='door_3', door_state='close') hp=100.0 - step=043 action=door(target_id='door_6', door_state='open') hp=100.0 - step=044 action=door(target_id='door_6', door_state='open') hp=100.0 - step=045 action=door(target_id='door_7', door_state='close') hp=100.0 - step=046 action=move(direction='north') hp=100.0 - step=047 action=move(direction='west') hp=100.0 - step=048 action=move(direction='east') hp=100.0 - step=049 action=door(target_id='door_3', door_state='close') hp=100.0 - step=050 action=move(direction='north') hp=100.0 - step=051 action=move(direction='south') hp=100.0 - step=052 action=wait() hp=100.0 - step=053 action=door(target_id='door_2', door_state='close') hp=100.0 - step=054 action=move(direction='west') hp=100.0 - step=055 action=door(target_id='door_3', door_state='close') hp=100.0 - step=056 action=wait() hp=100.0 - step=057 action=move(direction='north') hp=100.0 - step=058 action=door(target_id='door_3', door_state='close') hp=100.0 - step=059 action=wait() hp=100.0 - step=060 action=move(direction='south') hp=100.0 - step=061 action=door(target_id='door_3', door_state='close') hp=100.0 - step=062 action=door(target_id='door_2', door_state='close') hp=100.0 - step=063 action=wait() hp=100.0 - step=064 action=move(direction='north') hp=100.0 - step=065 action=move(direction='south') hp=100.0 - step=066 action=move(direction='south') hp=100.0 - step=067 action=move(direction='east') hp=100.0 - step=068 action=door(target_id='door_7', door_state='close') hp=100.0 - step=069 action=move(direction='west') hp=100.0 - step=070 action=move(direction='west') hp=100.0 - step=071 action=move(direction='west') hp=100.0 - step=072 action=door(target_id='door_2', door_state='close') hp=100.0 - step=073 action=move(direction='north') hp=100.0 - step=074 action=door(target_id='door_2', door_state='close') hp=100.0 - step=075 action=move(direction='north') hp=100.0 - step=076 action=move(direction='south') hp=100.0 - step=077 action=door(target_id='door_6', door_state='open') hp=100.0 - step=078 action=move(direction='south') hp=100.0 - step=079 action=move(direction='north') hp=100.0 - step=080 action=door(target_id='door_2', door_state='close') hp=100.0 - step=081 action=wait() hp=100.0 - step=082 action=door(target_id='door_1', door_state='close') hp=100.0 - step=083 action=door(target_id='door_2', door_state='close') hp=100.0 - step=084 action=move(direction='east') hp=100.0 - step=085 action=move(direction='north') hp=100.0 - step=086 action=wait() hp=100.0 - step=087 action=wait() hp=100.0 - step=088 action=move(direction='south') hp=100.0 - step=089 action=move(direction='east') hp=100.0 - step=090 action=move(direction='west') hp=100.0 - step=091 action=door(target_id='door_6', door_state='open') hp=100.0 - step=092 action=move(direction='south') hp=100.0 - step=093 action=move(direction='west') hp=100.0 - step=094 action=move(direction='east') hp=100.0 - step=095 action=door(target_id='door_2', door_state='close') hp=100.0 - step=096 action=door(target_id='door_2', door_state='close') hp=100.0 - step=097 action=move(direction='east') hp=100.0 - step=098 action=move(direction='north') hp=100.0 - step=099 action=door(target_id='door_6', door_state='open') hp=100.0 - step=100 action=move(direction='east') hp=100.0 - step=101 action=move(direction='west') hp=100.0 - step=102 action=move(direction='south') hp=100.0 - step=103 action=move(direction='west') hp=100.0 - step=104 action=move(direction='north') hp=100.0 - step=105 action=move(direction='west') hp=100.0 - step=106 action=move(direction='west') hp=100.0 - step=107 action=move(direction='west') hp=100.0 - step=108 action=move(direction='south') hp=100.0 - step=109 action=wait() hp=100.0 - step=110 action=move(direction='east') hp=100.0 - step=111 action=door(target_id='door_6', door_state='open') hp=100.0 - step=112 action=move(direction='north') hp=100.0 - step=113 action=move(direction='west') hp=100.0 - step=114 action=move(direction='north') hp=100.0 - step=115 action=move(direction='south') hp=100.0 - step=116 action=door(target_id='door_1', door_state='close') hp=100.0 - step=117 action=move(direction='south') hp=100.0 - step=118 action=move(direction='south') hp=100.0 - step=119 action=move(direction='north') hp=100.0 - step=120 action=move(direction='east') hp=100.0 - step=121 action=door(target_id='door_2', door_state='close') hp=100.0 - step=122 action=door(target_id='door_6', door_state='open') hp=100.0 - step=123 action=door(target_id='door_5', door_state='close') hp=100.0 - step=124 action=door(target_id='door_6', door_state='open') hp=100.0 - step=125 action=door(target_id='door_1', door_state='close') hp=100.0 - step=126 action=door(target_id='door_2', door_state='close') hp=100.0 - step=127 action=move(direction='south') hp=100.0 - step=128 action=move(direction='east') hp=100.0 - step=129 action=move(direction='south') hp=100.0 - step=130 action=wait() hp=100.0 - step=131 action=wait() hp=100.0 - step=132 action=door(target_id='door_6', door_state='open') hp=100.0 - step=133 action=move(direction='north') hp=100.0 - step=134 action=move(direction='south') hp=100.0 - step=135 action=door(target_id='door_5', door_state='close') hp=100.0 - step=136 action=door(target_id='door_5', door_state='close') hp=100.0 - step=137 action=move(direction='west') hp=100.0 - step=138 action=move(direction='west') hp=100.0 - step=139 action=move(direction='west') hp=100.0 - step=140 action=move(direction='east') hp=100.0 - step=141 action=door(target_id='door_5', door_state='close') hp=100.0 - step=142 action=move(direction='west') hp=100.0 - step=143 action=move(direction='east') hp=100.0 - step=144 action=move(direction='west') hp=100.0 - step=145 action=door(target_id='door_5', door_state='open') hp=100.0 - step=146 action=move(direction='east') hp=100.0 - step=147 action=wait() hp=100.0 - step=148 action=move(direction='north') hp=100.0 - step=149 action=move(direction='north') hp=100.0 - step=150 action=move(direction='west') hp=100.0 - step=151 action=move(direction='north') hp=100.0 - step=152 action=move(direction='east') hp=100.0 - step=153 action=move(direction='west') hp=100.0 - step=154 action=move(direction='east') hp=100.0 - step=155 action=move(direction='south') hp=100.0 - step=156 action=door(target_id='door_5', door_state='close') hp=100.0 - step=157 action=door(target_id='door_5', door_state='close') hp=100.0 - step=158 action=door(target_id='door_5', door_state='close') hp=100.0 - step=159 action=wait() hp=100.0 - step=160 action=door(target_id='door_1', door_state='close') hp=100.0 - step=161 action=door(target_id='door_1', door_state='close') hp=100.0 - step=162 action=move(direction='south') hp=100.0 - step=163 action=door(target_id='door_6', door_state='close') hp=100.0 - step=164 action=move(direction='north') hp=100.0 - step=165 action=move(direction='west') hp=100.0 - step=166 action=move(direction='east') hp=100.0 - step=167 action=move(direction='north') hp=100.0 - step=168 action=door(target_id='door_1', door_state='close') hp=100.0 - step=169 action=move(direction='west') hp=100.0 - step=170 action=move(direction='west') hp=100.0 - step=171 action=door(target_id='door_1', door_state='close') hp=100.0 - step=172 action=door(target_id='door_5', door_state='close') hp=100.0 - step=173 action=move(direction='north') hp=100.0 - step=174 action=move(direction='east') hp=100.0 - step=175 action=door(target_id='door_2', door_state='close') hp=100.0 - step=176 action=door(target_id='door_1', door_state='close') hp=100.0 - step=177 action=move(direction='south') hp=100.0 - step=178 action=move(direction='north') hp=100.0 - step=179 action=move(direction='west') hp=100.0 - step=180 action=wait() hp=100.0 - step=181 action=move(direction='east') hp=100.0 - step=182 action=move(direction='east') hp=100.0 - step=183 action=door(target_id='door_1', door_state='open') hp=100.0 - step=184 action=move(direction='west') hp=100.0 - step=185 action=move(direction='north') hp=100.0 - step=186 action=wait() hp=100.0 - step=187 action=move(direction='south') hp=100.0 - step=188 action=move(direction='north') hp=100.0 - step=189 action=move(direction='south') hp=100.0 - step=190 action=door(target_id='door_2', door_state='close') hp=100.0 - step=191 action=door(target_id='door_2', door_state='close') hp=100.0 - step=192 action=door(target_id='door_1', door_state='close') hp=100.0 - step=193 action=door(target_id='door_1', door_state='open') hp=100.0 - step=194 action=move(direction='east') hp=100.0 - step=195 action=door(target_id='door_1', door_state='close') hp=100.0 - step=196 action=door(target_id='door_1', door_state='open') hp=100.0 - step=197 action=move(direction='east') hp=100.0 - step=198 action=wait() hp=100.0 - step=199 action=door(target_id='door_2', door_state='close') hp=100.0 - step=200 action=door(target_id='door_1', door_state='close') hp=100.0 -ep=0004 [easy ] steps=200 reward= -8.240 evac=0 hp=100.0 suc30=0.25 r30= -7.83 t=2s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='east') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='east') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='east') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='east') hp=100.0 - step=015 action=move(direction='east') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=move(direction='south') hp=100.0 - step=019 action=move(direction='west') hp=100.0 - step=020 action=move(direction='south') hp=100.0 - step=021 action=move(direction='south') hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=move(direction='north') hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=move(direction='north') hp=100.0 - step=027 action=move(direction='north') hp=100.0 - step=028 action=move(direction='east') hp=100.0 - step=029 action=move(direction='south') hp=100.0 - step=030 action=move(direction='north') hp=100.0 - step=031 action=move(direction='east') hp=100.0 - step=032 action=wait() hp=100.0 - step=033 action=move(direction='south') hp=100.0 - step=034 action=move(direction='north') hp=100.0 - step=035 action=wait() hp=100.0 - step=036 action=move(direction='west') hp=100.0 - step=037 action=wait() hp=100.0 - step=038 action=move(direction='east') hp=100.0 - step=039 action=wait() hp=100.0 - step=040 action=move(direction='west') hp=100.0 - step=041 action=move(direction='east') hp=100.0 - step=042 action=move(direction='west') hp=100.0 - step=043 action=wait() hp=100.0 - step=044 action=move(direction='west') hp=100.0 - step=045 action=move(direction='west') hp=100.0 -ep=0005 [easy ] steps=045 reward= +17.340 evac=1 hp=100.0 suc30=0.40 r30= -2.80 t=2s - >> PPO update samples=flushed pi_loss=-0.0122 v_loss=7.4767 entropy=1.6864 kl=0.0014 clip%=0.00 lr=2.93e-04 - step=001 action=door(target_id='door_2', door_state='open') hp=100.0 - step=002 action=door(target_id='door_2', door_state='open') hp=100.0 - step=003 action=door(target_id='door_2', door_state='open') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='south') hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=door(target_id='door_1', door_state='close') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='south') hp=100.0 - step=010 action=move(direction='south') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='east') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='south') hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=door(target_id='door_1', door_state='open') hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=wait() hp=100.0 - step=022 action=move(direction='north') hp=100.0 - step=023 action=move(direction='west') hp=100.0 -ep=0006 [easy ] steps=023 reward= +17.100 evac=1 hp=100.0 suc30=0.50 r30= +0.52 t=3s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='east') hp=100.0 - step=007 action=move(direction='east') hp=100.0 - step=008 action=door(target_id='door_2', door_state='close') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='east') hp=100.0 - step=011 action=door(target_id='door_2', door_state='close') hp=100.0 - step=012 action=door(target_id='door_2', door_state='close') hp=100.0 - step=013 action=move(direction='east') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=move(direction='east') hp=100.0 - step=017 action=move(direction='south') hp=100.0 - step=018 action=door(target_id='door_2', door_state='close') hp=100.0 - step=019 action=door(target_id='door_2', door_state='close') hp=100.0 - step=020 action=door(target_id='door_1', door_state='open') hp=100.0 - step=021 action=move(direction='west') hp=100.0 - step=022 action=move(direction='east') hp=100.0 - step=023 action=door(target_id='door_2', door_state='close') hp=100.0 - step=024 action=door(target_id='door_1', door_state='open') hp=100.0 - step=025 action=door(target_id='door_1', door_state='open') hp=100.0 - step=026 action=door(target_id='door_2', door_state='close') hp=100.0 - step=027 action=move(direction='north') hp=100.0 - step=028 action=move(direction='west') hp=100.0 - step=029 action=move(direction='south') hp=100.0 - step=030 action=door(target_id='door_5', door_state='close') hp=100.0 - step=031 action=door(target_id='door_2', door_state='close') hp=100.0 - step=032 action=move(direction='south') hp=100.0 - step=033 action=move(direction='east') hp=100.0 - step=034 action=move(direction='south') hp=100.0 - step=035 action=wait() hp=100.0 - step=036 action=move(direction='south') hp=100.0 - step=037 action=move(direction='north') hp=100.0 - step=038 action=wait() hp=100.0 - step=039 action=door(target_id='door_6', door_state='close') hp=100.0 - step=040 action=move(direction='west') hp=100.0 - step=041 action=move(direction='west') hp=100.0 - step=042 action=move(direction='east') hp=100.0 - step=043 action=move(direction='west') hp=100.0 - step=044 action=move(direction='east') hp=100.0 - step=045 action=move(direction='east') hp=100.0 - step=046 action=move(direction='west') hp=100.0 - step=047 action=wait() hp=100.0 - step=048 action=door(target_id='door_1', door_state='open') hp=100.0 - step=049 action=wait() hp=100.0 - step=050 action=move(direction='south') hp=100.0 - step=051 action=move(direction='north') hp=100.0 - step=052 action=door(target_id='door_1', door_state='open') hp=100.0 - step=053 action=door(target_id='door_5', door_state='close') hp=100.0 - step=054 action=wait() hp=100.0 - step=055 action=door(target_id='door_1', door_state='open') hp=100.0 - step=056 action=door(target_id='door_6', door_state='close') hp=100.0 - step=057 action=door(target_id='door_6', door_state='close') hp=100.0 - step=058 action=wait() hp=100.0 - step=059 action=move(direction='east') hp=100.0 - step=060 action=move(direction='south') hp=100.0 - step=061 action=door(target_id='door_5', door_state='close') hp=100.0 - step=062 action=move(direction='east') hp=100.0 - step=063 action=move(direction='west') hp=100.0 - step=064 action=move(direction='east') hp=100.0 - step=065 action=move(direction='north') hp=100.0 - step=066 action=door(target_id='door_5', door_state='close') hp=100.0 - step=067 action=move(direction='north') hp=100.0 - step=068 action=move(direction='west') hp=100.0 - step=069 action=door(target_id='door_1', door_state='open') hp=100.0 - step=070 action=move(direction='west') hp=100.0 - step=071 action=move(direction='west') hp=100.0 - step=072 action=move(direction='west') hp=100.0 - step=073 action=move(direction='south') hp=100.0 - step=074 action=door(target_id='door_5', door_state='close') hp=100.0 - step=075 action=move(direction='east') hp=100.0 - step=076 action=door(target_id='door_1', door_state='open') hp=100.0 - step=077 action=move(direction='north') hp=100.0 - step=078 action=door(target_id='door_5', door_state='close') hp=100.0 - step=079 action=move(direction='south') hp=100.0 - step=080 action=door(target_id='door_1', door_state='open') hp=100.0 - step=081 action=door(target_id='door_1', door_state='open') hp=100.0 - step=082 action=wait() hp=100.0 - step=083 action=door(target_id='door_5', door_state='close') hp=100.0 - step=084 action=move(direction='west') hp=100.0 - step=085 action=move(direction='south') hp=100.0 - step=086 action=door(target_id='door_5', door_state='open') hp=100.0 - step=087 action=move(direction='east') hp=100.0 - step=088 action=move(direction='west') hp=100.0 - step=089 action=move(direction='east') hp=100.0 - step=090 action=move(direction='north') hp=100.0 - step=091 action=door(target_id='door_1', door_state='open') hp=100.0 - step=092 action=wait() hp=100.0 - step=093 action=wait() hp=100.0 - step=094 action=wait() hp=100.0 - step=095 action=door(target_id='door_5', door_state='close') hp=100.0 - step=096 action=move(direction='west') hp=100.0 - step=097 action=move(direction='south') hp=100.0 - step=098 action=wait() hp=100.0 - step=099 action=move(direction='east') hp=100.0 - step=100 action=move(direction='east') hp=100.0 - step=101 action=door(target_id='door_5', door_state='open') hp=100.0 - step=102 action=move(direction='west') hp=100.0 - step=103 action=door(target_id='door_5', door_state='close') hp=100.0 - step=104 action=move(direction='west') hp=100.0 - step=105 action=wait() hp=100.0 - step=106 action=door(target_id='door_5', door_state='open') hp=100.0 - step=107 action=move(direction='north') hp=100.0 - step=108 action=move(direction='north') hp=100.0 - step=109 action=wait() hp=100.0 - step=110 action=wait() hp=100.0 - step=111 action=wait() hp=100.0 - step=112 action=door(target_id='door_5', door_state='close') hp=100.0 - step=113 action=door(target_id='door_5', door_state='close') hp=100.0 - step=114 action=door(target_id='door_5', door_state='close') hp=100.0 - step=115 action=move(direction='south') hp=100.0 - step=116 action=door(target_id='door_5', door_state='close') hp=100.0 - step=117 action=wait() hp=100.0 - step=118 action=move(direction='south') hp=100.0 - step=119 action=wait() hp=100.0 - step=120 action=move(direction='east') hp=100.0 - step=121 action=move(direction='east') hp=100.0 - step=122 action=door(target_id='door_6', door_state='close') hp=100.0 - step=123 action=move(direction='north') hp=100.0 - step=124 action=door(target_id='door_1', door_state='open') hp=100.0 - step=125 action=door(target_id='door_1', door_state='open') hp=100.0 - step=126 action=door(target_id='door_1', door_state='open') hp=100.0 - step=127 action=move(direction='south') hp=100.0 - step=128 action=wait() hp=100.0 - step=129 action=move(direction='west') hp=100.0 - step=130 action=door(target_id='door_1', door_state='open') hp=100.0 - step=131 action=door(target_id='door_5', door_state='close') hp=100.0 - step=132 action=door(target_id='door_6', door_state='close') hp=100.0 - step=133 action=wait() hp=100.0 - step=134 action=door(target_id='door_6', door_state='close') hp=100.0 - step=135 action=door(target_id='door_1', door_state='open') hp=100.0 - step=136 action=move(direction='east') hp=100.0 - step=137 action=move(direction='east') hp=100.0 - step=138 action=door(target_id='door_5', door_state='open') hp=100.0 - step=139 action=wait() hp=100.0 - step=140 action=door(target_id='door_5', door_state='open') hp=100.0 - step=141 action=door(target_id='door_6', door_state='close') hp=100.0 - step=142 action=door(target_id='door_6', door_state='close') hp=100.0 - step=143 action=wait() hp=100.0 - step=144 action=move(direction='east') hp=100.0 - step=145 action=door(target_id='door_6', door_state='close') hp=100.0 - step=146 action=move(direction='east') hp=100.0 - step=147 action=move(direction='north') hp=100.0 - step=148 action=move(direction='west') hp=100.0 - step=149 action=move(direction='south') hp=100.0 - step=150 action=door(target_id='door_5', door_state='open') hp=100.0 - step=151 action=door(target_id='door_5', door_state='open') hp=100.0 - step=152 action=move(direction='north') hp=100.0 - step=153 action=wait() hp=100.0 - step=154 action=wait() hp=100.0 - step=155 action=door(target_id='door_2', door_state='close') hp=100.0 - step=156 action=door(target_id='door_5', door_state='open') hp=100.0 - step=157 action=move(direction='west') hp=100.0 - step=158 action=move(direction='east') hp=100.0 - step=159 action=move(direction='north') hp=100.0 - step=160 action=door(target_id='door_6', door_state='open') hp=100.0 - step=161 action=wait() hp=100.0 - step=162 action=move(direction='north') hp=100.0 - step=163 action=move(direction='south') hp=100.0 - step=164 action=door(target_id='door_2', door_state='close') hp=100.0 - step=165 action=move(direction='east') hp=100.0 - step=166 action=move(direction='west') hp=100.0 - step=167 action=move(direction='north') hp=100.0 - step=168 action=door(target_id='door_2', door_state='close') hp=100.0 - step=169 action=wait() hp=100.0 - step=170 action=move(direction='west') hp=100.0 - step=171 action=door(target_id='door_1', door_state='open') hp=100.0 - step=172 action=door(target_id='door_2', door_state='close') hp=100.0 - step=173 action=move(direction='north') hp=100.0 - step=174 action=move(direction='west') hp=100.0 - step=175 action=door(target_id='door_1', door_state='open') hp=100.0 - step=176 action=move(direction='south') hp=100.0 - step=177 action=move(direction='north') hp=100.0 - step=178 action=wait() hp=100.0 - step=179 action=move(direction='south') hp=100.0 - step=180 action=move(direction='north') hp=100.0 - step=181 action=wait() hp=100.0 - step=182 action=move(direction='south') hp=100.0 - step=183 action=move(direction='east') hp=100.0 - step=184 action=move(direction='south') hp=100.0 - step=185 action=move(direction='west') hp=100.0 - step=186 action=move(direction='east') hp=100.0 - step=187 action=move(direction='north') hp=100.0 - step=188 action=move(direction='west') hp=100.0 - step=189 action=move(direction='north') hp=100.0 - step=190 action=door(target_id='door_2', door_state='close') hp=100.0 - step=191 action=move(direction='west') hp=100.0 - step=192 action=move(direction='south') hp=100.0 - step=193 action=move(direction='north') hp=100.0 - step=194 action=door(target_id='door_1', door_state='close') hp=100.0 - step=195 action=wait() hp=100.0 - step=196 action=door(target_id='door_5', door_state='open') hp=100.0 - step=197 action=door(target_id='door_2', door_state='close') hp=100.0 - step=198 action=door(target_id='door_5', door_state='open') hp=100.0 - step=199 action=wait() hp=100.0 - step=200 action=door(target_id='door_2', door_state='close') hp=100.0 -ep=0007 [easy ] steps=200 reward= -7.530 evac=0 hp=100.0 suc30=0.43 r30= -0.63 t=4s - step=001 action=move(direction='south') hp=100.0 - step=002 action=move(direction='east') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='south') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=move(direction='east') hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=move(direction='north') hp=100.0 - step=021 action=move(direction='south') hp=100.0 - step=022 action=move(direction='south') hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=move(direction='north') hp=100.0 - step=025 action=move(direction='north') hp=100.0 - step=026 action=move(direction='east') hp=100.0 - step=027 action=move(direction='north') hp=100.0 - step=028 action=move(direction='south') hp=100.0 - step=029 action=move(direction='east') hp=100.0 - step=030 action=move(direction='north') hp=100.0 - step=031 action=move(direction='west') hp=100.0 - step=032 action=move(direction='west') hp=100.0 - step=033 action=move(direction='south') hp=100.0 - step=034 action=move(direction='north') hp=100.0 - step=035 action=move(direction='south') hp=100.0 - step=036 action=move(direction='east') hp=100.0 - step=037 action=move(direction='east') hp=100.0 - step=038 action=move(direction='east') hp=100.0 - step=039 action=move(direction='west') hp=100.0 - step=040 action=wait() hp=100.0 - step=041 action=move(direction='north') hp=100.0 - step=042 action=move(direction='west') hp=100.0 - step=043 action=wait() hp=100.0 - step=044 action=wait() hp=100.0 - step=045 action=wait() hp=100.0 - step=046 action=wait() hp=100.0 - step=047 action=move(direction='south') hp=100.0 - step=048 action=move(direction='west') hp=100.0 - step=049 action=move(direction='east') hp=100.0 - step=050 action=move(direction='north') hp=100.0 - step=051 action=move(direction='south') hp=100.0 - step=052 action=move(direction='north') hp=100.0 - step=053 action=move(direction='east') hp=100.0 - step=054 action=move(direction='west') hp=100.0 - step=055 action=wait() hp=100.0 - step=056 action=move(direction='west') hp=100.0 - step=057 action=wait() hp=100.0 - step=058 action=move(direction='west') hp=100.0 -ep=0008 [easy ] steps=058 reward= +17.150 evac=1 hp=100.0 suc30=0.50 r30= +1.59 t=4s - step=001 action=move(direction='north') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='south') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='north') hp=100.0 -ep=0009 [easy ] steps=012 reward= +18.340 evac=1 hp=100.0 suc30=0.56 r30= +3.45 t=4s - step=001 action=door(target_id='door_5', door_state='open') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=door(target_id='door_1', door_state='open') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=move(direction='east') hp=100.0 - step=012 action=move(direction='east') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=door(target_id='door_6', door_state='open') hp=100.0 - step=015 action=move(direction='east') hp=100.0 - step=016 action=move(direction='south') hp=100.0 - step=017 action=door(target_id='door_2', door_state='open') hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=door(target_id='door_2', door_state='open') hp=100.0 - step=022 action=wait() hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=door(target_id='door_2', door_state='close') hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=door(target_id='door_2', door_state='open') hp=100.0 - step=027 action=move(direction='west') hp=100.0 - step=028 action=move(direction='north') hp=100.0 - step=029 action=wait() hp=100.0 - step=030 action=move(direction='west') hp=100.0 - step=031 action=move(direction='south') hp=100.0 - step=032 action=move(direction='east') hp=100.0 - step=033 action=move(direction='east') hp=100.0 - step=034 action=move(direction='north') hp=100.0 - step=035 action=wait() hp=100.0 - step=036 action=door(target_id='door_6', door_state='open') hp=100.0 - step=037 action=door(target_id='door_3', door_state='open') hp=100.0 - step=038 action=wait() hp=100.0 - step=039 action=door(target_id='door_1', door_state='open') hp=100.0 - step=040 action=move(direction='north') hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=move(direction='south') hp=100.0 - step=043 action=door(target_id='door_1', door_state='open') hp=100.0 - step=044 action=door(target_id='door_6', door_state='open') hp=100.0 - step=045 action=door(target_id='door_1', door_state='open') hp=100.0 - step=046 action=wait() hp=100.0 - step=047 action=wait() hp=100.0 - step=048 action=door(target_id='door_1', door_state='open') hp=100.0 - step=049 action=door(target_id='door_2', door_state='close') hp=100.0 - step=050 action=door(target_id='door_6', door_state='open') hp=100.0 - step=051 action=move(direction='south') hp=100.0 - step=052 action=move(direction='south') hp=100.0 - step=053 action=move(direction='east') hp=100.0 - step=054 action=move(direction='north') hp=100.0 - step=055 action=door(target_id='door_2', door_state='open') hp=100.0 - step=056 action=door(target_id='door_3', door_state='open') hp=100.0 - step=057 action=door(target_id='door_3', door_state='open') hp=100.0 - step=058 action=door(target_id='door_2', door_state='open') hp=100.0 - step=059 action=door(target_id='door_3', door_state='open') hp=100.0 - step=060 action=door(target_id='door_6', door_state='open') hp=100.0 - step=061 action=move(direction='north') hp=100.0 - step=062 action=move(direction='east') hp=100.0 - step=063 action=door(target_id='door_3', door_state='open') hp=100.0 - step=064 action=door(target_id='door_3', door_state='open') hp=100.0 - step=065 action=move(direction='west') hp=100.0 - step=066 action=door(target_id='door_2', door_state='open') hp=100.0 - step=067 action=door(target_id='door_2', door_state='close') hp=100.0 - step=068 action=move(direction='east') hp=100.0 - step=069 action=door(target_id='door_3', door_state='open') hp=100.0 - step=070 action=door(target_id='door_3', door_state='open') hp=100.0 - step=071 action=move(direction='west') hp=100.0 - step=072 action=wait() hp=100.0 - step=073 action=door(target_id='door_3', door_state='open') hp=100.0 - step=074 action=move(direction='west') hp=100.0 - step=075 action=move(direction='west') hp=100.0 - step=076 action=wait() hp=100.0 - step=077 action=move(direction='west') hp=100.0 - step=078 action=move(direction='south') hp=100.0 - step=079 action=move(direction='south') hp=100.0 - step=080 action=move(direction='east') hp=100.0 - step=081 action=door(target_id='door_6', door_state='open') hp=100.0 - step=082 action=door(target_id='door_2', door_state='open') hp=100.0 - step=083 action=door(target_id='door_2', door_state='open') hp=100.0 - step=084 action=door(target_id='door_2', door_state='open') hp=100.0 - step=085 action=move(direction='north') hp=100.0 - step=086 action=door(target_id='door_6', door_state='open') hp=100.0 - step=087 action=door(target_id='door_2', door_state='open') hp=100.0 - step=088 action=move(direction='north') hp=100.0 - step=089 action=door(target_id='door_1', door_state='open') hp=100.0 - step=090 action=door(target_id='door_2', door_state='open') hp=100.0 - step=091 action=move(direction='south') hp=100.0 - step=092 action=door(target_id='door_1', door_state='open') hp=100.0 - step=093 action=wait() hp=100.0 - step=094 action=move(direction='north') hp=100.0 - step=095 action=wait() hp=100.0 - step=096 action=door(target_id='door_1', door_state='open') hp=100.0 - step=097 action=move(direction='south') hp=100.0 - step=098 action=move(direction='east') hp=100.0 - step=099 action=wait() hp=100.0 - step=100 action=move(direction='east') hp=100.0 - step=101 action=wait() hp=100.0 - step=102 action=move(direction='east') hp=100.0 - step=103 action=move(direction='north') hp=100.0 - step=104 action=door(target_id='door_3', door_state='open') hp=100.0 - step=105 action=door(target_id='door_2', door_state='close') hp=100.0 - step=106 action=move(direction='west') hp=100.0 - step=107 action=wait() hp=100.0 - step=108 action=move(direction='east') hp=100.0 - step=109 action=wait() hp=100.0 - step=110 action=move(direction='west') hp=100.0 - step=111 action=move(direction='south') hp=100.0 - step=112 action=move(direction='south') hp=100.0 - step=113 action=move(direction='south') hp=100.0 - step=114 action=move(direction='south') hp=100.0 - step=115 action=door(target_id='door_6', door_state='open') hp=100.0 - step=116 action=move(direction='west') hp=100.0 - step=117 action=door(target_id='door_2', door_state='close') hp=100.0 - step=118 action=move(direction='west') hp=100.0 - step=119 action=move(direction='west') hp=100.0 - step=120 action=wait() hp=100.0 - step=121 action=move(direction='west') hp=100.0 - step=122 action=move(direction='east') hp=100.0 - step=123 action=move(direction='north') hp=100.0 - step=124 action=wait() hp=100.0 - step=125 action=move(direction='east') hp=100.0 - step=126 action=move(direction='south') hp=100.0 - step=127 action=door(target_id='door_5', door_state='open') hp=100.0 - step=128 action=door(target_id='door_5', door_state='open') hp=100.0 - step=129 action=move(direction='west') hp=100.0 - step=130 action=wait() hp=100.0 - step=131 action=move(direction='north') hp=100.0 - step=132 action=door(target_id='door_5', door_state='open') hp=100.0 - step=133 action=move(direction='east') hp=100.0 - step=134 action=move(direction='north') hp=100.0 - step=135 action=door(target_id='door_2', door_state='close') hp=100.0 - step=136 action=wait() hp=100.0 - step=137 action=move(direction='west') hp=100.0 - step=138 action=door(target_id='door_2', door_state='close') hp=100.0 - step=139 action=door(target_id='door_6', door_state='close') hp=100.0 - step=140 action=wait() hp=100.0 - step=141 action=wait() hp=100.0 - step=142 action=door(target_id='door_1', door_state='open') hp=100.0 - step=143 action=move(direction='north') hp=100.0 - step=144 action=wait() hp=100.0 - step=145 action=wait() hp=100.0 - step=146 action=wait() hp=100.0 - step=147 action=wait() hp=100.0 - step=148 action=move(direction='north') hp=100.0 - step=149 action=move(direction='east') hp=100.0 - step=150 action=wait() hp=100.0 - step=151 action=door(target_id='door_2', door_state='close') hp=100.0 - step=152 action=door(target_id='door_2', door_state='open') hp=100.0 - step=153 action=wait() hp=100.0 - step=154 action=door(target_id='door_2', door_state='close') hp=100.0 - step=155 action=move(direction='east') hp=100.0 - step=156 action=door(target_id='door_6', door_state='close') hp=100.0 - step=157 action=move(direction='south') hp=100.0 - step=158 action=move(direction='south') hp=100.0 - step=159 action=wait() hp=100.0 - step=160 action=door(target_id='door_2', door_state='open') hp=100.0 - step=161 action=move(direction='west') hp=100.0 - step=162 action=move(direction='east') hp=100.0 - step=163 action=move(direction='west') hp=100.0 - step=164 action=move(direction='west') hp=100.0 - step=165 action=wait() hp=100.0 - step=166 action=door(target_id='door_6', door_state='close') hp=100.0 - step=167 action=move(direction='east') hp=100.0 - step=168 action=move(direction='west') hp=100.0 - step=169 action=door(target_id='door_2', door_state='open') hp=100.0 - step=170 action=door(target_id='door_2', door_state='open') hp=100.0 - step=171 action=door(target_id='door_6', door_state='close') hp=100.0 - step=172 action=move(direction='east') hp=100.0 - step=173 action=door(target_id='door_6', door_state='close') hp=100.0 - step=174 action=door(target_id='door_2', door_state='open') hp=100.0 - step=175 action=move(direction='east') hp=100.0 - step=176 action=wait() hp=100.0 - step=177 action=door(target_id='door_2', door_state='open') hp=100.0 - step=178 action=move(direction='east') hp=100.0 - step=179 action=move(direction='west') hp=100.0 - step=180 action=move(direction='south') hp=100.0 - step=181 action=wait() hp=100.0 - step=182 action=wait() hp=100.0 - step=183 action=move(direction='north') hp=100.0 - step=184 action=move(direction='south') hp=100.0 - step=185 action=door(target_id='door_2', door_state='open') hp=100.0 - step=186 action=move(direction='east') hp=100.0 - step=187 action=move(direction='north') hp=100.0 - step=188 action=move(direction='west') hp=100.0 - step=189 action=wait() hp=100.0 - step=190 action=move(direction='east') hp=100.0 - step=191 action=move(direction='north') hp=100.0 - step=192 action=door(target_id='door_2', door_state='open') hp=100.0 - step=193 action=move(direction='west') hp=100.0 - step=194 action=move(direction='east') hp=100.0 - step=195 action=door(target_id='door_6', door_state='close') hp=100.0 - step=196 action=door(target_id='door_3', door_state='open') hp=100.0 - step=197 action=wait() hp=100.0 - step=198 action=wait() hp=100.0 - step=199 action=move(direction='north') hp=100.0 - step=200 action=door(target_id='door_3', door_state='open') hp=100.0 -ep=0010 [easy ] steps=200 reward= -12.040 evac=0 hp=100.0 suc30=0.50 r30= +1.90 t=5s - >> PPO update samples=flushed pi_loss=-0.0045 v_loss=12.5066 entropy=1.8199 kl=0.0010 clip%=0.00 lr=2.87e-04 - step=001 action=wait() hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='east') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='south') hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='south') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='east') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='south') hp=100.0 - step=017 action=move(direction='south') hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=move(direction='south') hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=wait() hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=move(direction='north') hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=move(direction='east') hp=100.0 - step=027 action=move(direction='east') hp=100.0 - step=028 action=wait() hp=100.0 - step=029 action=move(direction='west') hp=100.0 - step=030 action=move(direction='south') hp=100.0 - step=031 action=move(direction='west') hp=100.0 - step=032 action=wait() hp=100.0 - step=033 action=move(direction='east') hp=100.0 - step=034 action=move(direction='north') hp=100.0 - step=035 action=move(direction='west') hp=100.0 - step=036 action=move(direction='south') hp=100.0 - step=037 action=move(direction='east') hp=100.0 - step=038 action=move(direction='west') hp=100.0 - step=039 action=move(direction='north') hp=100.0 - step=040 action=move(direction='north') hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=move(direction='south') hp=100.0 - step=043 action=move(direction='north') hp=100.0 - step=044 action=move(direction='north') hp=100.0 - step=045 action=move(direction='east') hp=100.0 - step=046 action=wait() hp=100.0 - step=047 action=wait() hp=100.0 - step=048 action=move(direction='north') hp=100.0 - step=049 action=move(direction='west') hp=100.0 - step=050 action=wait() hp=100.0 - step=051 action=wait() hp=100.0 - step=052 action=move(direction='north') hp=100.0 - step=053 action=move(direction='east') hp=100.0 - step=054 action=move(direction='south') hp=100.0 - step=055 action=move(direction='west') hp=100.0 - step=056 action=move(direction='north') hp=100.0 - step=057 action=move(direction='south') hp=100.0 - step=058 action=move(direction='north') hp=100.0 - step=059 action=move(direction='north') hp=100.0 - step=060 action=move(direction='south') hp=100.0 - step=061 action=wait() hp=100.0 - step=062 action=move(direction='north') hp=100.0 - step=063 action=wait() hp=100.0 - step=064 action=move(direction='north') hp=100.0 - step=065 action=move(direction='south') hp=100.0 - step=066 action=move(direction='south') hp=100.0 - step=067 action=move(direction='south') hp=100.0 - step=068 action=move(direction='south') hp=100.0 - step=069 action=move(direction='south') hp=100.0 - step=070 action=move(direction='north') hp=100.0 - step=071 action=wait() hp=100.0 - step=072 action=move(direction='north') hp=100.0 - step=073 action=wait() hp=100.0 - step=074 action=move(direction='north') hp=100.0 - step=075 action=wait() hp=100.0 - step=076 action=wait() hp=100.0 - step=077 action=move(direction='north') hp=100.0 - step=078 action=wait() hp=100.0 - step=079 action=move(direction='north') hp=100.0 - step=080 action=move(direction='east') hp=100.0 - step=081 action=move(direction='north') hp=100.0 - step=082 action=wait() hp=100.0 - step=083 action=move(direction='south') hp=100.0 - step=084 action=wait() hp=100.0 - step=085 action=move(direction='west') hp=100.0 - step=086 action=move(direction='east') hp=100.0 - step=087 action=move(direction='west') hp=100.0 - step=088 action=wait() hp=100.0 - step=089 action=move(direction='east') hp=100.0 - step=090 action=wait() hp=100.0 - step=091 action=move(direction='west') hp=100.0 - step=092 action=wait() hp=100.0 - step=093 action=move(direction='east') hp=100.0 - step=094 action=wait() hp=100.0 - step=095 action=move(direction='north') hp=100.0 - step=096 action=move(direction='north') hp=100.0 - step=097 action=move(direction='east') hp=100.0 - step=098 action=move(direction='north') hp=100.0 - step=099 action=move(direction='east') hp=100.0 - step=100 action=move(direction='west') hp=100.0 - step=101 action=move(direction='south') hp=100.0 - step=102 action=move(direction='west') hp=100.0 - step=103 action=wait() hp=100.0 - step=104 action=move(direction='north') hp=100.0 - step=105 action=move(direction='west') hp=100.0 - step=106 action=wait() hp=100.0 - step=107 action=wait() hp=100.0 - step=108 action=move(direction='west') hp=100.0 -ep=0011 [easy ] steps=108 reward= +10.180 evac=1 hp=100.0 suc30=0.55 r30= +2.66 t=6s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='south') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=move(direction='south') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=move(direction='south') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='south') hp=100.0 - step=013 action=move(direction='south') hp=100.0 - step=014 action=door(target_id='door_3', door_state='open') hp=100.0 - step=015 action=move(direction='east') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=door(target_id='door_3', door_state='open') hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=move(direction='south') hp=100.0 - step=022 action=move(direction='east') hp=100.0 - step=023 action=move(direction='east') hp=100.0 - step=024 action=move(direction='east') hp=100.0 - step=025 action=door(target_id='door_3', door_state='open') hp=100.0 - step=026 action=move(direction='west') hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=wait() hp=100.0 - step=029 action=move(direction='west') hp=100.0 - step=030 action=wait() hp=100.0 - step=031 action=door(target_id='door_3', door_state='open') hp=100.0 - step=032 action=move(direction='west') hp=100.0 - step=033 action=door(target_id='door_3', door_state='open') hp=100.0 - step=034 action=move(direction='east') hp=100.0 - step=035 action=door(target_id='door_3', door_state='open') hp=100.0 - step=036 action=wait() hp=100.0 - step=037 action=wait() hp=100.0 - step=038 action=move(direction='west') hp=100.0 - step=039 action=move(direction='north') hp=100.0 - step=040 action=wait() hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=move(direction='south') hp=100.0 - step=043 action=door(target_id='door_3', door_state='open') hp=100.0 - step=044 action=move(direction='north') hp=100.0 - step=045 action=move(direction='south') hp=100.0 - step=046 action=move(direction='east') hp=100.0 - step=047 action=move(direction='west') hp=100.0 - step=048 action=move(direction='south') hp=100.0 - step=049 action=wait() hp=100.0 - step=050 action=wait() hp=100.0 - step=051 action=wait() hp=100.0 - step=052 action=wait() hp=100.0 - step=053 action=move(direction='south') hp=100.0 - step=054 action=wait() hp=100.0 - step=055 action=move(direction='south') hp=100.0 - step=056 action=move(direction='south') hp=100.0 - step=057 action=move(direction='north') hp=100.0 - step=058 action=wait() hp=100.0 - step=059 action=move(direction='south') hp=100.0 - step=060 action=move(direction='south') hp=100.0 - step=061 action=wait() hp=100.0 - step=062 action=move(direction='south') hp=100.0 - step=063 action=move(direction='north') hp=100.0 - step=064 action=wait() hp=100.0 - step=065 action=move(direction='north') hp=100.0 - step=066 action=move(direction='north') hp=100.0 - step=067 action=wait() hp=100.0 - step=068 action=move(direction='south') hp=100.0 - step=069 action=move(direction='north') hp=100.0 - step=070 action=wait() hp=100.0 - step=071 action=wait() hp=100.0 - step=072 action=wait() hp=100.0 - step=073 action=move(direction='north') hp=100.0 - step=074 action=wait() hp=100.0 - step=075 action=wait() hp=100.0 - step=076 action=move(direction='north') hp=100.0 - step=077 action=move(direction='north') hp=100.0 - step=078 action=move(direction='west') hp=100.0 - step=079 action=move(direction='west') hp=100.0 - step=080 action=wait() hp=100.0 - step=081 action=move(direction='south') hp=100.0 - step=082 action=move(direction='west') hp=100.0 - step=083 action=door(target_id='door_2', door_state='close') hp=100.0 - step=084 action=move(direction='north') hp=100.0 - step=085 action=move(direction='west') hp=100.0 - step=086 action=door(target_id='door_1', door_state='close') hp=100.0 - step=087 action=door(target_id='door_1', door_state='close') hp=100.0 - step=088 action=wait() hp=100.0 - step=089 action=door(target_id='door_2', door_state='close') hp=100.0 - step=090 action=move(direction='west') hp=100.0 - step=091 action=wait() hp=100.0 - step=092 action=wait() hp=100.0 - step=093 action=move(direction='east') hp=100.0 - step=094 action=move(direction='west') hp=100.0 - step=095 action=wait() hp=100.0 - step=096 action=move(direction='east') hp=100.0 - step=097 action=wait() hp=100.0 - step=098 action=door(target_id='door_1', door_state='close') hp=100.0 - step=099 action=door(target_id='door_2', door_state='close') hp=100.0 - step=100 action=move(direction='west') hp=100.0 - step=101 action=wait() hp=100.0 - step=102 action=move(direction='east') hp=100.0 - step=103 action=door(target_id='door_1', door_state='close') hp=100.0 - step=104 action=wait() hp=100.0 - step=105 action=door(target_id='door_1', door_state='close') hp=100.0 - step=106 action=wait() hp=100.0 - step=107 action=move(direction='east') hp=100.0 - step=108 action=door(target_id='door_1', door_state='close') hp=100.0 - step=109 action=wait() hp=100.0 - step=110 action=wait() hp=100.0 - step=111 action=move(direction='south') hp=100.0 - step=112 action=move(direction='north') hp=100.0 - step=113 action=door(target_id='door_2', door_state='close') hp=100.0 - step=114 action=move(direction='east') hp=100.0 - step=115 action=move(direction='west') hp=100.0 - step=116 action=move(direction='west') hp=100.0 - step=117 action=move(direction='west') hp=100.0 - step=118 action=wait() hp=100.0 - step=119 action=move(direction='west') hp=100.0 - step=120 action=door(target_id='door_1', door_state='close') hp=100.0 - step=121 action=move(direction='east') hp=100.0 - step=122 action=move(direction='west') hp=100.0 - step=123 action=wait() hp=100.0 - step=124 action=door(target_id='door_1', door_state='close') hp=100.0 - step=125 action=wait() hp=100.0 - step=126 action=move(direction='east') hp=100.0 - step=127 action=move(direction='south') hp=100.0 - step=128 action=move(direction='north') hp=100.0 - step=129 action=move(direction='west') hp=100.0 - step=130 action=move(direction='east') hp=100.0 - step=131 action=move(direction='west') hp=100.0 - step=132 action=move(direction='west') hp=100.0 -ep=0012 [easy ] steps=132 reward= +7.700 evac=1 hp=100.0 suc30=0.58 r30= +3.08 t=6s - step=001 action=door(target_id='door_2', door_state='close') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=door(target_id='door_2', door_state='close') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=door(target_id='door_6', door_state='open') hp=100.0 - step=006 action=move(direction='south') hp=100.0 - step=007 action=move(direction='south') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=door(target_id='door_6', door_state='open') hp=100.0 - step=012 action=door(target_id='door_5', door_state='close') hp=100.0 - step=013 action=move(direction='east') hp=100.0 - step=014 action=door(target_id='door_6', door_state='open') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=move(direction='south') hp=100.0 - step=018 action=door(target_id='door_6', door_state='close') hp=100.0 - step=019 action=door(target_id='door_6', door_state='close') hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=door(target_id='door_6', door_state='close') hp=100.0 - step=023 action=door(target_id='door_6', door_state='close') hp=100.0 - step=024 action=move(direction='west') hp=100.0 - step=025 action=move(direction='north') hp=100.0 - step=026 action=door(target_id='door_1', door_state='close') hp=100.0 - step=027 action=move(direction='north') hp=100.0 - step=028 action=door(target_id='door_1', door_state='close') hp=100.0 - step=029 action=door(target_id='door_1', door_state='open') hp=100.0 - step=030 action=wait() hp=100.0 - step=031 action=door(target_id='door_1', door_state='close') hp=100.0 - step=032 action=door(target_id='door_1', door_state='open') hp=100.0 - step=033 action=move(direction='west') hp=100.0 - step=034 action=door(target_id='door_5', door_state='close') hp=100.0 - step=035 action=door(target_id='door_1', door_state='close') hp=100.0 - step=036 action=wait() hp=100.0 - step=037 action=wait() hp=100.0 - step=038 action=move(direction='north') hp=100.0 - step=039 action=move(direction='south') hp=100.0 - step=040 action=move(direction='south') hp=100.0 - step=041 action=move(direction='south') hp=100.0 - step=042 action=door(target_id='door_1', door_state='close') hp=100.0 - step=043 action=move(direction='south') hp=100.0 - step=044 action=move(direction='east') hp=100.0 - step=045 action=door(target_id='door_6', door_state='close') hp=100.0 - step=046 action=wait() hp=100.0 - step=047 action=move(direction='east') hp=100.0 - step=048 action=move(direction='north') hp=100.0 - step=049 action=wait() hp=100.0 - step=050 action=door(target_id='door_6', door_state='close') hp=100.0 - step=051 action=door(target_id='door_5', door_state='close') hp=100.0 - step=052 action=door(target_id='door_5', door_state='close') hp=100.0 - step=053 action=door(target_id='door_5', door_state='close') hp=100.0 - step=054 action=door(target_id='door_1', door_state='close') hp=100.0 - step=055 action=door(target_id='door_1', door_state='close') hp=100.0 - step=056 action=door(target_id='door_5', door_state='close') hp=100.0 - step=057 action=move(direction='south') hp=100.0 - step=058 action=move(direction='east') hp=100.0 - step=059 action=wait() hp=100.0 - step=060 action=move(direction='north') hp=100.0 - step=061 action=move(direction='north') hp=100.0 - step=062 action=door(target_id='door_6', door_state='close') hp=100.0 - step=063 action=move(direction='south') hp=100.0 - step=064 action=move(direction='east') hp=100.0 - step=065 action=door(target_id='door_2', door_state='close') hp=100.0 - step=066 action=door(target_id='door_2', door_state='close') hp=100.0 - step=067 action=move(direction='west') hp=100.0 - step=068 action=move(direction='south') hp=100.0 - step=069 action=wait() hp=100.0 - step=070 action=move(direction='west') hp=100.0 - step=071 action=move(direction='east') hp=100.0 - step=072 action=door(target_id='door_5', door_state='close') hp=100.0 - step=073 action=move(direction='east') hp=100.0 - step=074 action=move(direction='north') hp=100.0 - step=075 action=door(target_id='door_6', door_state='close') hp=100.0 - step=076 action=move(direction='south') hp=100.0 - step=077 action=door(target_id='door_5', door_state='close') hp=100.0 - step=078 action=move(direction='west') hp=100.0 - step=079 action=move(direction='west') hp=100.0 - step=080 action=door(target_id='door_6', door_state='close') hp=100.0 - step=081 action=move(direction='north') hp=100.0 - step=082 action=move(direction='west') hp=100.0 - step=083 action=door(target_id='door_1', door_state='close') hp=100.0 - step=084 action=move(direction='east') hp=100.0 - step=085 action=door(target_id='door_5', door_state='close') hp=100.0 - step=086 action=door(target_id='door_1', door_state='close') hp=100.0 - step=087 action=move(direction='west') hp=100.0 - step=088 action=move(direction='north') hp=100.0 - step=089 action=door(target_id='door_1', door_state='close') hp=100.0 - step=090 action=move(direction='west') hp=100.0 - step=091 action=move(direction='east') hp=100.0 - step=092 action=move(direction='north') hp=100.0 - step=093 action=wait() hp=100.0 - step=094 action=move(direction='west') hp=100.0 - step=095 action=move(direction='north') hp=100.0 - step=096 action=door(target_id='door_1', door_state='close') hp=100.0 - step=097 action=move(direction='east') hp=100.0 - step=098 action=door(target_id='door_5', door_state='close') hp=100.0 - step=099 action=move(direction='west') hp=100.0 - step=100 action=move(direction='east') hp=100.0 - step=101 action=door(target_id='door_2', door_state='close') hp=100.0 - step=102 action=door(target_id='door_2', door_state='close') hp=100.0 - step=103 action=door(target_id='door_2', door_state='close') hp=100.0 - step=104 action=move(direction='south') hp=100.0 - step=105 action=wait() hp=100.0 - step=106 action=move(direction='west') hp=100.0 - step=107 action=move(direction='west') hp=100.0 -ep=0013 [easy ] steps=107 reward= +14.170 evac=1 hp=100.0 suc30=0.62 r30= +3.93 t=7s - step=001 action=move(direction='south') hp=100.0 - step=002 action=move(direction='east') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=move(direction='south') hp=100.0 - step=006 action=move(direction='east') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='east') hp=100.0 - step=009 action=move(direction='east') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='east') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=move(direction='south') hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=move(direction='south') hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=move(direction='north') hp=100.0 - step=024 action=move(direction='west') hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=wait() hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=move(direction='west') hp=100.0 - step=029 action=wait() hp=100.0 - step=030 action=move(direction='south') hp=100.0 - step=031 action=move(direction='north') hp=100.0 - step=032 action=wait() hp=100.0 - step=033 action=move(direction='north') hp=100.0 - step=034 action=wait() hp=100.0 - step=035 action=wait() hp=100.0 - step=036 action=move(direction='north') hp=100.0 - step=037 action=wait() hp=100.0 - step=038 action=wait() hp=100.0 - step=039 action=move(direction='north') hp=100.0 - step=040 action=move(direction='south') hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=move(direction='east') hp=100.0 - step=043 action=wait() hp=100.0 - step=044 action=move(direction='west') hp=100.0 - step=045 action=move(direction='north') hp=100.0 - step=046 action=move(direction='north') hp=100.0 - step=047 action=move(direction='north') hp=100.0 - step=048 action=move(direction='east') hp=100.0 - step=049 action=move(direction='south') hp=100.0 - step=050 action=move(direction='west') hp=100.0 - step=051 action=wait() hp=100.0 - step=052 action=move(direction='north') hp=100.0 - step=053 action=wait() hp=100.0 - step=054 action=move(direction='east') hp=100.0 - step=055 action=move(direction='north') hp=100.0 - step=056 action=move(direction='west') hp=100.0 - step=057 action=move(direction='west') hp=100.0 -ep=0014 [easy ] steps=057 reward= +17.940 evac=1 hp=100.0 suc30=0.64 r30= +4.93 t=7s - step=001 action=door(target_id='door_4', door_state='close') hp=100.0 - step=002 action=door(target_id='door_4', door_state='close') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=move(direction='south') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='east') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='south') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=door(target_id='door_3', door_state='open') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=move(direction='east') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=door(target_id='door_3', door_state='open') hp=100.0 - step=018 action=door(target_id='door_3', door_state='open') hp=100.0 - step=019 action=move(direction='east') hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=move(direction='south') hp=100.0 - step=022 action=door(target_id='door_4', door_state='close') hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=wait() hp=100.0 - step=025 action=move(direction='north') hp=100.0 - step=026 action=move(direction='west') hp=100.0 - step=027 action=door(target_id='door_3', door_state='open') hp=100.0 - step=028 action=move(direction='east') hp=100.0 - step=029 action=move(direction='west') hp=100.0 - step=030 action=door(target_id='door_3', door_state='open') hp=100.0 - step=031 action=wait() hp=100.0 - step=032 action=move(direction='east') hp=100.0 - step=033 action=move(direction='east') hp=100.0 - step=034 action=wait() hp=100.0 - step=035 action=door(target_id='door_4', door_state='close') hp=100.0 - step=036 action=move(direction='east') hp=100.0 - step=037 action=move(direction='south') hp=100.0 - step=038 action=move(direction='south') hp=100.0 - step=039 action=door(target_id='door_4', door_state='close') hp=100.0 - step=040 action=move(direction='north') hp=100.0 - step=041 action=door(target_id='door_4', door_state='open') hp=100.0 - step=042 action=move(direction='west') hp=100.0 - step=043 action=move(direction='north') hp=100.0 - step=044 action=move(direction='east') hp=100.0 - step=045 action=move(direction='west') hp=100.0 - step=046 action=move(direction='south') hp=100.0 - step=047 action=move(direction='south') hp=100.0 - step=048 action=move(direction='west') hp=100.0 - step=049 action=wait() hp=100.0 - step=050 action=door(target_id='door_4', door_state='close') hp=100.0 - step=051 action=move(direction='east') hp=100.0 - step=052 action=door(target_id='door_4', door_state='close') hp=100.0 - step=053 action=door(target_id='door_4', door_state='open') hp=100.0 - step=054 action=wait() hp=100.0 - step=055 action=move(direction='west') hp=100.0 - step=056 action=wait() hp=100.0 - step=057 action=move(direction='east') hp=100.0 - step=058 action=door(target_id='door_4', door_state='close') hp=100.0 - step=059 action=door(target_id='door_4', door_state='open') hp=100.0 - step=060 action=move(direction='north') hp=100.0 - step=061 action=door(target_id='door_4', door_state='close') hp=100.0 - step=062 action=move(direction='west') hp=100.0 - step=063 action=door(target_id='door_4', door_state='close') hp=100.0 - step=064 action=wait() hp=100.0 - step=065 action=door(target_id='door_4', door_state='close') hp=100.0 - step=066 action=move(direction='south') hp=100.0 - step=067 action=move(direction='north') hp=100.0 - step=068 action=wait() hp=100.0 - step=069 action=door(target_id='door_4', door_state='close') hp=100.0 - step=070 action=door(target_id='door_4', door_state='close') hp=100.0 - step=071 action=move(direction='east') hp=100.0 - step=072 action=wait() hp=100.0 - step=073 action=wait() hp=100.0 - step=074 action=wait() hp=100.0 - step=075 action=door(target_id='door_4', door_state='close') hp=100.0 - step=076 action=move(direction='east') hp=100.0 - step=077 action=move(direction='north') hp=100.0 - step=078 action=door(target_id='door_4', door_state='close') hp=100.0 - step=079 action=wait() hp=100.0 - step=080 action=move(direction='east') hp=100.0 -ep=0015 [easy ] steps=080 reward= +14.650 evac=1 hp=100.0 suc30=0.67 r30= +5.58 t=7s - >> PPO update samples=flushed pi_loss=-0.0131 v_loss=10.8125 entropy=1.5430 kl=0.0023 clip%=0.01 lr=2.80e-04 - step=001 action=door(target_id='door_4', door_state='open') hp=100.0 - step=002 action=door(target_id='door_7', door_state='close') hp=100.0 - step=003 action=move(direction='east') hp=100.0 - step=004 action=move(direction='south') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='south') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='east') hp=100.0 - step=009 action=door(target_id='door_7', door_state='close') hp=100.0 - step=010 action=move(direction='east') hp=100.0 - step=011 action=door(target_id='door_7', door_state='close') hp=100.0 - step=012 action=door(target_id='door_3', door_state='close') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=door(target_id='door_3', door_state='close') hp=100.0 - step=015 action=move(direction='east') hp=100.0 - step=016 action=door(target_id='door_3', door_state='close') hp=100.0 - step=017 action=door(target_id='door_3', door_state='close') hp=100.0 - step=018 action=door(target_id='door_8', door_state='open') hp=100.0 - step=019 action=door(target_id='door_3', door_state='close') hp=100.0 - step=020 action=door(target_id='door_8', door_state='open') hp=100.0 - step=021 action=move(direction='east') hp=100.0 - step=022 action=move(direction='east') hp=100.0 - step=023 action=move(direction='north') hp=100.0 - step=024 action=move(direction='north') hp=100.0 - step=025 action=door(target_id='door_4', door_state='open') hp=100.0 - step=026 action=move(direction='north') hp=100.0 - step=027 action=move(direction='south') hp=100.0 - step=028 action=door(target_id='door_8', door_state='open') hp=100.0 - step=029 action=wait() hp=100.0 - step=030 action=wait() hp=100.0 - step=031 action=move(direction='north') hp=100.0 - step=032 action=move(direction='north') hp=100.0 - step=033 action=move(direction='west') hp=100.0 - step=034 action=door(target_id='door_4', door_state='close') hp=100.0 - step=035 action=move(direction='north') hp=100.0 - step=036 action=door(target_id='door_4', door_state='open') hp=100.0 - step=037 action=wait() hp=100.0 - step=038 action=move(direction='south') hp=100.0 - step=039 action=move(direction='east') hp=100.0 - step=040 action=move(direction='west') hp=100.0 - step=041 action=move(direction='north') hp=100.0 - step=042 action=move(direction='north') hp=100.0 - step=043 action=move(direction='east') hp=100.0 - step=044 action=move(direction='south') hp=100.0 - step=045 action=wait() hp=100.0 - step=046 action=move(direction='west') hp=100.0 - step=047 action=move(direction='south') hp=100.0 - step=048 action=door(target_id='door_4', door_state='open') hp=100.0 - step=049 action=wait() hp=100.0 - step=050 action=move(direction='east') hp=100.0 - step=051 action=wait() hp=100.0 - step=052 action=wait() hp=100.0 - step=053 action=move(direction='north') hp=100.0 - step=054 action=wait() hp=100.0 - step=055 action=door(target_id='door_4', door_state='close') hp=100.0 - step=056 action=move(direction='south') hp=100.0 - step=057 action=move(direction='west') hp=100.0 - step=058 action=move(direction='east') hp=100.0 - step=059 action=move(direction='west') hp=100.0 - step=060 action=wait() hp=100.0 - step=061 action=wait() hp=100.0 - step=062 action=move(direction='north') hp=100.0 - step=063 action=move(direction='east') hp=100.0 - step=064 action=move(direction='south') hp=100.0 - step=065 action=wait() hp=100.0 - step=066 action=move(direction='west') hp=100.0 - step=067 action=move(direction='east') hp=100.0 - step=068 action=move(direction='north') hp=100.0 - step=069 action=move(direction='west') hp=100.0 - step=070 action=move(direction='east') hp=100.0 - step=071 action=door(target_id='door_4', door_state='open') hp=100.0 - step=072 action=wait() hp=100.0 - step=073 action=move(direction='south') hp=100.0 - step=074 action=move(direction='south') hp=100.0 - step=075 action=move(direction='south') hp=100.0 - step=076 action=door(target_id='door_3', door_state='close') hp=100.0 - step=077 action=door(target_id='door_8', door_state='open') hp=100.0 - step=078 action=wait() hp=100.0 - step=079 action=door(target_id='door_8', door_state='open') hp=100.0 - step=080 action=wait() hp=100.0 - step=081 action=move(direction='south') hp=100.0 - step=082 action=door(target_id='door_8', door_state='open') hp=100.0 - step=083 action=wait() hp=100.0 - step=084 action=move(direction='west') hp=100.0 - step=085 action=move(direction='west') hp=100.0 - step=086 action=move(direction='south') hp=100.0 - step=087 action=wait() hp=100.0 - step=088 action=door(target_id='door_4', door_state='close') hp=100.0 - step=089 action=wait() hp=100.0 - step=090 action=move(direction='east') hp=100.0 - step=091 action=move(direction='west') hp=100.0 - step=092 action=door(target_id='door_7', door_state='close') hp=100.0 - step=093 action=door(target_id='door_4', door_state='close') hp=100.0 - step=094 action=move(direction='east') hp=100.0 - step=095 action=door(target_id='door_4', door_state='close') hp=100.0 - step=096 action=door(target_id='door_8', door_state='open') hp=100.0 - step=097 action=move(direction='north') hp=100.0 - step=098 action=door(target_id='door_4', door_state='close') hp=100.0 - step=099 action=door(target_id='door_4', door_state='close') hp=100.0 - step=100 action=move(direction='north') hp=100.0 - step=101 action=door(target_id='door_4', door_state='close') hp=100.0 - step=102 action=move(direction='south') hp=100.0 - step=103 action=door(target_id='door_4', door_state='open') hp=100.0 - step=104 action=door(target_id='door_3', door_state='close') hp=100.0 - step=105 action=wait() hp=100.0 - step=106 action=door(target_id='door_3', door_state='close') hp=100.0 - step=107 action=door(target_id='door_4', door_state='open') hp=100.0 - step=108 action=move(direction='west') hp=100.0 - step=109 action=move(direction='north') hp=100.0 - step=110 action=door(target_id='door_4', door_state='open') hp=100.0 - step=111 action=door(target_id='door_4', door_state='open') hp=100.0 - step=112 action=move(direction='south') hp=100.0 - step=113 action=move(direction='north') hp=100.0 - step=114 action=move(direction='west') hp=100.0 - step=115 action=wait() hp=100.0 - step=116 action=door(target_id='door_4', door_state='open') hp=100.0 - step=117 action=door(target_id='door_3', door_state='close') hp=100.0 - step=118 action=door(target_id='door_3', door_state='open') hp=100.0 - step=119 action=door(target_id='door_4', door_state='open') hp=100.0 - step=120 action=move(direction='west') hp=100.0 - step=121 action=wait() hp=100.0 - step=122 action=move(direction='south') hp=100.0 - step=123 action=wait() hp=100.0 - step=124 action=wait() hp=100.0 - step=125 action=move(direction='north') hp=100.0 - step=126 action=door(target_id='door_7', door_state='close') hp=100.0 - step=127 action=move(direction='east') hp=100.0 - step=128 action=door(target_id='door_4', door_state='open') hp=100.0 - step=129 action=move(direction='east') hp=100.0 - step=130 action=door(target_id='door_4', door_state='open') hp=100.0 - step=131 action=wait() hp=100.0 - step=132 action=door(target_id='door_4', door_state='open') hp=100.0 - step=133 action=door(target_id='door_3', door_state='close') hp=100.0 - step=134 action=move(direction='south') hp=100.0 - step=135 action=move(direction='north') hp=100.0 - step=136 action=door(target_id='door_3', door_state='close') hp=100.0 - step=137 action=move(direction='east') hp=100.0 - step=138 action=move(direction='west') hp=100.0 - step=139 action=door(target_id='door_4', door_state='open') hp=100.0 - step=140 action=move(direction='east') hp=100.0 - step=141 action=move(direction='east') hp=100.0 - step=142 action=move(direction='west') hp=100.0 - step=143 action=move(direction='west') hp=100.0 - step=144 action=wait() hp=100.0 - step=145 action=door(target_id='door_3', door_state='close') hp=100.0 - step=146 action=door(target_id='door_4', door_state='open') hp=100.0 - step=147 action=move(direction='south') hp=100.0 - step=148 action=move(direction='east') hp=100.0 - step=149 action=wait() hp=100.0 - step=150 action=wait() hp=100.0 - step=151 action=door(target_id='door_4', door_state='open') hp=100.0 - step=152 action=move(direction='west') hp=100.0 - step=153 action=door(target_id='door_3', door_state='close') hp=100.0 - step=154 action=door(target_id='door_4', door_state='open') hp=100.0 - step=155 action=move(direction='east') hp=100.0 - step=156 action=move(direction='west') hp=100.0 - step=157 action=move(direction='south') hp=100.0 - step=158 action=wait() hp=100.0 - step=159 action=door(target_id='door_4', door_state='open') hp=100.0 - step=160 action=move(direction='east') hp=100.0 - step=161 action=wait() hp=100.0 - step=162 action=wait() hp=100.0 - step=163 action=move(direction='north') hp=100.0 - step=164 action=wait() hp=100.0 - step=165 action=move(direction='north') hp=100.0 - step=166 action=door(target_id='door_3', door_state='close') hp=100.0 - step=167 action=move(direction='west') hp=100.0 - step=168 action=door(target_id='door_3', door_state='close') hp=100.0 - step=169 action=door(target_id='door_3', door_state='close') hp=100.0 - step=170 action=door(target_id='door_4', door_state='open') hp=100.0 - step=171 action=door(target_id='door_4', door_state='open') hp=100.0 - step=172 action=door(target_id='door_3', door_state='close') hp=100.0 - step=173 action=move(direction='west') hp=100.0 - step=174 action=move(direction='west') hp=100.0 - step=175 action=wait() hp=100.0 - step=176 action=move(direction='south') hp=100.0 - step=177 action=move(direction='south') hp=100.0 - step=178 action=move(direction='west') hp=100.0 - step=179 action=wait() hp=100.0 - step=180 action=move(direction='north') hp=100.0 - step=181 action=door(target_id='door_7', door_state='close') hp=100.0 - step=182 action=move(direction='north') hp=100.0 - step=183 action=move(direction='east') hp=100.0 - step=184 action=move(direction='west') hp=100.0 - step=185 action=move(direction='east') hp=100.0 - step=186 action=door(target_id='door_7', door_state='close') hp=100.0 - step=187 action=door(target_id='door_2', door_state='open') hp=100.0 - step=188 action=wait() hp=100.0 - step=189 action=move(direction='west') hp=100.0 - step=190 action=door(target_id='door_2', door_state='open') hp=100.0 - step=191 action=move(direction='west') hp=100.0 - step=192 action=wait() hp=100.0 - step=193 action=door(target_id='door_3', door_state='close') hp=100.0 - step=194 action=door(target_id='door_2', door_state='open') hp=100.0 - step=195 action=door(target_id='door_3', door_state='close') hp=100.0 - step=196 action=move(direction='west') hp=100.0 - step=197 action=move(direction='west') hp=100.0 - step=198 action=door(target_id='door_1', door_state='open') hp=100.0 - step=199 action=door(target_id='door_2', door_state='open') hp=100.0 - step=200 action=move(direction='east') hp=100.0 -ep=0016 [easy ] steps=200 reward= -11.210 evac=0 hp=100.0 suc30=0.62 r30= +4.53 t=9s - step=001 action=move(direction='south') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='south') hp=100.0 - step=011 action=move(direction='east') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='south') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=move(direction='south') hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=move(direction='east') hp=100.0 - step=023 action=move(direction='south') hp=100.0 - step=024 action=move(direction='north') hp=100.0 - step=025 action=move(direction='west') hp=100.0 - step=026 action=move(direction='west') hp=100.0 - step=027 action=move(direction='west') hp=100.0 - step=028 action=move(direction='north') hp=100.0 - step=029 action=wait() hp=100.0 - step=030 action=move(direction='north') hp=100.0 - step=031 action=move(direction='north') hp=100.0 - step=032 action=move(direction='north') hp=100.0 - step=033 action=move(direction='north') hp=100.0 - step=034 action=move(direction='north') hp=100.0 - step=035 action=move(direction='south') hp=100.0 - step=036 action=move(direction='east') hp=100.0 - step=037 action=move(direction='west') hp=100.0 - step=038 action=move(direction='east') hp=100.0 - step=039 action=move(direction='south') hp=100.0 - step=040 action=move(direction='west') hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=move(direction='north') hp=100.0 - step=043 action=move(direction='east') hp=100.0 - step=044 action=move(direction='north') hp=100.0 - step=045 action=wait() hp=100.0 - step=046 action=move(direction='north') hp=100.0 - step=047 action=move(direction='west') hp=100.0 - step=048 action=move(direction='north') hp=100.0 - step=049 action=wait() hp=100.0 - step=050 action=move(direction='west') hp=100.0 -ep=0017 [easy ] steps=050 reward= +19.920 evac=1 hp=100.0 suc30=0.65 r30= +5.43 t=9s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='south') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='south') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=move(direction='south') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=move(direction='south') hp=100.0 - step=018 action=move(direction='south') hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=move(direction='north') hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=wait() hp=100.0 - step=023 action=move(direction='north') hp=100.0 - step=024 action=wait() hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=move(direction='south') hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=move(direction='south') hp=100.0 - step=029 action=wait() hp=100.0 - step=030 action=wait() hp=100.0 - step=031 action=wait() hp=100.0 - step=032 action=wait() hp=100.0 - step=033 action=wait() hp=100.0 - step=034 action=move(direction='north') hp=100.0 - step=035 action=wait() hp=100.0 - step=036 action=wait() hp=100.0 - step=037 action=move(direction='south') hp=100.0 - step=038 action=move(direction='south') hp=100.0 - step=039 action=move(direction='north') hp=100.0 - step=040 action=move(direction='north') hp=100.0 - step=041 action=move(direction='north') hp=100.0 - step=042 action=wait() hp=100.0 - step=043 action=move(direction='north') hp=100.0 - step=044 action=move(direction='north') hp=100.0 -ep=0018 [easy ] steps=044 reward= +14.680 evac=1 hp=100.0 suc30=0.67 r30= +5.95 t=9s - step=001 action=wait() hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=door(target_id='door_1', door_state='close') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=door(target_id='door_1', door_state='close') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='west') hp=100.0 -ep=0019 [easy ] steps=009 reward= +17.100 evac=1 hp=100.0 suc30=0.68 r30= +6.53 t=9s - step=001 action=move(direction='north') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='south') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='east') hp=100.0 - step=013 action=move(direction='east') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=move(direction='east') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=move(direction='south') hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=move(direction='south') hp=100.0 - step=024 action=wait() hp=100.0 - step=025 action=move(direction='north') hp=100.0 - step=026 action=move(direction='west') hp=100.0 - step=027 action=move(direction='east') hp=100.0 - step=028 action=move(direction='west') hp=100.0 - step=029 action=move(direction='west') hp=100.0 - step=030 action=move(direction='south') hp=100.0 - step=031 action=move(direction='north') hp=100.0 - step=032 action=move(direction='east') hp=100.0 - step=033 action=wait() hp=100.0 - step=034 action=wait() hp=100.0 - step=035 action=move(direction='west') hp=100.0 - step=036 action=wait() hp=100.0 - step=037 action=move(direction='west') hp=100.0 - step=038 action=move(direction='east') hp=100.0 - step=039 action=move(direction='west') hp=100.0 - step=040 action=move(direction='west') hp=100.0 - step=041 action=move(direction='west') hp=100.0 - step=042 action=move(direction='south') hp=100.0 - step=043 action=move(direction='north') hp=100.0 - step=044 action=move(direction='west') hp=100.0 - step=045 action=move(direction='west') hp=100.0 - step=046 action=wait() hp=100.0 - step=047 action=wait() hp=100.0 - step=048 action=move(direction='west') hp=100.0 - step=049 action=wait() hp=100.0 - step=050 action=wait() hp=100.0 - step=051 action=wait() hp=100.0 - step=052 action=move(direction='east') hp=100.0 - step=053 action=wait() hp=100.0 - step=054 action=wait() hp=100.0 - step=055 action=move(direction='west') hp=100.0 - step=056 action=move(direction='west') hp=100.0 - step=057 action=move(direction='west') hp=100.0 - step=058 action=wait() hp=100.0 - step=059 action=move(direction='east') hp=100.0 - step=060 action=wait() hp=100.0 - step=061 action=move(direction='south') hp=100.0 - step=062 action=move(direction='north') hp=100.0 - step=063 action=wait() hp=100.0 - step=064 action=move(direction='west') hp=100.0 - step=065 action=wait() hp=100.0 - step=066 action=move(direction='west') hp=100.0 -ep=0020 [easy ] steps=066 reward= +16.890 evac=1 hp=100.0 suc30=0.70 r30= +7.05 t=9s - >> PPO update samples=flushed pi_loss=-0.0216 v_loss=13.0783 entropy=1.5819 kl=0.0026 clip%=0.03 lr=2.73e-04 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp= 99.5 - step=007 action=move(direction='west') hp= 99.5 - step=008 action=move(direction='west') hp= 99.5 - step=009 action=move(direction='west') hp= 99.5 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - ** EVAL [medium] reward=+15.697 success=1.00 steps=7.0 - step=001 action=move(direction='south') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='east') hp=100.0 - step=009 action=move(direction='east') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=door(target_id='door_3', door_state='close') hp=100.0 - step=015 action=door(target_id='door_3', door_state='close') hp=100.0 - step=016 action=door(target_id='door_3', door_state='close') hp=100.0 - step=017 action=move(direction='east') hp=100.0 - step=018 action=move(direction='east') hp=100.0 - step=019 action=door(target_id='door_4', door_state='open') hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=move(direction='west') hp=100.0 - step=022 action=door(target_id='door_3', door_state='close') hp=100.0 - step=023 action=door(target_id='door_3', door_state='close') hp=100.0 - step=024 action=move(direction='south') hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=door(target_id='door_3', door_state='close') hp=100.0 - step=027 action=move(direction='south') hp=100.0 - step=028 action=wait() hp=100.0 - step=029 action=move(direction='west') hp=100.0 - step=030 action=move(direction='east') hp=100.0 - step=031 action=move(direction='west') hp=100.0 - step=032 action=move(direction='east') hp=100.0 - step=033 action=door(target_id='door_3', door_state='close') hp=100.0 - step=034 action=door(target_id='door_3', door_state='open') hp=100.0 - step=035 action=door(target_id='door_3', door_state='close') hp=100.0 - step=036 action=wait() hp=100.0 - step=037 action=move(direction='north') hp=100.0 - step=038 action=door(target_id='door_3', door_state='open') hp=100.0 - step=039 action=move(direction='west') hp=100.0 - step=040 action=move(direction='south') hp=100.0 - step=041 action=move(direction='north') hp=100.0 - step=042 action=move(direction='north') hp=100.0 - step=043 action=move(direction='west') hp=100.0 - step=044 action=move(direction='west') hp=100.0 - step=045 action=move(direction='west') hp=100.0 - step=046 action=door(target_id='door_2', door_state='close') hp=100.0 - step=047 action=wait() hp=100.0 - step=048 action=wait() hp=100.0 - step=049 action=wait() hp=100.0 - step=050 action=door(target_id='door_2', door_state='close') hp=100.0 - step=051 action=wait() hp=100.0 - step=052 action=door(target_id='door_2', door_state='close') hp=100.0 - step=053 action=wait() hp=100.0 - step=054 action=wait() hp=100.0 - step=055 action=door(target_id='door_2', door_state='close') hp=100.0 - step=056 action=move(direction='east') hp=100.0 - step=057 action=door(target_id='door_3', door_state='open') hp=100.0 - step=058 action=door(target_id='door_2', door_state='close') hp=100.0 - step=059 action=door(target_id='door_2', door_state='close') hp=100.0 - step=060 action=wait() hp=100.0 - step=061 action=move(direction='east') hp=100.0 - step=062 action=move(direction='east') hp=100.0 - step=063 action=door(target_id='door_3', door_state='open') hp=100.0 - step=064 action=wait() hp=100.0 - step=065 action=move(direction='west') hp=100.0 - step=066 action=door(target_id='door_3', door_state='open') hp=100.0 - step=067 action=move(direction='west') hp=100.0 - step=068 action=move(direction='north') hp=100.0 - step=069 action=move(direction='north') hp=100.0 - step=070 action=wait() hp=100.0 - step=071 action=move(direction='north') hp=100.0 - step=072 action=wait() hp=100.0 - step=073 action=move(direction='south') hp=100.0 - step=074 action=move(direction='north') hp=100.0 - step=075 action=wait() hp=100.0 - step=076 action=wait() hp=100.0 - step=077 action=move(direction='north') hp=100.0 - step=078 action=move(direction='south') hp=100.0 - step=079 action=move(direction='north') hp=100.0 - step=080 action=move(direction='north') hp=100.0 - step=081 action=move(direction='north') hp=100.0 - step=082 action=move(direction='north') hp=100.0 -ep=0021 [easy ] steps=082 reward= +12.220 evac=1 hp=100.0 suc30=0.71 r30= +7.30 t=10s - step=001 action=door(target_id='door_2', door_state='close') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=door(target_id='door_2', door_state='close') hp=100.0 - step=005 action=door(target_id='door_2', door_state='close') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=door(target_id='door_2', door_state='close') hp=100.0 - step=008 action=door(target_id='door_3', door_state='close') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=move(direction='east') hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=door(target_id='door_2', door_state='open') hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=move(direction='east') hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=move(direction='west') hp=100.0 - step=024 action=move(direction='west') hp=100.0 - step=025 action=door(target_id='door_5', door_state='close') hp=100.0 - step=026 action=move(direction='west') hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=move(direction='south') hp=100.0 - step=029 action=wait() hp=100.0 - step=030 action=move(direction='north') hp=100.0 - step=031 action=move(direction='south') hp=100.0 - step=032 action=door(target_id='door_5', door_state='close') hp=100.0 - step=033 action=move(direction='east') hp=100.0 - step=034 action=move(direction='south') hp=100.0 - step=035 action=move(direction='north') hp=100.0 - step=036 action=move(direction='north') hp=100.0 - step=037 action=move(direction='west') hp=100.0 - step=038 action=wait() hp=100.0 - step=039 action=door(target_id='door_1', door_state='close') hp=100.0 - step=040 action=move(direction='east') hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=wait() hp=100.0 - step=043 action=move(direction='east') hp=100.0 - step=044 action=move(direction='east') hp=100.0 - step=045 action=wait() hp=100.0 - step=046 action=door(target_id='door_2', door_state='open') hp=100.0 - step=047 action=door(target_id='door_2', door_state='open') hp=100.0 - step=048 action=move(direction='west') hp=100.0 - step=049 action=move(direction='west') hp=100.0 - step=050 action=move(direction='south') hp=100.0 - step=051 action=wait() hp=100.0 - step=052 action=move(direction='west') hp=100.0 - step=053 action=move(direction='south') hp=100.0 - step=054 action=move(direction='north') hp=100.0 - step=055 action=move(direction='west') hp=100.0 -ep=0022 [easy ] steps=055 reward= +16.940 evac=1 hp=100.0 suc30=0.73 r30= +7.74 t=10s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='east') hp=100.0 - step=007 action=move(direction='south') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='south') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='east') hp=100.0 - step=015 action=move(direction='south') hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=move(direction='south') hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=move(direction='west') hp=100.0 -ep=0023 [easy ] steps=020 reward= +19.400 evac=1 hp=100.0 suc30=0.74 r30= +8.24 t=10s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='east') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=door(target_id='door_2', door_state='close') hp=100.0 - step=005 action=door(target_id='door_2', door_state='close') hp=100.0 - step=006 action=door(target_id='door_2', door_state='close') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=door(target_id='door_2', door_state='close') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=door(target_id='door_2', door_state='close') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='west') hp=100.0 -ep=0024 [easy ] steps=016 reward= +18.470 evac=1 hp=100.0 suc30=0.75 r30= +8.67 t=10s - step=001 action=door(target_id='door_3', door_state='open') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=door(target_id='door_3', door_state='open') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=door(target_id='door_2', door_state='open') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=door(target_id='door_2', door_state='open') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=door(target_id='door_5', door_state='close') hp=100.0 - step=018 action=move(direction='south') hp=100.0 - step=019 action=door(target_id='door_1', door_state='open') hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=move(direction='west') hp=100.0 -ep=0025 [easy ] steps=021 reward= +18.470 evac=1 hp=100.0 suc30=0.76 r30= +9.06 t=11s - >> PPO update samples=flushed pi_loss=-0.0041 v_loss=31.9513 entropy=1.4992 kl=0.0007 clip%=0.00 lr=2.66e-04 - [ckpt] saved -> artifacts/pyre_ppo_http_ckpt.pt - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='east') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='south') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=move(direction='east') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='east') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=move(direction='south') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=move(direction='south') hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=move(direction='west') hp=100.0 - step=022 action=move(direction='east') hp=100.0 - step=023 action=move(direction='west') hp=100.0 - step=024 action=move(direction='south') hp=100.0 - step=025 action=move(direction='west') hp=100.0 - step=026 action=move(direction='north') hp=100.0 - step=027 action=move(direction='north') hp=100.0 - step=028 action=move(direction='west') hp=100.0 - step=029 action=move(direction='north') hp=100.0 - step=030 action=move(direction='north') hp=100.0 - step=031 action=move(direction='west') hp=100.0 - step=032 action=move(direction='north') hp=100.0 - step=033 action=move(direction='east') hp=100.0 - step=034 action=move(direction='west') hp=100.0 - step=035 action=move(direction='west') hp=100.0 - step=036 action=move(direction='north') hp=100.0 - step=037 action=move(direction='west') hp=100.0 - step=038 action=wait() hp=100.0 - step=039 action=move(direction='west') hp=100.0 - step=040 action=move(direction='west') hp=100.0 - step=041 action=move(direction='east') hp=100.0 - step=042 action=move(direction='west') hp=100.0 - step=043 action=move(direction='west') hp=100.0 - step=044 action=wait() hp=100.0 - step=045 action=move(direction='west') hp=100.0 - step=046 action=wait() hp=100.0 - step=047 action=move(direction='east') hp=100.0 - step=048 action=wait() hp=100.0 - step=049 action=move(direction='south') hp=100.0 - step=050 action=move(direction='west') hp=100.0 - step=051 action=move(direction='north') hp=100.0 - step=052 action=move(direction='west') hp=100.0 - step=053 action=move(direction='south') hp=100.0 - step=054 action=move(direction='south') hp=100.0 - step=055 action=wait() hp=100.0 - step=056 action=move(direction='north') hp=100.0 - step=057 action=move(direction='north') hp=100.0 - step=058 action=wait() hp=100.0 - step=059 action=wait() hp=100.0 - step=060 action=move(direction='west') hp=100.0 -ep=0026 [easy ] steps=060 reward= +19.690 evac=1 hp=100.0 suc30=0.77 r30= +9.47 t=11s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='north') hp=100.0 -ep=0027 [easy ] steps=009 reward= +18.380 evac=1 hp=100.0 suc30=0.78 r30= +9.80 t=11s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=door(target_id='door_3', door_state='close') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='south') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=door(target_id='door_3', door_state='close') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=door(target_id='door_3', door_state='close') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='east') hp=100.0 - step=015 action=move(direction='south') hp=100.0 - step=016 action=move(direction='east') hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=move(direction='east') hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=door(target_id='door_8', door_state='close') hp=100.0 - step=022 action=move(direction='east') hp=100.0 - step=023 action=move(direction='north') hp=100.0 - step=024 action=move(direction='north') hp=100.0 - step=025 action=move(direction='west') hp=100.0 - step=026 action=move(direction='south') hp=100.0 - step=027 action=move(direction='west') hp=100.0 - step=028 action=door(target_id='door_4', door_state='open') hp=100.0 - step=029 action=move(direction='west') hp=100.0 - step=030 action=move(direction='west') hp=100.0 - step=031 action=door(target_id='door_3', door_state='close') hp=100.0 - step=032 action=move(direction='north') hp=100.0 - step=033 action=door(target_id='door_3', door_state='close') hp=100.0 - step=034 action=move(direction='west') hp=100.0 - step=035 action=move(direction='west') hp=100.0 - step=036 action=move(direction='south') hp=100.0 - step=037 action=door(target_id='door_6', door_state='close') hp=100.0 - step=038 action=door(target_id='door_3', door_state='open') hp=100.0 - step=039 action=door(target_id='door_3', door_state='open') hp=100.0 - step=040 action=move(direction='west') hp=100.0 - step=041 action=move(direction='west') hp=100.0 - step=042 action=move(direction='north') hp=100.0 - step=043 action=door(target_id='door_2', door_state='open') hp=100.0 - step=044 action=door(target_id='door_2', door_state='close') hp=100.0 - step=045 action=move(direction='south') hp=100.0 - step=046 action=door(target_id='door_2', door_state='open') hp=100.0 - step=047 action=move(direction='west') hp=100.0 - step=048 action=move(direction='south') hp=100.0 - step=049 action=door(target_id='door_6', door_state='close') hp=100.0 - step=050 action=move(direction='north') hp=100.0 - step=051 action=move(direction='west') hp=100.0 - step=052 action=move(direction='west') hp=100.0 - step=053 action=door(target_id='door_1', door_state='close') hp=100.0 - step=054 action=move(direction='east') hp=100.0 - step=055 action=move(direction='north') hp=100.0 - step=056 action=door(target_id='door_2', door_state='open') hp=100.0 - step=057 action=door(target_id='door_2', door_state='open') hp=100.0 - step=058 action=door(target_id='door_1', door_state='open') hp=100.0 - step=059 action=move(direction='west') hp=100.0 - step=060 action=door(target_id='door_2', door_state='open') hp=100.0 - step=061 action=move(direction='west') hp=100.0 - step=062 action=wait() hp=100.0 - step=063 action=wait() hp=100.0 - step=064 action=move(direction='east') hp=100.0 - step=065 action=move(direction='east') hp=100.0 - step=066 action=move(direction='east') hp=100.0 - step=067 action=move(direction='east') hp=100.0 - step=068 action=move(direction='south') hp=100.0 - step=069 action=door(target_id='door_1', door_state='close') hp=100.0 - step=070 action=door(target_id='door_2', door_state='open') hp=100.0 - step=071 action=move(direction='east') hp=100.0 - step=072 action=move(direction='north') hp=100.0 - step=073 action=wait() hp=100.0 - step=074 action=move(direction='west') hp=100.0 - step=075 action=door(target_id='door_1', door_state='close') hp=100.0 - step=076 action=wait() hp=100.0 - step=077 action=move(direction='west') hp=100.0 - step=078 action=move(direction='south') hp=100.0 - step=079 action=move(direction='south') hp=100.0 - step=080 action=move(direction='east') hp=100.0 - step=081 action=door(target_id='door_2', door_state='open') hp=100.0 - step=082 action=wait() hp=100.0 - step=083 action=move(direction='south') hp=100.0 - step=084 action=door(target_id='door_2', door_state='open') hp=100.0 - step=085 action=move(direction='east') hp=100.0 - step=086 action=move(direction='east') hp=100.0 - step=087 action=move(direction='north') hp=100.0 - step=088 action=door(target_id='door_2', door_state='open') hp=100.0 - step=089 action=move(direction='west') hp=100.0 - step=090 action=door(target_id='door_2', door_state='open') hp=100.0 - step=091 action=move(direction='east') hp=100.0 - step=092 action=move(direction='east') hp=100.0 - step=093 action=move(direction='west') hp=100.0 - step=094 action=move(direction='north') hp=100.0 - step=095 action=move(direction='west') hp=100.0 - step=096 action=move(direction='north') hp=100.0 - step=097 action=door(target_id='door_1', door_state='close') hp=100.0 - step=098 action=door(target_id='door_2', door_state='open') hp=100.0 - step=099 action=door(target_id='door_1', door_state='close') hp=100.0 - step=100 action=move(direction='west') hp=100.0 - step=101 action=move(direction='west') hp=100.0 - step=102 action=wait() hp=100.0 - step=103 action=wait() hp=100.0 - step=104 action=wait() hp=100.0 - step=105 action=door(target_id='door_2', door_state='close') hp=100.0 - step=106 action=move(direction='west') hp=100.0 - step=107 action=move(direction='west') hp=100.0 - step=108 action=move(direction='west') hp=100.0 - step=109 action=wait() hp=100.0 - step=110 action=wait() hp=100.0 - step=111 action=wait() hp=100.0 - step=112 action=door(target_id='door_1', door_state='close') hp=100.0 - step=113 action=move(direction='east') hp=100.0 - step=114 action=door(target_id='door_1', door_state='open') hp=100.0 - step=115 action=door(target_id='door_5', door_state='close') hp=100.0 - step=116 action=move(direction='west') hp=100.0 - step=117 action=door(target_id='door_1', door_state='close') hp=100.0 - step=118 action=wait() hp=100.0 - step=119 action=move(direction='east') hp=100.0 - step=120 action=move(direction='east') hp=100.0 - step=121 action=move(direction='west') hp=100.0 - step=122 action=door(target_id='door_2', door_state='close') hp=100.0 - step=123 action=door(target_id='door_2', door_state='close') hp=100.0 - step=124 action=door(target_id='door_1', door_state='open') hp=100.0 - step=125 action=move(direction='west') hp=100.0 - step=126 action=wait() hp=100.0 - step=127 action=door(target_id='door_1', door_state='close') hp=100.0 - step=128 action=move(direction='east') hp=100.0 - step=129 action=move(direction='west') hp=100.0 - step=130 action=wait() hp=100.0 - step=131 action=wait() hp=100.0 - step=132 action=move(direction='south') hp=100.0 - step=133 action=move(direction='west') hp=100.0 -ep=0028 [easy ] steps=133 reward= +13.980 evac=1 hp=100.0 suc30=0.79 r30= +9.95 t=12s - step=001 action=move(direction='south') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='south') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='east') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=move(direction='east') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=move(direction='south') hp=100.0 - step=021 action=wait() hp=100.0 - step=022 action=move(direction='south') hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=wait() hp=100.0 - step=025 action=move(direction='south') hp=100.0 - step=026 action=move(direction='north') hp=100.0 - step=027 action=move(direction='south') hp=100.0 - step=028 action=move(direction='west') hp=100.0 - step=029 action=move(direction='north') hp=100.0 - step=030 action=move(direction='west') hp=100.0 - step=031 action=move(direction='north') hp=100.0 - step=032 action=move(direction='north') hp=100.0 - step=033 action=move(direction='south') hp=100.0 - step=034 action=wait() hp=100.0 - step=035 action=wait() hp=100.0 - step=036 action=move(direction='west') hp=100.0 - step=037 action=move(direction='west') hp=100.0 - step=038 action=wait() hp=100.0 - step=039 action=move(direction='north') hp=100.0 - step=040 action=move(direction='north') hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=move(direction='east') hp=100.0 - step=043 action=move(direction='west') hp=100.0 - step=044 action=wait() hp=100.0 - step=045 action=move(direction='north') hp=100.0 - step=046 action=move(direction='north') hp=100.0 - step=047 action=move(direction='north') hp=100.0 - step=048 action=wait() hp=100.0 - step=049 action=move(direction='west') hp=100.0 - step=050 action=move(direction='south') hp=100.0 - step=051 action=move(direction='west') hp=100.0 - step=052 action=move(direction='north') hp=100.0 - step=053 action=move(direction='north') hp=100.0 - step=054 action=move(direction='north') hp=100.0 - step=055 action=move(direction='east') hp=100.0 - step=056 action=wait() hp=100.0 - step=057 action=move(direction='west') hp=100.0 - step=058 action=wait() hp=100.0 - step=059 action=move(direction='east') hp=100.0 - step=060 action=move(direction='west') hp=100.0 - step=061 action=move(direction='west') hp=100.0 - step=062 action=wait() hp=100.0 - step=063 action=wait() hp=100.0 - step=064 action=move(direction='west') hp=100.0 - step=065 action=move(direction='east') hp=100.0 - step=066 action=wait() hp=100.0 - step=067 action=move(direction='west') hp=100.0 - step=068 action=move(direction='west') hp=100.0 - step=069 action=move(direction='west') hp=100.0 - step=070 action=move(direction='south') hp=100.0 - step=071 action=move(direction='north') hp=100.0 - step=072 action=move(direction='west') hp=100.0 -ep=0029 [easy ] steps=072 reward= +20.350 evac=1 hp=100.0 suc30=0.79 r30= +10.31 t=12s - step=001 action=move(direction='north') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='south') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='south') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='north') hp=100.0 -ep=0030 [easy ] steps=016 reward= +17.630 evac=1 hp=100.0 suc30=0.80 r30= +10.55 t=12s - >> PPO update samples=flushed pi_loss=-0.0072 v_loss=14.1700 entropy=1.4895 kl=0.0035 clip%=0.05 lr=2.60e-04 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='south') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=door(target_id='door_1', door_state='close') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='south') hp=100.0 - step=013 action=door(target_id='door_1', door_state='close') hp=100.0 - step=014 action=door(target_id='door_1', door_state='close') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=door(target_id='door_1', door_state='close') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=move(direction='east') hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=wait() hp=100.0 - step=022 action=wait() hp=100.0 - step=023 action=move(direction='south') hp=100.0 - step=024 action=door(target_id='door_5', door_state='close') hp=100.0 - step=025 action=move(direction='west') hp=100.0 -ep=0031 [easy ] steps=025 reward= +18.650 evac=1 hp=100.0 suc30=0.83 r30= +11.94 t=13s - step=001 action=move(direction='north') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='east') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='east') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='south') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=move(direction='north') hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=wait() hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=move(direction='west') hp=100.0 - step=025 action=move(direction='north') hp=100.0 - step=026 action=move(direction='west') hp=100.0 - step=027 action=move(direction='north') hp=100.0 - step=028 action=wait() hp=100.0 - step=029 action=move(direction='west') hp=100.0 - step=030 action=move(direction='south') hp=100.0 - step=031 action=wait() hp=100.0 - step=032 action=move(direction='north') hp=100.0 - step=033 action=move(direction='west') hp=100.0 - step=034 action=move(direction='east') hp=100.0 - step=035 action=move(direction='south') hp=100.0 - step=036 action=move(direction='west') hp=100.0 - step=037 action=move(direction='north') hp=100.0 - step=038 action=move(direction='west') hp=100.0 - step=039 action=move(direction='west') hp=100.0 - step=040 action=move(direction='west') hp=100.0 -ep=0032 [easy ] steps=040 reward= +21.890 evac=1 hp=100.0 suc30=0.83 r30= +12.11 t=13s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=door(target_id='door_2', door_state='close') hp=100.0 - step=004 action=door(target_id='door_2', door_state='close') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 -ep=0033 [easy ] steps=010 reward= +18.370 evac=1 hp=100.0 suc30=0.87 r30= +13.29 t=13s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=door(target_id='door_6', door_state='close') hp=100.0 - step=008 action=move(direction='east') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='south') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=door(target_id='door_2', door_state='close') hp=100.0 - step=015 action=move(direction='south') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=move(direction='west') hp=100.0 - step=020 action=door(target_id='door_2', door_state='close') hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=move(direction='west') hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=door(target_id='door_1', door_state='close') hp=100.0 - step=027 action=door(target_id='door_2', door_state='close') hp=100.0 - step=028 action=wait() hp=100.0 - step=029 action=door(target_id='door_1', door_state='open') hp=100.0 - step=030 action=door(target_id='door_2', door_state='close') hp=100.0 - step=031 action=move(direction='west') hp=100.0 - step=032 action=door(target_id='door_2', door_state='close') hp=100.0 - step=033 action=door(target_id='door_2', door_state='close') hp=100.0 - step=034 action=door(target_id='door_1', door_state='close') hp=100.0 - step=035 action=door(target_id='door_2', door_state='close') hp=100.0 - step=036 action=door(target_id='door_5', door_state='open') hp=100.0 - step=037 action=move(direction='west') hp=100.0 - step=038 action=move(direction='east') hp=100.0 - step=039 action=door(target_id='door_2', door_state='close') hp=100.0 - step=040 action=door(target_id='door_2', door_state='close') hp=100.0 - step=041 action=door(target_id='door_2', door_state='close') hp=100.0 - step=042 action=wait() hp=100.0 - step=043 action=door(target_id='door_2', door_state='close') hp=100.0 - step=044 action=move(direction='west') hp=100.0 - step=045 action=door(target_id='door_1', door_state='open') hp=100.0 - step=046 action=wait() hp=100.0 - step=047 action=wait() hp=100.0 - step=048 action=wait() hp=100.0 - step=049 action=door(target_id='door_1', door_state='close') hp=100.0 - step=050 action=wait() hp=100.0 - step=051 action=move(direction='east') hp=100.0 - step=052 action=move(direction='west') hp=100.0 - step=053 action=wait() hp=100.0 - step=054 action=door(target_id='door_1', door_state='open') hp=100.0 - step=055 action=wait() hp=100.0 - step=056 action=move(direction='south') hp=100.0 - step=057 action=move(direction='west') hp=100.0 -ep=0034 [easy ] steps=057 reward= +15.420 evac=1 hp=100.0 suc30=0.90 r30= +14.07 t=13s - step=001 action=move(direction='west') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='south') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='south') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=move(direction='east') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=move(direction='west') hp=100.0 -ep=0035 [easy ] steps=015 reward= +17.990 evac=1 hp=100.0 suc30=0.90 r30= +14.10 t=13s - >> PPO update samples=flushed pi_loss=-0.0054 v_loss=19.6221 entropy=1.4137 kl=0.0007 clip%=0.00 lr=2.53e-04 - step=001 action=wait() hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=move(direction='east') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=move(direction='north') hp=100.0 -ep=0036 [easy ] steps=015 reward= +18.250 evac=1 hp=100.0 suc30=0.90 r30= +14.13 t=13s - step=001 action=door(target_id='door_2', door_state='open') hp=100.0 - step=002 action=door(target_id='door_2', door_state='open') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=door(target_id='door_1', door_state='close') hp=100.0 - step=007 action=door(target_id='door_1', door_state='open') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='south') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=door(target_id='door_1', door_state='close') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=move(direction='east') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=door(target_id='door_1', door_state='open') hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=wait() hp=100.0 - step=022 action=door(target_id='door_1', door_state='open') hp=100.0 - step=023 action=door(target_id='door_1', door_state='open') hp=100.0 - step=024 action=wait() hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=wait() hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=wait() hp=100.0 - step=029 action=wait() hp=100.0 - step=030 action=door(target_id='door_1', door_state='open') hp=100.0 - step=031 action=wait() hp=100.0 - step=032 action=door(target_id='door_1', door_state='open') hp=100.0 - step=033 action=move(direction='east') hp=100.0 - step=034 action=move(direction='west') hp=100.0 - step=035 action=move(direction='east') hp=100.0 - step=036 action=move(direction='west') hp=100.0 - step=037 action=move(direction='east') hp=100.0 - step=038 action=move(direction='west') hp=100.0 - step=039 action=move(direction='south') hp=100.0 - step=040 action=move(direction='north') hp=100.0 - step=041 action=move(direction='south') hp=100.0 - step=042 action=move(direction='north') hp=100.0 - step=043 action=wait() hp=100.0 - step=044 action=move(direction='east') hp=100.0 - step=045 action=wait() hp=100.0 - step=046 action=move(direction='west') hp=100.0 - step=047 action=wait() hp=100.0 - step=048 action=move(direction='south') hp=100.0 - step=049 action=move(direction='north') hp=100.0 - step=050 action=door(target_id='door_1', door_state='open') hp=100.0 - step=051 action=door(target_id='door_1', door_state='open') hp=100.0 - step=052 action=move(direction='east') hp=100.0 - step=053 action=wait() hp=100.0 - step=054 action=move(direction='west') hp=100.0 - step=055 action=move(direction='south') hp=100.0 - step=056 action=move(direction='north') hp=100.0 - step=057 action=wait() hp=100.0 - step=058 action=move(direction='south') hp=100.0 - step=059 action=move(direction='north') hp=100.0 - step=060 action=wait() hp=100.0 - step=061 action=wait() hp=100.0 - step=062 action=wait() hp=100.0 - step=063 action=move(direction='east') hp=100.0 - step=064 action=wait() hp=100.0 - step=065 action=move(direction='west') hp=100.0 - step=066 action=door(target_id='door_1', door_state='open') hp=100.0 - step=067 action=move(direction='south') hp=100.0 - step=068 action=wait() hp=100.0 - step=069 action=move(direction='north') hp=100.0 - step=070 action=door(target_id='door_1', door_state='open') hp=100.0 - step=071 action=wait() hp=100.0 - step=072 action=move(direction='east') hp=100.0 - step=073 action=move(direction='west') hp=100.0 - step=074 action=move(direction='south') hp=100.0 - step=075 action=move(direction='north') hp=100.0 - step=076 action=move(direction='east') hp=100.0 - step=077 action=wait() hp=100.0 - step=078 action=move(direction='west') hp=100.0 - step=079 action=door(target_id='door_1', door_state='open') hp=100.0 - step=080 action=door(target_id='door_1', door_state='open') hp=100.0 - step=081 action=move(direction='south') hp=100.0 - step=082 action=move(direction='north') hp=100.0 - step=083 action=wait() hp=100.0 - step=084 action=wait() hp=100.0 - step=085 action=wait() hp=100.0 - step=086 action=wait() hp=100.0 - step=087 action=door(target_id='door_1', door_state='open') hp=100.0 - step=088 action=move(direction='south') hp=100.0 - step=089 action=move(direction='north') hp=100.0 - step=090 action=wait() hp=100.0 - step=091 action=move(direction='east') hp=100.0 - step=092 action=move(direction='west') hp=100.0 - step=093 action=move(direction='east') hp=100.0 - step=094 action=wait() hp=100.0 - step=095 action=move(direction='west') hp=100.0 - step=096 action=door(target_id='door_1', door_state='open') hp=100.0 - step=097 action=wait() hp=100.0 - step=098 action=door(target_id='door_1', door_state='open') hp=100.0 - step=099 action=move(direction='east') hp=100.0 - step=100 action=move(direction='west') hp=100.0 - step=101 action=move(direction='east') hp=100.0 - step=102 action=move(direction='west') hp=100.0 - step=103 action=door(target_id='door_1', door_state='open') hp=100.0 - step=104 action=wait() hp=100.0 - step=105 action=door(target_id='door_1', door_state='open') hp=100.0 - step=106 action=move(direction='south') hp=100.0 - step=107 action=wait() hp=100.0 - step=108 action=move(direction='north') hp=100.0 - step=109 action=wait() hp=100.0 - step=110 action=wait() hp=100.0 - step=111 action=wait() hp=100.0 - step=112 action=door(target_id='door_1', door_state='open') hp=100.0 - step=113 action=move(direction='east') hp=100.0 - step=114 action=move(direction='west') hp=100.0 - step=115 action=wait() hp=100.0 - step=116 action=door(target_id='door_1', door_state='open') hp=100.0 - step=117 action=move(direction='south') hp=100.0 - step=118 action=wait() hp=100.0 - step=119 action=move(direction='east') hp=100.0 - step=120 action=move(direction='east') hp=100.0 - step=121 action=wait() hp=100.0 - step=122 action=move(direction='west') hp=100.0 - step=123 action=move(direction='west') hp=100.0 - step=124 action=move(direction='north') hp=100.0 - step=125 action=door(target_id='door_1', door_state='open') hp=100.0 - step=126 action=wait() hp=100.0 - step=127 action=wait() hp=100.0 - step=128 action=move(direction='south') hp=100.0 - step=129 action=move(direction='north') hp=100.0 - step=130 action=wait() hp=100.0 - step=131 action=wait() hp=100.0 - step=132 action=wait() hp=100.0 - step=133 action=door(target_id='door_1', door_state='open') hp=100.0 - step=134 action=door(target_id='door_1', door_state='open') hp=100.0 - step=135 action=door(target_id='door_1', door_state='open') hp=100.0 - step=136 action=move(direction='east') hp=100.0 - step=137 action=move(direction='west') hp=100.0 - step=138 action=move(direction='east') hp=100.0 - step=139 action=move(direction='west') hp=100.0 - step=140 action=wait() hp=100.0 - step=141 action=door(target_id='door_1', door_state='open') hp=100.0 - step=142 action=door(target_id='door_1', door_state='open') hp=100.0 - step=143 action=move(direction='east') hp=100.0 - step=144 action=wait() hp=100.0 - step=145 action=wait() hp=100.0 - step=146 action=move(direction='west') hp=100.0 - step=147 action=wait() hp=100.0 - step=148 action=wait() hp=100.0 - step=149 action=move(direction='east') hp=100.0 - step=150 action=move(direction='west') hp=100.0 - step=151 action=wait() hp=100.0 - step=152 action=wait() hp=100.0 - step=153 action=wait() hp=100.0 - step=154 action=wait() hp=100.0 - step=155 action=move(direction='south') hp=100.0 - step=156 action=move(direction='south') hp=100.0 - step=157 action=move(direction='north') hp=100.0 - step=158 action=wait() hp=100.0 - step=159 action=move(direction='north') hp=100.0 - step=160 action=door(target_id='door_1', door_state='open') hp=100.0 - step=161 action=wait() hp=100.0 - step=162 action=wait() hp=100.0 - step=163 action=wait() hp=100.0 - step=164 action=door(target_id='door_1', door_state='open') hp=100.0 - step=165 action=wait() hp=100.0 - step=166 action=move(direction='south') hp=100.0 - step=167 action=wait() hp=100.0 - step=168 action=move(direction='south') hp=100.0 - step=169 action=move(direction='north') hp=100.0 - step=170 action=move(direction='south') hp=100.0 - step=171 action=move(direction='east') hp=100.0 - step=172 action=wait() hp=100.0 - step=173 action=move(direction='north') hp=100.0 - step=174 action=move(direction='north') hp=100.0 - step=175 action=wait() hp=100.0 - step=176 action=move(direction='west') hp=100.0 - step=177 action=door(target_id='door_1', door_state='open') hp=100.0 - step=178 action=wait() hp=100.0 - step=179 action=wait() hp=100.0 - step=180 action=move(direction='east') hp=100.0 - step=181 action=move(direction='west') hp=100.0 - step=182 action=wait() hp=100.0 - step=183 action=move(direction='east') hp=100.0 - step=184 action=door(target_id='door_1', door_state='open') hp=100.0 - step=185 action=move(direction='west') hp=100.0 - step=186 action=door(target_id='door_1', door_state='open') hp=100.0 - step=187 action=move(direction='east') hp=100.0 - step=188 action=move(direction='west') hp=100.0 - step=189 action=door(target_id='door_1', door_state='open') hp=100.0 - step=190 action=move(direction='south') hp=100.0 - step=191 action=move(direction='north') hp=100.0 - step=192 action=wait() hp=100.0 - step=193 action=door(target_id='door_1', door_state='open') hp=100.0 - step=194 action=wait() hp=100.0 - step=195 action=move(direction='south') hp=100.0 - step=196 action=move(direction='south') hp=100.0 - step=197 action=wait() hp=100.0 - step=198 action=move(direction='north') hp=100.0 - step=199 action=move(direction='north') hp=100.0 - step=200 action=wait() hp=100.0 -ep=0037 [easy ] steps=200 reward= -20.440 evac=0 hp=100.0 suc30=0.90 r30= +13.70 t=14s - step=001 action=wait() hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=move(direction='west') hp=100.0 -ep=0038 [easy ] steps=005 reward= +16.900 evac=1 hp=100.0 suc30=0.90 r30= +13.70 t=14s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='south') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=door(target_id='door_3', door_state='close') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=door(target_id='door_2', door_state='open') hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=move(direction='south') hp=100.0 - step=021 action=move(direction='east') hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=move(direction='west') hp=100.0 - step=024 action=move(direction='west') hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=move(direction='west') hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=move(direction='west') hp=100.0 - step=029 action=move(direction='west') hp=100.0 - step=030 action=move(direction='west') hp=100.0 - step=031 action=move(direction='west') hp=100.0 -ep=0039 [easy ] steps=031 reward= +16.240 evac=1 hp=100.0 suc30=0.90 r30= +13.63 t=14s - step=001 action=move(direction='west') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=door(target_id='door_1', door_state='close') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=door(target_id='door_1', door_state='open') hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=move(direction='south') hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=wait() hp=100.0 - step=022 action=wait() hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=wait() hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=wait() hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=wait() hp=100.0 - step=029 action=door(target_id='door_1', door_state='open') hp=100.0 - step=030 action=move(direction='east') hp=100.0 - step=031 action=move(direction='west') hp=100.0 - step=032 action=wait() hp=100.0 - step=033 action=wait() hp=100.0 - step=034 action=wait() hp=100.0 - step=035 action=door(target_id='door_1', door_state='open') hp=100.0 - step=036 action=wait() hp=100.0 - step=037 action=wait() hp=100.0 - step=038 action=door(target_id='door_1', door_state='open') hp=100.0 - step=039 action=move(direction='east') hp=100.0 - step=040 action=move(direction='west') hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=wait() hp=100.0 - step=043 action=move(direction='east') hp=100.0 - step=044 action=move(direction='west') hp=100.0 - step=045 action=move(direction='east') hp=100.0 - step=046 action=wait() hp=100.0 - step=047 action=move(direction='west') hp=100.0 - step=048 action=wait() hp=100.0 - step=049 action=wait() hp=100.0 - step=050 action=wait() hp=100.0 - step=051 action=wait() hp=100.0 - step=052 action=move(direction='south') hp=100.0 - step=053 action=wait() hp=100.0 - step=054 action=move(direction='north') hp=100.0 - step=055 action=wait() hp=100.0 - step=056 action=wait() hp=100.0 - step=057 action=wait() hp=100.0 - step=058 action=door(target_id='door_1', door_state='open') hp=100.0 - step=059 action=door(target_id='door_1', door_state='open') hp=100.0 - step=060 action=door(target_id='door_1', door_state='open') hp=100.0 - step=061 action=wait() hp=100.0 - step=062 action=move(direction='south') hp=100.0 - step=063 action=door(target_id='door_1', door_state='open') hp=100.0 - step=064 action=move(direction='north') hp=100.0 - step=065 action=wait() hp=100.0 - step=066 action=door(target_id='door_1', door_state='open') hp=100.0 - step=067 action=move(direction='east') hp=100.0 - step=068 action=move(direction='west') hp=100.0 - step=069 action=wait() hp=100.0 - step=070 action=wait() hp=100.0 - step=071 action=wait() hp=100.0 - step=072 action=door(target_id='door_1', door_state='open') hp=100.0 - step=073 action=move(direction='south') hp=100.0 - step=074 action=move(direction='east') hp=100.0 - step=075 action=door(target_id='door_1', door_state='open') hp=100.0 - step=076 action=move(direction='west') hp=100.0 - step=077 action=move(direction='north') hp=100.0 - step=078 action=door(target_id='door_1', door_state='close') hp=100.0 - step=079 action=wait() hp=100.0 - step=080 action=wait() hp=100.0 - step=081 action=move(direction='south') hp=100.0 - step=082 action=move(direction='north') hp=100.0 - step=083 action=wait() hp=100.0 - step=084 action=door(target_id='door_1', door_state='close') hp=100.0 - step=085 action=wait() hp=100.0 - step=086 action=door(target_id='door_1', door_state='close') hp=100.0 - step=087 action=door(target_id='door_1', door_state='close') hp=100.0 - step=088 action=wait() hp=100.0 - step=089 action=door(target_id='door_1', door_state='close') hp=100.0 - step=090 action=door(target_id='door_1', door_state='close') hp=100.0 - step=091 action=wait() hp=100.0 - step=092 action=wait() hp=100.0 - step=093 action=door(target_id='door_1', door_state='close') hp=100.0 - step=094 action=wait() hp=100.0 - step=095 action=wait() hp=100.0 - step=096 action=wait() hp=100.0 - step=097 action=wait() hp=100.0 - step=098 action=door(target_id='door_1', door_state='close') hp=100.0 - step=099 action=door(target_id='door_1', door_state='close') hp=100.0 - step=100 action=wait() hp=100.0 - step=101 action=door(target_id='door_1', door_state='close') hp=100.0 - step=102 action=wait() hp=100.0 - step=103 action=wait() hp=100.0 - step=104 action=move(direction='south') hp=100.0 - step=105 action=move(direction='north') hp=100.0 - step=106 action=wait() hp=100.0 - step=107 action=door(target_id='door_1', door_state='close') hp=100.0 - step=108 action=door(target_id='door_1', door_state='close') hp=100.0 - step=109 action=wait() hp=100.0 - step=110 action=wait() hp=100.0 - step=111 action=door(target_id='door_1', door_state='close') hp=100.0 - step=112 action=door(target_id='door_1', door_state='close') hp=100.0 - step=113 action=door(target_id='door_1', door_state='close') hp=100.0 - step=114 action=move(direction='south') hp=100.0 - step=115 action=door(target_id='door_1', door_state='close') hp=100.0 - step=116 action=door(target_id='door_1', door_state='close') hp=100.0 - step=117 action=move(direction='north') hp=100.0 - step=118 action=door(target_id='door_1', door_state='close') hp=100.0 - step=119 action=wait() hp=100.0 - step=120 action=wait() hp=100.0 - step=121 action=wait() hp=100.0 - step=122 action=wait() hp=100.0 - step=123 action=wait() hp=100.0 - step=124 action=wait() hp=100.0 - step=125 action=door(target_id='door_1', door_state='close') hp=100.0 - step=126 action=wait() hp=100.0 - step=127 action=door(target_id='door_1', door_state='close') hp=100.0 - step=128 action=wait() hp=100.0 - step=129 action=door(target_id='door_1', door_state='close') hp=100.0 - step=130 action=move(direction='east') hp=100.0 - step=131 action=move(direction='south') hp=100.0 - step=132 action=door(target_id='door_1', door_state='close') hp=100.0 - step=133 action=wait() hp=100.0 - step=134 action=move(direction='west') hp=100.0 - step=135 action=move(direction='east') hp=100.0 - step=136 action=wait() hp=100.0 - step=137 action=move(direction='east') hp=100.0 - step=138 action=move(direction='west') hp=100.0 - step=139 action=move(direction='north') hp=100.0 - step=140 action=move(direction='west') hp=100.0 - step=141 action=move(direction='east') hp=100.0 - step=142 action=move(direction='west') hp=100.0 - step=143 action=wait() hp=100.0 - step=144 action=wait() hp=100.0 - step=145 action=wait() hp=100.0 - step=146 action=wait() hp=100.0 - step=147 action=wait() hp=100.0 - step=148 action=move(direction='south') hp=100.0 - step=149 action=move(direction='north') hp=100.0 - step=150 action=move(direction='south') hp=100.0 - step=151 action=wait() hp=100.0 - step=152 action=wait() hp=100.0 - step=153 action=move(direction='north') hp=100.0 - step=154 action=wait() hp=100.0 - step=155 action=move(direction='south') hp=100.0 - step=156 action=move(direction='north') hp=100.0 - step=157 action=wait() hp=100.0 - step=158 action=door(target_id='door_1', door_state='open') hp=100.0 - step=159 action=wait() hp=100.0 - step=160 action=move(direction='south') hp=100.0 - step=161 action=move(direction='north') hp=100.0 - step=162 action=wait() hp=100.0 - step=163 action=move(direction='east') hp=100.0 - step=164 action=move(direction='south') hp=100.0 - step=165 action=move(direction='east') hp=100.0 - step=166 action=move(direction='north') hp=100.0 - step=167 action=move(direction='west') hp=100.0 - step=168 action=door(target_id='door_1', door_state='open') hp=100.0 - step=169 action=wait() hp=100.0 - step=170 action=move(direction='west') hp=100.0 - step=171 action=wait() hp=100.0 - step=172 action=door(target_id='door_1', door_state='open') hp=100.0 - step=173 action=wait() hp=100.0 - step=174 action=wait() hp=100.0 - step=175 action=move(direction='south') hp=100.0 - step=176 action=move(direction='north') hp=100.0 - step=177 action=wait() hp=100.0 - step=178 action=move(direction='south') hp=100.0 - step=179 action=move(direction='east') hp=100.0 - step=180 action=move(direction='north') hp=100.0 - step=181 action=move(direction='west') hp=100.0 - step=182 action=door(target_id='door_1', door_state='open') hp=100.0 - step=183 action=wait() hp=100.0 - step=184 action=wait() hp=100.0 - step=185 action=move(direction='south') hp=100.0 - step=186 action=move(direction='north') hp=100.0 - step=187 action=door(target_id='door_1', door_state='open') hp=100.0 - step=188 action=wait() hp=100.0 - step=189 action=move(direction='south') hp=100.0 - step=190 action=move(direction='north') hp=100.0 - step=191 action=move(direction='east') hp=100.0 - step=192 action=move(direction='west') hp=100.0 - step=193 action=move(direction='east') hp=100.0 - step=194 action=move(direction='west') hp=100.0 - step=195 action=wait() hp=100.0 - step=196 action=move(direction='east') hp=100.0 - step=197 action=move(direction='west') hp=100.0 - step=198 action=wait() hp=100.0 - step=199 action=wait() hp=100.0 - step=200 action=wait() hp=100.0 -ep=0040 [easy ] steps=200 reward= -18.790 evac=0 hp=100.0 suc30=0.90 r30= +13.40 t=15s - >> PPO update samples=flushed pi_loss=+0.0048 v_loss=26.4871 entropy=1.2155 kl=0.0004 clip%=0.00 lr=2.46e-04 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - ** EVAL [medium] reward=+15.640 success=1.00 steps=4.3 - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='south') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='east') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=move(direction='west') hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=move(direction='south') hp=100.0 - step=022 action=move(direction='north') hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=move(direction='north') hp=100.0 - step=025 action=move(direction='east') hp=100.0 - step=026 action=move(direction='north') hp=100.0 - step=027 action=move(direction='west') hp=100.0 - step=028 action=move(direction='north') hp=100.0 - step=029 action=move(direction='north') hp=100.0 - step=030 action=wait() hp=100.0 - step=031 action=move(direction='north') hp=100.0 - step=032 action=move(direction='east') hp=100.0 - step=033 action=move(direction='west') hp=100.0 - step=034 action=move(direction='west') hp=100.0 -ep=0041 [easy ] steps=034 reward= +21.470 evac=1 hp=100.0 suc30=0.90 r30= +13.78 t=16s - step=001 action=move(direction='west') hp=100.0 - step=002 action=door(target_id='door_3', door_state='close') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='south') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=door(target_id='door_1', door_state='close') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='west') hp=100.0 -ep=0042 [easy ] steps=015 reward= +18.270 evac=1 hp=100.0 suc30=0.90 r30= +14.13 t=16s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=door(target_id='door_1', door_state='open') hp=100.0 - step=009 action=move(direction='east') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=door(target_id='door_2', door_state='close') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=door(target_id='door_1', door_state='close') hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=door(target_id='door_1', door_state='open') hp=100.0 - step=022 action=wait() hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=wait() hp=100.0 - step=025 action=door(target_id='door_1', door_state='close') hp=100.0 - step=026 action=wait() hp=100.0 - step=027 action=move(direction='east') hp=100.0 - step=028 action=door(target_id='door_2', door_state='close') hp=100.0 - step=029 action=door(target_id='door_5', door_state='close') hp=100.0 - step=030 action=door(target_id='door_1', door_state='open') hp=100.0 - step=031 action=move(direction='north') hp=100.0 - step=032 action=wait() hp=100.0 - step=033 action=wait() hp=100.0 - step=034 action=wait() hp=100.0 - step=035 action=wait() hp=100.0 - step=036 action=wait() hp=100.0 - step=037 action=wait() hp=100.0 - step=038 action=door(target_id='door_1', door_state='close') hp=100.0 - step=039 action=wait() hp=100.0 - step=040 action=wait() hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=wait() hp=100.0 - step=043 action=wait() hp=100.0 - step=044 action=move(direction='south') hp=100.0 - step=045 action=door(target_id='door_5', door_state='close') hp=100.0 - step=046 action=move(direction='west') hp=100.0 - step=047 action=wait() hp=100.0 - step=048 action=wait() hp=100.0 - step=049 action=move(direction='south') hp=100.0 - step=050 action=move(direction='north') hp=100.0 - step=051 action=move(direction='east') hp=100.0 - step=052 action=wait() hp=100.0 - step=053 action=move(direction='south') hp=100.0 - step=054 action=move(direction='west') hp=100.0 - step=055 action=move(direction='north') hp=100.0 - step=056 action=move(direction='south') hp=100.0 - step=057 action=move(direction='north') hp=100.0 - step=058 action=wait() hp=100.0 - step=059 action=wait() hp=100.0 - step=060 action=move(direction='east') hp=100.0 - step=061 action=wait() hp=100.0 - step=062 action=move(direction='east') hp=100.0 - step=063 action=wait() hp=100.0 - step=064 action=move(direction='west') hp=100.0 - step=065 action=move(direction='west') hp=100.0 - step=066 action=move(direction='east') hp=100.0 - step=067 action=wait() hp=100.0 - step=068 action=door(target_id='door_2', door_state='close') hp=100.0 - step=069 action=move(direction='west') hp=100.0 - step=070 action=wait() hp=100.0 - step=071 action=door(target_id='door_1', door_state='open') hp=100.0 - step=072 action=move(direction='south') hp=100.0 - step=073 action=move(direction='north') hp=100.0 - step=074 action=wait() hp=100.0 - step=075 action=wait() hp=100.0 - step=076 action=door(target_id='door_1', door_state='close') hp=100.0 - step=077 action=wait() hp=100.0 - step=078 action=move(direction='south') hp=100.0 - step=079 action=move(direction='north') hp=100.0 - step=080 action=door(target_id='door_1', door_state='open') hp=100.0 - step=081 action=door(target_id='door_1', door_state='close') hp=100.0 - step=082 action=wait() hp=100.0 - step=083 action=wait() hp=100.0 - step=084 action=wait() hp=100.0 - step=085 action=move(direction='south') hp=100.0 - step=086 action=wait() hp=100.0 - step=087 action=move(direction='west') hp=100.0 -ep=0043 [easy ] steps=087 reward= +13.420 evac=1 hp=100.0 suc30=0.90 r30= +14.10 t=16s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='east') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=move(direction='north') hp=100.0 - step=021 action=move(direction='west') hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=move(direction='north') hp=100.0 - step=024 action=move(direction='west') hp=100.0 - step=025 action=move(direction='north') hp=100.0 - step=026 action=move(direction='west') hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=move(direction='south') hp=100.0 - step=029 action=move(direction='west') hp=100.0 - step=030 action=wait() hp=100.0 - step=031 action=move(direction='north') hp=100.0 - step=032 action=move(direction='west') hp=100.0 - step=033 action=move(direction='west') hp=100.0 -ep=0044 [easy ] steps=033 reward= +22.840 evac=1 hp=100.0 suc30=0.90 r30= +14.27 t=16s - step=001 action=move(direction='west') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=door(target_id='door_2', door_state='open') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=move(direction='north') hp=100.0 -ep=0045 [easy ] steps=014 reward= +18.330 evac=1 hp=100.0 suc30=0.90 r30= +14.39 t=16s - >> PPO update samples=flushed pi_loss=+0.0003 v_loss=13.1441 entropy=1.2221 kl=0.0003 clip%=0.00 lr=2.39e-04 - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=door(target_id='door_1', door_state='close') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=door(target_id='door_1', door_state='close') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='south') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=door(target_id='door_1', door_state='close') hp=100.0 - step=018 action=move(direction='south') hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=move(direction='south') hp=100.0 - step=022 action=move(direction='north') hp=100.0 - step=023 action=door(target_id='door_1', door_state='close') hp=100.0 - step=024 action=wait() hp=100.0 - step=025 action=move(direction='south') hp=100.0 - step=026 action=door(target_id='door_1', door_state='close') hp=100.0 - step=027 action=move(direction='north') hp=100.0 - step=028 action=door(target_id='door_1', door_state='close') hp=100.0 - step=029 action=move(direction='east') hp=100.0 - step=030 action=wait() hp=100.0 - step=031 action=door(target_id='door_1', door_state='close') hp=100.0 - step=032 action=move(direction='south') hp=100.0 - step=033 action=move(direction='west') hp=100.0 - step=034 action=move(direction='north') hp=100.0 - step=035 action=door(target_id='door_1', door_state='close') hp=100.0 - step=036 action=move(direction='east') hp=100.0 - step=037 action=door(target_id='door_1', door_state='close') hp=100.0 - step=038 action=move(direction='west') hp=100.0 - step=039 action=door(target_id='door_1', door_state='close') hp=100.0 - step=040 action=wait() hp=100.0 - step=041 action=door(target_id='door_1', door_state='close') hp=100.0 - step=042 action=move(direction='east') hp=100.0 - step=043 action=door(target_id='door_1', door_state='close') hp=100.0 - step=044 action=wait() hp=100.0 - step=045 action=door(target_id='door_1', door_state='close') hp=100.0 - step=046 action=wait() hp=100.0 - step=047 action=wait() hp=100.0 - step=048 action=door(target_id='door_1', door_state='close') hp=100.0 - step=049 action=move(direction='west') hp=100.0 - step=050 action=wait() hp=100.0 - step=051 action=move(direction='east') hp=100.0 - step=052 action=wait() hp=100.0 - step=053 action=move(direction='west') hp=100.0 - step=054 action=door(target_id='door_1', door_state='close') hp=100.0 - step=055 action=move(direction='east') hp=100.0 - step=056 action=wait() hp=100.0 - step=057 action=move(direction='west') hp=100.0 - step=058 action=wait() hp=100.0 - step=059 action=wait() hp=100.0 - step=060 action=door(target_id='door_1', door_state='close') hp=100.0 - step=061 action=door(target_id='door_1', door_state='close') hp=100.0 - step=062 action=move(direction='east') hp=100.0 - step=063 action=wait() hp=100.0 - step=064 action=move(direction='west') hp=100.0 - step=065 action=door(target_id='door_1', door_state='close') hp=100.0 - step=066 action=wait() hp=100.0 - step=067 action=wait() hp=100.0 - step=068 action=wait() hp=100.0 - step=069 action=door(target_id='door_1', door_state='close') hp=100.0 - step=070 action=wait() hp=100.0 - step=071 action=wait() hp=100.0 - step=072 action=door(target_id='door_1', door_state='close') hp=100.0 - step=073 action=door(target_id='door_1', door_state='close') hp=100.0 - step=074 action=move(direction='south') hp=100.0 - step=075 action=wait() hp=100.0 - step=076 action=move(direction='north') hp=100.0 - step=077 action=door(target_id='door_1', door_state='close') hp=100.0 - step=078 action=wait() hp=100.0 - step=079 action=move(direction='south') hp=100.0 - step=080 action=move(direction='north') hp=100.0 - step=081 action=wait() hp=100.0 - step=082 action=door(target_id='door_1', door_state='close') hp=100.0 - step=083 action=wait() hp=100.0 - step=084 action=wait() hp=100.0 - step=085 action=wait() hp=100.0 - step=086 action=wait() hp=100.0 - step=087 action=wait() hp=100.0 - step=088 action=door(target_id='door_1', door_state='close') hp=100.0 - step=089 action=door(target_id='door_1', door_state='close') hp=100.0 - step=090 action=door(target_id='door_1', door_state='close') hp=100.0 - step=091 action=door(target_id='door_1', door_state='close') hp=100.0 - step=092 action=move(direction='east') hp=100.0 - step=093 action=door(target_id='door_1', door_state='close') hp=100.0 - step=094 action=wait() hp=100.0 - step=095 action=move(direction='west') hp=100.0 - step=096 action=door(target_id='door_1', door_state='close') hp=100.0 - step=097 action=wait() hp=100.0 - step=098 action=door(target_id='door_1', door_state='close') hp=100.0 - step=099 action=door(target_id='door_1', door_state='close') hp=100.0 - step=100 action=door(target_id='door_1', door_state='close') hp=100.0 - step=101 action=door(target_id='door_1', door_state='close') hp=100.0 - step=102 action=door(target_id='door_1', door_state='close') hp=100.0 - step=103 action=door(target_id='door_1', door_state='close') hp=100.0 - step=104 action=move(direction='south') hp=100.0 - step=105 action=move(direction='north') hp=100.0 - step=106 action=wait() hp=100.0 - step=107 action=move(direction='east') hp=100.0 - step=108 action=move(direction='east') hp=100.0 - step=109 action=move(direction='west') hp=100.0 - step=110 action=move(direction='west') hp=100.0 - step=111 action=door(target_id='door_1', door_state='close') hp=100.0 - step=112 action=wait() hp=100.0 - step=113 action=wait() hp=100.0 - step=114 action=move(direction='east') hp=100.0 - step=115 action=door(target_id='door_1', door_state='close') hp=100.0 - step=116 action=door(target_id='door_1', door_state='close') hp=100.0 - step=117 action=move(direction='west') hp=100.0 - step=118 action=door(target_id='door_1', door_state='close') hp=100.0 - step=119 action=door(target_id='door_1', door_state='close') hp=100.0 - step=120 action=move(direction='east') hp=100.0 - step=121 action=move(direction='west') hp=100.0 - step=122 action=door(target_id='door_1', door_state='close') hp=100.0 - step=123 action=wait() hp=100.0 - step=124 action=wait() hp=100.0 - step=125 action=door(target_id='door_1', door_state='close') hp=100.0 - step=126 action=wait() hp=100.0 - step=127 action=move(direction='south') hp=100.0 - step=128 action=wait() hp=100.0 - step=129 action=move(direction='north') hp=100.0 - step=130 action=door(target_id='door_1', door_state='close') hp=100.0 - step=131 action=move(direction='south') hp=100.0 - step=132 action=wait() hp=100.0 - step=133 action=move(direction='east') hp=100.0 - step=134 action=move(direction='west') hp=100.0 - step=135 action=move(direction='north') hp=100.0 - step=136 action=door(target_id='door_1', door_state='close') hp=100.0 - step=137 action=wait() hp=100.0 - step=138 action=move(direction='east') hp=100.0 - step=139 action=door(target_id='door_1', door_state='close') hp=100.0 - step=140 action=door(target_id='door_1', door_state='close') hp=100.0 - step=141 action=move(direction='east') hp=100.0 - step=142 action=move(direction='south') hp=100.0 - step=143 action=move(direction='west') hp=100.0 - step=144 action=move(direction='west') hp=100.0 - step=145 action=wait() hp=100.0 - step=146 action=move(direction='north') hp=100.0 - step=147 action=door(target_id='door_1', door_state='close') hp=100.0 - step=148 action=wait() hp=100.0 - step=149 action=door(target_id='door_1', door_state='close') hp=100.0 - step=150 action=move(direction='east') hp=100.0 - step=151 action=door(target_id='door_1', door_state='close') hp=100.0 - step=152 action=door(target_id='door_1', door_state='close') hp=100.0 - step=153 action=door(target_id='door_1', door_state='close') hp=100.0 - step=154 action=move(direction='west') hp=100.0 - step=155 action=door(target_id='door_1', door_state='close') hp=100.0 - step=156 action=wait() hp=100.0 - step=157 action=wait() hp=100.0 - step=158 action=door(target_id='door_1', door_state='close') hp=100.0 - step=159 action=wait() hp=100.0 - step=160 action=move(direction='east') hp=100.0 - step=161 action=move(direction='west') hp=100.0 - step=162 action=wait() hp=100.0 - step=163 action=wait() hp=100.0 - step=164 action=door(target_id='door_1', door_state='close') hp=100.0 - step=165 action=wait() hp=100.0 - step=166 action=door(target_id='door_1', door_state='close') hp=100.0 - step=167 action=wait() hp=100.0 - step=168 action=wait() hp=100.0 - step=169 action=wait() hp=100.0 - step=170 action=move(direction='south') hp=100.0 - step=171 action=move(direction='north') hp=100.0 - step=172 action=wait() hp=100.0 - step=173 action=wait() hp=100.0 - step=174 action=wait() hp=100.0 - step=175 action=wait() hp=100.0 - step=176 action=move(direction='east') hp=100.0 - step=177 action=door(target_id='door_1', door_state='close') hp=100.0 - step=178 action=move(direction='west') hp=100.0 - step=179 action=wait() hp=100.0 - step=180 action=door(target_id='door_1', door_state='close') hp=100.0 - step=181 action=wait() hp=100.0 - step=182 action=move(direction='east') hp=100.0 - step=183 action=move(direction='west') hp=100.0 - step=184 action=door(target_id='door_1', door_state='close') hp=100.0 - step=185 action=door(target_id='door_1', door_state='close') hp=100.0 - step=186 action=door(target_id='door_1', door_state='close') hp=100.0 - step=187 action=door(target_id='door_1', door_state='close') hp=100.0 - step=188 action=door(target_id='door_1', door_state='close') hp=100.0 - step=189 action=door(target_id='door_1', door_state='close') hp=100.0 - step=190 action=wait() hp=100.0 - step=191 action=door(target_id='door_1', door_state='close') hp=100.0 - step=192 action=wait() hp=100.0 - step=193 action=move(direction='south') hp=100.0 - step=194 action=move(direction='north') hp=100.0 - step=195 action=door(target_id='door_1', door_state='close') hp=100.0 - step=196 action=wait() hp=100.0 - step=197 action=wait() hp=100.0 - step=198 action=door(target_id='door_1', door_state='close') hp=100.0 - step=199 action=door(target_id='door_1', door_state='close') hp=100.0 - step=200 action=door(target_id='door_1', door_state='close') hp=100.0 -ep=0046 [easy ] steps=200 reward= -18.080 evac=0 hp=100.0 suc30=0.90 r30= +14.16 t=17s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='south') hp=100.0 - step=003 action=move(direction='south') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='east') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='north') hp= 99.5 - step=014 action=wait() hp= 99.5 - step=015 action=move(direction='north') hp= 99.5 - step=016 action=move(direction='north') hp= 99.5 - step=017 action=move(direction='north') hp= 99.5 - step=018 action=move(direction='west') hp= 99.5 - step=019 action=move(direction='west') hp= 99.5 - step=020 action=move(direction='north') hp= 99.5 - step=021 action=move(direction='north') hp= 99.5 - step=022 action=move(direction='east') hp= 99.5 - step=023 action=move(direction='west') hp= 99.5 - step=024 action=move(direction='west') hp= 99.5 - step=025 action=wait() hp= 99.5 - step=026 action=wait() hp= 99.5 - step=027 action=move(direction='east') hp= 99.5 - step=028 action=wait() hp= 99.5 - step=029 action=wait() hp= 99.5 - step=030 action=move(direction='west') hp= 99.5 - step=031 action=move(direction='west') hp= 99.5 - step=032 action=wait() hp= 99.5 - step=033 action=move(direction='west') hp= 99.5 - step=034 action=move(direction='west') hp= 99.5 - step=035 action=move(direction='west') hp= 99.5 - step=036 action=wait() hp= 99.5 - step=037 action=wait() hp= 99.5 - step=038 action=move(direction='east') hp= 99.5 - step=039 action=move(direction='west') hp= 99.5 - step=040 action=wait() hp= 99.5 - step=041 action=move(direction='west') hp= 99.5 -ep=0047 [easy ] steps=041 reward= +19.012 evac=1 hp= 99.5 suc30=0.90 r30= +14.13 t=17s - step=001 action=move(direction='north') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=move(direction='north') hp=100.0 -ep=0048 [easy ] steps=011 reward= +18.160 evac=1 hp=100.0 suc30=0.90 r30= +14.25 t=17s - step=001 action=move(direction='north') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='east') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=door(target_id='door_2', door_state='close') hp=100.0 - step=011 action=door(target_id='door_2', door_state='close') hp=100.0 - step=012 action=door(target_id='door_2', door_state='close') hp=100.0 - step=013 action=move(direction='east') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='east') hp=100.0 - step=016 action=move(direction='east') hp=100.0 - step=017 action=door(target_id='door_2', door_state='close') hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=door(target_id='door_2', door_state='close') hp=100.0 - step=020 action=door(target_id='door_2', door_state='close') hp=100.0 - step=021 action=wait() hp=100.0 - step=022 action=door(target_id='door_2', door_state='close') hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=move(direction='west') hp=100.0 - step=025 action=door(target_id='door_2', door_state='close') hp=100.0 - step=026 action=door(target_id='door_2', door_state='close') hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=wait() hp=100.0 - step=029 action=door(target_id='door_2', door_state='close') hp=100.0 - step=030 action=wait() hp=100.0 - step=031 action=wait() hp=100.0 - step=032 action=move(direction='south') hp=100.0 - step=033 action=door(target_id='door_2', door_state='close') hp=100.0 - step=034 action=move(direction='north') hp=100.0 - step=035 action=door(target_id='door_2', door_state='close') hp=100.0 - step=036 action=wait() hp=100.0 - step=037 action=move(direction='east') hp=100.0 - step=038 action=move(direction='west') hp=100.0 - step=039 action=door(target_id='door_2', door_state='close') hp=100.0 - step=040 action=move(direction='east') hp=100.0 - step=041 action=move(direction='west') hp=100.0 - step=042 action=wait() hp=100.0 - step=043 action=wait() hp=100.0 - step=044 action=door(target_id='door_2', door_state='close') hp=100.0 - step=045 action=move(direction='south') hp=100.0 - step=046 action=wait() hp= 99.5 - step=047 action=wait() hp= 99.0 - step=048 action=move(direction='north') hp= 98.5 - step=049 action=wait() hp= 98.5 - step=050 action=move(direction='south') hp= 98.0 - step=051 action=move(direction='east') hp= 97.5 - step=052 action=move(direction='west') hp= 97.0 - step=053 action=move(direction='north') hp= 96.5 - step=054 action=door(target_id='door_2', door_state='close') hp= 96.0 - step=055 action=door(target_id='door_2', door_state='close') hp= 95.5 - step=056 action=door(target_id='door_2', door_state='close') hp= 95.0 - step=057 action=door(target_id='door_2', door_state='close') hp= 94.5 - step=058 action=wait() hp= 94.0 - step=059 action=door(target_id='door_2', door_state='close') hp= 93.5 - step=060 action=wait() hp= 93.0 - step=061 action=door(target_id='door_2', door_state='close') hp= 92.5 - step=062 action=door(target_id='door_2', door_state='close') hp= 92.0 - step=063 action=wait() hp= 91.5 - step=064 action=move(direction='east') hp= 91.0 - step=065 action=door(target_id='door_2', door_state='close') hp= 90.5 - step=066 action=move(direction='east') hp= 90.0 - step=067 action=move(direction='west') hp= 89.5 - step=068 action=wait() hp= 89.0 - step=069 action=wait() hp= 88.5 - step=070 action=move(direction='west') hp= 88.0 - step=071 action=move(direction='east') hp= 87.5 - step=072 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=073 action=wait() hp= 87.5 - step=074 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=075 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=076 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=077 action=wait() hp= 87.5 - step=078 action=move(direction='east') hp= 87.5 - step=079 action=move(direction='west') hp= 87.5 - step=080 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=081 action=move(direction='south') hp= 87.5 - step=082 action=move(direction='west') hp= 87.5 - step=083 action=move(direction='east') hp= 87.5 - step=084 action=move(direction='west') hp= 87.5 - step=085 action=move(direction='north') hp= 87.5 - step=086 action=move(direction='east') hp= 87.5 - step=087 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=088 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=089 action=move(direction='east') hp= 87.5 - step=090 action=move(direction='west') hp= 87.5 - step=091 action=wait() hp= 87.5 - step=092 action=move(direction='west') hp= 87.5 - step=093 action=move(direction='south') hp= 87.5 - step=094 action=move(direction='north') hp= 87.5 - step=095 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=096 action=wait() hp= 87.5 - step=097 action=wait() hp= 87.5 - step=098 action=wait() hp= 87.5 - step=099 action=wait() hp= 87.5 - step=100 action=wait() hp= 87.5 - step=101 action=wait() hp= 87.5 - step=102 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=103 action=wait() hp= 87.5 - step=104 action=wait() hp= 87.5 - step=105 action=move(direction='south') hp= 87.5 - step=106 action=move(direction='north') hp= 87.5 - step=107 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=108 action=wait() hp= 87.5 - step=109 action=move(direction='south') hp= 87.5 - step=110 action=move(direction='north') hp= 87.5 - step=111 action=wait() hp= 87.5 - step=112 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=113 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=114 action=wait() hp= 87.5 - step=115 action=wait() hp= 87.5 - step=116 action=wait() hp= 87.5 - step=117 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=118 action=wait() hp= 87.5 - step=119 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=120 action=wait() hp= 87.5 - step=121 action=move(direction='south') hp= 87.5 - step=122 action=wait() hp= 87.5 - step=123 action=move(direction='north') hp= 87.5 - step=124 action=move(direction='south') hp= 87.5 - step=125 action=wait() hp= 87.5 - step=126 action=move(direction='north') hp= 87.5 - step=127 action=wait() hp= 87.5 - step=128 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=129 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=130 action=wait() hp= 87.5 - step=131 action=wait() hp= 87.5 - step=132 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=133 action=wait() hp= 87.5 - step=134 action=wait() hp= 87.5 - step=135 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=136 action=wait() hp= 87.5 - step=137 action=wait() hp= 87.5 - step=138 action=wait() hp= 87.5 - step=139 action=wait() hp= 87.5 - step=140 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=141 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=142 action=move(direction='south') hp= 87.5 - step=143 action=move(direction='north') hp= 87.5 - step=144 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=145 action=wait() hp= 87.5 - step=146 action=wait() hp= 87.5 - step=147 action=wait() hp= 87.5 - step=148 action=move(direction='south') hp= 87.5 - step=149 action=move(direction='north') hp= 87.5 - step=150 action=wait() hp= 87.5 - step=151 action=wait() hp= 87.5 - step=152 action=move(direction='east') hp= 87.5 - step=153 action=wait() hp= 87.5 - step=154 action=move(direction='west') hp= 87.5 - step=155 action=wait() hp= 87.5 - step=156 action=wait() hp= 87.5 - step=157 action=wait() hp= 87.5 - step=158 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=159 action=wait() hp= 87.5 - step=160 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=161 action=wait() hp= 87.5 - step=162 action=wait() hp= 87.5 - step=163 action=wait() hp= 87.5 - step=164 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=165 action=wait() hp= 87.5 - step=166 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=167 action=wait() hp= 87.5 - step=168 action=wait() hp= 87.5 - step=169 action=wait() hp= 87.5 - step=170 action=wait() hp= 87.5 - step=171 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=172 action=wait() hp= 87.5 - step=173 action=wait() hp= 87.5 - step=174 action=move(direction='south') hp= 87.5 - step=175 action=move(direction='north') hp= 87.5 - step=176 action=wait() hp= 87.5 - step=177 action=move(direction='south') hp= 87.5 - step=178 action=move(direction='north') hp= 87.5 - step=179 action=wait() hp= 87.5 - step=180 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=181 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=182 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=183 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=184 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=185 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=186 action=wait() hp= 87.5 - step=187 action=wait() hp= 87.5 - step=188 action=wait() hp= 87.5 - step=189 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=190 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=191 action=wait() hp= 87.5 - step=192 action=wait() hp= 87.5 - step=193 action=wait() hp= 87.5 - step=194 action=wait() hp= 87.5 - step=195 action=move(direction='east') hp= 87.5 - step=196 action=move(direction='west') hp= 87.5 - step=197 action=door(target_id='door_2', door_state='close') hp= 87.5 - step=198 action=wait() hp= 87.5 - step=199 action=wait() hp= 87.5 - step=200 action=wait() hp= 87.5 -ep=0049 [easy ] steps=200 reward= -20.515 evac=0 hp= 87.5 suc30=0.87 r30= +12.99 t=18s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='east') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='east') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='east') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=move(direction='west') hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=move(direction='west') hp=100.0 - step=025 action=move(direction='west') hp=100.0 - step=026 action=move(direction='west') hp=100.0 - step=027 action=move(direction='west') hp=100.0 - step=028 action=move(direction='west') hp=100.0 -ep=0050 [easy ] steps=028 reward= +20.040 evac=1 hp=100.0 suc30=0.87 r30= +13.10 t=18s - >> PPO update samples=flushed pi_loss=-0.0001 v_loss=20.0428 entropy=1.1850 kl=0.0004 clip%=0.00 lr=2.32e-04 - [ckpt] saved -> artifacts/pyre_ppo_http_ckpt.pt - step=001 action=move(direction='west') hp=100.0 - step=002 action=door(target_id='door_2', door_state='open') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 -ep=0051 [easy ] steps=005 reward= +17.990 evac=1 hp=100.0 suc30=0.87 r30= +13.29 t=19s - step=001 action=move(direction='west') hp=100.0 - step=002 action=door(target_id='door_6', door_state='open') hp=100.0 - step=003 action=door(target_id='door_7', door_state='close') hp=100.0 - step=004 action=door(target_id='door_7', door_state='close') hp=100.0 - step=005 action=door(target_id='door_7', door_state='close') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=door(target_id='door_1', door_state='close') hp=100.0 - step=018 action=move(direction='south') hp=100.0 - step=019 action=door(target_id='door_1', door_state='close') hp=100.0 - step=020 action=door(target_id='door_1', door_state='close') hp=100.0 - step=021 action=wait() hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=move(direction='north') hp=100.0 - step=024 action=move(direction='west') hp=100.0 - step=025 action=move(direction='west') hp=100.0 - step=026 action=door(target_id='door_1', door_state='close') hp=100.0 - step=027 action=door(target_id='door_1', door_state='open') hp=100.0 - step=028 action=move(direction='south') hp=100.0 - step=029 action=door(target_id='door_1', door_state='close') hp= 99.5 - step=030 action=move(direction='west') hp= 99.0 -ep=0052 [easy ] steps=030 reward= +18.365 evac=1 hp= 99.0 suc30=0.87 r30= +13.34 t=19s - step=001 action=move(direction='north') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=move(direction='south') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='west') hp=100.0 -ep=0053 [easy ] steps=007 reward= +17.570 evac=1 hp=100.0 suc30=0.87 r30= +13.28 t=19s - step=001 action=move(direction='west') hp=100.0 - step=002 action=door(target_id='door_1', door_state='close') hp=100.0 - step=003 action=move(direction='east') hp=100.0 - step=004 action=door(target_id='door_1', door_state='close') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=move(direction='west') hp=100.0 -ep=0054 [easy ] steps=007 reward= +17.210 evac=1 hp=100.0 suc30=0.87 r30= +13.24 t=19s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=door(target_id='door_1', door_state='close') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=door(target_id='door_1', door_state='close') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='east') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='east') hp=100.0 - step=015 action=door(target_id='door_5', door_state='close') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=move(direction='south') hp=100.0 - step=021 action=move(direction='west') hp=100.0 -ep=0055 [easy ] steps=021 reward= +18.180 evac=1 hp=100.0 suc30=0.87 r30= +13.23 t=19s - >> PPO update samples=flushed pi_loss=+0.0005 v_loss=20.9054 entropy=1.3904 kl=0.0001 clip%=0.00 lr=2.26e-04 - step=001 action=wait() hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='east') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=move(direction='east') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='east') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=move(direction='south') hp=100.0 - step=022 action=wait() hp=100.0 - step=023 action=move(direction='north') hp=100.0 - step=024 action=move(direction='east') hp=100.0 - step=025 action=move(direction='west') hp=100.0 - step=026 action=wait() hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=move(direction='north') hp=100.0 - step=029 action=wait() hp=100.0 - step=030 action=wait() hp=100.0 - step=031 action=move(direction='north') hp=100.0 - step=032 action=move(direction='west') hp=100.0 -ep=0056 [easy ] steps=032 reward= +17.970 evac=1 hp=100.0 suc30=0.87 r30= +13.17 t=19s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 -ep=0057 [easy ] steps=005 reward= +17.440 evac=1 hp=100.0 suc30=0.87 r30= +13.14 t=19s - step=001 action=move(direction='east') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=door(target_id='door_2', door_state='close') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=door(target_id='door_2', door_state='close') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=door(target_id='door_1', door_state='open') hp=100.0 - step=016 action=move(direction='east') hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=door(target_id='door_1', door_state='close') hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=move(direction='north') hp=100.0 - step=024 action=door(target_id='door_1', door_state='open') hp=100.0 - step=025 action=move(direction='north') hp=100.0 - step=026 action=move(direction='south') hp=100.0 - step=027 action=move(direction='south') hp=100.0 - step=028 action=move(direction='north') hp=100.0 - step=029 action=move(direction='north') hp=100.0 - step=030 action=move(direction='south') hp=100.0 - step=031 action=wait() hp=100.0 - step=032 action=move(direction='east') hp=100.0 - step=033 action=move(direction='west') hp=100.0 - step=034 action=move(direction='east') hp=100.0 - step=035 action=wait() hp=100.0 - step=036 action=move(direction='south') hp=100.0 - step=037 action=move(direction='north') hp=100.0 - step=038 action=move(direction='west') hp=100.0 - step=039 action=move(direction='north') hp=100.0 - step=040 action=wait() hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=move(direction='south') hp=100.0 - step=043 action=move(direction='north') hp=100.0 - step=044 action=move(direction='east') hp=100.0 - step=045 action=wait() hp=100.0 - step=046 action=move(direction='west') hp=100.0 - step=047 action=wait() hp=100.0 - step=048 action=move(direction='south') hp=100.0 - step=049 action=move(direction='east') hp=100.0 - step=050 action=wait() hp=100.0 - step=051 action=wait() hp=100.0 - step=052 action=move(direction='north') hp=100.0 - step=053 action=move(direction='west') hp=100.0 - step=054 action=wait() hp=100.0 - step=055 action=door(target_id='door_1', door_state='open') hp=100.0 - step=056 action=move(direction='south') hp=100.0 - step=057 action=wait() hp=100.0 - step=058 action=wait() hp=100.0 - step=059 action=move(direction='north') hp=100.0 - step=060 action=move(direction='east') hp=100.0 - step=061 action=move(direction='west') hp=100.0 - step=062 action=wait() hp=100.0 - step=063 action=move(direction='east') hp=100.0 - step=064 action=move(direction='west') hp=100.0 - step=065 action=wait() hp=100.0 - step=066 action=door(target_id='door_1', door_state='open') hp=100.0 - step=067 action=wait() hp=100.0 - step=068 action=move(direction='east') hp=100.0 - step=069 action=move(direction='south') hp=100.0 - step=070 action=move(direction='north') hp=100.0 - step=071 action=move(direction='west') hp=100.0 - step=072 action=move(direction='south') hp=100.0 - step=073 action=move(direction='north') hp=100.0 - step=074 action=move(direction='east') hp=100.0 - step=075 action=move(direction='west') hp=100.0 - step=076 action=wait() hp=100.0 - step=077 action=wait() hp=100.0 - step=078 action=wait() hp=100.0 - step=079 action=wait() hp=100.0 - step=080 action=move(direction='south') hp=100.0 - step=081 action=door(target_id='door_1', door_state='open') hp=100.0 - step=082 action=move(direction='north') hp=100.0 - step=083 action=wait() hp=100.0 - step=084 action=move(direction='south') hp=100.0 - step=085 action=move(direction='north') hp=100.0 - step=086 action=wait() hp=100.0 - step=087 action=wait() hp=100.0 - step=088 action=wait() hp=100.0 - step=089 action=wait() hp=100.0 - step=090 action=wait() hp=100.0 - step=091 action=move(direction='south') hp=100.0 - step=092 action=wait() hp=100.0 - step=093 action=move(direction='north') hp=100.0 - step=094 action=move(direction='south') hp=100.0 - step=095 action=move(direction='south') hp=100.0 - step=096 action=move(direction='north') hp=100.0 - step=097 action=wait() hp=100.0 - step=098 action=move(direction='north') hp=100.0 - step=099 action=wait() hp=100.0 - step=100 action=wait() hp=100.0 - step=101 action=wait() hp=100.0 - step=102 action=door(target_id='door_1', door_state='open') hp=100.0 - step=103 action=door(target_id='door_1', door_state='open') hp=100.0 - step=104 action=wait() hp=100.0 - step=105 action=wait() hp=100.0 - step=106 action=move(direction='east') hp=100.0 - step=107 action=door(target_id='door_1', door_state='open') hp=100.0 - step=108 action=move(direction='east') hp=100.0 - step=109 action=move(direction='west') hp=100.0 - step=110 action=move(direction='west') hp=100.0 - step=111 action=wait() hp=100.0 - step=112 action=move(direction='south') hp=100.0 - step=113 action=move(direction='north') hp=100.0 - step=114 action=wait() hp=100.0 - step=115 action=wait() hp=100.0 - step=116 action=move(direction='south') hp=100.0 - step=117 action=move(direction='north') hp=100.0 - step=118 action=wait() hp=100.0 - step=119 action=wait() hp=100.0 - step=120 action=wait() hp=100.0 - step=121 action=move(direction='south') hp=100.0 - step=122 action=move(direction='south') hp=100.0 - step=123 action=move(direction='east') hp=100.0 - step=124 action=move(direction='north') hp=100.0 - step=125 action=wait() hp=100.0 - step=126 action=wait() hp=100.0 - step=127 action=move(direction='west') hp=100.0 - step=128 action=move(direction='north') hp=100.0 - step=129 action=move(direction='east') hp=100.0 - step=130 action=move(direction='east') hp=100.0 - step=131 action=move(direction='west') hp=100.0 - step=132 action=move(direction='west') hp=100.0 - step=133 action=move(direction='south') hp=100.0 - step=134 action=move(direction='north') hp=100.0 - step=135 action=door(target_id='door_1', door_state='open') hp=100.0 - step=136 action=move(direction='east') hp=100.0 - step=137 action=move(direction='east') hp=100.0 - step=138 action=move(direction='west') hp=100.0 - step=139 action=wait() hp=100.0 - step=140 action=move(direction='west') hp=100.0 - step=141 action=move(direction='east') hp=100.0 - step=142 action=move(direction='west') hp=100.0 - step=143 action=wait() hp=100.0 - step=144 action=move(direction='south') hp=100.0 - step=145 action=move(direction='north') hp=100.0 - step=146 action=move(direction='east') hp=100.0 - step=147 action=move(direction='west') hp=100.0 - step=148 action=move(direction='south') hp=100.0 - step=149 action=move(direction='north') hp=100.0 - step=150 action=wait() hp=100.0 - step=151 action=move(direction='east') hp=100.0 - step=152 action=move(direction='west') hp=100.0 - step=153 action=wait() hp=100.0 - step=154 action=move(direction='south') hp=100.0 - step=155 action=move(direction='north') hp=100.0 - step=156 action=move(direction='south') hp=100.0 - step=157 action=door(target_id='door_1', door_state='open') hp=100.0 - step=158 action=move(direction='south') hp=100.0 - step=159 action=move(direction='north') hp=100.0 - step=160 action=move(direction='north') hp=100.0 - step=161 action=wait() hp=100.0 - step=162 action=door(target_id='door_1', door_state='open') hp=100.0 - step=163 action=wait() hp=100.0 - step=164 action=wait() hp=100.0 - step=165 action=wait() hp=100.0 - step=166 action=move(direction='east') hp=100.0 - step=167 action=move(direction='west') hp=100.0 - step=168 action=wait() hp=100.0 - step=169 action=wait() hp=100.0 - step=170 action=wait() hp=100.0 - step=171 action=wait() hp=100.0 - step=172 action=door(target_id='door_1', door_state='open') hp=100.0 - step=173 action=wait() hp=100.0 - step=174 action=wait() hp=100.0 - step=175 action=wait() hp=100.0 - step=176 action=move(direction='south') hp=100.0 - step=177 action=move(direction='north') hp=100.0 - step=178 action=wait() hp=100.0 - step=179 action=wait() hp=100.0 - step=180 action=wait() hp=100.0 - step=181 action=wait() hp=100.0 - step=182 action=wait() hp=100.0 - step=183 action=wait() hp=100.0 - step=184 action=wait() hp=100.0 - step=185 action=wait() hp=100.0 - step=186 action=wait() hp=100.0 - step=187 action=wait() hp=100.0 - step=188 action=wait() hp=100.0 - step=189 action=wait() hp=100.0 - step=190 action=move(direction='south') hp=100.0 - step=191 action=wait() hp=100.0 - step=192 action=wait() hp=100.0 - step=193 action=wait() hp=100.0 - step=194 action=move(direction='south') hp=100.0 - step=195 action=move(direction='north') hp=100.0 - step=196 action=move(direction='north') hp=100.0 - step=197 action=wait() hp=100.0 - step=198 action=wait() hp=100.0 - step=199 action=wait() hp=100.0 - step=200 action=wait() hp=100.0 -ep=0058 [easy ] steps=200 reward= -19.920 evac=0 hp=100.0 suc30=0.83 r30= +12.01 t=20s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='south') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='south') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='west') hp=100.0 -ep=0059 [easy ] steps=016 reward= +18.780 evac=1 hp=100.0 suc30=0.83 r30= +11.95 t=20s - step=001 action=move(direction='west') hp=100.0 - step=002 action=door(target_id='door_2', door_state='close') hp=100.0 - step=003 action=door(target_id='door_1', door_state='open') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=door(target_id='door_2', door_state='close') hp=100.0 - step=006 action=door(target_id='door_2', door_state='close') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=door(target_id='door_1', door_state='open') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='south') hp=100.0 - step=013 action=door(target_id='door_1', door_state='open') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='west') hp=100.0 -ep=0060 [easy ] steps=017 reward= +17.830 evac=1 hp=100.0 suc30=0.83 r30= +11.96 t=20s - >> PPO update samples=flushed pi_loss=-0.0194 v_loss=18.6526 entropy=1.2473 kl=0.0044 clip%=0.05 lr=2.19e-04 - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=move(direction='west') hp=100.0 - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - ** EVAL [medium] reward=+16.887 success=1.00 steps=9.0 - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=door(target_id='door_1', door_state='close') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=door(target_id='door_1', door_state='open') hp=100.0 - step=015 action=door(target_id='door_1', door_state='close') hp=100.0 - step=016 action=move(direction='east') hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=move(direction='east') hp=100.0 - step=021 action=move(direction='west') hp=100.0 - step=022 action=wait() hp=100.0 - step=023 action=move(direction='east') hp=100.0 - step=024 action=wait() hp=100.0 - step=025 action=door(target_id='door_1', door_state='open') hp=100.0 - step=026 action=move(direction='west') hp=100.0 - step=027 action=door(target_id='door_1', door_state='open') hp=100.0 - step=028 action=move(direction='east') hp=100.0 - step=029 action=door(target_id='door_1', door_state='open') hp=100.0 - step=030 action=move(direction='west') hp=100.0 - step=031 action=move(direction='east') hp=100.0 - step=032 action=move(direction='west') hp=100.0 - step=033 action=wait() hp=100.0 - step=034 action=door(target_id='door_1', door_state='open') hp=100.0 - step=035 action=wait() hp=100.0 - step=036 action=move(direction='east') hp=100.0 - step=037 action=move(direction='west') hp=100.0 - step=038 action=door(target_id='door_1', door_state='open') hp=100.0 - step=039 action=move(direction='east') hp=100.0 - step=040 action=move(direction='west') hp=100.0 - step=041 action=move(direction='south') hp=100.0 - step=042 action=wait() hp=100.0 - step=043 action=move(direction='north') hp=100.0 - step=044 action=move(direction='east') hp=100.0 - step=045 action=move(direction='south') hp=100.0 - step=046 action=door(target_id='door_1', door_state='open') hp=100.0 - step=047 action=move(direction='west') hp=100.0 - step=048 action=door(target_id='door_1', door_state='close') hp=100.0 - step=049 action=door(target_id='door_1', door_state='close') hp=100.0 - step=050 action=wait() hp=100.0 - step=051 action=move(direction='north') hp=100.0 - step=052 action=wait() hp=100.0 - step=053 action=move(direction='south') hp=100.0 - step=054 action=door(target_id='door_1', door_state='close') hp=100.0 - step=055 action=wait() hp=100.0 - step=056 action=move(direction='east') hp=100.0 - step=057 action=wait() hp=100.0 - step=058 action=wait() hp=100.0 - step=059 action=move(direction='north') hp=100.0 - step=060 action=move(direction='west') hp=100.0 - step=061 action=move(direction='south') hp=100.0 - step=062 action=wait() hp=100.0 - step=063 action=move(direction='north') hp=100.0 - step=064 action=wait() hp=100.0 - step=065 action=wait() hp=100.0 - step=066 action=door(target_id='door_1', door_state='close') hp=100.0 - step=067 action=wait() hp=100.0 - step=068 action=door(target_id='door_1', door_state='close') hp=100.0 - step=069 action=wait() hp=100.0 - step=070 action=wait() hp=100.0 - step=071 action=move(direction='south') hp=100.0 - step=072 action=move(direction='north') hp=100.0 - step=073 action=wait() hp=100.0 - step=074 action=move(direction='south') hp=100.0 - step=075 action=move(direction='north') hp=100.0 - step=076 action=door(target_id='door_1', door_state='close') hp=100.0 - step=077 action=wait() hp=100.0 - step=078 action=move(direction='south') hp=100.0 - step=079 action=move(direction='south') hp=100.0 - step=080 action=move(direction='north') hp=100.0 - step=081 action=wait() hp=100.0 - step=082 action=wait() hp=100.0 - step=083 action=move(direction='east') hp=100.0 - step=084 action=move(direction='north') hp=100.0 - step=085 action=wait() hp=100.0 - step=086 action=move(direction='west') hp=100.0 - step=087 action=door(target_id='door_1', door_state='close') hp=100.0 - step=088 action=wait() hp=100.0 - step=089 action=move(direction='east') hp=100.0 - step=090 action=move(direction='south') hp=100.0 - step=091 action=move(direction='west') hp=100.0 - step=092 action=move(direction='north') hp=100.0 - step=093 action=door(target_id='door_1', door_state='close') hp=100.0 - step=094 action=move(direction='east') hp=100.0 - step=095 action=door(target_id='door_1', door_state='close') hp=100.0 - step=096 action=move(direction='south') hp=100.0 - step=097 action=move(direction='north') hp=100.0 - step=098 action=door(target_id='door_1', door_state='close') hp=100.0 - step=099 action=move(direction='west') hp=100.0 - step=100 action=move(direction='east') hp=100.0 - step=101 action=move(direction='west') hp=100.0 - step=102 action=move(direction='east') hp=100.0 - step=103 action=move(direction='east') hp=100.0 - step=104 action=move(direction='west') hp=100.0 - step=105 action=move(direction='east') hp=100.0 - step=106 action=move(direction='west') hp=100.0 - step=107 action=move(direction='west') hp=100.0 - step=108 action=move(direction='south') hp=100.0 - step=109 action=move(direction='east') hp=100.0 - step=110 action=wait() hp=100.0 - step=111 action=move(direction='north') hp=100.0 - step=112 action=wait() hp=100.0 - step=113 action=move(direction='west') hp=100.0 - step=114 action=door(target_id='door_1', door_state='close') hp=100.0 - step=115 action=move(direction='east') hp=100.0 - step=116 action=move(direction='west') hp=100.0 - step=117 action=wait() hp=100.0 - step=118 action=wait() hp=100.0 - step=119 action=wait() hp=100.0 - step=120 action=wait() hp=100.0 - step=121 action=move(direction='east') hp=100.0 - step=122 action=move(direction='west') hp=100.0 - step=123 action=move(direction='south') hp=100.0 - step=124 action=move(direction='north') hp=100.0 - step=125 action=move(direction='east') hp=100.0 - step=126 action=move(direction='south') hp=100.0 - step=127 action=move(direction='north') hp=100.0 - step=128 action=move(direction='west') hp=100.0 - step=129 action=wait() hp=100.0 - step=130 action=wait() hp=100.0 - step=131 action=wait() hp=100.0 - step=132 action=door(target_id='door_1', door_state='close') hp=100.0 - step=133 action=door(target_id='door_1', door_state='close') hp=100.0 - step=134 action=move(direction='south') hp=100.0 - step=135 action=move(direction='north') hp=100.0 - step=136 action=wait() hp=100.0 - step=137 action=wait() hp=100.0 - step=138 action=door(target_id='door_1', door_state='close') hp=100.0 - step=139 action=wait() hp=100.0 - step=140 action=wait() hp=100.0 - step=141 action=door(target_id='door_1', door_state='close') hp=100.0 - step=142 action=move(direction='south') hp=100.0 - step=143 action=door(target_id='door_1', door_state='close') hp=100.0 - step=144 action=move(direction='south') hp=100.0 - step=145 action=door(target_id='door_1', door_state='close') hp=100.0 - step=146 action=move(direction='north') hp=100.0 - step=147 action=door(target_id='door_1', door_state='open') hp=100.0 - step=148 action=move(direction='north') hp=100.0 - step=149 action=move(direction='south') hp=100.0 - step=150 action=move(direction='east') hp=100.0 - step=151 action=move(direction='east') hp=100.0 - step=152 action=move(direction='north') hp=100.0 - step=153 action=move(direction='west') hp=100.0 - step=154 action=door(target_id='door_1', door_state='open') hp=100.0 - step=155 action=move(direction='west') hp=100.0 - step=156 action=wait() hp=100.0 - step=157 action=move(direction='south') hp=100.0 - step=158 action=move(direction='north') hp=100.0 - step=159 action=move(direction='south') hp=100.0 - step=160 action=door(target_id='door_1', door_state='open') hp=100.0 - step=161 action=wait() hp=100.0 - step=162 action=wait() hp=100.0 - step=163 action=door(target_id='door_1', door_state='open') hp=100.0 - step=164 action=move(direction='north') hp=100.0 - step=165 action=door(target_id='door_1', door_state='open') hp=100.0 - step=166 action=move(direction='south') hp=100.0 - step=167 action=move(direction='north') hp=100.0 - step=168 action=wait() hp=100.0 - step=169 action=wait() hp=100.0 - step=170 action=move(direction='east') hp=100.0 - step=171 action=move(direction='south') hp=100.0 - step=172 action=move(direction='north') hp=100.0 - step=173 action=move(direction='east') hp=100.0 - step=174 action=move(direction='west') hp=100.0 - step=175 action=move(direction='west') hp=100.0 - step=176 action=door(target_id='door_1', door_state='open') hp=100.0 - step=177 action=wait() hp=100.0 - step=178 action=wait() hp=100.0 - step=179 action=wait() hp=100.0 - step=180 action=move(direction='south') hp=100.0 - step=181 action=move(direction='north') hp=100.0 - step=182 action=wait() hp=100.0 - step=183 action=wait() hp=100.0 - step=184 action=door(target_id='door_1', door_state='open') hp=100.0 - step=185 action=move(direction='east') hp=100.0 - step=186 action=wait() hp=100.0 - step=187 action=door(target_id='door_1', door_state='open') hp=100.0 - step=188 action=move(direction='west') hp=100.0 - step=189 action=wait() hp=100.0 - step=190 action=wait() hp=100.0 - step=191 action=wait() hp=100.0 - step=192 action=wait() hp=100.0 - step=193 action=move(direction='south') hp=100.0 - step=194 action=wait() hp=100.0 - step=195 action=move(direction='north') hp=100.0 - step=196 action=move(direction='east') hp=100.0 - step=197 action=wait() hp=100.0 - step=198 action=move(direction='west') hp=100.0 - step=199 action=wait() hp=100.0 - step=200 action=door(target_id='door_1', door_state='open') hp=100.0 -ep=0061 [easy ] steps=200 reward= -19.670 evac=0 hp=100.0 suc30=0.80 r30= +10.68 t=22s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='east') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=move(direction='east') hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=wait() hp=100.0 - step=022 action=move(direction='east') hp=100.0 - step=023 action=move(direction='east') hp=100.0 - step=024 action=move(direction='south') hp=100.0 - step=025 action=move(direction='west') hp=100.0 - step=026 action=move(direction='west') hp=100.0 - step=027 action=move(direction='west') hp=100.0 - step=028 action=move(direction='west') hp=100.0 - step=029 action=wait() hp=100.0 - step=030 action=wait() hp=100.0 - step=031 action=move(direction='south') hp=100.0 - step=032 action=move(direction='west') hp=100.0 - step=033 action=wait() hp=100.0 - step=034 action=wait() hp=100.0 - step=035 action=move(direction='west') hp=100.0 - step=036 action=wait() hp=100.0 - step=037 action=move(direction='south') hp=100.0 - step=038 action=move(direction='north') hp=100.0 - step=039 action=move(direction='north') hp=100.0 - step=040 action=move(direction='north') hp=100.0 - step=041 action=move(direction='west') hp=100.0 - step=042 action=move(direction='west') hp=100.0 - step=043 action=wait() hp=100.0 - step=044 action=move(direction='west') hp=100.0 - step=045 action=move(direction='west') hp=100.0 - step=046 action=move(direction='west') hp=100.0 -ep=0062 [easy ] steps=046 reward= +19.330 evac=1 hp=100.0 suc30=0.80 r30= +10.60 t=22s - step=001 action=wait() hp=100.0 - step=002 action=door(target_id='door_3', door_state='open') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=door(target_id='door_3', door_state='open') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=door(target_id='door_3', door_state='open') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=move(direction='south') hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=move(direction='north') hp=100.0 - step=021 action=move(direction='north') hp=100.0 -ep=0063 [easy ] steps=021 reward= +17.550 evac=1 hp=100.0 suc30=0.80 r30= +10.57 t=22s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=door(target_id='door_6', door_state='close') hp=100.0 - step=008 action=door(target_id='door_1', door_state='close') hp=100.0 - step=009 action=door(target_id='door_2', door_state='close') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=door(target_id='door_1', door_state='close') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=move(direction='east') hp=100.0 - step=021 action=wait() hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=door(target_id='door_1', door_state='close') hp=100.0 - step=024 action=wait() hp=100.0 - step=025 action=door(target_id='door_1', door_state='open') hp=100.0 - step=026 action=door(target_id='door_1', door_state='close') hp=100.0 - step=027 action=move(direction='south') hp=100.0 - step=028 action=wait() hp=100.0 - step=029 action=move(direction='north') hp=100.0 - step=030 action=move(direction='east') hp=100.0 - step=031 action=move(direction='west') hp=100.0 - step=032 action=wait() hp=100.0 - step=033 action=wait() hp=100.0 - step=034 action=wait() hp=100.0 - step=035 action=move(direction='east') hp=100.0 - step=036 action=door(target_id='door_2', door_state='close') hp=100.0 - step=037 action=move(direction='south') hp=100.0 - step=038 action=move(direction='south') hp=100.0 - step=039 action=wait() hp=100.0 - step=040 action=move(direction='west') hp=100.0 - step=041 action=move(direction='north') hp=100.0 - step=042 action=move(direction='west') hp=100.0 -ep=0064 [easy ] steps=042 reward= +17.810 evac=1 hp=100.0 suc30=0.80 r30= +10.65 t=22s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='east') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=move(direction='south') hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=wait() hp=100.0 - step=022 action=wait() hp=100.0 - step=023 action=move(direction='east') hp=100.0 - step=024 action=move(direction='north') hp=100.0 - step=025 action=move(direction='west') hp=100.0 - step=026 action=move(direction='north') hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=move(direction='west') hp=100.0 - step=029 action=wait() hp=100.0 - step=030 action=move(direction='west') hp=100.0 - step=031 action=move(direction='west') hp=100.0 - step=032 action=move(direction='west') hp=100.0 - step=033 action=wait() hp=100.0 - step=034 action=move(direction='west') hp=100.0 -ep=0065 [easy ] steps=034 reward= +21.030 evac=1 hp=100.0 suc30=0.80 r30= +10.75 t=22s - >> PPO update samples=flushed pi_loss=-0.0136 v_loss=10.9852 entropy=1.3308 kl=0.0037 clip%=0.04 lr=2.12e-04 - step=001 action=move(direction='west') hp=100.0 -ep=0066 [easy ] steps=001 reward= +16.760 evac=1 hp=100.0 suc30=0.80 r30= +10.70 t=22s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='east') hp=100.0 - step=004 action=door(target_id='door_2', door_state='close') hp=100.0 - step=005 action=door(target_id='door_5', door_state='close') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=door(target_id='door_1', door_state='close') hp=100.0 - step=010 action=move(direction='east') hp=100.0 - step=011 action=door(target_id='door_2', door_state='close') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=door(target_id='door_1', door_state='open') hp=100.0 - step=016 action=move(direction='south') hp=100.0 - step=017 action=door(target_id='door_5', door_state='close') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=move(direction='west') hp=100.0 -ep=0067 [easy ] steps=019 reward= +16.630 evac=1 hp=100.0 suc30=0.83 r30= +11.94 t=23s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='east') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='east') hp=100.0 - step=017 action=move(direction='east') hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=move(direction='west') hp=100.0 - step=022 action=move(direction='west') hp=100.0 -ep=0068 [easy ] steps=022 reward= +19.160 evac=1 hp=100.0 suc30=0.83 r30= +12.01 t=23s - step=001 action=door(target_id='door_2', door_state='close') hp=100.0 - step=002 action=door(target_id='door_2', door_state='close') hp=100.0 - step=003 action=door(target_id='door_2', door_state='close') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='east') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='east') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=door(target_id='door_1', door_state='open') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='east') hp=100.0 - step=015 action=move(direction='east') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=move(direction='west') hp=100.0 -ep=0069 [easy ] steps=019 reward= +17.580 evac=1 hp=100.0 suc30=0.83 r30= +12.06 t=23s - step=001 action=door(target_id='door_4', door_state='close') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='south') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=door(target_id='door_8', door_state='close') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=door(target_id='door_4', door_state='close') hp=100.0 - step=012 action=door(target_id='door_4', door_state='open') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=door(target_id='door_4', door_state='close') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=door(target_id='door_4', door_state='open') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=door(target_id='door_4', door_state='close') hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=move(direction='east') hp=100.0 - step=024 action=door(target_id='door_4', door_state='open') hp=100.0 - step=025 action=move(direction='west') hp=100.0 - step=026 action=move(direction='north') hp=100.0 - step=027 action=move(direction='north') hp=100.0 - step=028 action=door(target_id='door_4', door_state='close') hp=100.0 - step=029 action=door(target_id='door_4', door_state='close') hp=100.0 - step=030 action=door(target_id='door_4', door_state='close') hp=100.0 - step=031 action=door(target_id='door_4', door_state='close') hp=100.0 - step=032 action=move(direction='east') hp=100.0 - step=033 action=move(direction='west') hp=100.0 - step=034 action=door(target_id='door_4', door_state='close') hp=100.0 - step=035 action=wait() hp=100.0 - step=036 action=move(direction='south') hp=100.0 - step=037 action=move(direction='east') hp=100.0 - step=038 action=door(target_id='door_4', door_state='close') hp=100.0 - step=039 action=wait() hp=100.0 - step=040 action=move(direction='south') hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=door(target_id='door_4', door_state='open') hp=100.0 - step=043 action=move(direction='west') hp=100.0 - step=044 action=move(direction='north') hp=100.0 - step=045 action=move(direction='east') hp=100.0 - step=046 action=move(direction='west') hp=100.0 - step=047 action=move(direction='north') hp=100.0 - step=048 action=move(direction='south') hp=100.0 - step=049 action=door(target_id='door_4', door_state='close') hp=100.0 - step=050 action=door(target_id='door_4', door_state='close') hp=100.0 - step=051 action=wait() hp=100.0 - step=052 action=move(direction='north') hp=100.0 - step=053 action=door(target_id='door_4', door_state='close') hp=100.0 - step=054 action=door(target_id='door_4', door_state='close') hp=100.0 - step=055 action=door(target_id='door_4', door_state='close') hp=100.0 - step=056 action=door(target_id='door_4', door_state='close') hp=100.0 - step=057 action=wait() hp=100.0 - step=058 action=wait() hp=100.0 - step=059 action=door(target_id='door_4', door_state='close') hp=100.0 - step=060 action=move(direction='east') hp=100.0 - step=061 action=move(direction='west') hp=100.0 - step=062 action=move(direction='south') hp=100.0 - step=063 action=move(direction='south') hp=100.0 - step=064 action=move(direction='east') hp=100.0 - step=065 action=move(direction='north') hp=100.0 - step=066 action=wait() hp=100.0 - step=067 action=move(direction='west') hp=100.0 - step=068 action=door(target_id='door_4', door_state='close') hp=100.0 - step=069 action=door(target_id='door_4', door_state='close') hp=100.0 - step=070 action=door(target_id='door_4', door_state='close') hp=100.0 - step=071 action=door(target_id='door_4', door_state='close') hp=100.0 - step=072 action=move(direction='north') hp=100.0 - step=073 action=door(target_id='door_4', door_state='close') hp=100.0 - step=074 action=wait() hp=100.0 - step=075 action=move(direction='south') hp=100.0 - step=076 action=move(direction='north') hp=100.0 - step=077 action=wait() hp=100.0 - step=078 action=wait() hp=100.0 - step=079 action=door(target_id='door_4', door_state='close') hp=100.0 - step=080 action=door(target_id='door_4', door_state='close') hp=100.0 - step=081 action=wait() hp=100.0 - step=082 action=wait() hp=100.0 - step=083 action=door(target_id='door_4', door_state='close') hp=100.0 - step=084 action=door(target_id='door_4', door_state='close') hp=100.0 - step=085 action=door(target_id='door_4', door_state='close') hp=100.0 - step=086 action=door(target_id='door_4', door_state='close') hp=100.0 - step=087 action=door(target_id='door_4', door_state='close') hp=100.0 - step=088 action=wait() hp=100.0 - step=089 action=wait() hp=100.0 - step=090 action=wait() hp=100.0 - step=091 action=door(target_id='door_4', door_state='close') hp=100.0 - step=092 action=wait() hp=100.0 - step=093 action=wait() hp=100.0 - step=094 action=wait() hp=100.0 - step=095 action=door(target_id='door_4', door_state='close') hp=100.0 - step=096 action=wait() hp=100.0 - step=097 action=wait() hp=100.0 - step=098 action=door(target_id='door_4', door_state='close') hp=100.0 - step=099 action=wait() hp=100.0 - step=100 action=wait() hp=100.0 - step=101 action=wait() hp=100.0 - step=102 action=move(direction='south') hp=100.0 - step=103 action=wait() hp=100.0 - step=104 action=move(direction='north') hp=100.0 - step=105 action=door(target_id='door_4', door_state='close') hp=100.0 - step=106 action=move(direction='east') hp=100.0 - step=107 action=move(direction='west') hp=100.0 - step=108 action=wait() hp=100.0 - step=109 action=wait() hp=100.0 - step=110 action=move(direction='south') hp=100.0 - step=111 action=move(direction='north') hp=100.0 - step=112 action=door(target_id='door_4', door_state='close') hp=100.0 - step=113 action=door(target_id='door_4', door_state='close') hp=100.0 - step=114 action=door(target_id='door_4', door_state='close') hp=100.0 - step=115 action=wait() hp=100.0 - step=116 action=wait() hp=100.0 - step=117 action=wait() hp=100.0 - step=118 action=move(direction='south') hp=100.0 - step=119 action=wait() hp=100.0 - step=120 action=door(target_id='door_4', door_state='close') hp=100.0 - step=121 action=move(direction='south') hp=100.0 - step=122 action=wait() hp=100.0 - step=123 action=door(target_id='door_4', door_state='close') hp=100.0 - step=124 action=move(direction='north') hp=100.0 - step=125 action=move(direction='north') hp=100.0 - step=126 action=move(direction='east') hp=100.0 - step=127 action=move(direction='west') hp=100.0 - step=128 action=move(direction='east') hp=100.0 - step=129 action=wait() hp=100.0 - step=130 action=door(target_id='door_4', door_state='open') hp=100.0 - step=131 action=wait() hp=100.0 - step=132 action=move(direction='west') hp=100.0 - step=133 action=move(direction='south') hp=100.0 - step=134 action=move(direction='east') hp=100.0 - step=135 action=move(direction='west') hp=100.0 - step=136 action=move(direction='north') hp=100.0 - step=137 action=wait() hp=100.0 - step=138 action=move(direction='south') hp=100.0 - step=139 action=door(target_id='door_4', door_state='open') hp=100.0 - step=140 action=move(direction='north') hp=100.0 - step=141 action=move(direction='east') hp=100.0 - step=142 action=door(target_id='door_4', door_state='open') hp=100.0 - step=143 action=move(direction='south') hp=100.0 - step=144 action=move(direction='west') hp=100.0 - step=145 action=door(target_id='door_4', door_state='open') hp=100.0 - step=146 action=move(direction='north') hp=100.0 - step=147 action=move(direction='south') hp=100.0 - step=148 action=move(direction='east') hp=100.0 - step=149 action=wait() hp=100.0 - step=150 action=move(direction='west') hp=100.0 - step=151 action=move(direction='east') hp=100.0 - step=152 action=move(direction='west') hp=100.0 - step=153 action=door(target_id='door_4', door_state='open') hp=100.0 - step=154 action=wait() hp=100.0 - step=155 action=move(direction='east') hp=100.0 - step=156 action=move(direction='west') hp=100.0 - step=157 action=move(direction='east') hp=100.0 - step=158 action=move(direction='west') hp=100.0 - step=159 action=move(direction='north') hp=100.0 - step=160 action=door(target_id='door_4', door_state='open') hp=100.0 - step=161 action=door(target_id='door_4', door_state='open') hp=100.0 - step=162 action=door(target_id='door_4', door_state='open') hp=100.0 - step=163 action=door(target_id='door_4', door_state='open') hp=100.0 - step=164 action=move(direction='east') hp=100.0 - step=165 action=door(target_id='door_4', door_state='open') hp=100.0 - step=166 action=wait() hp=100.0 - step=167 action=door(target_id='door_4', door_state='open') hp=100.0 - step=168 action=wait() hp=100.0 - step=169 action=wait() hp=100.0 - step=170 action=move(direction='west') hp=100.0 - step=171 action=move(direction='east') hp=100.0 - step=172 action=door(target_id='door_4', door_state='open') hp=100.0 - step=173 action=move(direction='west') hp=100.0 - step=174 action=door(target_id='door_4', door_state='open') hp=100.0 - step=175 action=move(direction='south') hp=100.0 - step=176 action=move(direction='north') hp=100.0 - step=177 action=move(direction='east') hp=100.0 - step=178 action=door(target_id='door_4', door_state='open') hp=100.0 - step=179 action=move(direction='west') hp=100.0 - step=180 action=door(target_id='door_4', door_state='open') hp=100.0 - step=181 action=door(target_id='door_4', door_state='open') hp=100.0 - step=182 action=wait() hp=100.0 - step=183 action=wait() hp=100.0 - step=184 action=move(direction='south') hp=100.0 - step=185 action=move(direction='north') hp=100.0 - step=186 action=move(direction='south') hp=100.0 - step=187 action=wait() hp=100.0 - step=188 action=door(target_id='door_4', door_state='open') hp=100.0 - step=189 action=door(target_id='door_4', door_state='open') hp=100.0 - step=190 action=move(direction='north') hp=100.0 - step=191 action=wait() hp=100.0 - step=192 action=door(target_id='door_4', door_state='open') hp=100.0 - step=193 action=move(direction='south') hp=100.0 - step=194 action=move(direction='north') hp=100.0 - step=195 action=door(target_id='door_4', door_state='open') hp=100.0 - step=196 action=wait() hp=100.0 - step=197 action=door(target_id='door_4', door_state='open') hp=100.0 - step=198 action=move(direction='east') hp=100.0 - step=199 action=wait() hp=100.0 - step=200 action=wait() hp=100.0 -ep=0070 [easy ] steps=200 reward= -21.270 evac=0 hp=100.0 suc30=0.83 r30= +11.98 t=24s - >> PPO update samples=flushed pi_loss=+0.2003 v_loss=11.4670 entropy=1.3652 kl=0.0050 clip%=0.05 lr=2.05e-04 - step=001 action=move(direction='west') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='south') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=move(direction='west') hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=move(direction='west') hp=100.0 -ep=0071 [easy ] steps=021 reward= +19.360 evac=1 hp=100.0 suc30=0.83 r30= +11.91 t=24s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=door(target_id='door_2', door_state='close') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=door(target_id='door_2', door_state='close') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=door(target_id='door_1', door_state='close') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=door(target_id='door_1', door_state='close') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='west') hp=100.0 -ep=0072 [easy ] steps=013 reward= +18.590 evac=1 hp=100.0 suc30=0.83 r30= +11.92 t=24s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=door(target_id='door_2', door_state='close') hp=100.0 - step=004 action=door(target_id='door_2', door_state='close') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=door(target_id='door_2', door_state='close') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='east') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=door(target_id='door_1', door_state='close') hp=100.0 - step=013 action=door(target_id='door_5', door_state='close') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='south') hp=100.0 - step=016 action=move(direction='east') hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=move(direction='west') hp=100.0 -ep=0073 [easy ] steps=018 reward= +18.250 evac=1 hp=100.0 suc30=0.83 r30= +12.08 t=24s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='east') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='south') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='south') hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=move(direction='south') hp=100.0 - step=021 action=move(direction='east') hp=100.0 - step=022 action=move(direction='north') hp=100.0 - step=023 action=move(direction='west') hp=100.0 - step=024 action=move(direction='north') hp=100.0 - step=025 action=move(direction='west') hp=100.0 -ep=0074 [easy ] steps=025 reward= +19.190 evac=1 hp=100.0 suc30=0.83 r30= +11.96 t=24s - step=001 action=move(direction='west') hp=100.0 -ep=0075 [easy ] steps=001 reward= +16.760 evac=1 hp=100.0 suc30=0.83 r30= +11.90 t=24s - >> PPO update samples=flushed pi_loss=+0.0008 v_loss=12.9646 entropy=1.2285 kl=0.0003 clip%=0.00 lr=1.99e-04 - [ckpt] saved -> artifacts/pyre_ppo_http_ckpt.pt - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='east') hp=100.0 - step=005 action=door(target_id='door_5', door_state='close') hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=door(target_id='door_5', door_state='close') hp=100.0 - step=008 action=door(target_id='door_1', door_state='open') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='south') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='west') hp=100.0 -ep=0076 [easy ] steps=012 reward= +17.640 evac=1 hp=100.0 suc30=0.87 r30= +13.09 t=24s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='south') hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=move(direction='south') hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=move(direction='north') hp=100.0 - step=024 action=move(direction='north') hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=move(direction='north') hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=move(direction='north') hp=100.0 - step=029 action=move(direction='north') hp=100.0 - step=030 action=move(direction='east') hp=100.0 - step=031 action=move(direction='east') hp=100.0 - step=032 action=move(direction='west') hp=100.0 - step=033 action=move(direction='west') hp=100.0 - step=034 action=move(direction='east') hp=100.0 - step=035 action=move(direction='west') hp=100.0 - step=036 action=move(direction='west') hp=100.0 - step=037 action=wait() hp=100.0 - step=038 action=move(direction='west') hp=100.0 - step=039 action=wait() hp=100.0 - step=040 action=move(direction='west') hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=move(direction='west') hp=100.0 - step=043 action=move(direction='west') hp=100.0 -ep=0077 [easy ] steps=043 reward= +22.680 evac=1 hp=100.0 suc30=0.87 r30= +13.22 t=25s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='west') hp=100.0 -ep=0078 [easy ] steps=002 reward= +16.650 evac=1 hp=100.0 suc30=0.87 r30= +13.17 t=25s - step=001 action=door(target_id='door_2', door_state='close') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=door(target_id='door_5', door_state='close') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='west') hp=100.0 -ep=0079 [easy ] steps=011 reward= +18.640 evac=1 hp=100.0 suc30=0.90 r30= +14.47 t=25s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='east') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=move(direction='south') hp=100.0 - step=019 action=move(direction='east') hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=move(direction='north') hp=100.0 - step=023 action=move(direction='west') hp=100.0 -ep=0080 [easy ] steps=023 reward= +21.870 evac=1 hp=100.0 suc30=0.90 r30= +14.53 t=25s - >> PPO update samples=flushed pi_loss=-0.0006 v_loss=7.6417 entropy=1.2525 kl=0.0003 clip%=0.00 lr=1.92e-04 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='north') hp= 88.0 - step=008 action=move(direction='north') hp= 83.0 - step=009 action=move(direction='north') hp= 71.0 - step=010 action=move(direction='north') hp= 71.0 - step=011 action=move(direction='north') hp= 71.0 - step=012 action=move(direction='west') hp= 71.0 - ** EVAL [medium] reward=+15.162 success=1.00 steps=10.3 - step=001 action=wait() hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 -ep=0081 [easy ] steps=005 reward= +17.940 evac=1 hp=100.0 suc30=0.90 r30= +14.53 t=25s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='west') hp=100.0 -ep=0082 [easy ] steps=002 reward= +17.230 evac=1 hp=100.0 suc30=0.90 r30= +14.49 t=25s - step=001 action=move(direction='south') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=move(direction='east') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=move(direction='west') hp=100.0 - step=020 action=move(direction='east') hp=100.0 - step=021 action=wait() hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=move(direction='west') hp=100.0 - step=024 action=move(direction='east') hp=100.0 - step=025 action=move(direction='west') hp=100.0 - step=026 action=wait() hp=100.0 - step=027 action=move(direction='east') hp=100.0 - step=028 action=move(direction='west') hp=100.0 - step=029 action=move(direction='west') hp=100.0 - step=030 action=wait() hp=100.0 - step=031 action=move(direction='west') hp=100.0 - step=032 action=wait() hp=100.0 - step=033 action=move(direction='west') hp=100.0 - step=034 action=move(direction='south') hp=100.0 - step=035 action=move(direction='north') hp=100.0 - step=036 action=move(direction='west') hp=100.0 -ep=0083 [easy ] steps=036 reward= +18.860 evac=1 hp=100.0 suc30=0.90 r30= +14.54 t=25s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=door(target_id='door_3', door_state='close') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='north') hp=100.0 -ep=0084 [easy ] steps=013 reward= +18.260 evac=1 hp=100.0 suc30=0.90 r30= +14.57 t=25s - step=001 action=door(target_id='door_7', door_state='open') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=door(target_id='door_7', door_state='open') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='east') hp=100.0 - step=008 action=door(target_id='door_2', door_state='close') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=door(target_id='door_6', door_state='close') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=door(target_id='door_6', door_state='close') hp=100.0 - step=014 action=move(direction='east') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=move(direction='west') hp=100.0 -ep=0085 [easy ] steps=022 reward= +18.320 evac=1 hp=100.0 suc30=0.90 r30= +14.57 t=25s - >> PPO update samples=flushed pi_loss=-0.0081 v_loss=3.2728 entropy=1.1286 kl=0.0006 clip%=0.00 lr=1.85e-04 - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='west') hp=100.0 -ep=0086 [easy ] steps=012 reward= +19.910 evac=1 hp=100.0 suc30=0.90 r30= +14.64 t=26s - step=001 action=move(direction='south') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='east') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=door(target_id='door_3', door_state='open') hp=100.0 - step=013 action=door(target_id='door_2', door_state='close') hp=100.0 - step=014 action=door(target_id='door_2', door_state='close') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=move(direction='south') hp=100.0 - step=023 action=move(direction='north') hp=100.0 - step=024 action=wait() hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=wait() hp=100.0 - step=027 action=move(direction='north') hp=100.0 -ep=0087 [easy ] steps=027 reward= +17.510 evac=1 hp=100.0 suc30=0.90 r30= +14.64 t=26s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='east') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='east') hp=100.0 - step=009 action=move(direction='east') hp=100.0 - step=010 action=door(target_id='door_1', door_state='open') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=door(target_id='door_1', door_state='close') hp=100.0 - step=017 action=door(target_id='door_1', door_state='open') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=door(target_id='door_1', door_state='close') hp=100.0 - step=022 action=move(direction='east') hp=100.0 - step=023 action=move(direction='north') hp=100.0 - step=024 action=door(target_id='door_1', door_state='open') hp=100.0 - step=025 action=move(direction='west') hp=100.0 - step=026 action=move(direction='north') hp=100.0 - step=027 action=door(target_id='door_1', door_state='close') hp=100.0 - step=028 action=wait() hp=100.0 - step=029 action=wait() hp=100.0 - step=030 action=wait() hp=100.0 - step=031 action=wait() hp=100.0 - step=032 action=move(direction='south') hp=100.0 - step=033 action=door(target_id='door_1', door_state='close') hp=100.0 - step=034 action=move(direction='north') hp=100.0 - step=035 action=wait() hp=100.0 - step=036 action=move(direction='east') hp=100.0 - step=037 action=move(direction='south') hp=100.0 - step=038 action=move(direction='north') hp=100.0 - step=039 action=move(direction='west') hp=100.0 - step=040 action=wait() hp=100.0 - step=041 action=move(direction='south') hp=100.0 - step=042 action=move(direction='north') hp=100.0 - step=043 action=wait() hp=100.0 - step=044 action=door(target_id='door_1', door_state='close') hp=100.0 - step=045 action=move(direction='east') hp=100.0 - step=046 action=wait() hp=100.0 - step=047 action=wait() hp=100.0 - step=048 action=wait() hp=100.0 - step=049 action=door(target_id='door_1', door_state='close') hp=100.0 - step=050 action=move(direction='west') hp=100.0 - step=051 action=move(direction='south') hp=100.0 - step=052 action=door(target_id='door_1', door_state='close') hp=100.0 - step=053 action=move(direction='east') hp=100.0 - step=054 action=move(direction='west') hp=100.0 - step=055 action=move(direction='east') hp=100.0 - step=056 action=move(direction='north') hp=100.0 - step=057 action=move(direction='west') hp=100.0 - step=058 action=move(direction='south') hp=100.0 - step=059 action=move(direction='north') hp=100.0 - step=060 action=wait() hp=100.0 - step=061 action=move(direction='south') hp=100.0 - step=062 action=move(direction='north') hp=100.0 - step=063 action=wait() hp=100.0 - step=064 action=door(target_id='door_1', door_state='close') hp=100.0 - step=065 action=door(target_id='door_1', door_state='close') hp=100.0 - step=066 action=move(direction='east') hp=100.0 - step=067 action=door(target_id='door_1', door_state='close') hp=100.0 - step=068 action=move(direction='south') hp=100.0 - step=069 action=move(direction='east') hp=100.0 - step=070 action=move(direction='north') hp=100.0 - step=071 action=move(direction='south') hp=100.0 - step=072 action=move(direction='south') hp=100.0 - step=073 action=door(target_id='door_1', door_state='close') hp=100.0 - step=074 action=wait() hp=100.0 - step=075 action=move(direction='north') hp=100.0 - step=076 action=move(direction='west') hp=100.0 - step=077 action=move(direction='east') hp=100.0 - step=078 action=move(direction='west') hp=100.0 - step=079 action=move(direction='west') hp=100.0 - step=080 action=door(target_id='door_1', door_state='open') hp=100.0 - step=081 action=move(direction='north') hp=100.0 - step=082 action=move(direction='south') hp=100.0 - step=083 action=door(target_id='door_1', door_state='open') hp=100.0 - step=084 action=move(direction='north') hp=100.0 - step=085 action=wait() hp=100.0 - step=086 action=move(direction='east') hp=100.0 - step=087 action=wait() hp=100.0 - step=088 action=wait() hp=100.0 - step=089 action=move(direction='east') hp=100.0 - step=090 action=door(target_id='door_1', door_state='open') hp=100.0 - step=091 action=move(direction='west') hp=100.0 - step=092 action=move(direction='east') hp=100.0 - step=093 action=move(direction='west') hp=100.0 - step=094 action=move(direction='west') hp=100.0 - step=095 action=door(target_id='door_1', door_state='open') hp=100.0 - step=096 action=move(direction='south') hp=100.0 - step=097 action=wait() hp=100.0 - step=098 action=wait() hp=100.0 - step=099 action=wait() hp=100.0 - step=100 action=move(direction='south') hp=100.0 - step=101 action=move(direction='north') hp=100.0 - step=102 action=move(direction='east') hp=100.0 - step=103 action=door(target_id='door_1', door_state='open') hp=100.0 - step=104 action=move(direction='west') hp=100.0 - step=105 action=move(direction='north') hp=100.0 - step=106 action=wait() hp=100.0 - step=107 action=door(target_id='door_1', door_state='close') hp=100.0 - step=108 action=wait() hp=100.0 - step=109 action=move(direction='south') hp=100.0 - step=110 action=door(target_id='door_1', door_state='close') hp=100.0 - step=111 action=door(target_id='door_1', door_state='close') hp=100.0 - step=112 action=move(direction='south') hp=100.0 - step=113 action=move(direction='east') hp=100.0 - step=114 action=door(target_id='door_1', door_state='close') hp=100.0 - step=115 action=move(direction='west') hp=100.0 - step=116 action=wait() hp=100.0 - step=117 action=wait() hp=100.0 - step=118 action=move(direction='north') hp=100.0 - step=119 action=wait() hp=100.0 - step=120 action=wait() hp=100.0 - step=121 action=move(direction='north') hp=100.0 - step=122 action=move(direction='east') hp=100.0 - step=123 action=move(direction='west') hp=100.0 - step=124 action=move(direction='south') hp=100.0 - step=125 action=door(target_id='door_1', door_state='open') hp=100.0 - step=126 action=move(direction='south') hp=100.0 - step=127 action=move(direction='east') hp=100.0 - step=128 action=wait() hp=100.0 - step=129 action=move(direction='east') hp=100.0 - step=130 action=move(direction='west') hp=100.0 - step=131 action=move(direction='north') hp=100.0 - step=132 action=wait() hp=100.0 - step=133 action=move(direction='north') hp=100.0 - step=134 action=wait() hp=100.0 - step=135 action=move(direction='south') hp=100.0 - step=136 action=move(direction='west') hp=100.0 - step=137 action=move(direction='north') hp=100.0 - step=138 action=move(direction='south') hp=100.0 - step=139 action=move(direction='south') hp=100.0 - step=140 action=wait() hp=100.0 - step=141 action=move(direction='east') hp=100.0 - step=142 action=move(direction='north') hp=100.0 - step=143 action=move(direction='west') hp=100.0 - step=144 action=move(direction='north') hp=100.0 - step=145 action=door(target_id='door_1', door_state='open') hp=100.0 - step=146 action=move(direction='east') hp=100.0 - step=147 action=move(direction='west') hp=100.0 - step=148 action=wait() hp=100.0 - step=149 action=move(direction='east') hp=100.0 - step=150 action=move(direction='south') hp=100.0 - step=151 action=move(direction='east') hp=100.0 - step=152 action=move(direction='north') hp=100.0 - step=153 action=move(direction='west') hp=100.0 - step=154 action=move(direction='east') hp=100.0 - step=155 action=door(target_id='door_1', door_state='open') hp=100.0 - step=156 action=move(direction='west') hp=100.0 - step=157 action=wait() hp=100.0 - step=158 action=move(direction='east') hp=100.0 - step=159 action=move(direction='west') hp=100.0 - step=160 action=wait() hp=100.0 - step=161 action=move(direction='west') hp=100.0 - step=162 action=wait() hp=100.0 - step=163 action=door(target_id='door_1', door_state='open') hp=100.0 - step=164 action=wait() hp=100.0 - step=165 action=door(target_id='door_1', door_state='open') hp=100.0 - step=166 action=wait() hp=100.0 - step=167 action=door(target_id='door_1', door_state='open') hp=100.0 - step=168 action=move(direction='south') hp=100.0 - step=169 action=move(direction='east') hp=100.0 - step=170 action=door(target_id='door_1', door_state='open') hp=100.0 - step=171 action=move(direction='north') hp=100.0 - step=172 action=move(direction='south') hp=100.0 - step=173 action=wait() hp=100.0 - step=174 action=wait() hp=100.0 - step=175 action=move(direction='north') hp=100.0 - step=176 action=move(direction='west') hp=100.0 - step=177 action=wait() hp=100.0 - step=178 action=wait() hp=100.0 - step=179 action=wait() hp=100.0 - step=180 action=move(direction='south') hp=100.0 - step=181 action=move(direction='north') hp=100.0 - step=182 action=move(direction='south') hp=100.0 - step=183 action=move(direction='north') hp=100.0 - step=184 action=move(direction='east') hp=100.0 - step=185 action=wait() hp=100.0 - step=186 action=move(direction='west') hp=100.0 - step=187 action=wait() hp=100.0 - step=188 action=wait() hp=100.0 - step=189 action=move(direction='south') hp=100.0 - step=190 action=move(direction='east') hp=100.0 - step=191 action=move(direction='south') hp=100.0 - step=192 action=move(direction='west') hp=100.0 - step=193 action=door(target_id='door_1', door_state='close') hp=100.0 - step=194 action=wait() hp=100.0 - step=195 action=door(target_id='door_1', door_state='open') hp=100.0 - step=196 action=move(direction='east') hp=100.0 - step=197 action=move(direction='north') hp=100.0 - step=198 action=move(direction='west') hp=100.0 - step=199 action=move(direction='north') hp=100.0 - step=200 action=door(target_id='door_1', door_state='close') hp=100.0 -ep=0088 [easy ] steps=200 reward= -19.360 evac=0 hp=100.0 suc30=0.90 r30= +14.66 t=27s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='south') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=wait() hp= 98.0 - step=018 action=move(direction='east') hp= 83.0 - step=019 action=wait() hp= 82.5 - step=020 action=wait() hp= 82.0 - step=021 action=move(direction='west') hp= 70.0 - step=022 action=wait() hp= 55.0 - step=023 action=wait() hp= 50.0 - step=024 action=wait() hp= 45.0 - step=025 action=wait() hp= 40.0 - step=026 action=wait() hp= 35.0 - step=027 action=move(direction='south') hp= 30.0 - step=028 action=move(direction='west') hp= 15.0 - step=029 action=move(direction='west') hp= 13.0 - step=030 action=move(direction='west') hp= 1.0 -ep=0089 [easy ] steps=030 reward= -12.380 evac=0 hp= 0.0 suc30=0.87 r30= +13.62 t=27s - step=001 action=door(target_id='door_3', door_state='close') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='east') hp=100.0 - step=006 action=door(target_id='door_3', door_state='close') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=door(target_id='door_2', door_state='close') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=door(target_id='door_1', door_state='open') hp=100.0 - step=017 action=move(direction='west') hp=100.0 -ep=0090 [easy ] steps=017 reward= +17.180 evac=1 hp=100.0 suc30=0.87 r30= +13.60 t=27s - >> PPO update samples=flushed pi_loss=+0.0606 v_loss=41.9331 entropy=1.3482 kl=0.0011 clip%=0.00 lr=1.78e-04 - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='west') hp=100.0 -ep=0091 [easy ] steps=002 reward= +17.230 evac=1 hp=100.0 suc30=0.90 r30= +14.83 t=27s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='east') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='south') hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=move(direction='south') hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=move(direction='west') hp=100.0 - step=024 action=move(direction='west') hp=100.0 - step=025 action=move(direction='south') hp= 99.5 - step=026 action=move(direction='north') hp= 99.5 - step=027 action=wait() hp= 99.5 - step=028 action=wait() hp= 99.5 - step=029 action=wait() hp= 99.5 - step=030 action=wait() hp= 99.5 - step=031 action=move(direction='east') hp= 99.5 - step=032 action=move(direction='west') hp= 99.0 - step=033 action=move(direction='south') hp= 99.0 - step=034 action=move(direction='north') hp= 99.0 - step=035 action=wait() hp= 99.0 - step=036 action=wait() hp= 99.0 - step=037 action=wait() hp= 99.0 - step=038 action=move(direction='south') hp= 99.0 - step=039 action=move(direction='north') hp= 99.0 - step=040 action=move(direction='east') hp= 99.0 - step=041 action=move(direction='west') hp= 99.0 - step=042 action=wait() hp= 99.0 - step=043 action=wait() hp= 99.0 - step=044 action=move(direction='east') hp= 99.0 - step=045 action=move(direction='west') hp= 99.0 - step=046 action=move(direction='east') hp= 99.0 - step=047 action=move(direction='west') hp= 99.0 - step=048 action=wait() hp= 99.0 - step=049 action=wait() hp= 99.0 - step=050 action=wait() hp= 99.0 - step=051 action=wait() hp= 99.0 - step=052 action=move(direction='south') hp= 99.0 - step=053 action=move(direction='north') hp= 99.0 - step=054 action=wait() hp= 99.0 - step=055 action=move(direction='south') hp= 99.0 - step=056 action=wait() hp= 99.0 - step=057 action=move(direction='north') hp= 99.0 - step=058 action=wait() hp= 99.0 - step=059 action=wait() hp= 99.0 - step=060 action=move(direction='east') hp= 99.0 - step=061 action=move(direction='west') hp= 99.0 - step=062 action=move(direction='east') hp= 99.0 - step=063 action=move(direction='west') hp= 99.0 - step=064 action=wait() hp= 99.0 - step=065 action=wait() hp= 99.0 - step=066 action=wait() hp= 99.0 - step=067 action=move(direction='south') hp= 99.0 - step=068 action=move(direction='east') hp= 99.0 - step=069 action=move(direction='west') hp= 99.0 - step=070 action=wait() hp= 99.0 - step=071 action=move(direction='north') hp= 99.0 - step=072 action=move(direction='south') hp= 99.0 - step=073 action=wait() hp= 99.0 - step=074 action=move(direction='north') hp= 99.0 - step=075 action=wait() hp= 99.0 - step=076 action=move(direction='south') hp= 99.0 - step=077 action=wait() hp= 99.0 - step=078 action=wait() hp= 99.0 - step=079 action=move(direction='north') hp= 99.0 - step=080 action=wait() hp= 99.0 - step=081 action=move(direction='east') hp= 99.0 - step=082 action=move(direction='east') hp= 99.0 - step=083 action=move(direction='north') hp= 99.0 - step=084 action=wait() hp= 99.0 - step=085 action=move(direction='north') hp= 99.0 - step=086 action=move(direction='west') hp= 99.0 - step=087 action=move(direction='north') hp= 99.0 - step=088 action=wait() hp= 99.0 - step=089 action=wait() hp= 99.0 - step=090 action=wait() hp= 99.0 - step=091 action=wait() hp= 99.0 - step=092 action=wait() hp= 99.0 - step=093 action=move(direction='east') hp= 99.0 - step=094 action=move(direction='west') hp= 99.0 - step=095 action=move(direction='east') hp= 99.0 - step=096 action=move(direction='west') hp= 99.0 - step=097 action=move(direction='south') hp= 99.0 - step=098 action=move(direction='north') hp= 99.0 - step=099 action=wait() hp= 99.0 - step=100 action=wait() hp= 99.0 - step=101 action=move(direction='east') hp= 99.0 - step=102 action=move(direction='west') hp= 99.0 - step=103 action=wait() hp= 99.0 - step=104 action=wait() hp= 99.0 - step=105 action=wait() hp= 99.0 - step=106 action=wait() hp= 99.0 - step=107 action=move(direction='east') hp= 99.0 - step=108 action=move(direction='west') hp= 99.0 - step=109 action=move(direction='south') hp= 99.0 - step=110 action=move(direction='north') hp= 99.0 - step=111 action=move(direction='east') hp= 99.0 - step=112 action=move(direction='west') hp= 99.0 - step=113 action=move(direction='east') hp= 99.0 - step=114 action=move(direction='west') hp= 99.0 - step=115 action=move(direction='south') hp= 99.0 - step=116 action=move(direction='north') hp= 99.0 - step=117 action=wait() hp= 99.0 - step=118 action=wait() hp= 99.0 - step=119 action=move(direction='south') hp= 99.0 - step=120 action=move(direction='north') hp= 99.0 - step=121 action=wait() hp= 99.0 - step=122 action=wait() hp= 99.0 - step=123 action=move(direction='south') hp= 99.0 - step=124 action=move(direction='north') hp= 99.0 - step=125 action=wait() hp= 99.0 - step=126 action=wait() hp= 99.0 - step=127 action=wait() hp= 99.0 - step=128 action=wait() hp= 99.0 - step=129 action=wait() hp= 99.0 - step=130 action=move(direction='east') hp= 99.0 - step=131 action=move(direction='west') hp= 99.0 - step=132 action=wait() hp= 99.0 - step=133 action=move(direction='east') hp= 99.0 - step=134 action=wait() hp= 99.0 - step=135 action=move(direction='west') hp= 99.0 - step=136 action=wait() hp= 99.0 - step=137 action=move(direction='east') hp= 99.0 - step=138 action=move(direction='west') hp= 99.0 - step=139 action=move(direction='east') hp= 99.0 - step=140 action=move(direction='west') hp= 99.0 - step=141 action=wait() hp= 99.0 - step=142 action=move(direction='east') hp= 99.0 - step=143 action=move(direction='west') hp= 99.0 - step=144 action=wait() hp= 99.0 - step=145 action=wait() hp= 99.0 - step=146 action=wait() hp= 99.0 - step=147 action=wait() hp= 99.0 - step=148 action=wait() hp= 99.0 - step=149 action=move(direction='south') hp= 99.0 - step=150 action=move(direction='east') hp= 99.0 - step=151 action=move(direction='west') hp= 99.0 - step=152 action=move(direction='north') hp= 99.0 - step=153 action=move(direction='south') hp= 99.0 - step=154 action=wait() hp= 99.0 - step=155 action=move(direction='north') hp= 99.0 - step=156 action=move(direction='east') hp= 99.0 - step=157 action=wait() hp= 99.0 - step=158 action=move(direction='west') hp= 99.0 - step=159 action=wait() hp= 99.0 - step=160 action=move(direction='south') hp= 99.0 - step=161 action=move(direction='north') hp= 99.0 - step=162 action=wait() hp= 99.0 - step=163 action=wait() hp= 99.0 - step=164 action=move(direction='south') hp= 99.0 - step=165 action=move(direction='north') hp= 99.0 - step=166 action=wait() hp= 99.0 - step=167 action=move(direction='south') hp= 99.0 - step=168 action=move(direction='north') hp= 99.0 - step=169 action=move(direction='south') hp= 99.0 - step=170 action=move(direction='north') hp= 99.0 - step=171 action=wait() hp= 99.0 - step=172 action=wait() hp= 99.0 - step=173 action=move(direction='south') hp= 99.0 - step=174 action=wait() hp= 99.0 - step=175 action=move(direction='north') hp= 99.0 - step=176 action=move(direction='east') hp= 99.0 - step=177 action=move(direction='west') hp= 99.0 - step=178 action=wait() hp= 99.0 - step=179 action=wait() hp= 99.0 - step=180 action=wait() hp= 99.0 - step=181 action=wait() hp= 99.0 - step=182 action=wait() hp= 99.0 - step=183 action=wait() hp= 99.0 - step=184 action=wait() hp= 99.0 - step=185 action=wait() hp= 99.0 - step=186 action=wait() hp= 99.0 - step=187 action=wait() hp= 99.0 - step=188 action=wait() hp= 99.0 - step=189 action=wait() hp= 99.0 - step=190 action=wait() hp= 99.0 - step=191 action=move(direction='east') hp= 99.0 - step=192 action=move(direction='west') hp= 99.0 - step=193 action=move(direction='east') hp= 99.0 - step=194 action=move(direction='west') hp= 99.0 - step=195 action=wait() hp= 99.0 - step=196 action=wait() hp= 99.0 - step=197 action=wait() hp= 99.0 - step=198 action=move(direction='east') hp= 99.0 - step=199 action=wait() hp= 99.0 - step=200 action=move(direction='east') hp= 99.0 -ep=0092 [easy ] steps=200 reward= -20.100 evac=0 hp= 99.0 suc30=0.87 r30= +13.52 t=28s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=door(target_id='door_3', door_state='close') hp=100.0 - step=004 action=move(direction='east') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='south') hp=100.0 - step=013 action=door(target_id='door_2', door_state='close') hp=100.0 - step=014 action=door(target_id='door_2', door_state='close') hp=100.0 - step=015 action=door(target_id='door_2', door_state='close') hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=door(target_id='door_1', door_state='open') hp=100.0 - step=022 action=wait() hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=move(direction='west') hp=100.0 -ep=0093 [easy ] steps=024 reward= +17.600 evac=1 hp=100.0 suc30=0.87 r30= +13.52 t=28s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='south') hp=100.0 - step=008 action=door(target_id='door_7', door_state='close') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=door(target_id='door_7', door_state='close') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=door(target_id='door_2', door_state='open') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=door(target_id='door_1', door_state='close') hp=100.0 - step=019 action=move(direction='west') hp=100.0 - step=020 action=move(direction='north') hp=100.0 - step=021 action=move(direction='west') hp=100.0 - step=022 action=move(direction='north') hp=100.0 - step=023 action=move(direction='west') hp=100.0 -ep=0094 [easy ] steps=023 reward= +17.950 evac=1 hp=100.0 suc30=0.87 r30= +13.52 t=28s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=move(direction='east') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=move(direction='west') hp=100.0 - step=020 action=move(direction='north') hp=100.0 - step=021 action=move(direction='west') hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=move(direction='west') hp=100.0 - step=024 action=move(direction='west') hp=100.0 -ep=0095 [easy ] steps=024 reward= +21.620 evac=1 hp=100.0 suc30=0.87 r30= +13.54 t=28s - >> PPO update samples=flushed pi_loss=-0.0290 v_loss=28.5095 entropy=0.9965 kl=0.0008 clip%=0.00 lr=1.72e-04 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=door(target_id='door_1', door_state='close') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 -ep=0096 [easy ] steps=011 reward= +18.510 evac=1 hp=100.0 suc30=0.87 r30= +13.60 t=29s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=door(target_id='door_1', door_state='close') hp=100.0 - step=005 action=move(direction='east') hp=100.0 - step=006 action=door(target_id='door_1', door_state='close') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=door(target_id='door_5', door_state='close') hp=100.0 - step=011 action=door(target_id='door_5', door_state='close') hp=100.0 - step=012 action=door(target_id='door_5', door_state='close') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='west') hp=100.0 -ep=0097 [easy ] steps=014 reward= +17.950 evac=1 hp=100.0 suc30=0.87 r30= +13.64 t=29s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=move(direction='south') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='west') hp=100.0 -ep=0098 [easy ] steps=016 reward= +19.970 evac=1 hp=100.0 suc30=0.87 r30= +13.67 t=29s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 -ep=0099 [easy ] steps=004 reward= +17.550 evac=1 hp=100.0 suc30=0.87 r30= +13.67 t=29s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='east') hp=100.0 - step=003 action=door(target_id='door_5', door_state='open') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=door(target_id='door_1', door_state='open') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='south') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=door(target_id='door_1', door_state='open') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=move(direction='west') hp=100.0 -ep=0100 [easy ] steps=015 reward= +18.600 evac=1 hp=100.0 suc30=0.90 r30= +15.00 t=29s - >> PPO update samples=flushed pi_loss=-0.0025 v_loss=4.3529 entropy=1.2106 kl=0.0001 clip%=0.00 lr=1.65e-04 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp= 85.0 - step=008 action=move(direction='west') hp= 84.5 - step=009 action=move(direction='west') hp= 84.5 - step=010 action=move(direction='west') hp= 84.5 - step=011 action=move(direction='west') hp= 84.5 - step=012 action=move(direction='north') hp= 84.5 - step=013 action=move(direction='north') hp= 84.5 - step=014 action=move(direction='north') hp= 84.5 - step=015 action=move(direction='north') hp= 84.5 - step=016 action=move(direction='north') hp= 84.5 - step=017 action=move(direction='north') hp= 84.5 - step=018 action=move(direction='north') hp= 84.5 - step=019 action=move(direction='west') hp= 84.5 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=wait() hp=100.0 - step=022 action=wait() hp=100.0 - step=023 action=wait() hp= 99.5 - step=024 action=wait() hp= 99.0 - step=025 action=wait() hp= 98.5 - step=026 action=wait() hp= 98.0 - step=027 action=wait() hp= 97.5 - step=028 action=wait() hp= 97.0 - step=029 action=wait() hp= 96.5 - step=030 action=wait() hp= 96.0 - step=031 action=wait() hp= 95.5 - step=032 action=wait() hp= 95.0 - step=033 action=wait() hp= 94.5 - step=034 action=wait() hp= 94.0 - step=035 action=wait() hp= 92.0 - step=036 action=wait() hp= 90.0 - step=037 action=wait() hp= 88.0 - step=038 action=wait() hp= 86.0 - step=039 action=wait() hp= 84.0 - step=040 action=wait() hp= 82.0 - step=041 action=wait() hp= 80.0 - step=042 action=wait() hp= 78.0 - step=043 action=wait() hp= 76.0 - step=044 action=wait() hp= 74.0 - step=045 action=wait() hp= 72.0 - step=046 action=wait() hp= 70.0 - step=047 action=wait() hp= 68.0 - step=048 action=wait() hp= 66.0 - step=049 action=wait() hp= 64.0 - step=050 action=wait() hp= 63.5 - step=051 action=wait() hp= 63.0 - step=052 action=wait() hp= 62.5 - step=053 action=wait() hp= 62.0 - step=054 action=wait() hp= 61.5 - step=055 action=wait() hp= 61.0 - step=056 action=wait() hp= 60.5 - step=057 action=wait() hp= 60.0 - step=058 action=wait() hp= 59.5 - step=059 action=wait() hp= 59.0 - step=060 action=wait() hp= 58.5 - step=061 action=wait() hp= 58.5 - step=062 action=wait() hp= 58.5 - step=063 action=wait() hp= 58.5 - step=064 action=wait() hp= 58.5 - step=065 action=wait() hp= 58.5 - step=066 action=wait() hp= 58.5 - step=067 action=wait() hp= 58.5 - step=068 action=wait() hp= 58.5 - step=069 action=wait() hp= 58.5 - step=070 action=wait() hp= 58.5 - step=071 action=wait() hp= 58.5 - step=072 action=wait() hp= 58.5 - step=073 action=wait() hp= 58.5 - step=074 action=wait() hp= 58.5 - step=075 action=wait() hp= 58.5 - step=076 action=wait() hp= 58.5 - step=077 action=wait() hp= 58.5 - step=078 action=wait() hp= 58.5 - step=079 action=wait() hp= 58.5 - step=080 action=wait() hp= 58.5 - step=081 action=wait() hp= 58.5 - step=082 action=wait() hp= 58.5 - step=083 action=wait() hp= 58.5 - step=084 action=wait() hp= 58.5 - step=085 action=wait() hp= 58.5 - step=086 action=wait() hp= 58.5 - step=087 action=wait() hp= 58.5 - step=088 action=wait() hp= 58.5 - step=089 action=wait() hp= 58.5 - step=090 action=wait() hp= 58.5 - step=091 action=wait() hp= 58.5 - step=092 action=wait() hp= 58.5 - step=093 action=wait() hp= 58.5 - step=094 action=wait() hp= 58.5 - step=095 action=wait() hp= 58.5 - step=096 action=wait() hp= 58.5 - step=097 action=wait() hp= 58.5 - step=098 action=wait() hp= 58.5 - step=099 action=wait() hp= 58.5 - step=100 action=wait() hp= 58.5 - step=101 action=wait() hp= 58.5 - step=102 action=wait() hp= 58.5 - step=103 action=wait() hp= 58.5 - step=104 action=wait() hp= 58.5 - step=105 action=wait() hp= 58.5 - step=106 action=wait() hp= 58.5 - step=107 action=wait() hp= 58.5 - step=108 action=wait() hp= 58.5 - step=109 action=wait() hp= 58.5 - step=110 action=wait() hp= 58.5 - step=111 action=wait() hp= 58.5 - step=112 action=wait() hp= 58.5 - step=113 action=wait() hp= 58.5 - step=114 action=wait() hp= 58.5 - step=115 action=wait() hp= 58.5 - step=116 action=wait() hp= 58.5 - step=117 action=wait() hp= 58.5 - step=118 action=wait() hp= 58.5 - step=119 action=wait() hp= 58.5 - step=120 action=wait() hp= 58.5 - step=121 action=wait() hp= 58.5 - step=122 action=wait() hp= 58.5 - step=123 action=wait() hp= 58.5 - step=124 action=wait() hp= 58.5 - step=125 action=wait() hp= 58.5 - step=126 action=wait() hp= 58.5 - step=127 action=wait() hp= 58.5 - step=128 action=wait() hp= 58.5 - step=129 action=wait() hp= 58.5 - step=130 action=wait() hp= 58.5 - step=131 action=wait() hp= 58.5 - step=132 action=wait() hp= 58.5 - step=133 action=wait() hp= 58.5 - step=134 action=wait() hp= 58.5 - step=135 action=wait() hp= 58.5 - step=136 action=wait() hp= 58.5 - step=137 action=wait() hp= 58.5 - step=138 action=wait() hp= 58.5 - step=139 action=wait() hp= 58.5 - step=140 action=wait() hp= 58.5 - step=141 action=wait() hp= 58.5 - step=142 action=wait() hp= 58.5 - step=143 action=wait() hp= 58.5 - step=144 action=wait() hp= 58.5 - step=145 action=wait() hp= 58.5 - step=146 action=wait() hp= 58.5 - step=147 action=wait() hp= 58.5 - step=148 action=wait() hp= 58.5 - step=149 action=wait() hp= 58.5 - step=150 action=wait() hp= 58.5 - ** EVAL [medium] reward=+6.008 success=0.67 steps=57.0 - [ckpt] saved -> artifacts/pyre_ppo_http_ckpt.pt - step=001 action=wait() hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='east') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=move(direction='west') hp=100.0 -ep=0101 [medium] steps=016 reward= +16.310 evac=1 hp=100.0 suc30=0.90 r30= +14.90 t=30s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=door(target_id='door_3', door_state='open') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=wait() hp= 85.0 - step=010 action=wait() hp= 70.0 - step=011 action=wait() hp= 55.0 - step=012 action=wait() hp= 50.0 - step=013 action=wait() hp= 45.0 - step=014 action=wait() hp= 40.0 - step=015 action=move(direction='south') hp= 35.0 - step=016 action=move(direction='west') hp= 30.0 - step=017 action=wait() hp= 29.5 - step=018 action=move(direction='west') hp= 29.0 - step=019 action=move(direction='south') hp= 29.0 - step=020 action=move(direction='west') hp= 29.0 - step=021 action=wait() hp= 29.0 - step=022 action=door(target_id='door_2', door_state='open') hp= 29.0 - step=023 action=move(direction='south') hp= 29.0 - step=024 action=move(direction='north') hp= 29.0 - step=025 action=door(target_id='door_2', door_state='open') hp= 29.0 - step=026 action=move(direction='east') hp= 29.0 - step=027 action=wait() hp= 29.0 - step=028 action=wait() hp= 29.0 - step=029 action=move(direction='west') hp= 29.0 - step=030 action=move(direction='north') hp= 29.0 - step=031 action=door(target_id='door_1', door_state='open') hp= 29.0 - step=032 action=move(direction='west') hp= 29.0 - step=033 action=move(direction='west') hp= 29.0 - step=034 action=move(direction='west') hp= 29.0 - step=035 action=move(direction='west') hp= 29.0 -ep=0102 [medium] steps=035 reward= +9.635 evac=1 hp= 29.0 suc30=0.90 r30= +14.60 t=30s - step=001 action=door(target_id='door_3', door_state='close') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=door(target_id='door_4', door_state='close') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=door(target_id='door_2', door_state='close') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=door(target_id='door_1', door_state='close') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=move(direction='west') hp=100.0 - step=020 action=move(direction='north') hp=100.0 - step=021 action=door(target_id='door_1', door_state='close') hp=100.0 - step=022 action=move(direction='north') hp=100.0 - step=023 action=move(direction='west') hp=100.0 - step=024 action=wait() hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=move(direction='east') hp=100.0 - step=027 action=door(target_id='door_1', door_state='open') hp=100.0 - step=028 action=wait() hp=100.0 - step=029 action=move(direction='north') hp=100.0 - step=030 action=move(direction='north') hp=100.0 - step=031 action=move(direction='west') hp=100.0 - step=032 action=move(direction='south') hp=100.0 - step=033 action=door(target_id='door_1', door_state='close') hp=100.0 - step=034 action=move(direction='north') hp=100.0 - step=035 action=move(direction='south') hp=100.0 - step=036 action=move(direction='east') hp=100.0 - step=037 action=move(direction='east') hp=100.0 - step=038 action=move(direction='west') hp=100.0 - step=039 action=move(direction='west') hp=100.0 - step=040 action=move(direction='south') hp=100.0 - step=041 action=move(direction='north') hp=100.0 - step=042 action=door(target_id='door_1', door_state='close') hp=100.0 - step=043 action=wait() hp=100.0 - step=044 action=door(target_id='door_1', door_state='close') hp=100.0 - step=045 action=wait() hp=100.0 - step=046 action=move(direction='north') hp=100.0 - step=047 action=door(target_id='door_1', door_state='close') hp=100.0 - step=048 action=wait() hp=100.0 - step=049 action=wait() hp=100.0 - step=050 action=move(direction='east') hp= 99.5 - step=051 action=move(direction='west') hp= 84.5 - step=052 action=move(direction='south') hp= 69.5 - step=053 action=move(direction='north') hp= 54.5 - step=054 action=move(direction='south') hp= 39.5 - step=055 action=move(direction='north') hp= 34.5 - step=056 action=wait() hp= 29.5 - step=057 action=wait() hp= 24.5 - step=058 action=wait() hp= 19.5 - step=059 action=wait() hp= 14.5 - step=060 action=wait() hp= 9.5 - step=061 action=wait() hp= 4.5 -ep=0103 [medium] steps=061 reward= -15.470 evac=0 hp= 0.0 suc30=0.87 r30= +13.48 t=30s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='south') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='east') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='west') hp=100.0 -ep=0104 [medium] steps=014 reward= +17.030 evac=1 hp=100.0 suc30=0.87 r30= +13.40 t=30s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=door(target_id='door_2', door_state='close') hp=100.0 - step=005 action=door(target_id='door_2', door_state='close') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='east') hp=100.0 - step=008 action=door(target_id='door_2', door_state='close') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=wait() hp= 99.5 - step=011 action=wait() hp= 99.0 - step=012 action=wait() hp= 98.5 - step=013 action=move(direction='south') hp= 98.0 - step=014 action=wait() hp= 98.0 - step=015 action=move(direction='north') hp= 97.5 - step=016 action=move(direction='south') hp= 97.5 - step=017 action=move(direction='south') hp= 97.0 - step=018 action=move(direction='north') hp= 97.0 - step=019 action=wait() hp= 95.0 - step=020 action=move(direction='north') hp= 94.5 - step=021 action=wait() hp= 94.0 - step=022 action=move(direction='south') hp= 93.5 - step=023 action=move(direction='north') hp= 78.5 - step=024 action=move(direction='south') hp= 76.5 - step=025 action=wait() hp= 61.5 - step=026 action=move(direction='north') hp= 56.5 - step=027 action=wait() hp= 41.5 - step=028 action=wait() hp= 26.5 - step=029 action=wait() hp= 11.5 - step=030 action=wait() hp= 6.5 - step=031 action=wait() hp= 1.5 -ep=0105 [medium] steps=031 reward= -18.740 evac=0 hp= 0.0 suc30=0.83 r30= +12.22 t=30s - >> PPO update samples=flushed pi_loss=+0.0005 v_loss=79.8427 entropy=1.0409 kl=0.0001 clip%=0.00 lr=1.58e-04 - step=001 action=door(target_id='door_3', door_state='open') hp=100.0 - step=002 action=door(target_id='door_3', door_state='close') hp=100.0 - step=003 action=door(target_id='door_3', door_state='open') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp= 88.0 - step=008 action=move(direction='west') hp= 83.0 - step=009 action=move(direction='south') hp= 68.0 - step=010 action=move(direction='north') hp= 66.0 - step=011 action=move(direction='north') hp= 51.0 - step=012 action=wait() hp= 36.0 - step=013 action=move(direction='west') hp= 21.0 - step=014 action=move(direction='west') hp= 6.0 - step=015 action=move(direction='west') hp= 5.5 - step=016 action=move(direction='south') hp= 5.5 - step=017 action=wait() hp= 5.0 - step=018 action=door(target_id='door_5', door_state='close') hp= 4.5 - step=019 action=move(direction='north') hp= 4.0 -ep=0106 [medium] steps=019 reward= -15.590 evac=0 hp= 0.0 suc30=0.80 r30= +11.11 t=31s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=move(direction='west') hp=100.0 -ep=0107 [medium] steps=017 reward= +17.390 evac=1 hp=100.0 suc30=0.80 r30= +10.94 t=31s - step=001 action=door(target_id='door_3', door_state='open') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp= 85.0 - step=007 action=door(target_id='door_2', door_state='close') hp= 84.5 - step=008 action=move(direction='west') hp= 84.0 - step=009 action=door(target_id='door_1', door_state='open') hp= 83.5 - step=010 action=move(direction='west') hp= 83.0 - step=011 action=wait() hp= 83.0 - step=012 action=door(target_id='door_2', door_state='close') hp= 83.0 - step=013 action=move(direction='west') hp= 83.0 - step=014 action=door(target_id='door_1', door_state='open') hp= 83.0 - step=015 action=door(target_id='door_1', door_state='open') hp= 83.0 - step=016 action=move(direction='east') hp= 83.0 - step=017 action=wait() hp= 82.5 - step=018 action=move(direction='west') hp= 80.5 - step=019 action=move(direction='west') hp= 80.0 - step=020 action=wait() hp= 80.0 - step=021 action=move(direction='west') hp= 80.0 -ep=0108 [medium] steps=021 reward= +11.310 evac=1 hp= 80.0 suc30=0.80 r30= +10.76 t=31s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='south') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=door(target_id='door_5', door_state='close') hp=100.0 - step=015 action=move(direction='west') hp=100.0 -ep=0109 [medium] steps=015 reward= +15.850 evac=1 hp=100.0 suc30=0.80 r30= +10.66 t=31s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 -ep=0110 [medium] steps=007 reward= +16.100 evac=1 hp=100.0 suc30=0.80 r30= +10.47 t=31s - >> PPO update samples=flushed pi_loss=+0.0046 v_loss=59.2558 entropy=1.1233 kl=0.0003 clip%=0.00 lr=1.51e-04 - step=001 action=door(target_id='door_3', door_state='close') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=door(target_id='door_2', door_state='close') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='south') hp=100.0 - step=009 action=move(direction='south') hp=100.0 - step=010 action=door(target_id='door_2', door_state='close') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=wait() hp= 99.5 - step=013 action=move(direction='north') hp= 99.0 - step=014 action=move(direction='south') hp= 99.0 - step=015 action=move(direction='north') hp= 98.5 - step=016 action=move(direction='south') hp= 98.0 - step=017 action=move(direction='east') hp= 97.5 - step=018 action=move(direction='west') hp= 97.0 - step=019 action=wait() hp= 96.5 - step=020 action=wait() hp= 96.0 - step=021 action=wait() hp= 95.5 - step=022 action=move(direction='north') hp= 95.0 - step=023 action=move(direction='north') hp= 94.5 - step=024 action=move(direction='west') hp= 94.0 - step=025 action=move(direction='west') hp= 94.0 - step=026 action=move(direction='west') hp= 94.0 - step=027 action=move(direction='west') hp= 94.0 -ep=0111 [medium] steps=027 reward= +14.950 evac=1 hp= 94.0 suc30=0.80 r30= +10.37 t=31s - step=001 action=move(direction='south') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=door(target_id='door_8', door_state='close') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=door(target_id='door_1', door_state='close') hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=move(direction='west') hp=100.0 -ep=0112 [medium] steps=019 reward= +15.740 evac=1 hp=100.0 suc30=0.80 r30= +10.32 t=31s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='south') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='south') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='south') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='east') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='east') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=move(direction='north') hp=100.0 - step=021 action=wait() hp=100.0 - step=022 action=move(direction='north') hp=100.0 - step=023 action=move(direction='north') hp=100.0 - step=024 action=move(direction='north') hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=wait() hp=100.0 - step=027 action=move(direction='north') hp=100.0 - step=028 action=move(direction='north') hp=100.0 - step=029 action=move(direction='north') hp=100.0 - step=030 action=move(direction='north') hp=100.0 - step=031 action=move(direction='north') hp=100.0 - step=032 action=move(direction='north') hp=100.0 - step=033 action=move(direction='west') hp=100.0 -ep=0113 [medium] steps=033 reward= +17.970 evac=1 hp=100.0 suc30=0.80 r30= +10.29 t=31s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 -ep=0114 [medium] steps=003 reward= +15.160 evac=1 hp=100.0 suc30=0.80 r30= +10.19 t=31s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='south') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 -ep=0115 [medium] steps=007 reward= +16.480 evac=1 hp=100.0 suc30=0.80 r30= +10.13 t=31s - >> PPO update samples=flushed pi_loss=-0.0026 v_loss=1.6322 entropy=1.0911 kl=0.0002 clip%=0.00 lr=1.45e-04 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='east') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=move(direction='west') hp=100.0 -ep=0116 [medium] steps=020 reward= +17.860 evac=1 hp=100.0 suc30=0.80 r30= +10.06 t=32s - step=001 action=door(target_id='door_4', door_state='close') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp= 99.5 - step=011 action=door(target_id='door_2', door_state='open') hp= 99.5 - step=012 action=move(direction='west') hp= 99.5 - step=013 action=move(direction='west') hp= 99.5 - step=014 action=move(direction='west') hp= 99.5 -ep=0117 [medium] steps=014 reward= +15.042 evac=1 hp= 99.5 suc30=0.80 r30= +9.98 t=32s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='south') hp=100.0 - step=004 action=door(target_id='door_5', door_state='close') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=door(target_id='door_5', door_state='close') hp=100.0 - step=007 action=door(target_id='door_5', door_state='close') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='west') hp=100.0 -ep=0118 [medium] steps=010 reward= +15.190 evac=1 hp=100.0 suc30=0.83 r30= +11.13 t=32s - step=001 action=move(direction='north') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp= 88.0 - step=008 action=move(direction='north') hp= 87.5 - step=009 action=move(direction='west') hp= 72.5 - step=010 action=wait() hp= 72.0 - step=011 action=move(direction='west') hp= 60.0 - step=012 action=move(direction='west') hp= 59.5 - step=013 action=wait() hp= 59.0 - step=014 action=move(direction='east') hp= 58.5 - step=015 action=move(direction='north') hp= 56.5 - step=016 action=move(direction='west') hp= 56.0 - step=017 action=move(direction='north') hp= 55.5 - step=018 action=move(direction='north') hp= 55.5 - step=019 action=move(direction='east') hp= 55.5 - step=020 action=move(direction='north') hp= 55.5 - step=021 action=wait() hp= 55.5 - step=022 action=move(direction='west') hp= 55.5 - step=023 action=move(direction='north') hp= 55.5 - step=024 action=move(direction='west') hp= 55.5 -ep=0119 [medium] steps=024 reward= +10.373 evac=1 hp= 55.5 suc30=0.87 r30= +11.89 t=32s - step=001 action=move(direction='west') hp=100.0 - step=002 action=door(target_id='door_2', door_state='close') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 -ep=0120 [medium] steps=008 reward= +15.620 evac=1 hp=100.0 suc30=0.87 r30= +11.84 t=32s - >> PPO update samples=flushed pi_loss=-0.0013 v_loss=2.5435 entropy=1.0417 kl=0.0001 clip%=0.00 lr=1.38e-04 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp= 99.5 - step=007 action=move(direction='west') hp= 99.5 - step=008 action=move(direction='west') hp= 99.5 - step=009 action=move(direction='west') hp= 99.5 - step=010 action=move(direction='north') hp= 99.5 - step=011 action=move(direction='north') hp= 99.5 - step=012 action=move(direction='west') hp= 99.5 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='north') hp= 99.5 - step=017 action=wait() hp= 99.0 - step=018 action=wait() hp= 98.5 - step=019 action=wait() hp= 98.0 - step=020 action=wait() hp= 97.5 - step=021 action=wait() hp= 97.5 - step=022 action=wait() hp= 97.5 - step=023 action=wait() hp= 97.5 - step=024 action=wait() hp= 97.5 - step=025 action=wait() hp= 97.5 - step=026 action=wait() hp= 97.5 - step=027 action=wait() hp= 97.5 - step=028 action=wait() hp= 97.5 - step=029 action=wait() hp= 97.5 - step=030 action=wait() hp= 97.5 - step=031 action=wait() hp= 97.5 - step=032 action=wait() hp= 97.5 - step=033 action=wait() hp= 97.5 - step=034 action=wait() hp= 97.5 - step=035 action=wait() hp= 97.5 - step=036 action=wait() hp= 97.5 - step=037 action=wait() hp= 97.5 - step=038 action=wait() hp= 97.5 - step=039 action=wait() hp= 97.5 - step=040 action=wait() hp= 97.5 - step=041 action=wait() hp= 97.5 - step=042 action=wait() hp= 97.5 - step=043 action=wait() hp= 97.5 - step=044 action=wait() hp= 97.5 - step=045 action=wait() hp= 97.5 - step=046 action=wait() hp= 97.5 - step=047 action=wait() hp= 97.5 - step=048 action=wait() hp= 97.5 - step=049 action=wait() hp= 97.5 - step=050 action=wait() hp= 97.5 - step=051 action=wait() hp= 97.5 - step=052 action=wait() hp= 97.5 - step=053 action=wait() hp= 97.5 - step=054 action=wait() hp= 97.5 - step=055 action=wait() hp= 97.5 - step=056 action=wait() hp= 97.5 - step=057 action=wait() hp= 97.5 - step=058 action=wait() hp= 97.5 - step=059 action=wait() hp= 97.5 - step=060 action=wait() hp= 97.5 - step=061 action=wait() hp= 97.5 - step=062 action=wait() hp= 97.5 - step=063 action=wait() hp= 97.5 - step=064 action=wait() hp= 97.5 - step=065 action=wait() hp= 97.5 - step=066 action=wait() hp= 97.5 - step=067 action=wait() hp= 97.5 - step=068 action=wait() hp= 97.0 - step=069 action=wait() hp= 96.5 - step=070 action=wait() hp= 96.0 - step=071 action=wait() hp= 95.5 - step=072 action=wait() hp= 95.0 - step=073 action=wait() hp= 83.0 - step=074 action=wait() hp= 68.0 - step=075 action=wait() hp= 53.0 - step=076 action=wait() hp= 38.0 - step=077 action=wait() hp= 23.0 - step=078 action=wait() hp= 18.0 - step=079 action=wait() hp= 13.0 - step=080 action=wait() hp= 8.0 - step=081 action=wait() hp= 3.0 - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - ** EVAL [medium] reward=+6.401 success=0.67 steps=32.7 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=door(target_id='door_2', door_state='close') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=door(target_id='door_1', door_state='close') hp=100.0 - step=011 action=move(direction='south') hp=100.0 - step=012 action=door(target_id='door_1', door_state='open') hp=100.0 - step=013 action=move(direction='south') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='west') hp=100.0 -ep=0121 [medium] steps=015 reward= +16.250 evac=1 hp=100.0 suc30=0.87 r30= +11.80 t=32s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='east') hp= 99.5 - step=009 action=move(direction='west') hp= 99.5 - step=010 action=move(direction='north') hp= 99.5 - step=011 action=move(direction='west') hp= 99.5 - step=012 action=move(direction='west') hp= 99.5 - step=013 action=move(direction='west') hp= 99.0 - step=014 action=move(direction='west') hp= 99.0 -ep=0122 [medium] steps=014 reward= +16.135 evac=1 hp= 99.0 suc30=0.90 r30= +13.01 t=32s - step=001 action=move(direction='west') hp=100.0 -ep=0123 [medium] steps=001 reward= +14.260 evac=1 hp=100.0 suc30=0.90 r30= +12.90 t=32s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='south') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=door(target_id='door_1', door_state='close') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=door(target_id='door_1', door_state='close') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='west') hp=100.0 -ep=0124 [medium] steps=013 reward= +16.720 evac=1 hp=100.0 suc30=0.90 r30= +12.86 t=33s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='north') hp= 99.5 - step=012 action=move(direction='north') hp= 99.5 - step=013 action=move(direction='south') hp= 99.5 - step=014 action=wait() hp= 99.0 - step=015 action=wait() hp= 87.0 - step=016 action=move(direction='north') hp= 72.0 - step=017 action=move(direction='north') hp= 71.5 - step=018 action=move(direction='north') hp= 71.5 - step=019 action=move(direction='north') hp= 71.5 - step=020 action=move(direction='east') hp= 71.5 - step=021 action=move(direction='west') hp= 71.5 - step=022 action=move(direction='east') hp= 71.5 - step=023 action=move(direction='east') hp= 71.5 - step=024 action=move(direction='west') hp= 71.5 - step=025 action=move(direction='west') hp= 71.5 - step=026 action=wait() hp= 71.5 - step=027 action=move(direction='north') hp= 71.0 - step=028 action=move(direction='south') hp= 71.0 - step=029 action=move(direction='north') hp= 56.0 - step=030 action=move(direction='west') hp= 55.5 -ep=0125 [medium] steps=030 reward= +10.393 evac=1 hp= 55.5 suc30=0.90 r30= +12.48 t=33s - >> PPO update samples=flushed pi_loss=+0.0014 v_loss=5.3116 entropy=1.1161 kl=0.0002 clip%=0.00 lr=1.31e-04 - [ckpt] saved -> artifacts/pyre_ppo_http_ckpt.pt - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=door(target_id='door_1', door_state='close') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 -ep=0126 [medium] steps=008 reward= +16.920 evac=1 hp=100.0 suc30=0.90 r30= +12.43 t=33s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=door(target_id='door_7', door_state='close') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='north') hp= 98.0 - step=014 action=move(direction='east') hp= 96.0 - step=015 action=door(target_id='door_6', door_state='close') hp= 95.5 - step=016 action=move(direction='west') hp= 95.0 - step=017 action=move(direction='north') hp= 93.0 - step=018 action=door(target_id='door_2', door_state='close') hp= 78.0 - step=019 action=wait() hp= 63.0 - step=020 action=door(target_id='door_2', door_state='open') hp= 48.0 - step=021 action=door(target_id='door_2', door_state='close') hp= 43.0 - step=022 action=door(target_id='door_2', door_state='open') hp= 38.0 - step=023 action=door(target_id='door_2', door_state='close') hp= 33.0 - step=024 action=move(direction='south') hp= 28.0 - step=025 action=move(direction='east') hp= 13.0 -ep=0127 [medium] steps=025 reward= -13.340 evac=0 hp= 0.0 suc30=0.87 r30= +11.39 t=33s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 -ep=0128 [medium] steps=007 reward= +16.100 evac=1 hp=100.0 suc30=0.87 r30= +11.26 t=33s - step=001 action=move(direction='west') hp=100.0 - step=002 action=door(target_id='door_3', door_state='close') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=door(target_id='door_3', door_state='close') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='east') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=move(direction='west') hp= 99.5 - step=010 action=move(direction='west') hp= 99.5 - step=011 action=wait() hp= 99.0 - step=012 action=move(direction='north') hp= 98.5 - step=013 action=move(direction='north') hp= 98.5 - step=014 action=move(direction='north') hp= 98.5 - step=015 action=wait() hp= 98.5 - step=016 action=move(direction='north') hp= 98.5 - step=017 action=move(direction='north') hp= 98.5 - step=018 action=move(direction='north') hp= 98.5 - step=019 action=move(direction='north') hp= 98.5 -ep=0129 [medium] steps=019 reward= +14.168 evac=1 hp= 98.5 suc30=0.87 r30= +11.15 t=33s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=door(target_id='door_1', door_state='close') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=door(target_id='door_1', door_state='open') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=door(target_id='door_1', door_state='close') hp=100.0 - step=016 action=move(direction='east') hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=door(target_id='door_1', door_state='close') hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=move(direction='east') hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=door(target_id='door_1', door_state='close') hp=100.0 - step=025 action=wait() hp=100.0 - step=026 action=wait() hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=wait() hp=100.0 - step=029 action=move(direction='east') hp=100.0 - step=030 action=move(direction='south') hp=100.0 - step=031 action=move(direction='north') hp=100.0 - step=032 action=move(direction='west') hp=100.0 - step=033 action=move(direction='south') hp=100.0 - step=034 action=wait() hp=100.0 - step=035 action=wait() hp=100.0 - step=036 action=move(direction='south') hp=100.0 - step=037 action=wait() hp=100.0 - step=038 action=move(direction='north') hp=100.0 - step=039 action=move(direction='north') hp=100.0 - step=040 action=move(direction='south') hp=100.0 - step=041 action=move(direction='east') hp=100.0 - step=042 action=door(target_id='door_1', door_state='close') hp=100.0 - step=043 action=move(direction='east') hp=100.0 - step=044 action=move(direction='north') hp=100.0 - step=045 action=move(direction='west') hp=100.0 - step=046 action=door(target_id='door_1', door_state='open') hp=100.0 - step=047 action=move(direction='west') hp=100.0 - step=048 action=door(target_id='door_1', door_state='open') hp=100.0 - step=049 action=wait() hp=100.0 - step=050 action=move(direction='south') hp=100.0 - step=051 action=move(direction='south') hp=100.0 - step=052 action=wait() hp=100.0 - step=053 action=door(target_id='door_1', door_state='open') hp=100.0 - step=054 action=move(direction='north') hp=100.0 - step=055 action=wait() hp=100.0 - step=056 action=move(direction='east') hp=100.0 - step=057 action=door(target_id='door_1', door_state='close') hp=100.0 - step=058 action=move(direction='south') hp=100.0 - step=059 action=move(direction='west') hp= 88.0 - step=060 action=wait() hp= 88.0 - step=061 action=move(direction='north') hp= 76.0 - step=062 action=move(direction='east') hp= 75.5 - step=063 action=move(direction='west') hp= 60.5 - step=064 action=wait() hp= 45.5 - step=065 action=wait() hp= 30.5 - step=066 action=move(direction='north') hp= 15.5 - step=067 action=wait() hp= 0.5 -ep=0130 [medium] steps=067 reward= -16.020 evac=0 hp= 0.0 suc30=0.83 r30= +9.99 t=33s - >> PPO update samples=flushed pi_loss=-0.0008 v_loss=60.5100 entropy=1.2579 kl=0.0001 clip%=0.00 lr=1.24e-04 - step=001 action=move(direction='east') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='north') hp= 99.5 - step=011 action=move(direction='west') hp= 99.5 - step=012 action=move(direction='north') hp= 87.5 - step=013 action=move(direction='west') hp= 87.0 - step=014 action=move(direction='east') hp= 86.5 - step=015 action=wait() hp= 74.5 - step=016 action=move(direction='west') hp= 59.5 - step=017 action=wait() hp= 44.5 - step=018 action=move(direction='west') hp= 29.5 - step=019 action=move(direction='west') hp= 27.5 - step=020 action=move(direction='west') hp= 27.0 - step=021 action=move(direction='north') hp= 27.0 - step=022 action=move(direction='east') hp= 27.0 - step=023 action=wait() hp= 27.0 - step=024 action=move(direction='north') hp= 26.5 - step=025 action=move(direction='north') hp= 26.0 - step=026 action=move(direction='north') hp= 11.0 -ep=0131 [medium] steps=026 reward= -12.050 evac=0 hp= 0.0 suc30=0.80 r30= +9.05 t=34s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='east') hp=100.0 - step=007 action=door(target_id='door_3', door_state='close') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='west') hp= 98.0 - step=013 action=move(direction='east') hp= 97.5 - step=014 action=move(direction='west') hp= 97.0 - step=015 action=move(direction='west') hp= 96.5 - step=016 action=move(direction='east') hp= 96.0 - step=017 action=move(direction='west') hp= 95.5 - step=018 action=door(target_id='door_1', door_state='close') hp= 95.0 - step=019 action=wait() hp= 94.5 - step=020 action=wait() hp= 94.0 - step=021 action=move(direction='west') hp= 93.5 - step=022 action=move(direction='east') hp= 93.5 - step=023 action=move(direction='west') hp= 93.5 - step=024 action=wait() hp= 93.5 - step=025 action=move(direction='east') hp= 93.5 - step=026 action=move(direction='west') hp= 93.5 - step=027 action=move(direction='west') hp= 93.5 - step=028 action=move(direction='south') hp= 93.5 - step=029 action=move(direction='north') hp= 93.5 - step=030 action=move(direction='west') hp= 93.5 -ep=0132 [medium] steps=030 reward= +13.742 evac=1 hp= 93.5 suc30=0.80 r30= +9.18 t=34s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=door(target_id='door_7', door_state='close') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='west') hp= 99.5 - step=013 action=move(direction='west') hp= 99.0 - step=014 action=move(direction='west') hp= 98.5 - step=015 action=move(direction='north') hp= 98.0 - step=016 action=move(direction='west') hp= 96.0 - step=017 action=move(direction='west') hp= 95.5 - step=018 action=door(target_id='door_1', door_state='close') hp= 95.5 - step=019 action=move(direction='east') hp= 95.5 - step=020 action=move(direction='north') hp= 95.0 - step=021 action=move(direction='west') hp= 94.5 - step=022 action=door(target_id='door_1', door_state='close') hp= 94.0 - step=023 action=door(target_id='door_1', door_state='open') hp= 93.5 - step=024 action=move(direction='south') hp= 93.0 - step=025 action=move(direction='west') hp= 92.5 -ep=0133 [medium] steps=025 reward= +14.447 evac=1 hp= 92.5 suc30=0.83 r30= +10.18 t=34s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='north') hp= 99.5 - step=015 action=wait() hp= 99.5 - step=016 action=move(direction='north') hp= 99.5 - step=017 action=move(direction='west') hp= 99.0 - step=018 action=wait() hp= 84.0 - step=019 action=move(direction='south') hp= 69.0 - step=020 action=move(direction='east') hp= 67.0 - step=021 action=move(direction='west') hp= 65.0 - step=022 action=move(direction='east') hp= 64.5 - step=023 action=wait() hp= 62.5 - step=024 action=move(direction='west') hp= 60.5 - step=025 action=wait() hp= 60.0 - step=026 action=move(direction='south') hp= 59.5 - step=027 action=wait() hp= 59.0 - step=028 action=wait() hp= 58.5 - step=029 action=move(direction='north') hp= 58.0 - step=030 action=move(direction='south') hp= 57.5 - step=031 action=move(direction='north') hp= 57.0 - step=032 action=move(direction='east') hp= 56.5 - step=033 action=move(direction='west') hp= 56.0 - step=034 action=move(direction='east') hp= 55.5 - step=035 action=wait() hp= 55.0 - step=036 action=move(direction='west') hp= 54.5 - step=037 action=wait() hp= 54.5 - step=038 action=move(direction='south') hp= 54.5 - step=039 action=move(direction='north') hp= 54.5 - step=040 action=move(direction='east') hp= 54.5 - step=041 action=move(direction='west') hp= 54.5 - step=042 action=move(direction='south') hp= 54.5 - step=043 action=wait() hp= 54.5 - step=044 action=wait() hp= 54.5 - step=045 action=move(direction='north') hp= 54.5 - step=046 action=wait() hp= 54.5 - step=047 action=wait() hp= 54.5 - step=048 action=wait() hp= 54.5 - step=049 action=move(direction='east') hp= 54.5 - step=050 action=wait() hp= 54.5 - step=051 action=move(direction='west') hp= 54.5 - step=052 action=wait() hp= 54.5 - step=053 action=wait() hp= 54.5 - step=054 action=wait() hp= 54.5 - step=055 action=wait() hp= 54.5 - step=056 action=wait() hp= 54.5 - step=057 action=wait() hp= 54.5 - step=058 action=move(direction='south') hp= 54.5 - step=059 action=wait() hp= 54.5 - step=060 action=move(direction='north') hp= 54.5 - step=061 action=wait() hp= 54.5 - step=062 action=move(direction='south') hp= 54.5 - step=063 action=move(direction='east') hp= 54.5 - step=064 action=move(direction='west') hp= 54.5 - step=065 action=move(direction='east') hp= 54.5 - step=066 action=move(direction='north') hp= 54.5 - step=067 action=wait() hp= 54.5 - step=068 action=move(direction='south') hp= 54.5 - step=069 action=move(direction='north') hp= 54.5 - step=070 action=move(direction='south') hp= 54.5 - step=071 action=wait() hp= 54.5 - step=072 action=move(direction='west') hp= 54.5 - step=073 action=wait() hp= 54.5 - step=074 action=move(direction='east') hp= 54.5 - step=075 action=move(direction='west') hp= 54.5 - step=076 action=move(direction='east') hp= 54.5 - step=077 action=move(direction='north') hp= 54.5 - step=078 action=wait() hp= 54.5 - step=079 action=move(direction='west') hp= 54.5 - step=080 action=move(direction='south') hp= 54.5 - step=081 action=move(direction='east') hp= 54.5 - step=082 action=move(direction='west') hp= 54.5 - step=083 action=move(direction='east') hp= 54.5 - step=084 action=move(direction='west') hp= 54.5 - step=085 action=move(direction='north') hp= 54.5 - step=086 action=move(direction='east') hp= 54.5 - step=087 action=move(direction='west') hp= 54.5 - step=088 action=move(direction='south') hp= 54.5 - step=089 action=move(direction='north') hp= 54.5 - step=090 action=move(direction='south') hp= 54.5 - step=091 action=wait() hp= 54.5 - step=092 action=move(direction='north') hp= 54.5 - step=093 action=wait() hp= 54.5 - step=094 action=wait() hp= 54.5 - step=095 action=move(direction='east') hp= 54.5 - step=096 action=wait() hp= 54.5 - step=097 action=wait() hp= 54.5 - step=098 action=wait() hp= 54.5 - step=099 action=move(direction='south') hp= 54.5 - step=100 action=move(direction='west') hp= 54.5 - step=101 action=move(direction='east') hp= 54.5 - step=102 action=move(direction='west') hp= 54.5 - step=103 action=move(direction='east') hp= 54.5 - step=104 action=move(direction='west') hp= 54.5 - step=105 action=move(direction='north') hp= 54.5 - step=106 action=wait() hp= 54.5 - step=107 action=move(direction='south') hp= 54.5 - step=108 action=move(direction='north') hp= 54.5 - step=109 action=wait() hp= 54.5 - step=110 action=move(direction='south') hp= 54.5 - step=111 action=move(direction='north') hp= 54.5 - step=112 action=wait() hp= 54.5 - step=113 action=wait() hp= 54.5 - step=114 action=move(direction='south') hp= 54.5 - step=115 action=move(direction='east') hp= 54.5 - step=116 action=move(direction='north') hp= 54.5 - step=117 action=move(direction='south') hp= 54.5 - step=118 action=move(direction='north') hp= 54.5 - step=119 action=move(direction='west') hp= 54.5 - step=120 action=move(direction='east') hp= 54.5 - step=121 action=move(direction='west') hp= 54.5 - step=122 action=wait() hp= 54.5 - step=123 action=move(direction='east') hp= 54.5 - step=124 action=move(direction='west') hp= 54.5 - step=125 action=wait() hp= 54.5 - step=126 action=wait() hp= 54.5 - step=127 action=wait() hp= 54.5 - step=128 action=wait() hp= 54.5 - step=129 action=move(direction='east') hp= 54.5 - step=130 action=move(direction='west') hp= 54.5 - step=131 action=move(direction='east') hp= 54.5 - step=132 action=wait() hp= 54.5 - step=133 action=move(direction='west') hp= 54.5 - step=134 action=move(direction='east') hp= 54.5 - step=135 action=move(direction='south') hp= 54.5 - step=136 action=move(direction='west') hp= 54.5 - step=137 action=wait() hp= 54.5 - step=138 action=move(direction='north') hp= 54.5 - step=139 action=wait() hp= 54.5 - step=140 action=move(direction='east') hp= 54.5 - step=141 action=move(direction='west') hp= 54.5 - step=142 action=wait() hp= 54.5 - step=143 action=move(direction='east') hp= 54.5 - step=144 action=wait() hp= 54.5 - step=145 action=move(direction='south') hp= 54.5 - step=146 action=move(direction='north') hp= 54.5 - step=147 action=move(direction='west') hp= 54.5 - step=148 action=move(direction='east') hp= 54.5 - step=149 action=wait() hp= 54.5 - step=150 action=move(direction='west') hp= 54.5 -ep=0134 [medium] steps=150 reward= -26.935 evac=0 hp= 54.5 suc30=0.80 r30= +8.72 t=35s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='east') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=door(target_id='door_1', door_state='close') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='west') hp=100.0 -ep=0135 [medium] steps=013 reward= +15.770 evac=1 hp=100.0 suc30=0.83 r30= +9.87 t=35s - >> PPO update samples=flushed pi_loss=+0.0010 v_loss=34.2685 entropy=1.0653 kl=0.0005 clip%=0.00 lr=1.18e-04 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=door(target_id='door_2', door_state='close') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=door(target_id='door_2', door_state='open') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=door(target_id='door_2', door_state='close') hp=100.0 - step=014 action=door(target_id='door_2', door_state='close') hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=door(target_id='door_2', door_state='close') hp=100.0 - step=017 action=door(target_id='door_2', door_state='close') hp=100.0 - step=018 action=door(target_id='door_2', door_state='close') hp=100.0 - step=019 action=door(target_id='door_2', door_state='close') hp=100.0 - step=020 action=door(target_id='door_2', door_state='close') hp=100.0 - step=021 action=door(target_id='door_2', door_state='close') hp=100.0 - step=022 action=door(target_id='door_2', door_state='close') hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=wait() hp=100.0 - step=025 action=door(target_id='door_2', door_state='close') hp=100.0 - step=026 action=door(target_id='door_2', door_state='close') hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=move(direction='south') hp=100.0 - step=029 action=move(direction='north') hp=100.0 - step=030 action=wait() hp=100.0 - step=031 action=door(target_id='door_2', door_state='close') hp=100.0 - step=032 action=door(target_id='door_2', door_state='close') hp=100.0 - step=033 action=move(direction='south') hp=100.0 - step=034 action=door(target_id='door_2', door_state='close') hp=100.0 - step=035 action=move(direction='north') hp=100.0 - step=036 action=door(target_id='door_2', door_state='close') hp=100.0 - step=037 action=door(target_id='door_2', door_state='close') hp=100.0 - step=038 action=door(target_id='door_2', door_state='close') hp=100.0 - step=039 action=wait() hp=100.0 - step=040 action=move(direction='east') hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=move(direction='west') hp=100.0 - step=043 action=door(target_id='door_2', door_state='close') hp=100.0 - step=044 action=wait() hp=100.0 - step=045 action=door(target_id='door_2', door_state='close') hp=100.0 - step=046 action=wait() hp=100.0 - step=047 action=wait() hp=100.0 - step=048 action=wait() hp=100.0 - step=049 action=wait() hp=100.0 - step=050 action=door(target_id='door_2', door_state='close') hp=100.0 - step=051 action=wait() hp=100.0 - step=052 action=door(target_id='door_2', door_state='close') hp=100.0 - step=053 action=wait() hp=100.0 - step=054 action=door(target_id='door_2', door_state='close') hp=100.0 - step=055 action=door(target_id='door_2', door_state='close') hp=100.0 - step=056 action=move(direction='south') hp=100.0 - step=057 action=move(direction='east') hp=100.0 - step=058 action=door(target_id='door_2', door_state='close') hp=100.0 - step=059 action=move(direction='north') hp=100.0 - step=060 action=wait() hp=100.0 - step=061 action=move(direction='west') hp=100.0 - step=062 action=wait() hp=100.0 - step=063 action=wait() hp=100.0 - step=064 action=wait() hp=100.0 - step=065 action=wait() hp=100.0 - step=066 action=door(target_id='door_2', door_state='open') hp=100.0 - step=067 action=door(target_id='door_2', door_state='open') hp=100.0 - step=068 action=door(target_id='door_2', door_state='open') hp=100.0 - step=069 action=move(direction='south') hp=100.0 - step=070 action=move(direction='north') hp=100.0 - step=071 action=door(target_id='door_2', door_state='open') hp=100.0 - step=072 action=move(direction='south') hp=100.0 - step=073 action=move(direction='north') hp=100.0 - step=074 action=door(target_id='door_2', door_state='open') hp=100.0 - step=075 action=move(direction='south') hp=100.0 - step=076 action=move(direction='east') hp=100.0 - step=077 action=move(direction='north') hp=100.0 - step=078 action=move(direction='west') hp=100.0 - step=079 action=wait() hp=100.0 - step=080 action=door(target_id='door_2', door_state='open') hp=100.0 - step=081 action=move(direction='east') hp=100.0 - step=082 action=move(direction='west') hp=100.0 - step=083 action=move(direction='south') hp=100.0 - step=084 action=wait() hp=100.0 - step=085 action=move(direction='north') hp=100.0 - step=086 action=move(direction='east') hp=100.0 - step=087 action=move(direction='east') hp=100.0 - step=088 action=wait() hp=100.0 - step=089 action=move(direction='west') hp=100.0 - step=090 action=move(direction='east') hp=100.0 - step=091 action=move(direction='west') hp=100.0 - step=092 action=move(direction='west') hp=100.0 - step=093 action=door(target_id='door_2', door_state='open') hp=100.0 - step=094 action=wait() hp=100.0 - step=095 action=door(target_id='door_2', door_state='open') hp=100.0 - step=096 action=wait() hp=100.0 - step=097 action=door(target_id='door_2', door_state='open') hp=100.0 - step=098 action=door(target_id='door_2', door_state='open') hp=100.0 - step=099 action=door(target_id='door_2', door_state='open') hp=100.0 - step=100 action=wait() hp=100.0 - step=101 action=wait() hp=100.0 - step=102 action=door(target_id='door_2', door_state='open') hp=100.0 - step=103 action=move(direction='south') hp=100.0 - step=104 action=move(direction='south') hp=100.0 - step=105 action=move(direction='north') hp=100.0 - step=106 action=wait() hp=100.0 - step=107 action=move(direction='north') hp=100.0 - step=108 action=move(direction='south') hp=100.0 - step=109 action=move(direction='east') hp=100.0 - step=110 action=move(direction='west') hp=100.0 - step=111 action=move(direction='north') hp=100.0 - step=112 action=door(target_id='door_2', door_state='open') hp=100.0 - step=113 action=wait() hp=100.0 - step=114 action=door(target_id='door_2', door_state='open') hp=100.0 - step=115 action=wait() hp=100.0 - step=116 action=move(direction='east') hp=100.0 - step=117 action=door(target_id='door_2', door_state='open') hp=100.0 - step=118 action=move(direction='west') hp=100.0 - step=119 action=wait() hp=100.0 - step=120 action=wait() hp=100.0 - step=121 action=wait() hp=100.0 - step=122 action=door(target_id='door_2', door_state='open') hp=100.0 - step=123 action=door(target_id='door_2', door_state='open') hp=100.0 - step=124 action=wait() hp=100.0 - step=125 action=move(direction='east') hp=100.0 - step=126 action=door(target_id='door_2', door_state='open') hp=100.0 - step=127 action=move(direction='west') hp=100.0 - step=128 action=wait() hp=100.0 - step=129 action=door(target_id='door_2', door_state='open') hp=100.0 - step=130 action=door(target_id='door_2', door_state='open') hp=100.0 - step=131 action=wait() hp=100.0 - step=132 action=move(direction='east') hp=100.0 - step=133 action=move(direction='west') hp=100.0 - step=134 action=move(direction='south') hp=100.0 - step=135 action=move(direction='north') hp=100.0 - step=136 action=wait() hp=100.0 - step=137 action=door(target_id='door_2', door_state='open') hp=100.0 - step=138 action=door(target_id='door_2', door_state='open') hp=100.0 - step=139 action=door(target_id='door_2', door_state='open') hp=100.0 - step=140 action=wait() hp=100.0 - step=141 action=move(direction='south') hp=100.0 - step=142 action=door(target_id='door_2', door_state='open') hp=100.0 - step=143 action=door(target_id='door_2', door_state='open') hp=100.0 - step=144 action=door(target_id='door_2', door_state='open') hp=100.0 - step=145 action=move(direction='north') hp=100.0 - step=146 action=door(target_id='door_2', door_state='open') hp=100.0 - step=147 action=door(target_id='door_2', door_state='open') hp=100.0 - step=148 action=wait() hp=100.0 - step=149 action=door(target_id='door_2', door_state='open') hp=100.0 - step=150 action=wait() hp=100.0 -ep=0136 [medium] steps=150 reward= -16.300 evac=0 hp=100.0 suc30=0.83 r30= +9.84 t=35s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='south') hp= 85.0 - step=013 action=move(direction='north') hp= 74.5 - step=014 action=wait() hp= 59.5 - step=015 action=move(direction='east') hp= 54.5 - step=016 action=move(direction='south') hp= 54.0 - step=017 action=move(direction='north') hp= 53.5 - step=018 action=move(direction='north') hp= 51.5 - step=019 action=wait() hp= 36.5 - step=020 action=move(direction='south') hp= 31.5 - step=021 action=move(direction='east') hp= 19.5 - step=022 action=move(direction='west') hp= 19.0 - step=023 action=move(direction='east') hp= 4.0 - step=024 action=move(direction='west') hp= 3.5 -ep=0137 [medium] steps=024 reward= -17.860 evac=0 hp= 0.0 suc30=0.80 r30= +8.67 t=36s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 -ep=0138 [medium] steps=002 reward= +14.730 evac=1 hp=100.0 suc30=0.80 r30= +8.78 t=36s - step=001 action=move(direction='north') hp=100.0 - step=002 action=door(target_id='door_1', door_state='close') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=door(target_id='door_1', door_state='open') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=move(direction='east') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='south') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=door(target_id='door_1', door_state='close') hp=100.0 - step=017 action=wait() hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=move(direction='east') hp=100.0 - step=021 action=move(direction='west') hp=100.0 - step=022 action=move(direction='east') hp=100.0 - step=023 action=door(target_id='door_1', door_state='close') hp=100.0 - step=024 action=door(target_id='door_1', door_state='close') hp=100.0 - step=025 action=door(target_id='door_1', door_state='close') hp=100.0 - step=026 action=move(direction='west') hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=move(direction='east') hp=100.0 - step=029 action=move(direction='west') hp=100.0 - step=030 action=wait() hp=100.0 - step=031 action=move(direction='south') hp=100.0 - step=032 action=wait() hp=100.0 - step=033 action=wait() hp=100.0 - step=034 action=move(direction='east') hp=100.0 - step=035 action=wait() hp=100.0 - step=036 action=move(direction='south') hp=100.0 - step=037 action=move(direction='north') hp=100.0 - step=038 action=move(direction='south') hp=100.0 - step=039 action=move(direction='east') hp=100.0 - step=040 action=door(target_id='door_1', door_state='close') hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=door(target_id='door_1', door_state='open') hp=100.0 - step=043 action=move(direction='north') hp=100.0 - step=044 action=move(direction='north') hp=100.0 - step=045 action=door(target_id='door_1', door_state='close') hp=100.0 - step=046 action=move(direction='west') hp=100.0 - step=047 action=move(direction='west') hp=100.0 - step=048 action=move(direction='south') hp=100.0 - step=049 action=wait() hp=100.0 - step=050 action=move(direction='east') hp=100.0 - step=051 action=move(direction='west') hp= 99.5 - step=052 action=move(direction='east') hp= 99.0 - step=053 action=move(direction='north') hp= 98.5 - step=054 action=move(direction='east') hp= 98.5 - step=055 action=move(direction='west') hp= 98.5 - step=056 action=move(direction='west') hp= 98.0 - step=057 action=door(target_id='door_1', door_state='close') hp= 97.5 - step=058 action=door(target_id='door_1', door_state='close') hp= 97.0 - step=059 action=move(direction='south') hp= 96.5 - step=060 action=move(direction='south') hp= 96.0 - step=061 action=door(target_id='door_1', door_state='close') hp= 95.5 - step=062 action=wait() hp= 95.0 - step=063 action=wait() hp= 94.5 - step=064 action=wait() hp= 94.0 - step=065 action=move(direction='east') hp= 93.5 - step=066 action=door(target_id='door_1', door_state='open') hp= 93.0 - step=067 action=door(target_id='door_1', door_state='close') hp= 92.5 - step=068 action=move(direction='north') hp= 92.0 - step=069 action=wait() hp= 91.5 - step=070 action=wait() hp= 91.0 - step=071 action=move(direction='east') hp= 90.5 - step=072 action=move(direction='south') hp= 90.0 - step=073 action=move(direction='west') hp= 89.5 - step=074 action=move(direction='west') hp= 89.0 - step=075 action=wait() hp= 88.5 - step=076 action=wait() hp= 88.0 - step=077 action=wait() hp= 87.5 - step=078 action=move(direction='north') hp= 87.0 - step=079 action=move(direction='east') hp= 86.5 - step=080 action=move(direction='east') hp= 86.0 - step=081 action=wait() hp= 85.5 - step=082 action=move(direction='north') hp= 85.0 - step=083 action=door(target_id='door_1', door_state='open') hp= 84.5 - step=084 action=move(direction='west') hp= 84.5 - step=085 action=door(target_id='door_1', door_state='open') hp= 84.5 - step=086 action=move(direction='east') hp= 84.5 - step=087 action=wait() hp= 84.5 - step=088 action=move(direction='west') hp= 84.5 - step=089 action=wait() hp= 84.5 - step=090 action=move(direction='east') hp= 84.5 - step=091 action=move(direction='west') hp= 84.5 - step=092 action=door(target_id='door_1', door_state='open') hp= 84.5 - step=093 action=move(direction='west') hp= 84.5 - step=094 action=wait() hp= 84.5 - step=095 action=wait() hp= 84.5 - step=096 action=door(target_id='door_1', door_state='open') hp= 84.5 - step=097 action=wait() hp= 84.5 - step=098 action=move(direction='east') hp= 84.5 - step=099 action=door(target_id='door_1', door_state='open') hp= 84.5 - step=100 action=move(direction='west') hp= 84.5 - step=101 action=move(direction='south') hp= 84.5 - step=102 action=move(direction='east') hp= 84.5 - step=103 action=wait() hp= 84.5 - step=104 action=move(direction='north') hp= 84.5 - step=105 action=move(direction='west') hp= 84.5 - step=106 action=door(target_id='door_1', door_state='open') hp= 84.5 - step=107 action=move(direction='east') hp= 84.5 - step=108 action=wait() hp= 84.5 - step=109 action=move(direction='west') hp= 84.5 - step=110 action=door(target_id='door_1', door_state='open') hp= 84.5 - step=111 action=move(direction='south') hp= 84.5 - step=112 action=wait() hp= 84.5 - step=113 action=move(direction='north') hp= 84.5 - step=114 action=move(direction='south') hp= 84.5 - step=115 action=door(target_id='door_1', door_state='open') hp= 84.5 - step=116 action=door(target_id='door_1', door_state='open') hp= 84.5 - step=117 action=door(target_id='door_1', door_state='open') hp= 84.5 - step=118 action=move(direction='north') hp= 84.5 - step=119 action=move(direction='south') hp= 84.5 - step=120 action=move(direction='north') hp= 84.5 - step=121 action=wait() hp= 84.5 - step=122 action=move(direction='south') hp= 84.5 - step=123 action=move(direction='north') hp= 84.5 - step=124 action=move(direction='south') hp= 84.5 - step=125 action=move(direction='north') hp= 84.5 - step=126 action=wait() hp= 84.5 - step=127 action=move(direction='east') hp= 84.5 - step=128 action=move(direction='west') hp= 84.5 - step=129 action=move(direction='east') hp= 84.5 - step=130 action=wait() hp= 84.5 - step=131 action=move(direction='south') hp= 84.5 - step=132 action=door(target_id='door_1', door_state='open') hp= 84.5 - step=133 action=door(target_id='door_1', door_state='close') hp= 84.5 - step=134 action=move(direction='west') hp= 84.5 - step=135 action=wait() hp= 84.5 - step=136 action=move(direction='north') hp= 84.5 - step=137 action=door(target_id='door_1', door_state='open') hp= 84.5 - step=138 action=move(direction='south') hp= 84.5 - step=139 action=move(direction='north') hp= 84.5 - step=140 action=move(direction='south') hp= 84.5 - step=141 action=move(direction='south') hp= 84.5 - step=142 action=door(target_id='door_1', door_state='open') hp= 84.5 - step=143 action=wait() hp= 84.5 - step=144 action=move(direction='north') hp= 84.5 - step=145 action=move(direction='north') hp= 84.5 - step=146 action=door(target_id='door_1', door_state='close') hp= 84.5 - step=147 action=move(direction='east') hp= 84.5 - step=148 action=door(target_id='door_1', door_state='close') hp= 84.5 - step=149 action=door(target_id='door_1', door_state='close') hp= 84.5 - step=150 action=move(direction='west') hp= 84.5 -ep=0139 [medium] steps=150 reward= -20.595 evac=0 hp= 84.5 suc30=0.77 r30= +7.57 t=36s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='south') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='east') hp= 99.5 - step=012 action=move(direction='west') hp= 84.5 - step=013 action=move(direction='north') hp= 84.0 - step=014 action=move(direction='north') hp= 84.0 - step=015 action=move(direction='east') hp= 84.0 - step=016 action=move(direction='west') hp= 84.0 - step=017 action=move(direction='north') hp= 84.0 - step=018 action=move(direction='west') hp= 84.0 -ep=0140 [medium] steps=018 reward= +15.610 evac=1 hp= 84.0 suc30=0.77 r30= +7.55 t=36s - >> PPO update samples=flushed pi_loss=+0.0136 v_loss=38.0948 entropy=1.3180 kl=0.0008 clip%=0.00 lr=1.11e-04 - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - ** EVAL [medium] reward=+16.283 success=1.00 steps=6.3 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='south') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 -ep=0141 [medium] steps=005 reward= +15.310 evac=1 hp=100.0 suc30=0.77 r30= +7.56 t=37s - step=001 action=door(target_id='door_2', door_state='close') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='north') hp= 85.0 - step=007 action=move(direction='west') hp= 84.5 - step=008 action=move(direction='south') hp= 84.5 - step=009 action=door(target_id='door_1', door_state='close') hp= 84.0 - step=010 action=move(direction='west') hp= 72.0 - step=011 action=door(target_id='door_5', door_state='close') hp= 72.0 - step=012 action=wait() hp= 71.5 - step=013 action=move(direction='south') hp= 59.5 - step=014 action=move(direction='north') hp= 47.5 - step=015 action=move(direction='north') hp= 32.5 - step=016 action=move(direction='north') hp= 17.5 - step=017 action=move(direction='north') hp= 17.0 - step=018 action=wait() hp= 16.5 - step=019 action=wait() hp= 1.5 -ep=0142 [medium] steps=019 reward= -11.900 evac=0 hp= 0.0 suc30=0.73 r30= +6.64 t=37s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=wait() hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=move(direction='north') hp=100.0 - step=023 action=move(direction='west') hp=100.0 -ep=0143 [medium] steps=023 reward= +19.750 evac=1 hp=100.0 suc30=0.73 r30= +6.70 t=37s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='south') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='north') hp=100.0 -ep=0144 [medium] steps=008 reward= +15.640 evac=1 hp=100.0 suc30=0.73 r30= +6.72 t=37s - step=001 action=door(target_id='door_4', door_state='close') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=door(target_id='door_3', door_state='open') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=door(target_id='door_3', door_state='close') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=door(target_id='door_3', door_state='close') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=door(target_id='door_1', door_state='close') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=door(target_id='door_1', door_state='close') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=move(direction='east') hp=100.0 - step=020 action=move(direction='west') hp= 99.5 - step=021 action=door(target_id='door_1', door_state='open') hp= 99.5 - step=022 action=move(direction='west') hp= 99.0 - step=023 action=move(direction='north') hp= 99.0 - step=024 action=move(direction='south') hp= 99.0 - step=025 action=move(direction='north') hp= 99.0 - step=026 action=move(direction='north') hp= 99.0 - step=027 action=move(direction='north') hp= 99.0 - step=028 action=door(target_id='door_1', door_state='close') hp= 99.0 - step=029 action=door(target_id='door_1', door_state='open') hp= 99.0 - step=030 action=move(direction='east') hp= 99.0 - step=031 action=wait() hp= 99.0 - step=032 action=move(direction='north') hp= 99.0 - step=033 action=door(target_id='door_1', door_state='close') hp= 99.0 - step=034 action=move(direction='south') hp= 99.0 - step=035 action=move(direction='west') hp= 99.0 - step=036 action=move(direction='west') hp= 99.0 - step=037 action=door(target_id='door_1', door_state='close') hp= 99.0 - step=038 action=move(direction='north') hp= 99.0 - step=039 action=move(direction='south') hp= 99.0 - step=040 action=wait() hp= 99.0 - step=041 action=move(direction='north') hp= 99.0 - step=042 action=move(direction='south') hp= 99.0 - step=043 action=move(direction='north') hp= 98.5 - step=044 action=wait() hp= 98.5 - step=045 action=move(direction='east') hp= 98.5 - step=046 action=wait() hp= 98.0 - step=047 action=move(direction='south') hp= 86.0 - step=048 action=wait() hp= 71.0 - step=049 action=move(direction='north') hp= 56.0 - step=050 action=move(direction='west') hp= 41.0 - step=051 action=wait() hp= 26.0 - step=052 action=wait() hp= 11.0 -ep=0145 [medium] steps=052 reward= -14.460 evac=0 hp= 0.0 suc30=0.70 r30= +5.69 t=37s - >> PPO update samples=flushed pi_loss=+0.0010 v_loss=60.1304 entropy=1.2687 kl=0.0002 clip%=0.00 lr=1.04e-04 - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='south') hp=100.0 - step=005 action=move(direction='south') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='east') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=move(direction='west') hp=100.0 -ep=0146 [medium] steps=020 reward= +15.630 evac=1 hp=100.0 suc30=0.70 r30= +5.61 t=37s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=move(direction='west') hp=100.0 -ep=0147 [medium] steps=004 reward= +15.050 evac=1 hp=100.0 suc30=0.70 r30= +5.61 t=37s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='east') hp=100.0 - step=003 action=door(target_id='door_7', door_state='close') hp=100.0 - step=004 action=door(target_id='door_6', door_state='close') hp=100.0 - step=005 action=door(target_id='door_6', door_state='close') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=door(target_id='door_7', door_state='open') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=door(target_id='door_7', door_state='open') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='east') hp= 99.5 - step=014 action=move(direction='west') hp= 99.5 - step=015 action=move(direction='west') hp= 99.0 - step=016 action=move(direction='west') hp= 98.5 - step=017 action=move(direction='east') hp= 86.5 - step=018 action=move(direction='north') hp= 84.5 - step=019 action=move(direction='west') hp= 84.0 - step=020 action=move(direction='north') hp= 69.0 - step=021 action=move(direction='east') hp= 67.0 - step=022 action=move(direction='west') hp= 65.0 - step=023 action=move(direction='north') hp= 50.0 - step=024 action=move(direction='west') hp= 48.0 - step=025 action=move(direction='west') hp= 33.0 - step=026 action=wait() hp= 18.0 - step=027 action=wait() hp= 3.0 -ep=0148 [medium] steps=027 reward= -18.930 evac=0 hp= 0.0 suc30=0.67 r30= +4.47 t=37s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='east') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='west') hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=move(direction='west') hp=100.0 - step=020 action=move(direction='north') hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=move(direction='north') hp=100.0 - step=023 action=move(direction='east') hp=100.0 - step=024 action=move(direction='north') hp=100.0 - step=025 action=move(direction='west') hp=100.0 - step=026 action=move(direction='west') hp=100.0 -ep=0149 [medium] steps=026 reward= +18.900 evac=1 hp=100.0 suc30=0.67 r30= +4.76 t=38s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='south') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='east') hp=100.0 - step=005 action=door(target_id='door_4', door_state='close') hp=100.0 - step=006 action=door(target_id='door_4', door_state='close') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=door(target_id='door_3', door_state='close') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='south') hp= 99.5 - step=014 action=move(direction='west') hp= 99.0 - step=015 action=door(target_id='door_3', door_state='close') hp= 98.5 - step=016 action=move(direction='north') hp= 98.0 - step=017 action=move(direction='west') hp= 97.5 - step=018 action=move(direction='west') hp= 97.5 - step=019 action=move(direction='west') hp= 97.5 - step=020 action=wait() hp= 97.5 - step=021 action=move(direction='west') hp= 97.5 - step=022 action=move(direction='west') hp= 97.5 - step=023 action=move(direction='west') hp= 97.5 - step=024 action=move(direction='west') hp= 97.5 - step=025 action=door(target_id='door_1', door_state='open') hp= 97.5 - step=026 action=move(direction='west') hp= 97.5 - step=027 action=move(direction='west') hp= 97.5 -ep=0150 [medium] steps=027 reward= +15.582 evac=1 hp= 97.5 suc30=0.67 r30= +4.76 t=38s - >> PPO update samples=flushed pi_loss=+0.0014 v_loss=56.4011 entropy=1.1502 kl=0.0001 clip%=0.00 lr=9.75e-05 - [ckpt] saved -> artifacts/pyre_ppo_http_ckpt.pt - step=001 action=move(direction='south') hp=100.0 - step=002 action=move(direction='west') hp=100.0 -ep=0151 [medium] steps=002 reward= +14.730 evac=1 hp=100.0 suc30=0.67 r30= +4.71 t=38s - step=001 action=move(direction='east') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='south') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='west') hp=100.0 -ep=0152 [medium] steps=017 reward= +17.050 evac=1 hp=100.0 suc30=0.67 r30= +4.74 t=38s - step=001 action=move(direction='west') hp=100.0 - step=002 action=door(target_id='door_3', door_state='close') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=door(target_id='door_3', door_state='close') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp= 99.5 - step=007 action=move(direction='west') hp= 99.5 - step=008 action=wait() hp= 99.5 - step=009 action=move(direction='west') hp= 99.5 - step=010 action=wait() hp= 99.5 - step=011 action=move(direction='west') hp= 99.5 - step=012 action=move(direction='west') hp= 99.5 - step=013 action=move(direction='west') hp= 99.5 - step=014 action=wait() hp= 99.5 - step=015 action=door(target_id='door_2', door_state='open') hp= 99.5 - step=016 action=wait() hp= 99.5 - step=017 action=move(direction='west') hp= 99.5 - step=018 action=move(direction='west') hp= 99.5 - step=019 action=move(direction='east') hp= 99.5 - step=020 action=move(direction='west') hp= 99.5 - step=021 action=move(direction='west') hp= 99.5 -ep=0153 [medium] steps=021 reward= +14.402 evac=1 hp= 99.5 suc30=0.67 r30= +4.74 t=38s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=door(target_id='door_2', door_state='close') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=door(target_id='door_2', door_state='close') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=door(target_id='door_1', door_state='open') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=door(target_id='door_1', door_state='close') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=move(direction='east') hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=wait() hp=100.0 - step=020 action=move(direction='south') hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=door(target_id='door_1', door_state='open') hp=100.0 - step=023 action=wait() hp=100.0 - step=024 action=door(target_id='door_1', door_state='close') hp=100.0 - step=025 action=move(direction='east') hp=100.0 - step=026 action=door(target_id='door_2', door_state='close') hp=100.0 - step=027 action=door(target_id='door_1', door_state='open') hp=100.0 - step=028 action=door(target_id='door_2', door_state='close') hp=100.0 - step=029 action=move(direction='west') hp=100.0 - step=030 action=move(direction='east') hp=100.0 - step=031 action=door(target_id='door_2', door_state='close') hp=100.0 - step=032 action=move(direction='west') hp=100.0 - step=033 action=wait() hp=100.0 - step=034 action=move(direction='south') hp=100.0 - step=035 action=move(direction='north') hp=100.0 - step=036 action=move(direction='east') hp=100.0 - step=037 action=door(target_id='door_5', door_state='open') hp=100.0 - step=038 action=move(direction='south') hp=100.0 - step=039 action=move(direction='west') hp=100.0 - step=040 action=move(direction='east') hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=move(direction='west') hp=100.0 - step=043 action=move(direction='north') hp=100.0 - step=044 action=door(target_id='door_1', door_state='close') hp=100.0 - step=045 action=door(target_id='door_1', door_state='open') hp=100.0 - step=046 action=wait() hp=100.0 - step=047 action=wait() hp=100.0 - step=048 action=door(target_id='door_1', door_state='close') hp=100.0 - step=049 action=door(target_id='door_1', door_state='open') hp=100.0 - step=050 action=door(target_id='door_1', door_state='close') hp=100.0 - step=051 action=door(target_id='door_1', door_state='open') hp=100.0 - step=052 action=wait() hp=100.0 - step=053 action=wait() hp=100.0 - step=054 action=door(target_id='door_1', door_state='close') hp=100.0 - step=055 action=move(direction='east') hp=100.0 - step=056 action=door(target_id='door_2', door_state='close') hp= 99.5 - step=057 action=wait() hp= 99.0 - step=058 action=wait() hp= 98.5 - step=059 action=move(direction='south') hp= 98.0 - step=060 action=move(direction='north') hp= 96.0 - step=061 action=door(target_id='door_1', door_state='open') hp= 94.0 - step=062 action=door(target_id='door_1', door_state='close') hp= 92.0 - step=063 action=move(direction='west') hp= 90.0 - step=064 action=door(target_id='door_1', door_state='open') hp= 89.5 - step=065 action=move(direction='south') hp= 87.5 - step=066 action=move(direction='north') hp= 72.5 - step=067 action=door(target_id='door_1', door_state='close') hp= 70.5 - step=068 action=door(target_id='door_1', door_state='open') hp= 65.5 - step=069 action=wait() hp= 50.5 - step=070 action=door(target_id='door_1', door_state='close') hp= 35.5 - step=071 action=door(target_id='door_1', door_state='open') hp= 20.5 - step=072 action=door(target_id='door_1', door_state='close') hp= 5.5 -ep=0154 [medium] steps=072 reward= -9.510 evac=0 hp= 0.0 suc30=0.63 r30= +3.87 t=38s - step=001 action=move(direction='east') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=wait() hp= 99.5 - step=013 action=wait() hp= 87.5 - step=014 action=move(direction='south') hp= 72.5 - step=015 action=move(direction='north') hp= 72.0 - step=016 action=wait() hp= 57.0 - step=017 action=move(direction='east') hp= 42.0 - step=018 action=move(direction='south') hp= 30.0 - step=019 action=move(direction='west') hp= 29.5 - step=020 action=move(direction='east') hp= 14.5 - step=021 action=move(direction='west') hp= 12.5 -ep=0155 [medium] steps=021 reward= -14.840 evac=0 hp= 0.0 suc30=0.60 r30= +3.03 t=38s - >> PPO update samples=flushed pi_loss=-0.0004 v_loss=52.0180 entropy=1.2495 kl=0.0001 clip%=0.00 lr=9.07e-05 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=door(target_id='door_2', door_state='close') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp= 88.0 - step=008 action=move(direction='west') hp= 73.0 - step=009 action=move(direction='west') hp= 72.5 - step=010 action=wait() hp= 72.5 - step=011 action=move(direction='west') hp= 72.5 - step=012 action=move(direction='south') hp= 72.5 - step=013 action=move(direction='north') hp= 72.5 - step=014 action=wait() hp= 72.5 - step=015 action=move(direction='west') hp= 72.5 -ep=0156 [medium] steps=015 reward= +13.008 evac=1 hp= 72.5 suc30=0.60 r30= +2.90 t=39s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=move(direction='east') hp=100.0 - step=016 action=door(target_id='door_1', door_state='close') hp=100.0 - step=017 action=door(target_id='door_2', door_state='close') hp=100.0 - step=018 action=door(target_id='door_1', door_state='open') hp=100.0 - step=019 action=door(target_id='door_2', door_state='close') hp=100.0 - step=020 action=door(target_id='door_2', door_state='close') hp=100.0 - step=021 action=move(direction='west') hp=100.0 - step=022 action=move(direction='south') hp=100.0 - step=023 action=move(direction='east') hp=100.0 - step=024 action=move(direction='west') hp=100.0 - step=025 action=move(direction='north') hp=100.0 - step=026 action=wait() hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=wait() hp=100.0 - step=029 action=move(direction='east') hp=100.0 - step=030 action=move(direction='south') hp=100.0 - step=031 action=move(direction='west') hp=100.0 - step=032 action=door(target_id='door_1', door_state='close') hp=100.0 - step=033 action=move(direction='east') hp=100.0 - step=034 action=move(direction='west') hp=100.0 - step=035 action=move(direction='west') hp=100.0 -ep=0157 [medium] steps=035 reward= +15.730 evac=1 hp=100.0 suc30=0.63 r30= +3.86 t=39s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='east') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='east') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='north') hp= 99.5 - step=012 action=move(direction='north') hp= 84.5 - step=013 action=move(direction='east') hp= 84.0 - step=014 action=move(direction='west') hp= 83.5 - step=015 action=move(direction='north') hp= 68.5 - step=016 action=move(direction='north') hp= 68.0 - step=017 action=wait() hp= 68.0 - step=018 action=wait() hp= 68.0 - step=019 action=move(direction='north') hp= 68.0 - step=020 action=move(direction='west') hp= 68.0 -ep=0158 [medium] steps=020 reward= +11.760 evac=1 hp= 68.0 suc30=0.63 r30= +3.72 t=39s - step=001 action=move(direction='north') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=move(direction='south') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='north') hp=100.0 -ep=0159 [medium] steps=014 reward= +15.560 evac=1 hp=100.0 suc30=0.63 r30= +3.77 t=39s - step=001 action=door(target_id='door_6', door_state='open') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=door(target_id='door_1', door_state='close') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='east') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=door(target_id='door_1', door_state='close') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=door(target_id='door_1', door_state='close') hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='west') hp=100.0 -ep=0160 [medium] steps=015 reward= +16.880 evac=1 hp=100.0 suc30=0.67 r30= +4.86 t=39s - >> PPO update samples=flushed pi_loss=+0.0009 v_loss=8.6643 entropy=1.2206 kl=0.0001 clip%=0.00 lr=8.40e-05 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - ** EVAL [medium] reward=+16.573 success=1.00 steps=8.3 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='east') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=wait() hp= 99.5 - step=014 action=wait() hp= 99.0 - step=015 action=wait() hp= 98.5 - step=016 action=move(direction='south') hp= 98.0 - step=017 action=move(direction='north') hp= 97.5 - step=018 action=wait() hp= 97.0 - step=019 action=wait() hp= 96.5 - step=020 action=wait() hp= 96.0 - step=021 action=wait() hp= 95.5 - step=022 action=move(direction='south') hp= 95.5 - step=023 action=wait() hp= 95.0 - step=024 action=move(direction='north') hp= 94.5 - step=025 action=move(direction='south') hp= 94.5 - step=026 action=wait() hp= 94.0 - step=027 action=wait() hp= 82.0 - step=028 action=wait() hp= 67.0 - step=029 action=wait() hp= 52.0 - step=030 action=wait() hp= 37.0 - step=031 action=wait() hp= 22.0 - step=032 action=wait() hp= 17.0 - step=033 action=wait() hp= 12.0 - step=034 action=move(direction='north') hp= 7.0 - step=035 action=wait() hp= 6.5 - step=036 action=wait() hp= 6.0 - step=037 action=wait() hp= 6.0 - step=038 action=wait() hp= 6.0 - step=039 action=wait() hp= 6.0 - step=040 action=wait() hp= 6.0 - step=041 action=wait() hp= 6.0 - step=042 action=wait() hp= 6.0 - step=043 action=wait() hp= 6.0 - step=044 action=wait() hp= 6.0 - step=045 action=wait() hp= 6.0 - step=046 action=wait() hp= 6.0 - step=047 action=wait() hp= 6.0 - step=048 action=wait() hp= 6.0 - step=049 action=wait() hp= 6.0 - step=050 action=wait() hp= 6.0 - step=051 action=wait() hp= 6.0 - step=052 action=wait() hp= 6.0 - step=053 action=wait() hp= 6.0 - step=054 action=wait() hp= 6.0 - step=055 action=wait() hp= 6.0 - step=056 action=wait() hp= 6.0 - step=057 action=wait() hp= 6.0 - step=058 action=wait() hp= 6.0 - step=059 action=wait() hp= 6.0 - step=060 action=wait() hp= 6.0 - step=061 action=wait() hp= 6.0 - step=062 action=wait() hp= 6.0 - step=063 action=wait() hp= 6.0 - step=064 action=wait() hp= 6.0 - step=065 action=wait() hp= 6.0 - step=066 action=wait() hp= 6.0 - step=067 action=wait() hp= 6.0 - step=068 action=wait() hp= 6.0 - step=069 action=wait() hp= 6.0 - step=070 action=wait() hp= 6.0 - step=071 action=wait() hp= 6.0 - step=072 action=wait() hp= 6.0 - step=073 action=wait() hp= 6.0 - step=074 action=wait() hp= 6.0 - step=075 action=wait() hp= 6.0 - step=076 action=wait() hp= 6.0 - step=077 action=wait() hp= 6.0 - step=078 action=wait() hp= 6.0 - step=079 action=wait() hp= 6.0 - step=080 action=wait() hp= 6.0 - step=081 action=wait() hp= 6.0 - step=082 action=wait() hp= 6.0 - step=083 action=wait() hp= 6.0 - step=084 action=wait() hp= 6.0 - step=085 action=wait() hp= 6.0 - step=086 action=wait() hp= 6.0 - step=087 action=wait() hp= 6.0 - step=088 action=wait() hp= 6.0 - step=089 action=wait() hp= 6.0 - step=090 action=wait() hp= 6.0 - step=091 action=wait() hp= 6.0 - step=092 action=wait() hp= 6.0 - step=093 action=wait() hp= 6.0 - step=094 action=wait() hp= 6.0 - step=095 action=wait() hp= 6.0 - step=096 action=wait() hp= 6.0 - step=097 action=wait() hp= 6.0 - step=098 action=wait() hp= 6.0 - step=099 action=wait() hp= 6.0 - step=100 action=wait() hp= 6.0 - step=101 action=wait() hp= 6.0 - step=102 action=wait() hp= 6.0 - step=103 action=wait() hp= 6.0 - step=104 action=wait() hp= 6.0 - step=105 action=wait() hp= 6.0 - step=106 action=wait() hp= 6.0 - step=107 action=wait() hp= 6.0 - step=108 action=wait() hp= 6.0 - step=109 action=wait() hp= 6.0 - step=110 action=wait() hp= 6.0 - step=111 action=wait() hp= 6.0 - step=112 action=wait() hp= 6.0 - step=113 action=wait() hp= 6.0 - step=114 action=wait() hp= 6.0 - step=115 action=wait() hp= 6.0 - step=116 action=wait() hp= 6.0 - step=117 action=wait() hp= 6.0 - step=118 action=wait() hp= 6.0 - step=119 action=wait() hp= 6.0 - step=120 action=wait() hp= 6.0 - step=121 action=wait() hp= 6.0 - step=122 action=wait() hp= 6.0 - step=123 action=wait() hp= 6.0 - step=124 action=wait() hp= 6.0 - step=125 action=wait() hp= 6.0 - step=126 action=wait() hp= 6.0 - step=127 action=wait() hp= 6.0 - step=128 action=wait() hp= 6.0 - step=129 action=wait() hp= 6.0 - step=130 action=wait() hp= 6.0 - step=131 action=wait() hp= 6.0 - step=132 action=wait() hp= 6.0 - step=133 action=wait() hp= 6.0 - step=134 action=wait() hp= 6.0 - step=135 action=wait() hp= 6.0 - step=136 action=wait() hp= 6.0 - step=137 action=wait() hp= 6.0 - step=138 action=wait() hp= 6.0 - step=139 action=wait() hp= 6.0 - step=140 action=wait() hp= 6.0 - step=141 action=wait() hp= 6.0 - step=142 action=wait() hp= 6.0 - step=143 action=wait() hp= 6.0 - step=144 action=wait() hp= 6.0 - step=145 action=wait() hp= 6.0 - step=146 action=wait() hp= 6.0 - step=147 action=wait() hp= 6.0 - step=148 action=wait() hp= 6.0 - step=149 action=wait() hp= 6.0 - step=150 action=wait() hp= 6.0 -ep=0161 [medium] steps=150 reward= -16.180 evac=0 hp= 6.0 suc30=0.67 r30= +4.73 t=40s - step=001 action=move(direction='west') hp=100.0 - step=002 action=door(target_id='door_2', door_state='close') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='west') hp=100.0 -ep=0162 [medium] steps=008 reward= +16.040 evac=1 hp=100.0 suc30=0.67 r30= +4.80 t=40s - step=001 action=door(target_id='door_1', door_state='close') hp=100.0 - step=002 action=move(direction='south') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=door(target_id='door_1', door_state='open') hp=100.0 - step=011 action=move(direction='south') hp=100.0 - step=012 action=move(direction='west') hp=100.0 -ep=0163 [medium] steps=012 reward= +14.160 evac=1 hp=100.0 suc30=0.67 r30= +4.79 t=40s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=move(direction='east') hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=move(direction='north') hp=100.0 - step=023 action=move(direction='north') hp=100.0 - step=024 action=move(direction='west') hp=100.0 -ep=0164 [medium] steps=024 reward= +19.260 evac=1 hp=100.0 suc30=0.70 r30= +6.33 t=40s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=wait() hp=100.0 - step=006 action=move(direction='north') hp=100.0 -ep=0165 [medium] steps=006 reward= +15.790 evac=1 hp=100.0 suc30=0.70 r30= +6.33 t=40s - >> PPO update samples=flushed pi_loss=-0.0015 v_loss=15.5974 entropy=0.4246 kl=0.0000 clip%=0.00 lr=7.72e-05 - step=001 action=move(direction='north') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='east') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='west') hp=100.0 -ep=0166 [medium] steps=009 reward= +16.010 evac=1 hp=100.0 suc30=0.73 r30= +7.41 t=40s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=wait() hp= 88.0 - step=006 action=move(direction='north') hp= 73.0 - step=007 action=move(direction='west') hp= 61.0 - step=008 action=move(direction='west') hp= 61.0 - step=009 action=move(direction='south') hp= 61.0 - step=010 action=move(direction='west') hp= 61.0 - step=011 action=move(direction='north') hp= 61.0 - step=012 action=wait() hp= 61.0 - step=013 action=move(direction='west') hp= 61.0 - step=014 action=move(direction='north') hp= 61.0 - step=015 action=move(direction='south') hp= 61.0 - step=016 action=move(direction='north') hp= 61.0 - step=017 action=move(direction='west') hp= 61.0 - step=018 action=move(direction='west') hp= 61.0 - step=019 action=wait() hp= 61.0 - step=020 action=move(direction='south') hp= 61.0 - step=021 action=move(direction='north') hp= 61.0 - step=022 action=move(direction='north') hp= 61.0 - step=023 action=move(direction='north') hp= 61.0 - step=024 action=move(direction='north') hp= 61.0 - step=025 action=move(direction='west') hp= 61.0 - step=026 action=move(direction='west') hp= 61.0 - step=027 action=move(direction='west') hp= 61.0 - step=028 action=move(direction='west') hp= 61.0 - step=029 action=move(direction='west') hp= 61.0 -ep=0167 [medium] steps=029 reward= +14.365 evac=1 hp= 61.0 suc30=0.77 r30= +8.48 t=40s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='east') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=door(target_id='door_2', door_state='close') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 -ep=0168 [medium] steps=010 reward= +15.810 evac=1 hp=100.0 suc30=0.77 r30= +8.52 t=40s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='east') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=door(target_id='door_2', door_state='close') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='west') hp=100.0 -ep=0169 [medium] steps=016 reward= +16.260 evac=1 hp=100.0 suc30=0.80 r30= +9.75 t=41s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='east') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='east') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='east') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=move(direction='west') hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=move(direction='west') hp=100.0 -ep=0170 [medium] steps=021 reward= +16.360 evac=1 hp=100.0 suc30=0.80 r30= +9.77 t=41s - >> PPO update samples=flushed pi_loss=-0.0023 v_loss=2.6779 entropy=1.0867 kl=0.0001 clip%=0.00 lr=7.05e-05 - step=001 action=move(direction='west') hp=100.0 -ep=0171 [medium] steps=001 reward= +14.260 evac=1 hp=100.0 suc30=0.80 r30= +9.74 t=41s - step=001 action=move(direction='north') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=door(target_id='door_1', door_state='open') hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=move(direction='east') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=door(target_id='door_1', door_state='close') hp=100.0 - step=010 action=door(target_id='door_1', door_state='open') hp=100.0 - step=011 action=door(target_id='door_1', door_state='close') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=wait() hp=100.0 - step=014 action=wait() hp=100.0 - step=015 action=move(direction='south') hp=100.0 - step=016 action=door(target_id='door_1', door_state='open') hp=100.0 - step=017 action=move(direction='west') hp=100.0 -ep=0172 [medium] steps=017 reward= +14.690 evac=1 hp=100.0 suc30=0.83 r30= +10.63 t=41s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='south') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=move(direction='south') hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=move(direction='north') hp=100.0 - step=021 action=wait() hp=100.0 - step=022 action=move(direction='west') hp=100.0 -ep=0173 [medium] steps=022 reward= +17.940 evac=1 hp=100.0 suc30=0.83 r30= +10.56 t=41s - step=001 action=wait() hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='north') hp=100.0 -ep=0174 [medium] steps=008 reward= +15.990 evac=1 hp=100.0 suc30=0.83 r30= +10.58 t=41s - step=001 action=move(direction='west') hp=100.0 - step=002 action=door(target_id='door_8', door_state='close') hp=100.0 - step=003 action=door(target_id='door_8', door_state='close') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp= 99.5 - step=008 action=move(direction='north') hp= 94.5 - step=009 action=move(direction='west') hp= 79.5 - step=010 action=move(direction='north') hp= 64.5 - step=011 action=move(direction='south') hp= 64.0 - step=012 action=wait() hp= 49.0 - step=013 action=move(direction='west') hp= 34.0 - step=014 action=move(direction='west') hp= 19.0 - step=015 action=door(target_id='door_2', door_state='close') hp= 17.0 - step=016 action=move(direction='north') hp= 2.0 -ep=0175 [medium] steps=016 reward= -17.260 evac=0 hp= 0.0 suc30=0.83 r30= +10.48 t=41s - >> PPO update samples=flushed pi_loss=-0.0026 v_loss=56.9066 entropy=1.1764 kl=0.0001 clip%=0.00 lr=6.37e-05 - [ckpt] saved -> artifacts/pyre_ppo_http_ckpt.pt - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='north') hp= 85.0 - step=007 action=move(direction='east') hp= 70.0 - step=008 action=move(direction='west') hp= 65.0 - step=009 action=move(direction='north') hp= 50.0 - step=010 action=move(direction='west') hp= 48.0 - step=011 action=move(direction='west') hp= 47.5 - step=012 action=move(direction='north') hp= 47.5 - step=013 action=wait() hp= 47.5 - step=014 action=move(direction='north') hp= 47.5 - step=015 action=move(direction='north') hp= 47.0 - step=016 action=move(direction='north') hp= 47.0 - step=017 action=move(direction='north') hp= 47.0 - step=018 action=move(direction='north') hp= 47.0 - step=019 action=move(direction='west') hp= 47.0 - step=020 action=move(direction='north') hp= 47.0 - step=021 action=move(direction='west') hp= 47.0 -ep=0176 [medium] steps=021 reward= +12.625 evac=1 hp= 47.0 suc30=0.83 r30= +10.38 t=42s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=door(target_id='door_2', door_state='close') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 -ep=0177 [medium] steps=007 reward= +16.150 evac=1 hp=100.0 suc30=0.83 r30= +10.42 t=42s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='east') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=door(target_id='door_6', door_state='open') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='west') hp= 99.5 - step=011 action=move(direction='west') hp= 99.0 - step=012 action=wait() hp= 99.0 - step=013 action=move(direction='north') hp= 99.0 - step=014 action=move(direction='west') hp= 97.0 - step=015 action=move(direction='north') hp= 96.5 - step=016 action=wait() hp= 96.0 - step=017 action=door(target_id='door_1', door_state='close') hp= 95.5 - step=018 action=move(direction='south') hp= 95.0 - step=019 action=wait() hp= 94.5 - step=020 action=move(direction='west') hp= 94.0 -ep=0178 [medium] steps=020 reward= +13.820 evac=1 hp= 94.0 suc30=0.87 r30= +11.51 t=42s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=wait() hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 -ep=0179 [medium] steps=009 reward= +16.630 evac=1 hp=100.0 suc30=0.87 r30= +11.44 t=43s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 -ep=0180 [medium] steps=002 reward= +14.730 evac=1 hp=100.0 suc30=0.87 r30= +11.41 t=43s - >> PPO update samples=flushed pi_loss=-0.0006 v_loss=11.5171 entropy=1.1012 kl=0.0001 clip%=0.00 lr=5.70e-05 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - ** EVAL [medium] reward=+16.397 success=1.00 steps=8.0 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=door(target_id='door_2', door_state='close') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=door(target_id='door_2', door_state='close') hp=100.0 - step=008 action=door(target_id='door_2', door_state='close') hp=100.0 - step=009 action=move(direction='east') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=door(target_id='door_2', door_state='close') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=door(target_id='door_1', door_state='close') hp=100.0 - step=015 action=wait() hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=wait() hp=100.0 - step=019 action=door(target_id='door_1', door_state='open') hp=100.0 - step=020 action=move(direction='east') hp=100.0 - step=021 action=move(direction='north') hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=move(direction='west') hp=100.0 - step=024 action=door(target_id='door_1', door_state='close') hp=100.0 - step=025 action=move(direction='south') hp=100.0 - step=026 action=move(direction='north') hp=100.0 - step=027 action=wait() hp=100.0 - step=028 action=wait() hp=100.0 - step=029 action=move(direction='east') hp=100.0 - step=030 action=move(direction='west') hp=100.0 - step=031 action=move(direction='east') hp=100.0 - step=032 action=move(direction='west') hp=100.0 - step=033 action=move(direction='south') hp=100.0 - step=034 action=wait() hp=100.0 - step=035 action=move(direction='east') hp=100.0 - step=036 action=door(target_id='door_1', door_state='close') hp=100.0 - step=037 action=move(direction='north') hp=100.0 - step=038 action=wait() hp=100.0 - step=039 action=move(direction='west') hp=100.0 - step=040 action=door(target_id='door_1', door_state='open') hp=100.0 - step=041 action=door(target_id='door_1', door_state='open') hp=100.0 - step=042 action=move(direction='east') hp=100.0 - step=043 action=wait() hp=100.0 - step=044 action=move(direction='west') hp=100.0 - step=045 action=wait() hp=100.0 - step=046 action=move(direction='east') hp=100.0 - step=047 action=wait() hp=100.0 - step=048 action=move(direction='south') hp=100.0 - step=049 action=wait() hp=100.0 - step=050 action=wait() hp=100.0 - step=051 action=door(target_id='door_1', door_state='open') hp=100.0 - step=052 action=move(direction='north') hp=100.0 - step=053 action=door(target_id='door_1', door_state='close') hp=100.0 - step=054 action=wait() hp=100.0 - step=055 action=move(direction='west') hp=100.0 - step=056 action=move(direction='south') hp=100.0 - step=057 action=move(direction='east') hp=100.0 - step=058 action=move(direction='east') hp=100.0 - step=059 action=wait() hp=100.0 - step=060 action=move(direction='west') hp=100.0 - step=061 action=move(direction='north') hp=100.0 - step=062 action=move(direction='east') hp=100.0 - step=063 action=door(target_id='door_1', door_state='close') hp=100.0 - step=064 action=move(direction='south') hp=100.0 - step=065 action=move(direction='west') hp=100.0 - step=066 action=move(direction='south') hp=100.0 - step=067 action=wait() hp=100.0 - step=068 action=move(direction='north') hp=100.0 - step=069 action=move(direction='west') hp=100.0 - step=070 action=move(direction='north') hp=100.0 - step=071 action=door(target_id='door_1', door_state='close') hp=100.0 - step=072 action=wait() hp=100.0 - step=073 action=wait() hp=100.0 - step=074 action=move(direction='east') hp=100.0 - step=075 action=move(direction='west') hp=100.0 - step=076 action=door(target_id='door_1', door_state='close') hp=100.0 - step=077 action=wait() hp=100.0 - step=078 action=wait() hp=100.0 - step=079 action=door(target_id='door_1', door_state='close') hp=100.0 - step=080 action=door(target_id='door_1', door_state='close') hp=100.0 - step=081 action=move(direction='south') hp=100.0 - step=082 action=move(direction='north') hp=100.0 - step=083 action=move(direction='east') hp=100.0 - step=084 action=move(direction='east') hp=100.0 - step=085 action=move(direction='south') hp=100.0 - step=086 action=wait() hp=100.0 - step=087 action=door(target_id='door_1', door_state='close') hp=100.0 - step=088 action=move(direction='north') hp=100.0 - step=089 action=move(direction='west') hp=100.0 - step=090 action=move(direction='west') hp=100.0 - step=091 action=door(target_id='door_1', door_state='close') hp=100.0 - step=092 action=wait() hp=100.0 - step=093 action=wait() hp=100.0 - step=094 action=move(direction='south') hp=100.0 - step=095 action=move(direction='north') hp=100.0 - step=096 action=wait() hp=100.0 - step=097 action=door(target_id='door_1', door_state='close') hp=100.0 - step=098 action=wait() hp=100.0 - step=099 action=door(target_id='door_1', door_state='close') hp=100.0 - step=100 action=move(direction='south') hp=100.0 - step=101 action=move(direction='north') hp=100.0 - step=102 action=wait() hp=100.0 - step=103 action=move(direction='south') hp=100.0 - step=104 action=door(target_id='door_1', door_state='close') hp=100.0 - step=105 action=move(direction='east') hp=100.0 - step=106 action=wait() hp=100.0 - step=107 action=move(direction='north') hp=100.0 - step=108 action=move(direction='east') hp=100.0 - step=109 action=door(target_id='door_1', door_state='close') hp=100.0 - step=110 action=move(direction='west') hp=100.0 - step=111 action=move(direction='west') hp=100.0 - step=112 action=wait() hp=100.0 - step=113 action=move(direction='south') hp=100.0 - step=114 action=move(direction='east') hp=100.0 - step=115 action=move(direction='north') hp=100.0 - step=116 action=move(direction='west') hp=100.0 - step=117 action=move(direction='south') hp=100.0 - step=118 action=move(direction='south') hp=100.0 - step=119 action=door(target_id='door_1', door_state='close') hp=100.0 - step=120 action=move(direction='north') hp=100.0 - step=121 action=wait() hp=100.0 - step=122 action=door(target_id='door_1', door_state='open') hp=100.0 - step=123 action=move(direction='south') hp=100.0 - step=124 action=door(target_id='door_1', door_state='open') hp=100.0 - step=125 action=wait() hp=100.0 - step=126 action=door(target_id='door_1', door_state='close') hp=100.0 - step=127 action=move(direction='east') hp=100.0 - step=128 action=wait() hp=100.0 - step=129 action=door(target_id='door_1', door_state='open') hp=100.0 - step=130 action=move(direction='south') hp=100.0 - step=131 action=wait() hp=100.0 - step=132 action=wait() hp=100.0 - step=133 action=wait() hp=100.0 - step=134 action=move(direction='north') hp=100.0 - step=135 action=move(direction='north') hp=100.0 - step=136 action=door(target_id='door_1', door_state='close') hp=100.0 - step=137 action=wait() hp=100.0 - step=138 action=move(direction='east') hp=100.0 - step=139 action=move(direction='north') hp=100.0 - step=140 action=move(direction='west') hp=100.0 - step=141 action=door(target_id='door_1', door_state='open') hp=100.0 - step=142 action=move(direction='south') hp=100.0 - step=143 action=move(direction='south') hp=100.0 - step=144 action=door(target_id='door_1', door_state='open') hp=100.0 - step=145 action=move(direction='north') hp=100.0 - step=146 action=move(direction='west') hp=100.0 - step=147 action=move(direction='south') hp=100.0 - step=148 action=door(target_id='door_1', door_state='close') hp=100.0 - step=149 action=move(direction='north') hp=100.0 - step=150 action=move(direction='north') hp=100.0 -ep=0181 [medium] steps=150 reward= -18.260 evac=0 hp=100.0 suc30=0.83 r30= +10.31 t=43s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='west') hp=100.0 -ep=0182 [medium] steps=005 reward= +15.900 evac=1 hp=100.0 suc30=0.83 r30= +10.27 t=43s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 -ep=0183 [medium] steps=003 reward= +15.160 evac=1 hp=100.0 suc30=0.83 r30= +10.29 t=43s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='south') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=door(target_id='door_1', door_state='close') hp=100.0 - step=009 action=door(target_id='door_5', door_state='close') hp=100.0 - step=010 action=door(target_id='door_1', door_state='close') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=door(target_id='door_5', door_state='close') hp=100.0 - step=014 action=move(direction='west') hp=100.0 -ep=0184 [medium] steps=014 reward= +15.980 evac=1 hp=100.0 suc30=0.87 r30= +11.14 t=43s - step=001 action=move(direction='north') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='east') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=wait() hp= 85.0 - step=019 action=move(direction='south') hp= 70.0 - step=020 action=wait() hp= 69.5 - step=021 action=wait() hp= 69.0 - step=022 action=move(direction='south') hp= 69.0 - step=023 action=move(direction='north') hp= 69.0 - step=024 action=move(direction='south') hp= 69.0 - step=025 action=move(direction='north') hp= 69.0 - step=026 action=wait() hp= 69.0 - step=027 action=wait() hp= 69.0 - step=028 action=wait() hp= 69.0 - step=029 action=move(direction='south') hp= 69.0 - step=030 action=wait() hp= 69.0 - step=031 action=wait() hp= 69.0 - step=032 action=move(direction='south') hp= 69.0 - step=033 action=wait() hp= 69.0 - step=034 action=move(direction='north') hp= 69.0 - step=035 action=wait() hp= 69.0 - step=036 action=move(direction='north') hp= 69.0 - step=037 action=wait() hp= 69.0 - step=038 action=move(direction='south') hp= 69.0 - step=039 action=move(direction='east') hp= 69.0 - step=040 action=move(direction='east') hp= 69.0 - step=041 action=wait() hp= 69.0 - step=042 action=wait() hp= 69.0 - step=043 action=move(direction='east') hp= 69.0 - step=044 action=move(direction='west') hp= 69.0 - step=045 action=move(direction='west') hp= 69.0 - step=046 action=move(direction='west') hp= 69.0 - step=047 action=move(direction='north') hp= 69.0 - step=048 action=wait() hp= 69.0 - step=049 action=wait() hp= 69.0 - step=050 action=move(direction='east') hp= 69.0 - step=051 action=move(direction='west') hp= 69.0 - step=052 action=wait() hp= 69.0 - step=053 action=wait() hp= 69.0 - step=054 action=move(direction='east') hp= 69.0 - step=055 action=move(direction='south') hp= 69.0 - step=056 action=move(direction='west') hp= 69.0 - step=057 action=move(direction='north') hp= 69.0 - step=058 action=move(direction='south') hp= 69.0 - step=059 action=wait() hp= 69.0 - step=060 action=move(direction='north') hp= 69.0 - step=061 action=wait() hp= 69.0 - step=062 action=move(direction='east') hp= 69.0 - step=063 action=move(direction='west') hp= 69.0 - step=064 action=wait() hp= 69.0 - step=065 action=move(direction='east') hp= 69.0 - step=066 action=move(direction='west') hp= 69.0 - step=067 action=move(direction='east') hp= 69.0 - step=068 action=wait() hp= 69.0 - step=069 action=move(direction='west') hp= 69.0 - step=070 action=move(direction='south') hp= 69.0 - step=071 action=move(direction='south') hp= 69.0 - step=072 action=wait() hp= 69.0 - step=073 action=move(direction='east') hp= 69.0 - step=074 action=move(direction='west') hp= 69.0 - step=075 action=move(direction='south') hp= 69.0 - step=076 action=move(direction='east') hp= 69.0 - step=077 action=move(direction='west') hp= 69.0 - step=078 action=move(direction='north') hp= 69.0 - step=079 action=move(direction='south') hp= 69.0 - step=080 action=move(direction='east') hp= 69.0 - step=081 action=move(direction='north') hp= 69.0 - step=082 action=wait() hp= 69.0 - step=083 action=move(direction='west') hp= 69.0 - step=084 action=wait() hp= 69.0 - step=085 action=wait() hp= 69.0 - step=086 action=wait() hp= 69.0 - step=087 action=move(direction='south') hp= 69.0 - step=088 action=move(direction='north') hp= 69.0 - step=089 action=wait() hp= 69.0 - step=090 action=move(direction='north') hp= 69.0 - step=091 action=move(direction='south') hp= 69.0 - step=092 action=move(direction='north') hp= 69.0 - step=093 action=move(direction='south') hp= 69.0 - step=094 action=move(direction='south') hp= 69.0 - step=095 action=wait() hp= 69.0 - step=096 action=wait() hp= 69.0 - step=097 action=move(direction='north') hp= 69.0 - step=098 action=move(direction='south') hp= 69.0 - step=099 action=move(direction='east') hp= 69.0 - step=100 action=move(direction='west') hp= 69.0 - step=101 action=move(direction='south') hp= 69.0 - step=102 action=move(direction='east') hp= 69.0 - step=103 action=move(direction='north') hp= 69.0 - step=104 action=move(direction='west') hp= 69.0 - step=105 action=move(direction='east') hp= 69.0 - step=106 action=move(direction='north') hp= 69.0 - step=107 action=move(direction='north') hp= 69.0 - step=108 action=wait() hp= 69.0 - step=109 action=move(direction='north') hp= 69.0 - step=110 action=move(direction='west') hp= 69.0 - step=111 action=wait() hp= 69.0 - step=112 action=move(direction='east') hp= 69.0 - step=113 action=move(direction='west') hp= 69.0 - step=114 action=move(direction='south') hp= 69.0 - step=115 action=move(direction='north') hp= 69.0 - step=116 action=move(direction='east') hp= 69.0 - step=117 action=move(direction='west') hp= 69.0 - step=118 action=wait() hp= 69.0 - step=119 action=move(direction='east') hp= 69.0 - step=120 action=move(direction='west') hp= 69.0 - step=121 action=wait() hp= 69.0 - step=122 action=wait() hp= 69.0 - step=123 action=move(direction='east') hp= 69.0 - step=124 action=move(direction='south') hp= 69.0 - step=125 action=move(direction='south') hp= 69.0 - step=126 action=move(direction='west') hp= 69.0 - step=127 action=move(direction='south') hp= 69.0 - step=128 action=move(direction='south') hp= 69.0 - step=129 action=move(direction='south') hp= 69.0 - step=130 action=move(direction='north') hp= 69.0 - step=131 action=move(direction='north') hp= 69.0 - step=132 action=move(direction='south') hp= 69.0 - step=133 action=move(direction='east') hp= 69.0 - step=134 action=wait() hp= 69.0 - step=135 action=move(direction='north') hp= 69.0 - step=136 action=move(direction='south') hp= 69.0 - step=137 action=move(direction='west') hp= 69.0 - step=138 action=move(direction='north') hp= 69.0 - step=139 action=move(direction='south') hp= 69.0 - step=140 action=wait() hp= 69.0 - step=141 action=move(direction='south') hp= 69.0 - step=142 action=move(direction='east') hp= 69.0 - step=143 action=move(direction='east') hp= 69.0 - step=144 action=wait() hp= 69.0 - step=145 action=move(direction='east') hp= 69.0 - step=146 action=move(direction='west') hp= 69.0 - step=147 action=move(direction='north') hp= 69.0 - step=148 action=move(direction='west') hp= 69.0 - step=149 action=move(direction='east') hp= 69.0 - step=150 action=move(direction='west') hp= 69.0 -ep=0185 [medium] steps=150 reward= -20.690 evac=0 hp= 69.0 suc30=0.87 r30= +10.95 t=44s - >> PPO update samples=flushed pi_loss=-0.0119 v_loss=12.5926 entropy=1.3605 kl=0.0001 clip%=0.00 lr=5.02e-05 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=move(direction='south') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='west') hp=100.0 -ep=0186 [medium] steps=012 reward= +16.000 evac=1 hp=100.0 suc30=0.87 r30= +11.05 t=44s - step=001 action=door(target_id='door_8', door_state='close') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='south') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=door(target_id='door_3', door_state='close') hp=100.0 - step=012 action=door(target_id='door_3', door_state='close') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='west') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=door(target_id='door_3', door_state='close') hp=100.0 - step=017 action=move(direction='west') hp=100.0 - step=018 action=door(target_id='door_3', door_state='close') hp=100.0 - step=019 action=move(direction='west') hp=100.0 - step=020 action=move(direction='west') hp= 99.5 - step=021 action=move(direction='east') hp= 99.5 - step=022 action=wait() hp= 99.0 - step=023 action=move(direction='west') hp= 98.5 - step=024 action=move(direction='west') hp= 98.0 - step=025 action=door(target_id='door_1', door_state='close') hp= 98.0 - step=026 action=move(direction='north') hp= 98.0 - step=027 action=move(direction='east') hp= 98.0 - step=028 action=wait() hp= 97.5 - step=029 action=wait() hp= 97.0 - step=030 action=wait() hp= 96.5 - step=031 action=move(direction='east') hp= 96.0 - step=032 action=move(direction='west') hp= 94.0 - step=033 action=door(target_id='door_1', door_state='close') hp= 93.5 - step=034 action=move(direction='west') hp= 91.5 - step=035 action=move(direction='west') hp= 91.0 - step=036 action=door(target_id='door_1', door_state='close') hp= 90.5 - step=037 action=move(direction='west') hp= 90.0 - step=038 action=door(target_id='door_5', door_state='close') hp= 90.0 - step=039 action=move(direction='west') hp= 90.0 - step=040 action=door(target_id='door_1', door_state='open') hp= 90.0 - step=041 action=door(target_id='door_1', door_state='close') hp= 90.0 - step=042 action=door(target_id='door_1', door_state='open') hp= 90.0 - step=043 action=wait() hp= 90.0 - step=044 action=wait() hp= 90.0 - step=045 action=move(direction='east') hp= 90.0 - step=046 action=move(direction='west') hp= 89.5 - step=047 action=wait() hp= 89.5 - step=048 action=wait() hp= 89.5 - step=049 action=move(direction='east') hp= 89.5 - step=050 action=move(direction='west') hp= 89.0 - step=051 action=wait() hp= 89.0 - step=052 action=door(target_id='door_1', door_state='close') hp= 89.0 - step=053 action=door(target_id='door_1', door_state='open') hp= 89.0 - step=054 action=wait() hp= 89.0 - step=055 action=door(target_id='door_1', door_state='close') hp= 89.0 - step=056 action=door(target_id='door_1', door_state='open') hp= 89.0 - step=057 action=door(target_id='door_1', door_state='close') hp= 89.0 - step=058 action=wait() hp= 89.0 - step=059 action=door(target_id='door_1', door_state='open') hp= 89.0 - step=060 action=move(direction='south') hp= 89.0 - step=061 action=wait() hp= 89.0 - step=062 action=move(direction='west') hp= 89.0 -ep=0187 [medium] steps=062 reward= +11.835 evac=1 hp= 89.0 suc30=0.87 r30= +10.92 t=45s - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 -ep=0188 [medium] steps=004 reward= +15.550 evac=1 hp=100.0 suc30=0.87 r30= +11.05 t=45s - step=001 action=move(direction='north') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='north') hp=100.0 -ep=0189 [medium] steps=007 reward= +16.100 evac=1 hp=100.0 suc30=0.87 r30= +11.06 t=45s - step=001 action=move(direction='north') hp=100.0 - step=002 action=door(target_id='door_3', door_state='open') hp=100.0 - step=003 action=move(direction='east') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=door(target_id='door_2', door_state='open') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=door(target_id='door_1', door_state='close') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='east') hp=100.0 - step=014 action=door(target_id='door_1', door_state='open') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=door(target_id='door_1', door_state='close') hp=100.0 - step=017 action=move(direction='east') hp=100.0 - step=018 action=door(target_id='door_5', door_state='close') hp=100.0 - step=019 action=door(target_id='door_2', door_state='open') hp=100.0 - step=020 action=door(target_id='door_5', door_state='close') hp=100.0 - step=021 action=move(direction='west') hp=100.0 - step=022 action=wait() hp=100.0 - step=023 action=door(target_id='door_1', door_state='open') hp=100.0 - step=024 action=wait() hp=100.0 - step=025 action=move(direction='south') hp=100.0 - step=026 action=move(direction='west') hp=100.0 -ep=0190 [medium] steps=026 reward= +15.130 evac=1 hp=100.0 suc30=0.87 r30= +11.01 t=45s - >> PPO update samples=flushed pi_loss=-0.0017 v_loss=6.0652 entropy=1.2936 kl=0.0001 clip%=0.00 lr=4.35e-05 - step=001 action=move(direction='north') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=wait() hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='south') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='south') hp=100.0 - step=017 action=move(direction='south') hp=100.0 - step=018 action=move(direction='east') hp=100.0 - step=019 action=move(direction='north') hp= 99.5 - step=020 action=move(direction='west') hp= 99.5 - step=021 action=wait() hp= 99.5 - step=022 action=move(direction='north') hp= 99.5 - step=023 action=wait() hp= 99.5 - step=024 action=move(direction='east') hp= 99.5 - step=025 action=move(direction='west') hp= 99.5 - step=026 action=move(direction='north') hp= 99.5 - step=027 action=wait() hp= 99.5 - step=028 action=move(direction='south') hp= 99.5 - step=029 action=move(direction='south') hp= 99.5 - step=030 action=move(direction='north') hp= 97.5 - step=031 action=move(direction='north') hp= 97.0 - step=032 action=move(direction='north') hp= 97.0 - step=033 action=move(direction='east') hp= 97.0 - step=034 action=move(direction='west') hp= 97.0 - step=035 action=move(direction='west') hp= 97.0 -ep=0191 [medium] steps=035 reward= +15.075 evac=1 hp= 97.0 suc30=0.90 r30= +12.05 t=45s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='south') hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='north') hp=100.0 -ep=0192 [medium] steps=017 reward= +15.030 evac=1 hp=100.0 suc30=0.90 r30= +12.01 t=45s - step=001 action=move(direction='north') hp=100.0 - step=002 action=door(target_id='door_2', door_state='open') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='south') hp=100.0 - step=006 action=door(target_id='door_6', door_state='close') hp=100.0 - step=007 action=move(direction='east') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='south') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='west') hp=100.0 - step=014 action=move(direction='north') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=move(direction='west') hp=100.0 -ep=0193 [medium] steps=016 reward= +16.800 evac=1 hp=100.0 suc30=0.90 r30= +12.10 t=45s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='south') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='south') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=wait() hp=100.0 - step=011 action=wait() hp=100.0 - step=012 action=move(direction='north') hp=100.0 - step=013 action=move(direction='south') hp= 89.5 - step=014 action=move(direction='north') hp= 89.5 - step=015 action=wait() hp= 74.5 - step=016 action=wait() hp= 59.5 - step=017 action=wait() hp= 44.5 - step=018 action=wait() hp= 39.5 - step=019 action=wait() hp= 34.5 - step=020 action=wait() hp= 29.5 - step=021 action=move(direction='south') hp= 24.5 - step=022 action=move(direction='south') hp= 19.5 - step=023 action=wait() hp= 7.5 -ep=0194 [medium] steps=023 reward= -14.930 evac=0 hp= 0.0 suc30=0.87 r30= +10.96 t=45s - step=001 action=move(direction='west') hp=100.0 -ep=0195 [medium] steps=001 reward= +14.260 evac=1 hp=100.0 suc30=0.87 r30= +10.91 t=45s - >> PPO update samples=flushed pi_loss=-0.0001 v_loss=51.1310 entropy=1.0334 kl=0.0000 clip%=0.00 lr=3.67e-05 - step=001 action=door(target_id='door_4', door_state='close') hp=100.0 - step=002 action=door(target_id='door_4', door_state='close') hp=100.0 - step=003 action=door(target_id='door_4', door_state='close') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=door(target_id='door_4', door_state='close') hp=100.0 - step=007 action=door(target_id='door_7', door_state='close') hp=100.0 - step=008 action=door(target_id='door_8', door_state='close') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=door(target_id='door_4', door_state='close') hp=100.0 - step=012 action=door(target_id='door_3', door_state='open') hp=100.0 - step=013 action=door(target_id='door_3', door_state='open') hp=100.0 - step=014 action=door(target_id='door_4', door_state='close') hp=100.0 - step=015 action=move(direction='east') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=move(direction='east') hp=100.0 - step=018 action=move(direction='west') hp=100.0 - step=019 action=move(direction='north') hp=100.0 - step=020 action=door(target_id='door_8', door_state='close') hp=100.0 - step=021 action=move(direction='west') hp=100.0 - step=022 action=move(direction='west') hp=100.0 - step=023 action=move(direction='north') hp=100.0 - step=024 action=door(target_id='door_4', door_state='close') hp=100.0 - step=025 action=move(direction='west') hp=100.0 - step=026 action=move(direction='west') hp=100.0 - step=027 action=move(direction='west') hp=100.0 - step=028 action=move(direction='west') hp=100.0 - step=029 action=door(target_id='door_3', door_state='open') hp=100.0 - step=030 action=door(target_id='door_2', door_state='close') hp=100.0 - step=031 action=move(direction='west') hp=100.0 - step=032 action=wait() hp=100.0 - step=033 action=wait() hp=100.0 - step=034 action=move(direction='west') hp=100.0 - step=035 action=move(direction='east') hp=100.0 - step=036 action=door(target_id='door_2', door_state='open') hp=100.0 - step=037 action=move(direction='west') hp=100.0 - step=038 action=move(direction='south') hp=100.0 - step=039 action=move(direction='west') hp= 99.5 - step=040 action=move(direction='west') hp= 97.5 - step=041 action=move(direction='north') hp= 82.5 - step=042 action=move(direction='west') hp= 80.5 - step=043 action=move(direction='west') hp= 80.0 - step=044 action=wait() hp= 79.5 - step=045 action=wait() hp= 79.0 - step=046 action=door(target_id='door_1', door_state='open') hp= 78.5 - step=047 action=door(target_id='door_1', door_state='close') hp= 78.0 - step=048 action=wait() hp= 76.0 - step=049 action=wait() hp= 74.0 - step=050 action=door(target_id='door_1', door_state='open') hp= 72.0 - step=051 action=move(direction='south') hp= 70.0 - step=052 action=move(direction='west') hp= 68.0 -ep=0196 [medium] steps=052 reward= +10.780 evac=1 hp= 68.0 suc30=0.87 r30= +10.74 t=46s - step=001 action=move(direction='north') hp=100.0 - step=002 action=wait() hp=100.0 - step=003 action=move(direction='south') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='south') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='north') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='south') hp=100.0 - step=012 action=wait() hp=100.0 - step=013 action=move(direction='north') hp=100.0 - step=014 action=move(direction='east') hp=100.0 - step=015 action=move(direction='west') hp=100.0 - step=016 action=move(direction='north') hp=100.0 - step=017 action=move(direction='north') hp=100.0 - step=018 action=move(direction='north') hp=100.0 - step=019 action=move(direction='west') hp=100.0 -ep=0197 [medium] steps=019 reward= +16.220 evac=1 hp=100.0 suc30=0.87 r30= +10.80 t=46s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='east') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=door(target_id='door_3', door_state='open') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=door(target_id='door_3', door_state='open') hp=100.0 - step=007 action=wait() hp=100.0 - step=008 action=door(target_id='door_3', door_state='open') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=wait() hp= 85.0 - step=011 action=move(direction='east') hp= 70.0 - step=012 action=move(direction='east') hp= 58.0 - step=013 action=wait() hp= 58.0 - step=014 action=door(target_id='door_3', door_state='open') hp= 57.5 - step=015 action=door(target_id='door_3', door_state='open') hp= 57.0 - step=016 action=move(direction='west') hp= 56.5 - step=017 action=move(direction='south') hp= 51.5 - step=018 action=door(target_id='door_3', door_state='open') hp= 36.5 - step=019 action=door(target_id='door_3', door_state='close') hp= 31.5 - step=020 action=wait() hp= 26.5 - step=021 action=door(target_id='door_3', door_state='open') hp= 21.5 - step=022 action=wait() hp= 16.5 - step=023 action=wait() hp= 11.5 - step=024 action=wait() hp= 6.5 - step=025 action=wait() hp= 1.5 -ep=0198 [medium] steps=025 reward= -14.820 evac=0 hp= 0.0 suc30=0.83 r30= +9.78 t=46s - step=001 action=move(direction='west') hp=100.0 - step=002 action=door(target_id='door_4', door_state='close') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=door(target_id='door_2', door_state='close') hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=door(target_id='door_3', door_state='close') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=wait() hp=100.0 - step=010 action=move(direction='north') hp=100.0 - step=011 action=move(direction='north') hp=100.0 - step=012 action=door(target_id='door_3', door_state='open') hp=100.0 - step=013 action=door(target_id='door_3', door_state='open') hp=100.0 - step=014 action=move(direction='south') hp=100.0 - step=015 action=move(direction='north') hp=100.0 - step=016 action=wait() hp=100.0 - step=017 action=move(direction='east') hp=100.0 - step=018 action=move(direction='east') hp=100.0 - step=019 action=move(direction='west') hp=100.0 - step=020 action=move(direction='west') hp=100.0 - step=021 action=door(target_id='door_3', door_state='open') hp=100.0 - step=022 action=move(direction='south') hp=100.0 - step=023 action=move(direction='north') hp=100.0 - step=024 action=move(direction='east') hp=100.0 - step=025 action=door(target_id='door_3', door_state='open') hp=100.0 - step=026 action=move(direction='west') hp=100.0 - step=027 action=door(target_id='door_3', door_state='open') hp=100.0 - step=028 action=wait() hp=100.0 - step=029 action=door(target_id='door_3', door_state='open') hp=100.0 - step=030 action=move(direction='south') hp=100.0 - step=031 action=move(direction='north') hp=100.0 - step=032 action=move(direction='east') hp=100.0 - step=033 action=move(direction='west') hp=100.0 - step=034 action=move(direction='south') hp=100.0 - step=035 action=wait() hp=100.0 - step=036 action=door(target_id='door_3', door_state='open') hp=100.0 - step=037 action=move(direction='east') hp=100.0 - step=038 action=wait() hp=100.0 - step=039 action=move(direction='west') hp=100.0 - step=040 action=move(direction='north') hp=100.0 - step=041 action=wait() hp=100.0 - step=042 action=wait() hp=100.0 - step=043 action=door(target_id='door_3', door_state='open') hp=100.0 - step=044 action=wait() hp=100.0 - step=045 action=wait() hp=100.0 - step=046 action=door(target_id='door_3', door_state='open') hp=100.0 - step=047 action=wait() hp=100.0 - step=048 action=move(direction='south') hp=100.0 - step=049 action=door(target_id='door_3', door_state='open') hp=100.0 - step=050 action=door(target_id='door_3', door_state='open') hp=100.0 - step=051 action=door(target_id='door_3', door_state='open') hp=100.0 - step=052 action=move(direction='south') hp=100.0 - step=053 action=move(direction='north') hp=100.0 - step=054 action=move(direction='north') hp=100.0 - step=055 action=wait() hp=100.0 - step=056 action=move(direction='east') hp=100.0 - step=057 action=move(direction='east') hp=100.0 - step=058 action=move(direction='west') hp=100.0 - step=059 action=door(target_id='door_3', door_state='open') hp=100.0 - step=060 action=move(direction='west') hp=100.0 - step=061 action=door(target_id='door_3', door_state='open') hp=100.0 - step=062 action=wait() hp=100.0 - step=063 action=move(direction='south') hp=100.0 - step=064 action=move(direction='north') hp=100.0 - step=065 action=move(direction='east') hp=100.0 - step=066 action=wait() hp=100.0 - step=067 action=wait() hp=100.0 - step=068 action=wait() hp=100.0 - step=069 action=wait() hp=100.0 - step=070 action=door(target_id='door_3', door_state='open') hp=100.0 - step=071 action=wait() hp=100.0 - step=072 action=move(direction='west') hp=100.0 - step=073 action=wait() hp=100.0 - step=074 action=wait() hp=100.0 - step=075 action=move(direction='south') hp=100.0 - step=076 action=wait() hp=100.0 - step=077 action=door(target_id='door_3', door_state='open') hp=100.0 - step=078 action=move(direction='north') hp=100.0 - step=079 action=move(direction='east') hp=100.0 - step=080 action=wait() hp=100.0 - step=081 action=door(target_id='door_3', door_state='open') hp=100.0 - step=082 action=move(direction='south') hp=100.0 - step=083 action=move(direction='east') hp=100.0 - step=084 action=move(direction='north') hp=100.0 - step=085 action=move(direction='west') hp=100.0 - step=086 action=wait() hp=100.0 - step=087 action=move(direction='west') hp=100.0 - step=088 action=move(direction='south') hp=100.0 - step=089 action=move(direction='north') hp=100.0 - step=090 action=door(target_id='door_3', door_state='open') hp=100.0 - step=091 action=wait() hp=100.0 - step=092 action=wait() hp=100.0 - step=093 action=wait() hp=100.0 - step=094 action=move(direction='east') hp=100.0 - step=095 action=move(direction='east') hp=100.0 - step=096 action=wait() hp=100.0 - step=097 action=move(direction='south') hp=100.0 - step=098 action=door(target_id='door_3', door_state='open') hp=100.0 - step=099 action=move(direction='west') hp=100.0 - step=100 action=move(direction='east') hp=100.0 - step=101 action=move(direction='west') hp=100.0 - step=102 action=move(direction='east') hp=100.0 - step=103 action=move(direction='south') hp=100.0 - step=104 action=wait() hp=100.0 - step=105 action=move(direction='west') hp=100.0 - step=106 action=move(direction='west') hp=100.0 - step=107 action=move(direction='east') hp=100.0 - step=108 action=wait() hp=100.0 - step=109 action=move(direction='east') hp=100.0 - step=110 action=wait() hp=100.0 - step=111 action=move(direction='north') hp=100.0 - step=112 action=wait() hp=100.0 - step=113 action=move(direction='south') hp=100.0 - step=114 action=move(direction='west') hp=100.0 - step=115 action=wait() hp=100.0 - step=116 action=door(target_id='door_3', door_state='open') hp=100.0 - step=117 action=door(target_id='door_3', door_state='close') hp=100.0 - step=118 action=move(direction='east') hp=100.0 - step=119 action=move(direction='west') hp=100.0 - step=120 action=move(direction='east') hp=100.0 - step=121 action=move(direction='north') hp=100.0 - step=122 action=move(direction='west') hp=100.0 - step=123 action=move(direction='west') hp=100.0 - step=124 action=wait() hp=100.0 - step=125 action=wait() hp=100.0 - step=126 action=move(direction='north') hp=100.0 - step=127 action=move(direction='east') hp=100.0 - step=128 action=door(target_id='door_3', door_state='open') hp=100.0 - step=129 action=move(direction='south') hp=100.0 - step=130 action=move(direction='west') hp=100.0 - step=131 action=move(direction='north') hp=100.0 - step=132 action=wait() hp=100.0 - step=133 action=move(direction='east') hp=100.0 - step=134 action=move(direction='west') hp=100.0 - step=135 action=move(direction='south') hp=100.0 - step=136 action=door(target_id='door_3', door_state='open') hp=100.0 - step=137 action=door(target_id='door_3', door_state='open') hp=100.0 - step=138 action=door(target_id='door_3', door_state='open') hp=100.0 - step=139 action=move(direction='east') hp=100.0 - step=140 action=move(direction='west') hp=100.0 - step=141 action=move(direction='south') hp=100.0 - step=142 action=move(direction='east') hp=100.0 - step=143 action=move(direction='east') hp=100.0 - step=144 action=move(direction='west') hp=100.0 - step=145 action=move(direction='west') hp=100.0 - step=146 action=wait() hp=100.0 - step=147 action=wait() hp=100.0 - step=148 action=wait() hp=100.0 - step=149 action=door(target_id='door_3', door_state='open') hp=100.0 - step=150 action=move(direction='east') hp=100.0 -ep=0199 [medium] steps=150 reward= -22.310 evac=0 hp=100.0 suc30=0.80 r30= +8.49 t=47s - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=wait() hp=100.0 - step=005 action=move(direction='north') hp=100.0 - step=006 action=wait() hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='north') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='north') hp= 88.0 - step=013 action=move(direction='north') hp= 88.0 - step=014 action=move(direction='west') hp= 88.0 - step=015 action=move(direction='east') hp= 88.0 - step=016 action=move(direction='west') hp= 88.0 - step=017 action=move(direction='north') hp= 88.0 - step=018 action=wait() hp= 88.0 - step=019 action=move(direction='north') hp= 88.0 - step=020 action=move(direction='north') hp= 88.0 - step=021 action=move(direction='south') hp= 88.0 - step=022 action=move(direction='south') hp= 88.0 - step=023 action=move(direction='north') hp= 87.5 - step=024 action=move(direction='north') hp= 87.5 - step=025 action=move(direction='north') hp= 87.5 - step=026 action=move(direction='west') hp= 87.5 -ep=0200 [medium] steps=026 reward= +14.982 evac=1 hp= 87.5 suc30=0.80 r30= +8.45 t=47s - >> PPO update samples=flushed pi_loss=-0.0949 v_loss=24.6195 entropy=1.3600 kl=0.0001 clip%=0.00 lr=3.00e-05 - step=001 action=move(direction='north') hp=100.0 - step=002 action=move(direction='north') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='west') hp=100.0 - step=004 action=move(direction='west') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='north') hp=100.0 - step=007 action=move(direction='north') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=001 action=move(direction='west') hp=100.0 - step=002 action=move(direction='west') hp=100.0 - step=003 action=move(direction='north') hp=100.0 - step=004 action=move(direction='north') hp=100.0 - step=005 action=move(direction='west') hp=100.0 - step=006 action=move(direction='west') hp=100.0 - step=007 action=move(direction='west') hp=100.0 - step=008 action=move(direction='west') hp=100.0 - step=009 action=move(direction='west') hp=100.0 - step=010 action=move(direction='west') hp=100.0 - step=011 action=move(direction='west') hp=100.0 - step=012 action=move(direction='west') hp=100.0 - step=013 action=move(direction='west') hp= 98.0 - step=014 action=move(direction='north') hp= 93.0 - step=015 action=wait() hp= 88.0 - step=016 action=wait() hp= 83.0 - step=017 action=wait() hp= 78.0 - step=018 action=wait() hp= 73.0 - step=019 action=wait() hp= 68.0 - step=020 action=wait() hp= 63.0 - step=021 action=wait() hp= 58.0 - step=022 action=wait() hp= 53.0 - step=023 action=wait() hp= 48.0 - step=024 action=wait() hp= 43.0 - step=025 action=wait() hp= 38.0 - step=026 action=wait() hp= 33.0 - step=027 action=wait() hp= 28.0 - step=028 action=wait() hp= 23.0 - step=029 action=wait() hp= 18.0 - step=030 action=wait() hp= 13.0 - step=031 action=wait() hp= 8.0 - step=032 action=wait() hp= 3.0 - ** EVAL [medium] reward=+6.807 success=0.67 steps=14.7 - [ckpt] saved -> artifacts/pyre_ppo_http_ckpt.pt - -[done] Model saved -> artifacts/pyre_ppo_fixed.pti -[done] Metrics CSV -> artifacts/pyre_ppo_fixed.csv -[done] Eval CSV -> artifacts/pyre_ppo_fixed_eval.csv -[done] Graph PNG -> artifacts/pyre_ppo_fixed.png - -[summary] 200 episodes in 48.2s (4.2 eps/s) -[summary] Final success rate (last 30): 0.80 -[summary] Final reward mean (last 30): +8.446 +ep=0001 [easy ] steps=030 reward= +17.290 evac=1 hp=100.0 s30=1.00 t=0s +ep=0002 [easy ] steps=200 reward= -9.295 evac=0 hp= 98.5 s30=0.50 t=1s +ep=0003 [easy ] steps=200 reward= -9.090 evac=0 hp=100.0 s30=0.33 t=2s +ep=0004 [easy ] steps=200 reward= -16.130 evac=0 hp= 62.0 s30=0.25 t=3s +ep=0005 [easy ] steps=200 reward= -12.080 evac=0 hp=100.0 s30=0.20 t=3s +ep=0006 [easy ] steps=200 reward= -12.320 evac=0 hp=100.0 s30=0.17 t=4s +ep=0007 [easy ] steps=200 reward= -14.560 evac=0 hp=100.0 s30=0.14 t=6s +ep=0008 [easy ] steps=200 reward= -9.890 evac=0 hp=100.0 s30=0.12 t=7s + >> PPO update pi_loss=-0.0065 v_loss=4.3169 entropy=2.0778 kl=0.0066 lr=2.96e-04 +ep=0009 [easy ] steps=141 reward= +10.370 evac=1 hp= 98.0 s30=0.22 t=8s +ep=0010 [easy ] steps=200 reward= -17.620 evac=0 hp=100.0 s30=0.20 t=9s +ep=0011 [easy ] steps=049 reward= +15.740 evac=1 hp=100.0 s30=0.27 t=9s +ep=0012 [easy ] steps=200 reward= -9.980 evac=0 hp=100.0 s30=0.25 t=10s +ep=0013 [easy ] steps=200 reward= -3.220 evac=0 hp=100.0 s30=0.23 t=11s +ep=0014 [easy ] steps=123 reward= +11.430 evac=1 hp=100.0 s30=0.29 t=12s +ep=0015 [easy ] steps=200 reward= -5.360 evac=0 hp=100.0 s30=0.27 t=13s +ep=0016 [easy ] steps=127 reward= +10.970 evac=1 hp=100.0 s30=0.31 t=13s + >> PPO update pi_loss=-0.0035 v_loss=6.9670 entropy=2.0211 kl=0.0010 lr=2.93e-04 +ep=0017 [easy ] steps=156 reward= +10.050 evac=1 hp= 98.0 s30=0.35 t=15s +ep=0018 [easy ] steps=088 reward= +14.830 evac=1 hp=100.0 s30=0.39 t=15s +ep=0019 [easy ] steps=200 reward= -17.770 evac=0 hp=100.0 s30=0.37 t=16s +ep=0020 [easy ] steps=176 reward= +9.470 evac=1 hp=100.0 s30=0.40 t=17s +ep=0021 [easy ] steps=200 reward= -6.850 evac=0 hp= 99.0 s30=0.38 t=18s +ep=0022 [easy ] steps=021 reward= +17.650 evac=1 hp=100.0 s30=0.41 t=18s +ep=0023 [easy ] steps=096 reward= +11.280 evac=1 hp=100.0 s30=0.43 t=18s +ep=0024 [easy ] steps=200 reward= -5.890 evac=0 hp= 95.0 s30=0.42 t=19s + >> PPO update pi_loss=-0.0139 v_loss=8.8112 entropy=2.1823 kl=0.0050 lr=2.89e-04 +ep=0025 [easy ] steps=139 reward= +11.360 evac=1 hp=100.0 s30=0.44 t=21s + ** EVAL [hard] reward=-10.124 success=0.00 +ep=0026 [easy ] steps=200 reward= -19.000 evac=0 hp=100.0 s30=0.42 t=23s +ep=0027 [easy ] steps=063 reward= +14.240 evac=1 hp=100.0 s30=0.44 t=23s +ep=0028 [easy ] steps=200 reward= -15.310 evac=0 hp=100.0 s30=0.43 t=24s +ep=0029 [easy ] steps=200 reward= -11.940 evac=0 hp=100.0 s30=0.41 t=25s +ep=0030 [easy ] steps=200 reward= -12.810 evac=0 hp=100.0 s30=0.40 t=26s +ep=0031 [easy ] steps=031 reward= +16.630 evac=1 hp=100.0 s30=0.40 t=26s +ep=0032 [easy ] steps=200 reward= -9.350 evac=0 hp=100.0 s30=0.40 t=27s + >> PPO update pi_loss=-0.0033 v_loss=6.9677 entropy=1.7971 kl=0.0012 lr=2.86e-04 +ep=0033 [easy ] steps=200 reward= -12.530 evac=0 hp=100.0 s30=0.40 t=29s +ep=0034 [easy ] steps=043 reward= +15.460 evac=1 hp=100.0 s30=0.43 t=29s +ep=0035 [easy ] steps=009 reward= +17.210 evac=1 hp=100.0 s30=0.47 t=29s +ep=0036 [easy ] steps=200 reward= -9.600 evac=0 hp=100.0 s30=0.47 t=30s +ep=0037 [easy ] steps=200 reward= -13.100 evac=0 hp=100.0 s30=0.47 t=31s +ep=0038 [easy ] steps=088 reward= +14.520 evac=1 hp=100.0 s30=0.50 t=31s +ep=0039 [easy ] steps=200 reward= -8.090 evac=0 hp=100.0 s30=0.47 t=32s +ep=0040 [easy ] steps=058 reward= +15.970 evac=1 hp=100.0 s30=0.50 t=32s + >> PPO update pi_loss=-0.0045 v_loss=10.7413 entropy=2.1999 kl=0.0039 lr=2.82e-04 +ep=0041 [easy ] steps=047 reward= +15.940 evac=1 hp=100.0 s30=0.50 t=33s +ep=0042 [easy ] steps=200 reward= -8.200 evac=0 hp=100.0 s30=0.50 t=34s +ep=0043 [easy ] steps=187 reward= +9.560 evac=1 hp=100.0 s30=0.53 t=35s +ep=0044 [easy ] steps=130 reward= +10.540 evac=1 hp=100.0 s30=0.53 t=36s +ep=0045 [easy ] steps=200 reward= -16.440 evac=0 hp=100.0 s30=0.53 t=36s +ep=0046 [easy ] steps=048 reward= +16.420 evac=1 hp=100.0 s30=0.53 t=37s +ep=0047 [easy ] steps=064 reward= +14.910 evac=1 hp=100.0 s30=0.53 t=37s +ep=0048 [easy ] steps=048 reward= -14.320 evac=0 hp= 0.0 s30=0.50 t=37s + >> PPO update pi_loss=-0.0083 v_loss=11.0590 entropy=2.0080 kl=0.0078 lr=2.78e-04 +ep=0049 [easy ] steps=140 reward= +11.540 evac=1 hp=100.0 s30=0.53 t=38s +ep=0050 [easy ] steps=013 reward= +19.200 evac=1 hp=100.0 s30=0.53 t=38s + ** EVAL [hard] reward=-11.184 success=0.00 +ep=0051 [easy ] steps=200 reward= -11.910 evac=0 hp=100.0 s30=0.53 t=41s +ep=0052 [easy ] steps=080 reward= +15.090 evac=1 hp=100.0 s30=0.53 t=41s +ep=0053 [easy ] steps=088 reward= +14.720 evac=1 hp=100.0 s30=0.53 t=42s +ep=0054 [easy ] steps=004 reward= +17.580 evac=1 hp=100.0 s30=0.57 t=42s +ep=0055 [easy ] steps=200 reward= -13.970 evac=0 hp=100.0 s30=0.53 t=43s +ep=0056 [easy ] steps=062 reward= +15.320 evac=1 hp=100.0 s30=0.57 t=43s + >> PPO update pi_loss=-0.0230 v_loss=13.0751 entropy=2.0431 kl=0.0081 lr=2.75e-04 +ep=0057 [easy ] steps=021 reward= +18.980 evac=1 hp=100.0 s30=0.57 t=44s +ep=0058 [easy ] steps=019 reward= +17.800 evac=1 hp=100.0 s30=0.60 t=44s +ep=0059 [easy ] steps=012 reward= +18.630 evac=1 hp=100.0 s30=0.63 t=44s +ep=0060 [easy ] steps=067 reward= +14.700 evac=1 hp=100.0 s30=0.67 t=44s +ep=0061 [easy ] steps=129 reward= +11.070 evac=1 hp=100.0 s30=0.67 t=45s +ep=0062 [easy ] steps=045 reward= +17.620 evac=1 hp=100.0 s30=0.70 t=45s +ep=0063 [easy ] steps=040 reward= +14.960 evac=1 hp=100.0 s30=0.73 t=45s +ep=0064 [easy ] steps=041 reward= +16.660 evac=1 hp=100.0 s30=0.73 t=45s + >> PPO update pi_loss=-0.0191 v_loss=44.0687 entropy=1.8650 kl=0.0033 lr=2.71e-04 +ep=0065 [easy ] steps=082 reward= +14.330 evac=1 hp=100.0 s30=0.73 t=46s +ep=0066 [easy ] steps=015 reward= +17.400 evac=1 hp=100.0 s30=0.77 t=46s +ep=0067 [easy ] steps=018 reward= +17.970 evac=1 hp=100.0 s30=0.80 t=46s +ep=0068 [easy ] steps=200 reward= -16.405 evac=0 hp= 9.5 s30=0.77 t=47s +ep=0069 [easy ] steps=005 reward= +17.930 evac=1 hp=100.0 s30=0.80 t=47s +ep=0070 [easy ] steps=044 reward= +16.420 evac=1 hp=100.0 s30=0.80 t=47s +ep=0071 [easy ] steps=200 reward= -12.220 evac=0 hp=100.0 s30=0.77 t=48s +ep=0072 [easy ] steps=151 reward= +2.060 evac=1 hp=100.0 s30=0.80 t=49s + >> PPO update pi_loss=-0.0054 v_loss=13.8242 entropy=2.0114 kl=0.0043 lr=2.68e-04 +ep=0073 [easy ] steps=005 reward= +17.990 evac=1 hp=100.0 s30=0.80 t=49s +ep=0074 [easy ] steps=200 reward= -12.780 evac=0 hp=100.0 s30=0.77 t=50s +ep=0075 [easy ] steps=034 reward= +16.900 evac=1 hp=100.0 s30=0.80 t=50s + ** EVAL [hard] reward=-11.468 success=0.00 +ep=0076 [easy ] steps=017 reward= +19.290 evac=1 hp=100.0 s30=0.80 t=51s +ep=0077 [easy ] steps=022 reward= +17.490 evac=1 hp=100.0 s30=0.80 t=51s +ep=0078 [easy ] steps=005 reward= +17.050 evac=1 hp=100.0 s30=0.83 t=51s +ep=0079 [easy ] steps=017 reward= +18.580 evac=1 hp=100.0 s30=0.83 t=51s +ep=0080 [easy ] steps=030 reward= +16.785 evac=1 hp= 99.0 s30=0.83 t=51s + >> PPO update pi_loss=-0.0150 v_loss=35.1268 entropy=2.0081 kl=0.0006 lr=2.64e-04 + [curriculum] Advanced to 'medium' (suc30=0.87) +ep=0081 [easy ] steps=176 reward= +8.340 evac=1 hp=100.0 s30=0.87 t=52s +ep=0082 [medium] steps=050 reward= -19.900 evac=0 hp= 0.0 s30=0.83 t=53s +ep=0083 [medium] steps=029 reward= -14.630 evac=0 hp= 0.0 s30=0.80 t=53s +ep=0084 [medium] steps=025 reward= +15.600 evac=1 hp=100.0 s30=0.80 t=53s +ep=0085 [medium] steps=080 reward= -24.320 evac=0 hp= 0.0 s30=0.80 t=53s +ep=0086 [medium] steps=043 reward= +8.883 evac=1 hp= 45.5 s30=0.80 t=53s +ep=0087 [medium] steps=040 reward= -15.070 evac=0 hp= 0.0 s30=0.77 t=54s +ep=0088 [medium] steps=014 reward= -15.090 evac=0 hp= 0.0 s30=0.73 t=54s + >> PPO update pi_loss=-0.0087 v_loss=30.8968 entropy=1.8425 kl=0.0018 lr=2.60e-04 +ep=0089 [medium] steps=014 reward= +16.320 evac=1 hp=100.0 s30=0.73 t=54s +ep=0090 [medium] steps=023 reward= +16.400 evac=1 hp=100.0 s30=0.73 t=54s +ep=0091 [medium] steps=026 reward= -14.010 evac=0 hp= 0.0 s30=0.70 t=54s +ep=0092 [medium] steps=017 reward= +15.510 evac=1 hp=100.0 s30=0.70 t=54s +ep=0093 [medium] steps=011 reward= +15.170 evac=1 hp=100.0 s30=0.70 t=54s +ep=0094 [medium] steps=036 reward= -20.530 evac=0 hp= 0.0 s30=0.67 t=54s +ep=0095 [medium] steps=150 reward= -14.250 evac=0 hp=100.0 s30=0.63 t=55s +ep=0096 [medium] steps=029 reward= -9.600 evac=0 hp= 0.0 s30=0.60 t=55s + >> PPO update pi_loss=-0.0083 v_loss=34.3949 entropy=1.8332 kl=0.0026 lr=2.57e-04 +ep=0097 [medium] steps=016 reward= +16.800 evac=1 hp=100.0 s30=0.60 t=56s +ep=0098 [medium] steps=150 reward= -13.210 evac=0 hp= 99.0 s30=0.60 t=56s +ep=0099 [medium] steps=005 reward= +15.000 evac=1 hp=100.0 s30=0.60 t=56s +ep=0100 [medium] steps=150 reward= -17.065 evac=0 hp= 1.5 s30=0.57 t=57s + ** EVAL [hard] reward=-9.827 success=0.00 +ep=0101 [medium] steps=019 reward= +12.548 evac=1 hp= 70.5 s30=0.60 t=59s +ep=0102 [medium] steps=056 reward= -19.370 evac=0 hp= 0.0 s30=0.57 t=59s +ep=0103 [medium] steps=059 reward= +13.980 evac=1 hp=100.0 s30=0.57 t=59s +ep=0104 [medium] steps=150 reward= -26.195 evac=0 hp= 50.5 s30=0.57 t=60s + >> PPO update pi_loss=-0.0004 v_loss=14.6058 entropy=1.3296 kl=0.0008 lr=2.53e-04 +ep=0105 [medium] steps=045 reward= -12.460 evac=0 hp= 0.0 s30=0.53 t=61s +ep=0106 [medium] steps=040 reward= +15.800 evac=1 hp=100.0 s30=0.53 t=61s +ep=0107 [medium] steps=150 reward= -25.180 evac=0 hp=100.0 s30=0.50 t=62s +ep=0108 [medium] steps=150 reward= -9.150 evac=0 hp= 97.0 s30=0.47 t=62s +ep=0109 [medium] steps=023 reward= +15.840 evac=1 hp=100.0 s30=0.47 t=62s +ep=0110 [medium] steps=130 reward= +2.095 evac=1 hp= 87.0 s30=0.47 t=63s +ep=0111 [medium] steps=150 reward= -12.520 evac=0 hp= 97.0 s30=0.43 t=64s +ep=0112 [medium] steps=058 reward= +13.820 evac=1 hp=100.0 s30=0.47 t=64s + >> PPO update pi_loss=-0.0084 v_loss=11.2875 entropy=1.7357 kl=0.0044 lr=2.50e-04 +ep=0113 [medium] steps=001 reward= +14.260 evac=1 hp=100.0 s30=0.50 t=64s +ep=0114 [medium] steps=012 reward= +16.180 evac=1 hp=100.0 s30=0.50 t=65s +ep=0115 [medium] steps=042 reward= +10.953 evac=1 hp= 71.5 s30=0.53 t=65s +ep=0116 [medium] steps=116 reward= +4.300 evac=1 hp=100.0 s30=0.53 t=65s +ep=0117 [medium] steps=150 reward= -12.700 evac=0 hp=100.0 s30=0.53 t=66s +ep=0118 [medium] steps=083 reward= +12.210 evac=1 hp=100.0 s30=0.57 t=66s +ep=0119 [medium] steps=022 reward= -15.580 evac=0 hp= 0.0 s30=0.53 t=66s +ep=0120 [medium] steps=023 reward= +15.445 evac=1 hp= 99.0 s30=0.53 t=66s + >> PPO update pi_loss=-0.0151 v_loss=18.8505 entropy=1.7544 kl=0.0059 lr=2.46e-04 +ep=0121 [medium] steps=005 reward= +15.020 evac=1 hp=100.0 s30=0.57 t=67s +ep=0122 [medium] steps=065 reward= +11.930 evac=1 hp=100.0 s30=0.57 t=67s +ep=0123 [medium] steps=010 reward= +11.925 evac=1 hp= 55.0 s30=0.57 t=67s +ep=0124 [medium] steps=044 reward= +15.040 evac=1 hp=100.0 s30=0.60 t=67s +ep=0125 [medium] steps=093 reward= -24.720 evac=0 hp= 0.0 s30=0.60 t=68s + ** EVAL [hard] reward=-7.792 success=0.20 +ep=0126 [medium] steps=010 reward= +14.650 evac=1 hp=100.0 s30=0.63 t=69s +ep=0127 [medium] steps=019 reward= -13.110 evac=0 hp= 0.0 s30=0.60 t=69s +ep=0128 [medium] steps=018 reward= +14.210 evac=1 hp=100.0 s30=0.63 t=69s + >> PPO update pi_loss=+0.0149 v_loss=31.2597 entropy=1.5042 kl=0.0039 lr=2.42e-04 +ep=0129 [medium] steps=150 reward= -18.700 evac=0 hp= 95.0 s30=0.60 t=70s +ep=0130 [medium] steps=039 reward= +14.670 evac=1 hp=100.0 s30=0.63 t=70s +ep=0131 [medium] steps=034 reward= -10.750 evac=0 hp= 0.0 s30=0.60 t=70s +ep=0132 [medium] steps=045 reward= -23.760 evac=0 hp= 0.0 s30=0.60 t=70s +ep=0133 [medium] steps=150 reward= -17.000 evac=0 hp=100.0 s30=0.57 t=71s +ep=0134 [medium] steps=029 reward= +15.530 evac=1 hp=100.0 s30=0.60 t=71s +ep=0135 [medium] steps=150 reward= -27.040 evac=0 hp= 96.0 s30=0.60 t=72s +ep=0136 [medium] steps=016 reward= +15.660 evac=1 hp=100.0 s30=0.60 t=72s + >> PPO update pi_loss=-0.0045 v_loss=18.2808 entropy=1.4920 kl=0.0046 lr=2.39e-04 +ep=0137 [medium] steps=016 reward= -14.740 evac=0 hp= 0.0 s30=0.60 t=72s +ep=0138 [medium] steps=012 reward= +16.740 evac=1 hp=100.0 s30=0.63 t=72s +ep=0139 [medium] steps=013 reward= +16.620 evac=1 hp=100.0 s30=0.63 t=73s +ep=0140 [medium] steps=008 reward= +14.720 evac=1 hp=100.0 s30=0.63 t=73s +ep=0141 [medium] steps=061 reward= -18.720 evac=0 hp= 0.0 s30=0.63 t=73s +ep=0142 [medium] steps=005 reward= +14.570 evac=1 hp=100.0 s30=0.63 t=73s +ep=0143 [medium] steps=027 reward= +12.642 evac=1 hp= 59.5 s30=0.63 t=73s +ep=0144 [medium] steps=060 reward= +13.820 evac=1 hp=100.0 s30=0.63 t=73s + >> PPO update pi_loss=-0.0005 v_loss=64.7183 entropy=1.4458 kl=0.0003 lr=2.35e-04 +ep=0145 [medium] steps=150 reward= -16.260 evac=0 hp=100.0 s30=0.60 t=74s +ep=0146 [medium] steps=029 reward= +12.297 evac=1 hp= 94.5 s30=0.60 t=74s +ep=0147 [medium] steps=055 reward= +11.020 evac=1 hp=100.0 s30=0.63 t=74s +ep=0148 [medium] steps=015 reward= +16.030 evac=1 hp=100.0 s30=0.63 t=75s +ep=0149 [medium] steps=029 reward= +15.030 evac=1 hp=100.0 s30=0.67 t=75s +ep=0150 [medium] steps=050 reward= +7.125 evac=1 hp= 47.0 s30=0.67 t=75s + ** EVAL [hard] reward=-4.237 success=0.40 +ep=0151 [medium] steps=048 reward= -0.080 evac=1 hp= 14.0 s30=0.67 t=76s +ep=0152 [medium] steps=022 reward= +14.553 evac=1 hp= 81.5 s30=0.67 t=76s + >> PPO update pi_loss=-0.0157 v_loss=36.3181 entropy=1.6305 kl=0.0048 lr=2.32e-04 +ep=0153 [medium] steps=022 reward= -7.700 evac=0 hp= 0.0 s30=0.63 t=77s +ep=0154 [medium] steps=150 reward= -29.100 evac=0 hp=100.0 s30=0.60 t=77s +ep=0155 [medium] steps=028 reward= -16.030 evac=0 hp= 0.0 s30=0.60 t=77s +ep=0156 [medium] steps=019 reward= +15.060 evac=1 hp=100.0 s30=0.60 t=77s +ep=0157 [medium] steps=004 reward= +15.550 evac=1 hp=100.0 s30=0.63 t=77s +ep=0158 [medium] steps=023 reward= +16.510 evac=1 hp=100.0 s30=0.63 t=78s +ep=0159 [medium] steps=020 reward= +16.350 evac=1 hp=100.0 s30=0.67 t=78s +ep=0160 [medium] steps=025 reward= -12.920 evac=0 hp= 0.0 s30=0.63 t=78s + >> PPO update pi_loss=-0.0265 v_loss=29.0037 entropy=1.5078 kl=0.0049 lr=2.28e-04 +ep=0161 [medium] steps=038 reward= -12.270 evac=0 hp= 0.0 s30=0.63 t=78s +ep=0162 [medium] steps=035 reward= -13.590 evac=0 hp= 0.0 s30=0.63 t=79s +ep=0163 [medium] steps=037 reward= -16.930 evac=0 hp= 0.0 s30=0.63 t=79s +ep=0164 [medium] steps=150 reward= -11.790 evac=0 hp=100.0 s30=0.60 t=79s +ep=0165 [medium] steps=030 reward= -15.790 evac=0 hp= 0.0 s30=0.60 t=80s +ep=0166 [medium] steps=150 reward= -9.340 evac=0 hp=100.0 s30=0.57 t=80s +ep=0167 [medium] steps=094 reward= +10.390 evac=1 hp=100.0 s30=0.60 t=81s +ep=0168 [medium] steps=021 reward= +15.260 evac=1 hp=100.0 s30=0.60 t=81s + >> PPO update pi_loss=+0.0017 v_loss=17.6311 entropy=1.7762 kl=0.0062 lr=2.24e-04 +ep=0169 [medium] steps=064 reward= -26.290 evac=0 hp= 0.0 s30=0.57 t=82s +ep=0170 [medium] steps=021 reward= -11.750 evac=0 hp= 0.0 s30=0.53 t=82s +ep=0171 [medium] steps=017 reward= +16.280 evac=1 hp=100.0 s30=0.57 t=82s +ep=0172 [medium] steps=028 reward= +15.830 evac=1 hp=100.0 s30=0.57 t=82s +ep=0173 [medium] steps=052 reward= +14.260 evac=1 hp=100.0 s30=0.57 t=82s +ep=0174 [medium] steps=018 reward= -12.960 evac=0 hp= 0.0 s30=0.53 t=82s +ep=0175 [medium] steps=040 reward= -19.070 evac=0 hp= 0.0 s30=0.53 t=82s + ** EVAL [hard] reward=-6.674 success=0.20 +ep=0176 [medium] steps=150 reward= -30.735 evac=0 hp= 36.5 s30=0.50 t=84s + >> PPO update pi_loss=-0.0060 v_loss=30.8760 entropy=1.4049 kl=0.0037 lr=2.21e-04 +ep=0177 [medium] steps=008 reward= +15.130 evac=1 hp=100.0 s30=0.50 t=84s +ep=0178 [medium] steps=031 reward= +16.050 evac=1 hp=100.0 s30=0.50 t=84s +ep=0179 [medium] steps=009 reward= +15.070 evac=1 hp=100.0 s30=0.50 t=85s +ep=0180 [medium] steps=150 reward= -15.990 evac=0 hp= 6.0 s30=0.47 t=85s +ep=0181 [medium] steps=039 reward= -14.280 evac=0 hp= 0.0 s30=0.43 t=85s +ep=0182 [medium] steps=013 reward= +17.160 evac=1 hp=100.0 s30=0.43 t=85s +ep=0183 [medium] steps=026 reward= +14.380 evac=1 hp=100.0 s30=0.47 t=86s +ep=0184 [medium] steps=150 reward= -8.320 evac=0 hp=100.0 s30=0.47 t=86s + >> PPO update pi_loss=-0.0047 v_loss=30.2118 entropy=1.9547 kl=0.0032 lr=2.17e-04 +ep=0185 [medium] steps=021 reward= +16.540 evac=1 hp=100.0 s30=0.50 t=87s +ep=0186 [medium] steps=081 reward= +11.440 evac=1 hp=100.0 s30=0.50 t=87s +ep=0187 [medium] steps=019 reward= +13.470 evac=1 hp= 84.0 s30=0.50 t=87s +ep=0188 [medium] steps=002 reward= +14.730 evac=1 hp=100.0 s30=0.50 t=87s +ep=0189 [medium] steps=150 reward= -12.590 evac=0 hp= 92.0 s30=0.47 t=88s +ep=0190 [medium] steps=028 reward= +16.133 evac=1 hp= 95.5 s30=0.50 t=88s +ep=0191 [medium] steps=038 reward= -20.930 evac=0 hp= 0.0 s30=0.50 t=89s +ep=0192 [medium] steps=034 reward= +14.270 evac=1 hp=100.0 s30=0.53 t=89s + >> PPO update pi_loss=-0.0183 v_loss=29.7914 entropy=1.7819 kl=0.0039 lr=2.14e-04 +ep=0193 [medium] steps=037 reward= +11.290 evac=1 hp= 34.0 s30=0.57 t=89s +ep=0194 [medium] steps=020 reward= -16.220 evac=0 hp= 0.0 s30=0.57 t=89s +ep=0195 [medium] steps=017 reward= +17.320 evac=1 hp=100.0 s30=0.60 t=89s +ep=0196 [medium] steps=027 reward= +10.703 evac=1 hp= 35.5 s30=0.63 t=89s +ep=0197 [medium] steps=150 reward= -32.225 evac=0 hp= 1.5 s30=0.60 t=90s +ep=0198 [medium] steps=050 reward= -15.130 evac=0 hp= 0.0 s30=0.57 t=90s +ep=0199 [medium] steps=017 reward= +16.950 evac=1 hp=100.0 s30=0.60 t=90s +ep=0200 [medium] steps=150 reward= -26.000 evac=0 hp= 17.0 s30=0.60 t=91s + >> PPO update pi_loss=-0.0057 v_loss=18.1479 entropy=1.1786 kl=0.0061 lr=2.10e-04 + ** EVAL [hard] reward=-12.304 success=0.00 +ep=0201 [medium] steps=150 reward= -16.065 evac=0 hp= 93.5 s30=0.57 t=94s +ep=0202 [medium] steps=021 reward= +16.650 evac=1 hp=100.0 s30=0.57 t=95s +ep=0203 [medium] steps=078 reward= -9.000 evac=0 hp= 0.0 s30=0.53 t=95s +ep=0204 [medium] steps=023 reward= -18.280 evac=0 hp= 0.0 s30=0.53 t=95s +ep=0205 [medium] steps=002 reward= +14.730 evac=1 hp=100.0 s30=0.57 t=95s +ep=0206 [medium] steps=044 reward= -9.060 evac=0 hp= 0.0 s30=0.57 t=95s +ep=0207 [medium] steps=033 reward= -17.560 evac=0 hp= 0.0 s30=0.53 t=95s +ep=0208 [medium] steps=018 reward= -16.030 evac=0 hp= 0.0 s30=0.50 t=96s + >> PPO update pi_loss=-0.0094 v_loss=20.9687 entropy=1.5537 kl=0.0047 lr=2.06e-04 +ep=0209 [medium] steps=029 reward= +11.915 evac=1 hp= 63.0 s30=0.50 t=96s +ep=0210 [medium] steps=150 reward= -19.650 evac=0 hp=100.0 s30=0.50 t=97s +ep=0211 [medium] steps=013 reward= +16.290 evac=1 hp=100.0 s30=0.53 t=97s +ep=0212 [medium] steps=150 reward= -11.440 evac=0 hp= 88.0 s30=0.50 t=97s +ep=0213 [medium] steps=150 reward= -18.155 evac=0 hp= 99.5 s30=0.47 t=98s +ep=0214 [medium] steps=002 reward= +14.730 evac=1 hp=100.0 s30=0.50 t=98s +ep=0215 [medium] steps=140 reward= +9.375 evac=1 hp= 97.0 s30=0.50 t=99s +ep=0216 [medium] steps=150 reward= -20.495 evac=0 hp= 67.5 s30=0.47 t=99s + >> PPO update pi_loss=-0.0179 v_loss=3.1620 entropy=1.5723 kl=0.0041 lr=2.03e-04 +ep=0217 [medium] steps=019 reward= +13.883 evac=1 hp= 89.5 s30=0.47 t=100s +ep=0218 [medium] steps=045 reward= -10.200 evac=0 hp= 0.0 s30=0.43 t=100s +ep=0219 [medium] steps=007 reward= +15.170 evac=1 hp=100.0 s30=0.47 t=100s +ep=0220 [medium] steps=019 reward= -11.850 evac=0 hp= 0.0 s30=0.43 t=100s +ep=0221 [medium] steps=010 reward= +15.740 evac=1 hp=100.0 s30=0.47 t=100s +ep=0222 [medium] steps=034 reward= +14.350 evac=1 hp=100.0 s30=0.47 t=100s +ep=0223 [medium] steps=150 reward= -10.765 evac=0 hp= 12.5 s30=0.43 t=101s +ep=0224 [medium] steps=150 reward= -8.865 evac=0 hp= 86.5 s30=0.43 t=102s + >> PPO update pi_loss=-0.0082 v_loss=28.3779 entropy=1.9019 kl=0.0052 lr=1.99e-04 +ep=0225 [medium] steps=005 reward= +15.900 evac=1 hp=100.0 s30=0.43 t=102s + ** EVAL [hard] reward=-11.080 success=0.00 +ep=0226 [medium] steps=150 reward= -11.360 evac=0 hp=100.0 s30=0.40 t=105s +ep=0227 [medium] steps=011 reward= +15.840 evac=1 hp=100.0 s30=0.43 t=105s +ep=0228 [medium] steps=025 reward= +15.300 evac=1 hp=100.0 s30=0.47 t=105s +ep=0229 [medium] steps=068 reward= -22.710 evac=0 hp= 0.0 s30=0.43 t=106s +ep=0230 [medium] steps=015 reward= -12.360 evac=0 hp= 0.0 s30=0.43 t=106s +ep=0231 [medium] steps=044 reward= -11.060 evac=0 hp= 0.0 s30=0.43 t=106s +ep=0232 [medium] steps=015 reward= +17.160 evac=1 hp=100.0 s30=0.43 t=106s + >> PPO update pi_loss=+0.0029 v_loss=33.0012 entropy=1.4043 kl=0.0086 lr=1.96e-04 +ep=0233 [medium] steps=013 reward= +16.830 evac=1 hp=100.0 s30=0.47 t=106s +ep=0234 [medium] steps=099 reward= +9.075 evac=1 hp= 59.0 s30=0.50 t=107s +ep=0235 [medium] steps=150 reward= -18.945 evac=0 hp= 66.5 s30=0.47 t=108s +ep=0236 [medium] steps=009 reward= +16.310 evac=1 hp=100.0 s30=0.50 t=108s +ep=0237 [medium] steps=023 reward= -14.880 evac=0 hp= 0.0 s30=0.50 t=108s +ep=0238 [medium] steps=150 reward= -19.820 evac=0 hp=100.0 s30=0.50 t=108s +ep=0239 [medium] steps=150 reward= -14.000 evac=0 hp=100.0 s30=0.47 t=109s +ep=0240 [medium] steps=053 reward= +10.170 evac=1 hp= 32.0 s30=0.50 t=109s + >> PPO update pi_loss=-0.0042 v_loss=13.0209 entropy=1.5150 kl=0.0011 lr=1.92e-04 +ep=0241 [medium] steps=150 reward= -19.830 evac=0 hp=100.0 s30=0.47 t=110s +ep=0242 [medium] steps=150 reward= -11.270 evac=0 hp= 74.0 s30=0.47 t=111s +ep=0243 [medium] steps=047 reward= -10.300 evac=0 hp= 0.0 s30=0.47 t=111s +ep=0244 [medium] steps=046 reward= -21.860 evac=0 hp= 0.0 s30=0.43 t=111s +ep=0245 [medium] steps=150 reward= -12.495 evac=0 hp= 93.5 s30=0.40 t=112s +ep=0246 [medium] steps=030 reward= -10.980 evac=0 hp= 0.0 s30=0.40 t=112s +ep=0247 [medium] steps=031 reward= +9.955 evac=1 hp= 65.0 s30=0.40 t=112s +ep=0248 [medium] steps=050 reward= +14.810 evac=1 hp=100.0 s30=0.43 t=112s + >> PPO update pi_loss=-0.0064 v_loss=11.4714 entropy=1.6570 kl=0.0017 lr=1.88e-04 +ep=0249 [medium] steps=033 reward= +15.620 evac=1 hp=100.0 s30=0.43 t=113s +ep=0250 [medium] steps=042 reward= -13.750 evac=0 hp= 0.0 s30=0.43 t=113s + ** EVAL [hard] reward=-5.648 success=0.20 +ep=0251 [medium] steps=150 reward= -21.140 evac=0 hp=100.0 s30=0.40 t=115s +ep=0252 [medium] steps=017 reward= +14.880 evac=1 hp=100.0 s30=0.40 t=115s +ep=0253 [medium] steps=025 reward= +16.060 evac=1 hp=100.0 s30=0.43 t=115s +ep=0254 [medium] steps=041 reward= +14.490 evac=1 hp=100.0 s30=0.47 t=115s +ep=0255 [medium] steps=150 reward= -10.270 evac=0 hp=100.0 s30=0.43 t=116s +ep=0256 [medium] steps=047 reward= +14.500 evac=1 hp=100.0 s30=0.47 t=116s + >> PPO update pi_loss=-0.0026 v_loss=19.7825 entropy=1.6007 kl=0.0029 lr=1.85e-04 +ep=0257 [medium] steps=085 reward= -22.280 evac=0 hp= 0.0 s30=0.43 t=117s +ep=0258 [medium] steps=014 reward= +15.860 evac=1 hp=100.0 s30=0.43 t=117s +ep=0259 [medium] steps=150 reward= -10.100 evac=0 hp=100.0 s30=0.43 t=118s +ep=0260 [medium] steps=099 reward= -20.870 evac=0 hp= 0.0 s30=0.43 t=118s +ep=0261 [medium] steps=010 reward= +16.800 evac=1 hp=100.0 s30=0.47 t=118s +ep=0262 [medium] steps=043 reward= +15.430 evac=1 hp=100.0 s30=0.47 t=118s +ep=0263 [medium] steps=042 reward= +15.385 evac=1 hp= 97.0 s30=0.47 t=118s +ep=0264 [medium] steps=058 reward= -15.040 evac=0 hp= 0.0 s30=0.43 t=119s + >> PPO update pi_loss=+0.0024 v_loss=18.7074 entropy=1.6059 kl=0.0008 lr=1.81e-04 +ep=0265 [medium] steps=028 reward= +15.670 evac=1 hp=100.0 s30=0.47 t=119s +ep=0266 [medium] steps=037 reward= -20.460 evac=0 hp= 0.0 s30=0.43 t=119s +ep=0267 [medium] steps=023 reward= +12.072 evac=1 hp= 39.5 s30=0.47 t=119s +ep=0268 [medium] steps=024 reward= +7.133 evac=1 hp= 21.5 s30=0.50 t=119s +ep=0269 [medium] steps=014 reward= +16.080 evac=1 hp=100.0 s30=0.53 t=119s +ep=0270 [medium] steps=013 reward= +10.907 evac=1 hp= 30.5 s30=0.53 t=120s +ep=0271 [medium] steps=068 reward= +13.167 evac=1 hp= 98.5 s30=0.57 t=120s +ep=0272 [medium] steps=026 reward= +9.660 evac=1 hp= 50.0 s30=0.60 t=120s + >> PPO update pi_loss=-0.0026 v_loss=51.5106 entropy=1.6209 kl=0.0003 lr=1.78e-04 +ep=0273 [medium] steps=150 reward= -15.250 evac=0 hp= 18.0 s30=0.60 t=121s +ep=0274 [medium] steps=046 reward= +15.253 evac=1 hp= 99.5 s30=0.63 t=121s +ep=0275 [medium] steps=050 reward= -20.940 evac=0 hp= 0.0 s30=0.63 t=121s + ** EVAL [hard] reward=-10.368 success=0.00 +ep=0276 [medium] steps=016 reward= +16.070 evac=1 hp=100.0 s30=0.67 t=123s +ep=0277 [medium] steps=005 reward= +15.490 evac=1 hp=100.0 s30=0.67 t=123s +ep=0278 [medium] steps=032 reward= -12.180 evac=0 hp= 0.0 s30=0.63 t=123s +ep=0279 [medium] steps=027 reward= +14.932 evac=1 hp= 99.5 s30=0.63 t=123s +ep=0280 [medium] steps=007 reward= +15.780 evac=1 hp=100.0 s30=0.67 t=123s + >> PPO update pi_loss=-0.0248 v_loss=32.8994 entropy=1.7511 kl=0.0017 lr=1.74e-04 +ep=0281 [medium] steps=008 reward= +14.720 evac=1 hp=100.0 s30=0.70 t=124s +ep=0282 [medium] steps=044 reward= +13.120 evac=1 hp= 98.0 s30=0.70 t=124s +ep=0283 [medium] steps=007 reward= +15.780 evac=1 hp=100.0 s30=0.70 t=124s +ep=0284 [medium] steps=025 reward= -13.390 evac=0 hp= 0.0 s30=0.67 t=124s +ep=0285 [medium] steps=059 reward= -18.330 evac=0 hp= 0.0 s30=0.67 t=124s +ep=0286 [medium] steps=014 reward= +13.222 evac=1 hp= 87.5 s30=0.67 t=124s +ep=0287 [medium] steps=033 reward= +14.592 evac=1 hp= 93.5 s30=0.70 t=125s +ep=0288 [medium] steps=003 reward= +15.160 evac=1 hp=100.0 s30=0.70 t=125s + >> PPO update pi_loss=-0.0022 v_loss=36.9067 entropy=1.3689 kl=0.0008 lr=1.70e-04 +ep=0289 [medium] steps=017 reward= +15.430 evac=1 hp=100.0 s30=0.73 t=125s +ep=0290 [medium] steps=036 reward= -8.750 evac=0 hp= 0.0 s30=0.73 t=125s +ep=0291 [medium] steps=021 reward= +14.890 evac=1 hp=100.0 s30=0.73 t=125s +ep=0292 [medium] steps=003 reward= +15.160 evac=1 hp=100.0 s30=0.73 t=125s +ep=0293 [medium] steps=048 reward= -10.520 evac=0 hp= 0.0 s30=0.70 t=125s +ep=0294 [medium] steps=027 reward= +16.330 evac=1 hp=100.0 s30=0.73 t=125s +ep=0295 [medium] steps=011 reward= +16.130 evac=1 hp=100.0 s30=0.73 t=125s +ep=0296 [medium] steps=042 reward= -12.320 evac=0 hp= 0.0 s30=0.73 t=126s + >> PPO update pi_loss=-0.0016 v_loss=49.3725 entropy=1.6299 kl=0.0004 lr=1.67e-04 +ep=0297 [medium] steps=150 reward= -30.925 evac=0 hp= 66.5 s30=0.70 t=126s +ep=0298 [medium] steps=008 reward= +15.670 evac=1 hp=100.0 s30=0.70 t=126s +ep=0299 [medium] steps=030 reward= +15.950 evac=1 hp=100.0 s30=0.70 t=127s +ep=0300 [medium] steps=019 reward= -19.910 evac=0 hp= 0.0 s30=0.67 t=127s + ** EVAL [hard] reward=-4.421 success=0.20 +ep=0301 [medium] steps=009 reward= +16.700 evac=1 hp=100.0 s30=0.67 t=129s +ep=0302 [medium] steps=014 reward= +15.740 evac=1 hp=100.0 s30=0.67 t=129s +ep=0303 [medium] steps=019 reward= +15.830 evac=1 hp=100.0 s30=0.70 t=129s +ep=0304 [medium] steps=008 reward= +16.290 evac=1 hp=100.0 s30=0.70 t=129s + >> PPO update pi_loss=-0.0023 v_loss=32.9150 entropy=1.0633 kl=0.0003 lr=1.63e-04 +ep=0305 [medium] steps=018 reward= +11.265 evac=1 hp= 57.0 s30=0.73 t=129s +ep=0306 [medium] steps=006 reward= +15.840 evac=1 hp=100.0 s30=0.73 t=129s +ep=0307 [medium] steps=004 reward= +15.080 evac=1 hp=100.0 s30=0.73 t=129s +ep=0308 [medium] steps=150 reward= -14.790 evac=0 hp=100.0 s30=0.73 t=130s +ep=0309 [medium] steps=027 reward= +15.510 evac=1 hp=100.0 s30=0.73 t=130s +ep=0310 [medium] steps=027 reward= +16.010 evac=1 hp=100.0 s30=0.73 t=130s +ep=0311 [medium] steps=150 reward= -15.270 evac=0 hp=100.0 s30=0.70 t=131s +ep=0312 [medium] steps=047 reward= +16.010 evac=1 hp=100.0 s30=0.70 t=131s + >> PPO update pi_loss=-0.0012 v_loss=20.0507 entropy=1.7914 kl=0.0008 lr=1.60e-04 +ep=0313 [medium] steps=030 reward= -11.460 evac=0 hp= 0.0 s30=0.67 t=131s +ep=0314 [medium] steps=010 reward= +16.820 evac=1 hp=100.0 s30=0.70 t=131s +ep=0315 [medium] steps=150 reward= -10.870 evac=0 hp=100.0 s30=0.70 t=132s +ep=0316 [medium] steps=150 reward= -24.845 evac=0 hp= 71.5 s30=0.67 t=133s +ep=0317 [medium] steps=005 reward= +14.940 evac=1 hp=100.0 s30=0.67 t=133s +ep=0318 [medium] steps=032 reward= -10.190 evac=0 hp= 0.0 s30=0.63 t=133s +ep=0319 [medium] steps=005 reward= +15.900 evac=1 hp=100.0 s30=0.63 t=133s +ep=0320 [medium] steps=002 reward= +14.730 evac=1 hp=100.0 s30=0.67 t=133s + >> PPO update pi_loss=+0.0007 v_loss=20.7397 entropy=1.4529 kl=0.0018 lr=1.56e-04 +ep=0321 [medium] steps=004 reward= +15.550 evac=1 hp=100.0 s30=0.67 t=133s +ep=0322 [medium] steps=007 reward= +15.360 evac=1 hp=100.0 s30=0.67 t=133s +ep=0323 [medium] steps=023 reward= -8.390 evac=0 hp= 0.0 s30=0.67 t=133s +ep=0324 [medium] steps=016 reward= +16.550 evac=1 hp=100.0 s30=0.67 t=133s +ep=0325 [medium] steps=028 reward= +15.430 evac=1 hp=100.0 s30=0.67 t=134s + ** EVAL [hard] reward=-11.180 success=0.00 +ep=0326 [medium] steps=017 reward= +11.340 evac=1 hp= 72.0 s30=0.70 t=135s +ep=0327 [medium] steps=150 reward= -14.040 evac=0 hp=100.0 s30=0.70 t=136s +ep=0328 [medium] steps=020 reward= +16.560 evac=1 hp=100.0 s30=0.70 t=136s + >> PPO update pi_loss=-0.1127 v_loss=34.7078 entropy=1.6140 kl=0.0021 lr=1.52e-04 +ep=0329 [medium] steps=001 reward= +14.260 evac=1 hp=100.0 s30=0.70 t=136s +ep=0330 [medium] steps=015 reward= +16.492 evac=1 hp= 99.5 s30=0.73 t=136s +ep=0331 [medium] steps=026 reward= -10.810 evac=0 hp= 0.0 s30=0.70 t=136s +ep=0332 [medium] steps=035 reward= -16.300 evac=0 hp= 0.0 s30=0.67 t=136s +ep=0333 [medium] steps=024 reward= +12.070 evac=1 hp= 66.0 s30=0.67 t=136s +ep=0334 [medium] steps=021 reward= +15.380 evac=1 hp=100.0 s30=0.67 t=136s +ep=0335 [medium] steps=025 reward= +13.668 evac=1 hp= 96.5 s30=0.67 t=137s +ep=0336 [medium] steps=001 reward= +14.260 evac=1 hp=100.0 s30=0.67 t=137s + >> PPO update pi_loss=-0.0010 v_loss=51.7629 entropy=1.4248 kl=0.0002 lr=1.49e-04 +ep=0337 [medium] steps=150 reward= -8.340 evac=0 hp=100.0 s30=0.63 t=137s +ep=0338 [medium] steps=045 reward= -22.490 evac=0 hp= 0.0 s30=0.63 t=138s +ep=0339 [medium] steps=011 reward= +15.610 evac=1 hp=100.0 s30=0.63 t=138s +ep=0340 [medium] steps=020 reward= +15.480 evac=1 hp= 98.0 s30=0.63 t=138s +ep=0341 [medium] steps=033 reward= -13.980 evac=0 hp= 0.0 s30=0.63 t=138s +ep=0342 [medium] steps=026 reward= +16.010 evac=1 hp=100.0 s30=0.63 t=138s +ep=0343 [medium] steps=010 reward= +16.640 evac=1 hp=100.0 s30=0.67 t=138s +ep=0344 [medium] steps=017 reward= +9.383 evac=1 hp= 37.5 s30=0.67 t=138s + >> PPO update pi_loss=-0.0115 v_loss=29.0228 entropy=1.6334 kl=0.0006 lr=1.45e-04 +ep=0345 [medium] steps=037 reward= +15.953 evac=1 hp= 97.5 s30=0.70 t=139s +ep=0346 [medium] steps=018 reward= +16.840 evac=1 hp=100.0 s30=0.73 t=139s +ep=0347 [medium] steps=014 reward= -14.940 evac=0 hp= 0.0 s30=0.70 t=139s +ep=0348 [medium] steps=009 reward= +16.250 evac=1 hp=100.0 s30=0.73 t=139s +ep=0349 [medium] steps=042 reward= +15.600 evac=1 hp=100.0 s30=0.73 t=139s +ep=0350 [medium] steps=015 reward= +16.420 evac=1 hp=100.0 s30=0.73 t=139s + ** EVAL [hard] reward=-9.845 success=0.00 +ep=0351 [medium] steps=009 reward= +15.030 evac=1 hp=100.0 s30=0.73 t=141s +ep=0352 [medium] steps=015 reward= +16.290 evac=1 hp=100.0 s30=0.73 t=141s + >> PPO update pi_loss=-0.0015 v_loss=44.9861 entropy=1.4760 kl=0.0002 lr=1.42e-04 +ep=0353 [medium] steps=011 reward= +16.240 evac=1 hp=100.0 s30=0.77 t=141s +ep=0354 [medium] steps=150 reward= -25.450 evac=0 hp= 56.0 s30=0.73 t=142s +ep=0355 [medium] steps=150 reward= -12.510 evac=0 hp=100.0 s30=0.70 t=142s +ep=0356 [medium] steps=016 reward= +15.078 evac=1 hp= 98.5 s30=0.70 t=142s +ep=0357 [medium] steps=016 reward= +16.920 evac=1 hp=100.0 s30=0.73 t=143s +ep=0358 [medium] steps=013 reward= +16.350 evac=1 hp=100.0 s30=0.73 t=143s +ep=0359 [medium] steps=015 reward= +15.070 evac=1 hp=100.0 s30=0.73 t=143s +ep=0360 [medium] steps=016 reward= +17.010 evac=1 hp=100.0 s30=0.73 t=143s + >> PPO update pi_loss=+0.0065 v_loss=18.1264 entropy=1.2002 kl=0.0020 lr=1.38e-04 +ep=0361 [medium] steps=014 reward= +16.450 evac=1 hp=100.0 s30=0.77 t=143s +ep=0362 [medium] steps=054 reward= -6.740 evac=0 hp= 0.0 s30=0.77 t=143s +ep=0363 [medium] steps=060 reward= -19.560 evac=0 hp= 0.0 s30=0.73 t=144s + [curriculum] Advanced to 'hard' (suc30=0.73) +ep=0364 [medium] steps=017 reward= +15.980 evac=1 hp=100.0 s30=0.73 t=144s +ep=0365 [hard ] steps=031 reward= -11.430 evac=0 hp= 0.0 s30=0.70 t=144s +ep=0366 [easy ] steps=200 reward= -17.860 evac=0 hp=100.0 s30=0.67 t=145s +ep=0367 [medium] steps=016 reward= +12.742 evac=1 hp= 91.5 s30=0.70 t=145s +ep=0368 [medium] steps=038 reward= -7.300 evac=0 hp= 0.0 s30=0.70 t=145s + >> PPO update pi_loss=-0.0051 v_loss=15.8902 entropy=1.2523 kl=0.0026 lr=1.34e-04 +ep=0369 [hard ] steps=038 reward= -13.110 evac=0 hp= 0.0 s30=0.67 t=145s +ep=0370 [hard ] steps=018 reward= -12.610 evac=0 hp= 0.0 s30=0.63 t=146s +ep=0371 [hard ] steps=100 reward= -8.120 evac=0 hp=100.0 s30=0.63 t=146s +ep=0372 [easy ] steps=030 reward= +18.690 evac=1 hp=100.0 s30=0.63 t=146s +ep=0373 [medium] steps=010 reward= +16.470 evac=1 hp=100.0 s30=0.63 t=146s +ep=0374 [medium] steps=037 reward= +14.580 evac=1 hp=100.0 s30=0.63 t=146s +ep=0375 [hard ] steps=031 reward= -13.080 evac=0 hp= 0.0 s30=0.60 t=147s + ** EVAL [hard] reward=-11.320 success=0.00 +ep=0376 [easy ] steps=008 reward= +18.940 evac=1 hp=100.0 s30=0.60 t=147s + >> PPO update pi_loss=+0.0186 v_loss=42.2980 entropy=1.9139 kl=0.0080 lr=1.31e-04 +ep=0377 [easy ] steps=022 reward= +17.990 evac=1 hp=100.0 s30=0.63 t=148s +ep=0378 [hard ] steps=042 reward= -11.380 evac=0 hp= 0.0 s30=0.60 t=148s +ep=0379 [hard ] steps=045 reward= -12.320 evac=0 hp= 0.0 s30=0.57 t=148s +ep=0380 [hard ] steps=024 reward= -10.440 evac=0 hp= 0.0 s30=0.53 t=148s +ep=0381 [hard ] steps=032 reward= -9.660 evac=0 hp= 0.0 s30=0.50 t=148s +ep=0382 [medium] steps=007 reward= +16.480 evac=1 hp=100.0 s30=0.50 t=148s +ep=0383 [medium] steps=150 reward= -13.880 evac=0 hp=100.0 s30=0.47 t=149s +ep=0384 [hard ] steps=032 reward= -10.670 evac=0 hp= 0.0 s30=0.47 t=149s + >> PPO update pi_loss=-0.0179 v_loss=15.9173 entropy=2.0312 kl=0.0022 lr=1.27e-04 +ep=0385 [medium] steps=013 reward= +15.470 evac=1 hp=100.0 s30=0.50 t=150s +ep=0386 [hard ] steps=040 reward= -12.560 evac=0 hp= 0.0 s30=0.47 t=150s +ep=0387 [hard ] steps=015 reward= -11.420 evac=0 hp= 0.0 s30=0.43 t=150s +ep=0388 [hard ] steps=100 reward= -7.770 evac=0 hp=100.0 s30=0.40 t=150s +ep=0389 [medium] steps=150 reward= -15.300 evac=0 hp= 93.0 s30=0.37 t=151s +ep=0390 [medium] steps=150 reward= -10.360 evac=0 hp=100.0 s30=0.33 t=152s +ep=0391 [hard ] steps=100 reward= -9.240 evac=0 hp=100.0 s30=0.30 t=152s +ep=0392 [medium] steps=010 reward= +16.320 evac=1 hp=100.0 s30=0.33 t=152s + >> PPO update pi_loss=-0.0158 v_loss=8.8704 entropy=1.8786 kl=0.0033 lr=1.24e-04 +ep=0393 [medium] steps=063 reward= -10.280 evac=0 hp= 0.0 s30=0.33 t=153s +ep=0394 [hard ] steps=100 reward= -10.105 evac=0 hp= 72.5 s30=0.30 t=154s +ep=0395 [medium] steps=002 reward= +14.730 evac=1 hp=100.0 s30=0.33 t=154s +ep=0396 [hard ] steps=019 reward= -12.630 evac=0 hp= 0.0 s30=0.33 t=154s +ep=0397 [hard ] steps=027 reward= -15.040 evac=0 hp= 0.0 s30=0.30 t=154s +ep=0398 [easy ] steps=011 reward= +18.760 evac=1 hp=100.0 s30=0.33 t=154s +ep=0399 [easy ] steps=163 reward= +10.190 evac=1 hp=100.0 s30=0.37 t=154s +ep=0400 [easy ] steps=014 reward= +18.110 evac=1 hp=100.0 s30=0.40 t=155s + >> PPO update pi_loss=+0.0007 v_loss=10.2287 entropy=1.9814 kl=0.0007 lr=1.20e-04 + ** EVAL [hard] reward=-12.256 success=0.00 +ep=0401 [hard ] steps=030 reward= -11.450 evac=0 hp= 0.0 s30=0.40 t=156s +ep=0402 [hard ] steps=100 reward= -8.010 evac=0 hp=100.0 s30=0.37 t=157s +ep=0403 [medium] steps=027 reward= -10.860 evac=0 hp= 0.0 s30=0.33 t=157s +ep=0404 [medium] steps=004 reward= +15.550 evac=1 hp=100.0 s30=0.33 t=157s +ep=0405 [hard ] steps=100 reward= -8.615 evac=0 hp= 93.5 s30=0.33 t=157s +ep=0406 [medium] steps=018 reward= -15.140 evac=0 hp= 0.0 s30=0.30 t=157s +ep=0407 [hard ] steps=038 reward= +13.605 evac=1 hp= 97.0 s30=0.30 t=157s +ep=0408 [easy ] steps=026 reward= +17.300 evac=1 hp=100.0 s30=0.33 t=158s + >> PPO update pi_loss=-0.0036 v_loss=28.6206 entropy=1.8868 kl=0.0006 lr=1.16e-04 +ep=0409 [hard ] steps=100 reward= -9.880 evac=0 hp=100.0 s30=0.33 t=158s +ep=0410 [medium] steps=049 reward= +13.980 evac=1 hp=100.0 s30=0.37 t=159s +ep=0411 [hard ] steps=054 reward= -11.150 evac=0 hp= 0.0 s30=0.37 t=159s +ep=0412 [medium] steps=150 reward= -10.620 evac=0 hp=100.0 s30=0.33 t=160s +ep=0413 [medium] steps=010 reward= +16.230 evac=1 hp=100.0 s30=0.37 t=160s +ep=0414 [hard ] steps=029 reward= +13.570 evac=1 hp=100.0 s30=0.40 t=160s +ep=0415 [easy ] steps=026 reward= +15.887 evac=1 hp= 84.5 s30=0.40 t=160s +ep=0416 [medium] steps=072 reward= -20.080 evac=0 hp= 0.0 s30=0.40 t=160s + >> PPO update pi_loss=-0.0059 v_loss=18.3843 entropy=1.7402 kl=0.0007 lr=1.13e-04 +ep=0417 [easy ] steps=200 reward= -15.510 evac=0 hp=100.0 s30=0.40 t=161s +ep=0418 [easy ] steps=200 reward= -10.150 evac=0 hp=100.0 s30=0.40 t=162s +ep=0419 [medium] steps=019 reward= -20.390 evac=0 hp= 0.0 s30=0.40 t=162s +ep=0420 [easy ] steps=012 reward= +17.270 evac=1 hp=100.0 s30=0.43 t=162s +ep=0421 [hard ] steps=029 reward= -12.360 evac=0 hp= 0.0 s30=0.43 t=163s +ep=0422 [hard ] steps=056 reward= -17.780 evac=0 hp= 0.0 s30=0.40 t=163s +ep=0423 [hard ] steps=020 reward= -13.260 evac=0 hp= 0.0 s30=0.40 t=163s +ep=0424 [hard ] steps=041 reward= -12.590 evac=0 hp= 0.0 s30=0.40 t=163s + >> PPO update pi_loss=-0.0197 v_loss=16.7425 entropy=1.9853 kl=0.0074 lr=1.09e-04 +ep=0425 [hard ] steps=038 reward= +13.130 evac=1 hp=100.0 s30=0.40 t=164s + ** EVAL [hard] reward=-7.024 success=0.20 +ep=0426 [hard ] steps=027 reward= -11.930 evac=0 hp= 0.0 s30=0.40 t=165s +ep=0427 [easy ] steps=001 reward= +16.760 evac=1 hp=100.0 s30=0.43 t=165s +ep=0428 [hard ] steps=100 reward= -9.300 evac=0 hp=100.0 s30=0.40 t=165s +ep=0429 [hard ] steps=016 reward= -11.390 evac=0 hp= 0.0 s30=0.37 t=165s +ep=0430 [medium] steps=019 reward= -13.260 evac=0 hp= 0.0 s30=0.33 t=166s +ep=0431 [hard ] steps=100 reward= -7.910 evac=0 hp=100.0 s30=0.33 t=166s +ep=0432 [easy ] steps=010 reward= +18.790 evac=1 hp=100.0 s30=0.37 t=166s + >> PPO update pi_loss=+0.0241 v_loss=23.2591 entropy=2.0669 kl=0.0023 lr=1.06e-04 +ep=0433 [hard ] steps=020 reward= -13.380 evac=0 hp= 0.0 s30=0.37 t=167s +ep=0434 [easy ] steps=011 reward= +18.740 evac=1 hp=100.0 s30=0.37 t=167s +ep=0435 [medium] steps=023 reward= +13.832 evac=1 hp= 99.5 s30=0.40 t=167s +ep=0436 [hard ] steps=013 reward= +13.010 evac=1 hp=100.0 s30=0.43 t=167s +ep=0437 [hard ] steps=037 reward= -17.250 evac=0 hp= 0.0 s30=0.40 t=167s +ep=0438 [easy ] steps=014 reward= +18.400 evac=1 hp=100.0 s30=0.40 t=167s +ep=0439 [medium] steps=003 reward= +14.670 evac=1 hp=100.0 s30=0.43 t=167s +ep=0440 [medium] steps=034 reward= +14.730 evac=1 hp= 94.0 s30=0.43 t=167s + >> PPO update pi_loss=+0.0003 v_loss=67.2617 entropy=1.6175 kl=0.0005 lr=1.02e-04 +ep=0441 [medium] steps=015 reward= +16.330 evac=1 hp=100.0 s30=0.47 t=167s +ep=0442 [hard ] steps=021 reward= -9.930 evac=0 hp= 0.0 s30=0.47 t=168s +ep=0443 [hard ] steps=100 reward= -7.920 evac=0 hp=100.0 s30=0.43 t=168s +ep=0444 [hard ] steps=027 reward= -10.220 evac=0 hp= 0.0 s30=0.40 t=168s +ep=0445 [easy ] steps=200 reward= -13.360 evac=0 hp=100.0 s30=0.37 t=169s +ep=0446 [medium] steps=026 reward= +14.897 evac=1 hp= 96.5 s30=0.40 t=169s +ep=0447 [hard ] steps=100 reward= -8.950 evac=0 hp=100.0 s30=0.40 t=170s +ep=0448 [hard ] steps=028 reward= -11.460 evac=0 hp= 0.0 s30=0.40 t=170s + >> PPO update pi_loss=-0.0618 v_loss=6.9971 entropy=1.9275 kl=0.0016 lr=9.84e-05 +ep=0449 [hard ] steps=100 reward= -7.500 evac=0 hp=100.0 s30=0.40 t=171s +ep=0450 [hard ] steps=057 reward= +13.620 evac=1 hp=100.0 s30=0.40 t=171s + ** EVAL [hard] reward=-10.726 success=0.00 +ep=0451 [medium] steps=007 reward= +16.480 evac=1 hp=100.0 s30=0.43 t=173s +ep=0452 [medium] steps=014 reward= +15.950 evac=1 hp=100.0 s30=0.47 t=173s +ep=0453 [easy ] steps=016 reward= +18.320 evac=1 hp=100.0 s30=0.50 t=173s +ep=0454 [medium] steps=084 reward= -19.540 evac=0 hp= 0.0 s30=0.50 t=173s +ep=0455 [hard ] steps=100 reward= -8.100 evac=0 hp=100.0 s30=0.47 t=173s +ep=0456 [medium] steps=017 reward= +14.253 evac=1 hp= 99.5 s30=0.50 t=174s + >> PPO update pi_loss=+0.0019 v_loss=18.8275 entropy=1.9587 kl=0.0010 lr=9.48e-05 +ep=0457 [medium] steps=012 reward= +15.530 evac=1 hp=100.0 s30=0.50 t=174s +ep=0458 [medium] steps=017 reward= -13.910 evac=0 hp= 0.0 s30=0.50 t=174s +ep=0459 [medium] steps=009 reward= +16.680 evac=1 hp=100.0 s30=0.53 t=174s +ep=0460 [medium] steps=150 reward= -18.775 evac=0 hp= 43.5 s30=0.53 t=175s +ep=0461 [medium] steps=150 reward= -14.020 evac=0 hp= 97.0 s30=0.53 t=175s +ep=0462 [medium] steps=150 reward= -11.730 evac=0 hp=100.0 s30=0.50 t=176s +ep=0463 [hard ] steps=024 reward= -10.500 evac=0 hp= 0.0 s30=0.50 t=176s +ep=0464 [hard ] steps=032 reward= -11.090 evac=0 hp= 0.0 s30=0.47 t=176s + >> PPO update pi_loss=-0.0086 v_loss=13.1403 entropy=1.5553 kl=0.0028 lr=9.12e-05 +ep=0465 [hard ] steps=024 reward= -11.580 evac=0 hp= 0.0 s30=0.43 t=177s +ep=0466 [medium] steps=018 reward= +16.110 evac=1 hp=100.0 s30=0.43 t=177s +ep=0467 [hard ] steps=100 reward= -9.110 evac=0 hp=100.0 s30=0.43 t=177s +ep=0468 [medium] steps=019 reward= +15.550 evac=1 hp=100.0 s30=0.43 t=178s +ep=0469 [easy ] steps=200 reward= -23.130 evac=0 hp=100.0 s30=0.40 t=178s +ep=0470 [hard ] steps=025 reward= -14.400 evac=0 hp= 0.0 s30=0.37 t=179s +ep=0471 [medium] steps=062 reward= -13.210 evac=0 hp= 0.0 s30=0.33 t=179s +ep=0472 [medium] steps=150 reward= -23.735 evac=0 hp= 96.5 s30=0.33 t=179s + >> PPO update pi_loss=-0.0109 v_loss=10.0870 entropy=1.3562 kl=0.0016 lr=8.76e-05 +ep=0473 [hard ] steps=023 reward= -13.950 evac=0 hp= 0.0 s30=0.33 t=180s +ep=0474 [hard ] steps=100 reward= -10.855 evac=0 hp= 98.5 s30=0.33 t=181s +ep=0475 [hard ] steps=100 reward= -9.080 evac=0 hp=100.0 s30=0.33 t=181s + ** EVAL [hard] reward=-9.072 success=0.00 +ep=0476 [hard ] steps=100 reward= -9.125 evac=0 hp= 74.5 s30=0.30 t=184s +ep=0477 [easy ] steps=011 reward= +18.740 evac=1 hp=100.0 s30=0.33 t=184s +ep=0478 [easy ] steps=016 reward= +17.720 evac=1 hp=100.0 s30=0.37 t=184s +ep=0479 [medium] steps=150 reward= -17.390 evac=0 hp=100.0 s30=0.37 t=185s +ep=0480 [easy ] steps=004 reward= +18.050 evac=1 hp=100.0 s30=0.37 t=185s + >> PPO update pi_loss=-0.0001 v_loss=7.6346 entropy=1.6924 kl=0.0008 lr=8.40e-05 +ep=0481 [easy ] steps=123 reward= +12.600 evac=1 hp=100.0 s30=0.37 t=186s +ep=0482 [hard ] steps=100 reward= -10.260 evac=0 hp=100.0 s30=0.33 t=186s +ep=0483 [easy ] steps=007 reward= +18.650 evac=1 hp=100.0 s30=0.33 t=186s +ep=0484 [medium] steps=016 reward= +16.180 evac=1 hp=100.0 s30=0.37 t=186s +ep=0485 [easy ] steps=200 reward= -21.930 evac=0 hp=100.0 s30=0.37 t=187s +ep=0486 [easy ] steps=017 reward= +17.980 evac=1 hp=100.0 s30=0.37 t=187s +ep=0487 [hard ] steps=100 reward= -7.490 evac=0 hp=100.0 s30=0.33 t=188s +ep=0488 [hard ] steps=043 reward= -12.790 evac=0 hp= 0.0 s30=0.33 t=188s + >> PPO update pi_loss=-0.0032 v_loss=7.0721 entropy=1.6047 kl=0.0014 lr=8.04e-05 +ep=0489 [hard ] steps=100 reward= -8.410 evac=0 hp=100.0 s30=0.30 t=189s +ep=0490 [medium] steps=015 reward= +16.340 evac=1 hp=100.0 s30=0.33 t=189s +ep=0491 [easy ] steps=029 reward= +18.520 evac=1 hp=100.0 s30=0.37 t=189s +ep=0492 [easy ] steps=012 reward= +18.370 evac=1 hp=100.0 s30=0.40 t=189s +ep=0493 [hard ] steps=036 reward= -13.860 evac=0 hp= 0.0 s30=0.40 t=190s +ep=0494 [medium] steps=150 reward= -17.640 evac=0 hp= 78.0 s30=0.40 t=190s +ep=0495 [medium] steps=003 reward= +15.160 evac=1 hp=100.0 s30=0.43 t=190s +ep=0496 [hard ] steps=025 reward= -9.910 evac=0 hp= 0.0 s30=0.40 t=190s + >> PPO update pi_loss=+0.0086 v_loss=13.3058 entropy=1.0254 kl=0.0005 lr=7.68e-05 +ep=0497 [hard ] steps=032 reward= -12.570 evac=0 hp= 0.0 s30=0.40 t=191s +ep=0498 [hard ] steps=100 reward= -9.605 evac=0 hp= 56.5 s30=0.37 t=191s +ep=0499 [easy ] steps=009 reward= +18.100 evac=1 hp=100.0 s30=0.40 t=191s +ep=0500 [hard ] steps=078 reward= +11.160 evac=1 hp=100.0 s30=0.43 t=192s + ** EVAL [hard] reward=-12.050 success=0.00 +ep=0501 [medium] steps=026 reward= +6.730 evac=1 hp= 14.0 s30=0.47 t=194s +ep=0502 [medium] steps=017 reward= +15.100 evac=1 hp=100.0 s30=0.50 t=194s +ep=0503 [hard ] steps=037 reward= -12.610 evac=0 hp= 0.0 s30=0.50 t=194s +ep=0504 [easy ] steps=200 reward= -17.930 evac=0 hp=100.0 s30=0.50 t=195s + >> PPO update pi_loss=+0.0009 v_loss=15.9530 entropy=1.5948 kl=0.0006 lr=7.32e-05 +ep=0505 [easy ] steps=200 reward= -15.700 evac=0 hp=100.0 s30=0.50 t=196s +ep=0506 [hard ] steps=100 reward= -10.160 evac=0 hp=100.0 s30=0.50 t=197s +ep=0507 [medium] steps=008 reward= +16.740 evac=1 hp=100.0 s30=0.50 t=197s +ep=0508 [hard ] steps=100 reward= -8.520 evac=0 hp=100.0 s30=0.47 t=197s +ep=0509 [hard ] steps=028 reward= -11.370 evac=0 hp= 0.0 s30=0.47 t=197s +ep=0510 [hard ] steps=032 reward= -9.960 evac=0 hp= 0.0 s30=0.43 t=198s +ep=0511 [medium] steps=017 reward= +15.520 evac=1 hp=100.0 s30=0.43 t=198s +ep=0512 [medium] steps=013 reward= +16.330 evac=1 hp=100.0 s30=0.47 t=198s + >> PPO update pi_loss=-0.0003 v_loss=10.8486 entropy=2.0690 kl=0.0005 lr=6.96e-05 +ep=0513 [hard ] steps=040 reward= -10.930 evac=0 hp= 0.0 s30=0.43 t=198s +ep=0514 [hard ] steps=100 reward= -9.770 evac=0 hp=100.0 s30=0.40 t=199s +ep=0515 [easy ] steps=003 reward= +17.170 evac=1 hp=100.0 s30=0.43 t=199s +ep=0516 [hard ] steps=026 reward= +13.380 evac=1 hp=100.0 s30=0.43 t=199s +ep=0517 [hard ] steps=035 reward= -11.330 evac=0 hp= 0.0 s30=0.43 t=199s +ep=0518 [medium] steps=020 reward= +13.890 evac=1 hp= 86.0 s30=0.47 t=199s +ep=0519 [easy ] steps=019 reward= +19.480 evac=1 hp=100.0 s30=0.50 t=199s +ep=0520 [hard ] steps=033 reward= -11.450 evac=0 hp= 0.0 s30=0.47 t=200s + >> PPO update pi_loss=-0.0634 v_loss=27.4050 entropy=1.8160 kl=0.0016 lr=6.60e-05 +ep=0521 [medium] steps=009 reward= +16.700 evac=1 hp=100.0 s30=0.47 t=200s +ep=0522 [medium] steps=017 reward= -13.670 evac=0 hp= 0.0 s30=0.43 t=200s +ep=0523 [hard ] steps=014 reward= -13.050 evac=0 hp= 0.0 s30=0.43 t=200s +ep=0524 [medium] steps=150 reward= -6.580 evac=0 hp=100.0 s30=0.43 t=201s +ep=0525 [hard ] steps=100 reward= -10.760 evac=0 hp=100.0 s30=0.40 t=201s + ** EVAL [hard] reward=-5.528 success=0.20 +ep=0526 [medium] steps=080 reward= -25.070 evac=0 hp= 0.0 s30=0.40 t=203s +ep=0527 [medium] steps=150 reward= -13.725 evac=0 hp= 91.5 s30=0.40 t=203s +ep=0528 [medium] steps=068 reward= -10.380 evac=0 hp= 0.0 s30=0.40 t=204s + >> PPO update pi_loss=+0.0074 v_loss=18.0052 entropy=1.7626 kl=0.0021 lr=6.24e-05 +ep=0529 [hard ] steps=100 reward= -8.910 evac=0 hp=100.0 s30=0.37 t=205s +ep=0530 [easy ] steps=200 reward= -11.215 evac=0 hp= 95.5 s30=0.33 t=206s +ep=0531 [hard ] steps=047 reward= +14.700 evac=1 hp=100.0 s30=0.33 t=206s +ep=0532 [hard ] steps=037 reward= -11.340 evac=0 hp= 0.0 s30=0.30 t=206s +ep=0533 [hard ] steps=028 reward= -10.550 evac=0 hp= 0.0 s30=0.30 t=206s +ep=0534 [medium] steps=014 reward= +16.560 evac=1 hp=100.0 s30=0.33 t=206s +ep=0535 [medium] steps=067 reward= -21.630 evac=0 hp= 0.0 s30=0.33 t=207s +ep=0536 [hard ] steps=053 reward= -10.830 evac=0 hp= 0.0 s30=0.33 t=207s + >> PPO update pi_loss=-0.0165 v_loss=15.0212 entropy=1.7101 kl=0.0018 lr=5.88e-05 +ep=0537 [medium] steps=150 reward= -6.565 evac=0 hp= 63.5 s30=0.30 t=208s +ep=0538 [hard ] steps=100 reward= -8.425 evac=0 hp= 95.5 s30=0.30 t=209s +ep=0539 [medium] steps=015 reward= +16.190 evac=1 hp=100.0 s30=0.33 t=209s +ep=0540 [hard ] steps=100 reward= -9.880 evac=0 hp=100.0 s30=0.33 t=209s +ep=0541 [medium] steps=014 reward= +17.190 evac=1 hp=100.0 s30=0.33 t=209s +ep=0542 [hard ] steps=044 reward= -10.020 evac=0 hp= 0.0 s30=0.30 t=209s +ep=0543 [easy ] steps=072 reward= +15.770 evac=1 hp=100.0 s30=0.33 t=210s +ep=0544 [hard ] steps=100 reward= -8.800 evac=0 hp=100.0 s30=0.33 t=210s + >> PPO update pi_loss=+0.0102 v_loss=7.6336 entropy=1.9990 kl=0.0004 lr=5.52e-05 +ep=0545 [hard ] steps=024 reward= +14.900 evac=1 hp=100.0 s30=0.33 t=211s +ep=0546 [hard ] steps=027 reward= -14.450 evac=0 hp= 0.0 s30=0.30 t=211s +ep=0547 [easy ] steps=011 reward= +17.590 evac=1 hp=100.0 s30=0.33 t=211s +ep=0548 [easy ] steps=017 reward= +18.350 evac=1 hp=100.0 s30=0.33 t=211s +ep=0549 [hard ] steps=022 reward= -14.160 evac=0 hp= 0.0 s30=0.30 t=211s +ep=0550 [medium] steps=019 reward= +9.498 evac=1 hp= 64.5 s30=0.33 t=211s + ** EVAL [hard] reward=-11.274 success=0.00 +ep=0551 [easy ] steps=033 reward= +17.740 evac=1 hp=100.0 s30=0.33 t=213s +ep=0552 [medium] steps=003 reward= +14.670 evac=1 hp=100.0 s30=0.37 t=213s + >> PPO update pi_loss=-0.0022 v_loss=59.5561 entropy=1.5268 kl=0.0003 lr=5.16e-05 +ep=0553 [medium] steps=014 reward= +16.810 evac=1 hp=100.0 s30=0.40 t=213s +ep=0554 [hard ] steps=022 reward= +14.210 evac=1 hp=100.0 s30=0.43 t=213s +ep=0555 [hard ] steps=035 reward= -13.820 evac=0 hp= 0.0 s30=0.43 t=213s +ep=0556 [easy ] steps=047 reward= +17.360 evac=1 hp=100.0 s30=0.47 t=214s +ep=0557 [easy ] steps=048 reward= +13.830 evac=1 hp=100.0 s30=0.50 t=214s +ep=0558 [medium] steps=008 reward= +16.440 evac=1 hp=100.0 s30=0.53 t=214s +ep=0559 [hard ] steps=100 reward= -7.800 evac=0 hp=100.0 s30=0.53 t=214s +ep=0560 [hard ] steps=025 reward= -11.750 evac=0 hp= 0.0 s30=0.53 t=215s + >> PPO update pi_loss=-0.0207 v_loss=16.8330 entropy=1.8783 kl=0.0006 lr=4.80e-05 +ep=0561 [medium] steps=008 reward= +16.440 evac=1 hp=100.0 s30=0.53 t=215s +ep=0562 [easy ] steps=112 reward= +10.037 evac=1 hp= 80.5 s30=0.57 t=215s +ep=0563 [easy ] steps=016 reward= +18.610 evac=1 hp=100.0 s30=0.60 t=216s +ep=0564 [medium] steps=004 reward= +15.600 evac=1 hp=100.0 s30=0.60 t=216s +ep=0565 [medium] steps=031 reward= -9.960 evac=0 hp= 0.0 s30=0.60 t=216s +ep=0566 [hard ] steps=014 reward= -11.270 evac=0 hp= 0.0 s30=0.60 t=216s +ep=0567 [hard ] steps=100 reward= -5.240 evac=0 hp=100.0 s30=0.60 t=216s +ep=0568 [easy ] steps=045 reward= +16.020 evac=1 hp=100.0 s30=0.63 t=216s + >> PPO update pi_loss=-0.0140 v_loss=16.1354 entropy=1.8723 kl=0.0002 lr=4.44e-05 +ep=0569 [medium] steps=004 reward= +15.550 evac=1 hp=100.0 s30=0.63 t=217s +ep=0570 [hard ] steps=021 reward= -9.080 evac=0 hp= 0.0 s30=0.63 t=217s +ep=0571 [hard ] steps=100 reward= -10.860 evac=0 hp=100.0 s30=0.60 t=217s +ep=0572 [medium] steps=015 reward= -19.150 evac=0 hp= 0.0 s30=0.60 t=217s +ep=0573 [hard ] steps=025 reward= +14.630 evac=1 hp=100.0 s30=0.60 t=218s +ep=0574 [hard ] steps=034 reward= -11.920 evac=0 hp= 0.0 s30=0.60 t=218s +ep=0575 [medium] steps=079 reward= -19.430 evac=0 hp= 0.0 s30=0.57 t=218s + ** EVAL [hard] reward=-10.578 success=0.00 +ep=0576 [medium] steps=013 reward= +16.620 evac=1 hp=100.0 s30=0.60 t=219s + >> PPO update pi_loss=+0.0070 v_loss=26.4067 entropy=1.4878 kl=0.0001 lr=4.08e-05 +ep=0577 [medium] steps=001 reward= +14.260 evac=1 hp=100.0 s30=0.60 t=220s +ep=0578 [hard ] steps=030 reward= -12.950 evac=0 hp= 0.0 s30=0.57 t=220s +ep=0579 [medium] steps=150 reward= -25.410 evac=0 hp= 57.0 s30=0.57 t=221s +ep=0580 [hard ] steps=100 reward= -8.330 evac=0 hp=100.0 s30=0.53 t=221s +ep=0581 [hard ] steps=021 reward= -11.530 evac=0 hp= 0.0 s30=0.50 t=221s +ep=0582 [medium] steps=011 reward= +15.930 evac=1 hp=100.0 s30=0.50 t=221s +ep=0583 [medium] steps=014 reward= +12.270 evac=1 hp= 76.0 s30=0.50 t=221s +ep=0584 [easy ] steps=200 reward= -11.730 evac=0 hp= 67.0 s30=0.47 t=222s + >> PPO update pi_loss=-0.0108 v_loss=10.7300 entropy=1.5916 kl=0.0001 lr=3.72e-05 +ep=0585 [medium] steps=016 reward= +12.483 evac=1 hp= 67.5 s30=0.50 t=223s +ep=0586 [medium] steps=031 reward= +14.330 evac=1 hp=100.0 s30=0.50 t=223s +ep=0587 [hard ] steps=027 reward= -9.930 evac=0 hp= 0.0 s30=0.47 t=223s +ep=0588 [hard ] steps=100 reward= -8.820 evac=0 hp= 99.0 s30=0.43 t=223s +ep=0589 [hard ] steps=100 reward= -8.980 evac=0 hp=100.0 s30=0.43 t=224s +ep=0590 [hard ] steps=100 reward= -10.670 evac=0 hp=100.0 s30=0.43 t=225s +ep=0591 [easy ] steps=095 reward= +11.080 evac=1 hp=100.0 s30=0.43 t=225s +ep=0592 [hard ] steps=021 reward= -10.660 evac=0 hp= 0.0 s30=0.40 t=225s + >> PPO update pi_loss=-0.0007 v_loss=5.2941 entropy=1.8152 kl=0.0001 lr=3.36e-05 +ep=0593 [easy ] steps=012 reward= +18.350 evac=1 hp=100.0 s30=0.40 t=226s +ep=0594 [medium] steps=021 reward= +10.757 evac=1 hp= 74.5 s30=0.40 t=226s +ep=0595 [medium] steps=012 reward= +15.830 evac=1 hp=100.0 s30=0.43 t=226s +ep=0596 [medium] steps=028 reward= -10.200 evac=0 hp= 0.0 s30=0.43 t=226s +ep=0597 [medium] steps=019 reward= +13.750 evac=1 hp= 88.0 s30=0.47 t=226s +ep=0598 [medium] steps=009 reward= +16.700 evac=1 hp=100.0 s30=0.47 t=226s +ep=0599 [hard ] steps=100 reward= -8.560 evac=0 hp=100.0 s30=0.43 t=226s +ep=0600 [medium] steps=044 reward= +13.770 evac=1 hp=100.0 s30=0.47 t=227s + >> PPO update pi_loss=-0.0007 v_loss=29.9181 entropy=1.5398 kl=0.0001 lr=3.00e-05 + ** EVAL [hard] reward=-12.068 success=0.00