diff --git "a/training.log" "b/training.log" new file mode 100644--- /dev/null +++ "b/training.log" @@ -0,0 +1,9136 @@ +[log] Writing console output to artifacts/pyre_ppo_fixed_training.log +[server] Connecting to http://localhost:8000 ... OK + +[config] server=http://localhost:8000 +[config] device=cpu episodes=200 batch=5 eps +[config] curriculum: easy,medium +[config] PPO clip_eps=0.2 entropy=0.03 lr=0.0003 + +[network] Parameters: 12,065,650 +[network] Input dim: 23,140 (encoder.base_dim=5785 x 4 frames) +[network] Action dim: 41 (4 move + 4 look + 1 wait + 16 open + 16 close) + +[curriculum] static: easy,medium + step=001 action=door(target_id='door_5', door_state='open') hp=100.0 + step=002 action=move(direction='east') hp=100.0 + step=003 action=door(target_id='door_5', door_state='open') hp=100.0 + step=004 action=move(direction='east') hp=100.0 + step=005 action=move(direction='east') hp=100.0 + step=006 action=door(target_id='door_6', door_state='close') hp=100.0 + step=007 action=door(target_id='door_6', door_state='close') hp=100.0 + step=008 action=move(direction='south') hp=100.0 + step=009 action=door(target_id='door_2', door_state='open') hp=100.0 + step=010 action=door(target_id='door_6', door_state='close') hp=100.0 + step=011 action=move(direction='north') hp= 99.5 + step=012 action=wait() hp= 84.5 + step=013 action=move(direction='south') hp= 69.5 + step=014 action=wait() hp= 67.5 + step=015 action=move(direction='south') hp= 65.5 + step=016 action=wait() hp= 63.5 + step=017 action=wait() hp= 61.5 + step=018 action=move(direction='east') hp= 59.5 + step=019 action=move(direction='west') hp= 57.5 + step=020 action=door(target_id='door_6', door_state='close') hp= 55.5 + step=021 action=move(direction='west') hp= 53.5 + step=022 action=move(direction='west') hp= 51.5 + step=023 action=door(target_id='door_5', door_state='open') hp= 49.5 + step=024 action=move(direction='east') hp= 47.5 + step=025 action=move(direction='west') hp= 45.5 + step=026 action=move(direction='north') hp= 43.5 + step=027 action=move(direction='west') hp= 41.5 + step=028 action=door(target_id='door_5', door_state='open') hp= 39.5 + step=029 action=door(target_id='door_5', door_state='open') hp= 37.5 + step=030 action=move(direction='south') hp= 35.5 + step=031 action=move(direction='south') hp= 33.5 + step=032 action=door(target_id='door_5', door_state='open') hp= 33.0 + step=033 action=move(direction='east') hp= 32.5 + step=034 action=door(target_id='door_6', door_state='close') hp= 32.0 + step=035 action=move(direction='west') hp= 31.5 + step=036 action=move(direction='south') hp= 31.0 + step=037 action=move(direction='south') hp= 30.5 + step=038 action=move(direction='north') hp= 30.5 + step=039 action=move(direction='south') hp= 30.0 + step=040 action=move(direction='west') hp= 30.0 + step=041 action=move(direction='east') hp= 30.0 + step=042 action=move(direction='east') hp= 30.0 + step=043 action=move(direction='south') hp= 30.0 + step=044 action=move(direction='north') hp= 30.0 + step=045 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=046 action=wait() hp= 30.0 + step=047 action=move(direction='west') hp= 30.0 + step=048 action=move(direction='west') hp= 30.0 + step=049 action=move(direction='east') hp= 30.0 + step=050 action=move(direction='south') hp= 30.0 + step=051 action=move(direction='east') hp= 30.0 + step=052 action=wait() hp= 30.0 + step=053 action=wait() hp= 30.0 + step=054 action=move(direction='south') hp= 30.0 + step=055 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=056 action=move(direction='south') hp= 30.0 + step=057 action=move(direction='west') hp= 30.0 + step=058 action=wait() hp= 30.0 + step=059 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=060 action=move(direction='east') hp= 30.0 + step=061 action=move(direction='north') hp= 30.0 + step=062 action=move(direction='north') hp= 30.0 + step=063 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=064 action=move(direction='south') hp= 30.0 + step=065 action=move(direction='west') hp= 30.0 + step=066 action=move(direction='west') hp= 30.0 + step=067 action=wait() hp= 30.0 + step=068 action=move(direction='north') hp= 30.0 + step=069 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=070 action=move(direction='south') hp= 30.0 + step=071 action=wait() hp= 30.0 + step=072 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=073 action=move(direction='east') hp= 30.0 + step=074 action=move(direction='north') hp= 30.0 + step=075 action=move(direction='east') hp= 30.0 + step=076 action=move(direction='south') hp= 30.0 + step=077 action=move(direction='north') hp= 30.0 + step=078 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=079 action=move(direction='west') hp= 30.0 + step=080 action=move(direction='west') hp= 30.0 + step=081 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=082 action=move(direction='east') hp= 30.0 + step=083 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=084 action=move(direction='east') hp= 30.0 + step=085 action=wait() hp= 30.0 + step=086 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=087 action=move(direction='south') hp= 30.0 + step=088 action=wait() hp= 30.0 + step=089 action=move(direction='north') hp= 30.0 + step=090 action=move(direction='south') hp= 30.0 + step=091 action=move(direction='north') hp= 30.0 + step=092 action=move(direction='west') hp= 30.0 + step=093 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=094 action=move(direction='north') hp= 30.0 + step=095 action=move(direction='east') hp= 30.0 + step=096 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=097 action=move(direction='south') hp= 30.0 + step=098 action=move(direction='north') hp= 30.0 + step=099 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=100 action=move(direction='west') hp= 30.0 + step=101 action=wait() hp= 30.0 + step=102 action=move(direction='south') hp= 30.0 + step=103 action=wait() hp= 30.0 + step=104 action=move(direction='east') hp= 30.0 + step=105 action=move(direction='north') hp= 30.0 + step=106 action=wait() hp= 30.0 + step=107 action=move(direction='west') hp= 30.0 + step=108 action=move(direction='south') hp= 30.0 + step=109 action=move(direction='west') hp= 30.0 + step=110 action=move(direction='south') hp= 30.0 + step=111 action=move(direction='east') hp= 30.0 + step=112 action=move(direction='west') hp= 30.0 + step=113 action=move(direction='east') hp= 30.0 + step=114 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=115 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=116 action=wait() hp= 30.0 + step=117 action=move(direction='west') hp= 30.0 + step=118 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=119 action=move(direction='north') hp= 30.0 + step=120 action=move(direction='south') hp= 30.0 + step=121 action=wait() hp= 30.0 + step=122 action=move(direction='south') hp= 30.0 + step=123 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=124 action=move(direction='north') hp= 30.0 + step=125 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=126 action=wait() hp= 30.0 + step=127 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=128 action=wait() hp= 30.0 + step=129 action=wait() hp= 30.0 + step=130 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=131 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=132 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=133 action=wait() hp= 30.0 + step=134 action=move(direction='north') hp= 30.0 + step=135 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=136 action=move(direction='north') hp= 30.0 + step=137 action=move(direction='east') hp= 30.0 + step=138 action=move(direction='west') hp= 30.0 + step=139 action=move(direction='east') hp= 30.0 + step=140 action=move(direction='west') hp= 30.0 + step=141 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=142 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=143 action=move(direction='east') hp= 30.0 + step=144 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=145 action=wait() hp= 30.0 + step=146 action=move(direction='south') hp= 30.0 + step=147 action=move(direction='east') hp= 30.0 + step=148 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=149 action=move(direction='west') hp= 30.0 + step=150 action=move(direction='west') hp= 30.0 + step=151 action=wait() hp= 30.0 + step=152 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=153 action=wait() hp= 30.0 + step=154 action=move(direction='east') hp= 30.0 + step=155 action=wait() hp= 30.0 + step=156 action=move(direction='east') hp= 30.0 + step=157 action=move(direction='south') hp= 30.0 + step=158 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=159 action=wait() hp= 30.0 + step=160 action=wait() hp= 30.0 + step=161 action=move(direction='north') hp= 30.0 + step=162 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=163 action=move(direction='south') hp= 30.0 + step=164 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=165 action=move(direction='south') hp= 30.0 + step=166 action=wait() hp= 30.0 + step=167 action=move(direction='north') hp= 30.0 + step=168 action=move(direction='south') hp= 30.0 + step=169 action=move(direction='north') hp= 30.0 + step=170 action=move(direction='north') hp= 30.0 + step=171 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=172 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=173 action=move(direction='north') hp= 30.0 + step=174 action=move(direction='west') hp= 30.0 + step=175 action=move(direction='north') hp= 30.0 + step=176 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=177 action=wait() hp= 30.0 + step=178 action=move(direction='south') hp= 30.0 + step=179 action=move(direction='west') hp= 30.0 + step=180 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=181 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=182 action=wait() hp= 30.0 + step=183 action=wait() hp= 30.0 + step=184 action=wait() hp= 30.0 + step=185 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=186 action=move(direction='east') hp= 30.0 + step=187 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=188 action=wait() hp= 30.0 + step=189 action=move(direction='east') hp= 30.0 + step=190 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=191 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=192 action=door(target_id='door_5', door_state='open') hp= 30.0 + step=193 action=wait() hp= 30.0 + step=194 action=move(direction='south') hp= 30.0 + step=195 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=196 action=move(direction='north') hp= 30.0 + step=197 action=move(direction='south') hp= 30.0 + step=198 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=199 action=door(target_id='door_5', door_state='close') hp= 30.0 + step=200 action=move(direction='north') hp= 30.0 +ep=0001 [easy ] steps=200 reward= -22.960 evac=0 hp= 30.0 suc30=0.00 r30= -22.96 t=0s + step=001 action=move(direction='west') hp=100.0 +ep=0002 [easy ] steps=001 reward= +16.760 evac=1 hp=100.0 suc30=0.50 r30= -3.10 t=0s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='south') hp=100.0 + step=003 action=door(target_id='door_3', door_state='close') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=door(target_id='door_2', door_state='open') hp=100.0 + step=007 action=door(target_id='door_2', door_state='open') hp=100.0 + step=008 action=door(target_id='door_2', door_state='open') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=move(direction='east') hp=100.0 + step=012 action=move(direction='east') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=door(target_id='door_2', door_state='open') hp=100.0 + step=015 action=move(direction='east') hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='south') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=move(direction='south') hp=100.0 + step=021 action=move(direction='south') hp=100.0 + step=022 action=wait() hp=100.0 + step=023 action=move(direction='south') hp=100.0 + step=024 action=wait() hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=wait() hp=100.0 + step=027 action=move(direction='north') hp=100.0 + step=028 action=wait() hp=100.0 + step=029 action=wait() hp=100.0 + step=030 action=move(direction='north') hp=100.0 + step=031 action=move(direction='north') hp=100.0 + step=032 action=move(direction='south') hp=100.0 + step=033 action=door(target_id='door_3', door_state='close') hp=100.0 + step=034 action=move(direction='south') hp=100.0 + step=035 action=wait() hp=100.0 + step=036 action=move(direction='south') hp=100.0 + step=037 action=move(direction='north') hp=100.0 + step=038 action=wait() hp=100.0 + step=039 action=move(direction='north') hp=100.0 + step=040 action=move(direction='east') hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=move(direction='west') hp=100.0 + step=043 action=wait() hp=100.0 + step=044 action=move(direction='south') hp=100.0 + step=045 action=wait() hp=100.0 + step=046 action=move(direction='north') hp=100.0 + step=047 action=door(target_id='door_3', door_state='close') hp=100.0 + step=048 action=move(direction='east') hp=100.0 + step=049 action=door(target_id='door_3', door_state='close') hp=100.0 + step=050 action=door(target_id='door_3', door_state='close') hp=100.0 + step=051 action=move(direction='east') hp=100.0 + step=052 action=move(direction='south') hp=100.0 + step=053 action=wait() hp=100.0 + step=054 action=door(target_id='door_3', door_state='close') hp=100.0 + step=055 action=move(direction='south') hp=100.0 + step=056 action=wait() hp=100.0 + step=057 action=wait() hp=100.0 + step=058 action=wait() hp=100.0 + step=059 action=door(target_id='door_3', door_state='open') hp=100.0 + step=060 action=wait() hp=100.0 + step=061 action=move(direction='north') hp=100.0 + step=062 action=move(direction='east') hp=100.0 + step=063 action=door(target_id='door_3', door_state='close') hp=100.0 + step=064 action=wait() hp=100.0 + step=065 action=move(direction='west') hp=100.0 + step=066 action=wait() hp=100.0 + step=067 action=wait() hp=100.0 + step=068 action=door(target_id='door_3', door_state='close') hp=100.0 + step=069 action=wait() hp=100.0 + step=070 action=move(direction='south') hp=100.0 + step=071 action=wait() hp=100.0 + step=072 action=move(direction='north') hp=100.0 + step=073 action=move(direction='north') hp=100.0 + step=074 action=move(direction='west') hp=100.0 + step=075 action=wait() hp=100.0 + step=076 action=move(direction='west') hp=100.0 + step=077 action=door(target_id='door_3', door_state='open') hp=100.0 + step=078 action=door(target_id='door_3', door_state='open') hp=100.0 + step=079 action=wait() hp=100.0 + step=080 action=door(target_id='door_3', door_state='open') hp=100.0 + step=081 action=wait() hp=100.0 + step=082 action=door(target_id='door_3', door_state='open') hp=100.0 + step=083 action=door(target_id='door_3', door_state='open') hp=100.0 + step=084 action=door(target_id='door_2', door_state='open') hp=100.0 + step=085 action=move(direction='west') hp=100.0 + step=086 action=move(direction='west') hp=100.0 + step=087 action=move(direction='south') hp=100.0 + step=088 action=move(direction='west') hp=100.0 + step=089 action=wait() hp=100.0 + step=090 action=move(direction='north') hp=100.0 + step=091 action=move(direction='south') hp=100.0 + step=092 action=move(direction='north') hp=100.0 + step=093 action=door(target_id='door_2', door_state='open') hp=100.0 + step=094 action=move(direction='south') hp=100.0 + step=095 action=wait() hp=100.0 + step=096 action=move(direction='north') hp=100.0 + step=097 action=wait() hp=100.0 + step=098 action=move(direction='south') hp=100.0 + step=099 action=move(direction='east') hp=100.0 + step=100 action=move(direction='north') hp=100.0 + step=101 action=move(direction='west') hp=100.0 + step=102 action=move(direction='west') hp=100.0 + step=103 action=move(direction='east') hp=100.0 + step=104 action=move(direction='south') hp=100.0 + step=105 action=move(direction='east') hp=100.0 + step=106 action=door(target_id='door_2', door_state='open') hp=100.0 + step=107 action=door(target_id='door_2', door_state='close') hp=100.0 + step=108 action=move(direction='north') hp=100.0 + step=109 action=move(direction='south') hp=100.0 + step=110 action=move(direction='south') hp=100.0 + step=111 action=move(direction='west') hp=100.0 + step=112 action=move(direction='east') hp=100.0 + step=113 action=wait() hp=100.0 + step=114 action=move(direction='north') hp=100.0 + step=115 action=wait() hp=100.0 + step=116 action=move(direction='west') hp=100.0 + step=117 action=move(direction='north') hp=100.0 + step=118 action=door(target_id='door_2', door_state='open') hp=100.0 + step=119 action=door(target_id='door_2', door_state='open') hp=100.0 + step=120 action=move(direction='east') hp=100.0 + step=121 action=move(direction='south') hp=100.0 + step=122 action=move(direction='south') hp=100.0 + step=123 action=door(target_id='door_2', door_state='open') hp=100.0 + step=124 action=move(direction='west') hp=100.0 + step=125 action=door(target_id='door_2', door_state='close') hp=100.0 + step=126 action=wait() hp=100.0 + step=127 action=move(direction='north') hp=100.0 + step=128 action=move(direction='north') hp=100.0 + step=129 action=wait() hp=100.0 + step=130 action=door(target_id='door_1', door_state='close') hp=100.0 + step=131 action=move(direction='west') hp=100.0 + step=132 action=move(direction='east') hp=100.0 + step=133 action=move(direction='south') hp=100.0 + step=134 action=move(direction='north') hp=100.0 + step=135 action=door(target_id='door_1', door_state='close') hp=100.0 + step=136 action=move(direction='south') hp=100.0 + step=137 action=move(direction='north') hp=100.0 + step=138 action=door(target_id='door_2', door_state='open') hp=100.0 + step=139 action=move(direction='south') hp=100.0 + step=140 action=move(direction='east') hp=100.0 + step=141 action=move(direction='south') hp=100.0 + step=142 action=move(direction='west') hp=100.0 + step=143 action=door(target_id='door_2', door_state='open') hp=100.0 + step=144 action=door(target_id='door_2', door_state='close') hp=100.0 + step=145 action=wait() hp=100.0 + step=146 action=wait() hp=100.0 + step=147 action=move(direction='east') hp=100.0 + step=148 action=door(target_id='door_2', door_state='open') hp=100.0 + step=149 action=wait() hp=100.0 + step=150 action=move(direction='west') hp=100.0 + step=151 action=move(direction='north') hp=100.0 + step=152 action=move(direction='north') hp=100.0 + step=153 action=move(direction='west') hp=100.0 + step=154 action=door(target_id='door_1', door_state='close') hp=100.0 + step=155 action=wait() hp=100.0 + step=156 action=move(direction='east') hp=100.0 + step=157 action=move(direction='east') hp=100.0 + step=158 action=move(direction='south') hp=100.0 + step=159 action=wait() hp=100.0 + step=160 action=move(direction='south') hp=100.0 + step=161 action=door(target_id='door_2', door_state='close') hp=100.0 + step=162 action=door(target_id='door_2', door_state='open') hp=100.0 + step=163 action=move(direction='north') hp=100.0 + step=164 action=move(direction='west') hp=100.0 + step=165 action=move(direction='north') hp=100.0 + step=166 action=move(direction='south') hp=100.0 + step=167 action=door(target_id='door_2', door_state='close') hp=100.0 + step=168 action=wait() hp=100.0 + step=169 action=move(direction='south') hp=100.0 + step=170 action=wait() hp=100.0 + step=171 action=wait() hp=100.0 + step=172 action=move(direction='north') hp=100.0 + step=173 action=move(direction='south') hp=100.0 + step=174 action=move(direction='east') hp=100.0 + step=175 action=wait() hp=100.0 + step=176 action=move(direction='north') hp=100.0 + step=177 action=move(direction='north') hp=100.0 + step=178 action=move(direction='east') hp=100.0 + step=179 action=wait() hp=100.0 + step=180 action=move(direction='east') hp=100.0 + step=181 action=door(target_id='door_3', door_state='open') hp=100.0 + step=182 action=move(direction='east') hp=100.0 + step=183 action=move(direction='east') hp=100.0 + step=184 action=move(direction='west') hp=100.0 + step=185 action=door(target_id='door_3', door_state='open') hp=100.0 + step=186 action=door(target_id='door_3', door_state='open') hp=100.0 + step=187 action=wait() hp=100.0 + step=188 action=wait() hp=100.0 + step=189 action=move(direction='east') hp=100.0 + step=190 action=move(direction='south') hp=100.0 + step=191 action=wait() hp=100.0 + step=192 action=move(direction='north') hp=100.0 + step=193 action=move(direction='west') hp=100.0 + step=194 action=move(direction='west') hp=100.0 + step=195 action=move(direction='north') hp=100.0 + step=196 action=move(direction='north') hp=100.0 + step=197 action=wait() hp=100.0 + step=198 action=move(direction='north') hp=100.0 + step=199 action=move(direction='south') hp=100.0 + step=200 action=move(direction='north') hp=100.0 +ep=0003 [easy ] steps=200 reward= -16.890 evac=0 hp=100.0 suc30=0.33 r30= -7.70 t=1s + step=001 action=move(direction='south') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=door(target_id='door_2', door_state='close') hp=100.0 + step=005 action=door(target_id='door_2', door_state='close') hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=door(target_id='door_5', door_state='close') hp=100.0 + step=009 action=move(direction='east') hp=100.0 + step=010 action=door(target_id='door_6', door_state='close') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='east') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=door(target_id='door_2', door_state='close') hp=100.0 + step=017 action=move(direction='east') hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=door(target_id='door_3', door_state='close') hp=100.0 + step=020 action=move(direction='south') hp=100.0 + step=021 action=move(direction='south') hp=100.0 + step=022 action=move(direction='east') hp=100.0 + step=023 action=move(direction='south') hp=100.0 + step=024 action=move(direction='north') hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=move(direction='north') hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=move(direction='east') hp=100.0 + step=029 action=door(target_id='door_7', door_state='close') hp=100.0 + step=030 action=door(target_id='door_7', door_state='close') hp=100.0 + step=031 action=door(target_id='door_7', door_state='close') hp=100.0 + step=032 action=move(direction='west') hp=100.0 + step=033 action=door(target_id='door_3', door_state='close') hp=100.0 + step=034 action=door(target_id='door_2', door_state='close') hp=100.0 + step=035 action=wait() hp=100.0 + step=036 action=door(target_id='door_3', door_state='close') hp=100.0 + step=037 action=move(direction='south') hp=100.0 + step=038 action=wait() hp=100.0 + step=039 action=move(direction='west') hp=100.0 + step=040 action=move(direction='south') hp=100.0 + step=041 action=move(direction='north') hp=100.0 + step=042 action=door(target_id='door_3', door_state='close') hp=100.0 + step=043 action=door(target_id='door_6', door_state='open') hp=100.0 + step=044 action=door(target_id='door_6', door_state='open') hp=100.0 + step=045 action=door(target_id='door_7', door_state='close') hp=100.0 + step=046 action=move(direction='north') hp=100.0 + step=047 action=move(direction='west') hp=100.0 + step=048 action=move(direction='east') hp=100.0 + step=049 action=door(target_id='door_3', door_state='close') hp=100.0 + step=050 action=move(direction='north') hp=100.0 + step=051 action=move(direction='south') hp=100.0 + step=052 action=wait() hp=100.0 + step=053 action=door(target_id='door_2', door_state='close') hp=100.0 + step=054 action=move(direction='west') hp=100.0 + step=055 action=door(target_id='door_3', door_state='close') hp=100.0 + step=056 action=wait() hp=100.0 + step=057 action=move(direction='north') hp=100.0 + step=058 action=door(target_id='door_3', door_state='close') hp=100.0 + step=059 action=wait() hp=100.0 + step=060 action=move(direction='south') hp=100.0 + step=061 action=door(target_id='door_3', door_state='close') hp=100.0 + step=062 action=door(target_id='door_2', door_state='close') hp=100.0 + step=063 action=wait() hp=100.0 + step=064 action=move(direction='north') hp=100.0 + step=065 action=move(direction='south') hp=100.0 + step=066 action=move(direction='south') hp=100.0 + step=067 action=move(direction='east') hp=100.0 + step=068 action=door(target_id='door_7', door_state='close') hp=100.0 + step=069 action=move(direction='west') hp=100.0 + step=070 action=move(direction='west') hp=100.0 + step=071 action=move(direction='west') hp=100.0 + step=072 action=door(target_id='door_2', door_state='close') hp=100.0 + step=073 action=move(direction='north') hp=100.0 + step=074 action=door(target_id='door_2', door_state='close') hp=100.0 + step=075 action=move(direction='north') hp=100.0 + step=076 action=move(direction='south') hp=100.0 + step=077 action=door(target_id='door_6', door_state='open') hp=100.0 + step=078 action=move(direction='south') hp=100.0 + step=079 action=move(direction='north') hp=100.0 + step=080 action=door(target_id='door_2', door_state='close') hp=100.0 + step=081 action=wait() hp=100.0 + step=082 action=door(target_id='door_1', door_state='close') hp=100.0 + step=083 action=door(target_id='door_2', door_state='close') hp=100.0 + step=084 action=move(direction='east') hp=100.0 + step=085 action=move(direction='north') hp=100.0 + step=086 action=wait() hp=100.0 + step=087 action=wait() hp=100.0 + step=088 action=move(direction='south') hp=100.0 + step=089 action=move(direction='east') hp=100.0 + step=090 action=move(direction='west') hp=100.0 + step=091 action=door(target_id='door_6', door_state='open') hp=100.0 + step=092 action=move(direction='south') hp=100.0 + step=093 action=move(direction='west') hp=100.0 + step=094 action=move(direction='east') hp=100.0 + step=095 action=door(target_id='door_2', door_state='close') hp=100.0 + step=096 action=door(target_id='door_2', door_state='close') hp=100.0 + step=097 action=move(direction='east') hp=100.0 + step=098 action=move(direction='north') hp=100.0 + step=099 action=door(target_id='door_6', door_state='open') hp=100.0 + step=100 action=move(direction='east') hp=100.0 + step=101 action=move(direction='west') hp=100.0 + step=102 action=move(direction='south') hp=100.0 + step=103 action=move(direction='west') hp=100.0 + step=104 action=move(direction='north') hp=100.0 + step=105 action=move(direction='west') hp=100.0 + step=106 action=move(direction='west') hp=100.0 + step=107 action=move(direction='west') hp=100.0 + step=108 action=move(direction='south') hp=100.0 + step=109 action=wait() hp=100.0 + step=110 action=move(direction='east') hp=100.0 + step=111 action=door(target_id='door_6', door_state='open') hp=100.0 + step=112 action=move(direction='north') hp=100.0 + step=113 action=move(direction='west') hp=100.0 + step=114 action=move(direction='north') hp=100.0 + step=115 action=move(direction='south') hp=100.0 + step=116 action=door(target_id='door_1', door_state='close') hp=100.0 + step=117 action=move(direction='south') hp=100.0 + step=118 action=move(direction='south') hp=100.0 + step=119 action=move(direction='north') hp=100.0 + step=120 action=move(direction='east') hp=100.0 + step=121 action=door(target_id='door_2', door_state='close') hp=100.0 + step=122 action=door(target_id='door_6', door_state='open') hp=100.0 + step=123 action=door(target_id='door_5', door_state='close') hp=100.0 + step=124 action=door(target_id='door_6', door_state='open') hp=100.0 + step=125 action=door(target_id='door_1', door_state='close') hp=100.0 + step=126 action=door(target_id='door_2', door_state='close') hp=100.0 + step=127 action=move(direction='south') hp=100.0 + step=128 action=move(direction='east') hp=100.0 + step=129 action=move(direction='south') hp=100.0 + step=130 action=wait() hp=100.0 + step=131 action=wait() hp=100.0 + step=132 action=door(target_id='door_6', door_state='open') hp=100.0 + step=133 action=move(direction='north') hp=100.0 + step=134 action=move(direction='south') hp=100.0 + step=135 action=door(target_id='door_5', door_state='close') hp=100.0 + step=136 action=door(target_id='door_5', door_state='close') hp=100.0 + step=137 action=move(direction='west') hp=100.0 + step=138 action=move(direction='west') hp=100.0 + step=139 action=move(direction='west') hp=100.0 + step=140 action=move(direction='east') hp=100.0 + step=141 action=door(target_id='door_5', door_state='close') hp=100.0 + step=142 action=move(direction='west') hp=100.0 + step=143 action=move(direction='east') hp=100.0 + step=144 action=move(direction='west') hp=100.0 + step=145 action=door(target_id='door_5', door_state='open') hp=100.0 + step=146 action=move(direction='east') hp=100.0 + step=147 action=wait() hp=100.0 + step=148 action=move(direction='north') hp=100.0 + step=149 action=move(direction='north') hp=100.0 + step=150 action=move(direction='west') hp=100.0 + step=151 action=move(direction='north') hp=100.0 + step=152 action=move(direction='east') hp=100.0 + step=153 action=move(direction='west') hp=100.0 + step=154 action=move(direction='east') hp=100.0 + step=155 action=move(direction='south') hp=100.0 + step=156 action=door(target_id='door_5', door_state='close') hp=100.0 + step=157 action=door(target_id='door_5', door_state='close') hp=100.0 + step=158 action=door(target_id='door_5', door_state='close') hp=100.0 + step=159 action=wait() hp=100.0 + step=160 action=door(target_id='door_1', door_state='close') hp=100.0 + step=161 action=door(target_id='door_1', door_state='close') hp=100.0 + step=162 action=move(direction='south') hp=100.0 + step=163 action=door(target_id='door_6', door_state='close') hp=100.0 + step=164 action=move(direction='north') hp=100.0 + step=165 action=move(direction='west') hp=100.0 + step=166 action=move(direction='east') hp=100.0 + step=167 action=move(direction='north') hp=100.0 + step=168 action=door(target_id='door_1', door_state='close') hp=100.0 + step=169 action=move(direction='west') hp=100.0 + step=170 action=move(direction='west') hp=100.0 + step=171 action=door(target_id='door_1', door_state='close') hp=100.0 + step=172 action=door(target_id='door_5', door_state='close') hp=100.0 + step=173 action=move(direction='north') hp=100.0 + step=174 action=move(direction='east') hp=100.0 + step=175 action=door(target_id='door_2', door_state='close') hp=100.0 + step=176 action=door(target_id='door_1', door_state='close') hp=100.0 + step=177 action=move(direction='south') hp=100.0 + step=178 action=move(direction='north') hp=100.0 + step=179 action=move(direction='west') hp=100.0 + step=180 action=wait() hp=100.0 + step=181 action=move(direction='east') hp=100.0 + step=182 action=move(direction='east') hp=100.0 + step=183 action=door(target_id='door_1', door_state='open') hp=100.0 + step=184 action=move(direction='west') hp=100.0 + step=185 action=move(direction='north') hp=100.0 + step=186 action=wait() hp=100.0 + step=187 action=move(direction='south') hp=100.0 + step=188 action=move(direction='north') hp=100.0 + step=189 action=move(direction='south') hp=100.0 + step=190 action=door(target_id='door_2', door_state='close') hp=100.0 + step=191 action=door(target_id='door_2', door_state='close') hp=100.0 + step=192 action=door(target_id='door_1', door_state='close') hp=100.0 + step=193 action=door(target_id='door_1', door_state='open') hp=100.0 + step=194 action=move(direction='east') hp=100.0 + step=195 action=door(target_id='door_1', door_state='close') hp=100.0 + step=196 action=door(target_id='door_1', door_state='open') hp=100.0 + step=197 action=move(direction='east') hp=100.0 + step=198 action=wait() hp=100.0 + step=199 action=door(target_id='door_2', door_state='close') hp=100.0 + step=200 action=door(target_id='door_1', door_state='close') hp=100.0 +ep=0004 [easy ] steps=200 reward= -8.240 evac=0 hp=100.0 suc30=0.25 r30= -7.83 t=2s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='east') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='east') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='east') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='east') hp=100.0 + step=015 action=move(direction='east') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=move(direction='south') hp=100.0 + step=019 action=move(direction='west') hp=100.0 + step=020 action=move(direction='south') hp=100.0 + step=021 action=move(direction='south') hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=move(direction='north') hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=move(direction='north') hp=100.0 + step=027 action=move(direction='north') hp=100.0 + step=028 action=move(direction='east') hp=100.0 + step=029 action=move(direction='south') hp=100.0 + step=030 action=move(direction='north') hp=100.0 + step=031 action=move(direction='east') hp=100.0 + step=032 action=wait() hp=100.0 + step=033 action=move(direction='south') hp=100.0 + step=034 action=move(direction='north') hp=100.0 + step=035 action=wait() hp=100.0 + step=036 action=move(direction='west') hp=100.0 + step=037 action=wait() hp=100.0 + step=038 action=move(direction='east') hp=100.0 + step=039 action=wait() hp=100.0 + step=040 action=move(direction='west') hp=100.0 + step=041 action=move(direction='east') hp=100.0 + step=042 action=move(direction='west') hp=100.0 + step=043 action=wait() hp=100.0 + step=044 action=move(direction='west') hp=100.0 + step=045 action=move(direction='west') hp=100.0 +ep=0005 [easy ] steps=045 reward= +17.340 evac=1 hp=100.0 suc30=0.40 r30= -2.80 t=2s + >> PPO update samples=flushed pi_loss=-0.0122 v_loss=7.4767 entropy=1.6864 kl=0.0014 clip%=0.00 lr=2.93e-04 + step=001 action=door(target_id='door_2', door_state='open') hp=100.0 + step=002 action=door(target_id='door_2', door_state='open') hp=100.0 + step=003 action=door(target_id='door_2', door_state='open') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='south') hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=door(target_id='door_1', door_state='close') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='south') hp=100.0 + step=010 action=move(direction='south') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='east') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='south') hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=door(target_id='door_1', door_state='open') hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=wait() hp=100.0 + step=022 action=move(direction='north') hp=100.0 + step=023 action=move(direction='west') hp=100.0 +ep=0006 [easy ] steps=023 reward= +17.100 evac=1 hp=100.0 suc30=0.50 r30= +0.52 t=3s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='east') hp=100.0 + step=007 action=move(direction='east') hp=100.0 + step=008 action=door(target_id='door_2', door_state='close') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='east') hp=100.0 + step=011 action=door(target_id='door_2', door_state='close') hp=100.0 + step=012 action=door(target_id='door_2', door_state='close') hp=100.0 + step=013 action=move(direction='east') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=move(direction='east') hp=100.0 + step=017 action=move(direction='south') hp=100.0 + step=018 action=door(target_id='door_2', door_state='close') hp=100.0 + step=019 action=door(target_id='door_2', door_state='close') hp=100.0 + step=020 action=door(target_id='door_1', door_state='open') hp=100.0 + step=021 action=move(direction='west') hp=100.0 + step=022 action=move(direction='east') hp=100.0 + step=023 action=door(target_id='door_2', door_state='close') hp=100.0 + step=024 action=door(target_id='door_1', door_state='open') hp=100.0 + step=025 action=door(target_id='door_1', door_state='open') hp=100.0 + step=026 action=door(target_id='door_2', door_state='close') hp=100.0 + step=027 action=move(direction='north') hp=100.0 + step=028 action=move(direction='west') hp=100.0 + step=029 action=move(direction='south') hp=100.0 + step=030 action=door(target_id='door_5', door_state='close') hp=100.0 + step=031 action=door(target_id='door_2', door_state='close') hp=100.0 + step=032 action=move(direction='south') hp=100.0 + step=033 action=move(direction='east') hp=100.0 + step=034 action=move(direction='south') hp=100.0 + step=035 action=wait() hp=100.0 + step=036 action=move(direction='south') hp=100.0 + step=037 action=move(direction='north') hp=100.0 + step=038 action=wait() hp=100.0 + step=039 action=door(target_id='door_6', door_state='close') hp=100.0 + step=040 action=move(direction='west') hp=100.0 + step=041 action=move(direction='west') hp=100.0 + step=042 action=move(direction='east') hp=100.0 + step=043 action=move(direction='west') hp=100.0 + step=044 action=move(direction='east') hp=100.0 + step=045 action=move(direction='east') hp=100.0 + step=046 action=move(direction='west') hp=100.0 + step=047 action=wait() hp=100.0 + step=048 action=door(target_id='door_1', door_state='open') hp=100.0 + step=049 action=wait() hp=100.0 + step=050 action=move(direction='south') hp=100.0 + step=051 action=move(direction='north') hp=100.0 + step=052 action=door(target_id='door_1', door_state='open') hp=100.0 + step=053 action=door(target_id='door_5', door_state='close') hp=100.0 + step=054 action=wait() hp=100.0 + step=055 action=door(target_id='door_1', door_state='open') hp=100.0 + step=056 action=door(target_id='door_6', door_state='close') hp=100.0 + step=057 action=door(target_id='door_6', door_state='close') hp=100.0 + step=058 action=wait() hp=100.0 + step=059 action=move(direction='east') hp=100.0 + step=060 action=move(direction='south') hp=100.0 + step=061 action=door(target_id='door_5', door_state='close') hp=100.0 + step=062 action=move(direction='east') hp=100.0 + step=063 action=move(direction='west') hp=100.0 + step=064 action=move(direction='east') hp=100.0 + step=065 action=move(direction='north') hp=100.0 + step=066 action=door(target_id='door_5', door_state='close') hp=100.0 + step=067 action=move(direction='north') hp=100.0 + step=068 action=move(direction='west') hp=100.0 + step=069 action=door(target_id='door_1', door_state='open') hp=100.0 + step=070 action=move(direction='west') hp=100.0 + step=071 action=move(direction='west') hp=100.0 + step=072 action=move(direction='west') hp=100.0 + step=073 action=move(direction='south') hp=100.0 + step=074 action=door(target_id='door_5', door_state='close') hp=100.0 + step=075 action=move(direction='east') hp=100.0 + step=076 action=door(target_id='door_1', door_state='open') hp=100.0 + step=077 action=move(direction='north') hp=100.0 + step=078 action=door(target_id='door_5', door_state='close') hp=100.0 + step=079 action=move(direction='south') hp=100.0 + step=080 action=door(target_id='door_1', door_state='open') hp=100.0 + step=081 action=door(target_id='door_1', door_state='open') hp=100.0 + step=082 action=wait() hp=100.0 + step=083 action=door(target_id='door_5', door_state='close') hp=100.0 + step=084 action=move(direction='west') hp=100.0 + step=085 action=move(direction='south') hp=100.0 + step=086 action=door(target_id='door_5', door_state='open') hp=100.0 + step=087 action=move(direction='east') hp=100.0 + step=088 action=move(direction='west') hp=100.0 + step=089 action=move(direction='east') hp=100.0 + step=090 action=move(direction='north') hp=100.0 + step=091 action=door(target_id='door_1', door_state='open') hp=100.0 + step=092 action=wait() hp=100.0 + step=093 action=wait() hp=100.0 + step=094 action=wait() hp=100.0 + step=095 action=door(target_id='door_5', door_state='close') hp=100.0 + step=096 action=move(direction='west') hp=100.0 + step=097 action=move(direction='south') hp=100.0 + step=098 action=wait() hp=100.0 + step=099 action=move(direction='east') hp=100.0 + step=100 action=move(direction='east') hp=100.0 + step=101 action=door(target_id='door_5', door_state='open') hp=100.0 + step=102 action=move(direction='west') hp=100.0 + step=103 action=door(target_id='door_5', door_state='close') hp=100.0 + step=104 action=move(direction='west') hp=100.0 + step=105 action=wait() hp=100.0 + step=106 action=door(target_id='door_5', door_state='open') hp=100.0 + step=107 action=move(direction='north') hp=100.0 + step=108 action=move(direction='north') hp=100.0 + step=109 action=wait() hp=100.0 + step=110 action=wait() hp=100.0 + step=111 action=wait() hp=100.0 + step=112 action=door(target_id='door_5', door_state='close') hp=100.0 + step=113 action=door(target_id='door_5', door_state='close') hp=100.0 + step=114 action=door(target_id='door_5', door_state='close') hp=100.0 + step=115 action=move(direction='south') hp=100.0 + step=116 action=door(target_id='door_5', door_state='close') hp=100.0 + step=117 action=wait() hp=100.0 + step=118 action=move(direction='south') hp=100.0 + step=119 action=wait() hp=100.0 + step=120 action=move(direction='east') hp=100.0 + step=121 action=move(direction='east') hp=100.0 + step=122 action=door(target_id='door_6', door_state='close') hp=100.0 + step=123 action=move(direction='north') hp=100.0 + step=124 action=door(target_id='door_1', door_state='open') hp=100.0 + step=125 action=door(target_id='door_1', door_state='open') hp=100.0 + step=126 action=door(target_id='door_1', door_state='open') hp=100.0 + step=127 action=move(direction='south') hp=100.0 + step=128 action=wait() hp=100.0 + step=129 action=move(direction='west') hp=100.0 + step=130 action=door(target_id='door_1', door_state='open') hp=100.0 + step=131 action=door(target_id='door_5', door_state='close') hp=100.0 + step=132 action=door(target_id='door_6', door_state='close') hp=100.0 + step=133 action=wait() hp=100.0 + step=134 action=door(target_id='door_6', door_state='close') hp=100.0 + step=135 action=door(target_id='door_1', door_state='open') hp=100.0 + step=136 action=move(direction='east') hp=100.0 + step=137 action=move(direction='east') hp=100.0 + step=138 action=door(target_id='door_5', door_state='open') hp=100.0 + step=139 action=wait() hp=100.0 + step=140 action=door(target_id='door_5', door_state='open') hp=100.0 + step=141 action=door(target_id='door_6', door_state='close') hp=100.0 + step=142 action=door(target_id='door_6', door_state='close') hp=100.0 + step=143 action=wait() hp=100.0 + step=144 action=move(direction='east') hp=100.0 + step=145 action=door(target_id='door_6', door_state='close') hp=100.0 + step=146 action=move(direction='east') hp=100.0 + step=147 action=move(direction='north') hp=100.0 + step=148 action=move(direction='west') hp=100.0 + step=149 action=move(direction='south') hp=100.0 + step=150 action=door(target_id='door_5', door_state='open') hp=100.0 + step=151 action=door(target_id='door_5', door_state='open') hp=100.0 + step=152 action=move(direction='north') hp=100.0 + step=153 action=wait() hp=100.0 + step=154 action=wait() hp=100.0 + step=155 action=door(target_id='door_2', door_state='close') hp=100.0 + step=156 action=door(target_id='door_5', door_state='open') hp=100.0 + step=157 action=move(direction='west') hp=100.0 + step=158 action=move(direction='east') hp=100.0 + step=159 action=move(direction='north') hp=100.0 + step=160 action=door(target_id='door_6', door_state='open') hp=100.0 + step=161 action=wait() hp=100.0 + step=162 action=move(direction='north') hp=100.0 + step=163 action=move(direction='south') hp=100.0 + step=164 action=door(target_id='door_2', door_state='close') hp=100.0 + step=165 action=move(direction='east') hp=100.0 + step=166 action=move(direction='west') hp=100.0 + step=167 action=move(direction='north') hp=100.0 + step=168 action=door(target_id='door_2', door_state='close') hp=100.0 + step=169 action=wait() hp=100.0 + step=170 action=move(direction='west') hp=100.0 + step=171 action=door(target_id='door_1', door_state='open') hp=100.0 + step=172 action=door(target_id='door_2', door_state='close') hp=100.0 + step=173 action=move(direction='north') hp=100.0 + step=174 action=move(direction='west') hp=100.0 + step=175 action=door(target_id='door_1', door_state='open') hp=100.0 + step=176 action=move(direction='south') hp=100.0 + step=177 action=move(direction='north') hp=100.0 + step=178 action=wait() hp=100.0 + step=179 action=move(direction='south') hp=100.0 + step=180 action=move(direction='north') hp=100.0 + step=181 action=wait() hp=100.0 + step=182 action=move(direction='south') hp=100.0 + step=183 action=move(direction='east') hp=100.0 + step=184 action=move(direction='south') hp=100.0 + step=185 action=move(direction='west') hp=100.0 + step=186 action=move(direction='east') hp=100.0 + step=187 action=move(direction='north') hp=100.0 + step=188 action=move(direction='west') hp=100.0 + step=189 action=move(direction='north') hp=100.0 + step=190 action=door(target_id='door_2', door_state='close') hp=100.0 + step=191 action=move(direction='west') hp=100.0 + step=192 action=move(direction='south') hp=100.0 + step=193 action=move(direction='north') hp=100.0 + step=194 action=door(target_id='door_1', door_state='close') hp=100.0 + step=195 action=wait() hp=100.0 + step=196 action=door(target_id='door_5', door_state='open') hp=100.0 + step=197 action=door(target_id='door_2', door_state='close') hp=100.0 + step=198 action=door(target_id='door_5', door_state='open') hp=100.0 + step=199 action=wait() hp=100.0 + step=200 action=door(target_id='door_2', door_state='close') hp=100.0 +ep=0007 [easy ] steps=200 reward= -7.530 evac=0 hp=100.0 suc30=0.43 r30= -0.63 t=4s + step=001 action=move(direction='south') hp=100.0 + step=002 action=move(direction='east') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='south') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=move(direction='east') hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=move(direction='north') hp=100.0 + step=021 action=move(direction='south') hp=100.0 + step=022 action=move(direction='south') hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=move(direction='north') hp=100.0 + step=025 action=move(direction='north') hp=100.0 + step=026 action=move(direction='east') hp=100.0 + step=027 action=move(direction='north') hp=100.0 + step=028 action=move(direction='south') hp=100.0 + step=029 action=move(direction='east') hp=100.0 + step=030 action=move(direction='north') hp=100.0 + step=031 action=move(direction='west') hp=100.0 + step=032 action=move(direction='west') hp=100.0 + step=033 action=move(direction='south') hp=100.0 + step=034 action=move(direction='north') hp=100.0 + step=035 action=move(direction='south') hp=100.0 + step=036 action=move(direction='east') hp=100.0 + step=037 action=move(direction='east') hp=100.0 + step=038 action=move(direction='east') hp=100.0 + step=039 action=move(direction='west') hp=100.0 + step=040 action=wait() hp=100.0 + step=041 action=move(direction='north') hp=100.0 + step=042 action=move(direction='west') hp=100.0 + step=043 action=wait() hp=100.0 + step=044 action=wait() hp=100.0 + step=045 action=wait() hp=100.0 + step=046 action=wait() hp=100.0 + step=047 action=move(direction='south') hp=100.0 + step=048 action=move(direction='west') hp=100.0 + step=049 action=move(direction='east') hp=100.0 + step=050 action=move(direction='north') hp=100.0 + step=051 action=move(direction='south') hp=100.0 + step=052 action=move(direction='north') hp=100.0 + step=053 action=move(direction='east') hp=100.0 + step=054 action=move(direction='west') hp=100.0 + step=055 action=wait() hp=100.0 + step=056 action=move(direction='west') hp=100.0 + step=057 action=wait() hp=100.0 + step=058 action=move(direction='west') hp=100.0 +ep=0008 [easy ] steps=058 reward= +17.150 evac=1 hp=100.0 suc30=0.50 r30= +1.59 t=4s + step=001 action=move(direction='north') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='south') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='north') hp=100.0 +ep=0009 [easy ] steps=012 reward= +18.340 evac=1 hp=100.0 suc30=0.56 r30= +3.45 t=4s + step=001 action=door(target_id='door_5', door_state='open') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=door(target_id='door_1', door_state='open') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=move(direction='east') hp=100.0 + step=012 action=move(direction='east') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=door(target_id='door_6', door_state='open') hp=100.0 + step=015 action=move(direction='east') hp=100.0 + step=016 action=move(direction='south') hp=100.0 + step=017 action=door(target_id='door_2', door_state='open') hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=door(target_id='door_2', door_state='open') hp=100.0 + step=022 action=wait() hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=door(target_id='door_2', door_state='close') hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=door(target_id='door_2', door_state='open') hp=100.0 + step=027 action=move(direction='west') hp=100.0 + step=028 action=move(direction='north') hp=100.0 + step=029 action=wait() hp=100.0 + step=030 action=move(direction='west') hp=100.0 + step=031 action=move(direction='south') hp=100.0 + step=032 action=move(direction='east') hp=100.0 + step=033 action=move(direction='east') hp=100.0 + step=034 action=move(direction='north') hp=100.0 + step=035 action=wait() hp=100.0 + step=036 action=door(target_id='door_6', door_state='open') hp=100.0 + step=037 action=door(target_id='door_3', door_state='open') hp=100.0 + step=038 action=wait() hp=100.0 + step=039 action=door(target_id='door_1', door_state='open') hp=100.0 + step=040 action=move(direction='north') hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=move(direction='south') hp=100.0 + step=043 action=door(target_id='door_1', door_state='open') hp=100.0 + step=044 action=door(target_id='door_6', door_state='open') hp=100.0 + step=045 action=door(target_id='door_1', door_state='open') hp=100.0 + step=046 action=wait() hp=100.0 + step=047 action=wait() hp=100.0 + step=048 action=door(target_id='door_1', door_state='open') hp=100.0 + step=049 action=door(target_id='door_2', door_state='close') hp=100.0 + step=050 action=door(target_id='door_6', door_state='open') hp=100.0 + step=051 action=move(direction='south') hp=100.0 + step=052 action=move(direction='south') hp=100.0 + step=053 action=move(direction='east') hp=100.0 + step=054 action=move(direction='north') hp=100.0 + step=055 action=door(target_id='door_2', door_state='open') hp=100.0 + step=056 action=door(target_id='door_3', door_state='open') hp=100.0 + step=057 action=door(target_id='door_3', door_state='open') hp=100.0 + step=058 action=door(target_id='door_2', door_state='open') hp=100.0 + step=059 action=door(target_id='door_3', door_state='open') hp=100.0 + step=060 action=door(target_id='door_6', door_state='open') hp=100.0 + step=061 action=move(direction='north') hp=100.0 + step=062 action=move(direction='east') hp=100.0 + step=063 action=door(target_id='door_3', door_state='open') hp=100.0 + step=064 action=door(target_id='door_3', door_state='open') hp=100.0 + step=065 action=move(direction='west') hp=100.0 + step=066 action=door(target_id='door_2', door_state='open') hp=100.0 + step=067 action=door(target_id='door_2', door_state='close') hp=100.0 + step=068 action=move(direction='east') hp=100.0 + step=069 action=door(target_id='door_3', door_state='open') hp=100.0 + step=070 action=door(target_id='door_3', door_state='open') hp=100.0 + step=071 action=move(direction='west') hp=100.0 + step=072 action=wait() hp=100.0 + step=073 action=door(target_id='door_3', door_state='open') hp=100.0 + step=074 action=move(direction='west') hp=100.0 + step=075 action=move(direction='west') hp=100.0 + step=076 action=wait() hp=100.0 + step=077 action=move(direction='west') hp=100.0 + step=078 action=move(direction='south') hp=100.0 + step=079 action=move(direction='south') hp=100.0 + step=080 action=move(direction='east') hp=100.0 + step=081 action=door(target_id='door_6', door_state='open') hp=100.0 + step=082 action=door(target_id='door_2', door_state='open') hp=100.0 + step=083 action=door(target_id='door_2', door_state='open') hp=100.0 + step=084 action=door(target_id='door_2', door_state='open') hp=100.0 + step=085 action=move(direction='north') hp=100.0 + step=086 action=door(target_id='door_6', door_state='open') hp=100.0 + step=087 action=door(target_id='door_2', door_state='open') hp=100.0 + step=088 action=move(direction='north') hp=100.0 + step=089 action=door(target_id='door_1', door_state='open') hp=100.0 + step=090 action=door(target_id='door_2', door_state='open') hp=100.0 + step=091 action=move(direction='south') hp=100.0 + step=092 action=door(target_id='door_1', door_state='open') hp=100.0 + step=093 action=wait() hp=100.0 + step=094 action=move(direction='north') hp=100.0 + step=095 action=wait() hp=100.0 + step=096 action=door(target_id='door_1', door_state='open') hp=100.0 + step=097 action=move(direction='south') hp=100.0 + step=098 action=move(direction='east') hp=100.0 + step=099 action=wait() hp=100.0 + step=100 action=move(direction='east') hp=100.0 + step=101 action=wait() hp=100.0 + step=102 action=move(direction='east') hp=100.0 + step=103 action=move(direction='north') hp=100.0 + step=104 action=door(target_id='door_3', door_state='open') hp=100.0 + step=105 action=door(target_id='door_2', door_state='close') hp=100.0 + step=106 action=move(direction='west') hp=100.0 + step=107 action=wait() hp=100.0 + step=108 action=move(direction='east') hp=100.0 + step=109 action=wait() hp=100.0 + step=110 action=move(direction='west') hp=100.0 + step=111 action=move(direction='south') hp=100.0 + step=112 action=move(direction='south') hp=100.0 + step=113 action=move(direction='south') hp=100.0 + step=114 action=move(direction='south') hp=100.0 + step=115 action=door(target_id='door_6', door_state='open') hp=100.0 + step=116 action=move(direction='west') hp=100.0 + step=117 action=door(target_id='door_2', door_state='close') hp=100.0 + step=118 action=move(direction='west') hp=100.0 + step=119 action=move(direction='west') hp=100.0 + step=120 action=wait() hp=100.0 + step=121 action=move(direction='west') hp=100.0 + step=122 action=move(direction='east') hp=100.0 + step=123 action=move(direction='north') hp=100.0 + step=124 action=wait() hp=100.0 + step=125 action=move(direction='east') hp=100.0 + step=126 action=move(direction='south') hp=100.0 + step=127 action=door(target_id='door_5', door_state='open') hp=100.0 + step=128 action=door(target_id='door_5', door_state='open') hp=100.0 + step=129 action=move(direction='west') hp=100.0 + step=130 action=wait() hp=100.0 + step=131 action=move(direction='north') hp=100.0 + step=132 action=door(target_id='door_5', door_state='open') hp=100.0 + step=133 action=move(direction='east') hp=100.0 + step=134 action=move(direction='north') hp=100.0 + step=135 action=door(target_id='door_2', door_state='close') hp=100.0 + step=136 action=wait() hp=100.0 + step=137 action=move(direction='west') hp=100.0 + step=138 action=door(target_id='door_2', door_state='close') hp=100.0 + step=139 action=door(target_id='door_6', door_state='close') hp=100.0 + step=140 action=wait() hp=100.0 + step=141 action=wait() hp=100.0 + step=142 action=door(target_id='door_1', door_state='open') hp=100.0 + step=143 action=move(direction='north') hp=100.0 + step=144 action=wait() hp=100.0 + step=145 action=wait() hp=100.0 + step=146 action=wait() hp=100.0 + step=147 action=wait() hp=100.0 + step=148 action=move(direction='north') hp=100.0 + step=149 action=move(direction='east') hp=100.0 + step=150 action=wait() hp=100.0 + step=151 action=door(target_id='door_2', door_state='close') hp=100.0 + step=152 action=door(target_id='door_2', door_state='open') hp=100.0 + step=153 action=wait() hp=100.0 + step=154 action=door(target_id='door_2', door_state='close') hp=100.0 + step=155 action=move(direction='east') hp=100.0 + step=156 action=door(target_id='door_6', door_state='close') hp=100.0 + step=157 action=move(direction='south') hp=100.0 + step=158 action=move(direction='south') hp=100.0 + step=159 action=wait() hp=100.0 + step=160 action=door(target_id='door_2', door_state='open') hp=100.0 + step=161 action=move(direction='west') hp=100.0 + step=162 action=move(direction='east') hp=100.0 + step=163 action=move(direction='west') hp=100.0 + step=164 action=move(direction='west') hp=100.0 + step=165 action=wait() hp=100.0 + step=166 action=door(target_id='door_6', door_state='close') hp=100.0 + step=167 action=move(direction='east') hp=100.0 + step=168 action=move(direction='west') hp=100.0 + step=169 action=door(target_id='door_2', door_state='open') hp=100.0 + step=170 action=door(target_id='door_2', door_state='open') hp=100.0 + step=171 action=door(target_id='door_6', door_state='close') hp=100.0 + step=172 action=move(direction='east') hp=100.0 + step=173 action=door(target_id='door_6', door_state='close') hp=100.0 + step=174 action=door(target_id='door_2', door_state='open') hp=100.0 + step=175 action=move(direction='east') hp=100.0 + step=176 action=wait() hp=100.0 + step=177 action=door(target_id='door_2', door_state='open') hp=100.0 + step=178 action=move(direction='east') hp=100.0 + step=179 action=move(direction='west') hp=100.0 + step=180 action=move(direction='south') hp=100.0 + step=181 action=wait() hp=100.0 + step=182 action=wait() hp=100.0 + step=183 action=move(direction='north') hp=100.0 + step=184 action=move(direction='south') hp=100.0 + step=185 action=door(target_id='door_2', door_state='open') hp=100.0 + step=186 action=move(direction='east') hp=100.0 + step=187 action=move(direction='north') hp=100.0 + step=188 action=move(direction='west') hp=100.0 + step=189 action=wait() hp=100.0 + step=190 action=move(direction='east') hp=100.0 + step=191 action=move(direction='north') hp=100.0 + step=192 action=door(target_id='door_2', door_state='open') hp=100.0 + step=193 action=move(direction='west') hp=100.0 + step=194 action=move(direction='east') hp=100.0 + step=195 action=door(target_id='door_6', door_state='close') hp=100.0 + step=196 action=door(target_id='door_3', door_state='open') hp=100.0 + step=197 action=wait() hp=100.0 + step=198 action=wait() hp=100.0 + step=199 action=move(direction='north') hp=100.0 + step=200 action=door(target_id='door_3', door_state='open') hp=100.0 +ep=0010 [easy ] steps=200 reward= -12.040 evac=0 hp=100.0 suc30=0.50 r30= +1.90 t=5s + >> PPO update samples=flushed pi_loss=-0.0045 v_loss=12.5066 entropy=1.8199 kl=0.0010 clip%=0.00 lr=2.87e-04 + step=001 action=wait() hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='east') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='south') hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='south') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='east') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='south') hp=100.0 + step=017 action=move(direction='south') hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=move(direction='south') hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=wait() hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=move(direction='north') hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=move(direction='east') hp=100.0 + step=027 action=move(direction='east') hp=100.0 + step=028 action=wait() hp=100.0 + step=029 action=move(direction='west') hp=100.0 + step=030 action=move(direction='south') hp=100.0 + step=031 action=move(direction='west') hp=100.0 + step=032 action=wait() hp=100.0 + step=033 action=move(direction='east') hp=100.0 + step=034 action=move(direction='north') hp=100.0 + step=035 action=move(direction='west') hp=100.0 + step=036 action=move(direction='south') hp=100.0 + step=037 action=move(direction='east') hp=100.0 + step=038 action=move(direction='west') hp=100.0 + step=039 action=move(direction='north') hp=100.0 + step=040 action=move(direction='north') hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=move(direction='south') hp=100.0 + step=043 action=move(direction='north') hp=100.0 + step=044 action=move(direction='north') hp=100.0 + step=045 action=move(direction='east') hp=100.0 + step=046 action=wait() hp=100.0 + step=047 action=wait() hp=100.0 + step=048 action=move(direction='north') hp=100.0 + step=049 action=move(direction='west') hp=100.0 + step=050 action=wait() hp=100.0 + step=051 action=wait() hp=100.0 + step=052 action=move(direction='north') hp=100.0 + step=053 action=move(direction='east') hp=100.0 + step=054 action=move(direction='south') hp=100.0 + step=055 action=move(direction='west') hp=100.0 + step=056 action=move(direction='north') hp=100.0 + step=057 action=move(direction='south') hp=100.0 + step=058 action=move(direction='north') hp=100.0 + step=059 action=move(direction='north') hp=100.0 + step=060 action=move(direction='south') hp=100.0 + step=061 action=wait() hp=100.0 + step=062 action=move(direction='north') hp=100.0 + step=063 action=wait() hp=100.0 + step=064 action=move(direction='north') hp=100.0 + step=065 action=move(direction='south') hp=100.0 + step=066 action=move(direction='south') hp=100.0 + step=067 action=move(direction='south') hp=100.0 + step=068 action=move(direction='south') hp=100.0 + step=069 action=move(direction='south') hp=100.0 + step=070 action=move(direction='north') hp=100.0 + step=071 action=wait() hp=100.0 + step=072 action=move(direction='north') hp=100.0 + step=073 action=wait() hp=100.0 + step=074 action=move(direction='north') hp=100.0 + step=075 action=wait() hp=100.0 + step=076 action=wait() hp=100.0 + step=077 action=move(direction='north') hp=100.0 + step=078 action=wait() hp=100.0 + step=079 action=move(direction='north') hp=100.0 + step=080 action=move(direction='east') hp=100.0 + step=081 action=move(direction='north') hp=100.0 + step=082 action=wait() hp=100.0 + step=083 action=move(direction='south') hp=100.0 + step=084 action=wait() hp=100.0 + step=085 action=move(direction='west') hp=100.0 + step=086 action=move(direction='east') hp=100.0 + step=087 action=move(direction='west') hp=100.0 + step=088 action=wait() hp=100.0 + step=089 action=move(direction='east') hp=100.0 + step=090 action=wait() hp=100.0 + step=091 action=move(direction='west') hp=100.0 + step=092 action=wait() hp=100.0 + step=093 action=move(direction='east') hp=100.0 + step=094 action=wait() hp=100.0 + step=095 action=move(direction='north') hp=100.0 + step=096 action=move(direction='north') hp=100.0 + step=097 action=move(direction='east') hp=100.0 + step=098 action=move(direction='north') hp=100.0 + step=099 action=move(direction='east') hp=100.0 + step=100 action=move(direction='west') hp=100.0 + step=101 action=move(direction='south') hp=100.0 + step=102 action=move(direction='west') hp=100.0 + step=103 action=wait() hp=100.0 + step=104 action=move(direction='north') hp=100.0 + step=105 action=move(direction='west') hp=100.0 + step=106 action=wait() hp=100.0 + step=107 action=wait() hp=100.0 + step=108 action=move(direction='west') hp=100.0 +ep=0011 [easy ] steps=108 reward= +10.180 evac=1 hp=100.0 suc30=0.55 r30= +2.66 t=6s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='south') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=move(direction='south') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=move(direction='south') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='south') hp=100.0 + step=013 action=move(direction='south') hp=100.0 + step=014 action=door(target_id='door_3', door_state='open') hp=100.0 + step=015 action=move(direction='east') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=door(target_id='door_3', door_state='open') hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=move(direction='south') hp=100.0 + step=022 action=move(direction='east') hp=100.0 + step=023 action=move(direction='east') hp=100.0 + step=024 action=move(direction='east') hp=100.0 + step=025 action=door(target_id='door_3', door_state='open') hp=100.0 + step=026 action=move(direction='west') hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=wait() hp=100.0 + step=029 action=move(direction='west') hp=100.0 + step=030 action=wait() hp=100.0 + step=031 action=door(target_id='door_3', door_state='open') hp=100.0 + step=032 action=move(direction='west') hp=100.0 + step=033 action=door(target_id='door_3', door_state='open') hp=100.0 + step=034 action=move(direction='east') hp=100.0 + step=035 action=door(target_id='door_3', door_state='open') hp=100.0 + step=036 action=wait() hp=100.0 + step=037 action=wait() hp=100.0 + step=038 action=move(direction='west') hp=100.0 + step=039 action=move(direction='north') hp=100.0 + step=040 action=wait() hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=move(direction='south') hp=100.0 + step=043 action=door(target_id='door_3', door_state='open') hp=100.0 + step=044 action=move(direction='north') hp=100.0 + step=045 action=move(direction='south') hp=100.0 + step=046 action=move(direction='east') hp=100.0 + step=047 action=move(direction='west') hp=100.0 + step=048 action=move(direction='south') hp=100.0 + step=049 action=wait() hp=100.0 + step=050 action=wait() hp=100.0 + step=051 action=wait() hp=100.0 + step=052 action=wait() hp=100.0 + step=053 action=move(direction='south') hp=100.0 + step=054 action=wait() hp=100.0 + step=055 action=move(direction='south') hp=100.0 + step=056 action=move(direction='south') hp=100.0 + step=057 action=move(direction='north') hp=100.0 + step=058 action=wait() hp=100.0 + step=059 action=move(direction='south') hp=100.0 + step=060 action=move(direction='south') hp=100.0 + step=061 action=wait() hp=100.0 + step=062 action=move(direction='south') hp=100.0 + step=063 action=move(direction='north') hp=100.0 + step=064 action=wait() hp=100.0 + step=065 action=move(direction='north') hp=100.0 + step=066 action=move(direction='north') hp=100.0 + step=067 action=wait() hp=100.0 + step=068 action=move(direction='south') hp=100.0 + step=069 action=move(direction='north') hp=100.0 + step=070 action=wait() hp=100.0 + step=071 action=wait() hp=100.0 + step=072 action=wait() hp=100.0 + step=073 action=move(direction='north') hp=100.0 + step=074 action=wait() hp=100.0 + step=075 action=wait() hp=100.0 + step=076 action=move(direction='north') hp=100.0 + step=077 action=move(direction='north') hp=100.0 + step=078 action=move(direction='west') hp=100.0 + step=079 action=move(direction='west') hp=100.0 + step=080 action=wait() hp=100.0 + step=081 action=move(direction='south') hp=100.0 + step=082 action=move(direction='west') hp=100.0 + step=083 action=door(target_id='door_2', door_state='close') hp=100.0 + step=084 action=move(direction='north') hp=100.0 + step=085 action=move(direction='west') hp=100.0 + step=086 action=door(target_id='door_1', door_state='close') hp=100.0 + step=087 action=door(target_id='door_1', door_state='close') hp=100.0 + step=088 action=wait() hp=100.0 + step=089 action=door(target_id='door_2', door_state='close') hp=100.0 + step=090 action=move(direction='west') hp=100.0 + step=091 action=wait() hp=100.0 + step=092 action=wait() hp=100.0 + step=093 action=move(direction='east') hp=100.0 + step=094 action=move(direction='west') hp=100.0 + step=095 action=wait() hp=100.0 + step=096 action=move(direction='east') hp=100.0 + step=097 action=wait() hp=100.0 + step=098 action=door(target_id='door_1', door_state='close') hp=100.0 + step=099 action=door(target_id='door_2', door_state='close') hp=100.0 + step=100 action=move(direction='west') hp=100.0 + step=101 action=wait() hp=100.0 + step=102 action=move(direction='east') hp=100.0 + step=103 action=door(target_id='door_1', door_state='close') hp=100.0 + step=104 action=wait() hp=100.0 + step=105 action=door(target_id='door_1', door_state='close') hp=100.0 + step=106 action=wait() hp=100.0 + step=107 action=move(direction='east') hp=100.0 + step=108 action=door(target_id='door_1', door_state='close') hp=100.0 + step=109 action=wait() hp=100.0 + step=110 action=wait() hp=100.0 + step=111 action=move(direction='south') hp=100.0 + step=112 action=move(direction='north') hp=100.0 + step=113 action=door(target_id='door_2', door_state='close') hp=100.0 + step=114 action=move(direction='east') hp=100.0 + step=115 action=move(direction='west') hp=100.0 + step=116 action=move(direction='west') hp=100.0 + step=117 action=move(direction='west') hp=100.0 + step=118 action=wait() hp=100.0 + step=119 action=move(direction='west') hp=100.0 + step=120 action=door(target_id='door_1', door_state='close') hp=100.0 + step=121 action=move(direction='east') hp=100.0 + step=122 action=move(direction='west') hp=100.0 + step=123 action=wait() hp=100.0 + step=124 action=door(target_id='door_1', door_state='close') hp=100.0 + step=125 action=wait() hp=100.0 + step=126 action=move(direction='east') hp=100.0 + step=127 action=move(direction='south') hp=100.0 + step=128 action=move(direction='north') hp=100.0 + step=129 action=move(direction='west') hp=100.0 + step=130 action=move(direction='east') hp=100.0 + step=131 action=move(direction='west') hp=100.0 + step=132 action=move(direction='west') hp=100.0 +ep=0012 [easy ] steps=132 reward= +7.700 evac=1 hp=100.0 suc30=0.58 r30= +3.08 t=6s + step=001 action=door(target_id='door_2', door_state='close') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=door(target_id='door_2', door_state='close') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=door(target_id='door_6', door_state='open') hp=100.0 + step=006 action=move(direction='south') hp=100.0 + step=007 action=move(direction='south') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=door(target_id='door_6', door_state='open') hp=100.0 + step=012 action=door(target_id='door_5', door_state='close') hp=100.0 + step=013 action=move(direction='east') hp=100.0 + step=014 action=door(target_id='door_6', door_state='open') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=move(direction='south') hp=100.0 + step=018 action=door(target_id='door_6', door_state='close') hp=100.0 + step=019 action=door(target_id='door_6', door_state='close') hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=door(target_id='door_6', door_state='close') hp=100.0 + step=023 action=door(target_id='door_6', door_state='close') hp=100.0 + step=024 action=move(direction='west') hp=100.0 + step=025 action=move(direction='north') hp=100.0 + step=026 action=door(target_id='door_1', door_state='close') hp=100.0 + step=027 action=move(direction='north') hp=100.0 + step=028 action=door(target_id='door_1', door_state='close') hp=100.0 + step=029 action=door(target_id='door_1', door_state='open') hp=100.0 + step=030 action=wait() hp=100.0 + step=031 action=door(target_id='door_1', door_state='close') hp=100.0 + step=032 action=door(target_id='door_1', door_state='open') hp=100.0 + step=033 action=move(direction='west') hp=100.0 + step=034 action=door(target_id='door_5', door_state='close') hp=100.0 + step=035 action=door(target_id='door_1', door_state='close') hp=100.0 + step=036 action=wait() hp=100.0 + step=037 action=wait() hp=100.0 + step=038 action=move(direction='north') hp=100.0 + step=039 action=move(direction='south') hp=100.0 + step=040 action=move(direction='south') hp=100.0 + step=041 action=move(direction='south') hp=100.0 + step=042 action=door(target_id='door_1', door_state='close') hp=100.0 + step=043 action=move(direction='south') hp=100.0 + step=044 action=move(direction='east') hp=100.0 + step=045 action=door(target_id='door_6', door_state='close') hp=100.0 + step=046 action=wait() hp=100.0 + step=047 action=move(direction='east') hp=100.0 + step=048 action=move(direction='north') hp=100.0 + step=049 action=wait() hp=100.0 + step=050 action=door(target_id='door_6', door_state='close') hp=100.0 + step=051 action=door(target_id='door_5', door_state='close') hp=100.0 + step=052 action=door(target_id='door_5', door_state='close') hp=100.0 + step=053 action=door(target_id='door_5', door_state='close') hp=100.0 + step=054 action=door(target_id='door_1', door_state='close') hp=100.0 + step=055 action=door(target_id='door_1', door_state='close') hp=100.0 + step=056 action=door(target_id='door_5', door_state='close') hp=100.0 + step=057 action=move(direction='south') hp=100.0 + step=058 action=move(direction='east') hp=100.0 + step=059 action=wait() hp=100.0 + step=060 action=move(direction='north') hp=100.0 + step=061 action=move(direction='north') hp=100.0 + step=062 action=door(target_id='door_6', door_state='close') hp=100.0 + step=063 action=move(direction='south') hp=100.0 + step=064 action=move(direction='east') hp=100.0 + step=065 action=door(target_id='door_2', door_state='close') hp=100.0 + step=066 action=door(target_id='door_2', door_state='close') hp=100.0 + step=067 action=move(direction='west') hp=100.0 + step=068 action=move(direction='south') hp=100.0 + step=069 action=wait() hp=100.0 + step=070 action=move(direction='west') hp=100.0 + step=071 action=move(direction='east') hp=100.0 + step=072 action=door(target_id='door_5', door_state='close') hp=100.0 + step=073 action=move(direction='east') hp=100.0 + step=074 action=move(direction='north') hp=100.0 + step=075 action=door(target_id='door_6', door_state='close') hp=100.0 + step=076 action=move(direction='south') hp=100.0 + step=077 action=door(target_id='door_5', door_state='close') hp=100.0 + step=078 action=move(direction='west') hp=100.0 + step=079 action=move(direction='west') hp=100.0 + step=080 action=door(target_id='door_6', door_state='close') hp=100.0 + step=081 action=move(direction='north') hp=100.0 + step=082 action=move(direction='west') hp=100.0 + step=083 action=door(target_id='door_1', door_state='close') hp=100.0 + step=084 action=move(direction='east') hp=100.0 + step=085 action=door(target_id='door_5', door_state='close') hp=100.0 + step=086 action=door(target_id='door_1', door_state='close') hp=100.0 + step=087 action=move(direction='west') hp=100.0 + step=088 action=move(direction='north') hp=100.0 + step=089 action=door(target_id='door_1', door_state='close') hp=100.0 + step=090 action=move(direction='west') hp=100.0 + step=091 action=move(direction='east') hp=100.0 + step=092 action=move(direction='north') hp=100.0 + step=093 action=wait() hp=100.0 + step=094 action=move(direction='west') hp=100.0 + step=095 action=move(direction='north') hp=100.0 + step=096 action=door(target_id='door_1', door_state='close') hp=100.0 + step=097 action=move(direction='east') hp=100.0 + step=098 action=door(target_id='door_5', door_state='close') hp=100.0 + step=099 action=move(direction='west') hp=100.0 + step=100 action=move(direction='east') hp=100.0 + step=101 action=door(target_id='door_2', door_state='close') hp=100.0 + step=102 action=door(target_id='door_2', door_state='close') hp=100.0 + step=103 action=door(target_id='door_2', door_state='close') hp=100.0 + step=104 action=move(direction='south') hp=100.0 + step=105 action=wait() hp=100.0 + step=106 action=move(direction='west') hp=100.0 + step=107 action=move(direction='west') hp=100.0 +ep=0013 [easy ] steps=107 reward= +14.170 evac=1 hp=100.0 suc30=0.62 r30= +3.93 t=7s + step=001 action=move(direction='south') hp=100.0 + step=002 action=move(direction='east') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=move(direction='south') hp=100.0 + step=006 action=move(direction='east') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='east') hp=100.0 + step=009 action=move(direction='east') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='east') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=move(direction='south') hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=move(direction='south') hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=move(direction='north') hp=100.0 + step=024 action=move(direction='west') hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=wait() hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=move(direction='west') hp=100.0 + step=029 action=wait() hp=100.0 + step=030 action=move(direction='south') hp=100.0 + step=031 action=move(direction='north') hp=100.0 + step=032 action=wait() hp=100.0 + step=033 action=move(direction='north') hp=100.0 + step=034 action=wait() hp=100.0 + step=035 action=wait() hp=100.0 + step=036 action=move(direction='north') hp=100.0 + step=037 action=wait() hp=100.0 + step=038 action=wait() hp=100.0 + step=039 action=move(direction='north') hp=100.0 + step=040 action=move(direction='south') hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=move(direction='east') hp=100.0 + step=043 action=wait() hp=100.0 + step=044 action=move(direction='west') hp=100.0 + step=045 action=move(direction='north') hp=100.0 + step=046 action=move(direction='north') hp=100.0 + step=047 action=move(direction='north') hp=100.0 + step=048 action=move(direction='east') hp=100.0 + step=049 action=move(direction='south') hp=100.0 + step=050 action=move(direction='west') hp=100.0 + step=051 action=wait() hp=100.0 + step=052 action=move(direction='north') hp=100.0 + step=053 action=wait() hp=100.0 + step=054 action=move(direction='east') hp=100.0 + step=055 action=move(direction='north') hp=100.0 + step=056 action=move(direction='west') hp=100.0 + step=057 action=move(direction='west') hp=100.0 +ep=0014 [easy ] steps=057 reward= +17.940 evac=1 hp=100.0 suc30=0.64 r30= +4.93 t=7s + step=001 action=door(target_id='door_4', door_state='close') hp=100.0 + step=002 action=door(target_id='door_4', door_state='close') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=move(direction='south') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='east') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='south') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=door(target_id='door_3', door_state='open') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=move(direction='east') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=door(target_id='door_3', door_state='open') hp=100.0 + step=018 action=door(target_id='door_3', door_state='open') hp=100.0 + step=019 action=move(direction='east') hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=move(direction='south') hp=100.0 + step=022 action=door(target_id='door_4', door_state='close') hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=wait() hp=100.0 + step=025 action=move(direction='north') hp=100.0 + step=026 action=move(direction='west') hp=100.0 + step=027 action=door(target_id='door_3', door_state='open') hp=100.0 + step=028 action=move(direction='east') hp=100.0 + step=029 action=move(direction='west') hp=100.0 + step=030 action=door(target_id='door_3', door_state='open') hp=100.0 + step=031 action=wait() hp=100.0 + step=032 action=move(direction='east') hp=100.0 + step=033 action=move(direction='east') hp=100.0 + step=034 action=wait() hp=100.0 + step=035 action=door(target_id='door_4', door_state='close') hp=100.0 + step=036 action=move(direction='east') hp=100.0 + step=037 action=move(direction='south') hp=100.0 + step=038 action=move(direction='south') hp=100.0 + step=039 action=door(target_id='door_4', door_state='close') hp=100.0 + step=040 action=move(direction='north') hp=100.0 + step=041 action=door(target_id='door_4', door_state='open') hp=100.0 + step=042 action=move(direction='west') hp=100.0 + step=043 action=move(direction='north') hp=100.0 + step=044 action=move(direction='east') hp=100.0 + step=045 action=move(direction='west') hp=100.0 + step=046 action=move(direction='south') hp=100.0 + step=047 action=move(direction='south') hp=100.0 + step=048 action=move(direction='west') hp=100.0 + step=049 action=wait() hp=100.0 + step=050 action=door(target_id='door_4', door_state='close') hp=100.0 + step=051 action=move(direction='east') hp=100.0 + step=052 action=door(target_id='door_4', door_state='close') hp=100.0 + step=053 action=door(target_id='door_4', door_state='open') hp=100.0 + step=054 action=wait() hp=100.0 + step=055 action=move(direction='west') hp=100.0 + step=056 action=wait() hp=100.0 + step=057 action=move(direction='east') hp=100.0 + step=058 action=door(target_id='door_4', door_state='close') hp=100.0 + step=059 action=door(target_id='door_4', door_state='open') hp=100.0 + step=060 action=move(direction='north') hp=100.0 + step=061 action=door(target_id='door_4', door_state='close') hp=100.0 + step=062 action=move(direction='west') hp=100.0 + step=063 action=door(target_id='door_4', door_state='close') hp=100.0 + step=064 action=wait() hp=100.0 + step=065 action=door(target_id='door_4', door_state='close') hp=100.0 + step=066 action=move(direction='south') hp=100.0 + step=067 action=move(direction='north') hp=100.0 + step=068 action=wait() hp=100.0 + step=069 action=door(target_id='door_4', door_state='close') hp=100.0 + step=070 action=door(target_id='door_4', door_state='close') hp=100.0 + step=071 action=move(direction='east') hp=100.0 + step=072 action=wait() hp=100.0 + step=073 action=wait() hp=100.0 + step=074 action=wait() hp=100.0 + step=075 action=door(target_id='door_4', door_state='close') hp=100.0 + step=076 action=move(direction='east') hp=100.0 + step=077 action=move(direction='north') hp=100.0 + step=078 action=door(target_id='door_4', door_state='close') hp=100.0 + step=079 action=wait() hp=100.0 + step=080 action=move(direction='east') hp=100.0 +ep=0015 [easy ] steps=080 reward= +14.650 evac=1 hp=100.0 suc30=0.67 r30= +5.58 t=7s + >> PPO update samples=flushed pi_loss=-0.0131 v_loss=10.8125 entropy=1.5430 kl=0.0023 clip%=0.01 lr=2.80e-04 + step=001 action=door(target_id='door_4', door_state='open') hp=100.0 + step=002 action=door(target_id='door_7', door_state='close') hp=100.0 + step=003 action=move(direction='east') hp=100.0 + step=004 action=move(direction='south') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='south') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='east') hp=100.0 + step=009 action=door(target_id='door_7', door_state='close') hp=100.0 + step=010 action=move(direction='east') hp=100.0 + step=011 action=door(target_id='door_7', door_state='close') hp=100.0 + step=012 action=door(target_id='door_3', door_state='close') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=door(target_id='door_3', door_state='close') hp=100.0 + step=015 action=move(direction='east') hp=100.0 + step=016 action=door(target_id='door_3', door_state='close') hp=100.0 + step=017 action=door(target_id='door_3', door_state='close') hp=100.0 + step=018 action=door(target_id='door_8', door_state='open') hp=100.0 + step=019 action=door(target_id='door_3', door_state='close') hp=100.0 + step=020 action=door(target_id='door_8', door_state='open') hp=100.0 + step=021 action=move(direction='east') hp=100.0 + step=022 action=move(direction='east') hp=100.0 + step=023 action=move(direction='north') hp=100.0 + step=024 action=move(direction='north') hp=100.0 + step=025 action=door(target_id='door_4', door_state='open') hp=100.0 + step=026 action=move(direction='north') hp=100.0 + step=027 action=move(direction='south') hp=100.0 + step=028 action=door(target_id='door_8', door_state='open') hp=100.0 + step=029 action=wait() hp=100.0 + step=030 action=wait() hp=100.0 + step=031 action=move(direction='north') hp=100.0 + step=032 action=move(direction='north') hp=100.0 + step=033 action=move(direction='west') hp=100.0 + step=034 action=door(target_id='door_4', door_state='close') hp=100.0 + step=035 action=move(direction='north') hp=100.0 + step=036 action=door(target_id='door_4', door_state='open') hp=100.0 + step=037 action=wait() hp=100.0 + step=038 action=move(direction='south') hp=100.0 + step=039 action=move(direction='east') hp=100.0 + step=040 action=move(direction='west') hp=100.0 + step=041 action=move(direction='north') hp=100.0 + step=042 action=move(direction='north') hp=100.0 + step=043 action=move(direction='east') hp=100.0 + step=044 action=move(direction='south') hp=100.0 + step=045 action=wait() hp=100.0 + step=046 action=move(direction='west') hp=100.0 + step=047 action=move(direction='south') hp=100.0 + step=048 action=door(target_id='door_4', door_state='open') hp=100.0 + step=049 action=wait() hp=100.0 + step=050 action=move(direction='east') hp=100.0 + step=051 action=wait() hp=100.0 + step=052 action=wait() hp=100.0 + step=053 action=move(direction='north') hp=100.0 + step=054 action=wait() hp=100.0 + step=055 action=door(target_id='door_4', door_state='close') hp=100.0 + step=056 action=move(direction='south') hp=100.0 + step=057 action=move(direction='west') hp=100.0 + step=058 action=move(direction='east') hp=100.0 + step=059 action=move(direction='west') hp=100.0 + step=060 action=wait() hp=100.0 + step=061 action=wait() hp=100.0 + step=062 action=move(direction='north') hp=100.0 + step=063 action=move(direction='east') hp=100.0 + step=064 action=move(direction='south') hp=100.0 + step=065 action=wait() hp=100.0 + step=066 action=move(direction='west') hp=100.0 + step=067 action=move(direction='east') hp=100.0 + step=068 action=move(direction='north') hp=100.0 + step=069 action=move(direction='west') hp=100.0 + step=070 action=move(direction='east') hp=100.0 + step=071 action=door(target_id='door_4', door_state='open') hp=100.0 + step=072 action=wait() hp=100.0 + step=073 action=move(direction='south') hp=100.0 + step=074 action=move(direction='south') hp=100.0 + step=075 action=move(direction='south') hp=100.0 + step=076 action=door(target_id='door_3', door_state='close') hp=100.0 + step=077 action=door(target_id='door_8', door_state='open') hp=100.0 + step=078 action=wait() hp=100.0 + step=079 action=door(target_id='door_8', door_state='open') hp=100.0 + step=080 action=wait() hp=100.0 + step=081 action=move(direction='south') hp=100.0 + step=082 action=door(target_id='door_8', door_state='open') hp=100.0 + step=083 action=wait() hp=100.0 + step=084 action=move(direction='west') hp=100.0 + step=085 action=move(direction='west') hp=100.0 + step=086 action=move(direction='south') hp=100.0 + step=087 action=wait() hp=100.0 + step=088 action=door(target_id='door_4', door_state='close') hp=100.0 + step=089 action=wait() hp=100.0 + step=090 action=move(direction='east') hp=100.0 + step=091 action=move(direction='west') hp=100.0 + step=092 action=door(target_id='door_7', door_state='close') hp=100.0 + step=093 action=door(target_id='door_4', door_state='close') hp=100.0 + step=094 action=move(direction='east') hp=100.0 + step=095 action=door(target_id='door_4', door_state='close') hp=100.0 + step=096 action=door(target_id='door_8', door_state='open') hp=100.0 + step=097 action=move(direction='north') hp=100.0 + step=098 action=door(target_id='door_4', door_state='close') hp=100.0 + step=099 action=door(target_id='door_4', door_state='close') hp=100.0 + step=100 action=move(direction='north') hp=100.0 + step=101 action=door(target_id='door_4', door_state='close') hp=100.0 + step=102 action=move(direction='south') hp=100.0 + step=103 action=door(target_id='door_4', door_state='open') hp=100.0 + step=104 action=door(target_id='door_3', door_state='close') hp=100.0 + step=105 action=wait() hp=100.0 + step=106 action=door(target_id='door_3', door_state='close') hp=100.0 + step=107 action=door(target_id='door_4', door_state='open') hp=100.0 + step=108 action=move(direction='west') hp=100.0 + step=109 action=move(direction='north') hp=100.0 + step=110 action=door(target_id='door_4', door_state='open') hp=100.0 + step=111 action=door(target_id='door_4', door_state='open') hp=100.0 + step=112 action=move(direction='south') hp=100.0 + step=113 action=move(direction='north') hp=100.0 + step=114 action=move(direction='west') hp=100.0 + step=115 action=wait() hp=100.0 + step=116 action=door(target_id='door_4', door_state='open') hp=100.0 + step=117 action=door(target_id='door_3', door_state='close') hp=100.0 + step=118 action=door(target_id='door_3', door_state='open') hp=100.0 + step=119 action=door(target_id='door_4', door_state='open') hp=100.0 + step=120 action=move(direction='west') hp=100.0 + step=121 action=wait() hp=100.0 + step=122 action=move(direction='south') hp=100.0 + step=123 action=wait() hp=100.0 + step=124 action=wait() hp=100.0 + step=125 action=move(direction='north') hp=100.0 + step=126 action=door(target_id='door_7', door_state='close') hp=100.0 + step=127 action=move(direction='east') hp=100.0 + step=128 action=door(target_id='door_4', door_state='open') hp=100.0 + step=129 action=move(direction='east') hp=100.0 + step=130 action=door(target_id='door_4', door_state='open') hp=100.0 + step=131 action=wait() hp=100.0 + step=132 action=door(target_id='door_4', door_state='open') hp=100.0 + step=133 action=door(target_id='door_3', door_state='close') hp=100.0 + step=134 action=move(direction='south') hp=100.0 + step=135 action=move(direction='north') hp=100.0 + step=136 action=door(target_id='door_3', door_state='close') hp=100.0 + step=137 action=move(direction='east') hp=100.0 + step=138 action=move(direction='west') hp=100.0 + step=139 action=door(target_id='door_4', door_state='open') hp=100.0 + step=140 action=move(direction='east') hp=100.0 + step=141 action=move(direction='east') hp=100.0 + step=142 action=move(direction='west') hp=100.0 + step=143 action=move(direction='west') hp=100.0 + step=144 action=wait() hp=100.0 + step=145 action=door(target_id='door_3', door_state='close') hp=100.0 + step=146 action=door(target_id='door_4', door_state='open') hp=100.0 + step=147 action=move(direction='south') hp=100.0 + step=148 action=move(direction='east') hp=100.0 + step=149 action=wait() hp=100.0 + step=150 action=wait() hp=100.0 + step=151 action=door(target_id='door_4', door_state='open') hp=100.0 + step=152 action=move(direction='west') hp=100.0 + step=153 action=door(target_id='door_3', door_state='close') hp=100.0 + step=154 action=door(target_id='door_4', door_state='open') hp=100.0 + step=155 action=move(direction='east') hp=100.0 + step=156 action=move(direction='west') hp=100.0 + step=157 action=move(direction='south') hp=100.0 + step=158 action=wait() hp=100.0 + step=159 action=door(target_id='door_4', door_state='open') hp=100.0 + step=160 action=move(direction='east') hp=100.0 + step=161 action=wait() hp=100.0 + step=162 action=wait() hp=100.0 + step=163 action=move(direction='north') hp=100.0 + step=164 action=wait() hp=100.0 + step=165 action=move(direction='north') hp=100.0 + step=166 action=door(target_id='door_3', door_state='close') hp=100.0 + step=167 action=move(direction='west') hp=100.0 + step=168 action=door(target_id='door_3', door_state='close') hp=100.0 + step=169 action=door(target_id='door_3', door_state='close') hp=100.0 + step=170 action=door(target_id='door_4', door_state='open') hp=100.0 + step=171 action=door(target_id='door_4', door_state='open') hp=100.0 + step=172 action=door(target_id='door_3', door_state='close') hp=100.0 + step=173 action=move(direction='west') hp=100.0 + step=174 action=move(direction='west') hp=100.0 + step=175 action=wait() hp=100.0 + step=176 action=move(direction='south') hp=100.0 + step=177 action=move(direction='south') hp=100.0 + step=178 action=move(direction='west') hp=100.0 + step=179 action=wait() hp=100.0 + step=180 action=move(direction='north') hp=100.0 + step=181 action=door(target_id='door_7', door_state='close') hp=100.0 + step=182 action=move(direction='north') hp=100.0 + step=183 action=move(direction='east') hp=100.0 + step=184 action=move(direction='west') hp=100.0 + step=185 action=move(direction='east') hp=100.0 + step=186 action=door(target_id='door_7', door_state='close') hp=100.0 + step=187 action=door(target_id='door_2', door_state='open') hp=100.0 + step=188 action=wait() hp=100.0 + step=189 action=move(direction='west') hp=100.0 + step=190 action=door(target_id='door_2', door_state='open') hp=100.0 + step=191 action=move(direction='west') hp=100.0 + step=192 action=wait() hp=100.0 + step=193 action=door(target_id='door_3', door_state='close') hp=100.0 + step=194 action=door(target_id='door_2', door_state='open') hp=100.0 + step=195 action=door(target_id='door_3', door_state='close') hp=100.0 + step=196 action=move(direction='west') hp=100.0 + step=197 action=move(direction='west') hp=100.0 + step=198 action=door(target_id='door_1', door_state='open') hp=100.0 + step=199 action=door(target_id='door_2', door_state='open') hp=100.0 + step=200 action=move(direction='east') hp=100.0 +ep=0016 [easy ] steps=200 reward= -11.210 evac=0 hp=100.0 suc30=0.62 r30= +4.53 t=9s + step=001 action=move(direction='south') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='south') hp=100.0 + step=011 action=move(direction='east') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='south') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=move(direction='south') hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=move(direction='east') hp=100.0 + step=023 action=move(direction='south') hp=100.0 + step=024 action=move(direction='north') hp=100.0 + step=025 action=move(direction='west') hp=100.0 + step=026 action=move(direction='west') hp=100.0 + step=027 action=move(direction='west') hp=100.0 + step=028 action=move(direction='north') hp=100.0 + step=029 action=wait() hp=100.0 + step=030 action=move(direction='north') hp=100.0 + step=031 action=move(direction='north') hp=100.0 + step=032 action=move(direction='north') hp=100.0 + step=033 action=move(direction='north') hp=100.0 + step=034 action=move(direction='north') hp=100.0 + step=035 action=move(direction='south') hp=100.0 + step=036 action=move(direction='east') hp=100.0 + step=037 action=move(direction='west') hp=100.0 + step=038 action=move(direction='east') hp=100.0 + step=039 action=move(direction='south') hp=100.0 + step=040 action=move(direction='west') hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=move(direction='north') hp=100.0 + step=043 action=move(direction='east') hp=100.0 + step=044 action=move(direction='north') hp=100.0 + step=045 action=wait() hp=100.0 + step=046 action=move(direction='north') hp=100.0 + step=047 action=move(direction='west') hp=100.0 + step=048 action=move(direction='north') hp=100.0 + step=049 action=wait() hp=100.0 + step=050 action=move(direction='west') hp=100.0 +ep=0017 [easy ] steps=050 reward= +19.920 evac=1 hp=100.0 suc30=0.65 r30= +5.43 t=9s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='south') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='south') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=move(direction='south') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=move(direction='south') hp=100.0 + step=018 action=move(direction='south') hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=move(direction='north') hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=wait() hp=100.0 + step=023 action=move(direction='north') hp=100.0 + step=024 action=wait() hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=move(direction='south') hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=move(direction='south') hp=100.0 + step=029 action=wait() hp=100.0 + step=030 action=wait() hp=100.0 + step=031 action=wait() hp=100.0 + step=032 action=wait() hp=100.0 + step=033 action=wait() hp=100.0 + step=034 action=move(direction='north') hp=100.0 + step=035 action=wait() hp=100.0 + step=036 action=wait() hp=100.0 + step=037 action=move(direction='south') hp=100.0 + step=038 action=move(direction='south') hp=100.0 + step=039 action=move(direction='north') hp=100.0 + step=040 action=move(direction='north') hp=100.0 + step=041 action=move(direction='north') hp=100.0 + step=042 action=wait() hp=100.0 + step=043 action=move(direction='north') hp=100.0 + step=044 action=move(direction='north') hp=100.0 +ep=0018 [easy ] steps=044 reward= +14.680 evac=1 hp=100.0 suc30=0.67 r30= +5.95 t=9s + step=001 action=wait() hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=door(target_id='door_1', door_state='close') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=door(target_id='door_1', door_state='close') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='west') hp=100.0 +ep=0019 [easy ] steps=009 reward= +17.100 evac=1 hp=100.0 suc30=0.68 r30= +6.53 t=9s + step=001 action=move(direction='north') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='south') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='east') hp=100.0 + step=013 action=move(direction='east') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=move(direction='east') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=move(direction='south') hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=move(direction='south') hp=100.0 + step=024 action=wait() hp=100.0 + step=025 action=move(direction='north') hp=100.0 + step=026 action=move(direction='west') hp=100.0 + step=027 action=move(direction='east') hp=100.0 + step=028 action=move(direction='west') hp=100.0 + step=029 action=move(direction='west') hp=100.0 + step=030 action=move(direction='south') hp=100.0 + step=031 action=move(direction='north') hp=100.0 + step=032 action=move(direction='east') hp=100.0 + step=033 action=wait() hp=100.0 + step=034 action=wait() hp=100.0 + step=035 action=move(direction='west') hp=100.0 + step=036 action=wait() hp=100.0 + step=037 action=move(direction='west') hp=100.0 + step=038 action=move(direction='east') hp=100.0 + step=039 action=move(direction='west') hp=100.0 + step=040 action=move(direction='west') hp=100.0 + step=041 action=move(direction='west') hp=100.0 + step=042 action=move(direction='south') hp=100.0 + step=043 action=move(direction='north') hp=100.0 + step=044 action=move(direction='west') hp=100.0 + step=045 action=move(direction='west') hp=100.0 + step=046 action=wait() hp=100.0 + step=047 action=wait() hp=100.0 + step=048 action=move(direction='west') hp=100.0 + step=049 action=wait() hp=100.0 + step=050 action=wait() hp=100.0 + step=051 action=wait() hp=100.0 + step=052 action=move(direction='east') hp=100.0 + step=053 action=wait() hp=100.0 + step=054 action=wait() hp=100.0 + step=055 action=move(direction='west') hp=100.0 + step=056 action=move(direction='west') hp=100.0 + step=057 action=move(direction='west') hp=100.0 + step=058 action=wait() hp=100.0 + step=059 action=move(direction='east') hp=100.0 + step=060 action=wait() hp=100.0 + step=061 action=move(direction='south') hp=100.0 + step=062 action=move(direction='north') hp=100.0 + step=063 action=wait() hp=100.0 + step=064 action=move(direction='west') hp=100.0 + step=065 action=wait() hp=100.0 + step=066 action=move(direction='west') hp=100.0 +ep=0020 [easy ] steps=066 reward= +16.890 evac=1 hp=100.0 suc30=0.70 r30= +7.05 t=9s + >> PPO update samples=flushed pi_loss=-0.0216 v_loss=13.0783 entropy=1.5819 kl=0.0026 clip%=0.03 lr=2.73e-04 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp= 99.5 + step=007 action=move(direction='west') hp= 99.5 + step=008 action=move(direction='west') hp= 99.5 + step=009 action=move(direction='west') hp= 99.5 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + ** EVAL [medium] reward=+15.697 success=1.00 steps=7.0 + step=001 action=move(direction='south') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='east') hp=100.0 + step=009 action=move(direction='east') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=door(target_id='door_3', door_state='close') hp=100.0 + step=015 action=door(target_id='door_3', door_state='close') hp=100.0 + step=016 action=door(target_id='door_3', door_state='close') hp=100.0 + step=017 action=move(direction='east') hp=100.0 + step=018 action=move(direction='east') hp=100.0 + step=019 action=door(target_id='door_4', door_state='open') hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=move(direction='west') hp=100.0 + step=022 action=door(target_id='door_3', door_state='close') hp=100.0 + step=023 action=door(target_id='door_3', door_state='close') hp=100.0 + step=024 action=move(direction='south') hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=door(target_id='door_3', door_state='close') hp=100.0 + step=027 action=move(direction='south') hp=100.0 + step=028 action=wait() hp=100.0 + step=029 action=move(direction='west') hp=100.0 + step=030 action=move(direction='east') hp=100.0 + step=031 action=move(direction='west') hp=100.0 + step=032 action=move(direction='east') hp=100.0 + step=033 action=door(target_id='door_3', door_state='close') hp=100.0 + step=034 action=door(target_id='door_3', door_state='open') hp=100.0 + step=035 action=door(target_id='door_3', door_state='close') hp=100.0 + step=036 action=wait() hp=100.0 + step=037 action=move(direction='north') hp=100.0 + step=038 action=door(target_id='door_3', door_state='open') hp=100.0 + step=039 action=move(direction='west') hp=100.0 + step=040 action=move(direction='south') hp=100.0 + step=041 action=move(direction='north') hp=100.0 + step=042 action=move(direction='north') hp=100.0 + step=043 action=move(direction='west') hp=100.0 + step=044 action=move(direction='west') hp=100.0 + step=045 action=move(direction='west') hp=100.0 + step=046 action=door(target_id='door_2', door_state='close') hp=100.0 + step=047 action=wait() hp=100.0 + step=048 action=wait() hp=100.0 + step=049 action=wait() hp=100.0 + step=050 action=door(target_id='door_2', door_state='close') hp=100.0 + step=051 action=wait() hp=100.0 + step=052 action=door(target_id='door_2', door_state='close') hp=100.0 + step=053 action=wait() hp=100.0 + step=054 action=wait() hp=100.0 + step=055 action=door(target_id='door_2', door_state='close') hp=100.0 + step=056 action=move(direction='east') hp=100.0 + step=057 action=door(target_id='door_3', door_state='open') hp=100.0 + step=058 action=door(target_id='door_2', door_state='close') hp=100.0 + step=059 action=door(target_id='door_2', door_state='close') hp=100.0 + step=060 action=wait() hp=100.0 + step=061 action=move(direction='east') hp=100.0 + step=062 action=move(direction='east') hp=100.0 + step=063 action=door(target_id='door_3', door_state='open') hp=100.0 + step=064 action=wait() hp=100.0 + step=065 action=move(direction='west') hp=100.0 + step=066 action=door(target_id='door_3', door_state='open') hp=100.0 + step=067 action=move(direction='west') hp=100.0 + step=068 action=move(direction='north') hp=100.0 + step=069 action=move(direction='north') hp=100.0 + step=070 action=wait() hp=100.0 + step=071 action=move(direction='north') hp=100.0 + step=072 action=wait() hp=100.0 + step=073 action=move(direction='south') hp=100.0 + step=074 action=move(direction='north') hp=100.0 + step=075 action=wait() hp=100.0 + step=076 action=wait() hp=100.0 + step=077 action=move(direction='north') hp=100.0 + step=078 action=move(direction='south') hp=100.0 + step=079 action=move(direction='north') hp=100.0 + step=080 action=move(direction='north') hp=100.0 + step=081 action=move(direction='north') hp=100.0 + step=082 action=move(direction='north') hp=100.0 +ep=0021 [easy ] steps=082 reward= +12.220 evac=1 hp=100.0 suc30=0.71 r30= +7.30 t=10s + step=001 action=door(target_id='door_2', door_state='close') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=door(target_id='door_2', door_state='close') hp=100.0 + step=005 action=door(target_id='door_2', door_state='close') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=door(target_id='door_2', door_state='close') hp=100.0 + step=008 action=door(target_id='door_3', door_state='close') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=move(direction='east') hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=door(target_id='door_2', door_state='open') hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=move(direction='east') hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=move(direction='west') hp=100.0 + step=024 action=move(direction='west') hp=100.0 + step=025 action=door(target_id='door_5', door_state='close') hp=100.0 + step=026 action=move(direction='west') hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=move(direction='south') hp=100.0 + step=029 action=wait() hp=100.0 + step=030 action=move(direction='north') hp=100.0 + step=031 action=move(direction='south') hp=100.0 + step=032 action=door(target_id='door_5', door_state='close') hp=100.0 + step=033 action=move(direction='east') hp=100.0 + step=034 action=move(direction='south') hp=100.0 + step=035 action=move(direction='north') hp=100.0 + step=036 action=move(direction='north') hp=100.0 + step=037 action=move(direction='west') hp=100.0 + step=038 action=wait() hp=100.0 + step=039 action=door(target_id='door_1', door_state='close') hp=100.0 + step=040 action=move(direction='east') hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=wait() hp=100.0 + step=043 action=move(direction='east') hp=100.0 + step=044 action=move(direction='east') hp=100.0 + step=045 action=wait() hp=100.0 + step=046 action=door(target_id='door_2', door_state='open') hp=100.0 + step=047 action=door(target_id='door_2', door_state='open') hp=100.0 + step=048 action=move(direction='west') hp=100.0 + step=049 action=move(direction='west') hp=100.0 + step=050 action=move(direction='south') hp=100.0 + step=051 action=wait() hp=100.0 + step=052 action=move(direction='west') hp=100.0 + step=053 action=move(direction='south') hp=100.0 + step=054 action=move(direction='north') hp=100.0 + step=055 action=move(direction='west') hp=100.0 +ep=0022 [easy ] steps=055 reward= +16.940 evac=1 hp=100.0 suc30=0.73 r30= +7.74 t=10s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='east') hp=100.0 + step=007 action=move(direction='south') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='south') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='east') hp=100.0 + step=015 action=move(direction='south') hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=move(direction='south') hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=move(direction='west') hp=100.0 +ep=0023 [easy ] steps=020 reward= +19.400 evac=1 hp=100.0 suc30=0.74 r30= +8.24 t=10s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='east') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=door(target_id='door_2', door_state='close') hp=100.0 + step=005 action=door(target_id='door_2', door_state='close') hp=100.0 + step=006 action=door(target_id='door_2', door_state='close') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=door(target_id='door_2', door_state='close') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=door(target_id='door_2', door_state='close') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='west') hp=100.0 +ep=0024 [easy ] steps=016 reward= +18.470 evac=1 hp=100.0 suc30=0.75 r30= +8.67 t=10s + step=001 action=door(target_id='door_3', door_state='open') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=door(target_id='door_3', door_state='open') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=door(target_id='door_2', door_state='open') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=door(target_id='door_2', door_state='open') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=door(target_id='door_5', door_state='close') hp=100.0 + step=018 action=move(direction='south') hp=100.0 + step=019 action=door(target_id='door_1', door_state='open') hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=move(direction='west') hp=100.0 +ep=0025 [easy ] steps=021 reward= +18.470 evac=1 hp=100.0 suc30=0.76 r30= +9.06 t=11s + >> PPO update samples=flushed pi_loss=-0.0041 v_loss=31.9513 entropy=1.4992 kl=0.0007 clip%=0.00 lr=2.66e-04 + [ckpt] saved -> artifacts/pyre_ppo_http_ckpt.pt + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='east') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='south') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=move(direction='east') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='east') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=move(direction='south') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=move(direction='south') hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=move(direction='west') hp=100.0 + step=022 action=move(direction='east') hp=100.0 + step=023 action=move(direction='west') hp=100.0 + step=024 action=move(direction='south') hp=100.0 + step=025 action=move(direction='west') hp=100.0 + step=026 action=move(direction='north') hp=100.0 + step=027 action=move(direction='north') hp=100.0 + step=028 action=move(direction='west') hp=100.0 + step=029 action=move(direction='north') hp=100.0 + step=030 action=move(direction='north') hp=100.0 + step=031 action=move(direction='west') hp=100.0 + step=032 action=move(direction='north') hp=100.0 + step=033 action=move(direction='east') hp=100.0 + step=034 action=move(direction='west') hp=100.0 + step=035 action=move(direction='west') hp=100.0 + step=036 action=move(direction='north') hp=100.0 + step=037 action=move(direction='west') hp=100.0 + step=038 action=wait() hp=100.0 + step=039 action=move(direction='west') hp=100.0 + step=040 action=move(direction='west') hp=100.0 + step=041 action=move(direction='east') hp=100.0 + step=042 action=move(direction='west') hp=100.0 + step=043 action=move(direction='west') hp=100.0 + step=044 action=wait() hp=100.0 + step=045 action=move(direction='west') hp=100.0 + step=046 action=wait() hp=100.0 + step=047 action=move(direction='east') hp=100.0 + step=048 action=wait() hp=100.0 + step=049 action=move(direction='south') hp=100.0 + step=050 action=move(direction='west') hp=100.0 + step=051 action=move(direction='north') hp=100.0 + step=052 action=move(direction='west') hp=100.0 + step=053 action=move(direction='south') hp=100.0 + step=054 action=move(direction='south') hp=100.0 + step=055 action=wait() hp=100.0 + step=056 action=move(direction='north') hp=100.0 + step=057 action=move(direction='north') hp=100.0 + step=058 action=wait() hp=100.0 + step=059 action=wait() hp=100.0 + step=060 action=move(direction='west') hp=100.0 +ep=0026 [easy ] steps=060 reward= +19.690 evac=1 hp=100.0 suc30=0.77 r30= +9.47 t=11s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='north') hp=100.0 +ep=0027 [easy ] steps=009 reward= +18.380 evac=1 hp=100.0 suc30=0.78 r30= +9.80 t=11s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=door(target_id='door_3', door_state='close') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='south') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=door(target_id='door_3', door_state='close') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=door(target_id='door_3', door_state='close') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='east') hp=100.0 + step=015 action=move(direction='south') hp=100.0 + step=016 action=move(direction='east') hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=move(direction='east') hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=door(target_id='door_8', door_state='close') hp=100.0 + step=022 action=move(direction='east') hp=100.0 + step=023 action=move(direction='north') hp=100.0 + step=024 action=move(direction='north') hp=100.0 + step=025 action=move(direction='west') hp=100.0 + step=026 action=move(direction='south') hp=100.0 + step=027 action=move(direction='west') hp=100.0 + step=028 action=door(target_id='door_4', door_state='open') hp=100.0 + step=029 action=move(direction='west') hp=100.0 + step=030 action=move(direction='west') hp=100.0 + step=031 action=door(target_id='door_3', door_state='close') hp=100.0 + step=032 action=move(direction='north') hp=100.0 + step=033 action=door(target_id='door_3', door_state='close') hp=100.0 + step=034 action=move(direction='west') hp=100.0 + step=035 action=move(direction='west') hp=100.0 + step=036 action=move(direction='south') hp=100.0 + step=037 action=door(target_id='door_6', door_state='close') hp=100.0 + step=038 action=door(target_id='door_3', door_state='open') hp=100.0 + step=039 action=door(target_id='door_3', door_state='open') hp=100.0 + step=040 action=move(direction='west') hp=100.0 + step=041 action=move(direction='west') hp=100.0 + step=042 action=move(direction='north') hp=100.0 + step=043 action=door(target_id='door_2', door_state='open') hp=100.0 + step=044 action=door(target_id='door_2', door_state='close') hp=100.0 + step=045 action=move(direction='south') hp=100.0 + step=046 action=door(target_id='door_2', door_state='open') hp=100.0 + step=047 action=move(direction='west') hp=100.0 + step=048 action=move(direction='south') hp=100.0 + step=049 action=door(target_id='door_6', door_state='close') hp=100.0 + step=050 action=move(direction='north') hp=100.0 + step=051 action=move(direction='west') hp=100.0 + step=052 action=move(direction='west') hp=100.0 + step=053 action=door(target_id='door_1', door_state='close') hp=100.0 + step=054 action=move(direction='east') hp=100.0 + step=055 action=move(direction='north') hp=100.0 + step=056 action=door(target_id='door_2', door_state='open') hp=100.0 + step=057 action=door(target_id='door_2', door_state='open') hp=100.0 + step=058 action=door(target_id='door_1', door_state='open') hp=100.0 + step=059 action=move(direction='west') hp=100.0 + step=060 action=door(target_id='door_2', door_state='open') hp=100.0 + step=061 action=move(direction='west') hp=100.0 + step=062 action=wait() hp=100.0 + step=063 action=wait() hp=100.0 + step=064 action=move(direction='east') hp=100.0 + step=065 action=move(direction='east') hp=100.0 + step=066 action=move(direction='east') hp=100.0 + step=067 action=move(direction='east') hp=100.0 + step=068 action=move(direction='south') hp=100.0 + step=069 action=door(target_id='door_1', door_state='close') hp=100.0 + step=070 action=door(target_id='door_2', door_state='open') hp=100.0 + step=071 action=move(direction='east') hp=100.0 + step=072 action=move(direction='north') hp=100.0 + step=073 action=wait() hp=100.0 + step=074 action=move(direction='west') hp=100.0 + step=075 action=door(target_id='door_1', door_state='close') hp=100.0 + step=076 action=wait() hp=100.0 + step=077 action=move(direction='west') hp=100.0 + step=078 action=move(direction='south') hp=100.0 + step=079 action=move(direction='south') hp=100.0 + step=080 action=move(direction='east') hp=100.0 + step=081 action=door(target_id='door_2', door_state='open') hp=100.0 + step=082 action=wait() hp=100.0 + step=083 action=move(direction='south') hp=100.0 + step=084 action=door(target_id='door_2', door_state='open') hp=100.0 + step=085 action=move(direction='east') hp=100.0 + step=086 action=move(direction='east') hp=100.0 + step=087 action=move(direction='north') hp=100.0 + step=088 action=door(target_id='door_2', door_state='open') hp=100.0 + step=089 action=move(direction='west') hp=100.0 + step=090 action=door(target_id='door_2', door_state='open') hp=100.0 + step=091 action=move(direction='east') hp=100.0 + step=092 action=move(direction='east') hp=100.0 + step=093 action=move(direction='west') hp=100.0 + step=094 action=move(direction='north') hp=100.0 + step=095 action=move(direction='west') hp=100.0 + step=096 action=move(direction='north') hp=100.0 + step=097 action=door(target_id='door_1', door_state='close') hp=100.0 + step=098 action=door(target_id='door_2', door_state='open') hp=100.0 + step=099 action=door(target_id='door_1', door_state='close') hp=100.0 + step=100 action=move(direction='west') hp=100.0 + step=101 action=move(direction='west') hp=100.0 + step=102 action=wait() hp=100.0 + step=103 action=wait() hp=100.0 + step=104 action=wait() hp=100.0 + step=105 action=door(target_id='door_2', door_state='close') hp=100.0 + step=106 action=move(direction='west') hp=100.0 + step=107 action=move(direction='west') hp=100.0 + step=108 action=move(direction='west') hp=100.0 + step=109 action=wait() hp=100.0 + step=110 action=wait() hp=100.0 + step=111 action=wait() hp=100.0 + step=112 action=door(target_id='door_1', door_state='close') hp=100.0 + step=113 action=move(direction='east') hp=100.0 + step=114 action=door(target_id='door_1', door_state='open') hp=100.0 + step=115 action=door(target_id='door_5', door_state='close') hp=100.0 + step=116 action=move(direction='west') hp=100.0 + step=117 action=door(target_id='door_1', door_state='close') hp=100.0 + step=118 action=wait() hp=100.0 + step=119 action=move(direction='east') hp=100.0 + step=120 action=move(direction='east') hp=100.0 + step=121 action=move(direction='west') hp=100.0 + step=122 action=door(target_id='door_2', door_state='close') hp=100.0 + step=123 action=door(target_id='door_2', door_state='close') hp=100.0 + step=124 action=door(target_id='door_1', door_state='open') hp=100.0 + step=125 action=move(direction='west') hp=100.0 + step=126 action=wait() hp=100.0 + step=127 action=door(target_id='door_1', door_state='close') hp=100.0 + step=128 action=move(direction='east') hp=100.0 + step=129 action=move(direction='west') hp=100.0 + step=130 action=wait() hp=100.0 + step=131 action=wait() hp=100.0 + step=132 action=move(direction='south') hp=100.0 + step=133 action=move(direction='west') hp=100.0 +ep=0028 [easy ] steps=133 reward= +13.980 evac=1 hp=100.0 suc30=0.79 r30= +9.95 t=12s + step=001 action=move(direction='south') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='south') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='east') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=move(direction='east') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=move(direction='south') hp=100.0 + step=021 action=wait() hp=100.0 + step=022 action=move(direction='south') hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=wait() hp=100.0 + step=025 action=move(direction='south') hp=100.0 + step=026 action=move(direction='north') hp=100.0 + step=027 action=move(direction='south') hp=100.0 + step=028 action=move(direction='west') hp=100.0 + step=029 action=move(direction='north') hp=100.0 + step=030 action=move(direction='west') hp=100.0 + step=031 action=move(direction='north') hp=100.0 + step=032 action=move(direction='north') hp=100.0 + step=033 action=move(direction='south') hp=100.0 + step=034 action=wait() hp=100.0 + step=035 action=wait() hp=100.0 + step=036 action=move(direction='west') hp=100.0 + step=037 action=move(direction='west') hp=100.0 + step=038 action=wait() hp=100.0 + step=039 action=move(direction='north') hp=100.0 + step=040 action=move(direction='north') hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=move(direction='east') hp=100.0 + step=043 action=move(direction='west') hp=100.0 + step=044 action=wait() hp=100.0 + step=045 action=move(direction='north') hp=100.0 + step=046 action=move(direction='north') hp=100.0 + step=047 action=move(direction='north') hp=100.0 + step=048 action=wait() hp=100.0 + step=049 action=move(direction='west') hp=100.0 + step=050 action=move(direction='south') hp=100.0 + step=051 action=move(direction='west') hp=100.0 + step=052 action=move(direction='north') hp=100.0 + step=053 action=move(direction='north') hp=100.0 + step=054 action=move(direction='north') hp=100.0 + step=055 action=move(direction='east') hp=100.0 + step=056 action=wait() hp=100.0 + step=057 action=move(direction='west') hp=100.0 + step=058 action=wait() hp=100.0 + step=059 action=move(direction='east') hp=100.0 + step=060 action=move(direction='west') hp=100.0 + step=061 action=move(direction='west') hp=100.0 + step=062 action=wait() hp=100.0 + step=063 action=wait() hp=100.0 + step=064 action=move(direction='west') hp=100.0 + step=065 action=move(direction='east') hp=100.0 + step=066 action=wait() hp=100.0 + step=067 action=move(direction='west') hp=100.0 + step=068 action=move(direction='west') hp=100.0 + step=069 action=move(direction='west') hp=100.0 + step=070 action=move(direction='south') hp=100.0 + step=071 action=move(direction='north') hp=100.0 + step=072 action=move(direction='west') hp=100.0 +ep=0029 [easy ] steps=072 reward= +20.350 evac=1 hp=100.0 suc30=0.79 r30= +10.31 t=12s + step=001 action=move(direction='north') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='south') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='south') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='north') hp=100.0 +ep=0030 [easy ] steps=016 reward= +17.630 evac=1 hp=100.0 suc30=0.80 r30= +10.55 t=12s + >> PPO update samples=flushed pi_loss=-0.0072 v_loss=14.1700 entropy=1.4895 kl=0.0035 clip%=0.05 lr=2.60e-04 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='south') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=door(target_id='door_1', door_state='close') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='south') hp=100.0 + step=013 action=door(target_id='door_1', door_state='close') hp=100.0 + step=014 action=door(target_id='door_1', door_state='close') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=door(target_id='door_1', door_state='close') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=move(direction='east') hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=wait() hp=100.0 + step=022 action=wait() hp=100.0 + step=023 action=move(direction='south') hp=100.0 + step=024 action=door(target_id='door_5', door_state='close') hp=100.0 + step=025 action=move(direction='west') hp=100.0 +ep=0031 [easy ] steps=025 reward= +18.650 evac=1 hp=100.0 suc30=0.83 r30= +11.94 t=13s + step=001 action=move(direction='north') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='east') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='east') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='south') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=move(direction='north') hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=wait() hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=move(direction='west') hp=100.0 + step=025 action=move(direction='north') hp=100.0 + step=026 action=move(direction='west') hp=100.0 + step=027 action=move(direction='north') hp=100.0 + step=028 action=wait() hp=100.0 + step=029 action=move(direction='west') hp=100.0 + step=030 action=move(direction='south') hp=100.0 + step=031 action=wait() hp=100.0 + step=032 action=move(direction='north') hp=100.0 + step=033 action=move(direction='west') hp=100.0 + step=034 action=move(direction='east') hp=100.0 + step=035 action=move(direction='south') hp=100.0 + step=036 action=move(direction='west') hp=100.0 + step=037 action=move(direction='north') hp=100.0 + step=038 action=move(direction='west') hp=100.0 + step=039 action=move(direction='west') hp=100.0 + step=040 action=move(direction='west') hp=100.0 +ep=0032 [easy ] steps=040 reward= +21.890 evac=1 hp=100.0 suc30=0.83 r30= +12.11 t=13s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=door(target_id='door_2', door_state='close') hp=100.0 + step=004 action=door(target_id='door_2', door_state='close') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 +ep=0033 [easy ] steps=010 reward= +18.370 evac=1 hp=100.0 suc30=0.87 r30= +13.29 t=13s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=door(target_id='door_6', door_state='close') hp=100.0 + step=008 action=move(direction='east') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='south') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=door(target_id='door_2', door_state='close') hp=100.0 + step=015 action=move(direction='south') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=move(direction='west') hp=100.0 + step=020 action=door(target_id='door_2', door_state='close') hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=move(direction='west') hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=door(target_id='door_1', door_state='close') hp=100.0 + step=027 action=door(target_id='door_2', door_state='close') hp=100.0 + step=028 action=wait() hp=100.0 + step=029 action=door(target_id='door_1', door_state='open') hp=100.0 + step=030 action=door(target_id='door_2', door_state='close') hp=100.0 + step=031 action=move(direction='west') hp=100.0 + step=032 action=door(target_id='door_2', door_state='close') hp=100.0 + step=033 action=door(target_id='door_2', door_state='close') hp=100.0 + step=034 action=door(target_id='door_1', door_state='close') hp=100.0 + step=035 action=door(target_id='door_2', door_state='close') hp=100.0 + step=036 action=door(target_id='door_5', door_state='open') hp=100.0 + step=037 action=move(direction='west') hp=100.0 + step=038 action=move(direction='east') hp=100.0 + step=039 action=door(target_id='door_2', door_state='close') hp=100.0 + step=040 action=door(target_id='door_2', door_state='close') hp=100.0 + step=041 action=door(target_id='door_2', door_state='close') hp=100.0 + step=042 action=wait() hp=100.0 + step=043 action=door(target_id='door_2', door_state='close') hp=100.0 + step=044 action=move(direction='west') hp=100.0 + step=045 action=door(target_id='door_1', door_state='open') hp=100.0 + step=046 action=wait() hp=100.0 + step=047 action=wait() hp=100.0 + step=048 action=wait() hp=100.0 + step=049 action=door(target_id='door_1', door_state='close') hp=100.0 + step=050 action=wait() hp=100.0 + step=051 action=move(direction='east') hp=100.0 + step=052 action=move(direction='west') hp=100.0 + step=053 action=wait() hp=100.0 + step=054 action=door(target_id='door_1', door_state='open') hp=100.0 + step=055 action=wait() hp=100.0 + step=056 action=move(direction='south') hp=100.0 + step=057 action=move(direction='west') hp=100.0 +ep=0034 [easy ] steps=057 reward= +15.420 evac=1 hp=100.0 suc30=0.90 r30= +14.07 t=13s + step=001 action=move(direction='west') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='south') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='south') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=move(direction='east') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=move(direction='west') hp=100.0 +ep=0035 [easy ] steps=015 reward= +17.990 evac=1 hp=100.0 suc30=0.90 r30= +14.10 t=13s + >> PPO update samples=flushed pi_loss=-0.0054 v_loss=19.6221 entropy=1.4137 kl=0.0007 clip%=0.00 lr=2.53e-04 + step=001 action=wait() hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=move(direction='east') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=move(direction='north') hp=100.0 +ep=0036 [easy ] steps=015 reward= +18.250 evac=1 hp=100.0 suc30=0.90 r30= +14.13 t=13s + step=001 action=door(target_id='door_2', door_state='open') hp=100.0 + step=002 action=door(target_id='door_2', door_state='open') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=door(target_id='door_1', door_state='close') hp=100.0 + step=007 action=door(target_id='door_1', door_state='open') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='south') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=door(target_id='door_1', door_state='close') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=move(direction='east') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=door(target_id='door_1', door_state='open') hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=wait() hp=100.0 + step=022 action=door(target_id='door_1', door_state='open') hp=100.0 + step=023 action=door(target_id='door_1', door_state='open') hp=100.0 + step=024 action=wait() hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=wait() hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=wait() hp=100.0 + step=029 action=wait() hp=100.0 + step=030 action=door(target_id='door_1', door_state='open') hp=100.0 + step=031 action=wait() hp=100.0 + step=032 action=door(target_id='door_1', door_state='open') hp=100.0 + step=033 action=move(direction='east') hp=100.0 + step=034 action=move(direction='west') hp=100.0 + step=035 action=move(direction='east') hp=100.0 + step=036 action=move(direction='west') hp=100.0 + step=037 action=move(direction='east') hp=100.0 + step=038 action=move(direction='west') hp=100.0 + step=039 action=move(direction='south') hp=100.0 + step=040 action=move(direction='north') hp=100.0 + step=041 action=move(direction='south') hp=100.0 + step=042 action=move(direction='north') hp=100.0 + step=043 action=wait() hp=100.0 + step=044 action=move(direction='east') hp=100.0 + step=045 action=wait() hp=100.0 + step=046 action=move(direction='west') hp=100.0 + step=047 action=wait() hp=100.0 + step=048 action=move(direction='south') hp=100.0 + step=049 action=move(direction='north') hp=100.0 + step=050 action=door(target_id='door_1', door_state='open') hp=100.0 + step=051 action=door(target_id='door_1', door_state='open') hp=100.0 + step=052 action=move(direction='east') hp=100.0 + step=053 action=wait() hp=100.0 + step=054 action=move(direction='west') hp=100.0 + step=055 action=move(direction='south') hp=100.0 + step=056 action=move(direction='north') hp=100.0 + step=057 action=wait() hp=100.0 + step=058 action=move(direction='south') hp=100.0 + step=059 action=move(direction='north') hp=100.0 + step=060 action=wait() hp=100.0 + step=061 action=wait() hp=100.0 + step=062 action=wait() hp=100.0 + step=063 action=move(direction='east') hp=100.0 + step=064 action=wait() hp=100.0 + step=065 action=move(direction='west') hp=100.0 + step=066 action=door(target_id='door_1', door_state='open') hp=100.0 + step=067 action=move(direction='south') hp=100.0 + step=068 action=wait() hp=100.0 + step=069 action=move(direction='north') hp=100.0 + step=070 action=door(target_id='door_1', door_state='open') hp=100.0 + step=071 action=wait() hp=100.0 + step=072 action=move(direction='east') hp=100.0 + step=073 action=move(direction='west') hp=100.0 + step=074 action=move(direction='south') hp=100.0 + step=075 action=move(direction='north') hp=100.0 + step=076 action=move(direction='east') hp=100.0 + step=077 action=wait() hp=100.0 + step=078 action=move(direction='west') hp=100.0 + step=079 action=door(target_id='door_1', door_state='open') hp=100.0 + step=080 action=door(target_id='door_1', door_state='open') hp=100.0 + step=081 action=move(direction='south') hp=100.0 + step=082 action=move(direction='north') hp=100.0 + step=083 action=wait() hp=100.0 + step=084 action=wait() hp=100.0 + step=085 action=wait() hp=100.0 + step=086 action=wait() hp=100.0 + step=087 action=door(target_id='door_1', door_state='open') hp=100.0 + step=088 action=move(direction='south') hp=100.0 + step=089 action=move(direction='north') hp=100.0 + step=090 action=wait() hp=100.0 + step=091 action=move(direction='east') hp=100.0 + step=092 action=move(direction='west') hp=100.0 + step=093 action=move(direction='east') hp=100.0 + step=094 action=wait() hp=100.0 + step=095 action=move(direction='west') hp=100.0 + step=096 action=door(target_id='door_1', door_state='open') hp=100.0 + step=097 action=wait() hp=100.0 + step=098 action=door(target_id='door_1', door_state='open') hp=100.0 + step=099 action=move(direction='east') hp=100.0 + step=100 action=move(direction='west') hp=100.0 + step=101 action=move(direction='east') hp=100.0 + step=102 action=move(direction='west') hp=100.0 + step=103 action=door(target_id='door_1', door_state='open') hp=100.0 + step=104 action=wait() hp=100.0 + step=105 action=door(target_id='door_1', door_state='open') hp=100.0 + step=106 action=move(direction='south') hp=100.0 + step=107 action=wait() hp=100.0 + step=108 action=move(direction='north') hp=100.0 + step=109 action=wait() hp=100.0 + step=110 action=wait() hp=100.0 + step=111 action=wait() hp=100.0 + step=112 action=door(target_id='door_1', door_state='open') hp=100.0 + step=113 action=move(direction='east') hp=100.0 + step=114 action=move(direction='west') hp=100.0 + step=115 action=wait() hp=100.0 + step=116 action=door(target_id='door_1', door_state='open') hp=100.0 + step=117 action=move(direction='south') hp=100.0 + step=118 action=wait() hp=100.0 + step=119 action=move(direction='east') hp=100.0 + step=120 action=move(direction='east') hp=100.0 + step=121 action=wait() hp=100.0 + step=122 action=move(direction='west') hp=100.0 + step=123 action=move(direction='west') hp=100.0 + step=124 action=move(direction='north') hp=100.0 + step=125 action=door(target_id='door_1', door_state='open') hp=100.0 + step=126 action=wait() hp=100.0 + step=127 action=wait() hp=100.0 + step=128 action=move(direction='south') hp=100.0 + step=129 action=move(direction='north') hp=100.0 + step=130 action=wait() hp=100.0 + step=131 action=wait() hp=100.0 + step=132 action=wait() hp=100.0 + step=133 action=door(target_id='door_1', door_state='open') hp=100.0 + step=134 action=door(target_id='door_1', door_state='open') hp=100.0 + step=135 action=door(target_id='door_1', door_state='open') hp=100.0 + step=136 action=move(direction='east') hp=100.0 + step=137 action=move(direction='west') hp=100.0 + step=138 action=move(direction='east') hp=100.0 + step=139 action=move(direction='west') hp=100.0 + step=140 action=wait() hp=100.0 + step=141 action=door(target_id='door_1', door_state='open') hp=100.0 + step=142 action=door(target_id='door_1', door_state='open') hp=100.0 + step=143 action=move(direction='east') hp=100.0 + step=144 action=wait() hp=100.0 + step=145 action=wait() hp=100.0 + step=146 action=move(direction='west') hp=100.0 + step=147 action=wait() hp=100.0 + step=148 action=wait() hp=100.0 + step=149 action=move(direction='east') hp=100.0 + step=150 action=move(direction='west') hp=100.0 + step=151 action=wait() hp=100.0 + step=152 action=wait() hp=100.0 + step=153 action=wait() hp=100.0 + step=154 action=wait() hp=100.0 + step=155 action=move(direction='south') hp=100.0 + step=156 action=move(direction='south') hp=100.0 + step=157 action=move(direction='north') hp=100.0 + step=158 action=wait() hp=100.0 + step=159 action=move(direction='north') hp=100.0 + step=160 action=door(target_id='door_1', door_state='open') hp=100.0 + step=161 action=wait() hp=100.0 + step=162 action=wait() hp=100.0 + step=163 action=wait() hp=100.0 + step=164 action=door(target_id='door_1', door_state='open') hp=100.0 + step=165 action=wait() hp=100.0 + step=166 action=move(direction='south') hp=100.0 + step=167 action=wait() hp=100.0 + step=168 action=move(direction='south') hp=100.0 + step=169 action=move(direction='north') hp=100.0 + step=170 action=move(direction='south') hp=100.0 + step=171 action=move(direction='east') hp=100.0 + step=172 action=wait() hp=100.0 + step=173 action=move(direction='north') hp=100.0 + step=174 action=move(direction='north') hp=100.0 + step=175 action=wait() hp=100.0 + step=176 action=move(direction='west') hp=100.0 + step=177 action=door(target_id='door_1', door_state='open') hp=100.0 + step=178 action=wait() hp=100.0 + step=179 action=wait() hp=100.0 + step=180 action=move(direction='east') hp=100.0 + step=181 action=move(direction='west') hp=100.0 + step=182 action=wait() hp=100.0 + step=183 action=move(direction='east') hp=100.0 + step=184 action=door(target_id='door_1', door_state='open') hp=100.0 + step=185 action=move(direction='west') hp=100.0 + step=186 action=door(target_id='door_1', door_state='open') hp=100.0 + step=187 action=move(direction='east') hp=100.0 + step=188 action=move(direction='west') hp=100.0 + step=189 action=door(target_id='door_1', door_state='open') hp=100.0 + step=190 action=move(direction='south') hp=100.0 + step=191 action=move(direction='north') hp=100.0 + step=192 action=wait() hp=100.0 + step=193 action=door(target_id='door_1', door_state='open') hp=100.0 + step=194 action=wait() hp=100.0 + step=195 action=move(direction='south') hp=100.0 + step=196 action=move(direction='south') hp=100.0 + step=197 action=wait() hp=100.0 + step=198 action=move(direction='north') hp=100.0 + step=199 action=move(direction='north') hp=100.0 + step=200 action=wait() hp=100.0 +ep=0037 [easy ] steps=200 reward= -20.440 evac=0 hp=100.0 suc30=0.90 r30= +13.70 t=14s + step=001 action=wait() hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=move(direction='west') hp=100.0 +ep=0038 [easy ] steps=005 reward= +16.900 evac=1 hp=100.0 suc30=0.90 r30= +13.70 t=14s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='south') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=door(target_id='door_3', door_state='close') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=door(target_id='door_2', door_state='open') hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=move(direction='south') hp=100.0 + step=021 action=move(direction='east') hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=move(direction='west') hp=100.0 + step=024 action=move(direction='west') hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=move(direction='west') hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=move(direction='west') hp=100.0 + step=029 action=move(direction='west') hp=100.0 + step=030 action=move(direction='west') hp=100.0 + step=031 action=move(direction='west') hp=100.0 +ep=0039 [easy ] steps=031 reward= +16.240 evac=1 hp=100.0 suc30=0.90 r30= +13.63 t=14s + step=001 action=move(direction='west') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=door(target_id='door_1', door_state='close') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=door(target_id='door_1', door_state='open') hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=move(direction='south') hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=wait() hp=100.0 + step=022 action=wait() hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=wait() hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=wait() hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=wait() hp=100.0 + step=029 action=door(target_id='door_1', door_state='open') hp=100.0 + step=030 action=move(direction='east') hp=100.0 + step=031 action=move(direction='west') hp=100.0 + step=032 action=wait() hp=100.0 + step=033 action=wait() hp=100.0 + step=034 action=wait() hp=100.0 + step=035 action=door(target_id='door_1', door_state='open') hp=100.0 + step=036 action=wait() hp=100.0 + step=037 action=wait() hp=100.0 + step=038 action=door(target_id='door_1', door_state='open') hp=100.0 + step=039 action=move(direction='east') hp=100.0 + step=040 action=move(direction='west') hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=wait() hp=100.0 + step=043 action=move(direction='east') hp=100.0 + step=044 action=move(direction='west') hp=100.0 + step=045 action=move(direction='east') hp=100.0 + step=046 action=wait() hp=100.0 + step=047 action=move(direction='west') hp=100.0 + step=048 action=wait() hp=100.0 + step=049 action=wait() hp=100.0 + step=050 action=wait() hp=100.0 + step=051 action=wait() hp=100.0 + step=052 action=move(direction='south') hp=100.0 + step=053 action=wait() hp=100.0 + step=054 action=move(direction='north') hp=100.0 + step=055 action=wait() hp=100.0 + step=056 action=wait() hp=100.0 + step=057 action=wait() hp=100.0 + step=058 action=door(target_id='door_1', door_state='open') hp=100.0 + step=059 action=door(target_id='door_1', door_state='open') hp=100.0 + step=060 action=door(target_id='door_1', door_state='open') hp=100.0 + step=061 action=wait() hp=100.0 + step=062 action=move(direction='south') hp=100.0 + step=063 action=door(target_id='door_1', door_state='open') hp=100.0 + step=064 action=move(direction='north') hp=100.0 + step=065 action=wait() hp=100.0 + step=066 action=door(target_id='door_1', door_state='open') hp=100.0 + step=067 action=move(direction='east') hp=100.0 + step=068 action=move(direction='west') hp=100.0 + step=069 action=wait() hp=100.0 + step=070 action=wait() hp=100.0 + step=071 action=wait() hp=100.0 + step=072 action=door(target_id='door_1', door_state='open') hp=100.0 + step=073 action=move(direction='south') hp=100.0 + step=074 action=move(direction='east') hp=100.0 + step=075 action=door(target_id='door_1', door_state='open') hp=100.0 + step=076 action=move(direction='west') hp=100.0 + step=077 action=move(direction='north') hp=100.0 + step=078 action=door(target_id='door_1', door_state='close') hp=100.0 + step=079 action=wait() hp=100.0 + step=080 action=wait() hp=100.0 + step=081 action=move(direction='south') hp=100.0 + step=082 action=move(direction='north') hp=100.0 + step=083 action=wait() hp=100.0 + step=084 action=door(target_id='door_1', door_state='close') hp=100.0 + step=085 action=wait() hp=100.0 + step=086 action=door(target_id='door_1', door_state='close') hp=100.0 + step=087 action=door(target_id='door_1', door_state='close') hp=100.0 + step=088 action=wait() hp=100.0 + step=089 action=door(target_id='door_1', door_state='close') hp=100.0 + step=090 action=door(target_id='door_1', door_state='close') hp=100.0 + step=091 action=wait() hp=100.0 + step=092 action=wait() hp=100.0 + step=093 action=door(target_id='door_1', door_state='close') hp=100.0 + step=094 action=wait() hp=100.0 + step=095 action=wait() hp=100.0 + step=096 action=wait() hp=100.0 + step=097 action=wait() hp=100.0 + step=098 action=door(target_id='door_1', door_state='close') hp=100.0 + step=099 action=door(target_id='door_1', door_state='close') hp=100.0 + step=100 action=wait() hp=100.0 + step=101 action=door(target_id='door_1', door_state='close') hp=100.0 + step=102 action=wait() hp=100.0 + step=103 action=wait() hp=100.0 + step=104 action=move(direction='south') hp=100.0 + step=105 action=move(direction='north') hp=100.0 + step=106 action=wait() hp=100.0 + step=107 action=door(target_id='door_1', door_state='close') hp=100.0 + step=108 action=door(target_id='door_1', door_state='close') hp=100.0 + step=109 action=wait() hp=100.0 + step=110 action=wait() hp=100.0 + step=111 action=door(target_id='door_1', door_state='close') hp=100.0 + step=112 action=door(target_id='door_1', door_state='close') hp=100.0 + step=113 action=door(target_id='door_1', door_state='close') hp=100.0 + step=114 action=move(direction='south') hp=100.0 + step=115 action=door(target_id='door_1', door_state='close') hp=100.0 + step=116 action=door(target_id='door_1', door_state='close') hp=100.0 + step=117 action=move(direction='north') hp=100.0 + step=118 action=door(target_id='door_1', door_state='close') hp=100.0 + step=119 action=wait() hp=100.0 + step=120 action=wait() hp=100.0 + step=121 action=wait() hp=100.0 + step=122 action=wait() hp=100.0 + step=123 action=wait() hp=100.0 + step=124 action=wait() hp=100.0 + step=125 action=door(target_id='door_1', door_state='close') hp=100.0 + step=126 action=wait() hp=100.0 + step=127 action=door(target_id='door_1', door_state='close') hp=100.0 + step=128 action=wait() hp=100.0 + step=129 action=door(target_id='door_1', door_state='close') hp=100.0 + step=130 action=move(direction='east') hp=100.0 + step=131 action=move(direction='south') hp=100.0 + step=132 action=door(target_id='door_1', door_state='close') hp=100.0 + step=133 action=wait() hp=100.0 + step=134 action=move(direction='west') hp=100.0 + step=135 action=move(direction='east') hp=100.0 + step=136 action=wait() hp=100.0 + step=137 action=move(direction='east') hp=100.0 + step=138 action=move(direction='west') hp=100.0 + step=139 action=move(direction='north') hp=100.0 + step=140 action=move(direction='west') hp=100.0 + step=141 action=move(direction='east') hp=100.0 + step=142 action=move(direction='west') hp=100.0 + step=143 action=wait() hp=100.0 + step=144 action=wait() hp=100.0 + step=145 action=wait() hp=100.0 + step=146 action=wait() hp=100.0 + step=147 action=wait() hp=100.0 + step=148 action=move(direction='south') hp=100.0 + step=149 action=move(direction='north') hp=100.0 + step=150 action=move(direction='south') hp=100.0 + step=151 action=wait() hp=100.0 + step=152 action=wait() hp=100.0 + step=153 action=move(direction='north') hp=100.0 + step=154 action=wait() hp=100.0 + step=155 action=move(direction='south') hp=100.0 + step=156 action=move(direction='north') hp=100.0 + step=157 action=wait() hp=100.0 + step=158 action=door(target_id='door_1', door_state='open') hp=100.0 + step=159 action=wait() hp=100.0 + step=160 action=move(direction='south') hp=100.0 + step=161 action=move(direction='north') hp=100.0 + step=162 action=wait() hp=100.0 + step=163 action=move(direction='east') hp=100.0 + step=164 action=move(direction='south') hp=100.0 + step=165 action=move(direction='east') hp=100.0 + step=166 action=move(direction='north') hp=100.0 + step=167 action=move(direction='west') hp=100.0 + step=168 action=door(target_id='door_1', door_state='open') hp=100.0 + step=169 action=wait() hp=100.0 + step=170 action=move(direction='west') hp=100.0 + step=171 action=wait() hp=100.0 + step=172 action=door(target_id='door_1', door_state='open') hp=100.0 + step=173 action=wait() hp=100.0 + step=174 action=wait() hp=100.0 + step=175 action=move(direction='south') hp=100.0 + step=176 action=move(direction='north') hp=100.0 + step=177 action=wait() hp=100.0 + step=178 action=move(direction='south') hp=100.0 + step=179 action=move(direction='east') hp=100.0 + step=180 action=move(direction='north') hp=100.0 + step=181 action=move(direction='west') hp=100.0 + step=182 action=door(target_id='door_1', door_state='open') hp=100.0 + step=183 action=wait() hp=100.0 + step=184 action=wait() hp=100.0 + step=185 action=move(direction='south') hp=100.0 + step=186 action=move(direction='north') hp=100.0 + step=187 action=door(target_id='door_1', door_state='open') hp=100.0 + step=188 action=wait() hp=100.0 + step=189 action=move(direction='south') hp=100.0 + step=190 action=move(direction='north') hp=100.0 + step=191 action=move(direction='east') hp=100.0 + step=192 action=move(direction='west') hp=100.0 + step=193 action=move(direction='east') hp=100.0 + step=194 action=move(direction='west') hp=100.0 + step=195 action=wait() hp=100.0 + step=196 action=move(direction='east') hp=100.0 + step=197 action=move(direction='west') hp=100.0 + step=198 action=wait() hp=100.0 + step=199 action=wait() hp=100.0 + step=200 action=wait() hp=100.0 +ep=0040 [easy ] steps=200 reward= -18.790 evac=0 hp=100.0 suc30=0.90 r30= +13.40 t=15s + >> PPO update samples=flushed pi_loss=+0.0048 v_loss=26.4871 entropy=1.2155 kl=0.0004 clip%=0.00 lr=2.46e-04 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + ** EVAL [medium] reward=+15.640 success=1.00 steps=4.3 + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='south') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='east') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=move(direction='west') hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=move(direction='south') hp=100.0 + step=022 action=move(direction='north') hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=move(direction='north') hp=100.0 + step=025 action=move(direction='east') hp=100.0 + step=026 action=move(direction='north') hp=100.0 + step=027 action=move(direction='west') hp=100.0 + step=028 action=move(direction='north') hp=100.0 + step=029 action=move(direction='north') hp=100.0 + step=030 action=wait() hp=100.0 + step=031 action=move(direction='north') hp=100.0 + step=032 action=move(direction='east') hp=100.0 + step=033 action=move(direction='west') hp=100.0 + step=034 action=move(direction='west') hp=100.0 +ep=0041 [easy ] steps=034 reward= +21.470 evac=1 hp=100.0 suc30=0.90 r30= +13.78 t=16s + step=001 action=move(direction='west') hp=100.0 + step=002 action=door(target_id='door_3', door_state='close') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='south') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=door(target_id='door_1', door_state='close') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='west') hp=100.0 +ep=0042 [easy ] steps=015 reward= +18.270 evac=1 hp=100.0 suc30=0.90 r30= +14.13 t=16s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=door(target_id='door_1', door_state='open') hp=100.0 + step=009 action=move(direction='east') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=door(target_id='door_2', door_state='close') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=door(target_id='door_1', door_state='close') hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=door(target_id='door_1', door_state='open') hp=100.0 + step=022 action=wait() hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=wait() hp=100.0 + step=025 action=door(target_id='door_1', door_state='close') hp=100.0 + step=026 action=wait() hp=100.0 + step=027 action=move(direction='east') hp=100.0 + step=028 action=door(target_id='door_2', door_state='close') hp=100.0 + step=029 action=door(target_id='door_5', door_state='close') hp=100.0 + step=030 action=door(target_id='door_1', door_state='open') hp=100.0 + step=031 action=move(direction='north') hp=100.0 + step=032 action=wait() hp=100.0 + step=033 action=wait() hp=100.0 + step=034 action=wait() hp=100.0 + step=035 action=wait() hp=100.0 + step=036 action=wait() hp=100.0 + step=037 action=wait() hp=100.0 + step=038 action=door(target_id='door_1', door_state='close') hp=100.0 + step=039 action=wait() hp=100.0 + step=040 action=wait() hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=wait() hp=100.0 + step=043 action=wait() hp=100.0 + step=044 action=move(direction='south') hp=100.0 + step=045 action=door(target_id='door_5', door_state='close') hp=100.0 + step=046 action=move(direction='west') hp=100.0 + step=047 action=wait() hp=100.0 + step=048 action=wait() hp=100.0 + step=049 action=move(direction='south') hp=100.0 + step=050 action=move(direction='north') hp=100.0 + step=051 action=move(direction='east') hp=100.0 + step=052 action=wait() hp=100.0 + step=053 action=move(direction='south') hp=100.0 + step=054 action=move(direction='west') hp=100.0 + step=055 action=move(direction='north') hp=100.0 + step=056 action=move(direction='south') hp=100.0 + step=057 action=move(direction='north') hp=100.0 + step=058 action=wait() hp=100.0 + step=059 action=wait() hp=100.0 + step=060 action=move(direction='east') hp=100.0 + step=061 action=wait() hp=100.0 + step=062 action=move(direction='east') hp=100.0 + step=063 action=wait() hp=100.0 + step=064 action=move(direction='west') hp=100.0 + step=065 action=move(direction='west') hp=100.0 + step=066 action=move(direction='east') hp=100.0 + step=067 action=wait() hp=100.0 + step=068 action=door(target_id='door_2', door_state='close') hp=100.0 + step=069 action=move(direction='west') hp=100.0 + step=070 action=wait() hp=100.0 + step=071 action=door(target_id='door_1', door_state='open') hp=100.0 + step=072 action=move(direction='south') hp=100.0 + step=073 action=move(direction='north') hp=100.0 + step=074 action=wait() hp=100.0 + step=075 action=wait() hp=100.0 + step=076 action=door(target_id='door_1', door_state='close') hp=100.0 + step=077 action=wait() hp=100.0 + step=078 action=move(direction='south') hp=100.0 + step=079 action=move(direction='north') hp=100.0 + step=080 action=door(target_id='door_1', door_state='open') hp=100.0 + step=081 action=door(target_id='door_1', door_state='close') hp=100.0 + step=082 action=wait() hp=100.0 + step=083 action=wait() hp=100.0 + step=084 action=wait() hp=100.0 + step=085 action=move(direction='south') hp=100.0 + step=086 action=wait() hp=100.0 + step=087 action=move(direction='west') hp=100.0 +ep=0043 [easy ] steps=087 reward= +13.420 evac=1 hp=100.0 suc30=0.90 r30= +14.10 t=16s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='east') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=move(direction='north') hp=100.0 + step=021 action=move(direction='west') hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=move(direction='north') hp=100.0 + step=024 action=move(direction='west') hp=100.0 + step=025 action=move(direction='north') hp=100.0 + step=026 action=move(direction='west') hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=move(direction='south') hp=100.0 + step=029 action=move(direction='west') hp=100.0 + step=030 action=wait() hp=100.0 + step=031 action=move(direction='north') hp=100.0 + step=032 action=move(direction='west') hp=100.0 + step=033 action=move(direction='west') hp=100.0 +ep=0044 [easy ] steps=033 reward= +22.840 evac=1 hp=100.0 suc30=0.90 r30= +14.27 t=16s + step=001 action=move(direction='west') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=door(target_id='door_2', door_state='open') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=move(direction='north') hp=100.0 +ep=0045 [easy ] steps=014 reward= +18.330 evac=1 hp=100.0 suc30=0.90 r30= +14.39 t=16s + >> PPO update samples=flushed pi_loss=+0.0003 v_loss=13.1441 entropy=1.2221 kl=0.0003 clip%=0.00 lr=2.39e-04 + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=door(target_id='door_1', door_state='close') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=door(target_id='door_1', door_state='close') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='south') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=door(target_id='door_1', door_state='close') hp=100.0 + step=018 action=move(direction='south') hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=move(direction='south') hp=100.0 + step=022 action=move(direction='north') hp=100.0 + step=023 action=door(target_id='door_1', door_state='close') hp=100.0 + step=024 action=wait() hp=100.0 + step=025 action=move(direction='south') hp=100.0 + step=026 action=door(target_id='door_1', door_state='close') hp=100.0 + step=027 action=move(direction='north') hp=100.0 + step=028 action=door(target_id='door_1', door_state='close') hp=100.0 + step=029 action=move(direction='east') hp=100.0 + step=030 action=wait() hp=100.0 + step=031 action=door(target_id='door_1', door_state='close') hp=100.0 + step=032 action=move(direction='south') hp=100.0 + step=033 action=move(direction='west') hp=100.0 + step=034 action=move(direction='north') hp=100.0 + step=035 action=door(target_id='door_1', door_state='close') hp=100.0 + step=036 action=move(direction='east') hp=100.0 + step=037 action=door(target_id='door_1', door_state='close') hp=100.0 + step=038 action=move(direction='west') hp=100.0 + step=039 action=door(target_id='door_1', door_state='close') hp=100.0 + step=040 action=wait() hp=100.0 + step=041 action=door(target_id='door_1', door_state='close') hp=100.0 + step=042 action=move(direction='east') hp=100.0 + step=043 action=door(target_id='door_1', door_state='close') hp=100.0 + step=044 action=wait() hp=100.0 + step=045 action=door(target_id='door_1', door_state='close') hp=100.0 + step=046 action=wait() hp=100.0 + step=047 action=wait() hp=100.0 + step=048 action=door(target_id='door_1', door_state='close') hp=100.0 + step=049 action=move(direction='west') hp=100.0 + step=050 action=wait() hp=100.0 + step=051 action=move(direction='east') hp=100.0 + step=052 action=wait() hp=100.0 + step=053 action=move(direction='west') hp=100.0 + step=054 action=door(target_id='door_1', door_state='close') hp=100.0 + step=055 action=move(direction='east') hp=100.0 + step=056 action=wait() hp=100.0 + step=057 action=move(direction='west') hp=100.0 + step=058 action=wait() hp=100.0 + step=059 action=wait() hp=100.0 + step=060 action=door(target_id='door_1', door_state='close') hp=100.0 + step=061 action=door(target_id='door_1', door_state='close') hp=100.0 + step=062 action=move(direction='east') hp=100.0 + step=063 action=wait() hp=100.0 + step=064 action=move(direction='west') hp=100.0 + step=065 action=door(target_id='door_1', door_state='close') hp=100.0 + step=066 action=wait() hp=100.0 + step=067 action=wait() hp=100.0 + step=068 action=wait() hp=100.0 + step=069 action=door(target_id='door_1', door_state='close') hp=100.0 + step=070 action=wait() hp=100.0 + step=071 action=wait() hp=100.0 + step=072 action=door(target_id='door_1', door_state='close') hp=100.0 + step=073 action=door(target_id='door_1', door_state='close') hp=100.0 + step=074 action=move(direction='south') hp=100.0 + step=075 action=wait() hp=100.0 + step=076 action=move(direction='north') hp=100.0 + step=077 action=door(target_id='door_1', door_state='close') hp=100.0 + step=078 action=wait() hp=100.0 + step=079 action=move(direction='south') hp=100.0 + step=080 action=move(direction='north') hp=100.0 + step=081 action=wait() hp=100.0 + step=082 action=door(target_id='door_1', door_state='close') hp=100.0 + step=083 action=wait() hp=100.0 + step=084 action=wait() hp=100.0 + step=085 action=wait() hp=100.0 + step=086 action=wait() hp=100.0 + step=087 action=wait() hp=100.0 + step=088 action=door(target_id='door_1', door_state='close') hp=100.0 + step=089 action=door(target_id='door_1', door_state='close') hp=100.0 + step=090 action=door(target_id='door_1', door_state='close') hp=100.0 + step=091 action=door(target_id='door_1', door_state='close') hp=100.0 + step=092 action=move(direction='east') hp=100.0 + step=093 action=door(target_id='door_1', door_state='close') hp=100.0 + step=094 action=wait() hp=100.0 + step=095 action=move(direction='west') hp=100.0 + step=096 action=door(target_id='door_1', door_state='close') hp=100.0 + step=097 action=wait() hp=100.0 + step=098 action=door(target_id='door_1', door_state='close') hp=100.0 + step=099 action=door(target_id='door_1', door_state='close') hp=100.0 + step=100 action=door(target_id='door_1', door_state='close') hp=100.0 + step=101 action=door(target_id='door_1', door_state='close') hp=100.0 + step=102 action=door(target_id='door_1', door_state='close') hp=100.0 + step=103 action=door(target_id='door_1', door_state='close') hp=100.0 + step=104 action=move(direction='south') hp=100.0 + step=105 action=move(direction='north') hp=100.0 + step=106 action=wait() hp=100.0 + step=107 action=move(direction='east') hp=100.0 + step=108 action=move(direction='east') hp=100.0 + step=109 action=move(direction='west') hp=100.0 + step=110 action=move(direction='west') hp=100.0 + step=111 action=door(target_id='door_1', door_state='close') hp=100.0 + step=112 action=wait() hp=100.0 + step=113 action=wait() hp=100.0 + step=114 action=move(direction='east') hp=100.0 + step=115 action=door(target_id='door_1', door_state='close') hp=100.0 + step=116 action=door(target_id='door_1', door_state='close') hp=100.0 + step=117 action=move(direction='west') hp=100.0 + step=118 action=door(target_id='door_1', door_state='close') hp=100.0 + step=119 action=door(target_id='door_1', door_state='close') hp=100.0 + step=120 action=move(direction='east') hp=100.0 + step=121 action=move(direction='west') hp=100.0 + step=122 action=door(target_id='door_1', door_state='close') hp=100.0 + step=123 action=wait() hp=100.0 + step=124 action=wait() hp=100.0 + step=125 action=door(target_id='door_1', door_state='close') hp=100.0 + step=126 action=wait() hp=100.0 + step=127 action=move(direction='south') hp=100.0 + step=128 action=wait() hp=100.0 + step=129 action=move(direction='north') hp=100.0 + step=130 action=door(target_id='door_1', door_state='close') hp=100.0 + step=131 action=move(direction='south') hp=100.0 + step=132 action=wait() hp=100.0 + step=133 action=move(direction='east') hp=100.0 + step=134 action=move(direction='west') hp=100.0 + step=135 action=move(direction='north') hp=100.0 + step=136 action=door(target_id='door_1', door_state='close') hp=100.0 + step=137 action=wait() hp=100.0 + step=138 action=move(direction='east') hp=100.0 + step=139 action=door(target_id='door_1', door_state='close') hp=100.0 + step=140 action=door(target_id='door_1', door_state='close') hp=100.0 + step=141 action=move(direction='east') hp=100.0 + step=142 action=move(direction='south') hp=100.0 + step=143 action=move(direction='west') hp=100.0 + step=144 action=move(direction='west') hp=100.0 + step=145 action=wait() hp=100.0 + step=146 action=move(direction='north') hp=100.0 + step=147 action=door(target_id='door_1', door_state='close') hp=100.0 + step=148 action=wait() hp=100.0 + step=149 action=door(target_id='door_1', door_state='close') hp=100.0 + step=150 action=move(direction='east') hp=100.0 + step=151 action=door(target_id='door_1', door_state='close') hp=100.0 + step=152 action=door(target_id='door_1', door_state='close') hp=100.0 + step=153 action=door(target_id='door_1', door_state='close') hp=100.0 + step=154 action=move(direction='west') hp=100.0 + step=155 action=door(target_id='door_1', door_state='close') hp=100.0 + step=156 action=wait() hp=100.0 + step=157 action=wait() hp=100.0 + step=158 action=door(target_id='door_1', door_state='close') hp=100.0 + step=159 action=wait() hp=100.0 + step=160 action=move(direction='east') hp=100.0 + step=161 action=move(direction='west') hp=100.0 + step=162 action=wait() hp=100.0 + step=163 action=wait() hp=100.0 + step=164 action=door(target_id='door_1', door_state='close') hp=100.0 + step=165 action=wait() hp=100.0 + step=166 action=door(target_id='door_1', door_state='close') hp=100.0 + step=167 action=wait() hp=100.0 + step=168 action=wait() hp=100.0 + step=169 action=wait() hp=100.0 + step=170 action=move(direction='south') hp=100.0 + step=171 action=move(direction='north') hp=100.0 + step=172 action=wait() hp=100.0 + step=173 action=wait() hp=100.0 + step=174 action=wait() hp=100.0 + step=175 action=wait() hp=100.0 + step=176 action=move(direction='east') hp=100.0 + step=177 action=door(target_id='door_1', door_state='close') hp=100.0 + step=178 action=move(direction='west') hp=100.0 + step=179 action=wait() hp=100.0 + step=180 action=door(target_id='door_1', door_state='close') hp=100.0 + step=181 action=wait() hp=100.0 + step=182 action=move(direction='east') hp=100.0 + step=183 action=move(direction='west') hp=100.0 + step=184 action=door(target_id='door_1', door_state='close') hp=100.0 + step=185 action=door(target_id='door_1', door_state='close') hp=100.0 + step=186 action=door(target_id='door_1', door_state='close') hp=100.0 + step=187 action=door(target_id='door_1', door_state='close') hp=100.0 + step=188 action=door(target_id='door_1', door_state='close') hp=100.0 + step=189 action=door(target_id='door_1', door_state='close') hp=100.0 + step=190 action=wait() hp=100.0 + step=191 action=door(target_id='door_1', door_state='close') hp=100.0 + step=192 action=wait() hp=100.0 + step=193 action=move(direction='south') hp=100.0 + step=194 action=move(direction='north') hp=100.0 + step=195 action=door(target_id='door_1', door_state='close') hp=100.0 + step=196 action=wait() hp=100.0 + step=197 action=wait() hp=100.0 + step=198 action=door(target_id='door_1', door_state='close') hp=100.0 + step=199 action=door(target_id='door_1', door_state='close') hp=100.0 + step=200 action=door(target_id='door_1', door_state='close') hp=100.0 +ep=0046 [easy ] steps=200 reward= -18.080 evac=0 hp=100.0 suc30=0.90 r30= +14.16 t=17s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='south') hp=100.0 + step=003 action=move(direction='south') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='east') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='north') hp= 99.5 + step=014 action=wait() hp= 99.5 + step=015 action=move(direction='north') hp= 99.5 + step=016 action=move(direction='north') hp= 99.5 + step=017 action=move(direction='north') hp= 99.5 + step=018 action=move(direction='west') hp= 99.5 + step=019 action=move(direction='west') hp= 99.5 + step=020 action=move(direction='north') hp= 99.5 + step=021 action=move(direction='north') hp= 99.5 + step=022 action=move(direction='east') hp= 99.5 + step=023 action=move(direction='west') hp= 99.5 + step=024 action=move(direction='west') hp= 99.5 + step=025 action=wait() hp= 99.5 + step=026 action=wait() hp= 99.5 + step=027 action=move(direction='east') hp= 99.5 + step=028 action=wait() hp= 99.5 + step=029 action=wait() hp= 99.5 + step=030 action=move(direction='west') hp= 99.5 + step=031 action=move(direction='west') hp= 99.5 + step=032 action=wait() hp= 99.5 + step=033 action=move(direction='west') hp= 99.5 + step=034 action=move(direction='west') hp= 99.5 + step=035 action=move(direction='west') hp= 99.5 + step=036 action=wait() hp= 99.5 + step=037 action=wait() hp= 99.5 + step=038 action=move(direction='east') hp= 99.5 + step=039 action=move(direction='west') hp= 99.5 + step=040 action=wait() hp= 99.5 + step=041 action=move(direction='west') hp= 99.5 +ep=0047 [easy ] steps=041 reward= +19.012 evac=1 hp= 99.5 suc30=0.90 r30= +14.13 t=17s + step=001 action=move(direction='north') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=move(direction='north') hp=100.0 +ep=0048 [easy ] steps=011 reward= +18.160 evac=1 hp=100.0 suc30=0.90 r30= +14.25 t=17s + step=001 action=move(direction='north') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='east') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=door(target_id='door_2', door_state='close') hp=100.0 + step=011 action=door(target_id='door_2', door_state='close') hp=100.0 + step=012 action=door(target_id='door_2', door_state='close') hp=100.0 + step=013 action=move(direction='east') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='east') hp=100.0 + step=016 action=move(direction='east') hp=100.0 + step=017 action=door(target_id='door_2', door_state='close') hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=door(target_id='door_2', door_state='close') hp=100.0 + step=020 action=door(target_id='door_2', door_state='close') hp=100.0 + step=021 action=wait() hp=100.0 + step=022 action=door(target_id='door_2', door_state='close') hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=move(direction='west') hp=100.0 + step=025 action=door(target_id='door_2', door_state='close') hp=100.0 + step=026 action=door(target_id='door_2', door_state='close') hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=wait() hp=100.0 + step=029 action=door(target_id='door_2', door_state='close') hp=100.0 + step=030 action=wait() hp=100.0 + step=031 action=wait() hp=100.0 + step=032 action=move(direction='south') hp=100.0 + step=033 action=door(target_id='door_2', door_state='close') hp=100.0 + step=034 action=move(direction='north') hp=100.0 + step=035 action=door(target_id='door_2', door_state='close') hp=100.0 + step=036 action=wait() hp=100.0 + step=037 action=move(direction='east') hp=100.0 + step=038 action=move(direction='west') hp=100.0 + step=039 action=door(target_id='door_2', door_state='close') hp=100.0 + step=040 action=move(direction='east') hp=100.0 + step=041 action=move(direction='west') hp=100.0 + step=042 action=wait() hp=100.0 + step=043 action=wait() hp=100.0 + step=044 action=door(target_id='door_2', door_state='close') hp=100.0 + step=045 action=move(direction='south') hp=100.0 + step=046 action=wait() hp= 99.5 + step=047 action=wait() hp= 99.0 + step=048 action=move(direction='north') hp= 98.5 + step=049 action=wait() hp= 98.5 + step=050 action=move(direction='south') hp= 98.0 + step=051 action=move(direction='east') hp= 97.5 + step=052 action=move(direction='west') hp= 97.0 + step=053 action=move(direction='north') hp= 96.5 + step=054 action=door(target_id='door_2', door_state='close') hp= 96.0 + step=055 action=door(target_id='door_2', door_state='close') hp= 95.5 + step=056 action=door(target_id='door_2', door_state='close') hp= 95.0 + step=057 action=door(target_id='door_2', door_state='close') hp= 94.5 + step=058 action=wait() hp= 94.0 + step=059 action=door(target_id='door_2', door_state='close') hp= 93.5 + step=060 action=wait() hp= 93.0 + step=061 action=door(target_id='door_2', door_state='close') hp= 92.5 + step=062 action=door(target_id='door_2', door_state='close') hp= 92.0 + step=063 action=wait() hp= 91.5 + step=064 action=move(direction='east') hp= 91.0 + step=065 action=door(target_id='door_2', door_state='close') hp= 90.5 + step=066 action=move(direction='east') hp= 90.0 + step=067 action=move(direction='west') hp= 89.5 + step=068 action=wait() hp= 89.0 + step=069 action=wait() hp= 88.5 + step=070 action=move(direction='west') hp= 88.0 + step=071 action=move(direction='east') hp= 87.5 + step=072 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=073 action=wait() hp= 87.5 + step=074 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=075 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=076 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=077 action=wait() hp= 87.5 + step=078 action=move(direction='east') hp= 87.5 + step=079 action=move(direction='west') hp= 87.5 + step=080 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=081 action=move(direction='south') hp= 87.5 + step=082 action=move(direction='west') hp= 87.5 + step=083 action=move(direction='east') hp= 87.5 + step=084 action=move(direction='west') hp= 87.5 + step=085 action=move(direction='north') hp= 87.5 + step=086 action=move(direction='east') hp= 87.5 + step=087 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=088 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=089 action=move(direction='east') hp= 87.5 + step=090 action=move(direction='west') hp= 87.5 + step=091 action=wait() hp= 87.5 + step=092 action=move(direction='west') hp= 87.5 + step=093 action=move(direction='south') hp= 87.5 + step=094 action=move(direction='north') hp= 87.5 + step=095 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=096 action=wait() hp= 87.5 + step=097 action=wait() hp= 87.5 + step=098 action=wait() hp= 87.5 + step=099 action=wait() hp= 87.5 + step=100 action=wait() hp= 87.5 + step=101 action=wait() hp= 87.5 + step=102 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=103 action=wait() hp= 87.5 + step=104 action=wait() hp= 87.5 + step=105 action=move(direction='south') hp= 87.5 + step=106 action=move(direction='north') hp= 87.5 + step=107 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=108 action=wait() hp= 87.5 + step=109 action=move(direction='south') hp= 87.5 + step=110 action=move(direction='north') hp= 87.5 + step=111 action=wait() hp= 87.5 + step=112 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=113 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=114 action=wait() hp= 87.5 + step=115 action=wait() hp= 87.5 + step=116 action=wait() hp= 87.5 + step=117 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=118 action=wait() hp= 87.5 + step=119 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=120 action=wait() hp= 87.5 + step=121 action=move(direction='south') hp= 87.5 + step=122 action=wait() hp= 87.5 + step=123 action=move(direction='north') hp= 87.5 + step=124 action=move(direction='south') hp= 87.5 + step=125 action=wait() hp= 87.5 + step=126 action=move(direction='north') hp= 87.5 + step=127 action=wait() hp= 87.5 + step=128 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=129 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=130 action=wait() hp= 87.5 + step=131 action=wait() hp= 87.5 + step=132 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=133 action=wait() hp= 87.5 + step=134 action=wait() hp= 87.5 + step=135 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=136 action=wait() hp= 87.5 + step=137 action=wait() hp= 87.5 + step=138 action=wait() hp= 87.5 + step=139 action=wait() hp= 87.5 + step=140 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=141 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=142 action=move(direction='south') hp= 87.5 + step=143 action=move(direction='north') hp= 87.5 + step=144 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=145 action=wait() hp= 87.5 + step=146 action=wait() hp= 87.5 + step=147 action=wait() hp= 87.5 + step=148 action=move(direction='south') hp= 87.5 + step=149 action=move(direction='north') hp= 87.5 + step=150 action=wait() hp= 87.5 + step=151 action=wait() hp= 87.5 + step=152 action=move(direction='east') hp= 87.5 + step=153 action=wait() hp= 87.5 + step=154 action=move(direction='west') hp= 87.5 + step=155 action=wait() hp= 87.5 + step=156 action=wait() hp= 87.5 + step=157 action=wait() hp= 87.5 + step=158 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=159 action=wait() hp= 87.5 + step=160 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=161 action=wait() hp= 87.5 + step=162 action=wait() hp= 87.5 + step=163 action=wait() hp= 87.5 + step=164 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=165 action=wait() hp= 87.5 + step=166 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=167 action=wait() hp= 87.5 + step=168 action=wait() hp= 87.5 + step=169 action=wait() hp= 87.5 + step=170 action=wait() hp= 87.5 + step=171 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=172 action=wait() hp= 87.5 + step=173 action=wait() hp= 87.5 + step=174 action=move(direction='south') hp= 87.5 + step=175 action=move(direction='north') hp= 87.5 + step=176 action=wait() hp= 87.5 + step=177 action=move(direction='south') hp= 87.5 + step=178 action=move(direction='north') hp= 87.5 + step=179 action=wait() hp= 87.5 + step=180 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=181 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=182 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=183 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=184 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=185 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=186 action=wait() hp= 87.5 + step=187 action=wait() hp= 87.5 + step=188 action=wait() hp= 87.5 + step=189 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=190 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=191 action=wait() hp= 87.5 + step=192 action=wait() hp= 87.5 + step=193 action=wait() hp= 87.5 + step=194 action=wait() hp= 87.5 + step=195 action=move(direction='east') hp= 87.5 + step=196 action=move(direction='west') hp= 87.5 + step=197 action=door(target_id='door_2', door_state='close') hp= 87.5 + step=198 action=wait() hp= 87.5 + step=199 action=wait() hp= 87.5 + step=200 action=wait() hp= 87.5 +ep=0049 [easy ] steps=200 reward= -20.515 evac=0 hp= 87.5 suc30=0.87 r30= +12.99 t=18s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='east') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='east') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='east') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=move(direction='west') hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=move(direction='west') hp=100.0 + step=025 action=move(direction='west') hp=100.0 + step=026 action=move(direction='west') hp=100.0 + step=027 action=move(direction='west') hp=100.0 + step=028 action=move(direction='west') hp=100.0 +ep=0050 [easy ] steps=028 reward= +20.040 evac=1 hp=100.0 suc30=0.87 r30= +13.10 t=18s + >> PPO update samples=flushed pi_loss=-0.0001 v_loss=20.0428 entropy=1.1850 kl=0.0004 clip%=0.00 lr=2.32e-04 + [ckpt] saved -> artifacts/pyre_ppo_http_ckpt.pt + step=001 action=move(direction='west') hp=100.0 + step=002 action=door(target_id='door_2', door_state='open') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 +ep=0051 [easy ] steps=005 reward= +17.990 evac=1 hp=100.0 suc30=0.87 r30= +13.29 t=19s + step=001 action=move(direction='west') hp=100.0 + step=002 action=door(target_id='door_6', door_state='open') hp=100.0 + step=003 action=door(target_id='door_7', door_state='close') hp=100.0 + step=004 action=door(target_id='door_7', door_state='close') hp=100.0 + step=005 action=door(target_id='door_7', door_state='close') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=door(target_id='door_1', door_state='close') hp=100.0 + step=018 action=move(direction='south') hp=100.0 + step=019 action=door(target_id='door_1', door_state='close') hp=100.0 + step=020 action=door(target_id='door_1', door_state='close') hp=100.0 + step=021 action=wait() hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=move(direction='north') hp=100.0 + step=024 action=move(direction='west') hp=100.0 + step=025 action=move(direction='west') hp=100.0 + step=026 action=door(target_id='door_1', door_state='close') hp=100.0 + step=027 action=door(target_id='door_1', door_state='open') hp=100.0 + step=028 action=move(direction='south') hp=100.0 + step=029 action=door(target_id='door_1', door_state='close') hp= 99.5 + step=030 action=move(direction='west') hp= 99.0 +ep=0052 [easy ] steps=030 reward= +18.365 evac=1 hp= 99.0 suc30=0.87 r30= +13.34 t=19s + step=001 action=move(direction='north') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=move(direction='south') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='west') hp=100.0 +ep=0053 [easy ] steps=007 reward= +17.570 evac=1 hp=100.0 suc30=0.87 r30= +13.28 t=19s + step=001 action=move(direction='west') hp=100.0 + step=002 action=door(target_id='door_1', door_state='close') hp=100.0 + step=003 action=move(direction='east') hp=100.0 + step=004 action=door(target_id='door_1', door_state='close') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=move(direction='west') hp=100.0 +ep=0054 [easy ] steps=007 reward= +17.210 evac=1 hp=100.0 suc30=0.87 r30= +13.24 t=19s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=door(target_id='door_1', door_state='close') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=door(target_id='door_1', door_state='close') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='east') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='east') hp=100.0 + step=015 action=door(target_id='door_5', door_state='close') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=move(direction='south') hp=100.0 + step=021 action=move(direction='west') hp=100.0 +ep=0055 [easy ] steps=021 reward= +18.180 evac=1 hp=100.0 suc30=0.87 r30= +13.23 t=19s + >> PPO update samples=flushed pi_loss=+0.0005 v_loss=20.9054 entropy=1.3904 kl=0.0001 clip%=0.00 lr=2.26e-04 + step=001 action=wait() hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='east') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=move(direction='east') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='east') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=move(direction='south') hp=100.0 + step=022 action=wait() hp=100.0 + step=023 action=move(direction='north') hp=100.0 + step=024 action=move(direction='east') hp=100.0 + step=025 action=move(direction='west') hp=100.0 + step=026 action=wait() hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=move(direction='north') hp=100.0 + step=029 action=wait() hp=100.0 + step=030 action=wait() hp=100.0 + step=031 action=move(direction='north') hp=100.0 + step=032 action=move(direction='west') hp=100.0 +ep=0056 [easy ] steps=032 reward= +17.970 evac=1 hp=100.0 suc30=0.87 r30= +13.17 t=19s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 +ep=0057 [easy ] steps=005 reward= +17.440 evac=1 hp=100.0 suc30=0.87 r30= +13.14 t=19s + step=001 action=move(direction='east') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=door(target_id='door_2', door_state='close') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=door(target_id='door_2', door_state='close') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=door(target_id='door_1', door_state='open') hp=100.0 + step=016 action=move(direction='east') hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=door(target_id='door_1', door_state='close') hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=move(direction='north') hp=100.0 + step=024 action=door(target_id='door_1', door_state='open') hp=100.0 + step=025 action=move(direction='north') hp=100.0 + step=026 action=move(direction='south') hp=100.0 + step=027 action=move(direction='south') hp=100.0 + step=028 action=move(direction='north') hp=100.0 + step=029 action=move(direction='north') hp=100.0 + step=030 action=move(direction='south') hp=100.0 + step=031 action=wait() hp=100.0 + step=032 action=move(direction='east') hp=100.0 + step=033 action=move(direction='west') hp=100.0 + step=034 action=move(direction='east') hp=100.0 + step=035 action=wait() hp=100.0 + step=036 action=move(direction='south') hp=100.0 + step=037 action=move(direction='north') hp=100.0 + step=038 action=move(direction='west') hp=100.0 + step=039 action=move(direction='north') hp=100.0 + step=040 action=wait() hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=move(direction='south') hp=100.0 + step=043 action=move(direction='north') hp=100.0 + step=044 action=move(direction='east') hp=100.0 + step=045 action=wait() hp=100.0 + step=046 action=move(direction='west') hp=100.0 + step=047 action=wait() hp=100.0 + step=048 action=move(direction='south') hp=100.0 + step=049 action=move(direction='east') hp=100.0 + step=050 action=wait() hp=100.0 + step=051 action=wait() hp=100.0 + step=052 action=move(direction='north') hp=100.0 + step=053 action=move(direction='west') hp=100.0 + step=054 action=wait() hp=100.0 + step=055 action=door(target_id='door_1', door_state='open') hp=100.0 + step=056 action=move(direction='south') hp=100.0 + step=057 action=wait() hp=100.0 + step=058 action=wait() hp=100.0 + step=059 action=move(direction='north') hp=100.0 + step=060 action=move(direction='east') hp=100.0 + step=061 action=move(direction='west') hp=100.0 + step=062 action=wait() hp=100.0 + step=063 action=move(direction='east') hp=100.0 + step=064 action=move(direction='west') hp=100.0 + step=065 action=wait() hp=100.0 + step=066 action=door(target_id='door_1', door_state='open') hp=100.0 + step=067 action=wait() hp=100.0 + step=068 action=move(direction='east') hp=100.0 + step=069 action=move(direction='south') hp=100.0 + step=070 action=move(direction='north') hp=100.0 + step=071 action=move(direction='west') hp=100.0 + step=072 action=move(direction='south') hp=100.0 + step=073 action=move(direction='north') hp=100.0 + step=074 action=move(direction='east') hp=100.0 + step=075 action=move(direction='west') hp=100.0 + step=076 action=wait() hp=100.0 + step=077 action=wait() hp=100.0 + step=078 action=wait() hp=100.0 + step=079 action=wait() hp=100.0 + step=080 action=move(direction='south') hp=100.0 + step=081 action=door(target_id='door_1', door_state='open') hp=100.0 + step=082 action=move(direction='north') hp=100.0 + step=083 action=wait() hp=100.0 + step=084 action=move(direction='south') hp=100.0 + step=085 action=move(direction='north') hp=100.0 + step=086 action=wait() hp=100.0 + step=087 action=wait() hp=100.0 + step=088 action=wait() hp=100.0 + step=089 action=wait() hp=100.0 + step=090 action=wait() hp=100.0 + step=091 action=move(direction='south') hp=100.0 + step=092 action=wait() hp=100.0 + step=093 action=move(direction='north') hp=100.0 + step=094 action=move(direction='south') hp=100.0 + step=095 action=move(direction='south') hp=100.0 + step=096 action=move(direction='north') hp=100.0 + step=097 action=wait() hp=100.0 + step=098 action=move(direction='north') hp=100.0 + step=099 action=wait() hp=100.0 + step=100 action=wait() hp=100.0 + step=101 action=wait() hp=100.0 + step=102 action=door(target_id='door_1', door_state='open') hp=100.0 + step=103 action=door(target_id='door_1', door_state='open') hp=100.0 + step=104 action=wait() hp=100.0 + step=105 action=wait() hp=100.0 + step=106 action=move(direction='east') hp=100.0 + step=107 action=door(target_id='door_1', door_state='open') hp=100.0 + step=108 action=move(direction='east') hp=100.0 + step=109 action=move(direction='west') hp=100.0 + step=110 action=move(direction='west') hp=100.0 + step=111 action=wait() hp=100.0 + step=112 action=move(direction='south') hp=100.0 + step=113 action=move(direction='north') hp=100.0 + step=114 action=wait() hp=100.0 + step=115 action=wait() hp=100.0 + step=116 action=move(direction='south') hp=100.0 + step=117 action=move(direction='north') hp=100.0 + step=118 action=wait() hp=100.0 + step=119 action=wait() hp=100.0 + step=120 action=wait() hp=100.0 + step=121 action=move(direction='south') hp=100.0 + step=122 action=move(direction='south') hp=100.0 + step=123 action=move(direction='east') hp=100.0 + step=124 action=move(direction='north') hp=100.0 + step=125 action=wait() hp=100.0 + step=126 action=wait() hp=100.0 + step=127 action=move(direction='west') hp=100.0 + step=128 action=move(direction='north') hp=100.0 + step=129 action=move(direction='east') hp=100.0 + step=130 action=move(direction='east') hp=100.0 + step=131 action=move(direction='west') hp=100.0 + step=132 action=move(direction='west') hp=100.0 + step=133 action=move(direction='south') hp=100.0 + step=134 action=move(direction='north') hp=100.0 + step=135 action=door(target_id='door_1', door_state='open') hp=100.0 + step=136 action=move(direction='east') hp=100.0 + step=137 action=move(direction='east') hp=100.0 + step=138 action=move(direction='west') hp=100.0 + step=139 action=wait() hp=100.0 + step=140 action=move(direction='west') hp=100.0 + step=141 action=move(direction='east') hp=100.0 + step=142 action=move(direction='west') hp=100.0 + step=143 action=wait() hp=100.0 + step=144 action=move(direction='south') hp=100.0 + step=145 action=move(direction='north') hp=100.0 + step=146 action=move(direction='east') hp=100.0 + step=147 action=move(direction='west') hp=100.0 + step=148 action=move(direction='south') hp=100.0 + step=149 action=move(direction='north') hp=100.0 + step=150 action=wait() hp=100.0 + step=151 action=move(direction='east') hp=100.0 + step=152 action=move(direction='west') hp=100.0 + step=153 action=wait() hp=100.0 + step=154 action=move(direction='south') hp=100.0 + step=155 action=move(direction='north') hp=100.0 + step=156 action=move(direction='south') hp=100.0 + step=157 action=door(target_id='door_1', door_state='open') hp=100.0 + step=158 action=move(direction='south') hp=100.0 + step=159 action=move(direction='north') hp=100.0 + step=160 action=move(direction='north') hp=100.0 + step=161 action=wait() hp=100.0 + step=162 action=door(target_id='door_1', door_state='open') hp=100.0 + step=163 action=wait() hp=100.0 + step=164 action=wait() hp=100.0 + step=165 action=wait() hp=100.0 + step=166 action=move(direction='east') hp=100.0 + step=167 action=move(direction='west') hp=100.0 + step=168 action=wait() hp=100.0 + step=169 action=wait() hp=100.0 + step=170 action=wait() hp=100.0 + step=171 action=wait() hp=100.0 + step=172 action=door(target_id='door_1', door_state='open') hp=100.0 + step=173 action=wait() hp=100.0 + step=174 action=wait() hp=100.0 + step=175 action=wait() hp=100.0 + step=176 action=move(direction='south') hp=100.0 + step=177 action=move(direction='north') hp=100.0 + step=178 action=wait() hp=100.0 + step=179 action=wait() hp=100.0 + step=180 action=wait() hp=100.0 + step=181 action=wait() hp=100.0 + step=182 action=wait() hp=100.0 + step=183 action=wait() hp=100.0 + step=184 action=wait() hp=100.0 + step=185 action=wait() hp=100.0 + step=186 action=wait() hp=100.0 + step=187 action=wait() hp=100.0 + step=188 action=wait() hp=100.0 + step=189 action=wait() hp=100.0 + step=190 action=move(direction='south') hp=100.0 + step=191 action=wait() hp=100.0 + step=192 action=wait() hp=100.0 + step=193 action=wait() hp=100.0 + step=194 action=move(direction='south') hp=100.0 + step=195 action=move(direction='north') hp=100.0 + step=196 action=move(direction='north') hp=100.0 + step=197 action=wait() hp=100.0 + step=198 action=wait() hp=100.0 + step=199 action=wait() hp=100.0 + step=200 action=wait() hp=100.0 +ep=0058 [easy ] steps=200 reward= -19.920 evac=0 hp=100.0 suc30=0.83 r30= +12.01 t=20s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='south') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='south') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='west') hp=100.0 +ep=0059 [easy ] steps=016 reward= +18.780 evac=1 hp=100.0 suc30=0.83 r30= +11.95 t=20s + step=001 action=move(direction='west') hp=100.0 + step=002 action=door(target_id='door_2', door_state='close') hp=100.0 + step=003 action=door(target_id='door_1', door_state='open') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=door(target_id='door_2', door_state='close') hp=100.0 + step=006 action=door(target_id='door_2', door_state='close') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=door(target_id='door_1', door_state='open') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='south') hp=100.0 + step=013 action=door(target_id='door_1', door_state='open') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='west') hp=100.0 +ep=0060 [easy ] steps=017 reward= +17.830 evac=1 hp=100.0 suc30=0.83 r30= +11.96 t=20s + >> PPO update samples=flushed pi_loss=-0.0194 v_loss=18.6526 entropy=1.2473 kl=0.0044 clip%=0.05 lr=2.19e-04 + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=move(direction='west') hp=100.0 + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + ** EVAL [medium] reward=+16.887 success=1.00 steps=9.0 + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=door(target_id='door_1', door_state='close') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=door(target_id='door_1', door_state='open') hp=100.0 + step=015 action=door(target_id='door_1', door_state='close') hp=100.0 + step=016 action=move(direction='east') hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=move(direction='east') hp=100.0 + step=021 action=move(direction='west') hp=100.0 + step=022 action=wait() hp=100.0 + step=023 action=move(direction='east') hp=100.0 + step=024 action=wait() hp=100.0 + step=025 action=door(target_id='door_1', door_state='open') hp=100.0 + step=026 action=move(direction='west') hp=100.0 + step=027 action=door(target_id='door_1', door_state='open') hp=100.0 + step=028 action=move(direction='east') hp=100.0 + step=029 action=door(target_id='door_1', door_state='open') hp=100.0 + step=030 action=move(direction='west') hp=100.0 + step=031 action=move(direction='east') hp=100.0 + step=032 action=move(direction='west') hp=100.0 + step=033 action=wait() hp=100.0 + step=034 action=door(target_id='door_1', door_state='open') hp=100.0 + step=035 action=wait() hp=100.0 + step=036 action=move(direction='east') hp=100.0 + step=037 action=move(direction='west') hp=100.0 + step=038 action=door(target_id='door_1', door_state='open') hp=100.0 + step=039 action=move(direction='east') hp=100.0 + step=040 action=move(direction='west') hp=100.0 + step=041 action=move(direction='south') hp=100.0 + step=042 action=wait() hp=100.0 + step=043 action=move(direction='north') hp=100.0 + step=044 action=move(direction='east') hp=100.0 + step=045 action=move(direction='south') hp=100.0 + step=046 action=door(target_id='door_1', door_state='open') hp=100.0 + step=047 action=move(direction='west') hp=100.0 + step=048 action=door(target_id='door_1', door_state='close') hp=100.0 + step=049 action=door(target_id='door_1', door_state='close') hp=100.0 + step=050 action=wait() hp=100.0 + step=051 action=move(direction='north') hp=100.0 + step=052 action=wait() hp=100.0 + step=053 action=move(direction='south') hp=100.0 + step=054 action=door(target_id='door_1', door_state='close') hp=100.0 + step=055 action=wait() hp=100.0 + step=056 action=move(direction='east') hp=100.0 + step=057 action=wait() hp=100.0 + step=058 action=wait() hp=100.0 + step=059 action=move(direction='north') hp=100.0 + step=060 action=move(direction='west') hp=100.0 + step=061 action=move(direction='south') hp=100.0 + step=062 action=wait() hp=100.0 + step=063 action=move(direction='north') hp=100.0 + step=064 action=wait() hp=100.0 + step=065 action=wait() hp=100.0 + step=066 action=door(target_id='door_1', door_state='close') hp=100.0 + step=067 action=wait() hp=100.0 + step=068 action=door(target_id='door_1', door_state='close') hp=100.0 + step=069 action=wait() hp=100.0 + step=070 action=wait() hp=100.0 + step=071 action=move(direction='south') hp=100.0 + step=072 action=move(direction='north') hp=100.0 + step=073 action=wait() hp=100.0 + step=074 action=move(direction='south') hp=100.0 + step=075 action=move(direction='north') hp=100.0 + step=076 action=door(target_id='door_1', door_state='close') hp=100.0 + step=077 action=wait() hp=100.0 + step=078 action=move(direction='south') hp=100.0 + step=079 action=move(direction='south') hp=100.0 + step=080 action=move(direction='north') hp=100.0 + step=081 action=wait() hp=100.0 + step=082 action=wait() hp=100.0 + step=083 action=move(direction='east') hp=100.0 + step=084 action=move(direction='north') hp=100.0 + step=085 action=wait() hp=100.0 + step=086 action=move(direction='west') hp=100.0 + step=087 action=door(target_id='door_1', door_state='close') hp=100.0 + step=088 action=wait() hp=100.0 + step=089 action=move(direction='east') hp=100.0 + step=090 action=move(direction='south') hp=100.0 + step=091 action=move(direction='west') hp=100.0 + step=092 action=move(direction='north') hp=100.0 + step=093 action=door(target_id='door_1', door_state='close') hp=100.0 + step=094 action=move(direction='east') hp=100.0 + step=095 action=door(target_id='door_1', door_state='close') hp=100.0 + step=096 action=move(direction='south') hp=100.0 + step=097 action=move(direction='north') hp=100.0 + step=098 action=door(target_id='door_1', door_state='close') hp=100.0 + step=099 action=move(direction='west') hp=100.0 + step=100 action=move(direction='east') hp=100.0 + step=101 action=move(direction='west') hp=100.0 + step=102 action=move(direction='east') hp=100.0 + step=103 action=move(direction='east') hp=100.0 + step=104 action=move(direction='west') hp=100.0 + step=105 action=move(direction='east') hp=100.0 + step=106 action=move(direction='west') hp=100.0 + step=107 action=move(direction='west') hp=100.0 + step=108 action=move(direction='south') hp=100.0 + step=109 action=move(direction='east') hp=100.0 + step=110 action=wait() hp=100.0 + step=111 action=move(direction='north') hp=100.0 + step=112 action=wait() hp=100.0 + step=113 action=move(direction='west') hp=100.0 + step=114 action=door(target_id='door_1', door_state='close') hp=100.0 + step=115 action=move(direction='east') hp=100.0 + step=116 action=move(direction='west') hp=100.0 + step=117 action=wait() hp=100.0 + step=118 action=wait() hp=100.0 + step=119 action=wait() hp=100.0 + step=120 action=wait() hp=100.0 + step=121 action=move(direction='east') hp=100.0 + step=122 action=move(direction='west') hp=100.0 + step=123 action=move(direction='south') hp=100.0 + step=124 action=move(direction='north') hp=100.0 + step=125 action=move(direction='east') hp=100.0 + step=126 action=move(direction='south') hp=100.0 + step=127 action=move(direction='north') hp=100.0 + step=128 action=move(direction='west') hp=100.0 + step=129 action=wait() hp=100.0 + step=130 action=wait() hp=100.0 + step=131 action=wait() hp=100.0 + step=132 action=door(target_id='door_1', door_state='close') hp=100.0 + step=133 action=door(target_id='door_1', door_state='close') hp=100.0 + step=134 action=move(direction='south') hp=100.0 + step=135 action=move(direction='north') hp=100.0 + step=136 action=wait() hp=100.0 + step=137 action=wait() hp=100.0 + step=138 action=door(target_id='door_1', door_state='close') hp=100.0 + step=139 action=wait() hp=100.0 + step=140 action=wait() hp=100.0 + step=141 action=door(target_id='door_1', door_state='close') hp=100.0 + step=142 action=move(direction='south') hp=100.0 + step=143 action=door(target_id='door_1', door_state='close') hp=100.0 + step=144 action=move(direction='south') hp=100.0 + step=145 action=door(target_id='door_1', door_state='close') hp=100.0 + step=146 action=move(direction='north') hp=100.0 + step=147 action=door(target_id='door_1', door_state='open') hp=100.0 + step=148 action=move(direction='north') hp=100.0 + step=149 action=move(direction='south') hp=100.0 + step=150 action=move(direction='east') hp=100.0 + step=151 action=move(direction='east') hp=100.0 + step=152 action=move(direction='north') hp=100.0 + step=153 action=move(direction='west') hp=100.0 + step=154 action=door(target_id='door_1', door_state='open') hp=100.0 + step=155 action=move(direction='west') hp=100.0 + step=156 action=wait() hp=100.0 + step=157 action=move(direction='south') hp=100.0 + step=158 action=move(direction='north') hp=100.0 + step=159 action=move(direction='south') hp=100.0 + step=160 action=door(target_id='door_1', door_state='open') hp=100.0 + step=161 action=wait() hp=100.0 + step=162 action=wait() hp=100.0 + step=163 action=door(target_id='door_1', door_state='open') hp=100.0 + step=164 action=move(direction='north') hp=100.0 + step=165 action=door(target_id='door_1', door_state='open') hp=100.0 + step=166 action=move(direction='south') hp=100.0 + step=167 action=move(direction='north') hp=100.0 + step=168 action=wait() hp=100.0 + step=169 action=wait() hp=100.0 + step=170 action=move(direction='east') hp=100.0 + step=171 action=move(direction='south') hp=100.0 + step=172 action=move(direction='north') hp=100.0 + step=173 action=move(direction='east') hp=100.0 + step=174 action=move(direction='west') hp=100.0 + step=175 action=move(direction='west') hp=100.0 + step=176 action=door(target_id='door_1', door_state='open') hp=100.0 + step=177 action=wait() hp=100.0 + step=178 action=wait() hp=100.0 + step=179 action=wait() hp=100.0 + step=180 action=move(direction='south') hp=100.0 + step=181 action=move(direction='north') hp=100.0 + step=182 action=wait() hp=100.0 + step=183 action=wait() hp=100.0 + step=184 action=door(target_id='door_1', door_state='open') hp=100.0 + step=185 action=move(direction='east') hp=100.0 + step=186 action=wait() hp=100.0 + step=187 action=door(target_id='door_1', door_state='open') hp=100.0 + step=188 action=move(direction='west') hp=100.0 + step=189 action=wait() hp=100.0 + step=190 action=wait() hp=100.0 + step=191 action=wait() hp=100.0 + step=192 action=wait() hp=100.0 + step=193 action=move(direction='south') hp=100.0 + step=194 action=wait() hp=100.0 + step=195 action=move(direction='north') hp=100.0 + step=196 action=move(direction='east') hp=100.0 + step=197 action=wait() hp=100.0 + step=198 action=move(direction='west') hp=100.0 + step=199 action=wait() hp=100.0 + step=200 action=door(target_id='door_1', door_state='open') hp=100.0 +ep=0061 [easy ] steps=200 reward= -19.670 evac=0 hp=100.0 suc30=0.80 r30= +10.68 t=22s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='east') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=move(direction='east') hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=wait() hp=100.0 + step=022 action=move(direction='east') hp=100.0 + step=023 action=move(direction='east') hp=100.0 + step=024 action=move(direction='south') hp=100.0 + step=025 action=move(direction='west') hp=100.0 + step=026 action=move(direction='west') hp=100.0 + step=027 action=move(direction='west') hp=100.0 + step=028 action=move(direction='west') hp=100.0 + step=029 action=wait() hp=100.0 + step=030 action=wait() hp=100.0 + step=031 action=move(direction='south') hp=100.0 + step=032 action=move(direction='west') hp=100.0 + step=033 action=wait() hp=100.0 + step=034 action=wait() hp=100.0 + step=035 action=move(direction='west') hp=100.0 + step=036 action=wait() hp=100.0 + step=037 action=move(direction='south') hp=100.0 + step=038 action=move(direction='north') hp=100.0 + step=039 action=move(direction='north') hp=100.0 + step=040 action=move(direction='north') hp=100.0 + step=041 action=move(direction='west') hp=100.0 + step=042 action=move(direction='west') hp=100.0 + step=043 action=wait() hp=100.0 + step=044 action=move(direction='west') hp=100.0 + step=045 action=move(direction='west') hp=100.0 + step=046 action=move(direction='west') hp=100.0 +ep=0062 [easy ] steps=046 reward= +19.330 evac=1 hp=100.0 suc30=0.80 r30= +10.60 t=22s + step=001 action=wait() hp=100.0 + step=002 action=door(target_id='door_3', door_state='open') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=door(target_id='door_3', door_state='open') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=door(target_id='door_3', door_state='open') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=move(direction='south') hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=move(direction='north') hp=100.0 + step=021 action=move(direction='north') hp=100.0 +ep=0063 [easy ] steps=021 reward= +17.550 evac=1 hp=100.0 suc30=0.80 r30= +10.57 t=22s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=door(target_id='door_6', door_state='close') hp=100.0 + step=008 action=door(target_id='door_1', door_state='close') hp=100.0 + step=009 action=door(target_id='door_2', door_state='close') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=door(target_id='door_1', door_state='close') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=move(direction='east') hp=100.0 + step=021 action=wait() hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=door(target_id='door_1', door_state='close') hp=100.0 + step=024 action=wait() hp=100.0 + step=025 action=door(target_id='door_1', door_state='open') hp=100.0 + step=026 action=door(target_id='door_1', door_state='close') hp=100.0 + step=027 action=move(direction='south') hp=100.0 + step=028 action=wait() hp=100.0 + step=029 action=move(direction='north') hp=100.0 + step=030 action=move(direction='east') hp=100.0 + step=031 action=move(direction='west') hp=100.0 + step=032 action=wait() hp=100.0 + step=033 action=wait() hp=100.0 + step=034 action=wait() hp=100.0 + step=035 action=move(direction='east') hp=100.0 + step=036 action=door(target_id='door_2', door_state='close') hp=100.0 + step=037 action=move(direction='south') hp=100.0 + step=038 action=move(direction='south') hp=100.0 + step=039 action=wait() hp=100.0 + step=040 action=move(direction='west') hp=100.0 + step=041 action=move(direction='north') hp=100.0 + step=042 action=move(direction='west') hp=100.0 +ep=0064 [easy ] steps=042 reward= +17.810 evac=1 hp=100.0 suc30=0.80 r30= +10.65 t=22s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='east') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=move(direction='south') hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=wait() hp=100.0 + step=022 action=wait() hp=100.0 + step=023 action=move(direction='east') hp=100.0 + step=024 action=move(direction='north') hp=100.0 + step=025 action=move(direction='west') hp=100.0 + step=026 action=move(direction='north') hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=move(direction='west') hp=100.0 + step=029 action=wait() hp=100.0 + step=030 action=move(direction='west') hp=100.0 + step=031 action=move(direction='west') hp=100.0 + step=032 action=move(direction='west') hp=100.0 + step=033 action=wait() hp=100.0 + step=034 action=move(direction='west') hp=100.0 +ep=0065 [easy ] steps=034 reward= +21.030 evac=1 hp=100.0 suc30=0.80 r30= +10.75 t=22s + >> PPO update samples=flushed pi_loss=-0.0136 v_loss=10.9852 entropy=1.3308 kl=0.0037 clip%=0.04 lr=2.12e-04 + step=001 action=move(direction='west') hp=100.0 +ep=0066 [easy ] steps=001 reward= +16.760 evac=1 hp=100.0 suc30=0.80 r30= +10.70 t=22s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='east') hp=100.0 + step=004 action=door(target_id='door_2', door_state='close') hp=100.0 + step=005 action=door(target_id='door_5', door_state='close') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=door(target_id='door_1', door_state='close') hp=100.0 + step=010 action=move(direction='east') hp=100.0 + step=011 action=door(target_id='door_2', door_state='close') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=door(target_id='door_1', door_state='open') hp=100.0 + step=016 action=move(direction='south') hp=100.0 + step=017 action=door(target_id='door_5', door_state='close') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=move(direction='west') hp=100.0 +ep=0067 [easy ] steps=019 reward= +16.630 evac=1 hp=100.0 suc30=0.83 r30= +11.94 t=23s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='east') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='east') hp=100.0 + step=017 action=move(direction='east') hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=move(direction='west') hp=100.0 + step=022 action=move(direction='west') hp=100.0 +ep=0068 [easy ] steps=022 reward= +19.160 evac=1 hp=100.0 suc30=0.83 r30= +12.01 t=23s + step=001 action=door(target_id='door_2', door_state='close') hp=100.0 + step=002 action=door(target_id='door_2', door_state='close') hp=100.0 + step=003 action=door(target_id='door_2', door_state='close') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='east') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='east') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=door(target_id='door_1', door_state='open') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='east') hp=100.0 + step=015 action=move(direction='east') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=move(direction='west') hp=100.0 +ep=0069 [easy ] steps=019 reward= +17.580 evac=1 hp=100.0 suc30=0.83 r30= +12.06 t=23s + step=001 action=door(target_id='door_4', door_state='close') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='south') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=door(target_id='door_8', door_state='close') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=door(target_id='door_4', door_state='close') hp=100.0 + step=012 action=door(target_id='door_4', door_state='open') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=door(target_id='door_4', door_state='close') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=door(target_id='door_4', door_state='open') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=door(target_id='door_4', door_state='close') hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=move(direction='east') hp=100.0 + step=024 action=door(target_id='door_4', door_state='open') hp=100.0 + step=025 action=move(direction='west') hp=100.0 + step=026 action=move(direction='north') hp=100.0 + step=027 action=move(direction='north') hp=100.0 + step=028 action=door(target_id='door_4', door_state='close') hp=100.0 + step=029 action=door(target_id='door_4', door_state='close') hp=100.0 + step=030 action=door(target_id='door_4', door_state='close') hp=100.0 + step=031 action=door(target_id='door_4', door_state='close') hp=100.0 + step=032 action=move(direction='east') hp=100.0 + step=033 action=move(direction='west') hp=100.0 + step=034 action=door(target_id='door_4', door_state='close') hp=100.0 + step=035 action=wait() hp=100.0 + step=036 action=move(direction='south') hp=100.0 + step=037 action=move(direction='east') hp=100.0 + step=038 action=door(target_id='door_4', door_state='close') hp=100.0 + step=039 action=wait() hp=100.0 + step=040 action=move(direction='south') hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=door(target_id='door_4', door_state='open') hp=100.0 + step=043 action=move(direction='west') hp=100.0 + step=044 action=move(direction='north') hp=100.0 + step=045 action=move(direction='east') hp=100.0 + step=046 action=move(direction='west') hp=100.0 + step=047 action=move(direction='north') hp=100.0 + step=048 action=move(direction='south') hp=100.0 + step=049 action=door(target_id='door_4', door_state='close') hp=100.0 + step=050 action=door(target_id='door_4', door_state='close') hp=100.0 + step=051 action=wait() hp=100.0 + step=052 action=move(direction='north') hp=100.0 + step=053 action=door(target_id='door_4', door_state='close') hp=100.0 + step=054 action=door(target_id='door_4', door_state='close') hp=100.0 + step=055 action=door(target_id='door_4', door_state='close') hp=100.0 + step=056 action=door(target_id='door_4', door_state='close') hp=100.0 + step=057 action=wait() hp=100.0 + step=058 action=wait() hp=100.0 + step=059 action=door(target_id='door_4', door_state='close') hp=100.0 + step=060 action=move(direction='east') hp=100.0 + step=061 action=move(direction='west') hp=100.0 + step=062 action=move(direction='south') hp=100.0 + step=063 action=move(direction='south') hp=100.0 + step=064 action=move(direction='east') hp=100.0 + step=065 action=move(direction='north') hp=100.0 + step=066 action=wait() hp=100.0 + step=067 action=move(direction='west') hp=100.0 + step=068 action=door(target_id='door_4', door_state='close') hp=100.0 + step=069 action=door(target_id='door_4', door_state='close') hp=100.0 + step=070 action=door(target_id='door_4', door_state='close') hp=100.0 + step=071 action=door(target_id='door_4', door_state='close') hp=100.0 + step=072 action=move(direction='north') hp=100.0 + step=073 action=door(target_id='door_4', door_state='close') hp=100.0 + step=074 action=wait() hp=100.0 + step=075 action=move(direction='south') hp=100.0 + step=076 action=move(direction='north') hp=100.0 + step=077 action=wait() hp=100.0 + step=078 action=wait() hp=100.0 + step=079 action=door(target_id='door_4', door_state='close') hp=100.0 + step=080 action=door(target_id='door_4', door_state='close') hp=100.0 + step=081 action=wait() hp=100.0 + step=082 action=wait() hp=100.0 + step=083 action=door(target_id='door_4', door_state='close') hp=100.0 + step=084 action=door(target_id='door_4', door_state='close') hp=100.0 + step=085 action=door(target_id='door_4', door_state='close') hp=100.0 + step=086 action=door(target_id='door_4', door_state='close') hp=100.0 + step=087 action=door(target_id='door_4', door_state='close') hp=100.0 + step=088 action=wait() hp=100.0 + step=089 action=wait() hp=100.0 + step=090 action=wait() hp=100.0 + step=091 action=door(target_id='door_4', door_state='close') hp=100.0 + step=092 action=wait() hp=100.0 + step=093 action=wait() hp=100.0 + step=094 action=wait() hp=100.0 + step=095 action=door(target_id='door_4', door_state='close') hp=100.0 + step=096 action=wait() hp=100.0 + step=097 action=wait() hp=100.0 + step=098 action=door(target_id='door_4', door_state='close') hp=100.0 + step=099 action=wait() hp=100.0 + step=100 action=wait() hp=100.0 + step=101 action=wait() hp=100.0 + step=102 action=move(direction='south') hp=100.0 + step=103 action=wait() hp=100.0 + step=104 action=move(direction='north') hp=100.0 + step=105 action=door(target_id='door_4', door_state='close') hp=100.0 + step=106 action=move(direction='east') hp=100.0 + step=107 action=move(direction='west') hp=100.0 + step=108 action=wait() hp=100.0 + step=109 action=wait() hp=100.0 + step=110 action=move(direction='south') hp=100.0 + step=111 action=move(direction='north') hp=100.0 + step=112 action=door(target_id='door_4', door_state='close') hp=100.0 + step=113 action=door(target_id='door_4', door_state='close') hp=100.0 + step=114 action=door(target_id='door_4', door_state='close') hp=100.0 + step=115 action=wait() hp=100.0 + step=116 action=wait() hp=100.0 + step=117 action=wait() hp=100.0 + step=118 action=move(direction='south') hp=100.0 + step=119 action=wait() hp=100.0 + step=120 action=door(target_id='door_4', door_state='close') hp=100.0 + step=121 action=move(direction='south') hp=100.0 + step=122 action=wait() hp=100.0 + step=123 action=door(target_id='door_4', door_state='close') hp=100.0 + step=124 action=move(direction='north') hp=100.0 + step=125 action=move(direction='north') hp=100.0 + step=126 action=move(direction='east') hp=100.0 + step=127 action=move(direction='west') hp=100.0 + step=128 action=move(direction='east') hp=100.0 + step=129 action=wait() hp=100.0 + step=130 action=door(target_id='door_4', door_state='open') hp=100.0 + step=131 action=wait() hp=100.0 + step=132 action=move(direction='west') hp=100.0 + step=133 action=move(direction='south') hp=100.0 + step=134 action=move(direction='east') hp=100.0 + step=135 action=move(direction='west') hp=100.0 + step=136 action=move(direction='north') hp=100.0 + step=137 action=wait() hp=100.0 + step=138 action=move(direction='south') hp=100.0 + step=139 action=door(target_id='door_4', door_state='open') hp=100.0 + step=140 action=move(direction='north') hp=100.0 + step=141 action=move(direction='east') hp=100.0 + step=142 action=door(target_id='door_4', door_state='open') hp=100.0 + step=143 action=move(direction='south') hp=100.0 + step=144 action=move(direction='west') hp=100.0 + step=145 action=door(target_id='door_4', door_state='open') hp=100.0 + step=146 action=move(direction='north') hp=100.0 + step=147 action=move(direction='south') hp=100.0 + step=148 action=move(direction='east') hp=100.0 + step=149 action=wait() hp=100.0 + step=150 action=move(direction='west') hp=100.0 + step=151 action=move(direction='east') hp=100.0 + step=152 action=move(direction='west') hp=100.0 + step=153 action=door(target_id='door_4', door_state='open') hp=100.0 + step=154 action=wait() hp=100.0 + step=155 action=move(direction='east') hp=100.0 + step=156 action=move(direction='west') hp=100.0 + step=157 action=move(direction='east') hp=100.0 + step=158 action=move(direction='west') hp=100.0 + step=159 action=move(direction='north') hp=100.0 + step=160 action=door(target_id='door_4', door_state='open') hp=100.0 + step=161 action=door(target_id='door_4', door_state='open') hp=100.0 + step=162 action=door(target_id='door_4', door_state='open') hp=100.0 + step=163 action=door(target_id='door_4', door_state='open') hp=100.0 + step=164 action=move(direction='east') hp=100.0 + step=165 action=door(target_id='door_4', door_state='open') hp=100.0 + step=166 action=wait() hp=100.0 + step=167 action=door(target_id='door_4', door_state='open') hp=100.0 + step=168 action=wait() hp=100.0 + step=169 action=wait() hp=100.0 + step=170 action=move(direction='west') hp=100.0 + step=171 action=move(direction='east') hp=100.0 + step=172 action=door(target_id='door_4', door_state='open') hp=100.0 + step=173 action=move(direction='west') hp=100.0 + step=174 action=door(target_id='door_4', door_state='open') hp=100.0 + step=175 action=move(direction='south') hp=100.0 + step=176 action=move(direction='north') hp=100.0 + step=177 action=move(direction='east') hp=100.0 + step=178 action=door(target_id='door_4', door_state='open') hp=100.0 + step=179 action=move(direction='west') hp=100.0 + step=180 action=door(target_id='door_4', door_state='open') hp=100.0 + step=181 action=door(target_id='door_4', door_state='open') hp=100.0 + step=182 action=wait() hp=100.0 + step=183 action=wait() hp=100.0 + step=184 action=move(direction='south') hp=100.0 + step=185 action=move(direction='north') hp=100.0 + step=186 action=move(direction='south') hp=100.0 + step=187 action=wait() hp=100.0 + step=188 action=door(target_id='door_4', door_state='open') hp=100.0 + step=189 action=door(target_id='door_4', door_state='open') hp=100.0 + step=190 action=move(direction='north') hp=100.0 + step=191 action=wait() hp=100.0 + step=192 action=door(target_id='door_4', door_state='open') hp=100.0 + step=193 action=move(direction='south') hp=100.0 + step=194 action=move(direction='north') hp=100.0 + step=195 action=door(target_id='door_4', door_state='open') hp=100.0 + step=196 action=wait() hp=100.0 + step=197 action=door(target_id='door_4', door_state='open') hp=100.0 + step=198 action=move(direction='east') hp=100.0 + step=199 action=wait() hp=100.0 + step=200 action=wait() hp=100.0 +ep=0070 [easy ] steps=200 reward= -21.270 evac=0 hp=100.0 suc30=0.83 r30= +11.98 t=24s + >> PPO update samples=flushed pi_loss=+0.2003 v_loss=11.4670 entropy=1.3652 kl=0.0050 clip%=0.05 lr=2.05e-04 + step=001 action=move(direction='west') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='south') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=move(direction='west') hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=move(direction='west') hp=100.0 +ep=0071 [easy ] steps=021 reward= +19.360 evac=1 hp=100.0 suc30=0.83 r30= +11.91 t=24s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=door(target_id='door_2', door_state='close') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=door(target_id='door_2', door_state='close') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=door(target_id='door_1', door_state='close') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=door(target_id='door_1', door_state='close') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='west') hp=100.0 +ep=0072 [easy ] steps=013 reward= +18.590 evac=1 hp=100.0 suc30=0.83 r30= +11.92 t=24s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=door(target_id='door_2', door_state='close') hp=100.0 + step=004 action=door(target_id='door_2', door_state='close') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=door(target_id='door_2', door_state='close') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='east') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=door(target_id='door_1', door_state='close') hp=100.0 + step=013 action=door(target_id='door_5', door_state='close') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='south') hp=100.0 + step=016 action=move(direction='east') hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=move(direction='west') hp=100.0 +ep=0073 [easy ] steps=018 reward= +18.250 evac=1 hp=100.0 suc30=0.83 r30= +12.08 t=24s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='east') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='south') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='south') hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=move(direction='south') hp=100.0 + step=021 action=move(direction='east') hp=100.0 + step=022 action=move(direction='north') hp=100.0 + step=023 action=move(direction='west') hp=100.0 + step=024 action=move(direction='north') hp=100.0 + step=025 action=move(direction='west') hp=100.0 +ep=0074 [easy ] steps=025 reward= +19.190 evac=1 hp=100.0 suc30=0.83 r30= +11.96 t=24s + step=001 action=move(direction='west') hp=100.0 +ep=0075 [easy ] steps=001 reward= +16.760 evac=1 hp=100.0 suc30=0.83 r30= +11.90 t=24s + >> PPO update samples=flushed pi_loss=+0.0008 v_loss=12.9646 entropy=1.2285 kl=0.0003 clip%=0.00 lr=1.99e-04 + [ckpt] saved -> artifacts/pyre_ppo_http_ckpt.pt + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='east') hp=100.0 + step=005 action=door(target_id='door_5', door_state='close') hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=door(target_id='door_5', door_state='close') hp=100.0 + step=008 action=door(target_id='door_1', door_state='open') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='south') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='west') hp=100.0 +ep=0076 [easy ] steps=012 reward= +17.640 evac=1 hp=100.0 suc30=0.87 r30= +13.09 t=24s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='south') hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=move(direction='south') hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=move(direction='north') hp=100.0 + step=024 action=move(direction='north') hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=move(direction='north') hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=move(direction='north') hp=100.0 + step=029 action=move(direction='north') hp=100.0 + step=030 action=move(direction='east') hp=100.0 + step=031 action=move(direction='east') hp=100.0 + step=032 action=move(direction='west') hp=100.0 + step=033 action=move(direction='west') hp=100.0 + step=034 action=move(direction='east') hp=100.0 + step=035 action=move(direction='west') hp=100.0 + step=036 action=move(direction='west') hp=100.0 + step=037 action=wait() hp=100.0 + step=038 action=move(direction='west') hp=100.0 + step=039 action=wait() hp=100.0 + step=040 action=move(direction='west') hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=move(direction='west') hp=100.0 + step=043 action=move(direction='west') hp=100.0 +ep=0077 [easy ] steps=043 reward= +22.680 evac=1 hp=100.0 suc30=0.87 r30= +13.22 t=25s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='west') hp=100.0 +ep=0078 [easy ] steps=002 reward= +16.650 evac=1 hp=100.0 suc30=0.87 r30= +13.17 t=25s + step=001 action=door(target_id='door_2', door_state='close') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=door(target_id='door_5', door_state='close') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='west') hp=100.0 +ep=0079 [easy ] steps=011 reward= +18.640 evac=1 hp=100.0 suc30=0.90 r30= +14.47 t=25s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='east') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=move(direction='south') hp=100.0 + step=019 action=move(direction='east') hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=move(direction='north') hp=100.0 + step=023 action=move(direction='west') hp=100.0 +ep=0080 [easy ] steps=023 reward= +21.870 evac=1 hp=100.0 suc30=0.90 r30= +14.53 t=25s + >> PPO update samples=flushed pi_loss=-0.0006 v_loss=7.6417 entropy=1.2525 kl=0.0003 clip%=0.00 lr=1.92e-04 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='north') hp= 88.0 + step=008 action=move(direction='north') hp= 83.0 + step=009 action=move(direction='north') hp= 71.0 + step=010 action=move(direction='north') hp= 71.0 + step=011 action=move(direction='north') hp= 71.0 + step=012 action=move(direction='west') hp= 71.0 + ** EVAL [medium] reward=+15.162 success=1.00 steps=10.3 + step=001 action=wait() hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 +ep=0081 [easy ] steps=005 reward= +17.940 evac=1 hp=100.0 suc30=0.90 r30= +14.53 t=25s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='west') hp=100.0 +ep=0082 [easy ] steps=002 reward= +17.230 evac=1 hp=100.0 suc30=0.90 r30= +14.49 t=25s + step=001 action=move(direction='south') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=move(direction='east') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=move(direction='west') hp=100.0 + step=020 action=move(direction='east') hp=100.0 + step=021 action=wait() hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=move(direction='west') hp=100.0 + step=024 action=move(direction='east') hp=100.0 + step=025 action=move(direction='west') hp=100.0 + step=026 action=wait() hp=100.0 + step=027 action=move(direction='east') hp=100.0 + step=028 action=move(direction='west') hp=100.0 + step=029 action=move(direction='west') hp=100.0 + step=030 action=wait() hp=100.0 + step=031 action=move(direction='west') hp=100.0 + step=032 action=wait() hp=100.0 + step=033 action=move(direction='west') hp=100.0 + step=034 action=move(direction='south') hp=100.0 + step=035 action=move(direction='north') hp=100.0 + step=036 action=move(direction='west') hp=100.0 +ep=0083 [easy ] steps=036 reward= +18.860 evac=1 hp=100.0 suc30=0.90 r30= +14.54 t=25s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=door(target_id='door_3', door_state='close') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='north') hp=100.0 +ep=0084 [easy ] steps=013 reward= +18.260 evac=1 hp=100.0 suc30=0.90 r30= +14.57 t=25s + step=001 action=door(target_id='door_7', door_state='open') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=door(target_id='door_7', door_state='open') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='east') hp=100.0 + step=008 action=door(target_id='door_2', door_state='close') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=door(target_id='door_6', door_state='close') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=door(target_id='door_6', door_state='close') hp=100.0 + step=014 action=move(direction='east') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=move(direction='west') hp=100.0 +ep=0085 [easy ] steps=022 reward= +18.320 evac=1 hp=100.0 suc30=0.90 r30= +14.57 t=25s + >> PPO update samples=flushed pi_loss=-0.0081 v_loss=3.2728 entropy=1.1286 kl=0.0006 clip%=0.00 lr=1.85e-04 + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='west') hp=100.0 +ep=0086 [easy ] steps=012 reward= +19.910 evac=1 hp=100.0 suc30=0.90 r30= +14.64 t=26s + step=001 action=move(direction='south') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='east') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=door(target_id='door_3', door_state='open') hp=100.0 + step=013 action=door(target_id='door_2', door_state='close') hp=100.0 + step=014 action=door(target_id='door_2', door_state='close') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=move(direction='south') hp=100.0 + step=023 action=move(direction='north') hp=100.0 + step=024 action=wait() hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=wait() hp=100.0 + step=027 action=move(direction='north') hp=100.0 +ep=0087 [easy ] steps=027 reward= +17.510 evac=1 hp=100.0 suc30=0.90 r30= +14.64 t=26s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='east') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='east') hp=100.0 + step=009 action=move(direction='east') hp=100.0 + step=010 action=door(target_id='door_1', door_state='open') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=door(target_id='door_1', door_state='close') hp=100.0 + step=017 action=door(target_id='door_1', door_state='open') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=door(target_id='door_1', door_state='close') hp=100.0 + step=022 action=move(direction='east') hp=100.0 + step=023 action=move(direction='north') hp=100.0 + step=024 action=door(target_id='door_1', door_state='open') hp=100.0 + step=025 action=move(direction='west') hp=100.0 + step=026 action=move(direction='north') hp=100.0 + step=027 action=door(target_id='door_1', door_state='close') hp=100.0 + step=028 action=wait() hp=100.0 + step=029 action=wait() hp=100.0 + step=030 action=wait() hp=100.0 + step=031 action=wait() hp=100.0 + step=032 action=move(direction='south') hp=100.0 + step=033 action=door(target_id='door_1', door_state='close') hp=100.0 + step=034 action=move(direction='north') hp=100.0 + step=035 action=wait() hp=100.0 + step=036 action=move(direction='east') hp=100.0 + step=037 action=move(direction='south') hp=100.0 + step=038 action=move(direction='north') hp=100.0 + step=039 action=move(direction='west') hp=100.0 + step=040 action=wait() hp=100.0 + step=041 action=move(direction='south') hp=100.0 + step=042 action=move(direction='north') hp=100.0 + step=043 action=wait() hp=100.0 + step=044 action=door(target_id='door_1', door_state='close') hp=100.0 + step=045 action=move(direction='east') hp=100.0 + step=046 action=wait() hp=100.0 + step=047 action=wait() hp=100.0 + step=048 action=wait() hp=100.0 + step=049 action=door(target_id='door_1', door_state='close') hp=100.0 + step=050 action=move(direction='west') hp=100.0 + step=051 action=move(direction='south') hp=100.0 + step=052 action=door(target_id='door_1', door_state='close') hp=100.0 + step=053 action=move(direction='east') hp=100.0 + step=054 action=move(direction='west') hp=100.0 + step=055 action=move(direction='east') hp=100.0 + step=056 action=move(direction='north') hp=100.0 + step=057 action=move(direction='west') hp=100.0 + step=058 action=move(direction='south') hp=100.0 + step=059 action=move(direction='north') hp=100.0 + step=060 action=wait() hp=100.0 + step=061 action=move(direction='south') hp=100.0 + step=062 action=move(direction='north') hp=100.0 + step=063 action=wait() hp=100.0 + step=064 action=door(target_id='door_1', door_state='close') hp=100.0 + step=065 action=door(target_id='door_1', door_state='close') hp=100.0 + step=066 action=move(direction='east') hp=100.0 + step=067 action=door(target_id='door_1', door_state='close') hp=100.0 + step=068 action=move(direction='south') hp=100.0 + step=069 action=move(direction='east') hp=100.0 + step=070 action=move(direction='north') hp=100.0 + step=071 action=move(direction='south') hp=100.0 + step=072 action=move(direction='south') hp=100.0 + step=073 action=door(target_id='door_1', door_state='close') hp=100.0 + step=074 action=wait() hp=100.0 + step=075 action=move(direction='north') hp=100.0 + step=076 action=move(direction='west') hp=100.0 + step=077 action=move(direction='east') hp=100.0 + step=078 action=move(direction='west') hp=100.0 + step=079 action=move(direction='west') hp=100.0 + step=080 action=door(target_id='door_1', door_state='open') hp=100.0 + step=081 action=move(direction='north') hp=100.0 + step=082 action=move(direction='south') hp=100.0 + step=083 action=door(target_id='door_1', door_state='open') hp=100.0 + step=084 action=move(direction='north') hp=100.0 + step=085 action=wait() hp=100.0 + step=086 action=move(direction='east') hp=100.0 + step=087 action=wait() hp=100.0 + step=088 action=wait() hp=100.0 + step=089 action=move(direction='east') hp=100.0 + step=090 action=door(target_id='door_1', door_state='open') hp=100.0 + step=091 action=move(direction='west') hp=100.0 + step=092 action=move(direction='east') hp=100.0 + step=093 action=move(direction='west') hp=100.0 + step=094 action=move(direction='west') hp=100.0 + step=095 action=door(target_id='door_1', door_state='open') hp=100.0 + step=096 action=move(direction='south') hp=100.0 + step=097 action=wait() hp=100.0 + step=098 action=wait() hp=100.0 + step=099 action=wait() hp=100.0 + step=100 action=move(direction='south') hp=100.0 + step=101 action=move(direction='north') hp=100.0 + step=102 action=move(direction='east') hp=100.0 + step=103 action=door(target_id='door_1', door_state='open') hp=100.0 + step=104 action=move(direction='west') hp=100.0 + step=105 action=move(direction='north') hp=100.0 + step=106 action=wait() hp=100.0 + step=107 action=door(target_id='door_1', door_state='close') hp=100.0 + step=108 action=wait() hp=100.0 + step=109 action=move(direction='south') hp=100.0 + step=110 action=door(target_id='door_1', door_state='close') hp=100.0 + step=111 action=door(target_id='door_1', door_state='close') hp=100.0 + step=112 action=move(direction='south') hp=100.0 + step=113 action=move(direction='east') hp=100.0 + step=114 action=door(target_id='door_1', door_state='close') hp=100.0 + step=115 action=move(direction='west') hp=100.0 + step=116 action=wait() hp=100.0 + step=117 action=wait() hp=100.0 + step=118 action=move(direction='north') hp=100.0 + step=119 action=wait() hp=100.0 + step=120 action=wait() hp=100.0 + step=121 action=move(direction='north') hp=100.0 + step=122 action=move(direction='east') hp=100.0 + step=123 action=move(direction='west') hp=100.0 + step=124 action=move(direction='south') hp=100.0 + step=125 action=door(target_id='door_1', door_state='open') hp=100.0 + step=126 action=move(direction='south') hp=100.0 + step=127 action=move(direction='east') hp=100.0 + step=128 action=wait() hp=100.0 + step=129 action=move(direction='east') hp=100.0 + step=130 action=move(direction='west') hp=100.0 + step=131 action=move(direction='north') hp=100.0 + step=132 action=wait() hp=100.0 + step=133 action=move(direction='north') hp=100.0 + step=134 action=wait() hp=100.0 + step=135 action=move(direction='south') hp=100.0 + step=136 action=move(direction='west') hp=100.0 + step=137 action=move(direction='north') hp=100.0 + step=138 action=move(direction='south') hp=100.0 + step=139 action=move(direction='south') hp=100.0 + step=140 action=wait() hp=100.0 + step=141 action=move(direction='east') hp=100.0 + step=142 action=move(direction='north') hp=100.0 + step=143 action=move(direction='west') hp=100.0 + step=144 action=move(direction='north') hp=100.0 + step=145 action=door(target_id='door_1', door_state='open') hp=100.0 + step=146 action=move(direction='east') hp=100.0 + step=147 action=move(direction='west') hp=100.0 + step=148 action=wait() hp=100.0 + step=149 action=move(direction='east') hp=100.0 + step=150 action=move(direction='south') hp=100.0 + step=151 action=move(direction='east') hp=100.0 + step=152 action=move(direction='north') hp=100.0 + step=153 action=move(direction='west') hp=100.0 + step=154 action=move(direction='east') hp=100.0 + step=155 action=door(target_id='door_1', door_state='open') hp=100.0 + step=156 action=move(direction='west') hp=100.0 + step=157 action=wait() hp=100.0 + step=158 action=move(direction='east') hp=100.0 + step=159 action=move(direction='west') hp=100.0 + step=160 action=wait() hp=100.0 + step=161 action=move(direction='west') hp=100.0 + step=162 action=wait() hp=100.0 + step=163 action=door(target_id='door_1', door_state='open') hp=100.0 + step=164 action=wait() hp=100.0 + step=165 action=door(target_id='door_1', door_state='open') hp=100.0 + step=166 action=wait() hp=100.0 + step=167 action=door(target_id='door_1', door_state='open') hp=100.0 + step=168 action=move(direction='south') hp=100.0 + step=169 action=move(direction='east') hp=100.0 + step=170 action=door(target_id='door_1', door_state='open') hp=100.0 + step=171 action=move(direction='north') hp=100.0 + step=172 action=move(direction='south') hp=100.0 + step=173 action=wait() hp=100.0 + step=174 action=wait() hp=100.0 + step=175 action=move(direction='north') hp=100.0 + step=176 action=move(direction='west') hp=100.0 + step=177 action=wait() hp=100.0 + step=178 action=wait() hp=100.0 + step=179 action=wait() hp=100.0 + step=180 action=move(direction='south') hp=100.0 + step=181 action=move(direction='north') hp=100.0 + step=182 action=move(direction='south') hp=100.0 + step=183 action=move(direction='north') hp=100.0 + step=184 action=move(direction='east') hp=100.0 + step=185 action=wait() hp=100.0 + step=186 action=move(direction='west') hp=100.0 + step=187 action=wait() hp=100.0 + step=188 action=wait() hp=100.0 + step=189 action=move(direction='south') hp=100.0 + step=190 action=move(direction='east') hp=100.0 + step=191 action=move(direction='south') hp=100.0 + step=192 action=move(direction='west') hp=100.0 + step=193 action=door(target_id='door_1', door_state='close') hp=100.0 + step=194 action=wait() hp=100.0 + step=195 action=door(target_id='door_1', door_state='open') hp=100.0 + step=196 action=move(direction='east') hp=100.0 + step=197 action=move(direction='north') hp=100.0 + step=198 action=move(direction='west') hp=100.0 + step=199 action=move(direction='north') hp=100.0 + step=200 action=door(target_id='door_1', door_state='close') hp=100.0 +ep=0088 [easy ] steps=200 reward= -19.360 evac=0 hp=100.0 suc30=0.90 r30= +14.66 t=27s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='south') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=wait() hp= 98.0 + step=018 action=move(direction='east') hp= 83.0 + step=019 action=wait() hp= 82.5 + step=020 action=wait() hp= 82.0 + step=021 action=move(direction='west') hp= 70.0 + step=022 action=wait() hp= 55.0 + step=023 action=wait() hp= 50.0 + step=024 action=wait() hp= 45.0 + step=025 action=wait() hp= 40.0 + step=026 action=wait() hp= 35.0 + step=027 action=move(direction='south') hp= 30.0 + step=028 action=move(direction='west') hp= 15.0 + step=029 action=move(direction='west') hp= 13.0 + step=030 action=move(direction='west') hp= 1.0 +ep=0089 [easy ] steps=030 reward= -12.380 evac=0 hp= 0.0 suc30=0.87 r30= +13.62 t=27s + step=001 action=door(target_id='door_3', door_state='close') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='east') hp=100.0 + step=006 action=door(target_id='door_3', door_state='close') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=door(target_id='door_2', door_state='close') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=door(target_id='door_1', door_state='open') hp=100.0 + step=017 action=move(direction='west') hp=100.0 +ep=0090 [easy ] steps=017 reward= +17.180 evac=1 hp=100.0 suc30=0.87 r30= +13.60 t=27s + >> PPO update samples=flushed pi_loss=+0.0606 v_loss=41.9331 entropy=1.3482 kl=0.0011 clip%=0.00 lr=1.78e-04 + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='west') hp=100.0 +ep=0091 [easy ] steps=002 reward= +17.230 evac=1 hp=100.0 suc30=0.90 r30= +14.83 t=27s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='east') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='south') hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=move(direction='south') hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=move(direction='west') hp=100.0 + step=024 action=move(direction='west') hp=100.0 + step=025 action=move(direction='south') hp= 99.5 + step=026 action=move(direction='north') hp= 99.5 + step=027 action=wait() hp= 99.5 + step=028 action=wait() hp= 99.5 + step=029 action=wait() hp= 99.5 + step=030 action=wait() hp= 99.5 + step=031 action=move(direction='east') hp= 99.5 + step=032 action=move(direction='west') hp= 99.0 + step=033 action=move(direction='south') hp= 99.0 + step=034 action=move(direction='north') hp= 99.0 + step=035 action=wait() hp= 99.0 + step=036 action=wait() hp= 99.0 + step=037 action=wait() hp= 99.0 + step=038 action=move(direction='south') hp= 99.0 + step=039 action=move(direction='north') hp= 99.0 + step=040 action=move(direction='east') hp= 99.0 + step=041 action=move(direction='west') hp= 99.0 + step=042 action=wait() hp= 99.0 + step=043 action=wait() hp= 99.0 + step=044 action=move(direction='east') hp= 99.0 + step=045 action=move(direction='west') hp= 99.0 + step=046 action=move(direction='east') hp= 99.0 + step=047 action=move(direction='west') hp= 99.0 + step=048 action=wait() hp= 99.0 + step=049 action=wait() hp= 99.0 + step=050 action=wait() hp= 99.0 + step=051 action=wait() hp= 99.0 + step=052 action=move(direction='south') hp= 99.0 + step=053 action=move(direction='north') hp= 99.0 + step=054 action=wait() hp= 99.0 + step=055 action=move(direction='south') hp= 99.0 + step=056 action=wait() hp= 99.0 + step=057 action=move(direction='north') hp= 99.0 + step=058 action=wait() hp= 99.0 + step=059 action=wait() hp= 99.0 + step=060 action=move(direction='east') hp= 99.0 + step=061 action=move(direction='west') hp= 99.0 + step=062 action=move(direction='east') hp= 99.0 + step=063 action=move(direction='west') hp= 99.0 + step=064 action=wait() hp= 99.0 + step=065 action=wait() hp= 99.0 + step=066 action=wait() hp= 99.0 + step=067 action=move(direction='south') hp= 99.0 + step=068 action=move(direction='east') hp= 99.0 + step=069 action=move(direction='west') hp= 99.0 + step=070 action=wait() hp= 99.0 + step=071 action=move(direction='north') hp= 99.0 + step=072 action=move(direction='south') hp= 99.0 + step=073 action=wait() hp= 99.0 + step=074 action=move(direction='north') hp= 99.0 + step=075 action=wait() hp= 99.0 + step=076 action=move(direction='south') hp= 99.0 + step=077 action=wait() hp= 99.0 + step=078 action=wait() hp= 99.0 + step=079 action=move(direction='north') hp= 99.0 + step=080 action=wait() hp= 99.0 + step=081 action=move(direction='east') hp= 99.0 + step=082 action=move(direction='east') hp= 99.0 + step=083 action=move(direction='north') hp= 99.0 + step=084 action=wait() hp= 99.0 + step=085 action=move(direction='north') hp= 99.0 + step=086 action=move(direction='west') hp= 99.0 + step=087 action=move(direction='north') hp= 99.0 + step=088 action=wait() hp= 99.0 + step=089 action=wait() hp= 99.0 + step=090 action=wait() hp= 99.0 + step=091 action=wait() hp= 99.0 + step=092 action=wait() hp= 99.0 + step=093 action=move(direction='east') hp= 99.0 + step=094 action=move(direction='west') hp= 99.0 + step=095 action=move(direction='east') hp= 99.0 + step=096 action=move(direction='west') hp= 99.0 + step=097 action=move(direction='south') hp= 99.0 + step=098 action=move(direction='north') hp= 99.0 + step=099 action=wait() hp= 99.0 + step=100 action=wait() hp= 99.0 + step=101 action=move(direction='east') hp= 99.0 + step=102 action=move(direction='west') hp= 99.0 + step=103 action=wait() hp= 99.0 + step=104 action=wait() hp= 99.0 + step=105 action=wait() hp= 99.0 + step=106 action=wait() hp= 99.0 + step=107 action=move(direction='east') hp= 99.0 + step=108 action=move(direction='west') hp= 99.0 + step=109 action=move(direction='south') hp= 99.0 + step=110 action=move(direction='north') hp= 99.0 + step=111 action=move(direction='east') hp= 99.0 + step=112 action=move(direction='west') hp= 99.0 + step=113 action=move(direction='east') hp= 99.0 + step=114 action=move(direction='west') hp= 99.0 + step=115 action=move(direction='south') hp= 99.0 + step=116 action=move(direction='north') hp= 99.0 + step=117 action=wait() hp= 99.0 + step=118 action=wait() hp= 99.0 + step=119 action=move(direction='south') hp= 99.0 + step=120 action=move(direction='north') hp= 99.0 + step=121 action=wait() hp= 99.0 + step=122 action=wait() hp= 99.0 + step=123 action=move(direction='south') hp= 99.0 + step=124 action=move(direction='north') hp= 99.0 + step=125 action=wait() hp= 99.0 + step=126 action=wait() hp= 99.0 + step=127 action=wait() hp= 99.0 + step=128 action=wait() hp= 99.0 + step=129 action=wait() hp= 99.0 + step=130 action=move(direction='east') hp= 99.0 + step=131 action=move(direction='west') hp= 99.0 + step=132 action=wait() hp= 99.0 + step=133 action=move(direction='east') hp= 99.0 + step=134 action=wait() hp= 99.0 + step=135 action=move(direction='west') hp= 99.0 + step=136 action=wait() hp= 99.0 + step=137 action=move(direction='east') hp= 99.0 + step=138 action=move(direction='west') hp= 99.0 + step=139 action=move(direction='east') hp= 99.0 + step=140 action=move(direction='west') hp= 99.0 + step=141 action=wait() hp= 99.0 + step=142 action=move(direction='east') hp= 99.0 + step=143 action=move(direction='west') hp= 99.0 + step=144 action=wait() hp= 99.0 + step=145 action=wait() hp= 99.0 + step=146 action=wait() hp= 99.0 + step=147 action=wait() hp= 99.0 + step=148 action=wait() hp= 99.0 + step=149 action=move(direction='south') hp= 99.0 + step=150 action=move(direction='east') hp= 99.0 + step=151 action=move(direction='west') hp= 99.0 + step=152 action=move(direction='north') hp= 99.0 + step=153 action=move(direction='south') hp= 99.0 + step=154 action=wait() hp= 99.0 + step=155 action=move(direction='north') hp= 99.0 + step=156 action=move(direction='east') hp= 99.0 + step=157 action=wait() hp= 99.0 + step=158 action=move(direction='west') hp= 99.0 + step=159 action=wait() hp= 99.0 + step=160 action=move(direction='south') hp= 99.0 + step=161 action=move(direction='north') hp= 99.0 + step=162 action=wait() hp= 99.0 + step=163 action=wait() hp= 99.0 + step=164 action=move(direction='south') hp= 99.0 + step=165 action=move(direction='north') hp= 99.0 + step=166 action=wait() hp= 99.0 + step=167 action=move(direction='south') hp= 99.0 + step=168 action=move(direction='north') hp= 99.0 + step=169 action=move(direction='south') hp= 99.0 + step=170 action=move(direction='north') hp= 99.0 + step=171 action=wait() hp= 99.0 + step=172 action=wait() hp= 99.0 + step=173 action=move(direction='south') hp= 99.0 + step=174 action=wait() hp= 99.0 + step=175 action=move(direction='north') hp= 99.0 + step=176 action=move(direction='east') hp= 99.0 + step=177 action=move(direction='west') hp= 99.0 + step=178 action=wait() hp= 99.0 + step=179 action=wait() hp= 99.0 + step=180 action=wait() hp= 99.0 + step=181 action=wait() hp= 99.0 + step=182 action=wait() hp= 99.0 + step=183 action=wait() hp= 99.0 + step=184 action=wait() hp= 99.0 + step=185 action=wait() hp= 99.0 + step=186 action=wait() hp= 99.0 + step=187 action=wait() hp= 99.0 + step=188 action=wait() hp= 99.0 + step=189 action=wait() hp= 99.0 + step=190 action=wait() hp= 99.0 + step=191 action=move(direction='east') hp= 99.0 + step=192 action=move(direction='west') hp= 99.0 + step=193 action=move(direction='east') hp= 99.0 + step=194 action=move(direction='west') hp= 99.0 + step=195 action=wait() hp= 99.0 + step=196 action=wait() hp= 99.0 + step=197 action=wait() hp= 99.0 + step=198 action=move(direction='east') hp= 99.0 + step=199 action=wait() hp= 99.0 + step=200 action=move(direction='east') hp= 99.0 +ep=0092 [easy ] steps=200 reward= -20.100 evac=0 hp= 99.0 suc30=0.87 r30= +13.52 t=28s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=door(target_id='door_3', door_state='close') hp=100.0 + step=004 action=move(direction='east') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='south') hp=100.0 + step=013 action=door(target_id='door_2', door_state='close') hp=100.0 + step=014 action=door(target_id='door_2', door_state='close') hp=100.0 + step=015 action=door(target_id='door_2', door_state='close') hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=door(target_id='door_1', door_state='open') hp=100.0 + step=022 action=wait() hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=move(direction='west') hp=100.0 +ep=0093 [easy ] steps=024 reward= +17.600 evac=1 hp=100.0 suc30=0.87 r30= +13.52 t=28s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='south') hp=100.0 + step=008 action=door(target_id='door_7', door_state='close') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=door(target_id='door_7', door_state='close') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=door(target_id='door_2', door_state='open') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=door(target_id='door_1', door_state='close') hp=100.0 + step=019 action=move(direction='west') hp=100.0 + step=020 action=move(direction='north') hp=100.0 + step=021 action=move(direction='west') hp=100.0 + step=022 action=move(direction='north') hp=100.0 + step=023 action=move(direction='west') hp=100.0 +ep=0094 [easy ] steps=023 reward= +17.950 evac=1 hp=100.0 suc30=0.87 r30= +13.52 t=28s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=move(direction='east') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=move(direction='west') hp=100.0 + step=020 action=move(direction='north') hp=100.0 + step=021 action=move(direction='west') hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=move(direction='west') hp=100.0 + step=024 action=move(direction='west') hp=100.0 +ep=0095 [easy ] steps=024 reward= +21.620 evac=1 hp=100.0 suc30=0.87 r30= +13.54 t=28s + >> PPO update samples=flushed pi_loss=-0.0290 v_loss=28.5095 entropy=0.9965 kl=0.0008 clip%=0.00 lr=1.72e-04 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=door(target_id='door_1', door_state='close') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 +ep=0096 [easy ] steps=011 reward= +18.510 evac=1 hp=100.0 suc30=0.87 r30= +13.60 t=29s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=door(target_id='door_1', door_state='close') hp=100.0 + step=005 action=move(direction='east') hp=100.0 + step=006 action=door(target_id='door_1', door_state='close') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=door(target_id='door_5', door_state='close') hp=100.0 + step=011 action=door(target_id='door_5', door_state='close') hp=100.0 + step=012 action=door(target_id='door_5', door_state='close') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='west') hp=100.0 +ep=0097 [easy ] steps=014 reward= +17.950 evac=1 hp=100.0 suc30=0.87 r30= +13.64 t=29s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=move(direction='south') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='west') hp=100.0 +ep=0098 [easy ] steps=016 reward= +19.970 evac=1 hp=100.0 suc30=0.87 r30= +13.67 t=29s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 +ep=0099 [easy ] steps=004 reward= +17.550 evac=1 hp=100.0 suc30=0.87 r30= +13.67 t=29s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='east') hp=100.0 + step=003 action=door(target_id='door_5', door_state='open') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=door(target_id='door_1', door_state='open') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='south') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=door(target_id='door_1', door_state='open') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=move(direction='west') hp=100.0 +ep=0100 [easy ] steps=015 reward= +18.600 evac=1 hp=100.0 suc30=0.90 r30= +15.00 t=29s + >> PPO update samples=flushed pi_loss=-0.0025 v_loss=4.3529 entropy=1.2106 kl=0.0001 clip%=0.00 lr=1.65e-04 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp= 85.0 + step=008 action=move(direction='west') hp= 84.5 + step=009 action=move(direction='west') hp= 84.5 + step=010 action=move(direction='west') hp= 84.5 + step=011 action=move(direction='west') hp= 84.5 + step=012 action=move(direction='north') hp= 84.5 + step=013 action=move(direction='north') hp= 84.5 + step=014 action=move(direction='north') hp= 84.5 + step=015 action=move(direction='north') hp= 84.5 + step=016 action=move(direction='north') hp= 84.5 + step=017 action=move(direction='north') hp= 84.5 + step=018 action=move(direction='north') hp= 84.5 + step=019 action=move(direction='west') hp= 84.5 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=wait() hp=100.0 + step=022 action=wait() hp=100.0 + step=023 action=wait() hp= 99.5 + step=024 action=wait() hp= 99.0 + step=025 action=wait() hp= 98.5 + step=026 action=wait() hp= 98.0 + step=027 action=wait() hp= 97.5 + step=028 action=wait() hp= 97.0 + step=029 action=wait() hp= 96.5 + step=030 action=wait() hp= 96.0 + step=031 action=wait() hp= 95.5 + step=032 action=wait() hp= 95.0 + step=033 action=wait() hp= 94.5 + step=034 action=wait() hp= 94.0 + step=035 action=wait() hp= 92.0 + step=036 action=wait() hp= 90.0 + step=037 action=wait() hp= 88.0 + step=038 action=wait() hp= 86.0 + step=039 action=wait() hp= 84.0 + step=040 action=wait() hp= 82.0 + step=041 action=wait() hp= 80.0 + step=042 action=wait() hp= 78.0 + step=043 action=wait() hp= 76.0 + step=044 action=wait() hp= 74.0 + step=045 action=wait() hp= 72.0 + step=046 action=wait() hp= 70.0 + step=047 action=wait() hp= 68.0 + step=048 action=wait() hp= 66.0 + step=049 action=wait() hp= 64.0 + step=050 action=wait() hp= 63.5 + step=051 action=wait() hp= 63.0 + step=052 action=wait() hp= 62.5 + step=053 action=wait() hp= 62.0 + step=054 action=wait() hp= 61.5 + step=055 action=wait() hp= 61.0 + step=056 action=wait() hp= 60.5 + step=057 action=wait() hp= 60.0 + step=058 action=wait() hp= 59.5 + step=059 action=wait() hp= 59.0 + step=060 action=wait() hp= 58.5 + step=061 action=wait() hp= 58.5 + step=062 action=wait() hp= 58.5 + step=063 action=wait() hp= 58.5 + step=064 action=wait() hp= 58.5 + step=065 action=wait() hp= 58.5 + step=066 action=wait() hp= 58.5 + step=067 action=wait() hp= 58.5 + step=068 action=wait() hp= 58.5 + step=069 action=wait() hp= 58.5 + step=070 action=wait() hp= 58.5 + step=071 action=wait() hp= 58.5 + step=072 action=wait() hp= 58.5 + step=073 action=wait() hp= 58.5 + step=074 action=wait() hp= 58.5 + step=075 action=wait() hp= 58.5 + step=076 action=wait() hp= 58.5 + step=077 action=wait() hp= 58.5 + step=078 action=wait() hp= 58.5 + step=079 action=wait() hp= 58.5 + step=080 action=wait() hp= 58.5 + step=081 action=wait() hp= 58.5 + step=082 action=wait() hp= 58.5 + step=083 action=wait() hp= 58.5 + step=084 action=wait() hp= 58.5 + step=085 action=wait() hp= 58.5 + step=086 action=wait() hp= 58.5 + step=087 action=wait() hp= 58.5 + step=088 action=wait() hp= 58.5 + step=089 action=wait() hp= 58.5 + step=090 action=wait() hp= 58.5 + step=091 action=wait() hp= 58.5 + step=092 action=wait() hp= 58.5 + step=093 action=wait() hp= 58.5 + step=094 action=wait() hp= 58.5 + step=095 action=wait() hp= 58.5 + step=096 action=wait() hp= 58.5 + step=097 action=wait() hp= 58.5 + step=098 action=wait() hp= 58.5 + step=099 action=wait() hp= 58.5 + step=100 action=wait() hp= 58.5 + step=101 action=wait() hp= 58.5 + step=102 action=wait() hp= 58.5 + step=103 action=wait() hp= 58.5 + step=104 action=wait() hp= 58.5 + step=105 action=wait() hp= 58.5 + step=106 action=wait() hp= 58.5 + step=107 action=wait() hp= 58.5 + step=108 action=wait() hp= 58.5 + step=109 action=wait() hp= 58.5 + step=110 action=wait() hp= 58.5 + step=111 action=wait() hp= 58.5 + step=112 action=wait() hp= 58.5 + step=113 action=wait() hp= 58.5 + step=114 action=wait() hp= 58.5 + step=115 action=wait() hp= 58.5 + step=116 action=wait() hp= 58.5 + step=117 action=wait() hp= 58.5 + step=118 action=wait() hp= 58.5 + step=119 action=wait() hp= 58.5 + step=120 action=wait() hp= 58.5 + step=121 action=wait() hp= 58.5 + step=122 action=wait() hp= 58.5 + step=123 action=wait() hp= 58.5 + step=124 action=wait() hp= 58.5 + step=125 action=wait() hp= 58.5 + step=126 action=wait() hp= 58.5 + step=127 action=wait() hp= 58.5 + step=128 action=wait() hp= 58.5 + step=129 action=wait() hp= 58.5 + step=130 action=wait() hp= 58.5 + step=131 action=wait() hp= 58.5 + step=132 action=wait() hp= 58.5 + step=133 action=wait() hp= 58.5 + step=134 action=wait() hp= 58.5 + step=135 action=wait() hp= 58.5 + step=136 action=wait() hp= 58.5 + step=137 action=wait() hp= 58.5 + step=138 action=wait() hp= 58.5 + step=139 action=wait() hp= 58.5 + step=140 action=wait() hp= 58.5 + step=141 action=wait() hp= 58.5 + step=142 action=wait() hp= 58.5 + step=143 action=wait() hp= 58.5 + step=144 action=wait() hp= 58.5 + step=145 action=wait() hp= 58.5 + step=146 action=wait() hp= 58.5 + step=147 action=wait() hp= 58.5 + step=148 action=wait() hp= 58.5 + step=149 action=wait() hp= 58.5 + step=150 action=wait() hp= 58.5 + ** EVAL [medium] reward=+6.008 success=0.67 steps=57.0 + [ckpt] saved -> artifacts/pyre_ppo_http_ckpt.pt + step=001 action=wait() hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='east') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=move(direction='west') hp=100.0 +ep=0101 [medium] steps=016 reward= +16.310 evac=1 hp=100.0 suc30=0.90 r30= +14.90 t=30s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=door(target_id='door_3', door_state='open') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=wait() hp= 85.0 + step=010 action=wait() hp= 70.0 + step=011 action=wait() hp= 55.0 + step=012 action=wait() hp= 50.0 + step=013 action=wait() hp= 45.0 + step=014 action=wait() hp= 40.0 + step=015 action=move(direction='south') hp= 35.0 + step=016 action=move(direction='west') hp= 30.0 + step=017 action=wait() hp= 29.5 + step=018 action=move(direction='west') hp= 29.0 + step=019 action=move(direction='south') hp= 29.0 + step=020 action=move(direction='west') hp= 29.0 + step=021 action=wait() hp= 29.0 + step=022 action=door(target_id='door_2', door_state='open') hp= 29.0 + step=023 action=move(direction='south') hp= 29.0 + step=024 action=move(direction='north') hp= 29.0 + step=025 action=door(target_id='door_2', door_state='open') hp= 29.0 + step=026 action=move(direction='east') hp= 29.0 + step=027 action=wait() hp= 29.0 + step=028 action=wait() hp= 29.0 + step=029 action=move(direction='west') hp= 29.0 + step=030 action=move(direction='north') hp= 29.0 + step=031 action=door(target_id='door_1', door_state='open') hp= 29.0 + step=032 action=move(direction='west') hp= 29.0 + step=033 action=move(direction='west') hp= 29.0 + step=034 action=move(direction='west') hp= 29.0 + step=035 action=move(direction='west') hp= 29.0 +ep=0102 [medium] steps=035 reward= +9.635 evac=1 hp= 29.0 suc30=0.90 r30= +14.60 t=30s + step=001 action=door(target_id='door_3', door_state='close') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=door(target_id='door_4', door_state='close') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=door(target_id='door_2', door_state='close') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=door(target_id='door_1', door_state='close') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=move(direction='west') hp=100.0 + step=020 action=move(direction='north') hp=100.0 + step=021 action=door(target_id='door_1', door_state='close') hp=100.0 + step=022 action=move(direction='north') hp=100.0 + step=023 action=move(direction='west') hp=100.0 + step=024 action=wait() hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=move(direction='east') hp=100.0 + step=027 action=door(target_id='door_1', door_state='open') hp=100.0 + step=028 action=wait() hp=100.0 + step=029 action=move(direction='north') hp=100.0 + step=030 action=move(direction='north') hp=100.0 + step=031 action=move(direction='west') hp=100.0 + step=032 action=move(direction='south') hp=100.0 + step=033 action=door(target_id='door_1', door_state='close') hp=100.0 + step=034 action=move(direction='north') hp=100.0 + step=035 action=move(direction='south') hp=100.0 + step=036 action=move(direction='east') hp=100.0 + step=037 action=move(direction='east') hp=100.0 + step=038 action=move(direction='west') hp=100.0 + step=039 action=move(direction='west') hp=100.0 + step=040 action=move(direction='south') hp=100.0 + step=041 action=move(direction='north') hp=100.0 + step=042 action=door(target_id='door_1', door_state='close') hp=100.0 + step=043 action=wait() hp=100.0 + step=044 action=door(target_id='door_1', door_state='close') hp=100.0 + step=045 action=wait() hp=100.0 + step=046 action=move(direction='north') hp=100.0 + step=047 action=door(target_id='door_1', door_state='close') hp=100.0 + step=048 action=wait() hp=100.0 + step=049 action=wait() hp=100.0 + step=050 action=move(direction='east') hp= 99.5 + step=051 action=move(direction='west') hp= 84.5 + step=052 action=move(direction='south') hp= 69.5 + step=053 action=move(direction='north') hp= 54.5 + step=054 action=move(direction='south') hp= 39.5 + step=055 action=move(direction='north') hp= 34.5 + step=056 action=wait() hp= 29.5 + step=057 action=wait() hp= 24.5 + step=058 action=wait() hp= 19.5 + step=059 action=wait() hp= 14.5 + step=060 action=wait() hp= 9.5 + step=061 action=wait() hp= 4.5 +ep=0103 [medium] steps=061 reward= -15.470 evac=0 hp= 0.0 suc30=0.87 r30= +13.48 t=30s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='south') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='east') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='west') hp=100.0 +ep=0104 [medium] steps=014 reward= +17.030 evac=1 hp=100.0 suc30=0.87 r30= +13.40 t=30s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=door(target_id='door_2', door_state='close') hp=100.0 + step=005 action=door(target_id='door_2', door_state='close') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='east') hp=100.0 + step=008 action=door(target_id='door_2', door_state='close') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=wait() hp= 99.5 + step=011 action=wait() hp= 99.0 + step=012 action=wait() hp= 98.5 + step=013 action=move(direction='south') hp= 98.0 + step=014 action=wait() hp= 98.0 + step=015 action=move(direction='north') hp= 97.5 + step=016 action=move(direction='south') hp= 97.5 + step=017 action=move(direction='south') hp= 97.0 + step=018 action=move(direction='north') hp= 97.0 + step=019 action=wait() hp= 95.0 + step=020 action=move(direction='north') hp= 94.5 + step=021 action=wait() hp= 94.0 + step=022 action=move(direction='south') hp= 93.5 + step=023 action=move(direction='north') hp= 78.5 + step=024 action=move(direction='south') hp= 76.5 + step=025 action=wait() hp= 61.5 + step=026 action=move(direction='north') hp= 56.5 + step=027 action=wait() hp= 41.5 + step=028 action=wait() hp= 26.5 + step=029 action=wait() hp= 11.5 + step=030 action=wait() hp= 6.5 + step=031 action=wait() hp= 1.5 +ep=0105 [medium] steps=031 reward= -18.740 evac=0 hp= 0.0 suc30=0.83 r30= +12.22 t=30s + >> PPO update samples=flushed pi_loss=+0.0005 v_loss=79.8427 entropy=1.0409 kl=0.0001 clip%=0.00 lr=1.58e-04 + step=001 action=door(target_id='door_3', door_state='open') hp=100.0 + step=002 action=door(target_id='door_3', door_state='close') hp=100.0 + step=003 action=door(target_id='door_3', door_state='open') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp= 88.0 + step=008 action=move(direction='west') hp= 83.0 + step=009 action=move(direction='south') hp= 68.0 + step=010 action=move(direction='north') hp= 66.0 + step=011 action=move(direction='north') hp= 51.0 + step=012 action=wait() hp= 36.0 + step=013 action=move(direction='west') hp= 21.0 + step=014 action=move(direction='west') hp= 6.0 + step=015 action=move(direction='west') hp= 5.5 + step=016 action=move(direction='south') hp= 5.5 + step=017 action=wait() hp= 5.0 + step=018 action=door(target_id='door_5', door_state='close') hp= 4.5 + step=019 action=move(direction='north') hp= 4.0 +ep=0106 [medium] steps=019 reward= -15.590 evac=0 hp= 0.0 suc30=0.80 r30= +11.11 t=31s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=move(direction='west') hp=100.0 +ep=0107 [medium] steps=017 reward= +17.390 evac=1 hp=100.0 suc30=0.80 r30= +10.94 t=31s + step=001 action=door(target_id='door_3', door_state='open') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp= 85.0 + step=007 action=door(target_id='door_2', door_state='close') hp= 84.5 + step=008 action=move(direction='west') hp= 84.0 + step=009 action=door(target_id='door_1', door_state='open') hp= 83.5 + step=010 action=move(direction='west') hp= 83.0 + step=011 action=wait() hp= 83.0 + step=012 action=door(target_id='door_2', door_state='close') hp= 83.0 + step=013 action=move(direction='west') hp= 83.0 + step=014 action=door(target_id='door_1', door_state='open') hp= 83.0 + step=015 action=door(target_id='door_1', door_state='open') hp= 83.0 + step=016 action=move(direction='east') hp= 83.0 + step=017 action=wait() hp= 82.5 + step=018 action=move(direction='west') hp= 80.5 + step=019 action=move(direction='west') hp= 80.0 + step=020 action=wait() hp= 80.0 + step=021 action=move(direction='west') hp= 80.0 +ep=0108 [medium] steps=021 reward= +11.310 evac=1 hp= 80.0 suc30=0.80 r30= +10.76 t=31s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='south') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=door(target_id='door_5', door_state='close') hp=100.0 + step=015 action=move(direction='west') hp=100.0 +ep=0109 [medium] steps=015 reward= +15.850 evac=1 hp=100.0 suc30=0.80 r30= +10.66 t=31s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 +ep=0110 [medium] steps=007 reward= +16.100 evac=1 hp=100.0 suc30=0.80 r30= +10.47 t=31s + >> PPO update samples=flushed pi_loss=+0.0046 v_loss=59.2558 entropy=1.1233 kl=0.0003 clip%=0.00 lr=1.51e-04 + step=001 action=door(target_id='door_3', door_state='close') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=door(target_id='door_2', door_state='close') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='south') hp=100.0 + step=009 action=move(direction='south') hp=100.0 + step=010 action=door(target_id='door_2', door_state='close') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=wait() hp= 99.5 + step=013 action=move(direction='north') hp= 99.0 + step=014 action=move(direction='south') hp= 99.0 + step=015 action=move(direction='north') hp= 98.5 + step=016 action=move(direction='south') hp= 98.0 + step=017 action=move(direction='east') hp= 97.5 + step=018 action=move(direction='west') hp= 97.0 + step=019 action=wait() hp= 96.5 + step=020 action=wait() hp= 96.0 + step=021 action=wait() hp= 95.5 + step=022 action=move(direction='north') hp= 95.0 + step=023 action=move(direction='north') hp= 94.5 + step=024 action=move(direction='west') hp= 94.0 + step=025 action=move(direction='west') hp= 94.0 + step=026 action=move(direction='west') hp= 94.0 + step=027 action=move(direction='west') hp= 94.0 +ep=0111 [medium] steps=027 reward= +14.950 evac=1 hp= 94.0 suc30=0.80 r30= +10.37 t=31s + step=001 action=move(direction='south') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=door(target_id='door_8', door_state='close') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=door(target_id='door_1', door_state='close') hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=move(direction='west') hp=100.0 +ep=0112 [medium] steps=019 reward= +15.740 evac=1 hp=100.0 suc30=0.80 r30= +10.32 t=31s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='south') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='south') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='south') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='east') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='east') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=move(direction='north') hp=100.0 + step=021 action=wait() hp=100.0 + step=022 action=move(direction='north') hp=100.0 + step=023 action=move(direction='north') hp=100.0 + step=024 action=move(direction='north') hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=wait() hp=100.0 + step=027 action=move(direction='north') hp=100.0 + step=028 action=move(direction='north') hp=100.0 + step=029 action=move(direction='north') hp=100.0 + step=030 action=move(direction='north') hp=100.0 + step=031 action=move(direction='north') hp=100.0 + step=032 action=move(direction='north') hp=100.0 + step=033 action=move(direction='west') hp=100.0 +ep=0113 [medium] steps=033 reward= +17.970 evac=1 hp=100.0 suc30=0.80 r30= +10.29 t=31s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 +ep=0114 [medium] steps=003 reward= +15.160 evac=1 hp=100.0 suc30=0.80 r30= +10.19 t=31s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='south') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 +ep=0115 [medium] steps=007 reward= +16.480 evac=1 hp=100.0 suc30=0.80 r30= +10.13 t=31s + >> PPO update samples=flushed pi_loss=-0.0026 v_loss=1.6322 entropy=1.0911 kl=0.0002 clip%=0.00 lr=1.45e-04 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='east') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=move(direction='west') hp=100.0 +ep=0116 [medium] steps=020 reward= +17.860 evac=1 hp=100.0 suc30=0.80 r30= +10.06 t=32s + step=001 action=door(target_id='door_4', door_state='close') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp= 99.5 + step=011 action=door(target_id='door_2', door_state='open') hp= 99.5 + step=012 action=move(direction='west') hp= 99.5 + step=013 action=move(direction='west') hp= 99.5 + step=014 action=move(direction='west') hp= 99.5 +ep=0117 [medium] steps=014 reward= +15.042 evac=1 hp= 99.5 suc30=0.80 r30= +9.98 t=32s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='south') hp=100.0 + step=004 action=door(target_id='door_5', door_state='close') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=door(target_id='door_5', door_state='close') hp=100.0 + step=007 action=door(target_id='door_5', door_state='close') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='west') hp=100.0 +ep=0118 [medium] steps=010 reward= +15.190 evac=1 hp=100.0 suc30=0.83 r30= +11.13 t=32s + step=001 action=move(direction='north') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp= 88.0 + step=008 action=move(direction='north') hp= 87.5 + step=009 action=move(direction='west') hp= 72.5 + step=010 action=wait() hp= 72.0 + step=011 action=move(direction='west') hp= 60.0 + step=012 action=move(direction='west') hp= 59.5 + step=013 action=wait() hp= 59.0 + step=014 action=move(direction='east') hp= 58.5 + step=015 action=move(direction='north') hp= 56.5 + step=016 action=move(direction='west') hp= 56.0 + step=017 action=move(direction='north') hp= 55.5 + step=018 action=move(direction='north') hp= 55.5 + step=019 action=move(direction='east') hp= 55.5 + step=020 action=move(direction='north') hp= 55.5 + step=021 action=wait() hp= 55.5 + step=022 action=move(direction='west') hp= 55.5 + step=023 action=move(direction='north') hp= 55.5 + step=024 action=move(direction='west') hp= 55.5 +ep=0119 [medium] steps=024 reward= +10.373 evac=1 hp= 55.5 suc30=0.87 r30= +11.89 t=32s + step=001 action=move(direction='west') hp=100.0 + step=002 action=door(target_id='door_2', door_state='close') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 +ep=0120 [medium] steps=008 reward= +15.620 evac=1 hp=100.0 suc30=0.87 r30= +11.84 t=32s + >> PPO update samples=flushed pi_loss=-0.0013 v_loss=2.5435 entropy=1.0417 kl=0.0001 clip%=0.00 lr=1.38e-04 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp= 99.5 + step=007 action=move(direction='west') hp= 99.5 + step=008 action=move(direction='west') hp= 99.5 + step=009 action=move(direction='west') hp= 99.5 + step=010 action=move(direction='north') hp= 99.5 + step=011 action=move(direction='north') hp= 99.5 + step=012 action=move(direction='west') hp= 99.5 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='north') hp= 99.5 + step=017 action=wait() hp= 99.0 + step=018 action=wait() hp= 98.5 + step=019 action=wait() hp= 98.0 + step=020 action=wait() hp= 97.5 + step=021 action=wait() hp= 97.5 + step=022 action=wait() hp= 97.5 + step=023 action=wait() hp= 97.5 + step=024 action=wait() hp= 97.5 + step=025 action=wait() hp= 97.5 + step=026 action=wait() hp= 97.5 + step=027 action=wait() hp= 97.5 + step=028 action=wait() hp= 97.5 + step=029 action=wait() hp= 97.5 + step=030 action=wait() hp= 97.5 + step=031 action=wait() hp= 97.5 + step=032 action=wait() hp= 97.5 + step=033 action=wait() hp= 97.5 + step=034 action=wait() hp= 97.5 + step=035 action=wait() hp= 97.5 + step=036 action=wait() hp= 97.5 + step=037 action=wait() hp= 97.5 + step=038 action=wait() hp= 97.5 + step=039 action=wait() hp= 97.5 + step=040 action=wait() hp= 97.5 + step=041 action=wait() hp= 97.5 + step=042 action=wait() hp= 97.5 + step=043 action=wait() hp= 97.5 + step=044 action=wait() hp= 97.5 + step=045 action=wait() hp= 97.5 + step=046 action=wait() hp= 97.5 + step=047 action=wait() hp= 97.5 + step=048 action=wait() hp= 97.5 + step=049 action=wait() hp= 97.5 + step=050 action=wait() hp= 97.5 + step=051 action=wait() hp= 97.5 + step=052 action=wait() hp= 97.5 + step=053 action=wait() hp= 97.5 + step=054 action=wait() hp= 97.5 + step=055 action=wait() hp= 97.5 + step=056 action=wait() hp= 97.5 + step=057 action=wait() hp= 97.5 + step=058 action=wait() hp= 97.5 + step=059 action=wait() hp= 97.5 + step=060 action=wait() hp= 97.5 + step=061 action=wait() hp= 97.5 + step=062 action=wait() hp= 97.5 + step=063 action=wait() hp= 97.5 + step=064 action=wait() hp= 97.5 + step=065 action=wait() hp= 97.5 + step=066 action=wait() hp= 97.5 + step=067 action=wait() hp= 97.5 + step=068 action=wait() hp= 97.0 + step=069 action=wait() hp= 96.5 + step=070 action=wait() hp= 96.0 + step=071 action=wait() hp= 95.5 + step=072 action=wait() hp= 95.0 + step=073 action=wait() hp= 83.0 + step=074 action=wait() hp= 68.0 + step=075 action=wait() hp= 53.0 + step=076 action=wait() hp= 38.0 + step=077 action=wait() hp= 23.0 + step=078 action=wait() hp= 18.0 + step=079 action=wait() hp= 13.0 + step=080 action=wait() hp= 8.0 + step=081 action=wait() hp= 3.0 + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + ** EVAL [medium] reward=+6.401 success=0.67 steps=32.7 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=door(target_id='door_2', door_state='close') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=door(target_id='door_1', door_state='close') hp=100.0 + step=011 action=move(direction='south') hp=100.0 + step=012 action=door(target_id='door_1', door_state='open') hp=100.0 + step=013 action=move(direction='south') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='west') hp=100.0 +ep=0121 [medium] steps=015 reward= +16.250 evac=1 hp=100.0 suc30=0.87 r30= +11.80 t=32s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='east') hp= 99.5 + step=009 action=move(direction='west') hp= 99.5 + step=010 action=move(direction='north') hp= 99.5 + step=011 action=move(direction='west') hp= 99.5 + step=012 action=move(direction='west') hp= 99.5 + step=013 action=move(direction='west') hp= 99.0 + step=014 action=move(direction='west') hp= 99.0 +ep=0122 [medium] steps=014 reward= +16.135 evac=1 hp= 99.0 suc30=0.90 r30= +13.01 t=32s + step=001 action=move(direction='west') hp=100.0 +ep=0123 [medium] steps=001 reward= +14.260 evac=1 hp=100.0 suc30=0.90 r30= +12.90 t=32s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='south') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=door(target_id='door_1', door_state='close') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=door(target_id='door_1', door_state='close') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='west') hp=100.0 +ep=0124 [medium] steps=013 reward= +16.720 evac=1 hp=100.0 suc30=0.90 r30= +12.86 t=33s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='north') hp= 99.5 + step=012 action=move(direction='north') hp= 99.5 + step=013 action=move(direction='south') hp= 99.5 + step=014 action=wait() hp= 99.0 + step=015 action=wait() hp= 87.0 + step=016 action=move(direction='north') hp= 72.0 + step=017 action=move(direction='north') hp= 71.5 + step=018 action=move(direction='north') hp= 71.5 + step=019 action=move(direction='north') hp= 71.5 + step=020 action=move(direction='east') hp= 71.5 + step=021 action=move(direction='west') hp= 71.5 + step=022 action=move(direction='east') hp= 71.5 + step=023 action=move(direction='east') hp= 71.5 + step=024 action=move(direction='west') hp= 71.5 + step=025 action=move(direction='west') hp= 71.5 + step=026 action=wait() hp= 71.5 + step=027 action=move(direction='north') hp= 71.0 + step=028 action=move(direction='south') hp= 71.0 + step=029 action=move(direction='north') hp= 56.0 + step=030 action=move(direction='west') hp= 55.5 +ep=0125 [medium] steps=030 reward= +10.393 evac=1 hp= 55.5 suc30=0.90 r30= +12.48 t=33s + >> PPO update samples=flushed pi_loss=+0.0014 v_loss=5.3116 entropy=1.1161 kl=0.0002 clip%=0.00 lr=1.31e-04 + [ckpt] saved -> artifacts/pyre_ppo_http_ckpt.pt + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=door(target_id='door_1', door_state='close') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 +ep=0126 [medium] steps=008 reward= +16.920 evac=1 hp=100.0 suc30=0.90 r30= +12.43 t=33s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=door(target_id='door_7', door_state='close') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='north') hp= 98.0 + step=014 action=move(direction='east') hp= 96.0 + step=015 action=door(target_id='door_6', door_state='close') hp= 95.5 + step=016 action=move(direction='west') hp= 95.0 + step=017 action=move(direction='north') hp= 93.0 + step=018 action=door(target_id='door_2', door_state='close') hp= 78.0 + step=019 action=wait() hp= 63.0 + step=020 action=door(target_id='door_2', door_state='open') hp= 48.0 + step=021 action=door(target_id='door_2', door_state='close') hp= 43.0 + step=022 action=door(target_id='door_2', door_state='open') hp= 38.0 + step=023 action=door(target_id='door_2', door_state='close') hp= 33.0 + step=024 action=move(direction='south') hp= 28.0 + step=025 action=move(direction='east') hp= 13.0 +ep=0127 [medium] steps=025 reward= -13.340 evac=0 hp= 0.0 suc30=0.87 r30= +11.39 t=33s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 +ep=0128 [medium] steps=007 reward= +16.100 evac=1 hp=100.0 suc30=0.87 r30= +11.26 t=33s + step=001 action=move(direction='west') hp=100.0 + step=002 action=door(target_id='door_3', door_state='close') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=door(target_id='door_3', door_state='close') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='east') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=move(direction='west') hp= 99.5 + step=010 action=move(direction='west') hp= 99.5 + step=011 action=wait() hp= 99.0 + step=012 action=move(direction='north') hp= 98.5 + step=013 action=move(direction='north') hp= 98.5 + step=014 action=move(direction='north') hp= 98.5 + step=015 action=wait() hp= 98.5 + step=016 action=move(direction='north') hp= 98.5 + step=017 action=move(direction='north') hp= 98.5 + step=018 action=move(direction='north') hp= 98.5 + step=019 action=move(direction='north') hp= 98.5 +ep=0129 [medium] steps=019 reward= +14.168 evac=1 hp= 98.5 suc30=0.87 r30= +11.15 t=33s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=door(target_id='door_1', door_state='close') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=door(target_id='door_1', door_state='open') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=door(target_id='door_1', door_state='close') hp=100.0 + step=016 action=move(direction='east') hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=door(target_id='door_1', door_state='close') hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=move(direction='east') hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=door(target_id='door_1', door_state='close') hp=100.0 + step=025 action=wait() hp=100.0 + step=026 action=wait() hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=wait() hp=100.0 + step=029 action=move(direction='east') hp=100.0 + step=030 action=move(direction='south') hp=100.0 + step=031 action=move(direction='north') hp=100.0 + step=032 action=move(direction='west') hp=100.0 + step=033 action=move(direction='south') hp=100.0 + step=034 action=wait() hp=100.0 + step=035 action=wait() hp=100.0 + step=036 action=move(direction='south') hp=100.0 + step=037 action=wait() hp=100.0 + step=038 action=move(direction='north') hp=100.0 + step=039 action=move(direction='north') hp=100.0 + step=040 action=move(direction='south') hp=100.0 + step=041 action=move(direction='east') hp=100.0 + step=042 action=door(target_id='door_1', door_state='close') hp=100.0 + step=043 action=move(direction='east') hp=100.0 + step=044 action=move(direction='north') hp=100.0 + step=045 action=move(direction='west') hp=100.0 + step=046 action=door(target_id='door_1', door_state='open') hp=100.0 + step=047 action=move(direction='west') hp=100.0 + step=048 action=door(target_id='door_1', door_state='open') hp=100.0 + step=049 action=wait() hp=100.0 + step=050 action=move(direction='south') hp=100.0 + step=051 action=move(direction='south') hp=100.0 + step=052 action=wait() hp=100.0 + step=053 action=door(target_id='door_1', door_state='open') hp=100.0 + step=054 action=move(direction='north') hp=100.0 + step=055 action=wait() hp=100.0 + step=056 action=move(direction='east') hp=100.0 + step=057 action=door(target_id='door_1', door_state='close') hp=100.0 + step=058 action=move(direction='south') hp=100.0 + step=059 action=move(direction='west') hp= 88.0 + step=060 action=wait() hp= 88.0 + step=061 action=move(direction='north') hp= 76.0 + step=062 action=move(direction='east') hp= 75.5 + step=063 action=move(direction='west') hp= 60.5 + step=064 action=wait() hp= 45.5 + step=065 action=wait() hp= 30.5 + step=066 action=move(direction='north') hp= 15.5 + step=067 action=wait() hp= 0.5 +ep=0130 [medium] steps=067 reward= -16.020 evac=0 hp= 0.0 suc30=0.83 r30= +9.99 t=33s + >> PPO update samples=flushed pi_loss=-0.0008 v_loss=60.5100 entropy=1.2579 kl=0.0001 clip%=0.00 lr=1.24e-04 + step=001 action=move(direction='east') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='north') hp= 99.5 + step=011 action=move(direction='west') hp= 99.5 + step=012 action=move(direction='north') hp= 87.5 + step=013 action=move(direction='west') hp= 87.0 + step=014 action=move(direction='east') hp= 86.5 + step=015 action=wait() hp= 74.5 + step=016 action=move(direction='west') hp= 59.5 + step=017 action=wait() hp= 44.5 + step=018 action=move(direction='west') hp= 29.5 + step=019 action=move(direction='west') hp= 27.5 + step=020 action=move(direction='west') hp= 27.0 + step=021 action=move(direction='north') hp= 27.0 + step=022 action=move(direction='east') hp= 27.0 + step=023 action=wait() hp= 27.0 + step=024 action=move(direction='north') hp= 26.5 + step=025 action=move(direction='north') hp= 26.0 + step=026 action=move(direction='north') hp= 11.0 +ep=0131 [medium] steps=026 reward= -12.050 evac=0 hp= 0.0 suc30=0.80 r30= +9.05 t=34s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='east') hp=100.0 + step=007 action=door(target_id='door_3', door_state='close') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='west') hp= 98.0 + step=013 action=move(direction='east') hp= 97.5 + step=014 action=move(direction='west') hp= 97.0 + step=015 action=move(direction='west') hp= 96.5 + step=016 action=move(direction='east') hp= 96.0 + step=017 action=move(direction='west') hp= 95.5 + step=018 action=door(target_id='door_1', door_state='close') hp= 95.0 + step=019 action=wait() hp= 94.5 + step=020 action=wait() hp= 94.0 + step=021 action=move(direction='west') hp= 93.5 + step=022 action=move(direction='east') hp= 93.5 + step=023 action=move(direction='west') hp= 93.5 + step=024 action=wait() hp= 93.5 + step=025 action=move(direction='east') hp= 93.5 + step=026 action=move(direction='west') hp= 93.5 + step=027 action=move(direction='west') hp= 93.5 + step=028 action=move(direction='south') hp= 93.5 + step=029 action=move(direction='north') hp= 93.5 + step=030 action=move(direction='west') hp= 93.5 +ep=0132 [medium] steps=030 reward= +13.742 evac=1 hp= 93.5 suc30=0.80 r30= +9.18 t=34s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=door(target_id='door_7', door_state='close') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='west') hp= 99.5 + step=013 action=move(direction='west') hp= 99.0 + step=014 action=move(direction='west') hp= 98.5 + step=015 action=move(direction='north') hp= 98.0 + step=016 action=move(direction='west') hp= 96.0 + step=017 action=move(direction='west') hp= 95.5 + step=018 action=door(target_id='door_1', door_state='close') hp= 95.5 + step=019 action=move(direction='east') hp= 95.5 + step=020 action=move(direction='north') hp= 95.0 + step=021 action=move(direction='west') hp= 94.5 + step=022 action=door(target_id='door_1', door_state='close') hp= 94.0 + step=023 action=door(target_id='door_1', door_state='open') hp= 93.5 + step=024 action=move(direction='south') hp= 93.0 + step=025 action=move(direction='west') hp= 92.5 +ep=0133 [medium] steps=025 reward= +14.447 evac=1 hp= 92.5 suc30=0.83 r30= +10.18 t=34s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='north') hp= 99.5 + step=015 action=wait() hp= 99.5 + step=016 action=move(direction='north') hp= 99.5 + step=017 action=move(direction='west') hp= 99.0 + step=018 action=wait() hp= 84.0 + step=019 action=move(direction='south') hp= 69.0 + step=020 action=move(direction='east') hp= 67.0 + step=021 action=move(direction='west') hp= 65.0 + step=022 action=move(direction='east') hp= 64.5 + step=023 action=wait() hp= 62.5 + step=024 action=move(direction='west') hp= 60.5 + step=025 action=wait() hp= 60.0 + step=026 action=move(direction='south') hp= 59.5 + step=027 action=wait() hp= 59.0 + step=028 action=wait() hp= 58.5 + step=029 action=move(direction='north') hp= 58.0 + step=030 action=move(direction='south') hp= 57.5 + step=031 action=move(direction='north') hp= 57.0 + step=032 action=move(direction='east') hp= 56.5 + step=033 action=move(direction='west') hp= 56.0 + step=034 action=move(direction='east') hp= 55.5 + step=035 action=wait() hp= 55.0 + step=036 action=move(direction='west') hp= 54.5 + step=037 action=wait() hp= 54.5 + step=038 action=move(direction='south') hp= 54.5 + step=039 action=move(direction='north') hp= 54.5 + step=040 action=move(direction='east') hp= 54.5 + step=041 action=move(direction='west') hp= 54.5 + step=042 action=move(direction='south') hp= 54.5 + step=043 action=wait() hp= 54.5 + step=044 action=wait() hp= 54.5 + step=045 action=move(direction='north') hp= 54.5 + step=046 action=wait() hp= 54.5 + step=047 action=wait() hp= 54.5 + step=048 action=wait() hp= 54.5 + step=049 action=move(direction='east') hp= 54.5 + step=050 action=wait() hp= 54.5 + step=051 action=move(direction='west') hp= 54.5 + step=052 action=wait() hp= 54.5 + step=053 action=wait() hp= 54.5 + step=054 action=wait() hp= 54.5 + step=055 action=wait() hp= 54.5 + step=056 action=wait() hp= 54.5 + step=057 action=wait() hp= 54.5 + step=058 action=move(direction='south') hp= 54.5 + step=059 action=wait() hp= 54.5 + step=060 action=move(direction='north') hp= 54.5 + step=061 action=wait() hp= 54.5 + step=062 action=move(direction='south') hp= 54.5 + step=063 action=move(direction='east') hp= 54.5 + step=064 action=move(direction='west') hp= 54.5 + step=065 action=move(direction='east') hp= 54.5 + step=066 action=move(direction='north') hp= 54.5 + step=067 action=wait() hp= 54.5 + step=068 action=move(direction='south') hp= 54.5 + step=069 action=move(direction='north') hp= 54.5 + step=070 action=move(direction='south') hp= 54.5 + step=071 action=wait() hp= 54.5 + step=072 action=move(direction='west') hp= 54.5 + step=073 action=wait() hp= 54.5 + step=074 action=move(direction='east') hp= 54.5 + step=075 action=move(direction='west') hp= 54.5 + step=076 action=move(direction='east') hp= 54.5 + step=077 action=move(direction='north') hp= 54.5 + step=078 action=wait() hp= 54.5 + step=079 action=move(direction='west') hp= 54.5 + step=080 action=move(direction='south') hp= 54.5 + step=081 action=move(direction='east') hp= 54.5 + step=082 action=move(direction='west') hp= 54.5 + step=083 action=move(direction='east') hp= 54.5 + step=084 action=move(direction='west') hp= 54.5 + step=085 action=move(direction='north') hp= 54.5 + step=086 action=move(direction='east') hp= 54.5 + step=087 action=move(direction='west') hp= 54.5 + step=088 action=move(direction='south') hp= 54.5 + step=089 action=move(direction='north') hp= 54.5 + step=090 action=move(direction='south') hp= 54.5 + step=091 action=wait() hp= 54.5 + step=092 action=move(direction='north') hp= 54.5 + step=093 action=wait() hp= 54.5 + step=094 action=wait() hp= 54.5 + step=095 action=move(direction='east') hp= 54.5 + step=096 action=wait() hp= 54.5 + step=097 action=wait() hp= 54.5 + step=098 action=wait() hp= 54.5 + step=099 action=move(direction='south') hp= 54.5 + step=100 action=move(direction='west') hp= 54.5 + step=101 action=move(direction='east') hp= 54.5 + step=102 action=move(direction='west') hp= 54.5 + step=103 action=move(direction='east') hp= 54.5 + step=104 action=move(direction='west') hp= 54.5 + step=105 action=move(direction='north') hp= 54.5 + step=106 action=wait() hp= 54.5 + step=107 action=move(direction='south') hp= 54.5 + step=108 action=move(direction='north') hp= 54.5 + step=109 action=wait() hp= 54.5 + step=110 action=move(direction='south') hp= 54.5 + step=111 action=move(direction='north') hp= 54.5 + step=112 action=wait() hp= 54.5 + step=113 action=wait() hp= 54.5 + step=114 action=move(direction='south') hp= 54.5 + step=115 action=move(direction='east') hp= 54.5 + step=116 action=move(direction='north') hp= 54.5 + step=117 action=move(direction='south') hp= 54.5 + step=118 action=move(direction='north') hp= 54.5 + step=119 action=move(direction='west') hp= 54.5 + step=120 action=move(direction='east') hp= 54.5 + step=121 action=move(direction='west') hp= 54.5 + step=122 action=wait() hp= 54.5 + step=123 action=move(direction='east') hp= 54.5 + step=124 action=move(direction='west') hp= 54.5 + step=125 action=wait() hp= 54.5 + step=126 action=wait() hp= 54.5 + step=127 action=wait() hp= 54.5 + step=128 action=wait() hp= 54.5 + step=129 action=move(direction='east') hp= 54.5 + step=130 action=move(direction='west') hp= 54.5 + step=131 action=move(direction='east') hp= 54.5 + step=132 action=wait() hp= 54.5 + step=133 action=move(direction='west') hp= 54.5 + step=134 action=move(direction='east') hp= 54.5 + step=135 action=move(direction='south') hp= 54.5 + step=136 action=move(direction='west') hp= 54.5 + step=137 action=wait() hp= 54.5 + step=138 action=move(direction='north') hp= 54.5 + step=139 action=wait() hp= 54.5 + step=140 action=move(direction='east') hp= 54.5 + step=141 action=move(direction='west') hp= 54.5 + step=142 action=wait() hp= 54.5 + step=143 action=move(direction='east') hp= 54.5 + step=144 action=wait() hp= 54.5 + step=145 action=move(direction='south') hp= 54.5 + step=146 action=move(direction='north') hp= 54.5 + step=147 action=move(direction='west') hp= 54.5 + step=148 action=move(direction='east') hp= 54.5 + step=149 action=wait() hp= 54.5 + step=150 action=move(direction='west') hp= 54.5 +ep=0134 [medium] steps=150 reward= -26.935 evac=0 hp= 54.5 suc30=0.80 r30= +8.72 t=35s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='east') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=door(target_id='door_1', door_state='close') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='west') hp=100.0 +ep=0135 [medium] steps=013 reward= +15.770 evac=1 hp=100.0 suc30=0.83 r30= +9.87 t=35s + >> PPO update samples=flushed pi_loss=+0.0010 v_loss=34.2685 entropy=1.0653 kl=0.0005 clip%=0.00 lr=1.18e-04 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=door(target_id='door_2', door_state='close') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=door(target_id='door_2', door_state='open') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=door(target_id='door_2', door_state='close') hp=100.0 + step=014 action=door(target_id='door_2', door_state='close') hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=door(target_id='door_2', door_state='close') hp=100.0 + step=017 action=door(target_id='door_2', door_state='close') hp=100.0 + step=018 action=door(target_id='door_2', door_state='close') hp=100.0 + step=019 action=door(target_id='door_2', door_state='close') hp=100.0 + step=020 action=door(target_id='door_2', door_state='close') hp=100.0 + step=021 action=door(target_id='door_2', door_state='close') hp=100.0 + step=022 action=door(target_id='door_2', door_state='close') hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=wait() hp=100.0 + step=025 action=door(target_id='door_2', door_state='close') hp=100.0 + step=026 action=door(target_id='door_2', door_state='close') hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=move(direction='south') hp=100.0 + step=029 action=move(direction='north') hp=100.0 + step=030 action=wait() hp=100.0 + step=031 action=door(target_id='door_2', door_state='close') hp=100.0 + step=032 action=door(target_id='door_2', door_state='close') hp=100.0 + step=033 action=move(direction='south') hp=100.0 + step=034 action=door(target_id='door_2', door_state='close') hp=100.0 + step=035 action=move(direction='north') hp=100.0 + step=036 action=door(target_id='door_2', door_state='close') hp=100.0 + step=037 action=door(target_id='door_2', door_state='close') hp=100.0 + step=038 action=door(target_id='door_2', door_state='close') hp=100.0 + step=039 action=wait() hp=100.0 + step=040 action=move(direction='east') hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=move(direction='west') hp=100.0 + step=043 action=door(target_id='door_2', door_state='close') hp=100.0 + step=044 action=wait() hp=100.0 + step=045 action=door(target_id='door_2', door_state='close') hp=100.0 + step=046 action=wait() hp=100.0 + step=047 action=wait() hp=100.0 + step=048 action=wait() hp=100.0 + step=049 action=wait() hp=100.0 + step=050 action=door(target_id='door_2', door_state='close') hp=100.0 + step=051 action=wait() hp=100.0 + step=052 action=door(target_id='door_2', door_state='close') hp=100.0 + step=053 action=wait() hp=100.0 + step=054 action=door(target_id='door_2', door_state='close') hp=100.0 + step=055 action=door(target_id='door_2', door_state='close') hp=100.0 + step=056 action=move(direction='south') hp=100.0 + step=057 action=move(direction='east') hp=100.0 + step=058 action=door(target_id='door_2', door_state='close') hp=100.0 + step=059 action=move(direction='north') hp=100.0 + step=060 action=wait() hp=100.0 + step=061 action=move(direction='west') hp=100.0 + step=062 action=wait() hp=100.0 + step=063 action=wait() hp=100.0 + step=064 action=wait() hp=100.0 + step=065 action=wait() hp=100.0 + step=066 action=door(target_id='door_2', door_state='open') hp=100.0 + step=067 action=door(target_id='door_2', door_state='open') hp=100.0 + step=068 action=door(target_id='door_2', door_state='open') hp=100.0 + step=069 action=move(direction='south') hp=100.0 + step=070 action=move(direction='north') hp=100.0 + step=071 action=door(target_id='door_2', door_state='open') hp=100.0 + step=072 action=move(direction='south') hp=100.0 + step=073 action=move(direction='north') hp=100.0 + step=074 action=door(target_id='door_2', door_state='open') hp=100.0 + step=075 action=move(direction='south') hp=100.0 + step=076 action=move(direction='east') hp=100.0 + step=077 action=move(direction='north') hp=100.0 + step=078 action=move(direction='west') hp=100.0 + step=079 action=wait() hp=100.0 + step=080 action=door(target_id='door_2', door_state='open') hp=100.0 + step=081 action=move(direction='east') hp=100.0 + step=082 action=move(direction='west') hp=100.0 + step=083 action=move(direction='south') hp=100.0 + step=084 action=wait() hp=100.0 + step=085 action=move(direction='north') hp=100.0 + step=086 action=move(direction='east') hp=100.0 + step=087 action=move(direction='east') hp=100.0 + step=088 action=wait() hp=100.0 + step=089 action=move(direction='west') hp=100.0 + step=090 action=move(direction='east') hp=100.0 + step=091 action=move(direction='west') hp=100.0 + step=092 action=move(direction='west') hp=100.0 + step=093 action=door(target_id='door_2', door_state='open') hp=100.0 + step=094 action=wait() hp=100.0 + step=095 action=door(target_id='door_2', door_state='open') hp=100.0 + step=096 action=wait() hp=100.0 + step=097 action=door(target_id='door_2', door_state='open') hp=100.0 + step=098 action=door(target_id='door_2', door_state='open') hp=100.0 + step=099 action=door(target_id='door_2', door_state='open') hp=100.0 + step=100 action=wait() hp=100.0 + step=101 action=wait() hp=100.0 + step=102 action=door(target_id='door_2', door_state='open') hp=100.0 + step=103 action=move(direction='south') hp=100.0 + step=104 action=move(direction='south') hp=100.0 + step=105 action=move(direction='north') hp=100.0 + step=106 action=wait() hp=100.0 + step=107 action=move(direction='north') hp=100.0 + step=108 action=move(direction='south') hp=100.0 + step=109 action=move(direction='east') hp=100.0 + step=110 action=move(direction='west') hp=100.0 + step=111 action=move(direction='north') hp=100.0 + step=112 action=door(target_id='door_2', door_state='open') hp=100.0 + step=113 action=wait() hp=100.0 + step=114 action=door(target_id='door_2', door_state='open') hp=100.0 + step=115 action=wait() hp=100.0 + step=116 action=move(direction='east') hp=100.0 + step=117 action=door(target_id='door_2', door_state='open') hp=100.0 + step=118 action=move(direction='west') hp=100.0 + step=119 action=wait() hp=100.0 + step=120 action=wait() hp=100.0 + step=121 action=wait() hp=100.0 + step=122 action=door(target_id='door_2', door_state='open') hp=100.0 + step=123 action=door(target_id='door_2', door_state='open') hp=100.0 + step=124 action=wait() hp=100.0 + step=125 action=move(direction='east') hp=100.0 + step=126 action=door(target_id='door_2', door_state='open') hp=100.0 + step=127 action=move(direction='west') hp=100.0 + step=128 action=wait() hp=100.0 + step=129 action=door(target_id='door_2', door_state='open') hp=100.0 + step=130 action=door(target_id='door_2', door_state='open') hp=100.0 + step=131 action=wait() hp=100.0 + step=132 action=move(direction='east') hp=100.0 + step=133 action=move(direction='west') hp=100.0 + step=134 action=move(direction='south') hp=100.0 + step=135 action=move(direction='north') hp=100.0 + step=136 action=wait() hp=100.0 + step=137 action=door(target_id='door_2', door_state='open') hp=100.0 + step=138 action=door(target_id='door_2', door_state='open') hp=100.0 + step=139 action=door(target_id='door_2', door_state='open') hp=100.0 + step=140 action=wait() hp=100.0 + step=141 action=move(direction='south') hp=100.0 + step=142 action=door(target_id='door_2', door_state='open') hp=100.0 + step=143 action=door(target_id='door_2', door_state='open') hp=100.0 + step=144 action=door(target_id='door_2', door_state='open') hp=100.0 + step=145 action=move(direction='north') hp=100.0 + step=146 action=door(target_id='door_2', door_state='open') hp=100.0 + step=147 action=door(target_id='door_2', door_state='open') hp=100.0 + step=148 action=wait() hp=100.0 + step=149 action=door(target_id='door_2', door_state='open') hp=100.0 + step=150 action=wait() hp=100.0 +ep=0136 [medium] steps=150 reward= -16.300 evac=0 hp=100.0 suc30=0.83 r30= +9.84 t=35s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='south') hp= 85.0 + step=013 action=move(direction='north') hp= 74.5 + step=014 action=wait() hp= 59.5 + step=015 action=move(direction='east') hp= 54.5 + step=016 action=move(direction='south') hp= 54.0 + step=017 action=move(direction='north') hp= 53.5 + step=018 action=move(direction='north') hp= 51.5 + step=019 action=wait() hp= 36.5 + step=020 action=move(direction='south') hp= 31.5 + step=021 action=move(direction='east') hp= 19.5 + step=022 action=move(direction='west') hp= 19.0 + step=023 action=move(direction='east') hp= 4.0 + step=024 action=move(direction='west') hp= 3.5 +ep=0137 [medium] steps=024 reward= -17.860 evac=0 hp= 0.0 suc30=0.80 r30= +8.67 t=36s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 +ep=0138 [medium] steps=002 reward= +14.730 evac=1 hp=100.0 suc30=0.80 r30= +8.78 t=36s + step=001 action=move(direction='north') hp=100.0 + step=002 action=door(target_id='door_1', door_state='close') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=door(target_id='door_1', door_state='open') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=move(direction='east') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='south') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=door(target_id='door_1', door_state='close') hp=100.0 + step=017 action=wait() hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=move(direction='east') hp=100.0 + step=021 action=move(direction='west') hp=100.0 + step=022 action=move(direction='east') hp=100.0 + step=023 action=door(target_id='door_1', door_state='close') hp=100.0 + step=024 action=door(target_id='door_1', door_state='close') hp=100.0 + step=025 action=door(target_id='door_1', door_state='close') hp=100.0 + step=026 action=move(direction='west') hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=move(direction='east') hp=100.0 + step=029 action=move(direction='west') hp=100.0 + step=030 action=wait() hp=100.0 + step=031 action=move(direction='south') hp=100.0 + step=032 action=wait() hp=100.0 + step=033 action=wait() hp=100.0 + step=034 action=move(direction='east') hp=100.0 + step=035 action=wait() hp=100.0 + step=036 action=move(direction='south') hp=100.0 + step=037 action=move(direction='north') hp=100.0 + step=038 action=move(direction='south') hp=100.0 + step=039 action=move(direction='east') hp=100.0 + step=040 action=door(target_id='door_1', door_state='close') hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=door(target_id='door_1', door_state='open') hp=100.0 + step=043 action=move(direction='north') hp=100.0 + step=044 action=move(direction='north') hp=100.0 + step=045 action=door(target_id='door_1', door_state='close') hp=100.0 + step=046 action=move(direction='west') hp=100.0 + step=047 action=move(direction='west') hp=100.0 + step=048 action=move(direction='south') hp=100.0 + step=049 action=wait() hp=100.0 + step=050 action=move(direction='east') hp=100.0 + step=051 action=move(direction='west') hp= 99.5 + step=052 action=move(direction='east') hp= 99.0 + step=053 action=move(direction='north') hp= 98.5 + step=054 action=move(direction='east') hp= 98.5 + step=055 action=move(direction='west') hp= 98.5 + step=056 action=move(direction='west') hp= 98.0 + step=057 action=door(target_id='door_1', door_state='close') hp= 97.5 + step=058 action=door(target_id='door_1', door_state='close') hp= 97.0 + step=059 action=move(direction='south') hp= 96.5 + step=060 action=move(direction='south') hp= 96.0 + step=061 action=door(target_id='door_1', door_state='close') hp= 95.5 + step=062 action=wait() hp= 95.0 + step=063 action=wait() hp= 94.5 + step=064 action=wait() hp= 94.0 + step=065 action=move(direction='east') hp= 93.5 + step=066 action=door(target_id='door_1', door_state='open') hp= 93.0 + step=067 action=door(target_id='door_1', door_state='close') hp= 92.5 + step=068 action=move(direction='north') hp= 92.0 + step=069 action=wait() hp= 91.5 + step=070 action=wait() hp= 91.0 + step=071 action=move(direction='east') hp= 90.5 + step=072 action=move(direction='south') hp= 90.0 + step=073 action=move(direction='west') hp= 89.5 + step=074 action=move(direction='west') hp= 89.0 + step=075 action=wait() hp= 88.5 + step=076 action=wait() hp= 88.0 + step=077 action=wait() hp= 87.5 + step=078 action=move(direction='north') hp= 87.0 + step=079 action=move(direction='east') hp= 86.5 + step=080 action=move(direction='east') hp= 86.0 + step=081 action=wait() hp= 85.5 + step=082 action=move(direction='north') hp= 85.0 + step=083 action=door(target_id='door_1', door_state='open') hp= 84.5 + step=084 action=move(direction='west') hp= 84.5 + step=085 action=door(target_id='door_1', door_state='open') hp= 84.5 + step=086 action=move(direction='east') hp= 84.5 + step=087 action=wait() hp= 84.5 + step=088 action=move(direction='west') hp= 84.5 + step=089 action=wait() hp= 84.5 + step=090 action=move(direction='east') hp= 84.5 + step=091 action=move(direction='west') hp= 84.5 + step=092 action=door(target_id='door_1', door_state='open') hp= 84.5 + step=093 action=move(direction='west') hp= 84.5 + step=094 action=wait() hp= 84.5 + step=095 action=wait() hp= 84.5 + step=096 action=door(target_id='door_1', door_state='open') hp= 84.5 + step=097 action=wait() hp= 84.5 + step=098 action=move(direction='east') hp= 84.5 + step=099 action=door(target_id='door_1', door_state='open') hp= 84.5 + step=100 action=move(direction='west') hp= 84.5 + step=101 action=move(direction='south') hp= 84.5 + step=102 action=move(direction='east') hp= 84.5 + step=103 action=wait() hp= 84.5 + step=104 action=move(direction='north') hp= 84.5 + step=105 action=move(direction='west') hp= 84.5 + step=106 action=door(target_id='door_1', door_state='open') hp= 84.5 + step=107 action=move(direction='east') hp= 84.5 + step=108 action=wait() hp= 84.5 + step=109 action=move(direction='west') hp= 84.5 + step=110 action=door(target_id='door_1', door_state='open') hp= 84.5 + step=111 action=move(direction='south') hp= 84.5 + step=112 action=wait() hp= 84.5 + step=113 action=move(direction='north') hp= 84.5 + step=114 action=move(direction='south') hp= 84.5 + step=115 action=door(target_id='door_1', door_state='open') hp= 84.5 + step=116 action=door(target_id='door_1', door_state='open') hp= 84.5 + step=117 action=door(target_id='door_1', door_state='open') hp= 84.5 + step=118 action=move(direction='north') hp= 84.5 + step=119 action=move(direction='south') hp= 84.5 + step=120 action=move(direction='north') hp= 84.5 + step=121 action=wait() hp= 84.5 + step=122 action=move(direction='south') hp= 84.5 + step=123 action=move(direction='north') hp= 84.5 + step=124 action=move(direction='south') hp= 84.5 + step=125 action=move(direction='north') hp= 84.5 + step=126 action=wait() hp= 84.5 + step=127 action=move(direction='east') hp= 84.5 + step=128 action=move(direction='west') hp= 84.5 + step=129 action=move(direction='east') hp= 84.5 + step=130 action=wait() hp= 84.5 + step=131 action=move(direction='south') hp= 84.5 + step=132 action=door(target_id='door_1', door_state='open') hp= 84.5 + step=133 action=door(target_id='door_1', door_state='close') hp= 84.5 + step=134 action=move(direction='west') hp= 84.5 + step=135 action=wait() hp= 84.5 + step=136 action=move(direction='north') hp= 84.5 + step=137 action=door(target_id='door_1', door_state='open') hp= 84.5 + step=138 action=move(direction='south') hp= 84.5 + step=139 action=move(direction='north') hp= 84.5 + step=140 action=move(direction='south') hp= 84.5 + step=141 action=move(direction='south') hp= 84.5 + step=142 action=door(target_id='door_1', door_state='open') hp= 84.5 + step=143 action=wait() hp= 84.5 + step=144 action=move(direction='north') hp= 84.5 + step=145 action=move(direction='north') hp= 84.5 + step=146 action=door(target_id='door_1', door_state='close') hp= 84.5 + step=147 action=move(direction='east') hp= 84.5 + step=148 action=door(target_id='door_1', door_state='close') hp= 84.5 + step=149 action=door(target_id='door_1', door_state='close') hp= 84.5 + step=150 action=move(direction='west') hp= 84.5 +ep=0139 [medium] steps=150 reward= -20.595 evac=0 hp= 84.5 suc30=0.77 r30= +7.57 t=36s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='south') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='east') hp= 99.5 + step=012 action=move(direction='west') hp= 84.5 + step=013 action=move(direction='north') hp= 84.0 + step=014 action=move(direction='north') hp= 84.0 + step=015 action=move(direction='east') hp= 84.0 + step=016 action=move(direction='west') hp= 84.0 + step=017 action=move(direction='north') hp= 84.0 + step=018 action=move(direction='west') hp= 84.0 +ep=0140 [medium] steps=018 reward= +15.610 evac=1 hp= 84.0 suc30=0.77 r30= +7.55 t=36s + >> PPO update samples=flushed pi_loss=+0.0136 v_loss=38.0948 entropy=1.3180 kl=0.0008 clip%=0.00 lr=1.11e-04 + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + ** EVAL [medium] reward=+16.283 success=1.00 steps=6.3 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='south') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 +ep=0141 [medium] steps=005 reward= +15.310 evac=1 hp=100.0 suc30=0.77 r30= +7.56 t=37s + step=001 action=door(target_id='door_2', door_state='close') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='north') hp= 85.0 + step=007 action=move(direction='west') hp= 84.5 + step=008 action=move(direction='south') hp= 84.5 + step=009 action=door(target_id='door_1', door_state='close') hp= 84.0 + step=010 action=move(direction='west') hp= 72.0 + step=011 action=door(target_id='door_5', door_state='close') hp= 72.0 + step=012 action=wait() hp= 71.5 + step=013 action=move(direction='south') hp= 59.5 + step=014 action=move(direction='north') hp= 47.5 + step=015 action=move(direction='north') hp= 32.5 + step=016 action=move(direction='north') hp= 17.5 + step=017 action=move(direction='north') hp= 17.0 + step=018 action=wait() hp= 16.5 + step=019 action=wait() hp= 1.5 +ep=0142 [medium] steps=019 reward= -11.900 evac=0 hp= 0.0 suc30=0.73 r30= +6.64 t=37s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=wait() hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=move(direction='north') hp=100.0 + step=023 action=move(direction='west') hp=100.0 +ep=0143 [medium] steps=023 reward= +19.750 evac=1 hp=100.0 suc30=0.73 r30= +6.70 t=37s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='south') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='north') hp=100.0 +ep=0144 [medium] steps=008 reward= +15.640 evac=1 hp=100.0 suc30=0.73 r30= +6.72 t=37s + step=001 action=door(target_id='door_4', door_state='close') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=door(target_id='door_3', door_state='open') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=door(target_id='door_3', door_state='close') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=door(target_id='door_3', door_state='close') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=door(target_id='door_1', door_state='close') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=door(target_id='door_1', door_state='close') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=move(direction='east') hp=100.0 + step=020 action=move(direction='west') hp= 99.5 + step=021 action=door(target_id='door_1', door_state='open') hp= 99.5 + step=022 action=move(direction='west') hp= 99.0 + step=023 action=move(direction='north') hp= 99.0 + step=024 action=move(direction='south') hp= 99.0 + step=025 action=move(direction='north') hp= 99.0 + step=026 action=move(direction='north') hp= 99.0 + step=027 action=move(direction='north') hp= 99.0 + step=028 action=door(target_id='door_1', door_state='close') hp= 99.0 + step=029 action=door(target_id='door_1', door_state='open') hp= 99.0 + step=030 action=move(direction='east') hp= 99.0 + step=031 action=wait() hp= 99.0 + step=032 action=move(direction='north') hp= 99.0 + step=033 action=door(target_id='door_1', door_state='close') hp= 99.0 + step=034 action=move(direction='south') hp= 99.0 + step=035 action=move(direction='west') hp= 99.0 + step=036 action=move(direction='west') hp= 99.0 + step=037 action=door(target_id='door_1', door_state='close') hp= 99.0 + step=038 action=move(direction='north') hp= 99.0 + step=039 action=move(direction='south') hp= 99.0 + step=040 action=wait() hp= 99.0 + step=041 action=move(direction='north') hp= 99.0 + step=042 action=move(direction='south') hp= 99.0 + step=043 action=move(direction='north') hp= 98.5 + step=044 action=wait() hp= 98.5 + step=045 action=move(direction='east') hp= 98.5 + step=046 action=wait() hp= 98.0 + step=047 action=move(direction='south') hp= 86.0 + step=048 action=wait() hp= 71.0 + step=049 action=move(direction='north') hp= 56.0 + step=050 action=move(direction='west') hp= 41.0 + step=051 action=wait() hp= 26.0 + step=052 action=wait() hp= 11.0 +ep=0145 [medium] steps=052 reward= -14.460 evac=0 hp= 0.0 suc30=0.70 r30= +5.69 t=37s + >> PPO update samples=flushed pi_loss=+0.0010 v_loss=60.1304 entropy=1.2687 kl=0.0002 clip%=0.00 lr=1.04e-04 + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='south') hp=100.0 + step=005 action=move(direction='south') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='east') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=move(direction='west') hp=100.0 +ep=0146 [medium] steps=020 reward= +15.630 evac=1 hp=100.0 suc30=0.70 r30= +5.61 t=37s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=move(direction='west') hp=100.0 +ep=0147 [medium] steps=004 reward= +15.050 evac=1 hp=100.0 suc30=0.70 r30= +5.61 t=37s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='east') hp=100.0 + step=003 action=door(target_id='door_7', door_state='close') hp=100.0 + step=004 action=door(target_id='door_6', door_state='close') hp=100.0 + step=005 action=door(target_id='door_6', door_state='close') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=door(target_id='door_7', door_state='open') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=door(target_id='door_7', door_state='open') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='east') hp= 99.5 + step=014 action=move(direction='west') hp= 99.5 + step=015 action=move(direction='west') hp= 99.0 + step=016 action=move(direction='west') hp= 98.5 + step=017 action=move(direction='east') hp= 86.5 + step=018 action=move(direction='north') hp= 84.5 + step=019 action=move(direction='west') hp= 84.0 + step=020 action=move(direction='north') hp= 69.0 + step=021 action=move(direction='east') hp= 67.0 + step=022 action=move(direction='west') hp= 65.0 + step=023 action=move(direction='north') hp= 50.0 + step=024 action=move(direction='west') hp= 48.0 + step=025 action=move(direction='west') hp= 33.0 + step=026 action=wait() hp= 18.0 + step=027 action=wait() hp= 3.0 +ep=0148 [medium] steps=027 reward= -18.930 evac=0 hp= 0.0 suc30=0.67 r30= +4.47 t=37s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='east') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='west') hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=move(direction='west') hp=100.0 + step=020 action=move(direction='north') hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=move(direction='north') hp=100.0 + step=023 action=move(direction='east') hp=100.0 + step=024 action=move(direction='north') hp=100.0 + step=025 action=move(direction='west') hp=100.0 + step=026 action=move(direction='west') hp=100.0 +ep=0149 [medium] steps=026 reward= +18.900 evac=1 hp=100.0 suc30=0.67 r30= +4.76 t=38s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='south') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='east') hp=100.0 + step=005 action=door(target_id='door_4', door_state='close') hp=100.0 + step=006 action=door(target_id='door_4', door_state='close') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=door(target_id='door_3', door_state='close') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='south') hp= 99.5 + step=014 action=move(direction='west') hp= 99.0 + step=015 action=door(target_id='door_3', door_state='close') hp= 98.5 + step=016 action=move(direction='north') hp= 98.0 + step=017 action=move(direction='west') hp= 97.5 + step=018 action=move(direction='west') hp= 97.5 + step=019 action=move(direction='west') hp= 97.5 + step=020 action=wait() hp= 97.5 + step=021 action=move(direction='west') hp= 97.5 + step=022 action=move(direction='west') hp= 97.5 + step=023 action=move(direction='west') hp= 97.5 + step=024 action=move(direction='west') hp= 97.5 + step=025 action=door(target_id='door_1', door_state='open') hp= 97.5 + step=026 action=move(direction='west') hp= 97.5 + step=027 action=move(direction='west') hp= 97.5 +ep=0150 [medium] steps=027 reward= +15.582 evac=1 hp= 97.5 suc30=0.67 r30= +4.76 t=38s + >> PPO update samples=flushed pi_loss=+0.0014 v_loss=56.4011 entropy=1.1502 kl=0.0001 clip%=0.00 lr=9.75e-05 + [ckpt] saved -> artifacts/pyre_ppo_http_ckpt.pt + step=001 action=move(direction='south') hp=100.0 + step=002 action=move(direction='west') hp=100.0 +ep=0151 [medium] steps=002 reward= +14.730 evac=1 hp=100.0 suc30=0.67 r30= +4.71 t=38s + step=001 action=move(direction='east') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='south') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='west') hp=100.0 +ep=0152 [medium] steps=017 reward= +17.050 evac=1 hp=100.0 suc30=0.67 r30= +4.74 t=38s + step=001 action=move(direction='west') hp=100.0 + step=002 action=door(target_id='door_3', door_state='close') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=door(target_id='door_3', door_state='close') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp= 99.5 + step=007 action=move(direction='west') hp= 99.5 + step=008 action=wait() hp= 99.5 + step=009 action=move(direction='west') hp= 99.5 + step=010 action=wait() hp= 99.5 + step=011 action=move(direction='west') hp= 99.5 + step=012 action=move(direction='west') hp= 99.5 + step=013 action=move(direction='west') hp= 99.5 + step=014 action=wait() hp= 99.5 + step=015 action=door(target_id='door_2', door_state='open') hp= 99.5 + step=016 action=wait() hp= 99.5 + step=017 action=move(direction='west') hp= 99.5 + step=018 action=move(direction='west') hp= 99.5 + step=019 action=move(direction='east') hp= 99.5 + step=020 action=move(direction='west') hp= 99.5 + step=021 action=move(direction='west') hp= 99.5 +ep=0153 [medium] steps=021 reward= +14.402 evac=1 hp= 99.5 suc30=0.67 r30= +4.74 t=38s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=door(target_id='door_2', door_state='close') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=door(target_id='door_2', door_state='close') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=door(target_id='door_1', door_state='open') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=door(target_id='door_1', door_state='close') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=move(direction='east') hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=wait() hp=100.0 + step=020 action=move(direction='south') hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=door(target_id='door_1', door_state='open') hp=100.0 + step=023 action=wait() hp=100.0 + step=024 action=door(target_id='door_1', door_state='close') hp=100.0 + step=025 action=move(direction='east') hp=100.0 + step=026 action=door(target_id='door_2', door_state='close') hp=100.0 + step=027 action=door(target_id='door_1', door_state='open') hp=100.0 + step=028 action=door(target_id='door_2', door_state='close') hp=100.0 + step=029 action=move(direction='west') hp=100.0 + step=030 action=move(direction='east') hp=100.0 + step=031 action=door(target_id='door_2', door_state='close') hp=100.0 + step=032 action=move(direction='west') hp=100.0 + step=033 action=wait() hp=100.0 + step=034 action=move(direction='south') hp=100.0 + step=035 action=move(direction='north') hp=100.0 + step=036 action=move(direction='east') hp=100.0 + step=037 action=door(target_id='door_5', door_state='open') hp=100.0 + step=038 action=move(direction='south') hp=100.0 + step=039 action=move(direction='west') hp=100.0 + step=040 action=move(direction='east') hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=move(direction='west') hp=100.0 + step=043 action=move(direction='north') hp=100.0 + step=044 action=door(target_id='door_1', door_state='close') hp=100.0 + step=045 action=door(target_id='door_1', door_state='open') hp=100.0 + step=046 action=wait() hp=100.0 + step=047 action=wait() hp=100.0 + step=048 action=door(target_id='door_1', door_state='close') hp=100.0 + step=049 action=door(target_id='door_1', door_state='open') hp=100.0 + step=050 action=door(target_id='door_1', door_state='close') hp=100.0 + step=051 action=door(target_id='door_1', door_state='open') hp=100.0 + step=052 action=wait() hp=100.0 + step=053 action=wait() hp=100.0 + step=054 action=door(target_id='door_1', door_state='close') hp=100.0 + step=055 action=move(direction='east') hp=100.0 + step=056 action=door(target_id='door_2', door_state='close') hp= 99.5 + step=057 action=wait() hp= 99.0 + step=058 action=wait() hp= 98.5 + step=059 action=move(direction='south') hp= 98.0 + step=060 action=move(direction='north') hp= 96.0 + step=061 action=door(target_id='door_1', door_state='open') hp= 94.0 + step=062 action=door(target_id='door_1', door_state='close') hp= 92.0 + step=063 action=move(direction='west') hp= 90.0 + step=064 action=door(target_id='door_1', door_state='open') hp= 89.5 + step=065 action=move(direction='south') hp= 87.5 + step=066 action=move(direction='north') hp= 72.5 + step=067 action=door(target_id='door_1', door_state='close') hp= 70.5 + step=068 action=door(target_id='door_1', door_state='open') hp= 65.5 + step=069 action=wait() hp= 50.5 + step=070 action=door(target_id='door_1', door_state='close') hp= 35.5 + step=071 action=door(target_id='door_1', door_state='open') hp= 20.5 + step=072 action=door(target_id='door_1', door_state='close') hp= 5.5 +ep=0154 [medium] steps=072 reward= -9.510 evac=0 hp= 0.0 suc30=0.63 r30= +3.87 t=38s + step=001 action=move(direction='east') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=wait() hp= 99.5 + step=013 action=wait() hp= 87.5 + step=014 action=move(direction='south') hp= 72.5 + step=015 action=move(direction='north') hp= 72.0 + step=016 action=wait() hp= 57.0 + step=017 action=move(direction='east') hp= 42.0 + step=018 action=move(direction='south') hp= 30.0 + step=019 action=move(direction='west') hp= 29.5 + step=020 action=move(direction='east') hp= 14.5 + step=021 action=move(direction='west') hp= 12.5 +ep=0155 [medium] steps=021 reward= -14.840 evac=0 hp= 0.0 suc30=0.60 r30= +3.03 t=38s + >> PPO update samples=flushed pi_loss=-0.0004 v_loss=52.0180 entropy=1.2495 kl=0.0001 clip%=0.00 lr=9.07e-05 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=door(target_id='door_2', door_state='close') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp= 88.0 + step=008 action=move(direction='west') hp= 73.0 + step=009 action=move(direction='west') hp= 72.5 + step=010 action=wait() hp= 72.5 + step=011 action=move(direction='west') hp= 72.5 + step=012 action=move(direction='south') hp= 72.5 + step=013 action=move(direction='north') hp= 72.5 + step=014 action=wait() hp= 72.5 + step=015 action=move(direction='west') hp= 72.5 +ep=0156 [medium] steps=015 reward= +13.008 evac=1 hp= 72.5 suc30=0.60 r30= +2.90 t=39s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=move(direction='east') hp=100.0 + step=016 action=door(target_id='door_1', door_state='close') hp=100.0 + step=017 action=door(target_id='door_2', door_state='close') hp=100.0 + step=018 action=door(target_id='door_1', door_state='open') hp=100.0 + step=019 action=door(target_id='door_2', door_state='close') hp=100.0 + step=020 action=door(target_id='door_2', door_state='close') hp=100.0 + step=021 action=move(direction='west') hp=100.0 + step=022 action=move(direction='south') hp=100.0 + step=023 action=move(direction='east') hp=100.0 + step=024 action=move(direction='west') hp=100.0 + step=025 action=move(direction='north') hp=100.0 + step=026 action=wait() hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=wait() hp=100.0 + step=029 action=move(direction='east') hp=100.0 + step=030 action=move(direction='south') hp=100.0 + step=031 action=move(direction='west') hp=100.0 + step=032 action=door(target_id='door_1', door_state='close') hp=100.0 + step=033 action=move(direction='east') hp=100.0 + step=034 action=move(direction='west') hp=100.0 + step=035 action=move(direction='west') hp=100.0 +ep=0157 [medium] steps=035 reward= +15.730 evac=1 hp=100.0 suc30=0.63 r30= +3.86 t=39s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='east') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='east') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='north') hp= 99.5 + step=012 action=move(direction='north') hp= 84.5 + step=013 action=move(direction='east') hp= 84.0 + step=014 action=move(direction='west') hp= 83.5 + step=015 action=move(direction='north') hp= 68.5 + step=016 action=move(direction='north') hp= 68.0 + step=017 action=wait() hp= 68.0 + step=018 action=wait() hp= 68.0 + step=019 action=move(direction='north') hp= 68.0 + step=020 action=move(direction='west') hp= 68.0 +ep=0158 [medium] steps=020 reward= +11.760 evac=1 hp= 68.0 suc30=0.63 r30= +3.72 t=39s + step=001 action=move(direction='north') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=move(direction='south') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='north') hp=100.0 +ep=0159 [medium] steps=014 reward= +15.560 evac=1 hp=100.0 suc30=0.63 r30= +3.77 t=39s + step=001 action=door(target_id='door_6', door_state='open') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=door(target_id='door_1', door_state='close') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='east') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=door(target_id='door_1', door_state='close') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=door(target_id='door_1', door_state='close') hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='west') hp=100.0 +ep=0160 [medium] steps=015 reward= +16.880 evac=1 hp=100.0 suc30=0.67 r30= +4.86 t=39s + >> PPO update samples=flushed pi_loss=+0.0009 v_loss=8.6643 entropy=1.2206 kl=0.0001 clip%=0.00 lr=8.40e-05 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + ** EVAL [medium] reward=+16.573 success=1.00 steps=8.3 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='east') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=wait() hp= 99.5 + step=014 action=wait() hp= 99.0 + step=015 action=wait() hp= 98.5 + step=016 action=move(direction='south') hp= 98.0 + step=017 action=move(direction='north') hp= 97.5 + step=018 action=wait() hp= 97.0 + step=019 action=wait() hp= 96.5 + step=020 action=wait() hp= 96.0 + step=021 action=wait() hp= 95.5 + step=022 action=move(direction='south') hp= 95.5 + step=023 action=wait() hp= 95.0 + step=024 action=move(direction='north') hp= 94.5 + step=025 action=move(direction='south') hp= 94.5 + step=026 action=wait() hp= 94.0 + step=027 action=wait() hp= 82.0 + step=028 action=wait() hp= 67.0 + step=029 action=wait() hp= 52.0 + step=030 action=wait() hp= 37.0 + step=031 action=wait() hp= 22.0 + step=032 action=wait() hp= 17.0 + step=033 action=wait() hp= 12.0 + step=034 action=move(direction='north') hp= 7.0 + step=035 action=wait() hp= 6.5 + step=036 action=wait() hp= 6.0 + step=037 action=wait() hp= 6.0 + step=038 action=wait() hp= 6.0 + step=039 action=wait() hp= 6.0 + step=040 action=wait() hp= 6.0 + step=041 action=wait() hp= 6.0 + step=042 action=wait() hp= 6.0 + step=043 action=wait() hp= 6.0 + step=044 action=wait() hp= 6.0 + step=045 action=wait() hp= 6.0 + step=046 action=wait() hp= 6.0 + step=047 action=wait() hp= 6.0 + step=048 action=wait() hp= 6.0 + step=049 action=wait() hp= 6.0 + step=050 action=wait() hp= 6.0 + step=051 action=wait() hp= 6.0 + step=052 action=wait() hp= 6.0 + step=053 action=wait() hp= 6.0 + step=054 action=wait() hp= 6.0 + step=055 action=wait() hp= 6.0 + step=056 action=wait() hp= 6.0 + step=057 action=wait() hp= 6.0 + step=058 action=wait() hp= 6.0 + step=059 action=wait() hp= 6.0 + step=060 action=wait() hp= 6.0 + step=061 action=wait() hp= 6.0 + step=062 action=wait() hp= 6.0 + step=063 action=wait() hp= 6.0 + step=064 action=wait() hp= 6.0 + step=065 action=wait() hp= 6.0 + step=066 action=wait() hp= 6.0 + step=067 action=wait() hp= 6.0 + step=068 action=wait() hp= 6.0 + step=069 action=wait() hp= 6.0 + step=070 action=wait() hp= 6.0 + step=071 action=wait() hp= 6.0 + step=072 action=wait() hp= 6.0 + step=073 action=wait() hp= 6.0 + step=074 action=wait() hp= 6.0 + step=075 action=wait() hp= 6.0 + step=076 action=wait() hp= 6.0 + step=077 action=wait() hp= 6.0 + step=078 action=wait() hp= 6.0 + step=079 action=wait() hp= 6.0 + step=080 action=wait() hp= 6.0 + step=081 action=wait() hp= 6.0 + step=082 action=wait() hp= 6.0 + step=083 action=wait() hp= 6.0 + step=084 action=wait() hp= 6.0 + step=085 action=wait() hp= 6.0 + step=086 action=wait() hp= 6.0 + step=087 action=wait() hp= 6.0 + step=088 action=wait() hp= 6.0 + step=089 action=wait() hp= 6.0 + step=090 action=wait() hp= 6.0 + step=091 action=wait() hp= 6.0 + step=092 action=wait() hp= 6.0 + step=093 action=wait() hp= 6.0 + step=094 action=wait() hp= 6.0 + step=095 action=wait() hp= 6.0 + step=096 action=wait() hp= 6.0 + step=097 action=wait() hp= 6.0 + step=098 action=wait() hp= 6.0 + step=099 action=wait() hp= 6.0 + step=100 action=wait() hp= 6.0 + step=101 action=wait() hp= 6.0 + step=102 action=wait() hp= 6.0 + step=103 action=wait() hp= 6.0 + step=104 action=wait() hp= 6.0 + step=105 action=wait() hp= 6.0 + step=106 action=wait() hp= 6.0 + step=107 action=wait() hp= 6.0 + step=108 action=wait() hp= 6.0 + step=109 action=wait() hp= 6.0 + step=110 action=wait() hp= 6.0 + step=111 action=wait() hp= 6.0 + step=112 action=wait() hp= 6.0 + step=113 action=wait() hp= 6.0 + step=114 action=wait() hp= 6.0 + step=115 action=wait() hp= 6.0 + step=116 action=wait() hp= 6.0 + step=117 action=wait() hp= 6.0 + step=118 action=wait() hp= 6.0 + step=119 action=wait() hp= 6.0 + step=120 action=wait() hp= 6.0 + step=121 action=wait() hp= 6.0 + step=122 action=wait() hp= 6.0 + step=123 action=wait() hp= 6.0 + step=124 action=wait() hp= 6.0 + step=125 action=wait() hp= 6.0 + step=126 action=wait() hp= 6.0 + step=127 action=wait() hp= 6.0 + step=128 action=wait() hp= 6.0 + step=129 action=wait() hp= 6.0 + step=130 action=wait() hp= 6.0 + step=131 action=wait() hp= 6.0 + step=132 action=wait() hp= 6.0 + step=133 action=wait() hp= 6.0 + step=134 action=wait() hp= 6.0 + step=135 action=wait() hp= 6.0 + step=136 action=wait() hp= 6.0 + step=137 action=wait() hp= 6.0 + step=138 action=wait() hp= 6.0 + step=139 action=wait() hp= 6.0 + step=140 action=wait() hp= 6.0 + step=141 action=wait() hp= 6.0 + step=142 action=wait() hp= 6.0 + step=143 action=wait() hp= 6.0 + step=144 action=wait() hp= 6.0 + step=145 action=wait() hp= 6.0 + step=146 action=wait() hp= 6.0 + step=147 action=wait() hp= 6.0 + step=148 action=wait() hp= 6.0 + step=149 action=wait() hp= 6.0 + step=150 action=wait() hp= 6.0 +ep=0161 [medium] steps=150 reward= -16.180 evac=0 hp= 6.0 suc30=0.67 r30= +4.73 t=40s + step=001 action=move(direction='west') hp=100.0 + step=002 action=door(target_id='door_2', door_state='close') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='west') hp=100.0 +ep=0162 [medium] steps=008 reward= +16.040 evac=1 hp=100.0 suc30=0.67 r30= +4.80 t=40s + step=001 action=door(target_id='door_1', door_state='close') hp=100.0 + step=002 action=move(direction='south') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=door(target_id='door_1', door_state='open') hp=100.0 + step=011 action=move(direction='south') hp=100.0 + step=012 action=move(direction='west') hp=100.0 +ep=0163 [medium] steps=012 reward= +14.160 evac=1 hp=100.0 suc30=0.67 r30= +4.79 t=40s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=move(direction='east') hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=move(direction='north') hp=100.0 + step=023 action=move(direction='north') hp=100.0 + step=024 action=move(direction='west') hp=100.0 +ep=0164 [medium] steps=024 reward= +19.260 evac=1 hp=100.0 suc30=0.70 r30= +6.33 t=40s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=wait() hp=100.0 + step=006 action=move(direction='north') hp=100.0 +ep=0165 [medium] steps=006 reward= +15.790 evac=1 hp=100.0 suc30=0.70 r30= +6.33 t=40s + >> PPO update samples=flushed pi_loss=-0.0015 v_loss=15.5974 entropy=0.4246 kl=0.0000 clip%=0.00 lr=7.72e-05 + step=001 action=move(direction='north') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='east') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='west') hp=100.0 +ep=0166 [medium] steps=009 reward= +16.010 evac=1 hp=100.0 suc30=0.73 r30= +7.41 t=40s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=wait() hp= 88.0 + step=006 action=move(direction='north') hp= 73.0 + step=007 action=move(direction='west') hp= 61.0 + step=008 action=move(direction='west') hp= 61.0 + step=009 action=move(direction='south') hp= 61.0 + step=010 action=move(direction='west') hp= 61.0 + step=011 action=move(direction='north') hp= 61.0 + step=012 action=wait() hp= 61.0 + step=013 action=move(direction='west') hp= 61.0 + step=014 action=move(direction='north') hp= 61.0 + step=015 action=move(direction='south') hp= 61.0 + step=016 action=move(direction='north') hp= 61.0 + step=017 action=move(direction='west') hp= 61.0 + step=018 action=move(direction='west') hp= 61.0 + step=019 action=wait() hp= 61.0 + step=020 action=move(direction='south') hp= 61.0 + step=021 action=move(direction='north') hp= 61.0 + step=022 action=move(direction='north') hp= 61.0 + step=023 action=move(direction='north') hp= 61.0 + step=024 action=move(direction='north') hp= 61.0 + step=025 action=move(direction='west') hp= 61.0 + step=026 action=move(direction='west') hp= 61.0 + step=027 action=move(direction='west') hp= 61.0 + step=028 action=move(direction='west') hp= 61.0 + step=029 action=move(direction='west') hp= 61.0 +ep=0167 [medium] steps=029 reward= +14.365 evac=1 hp= 61.0 suc30=0.77 r30= +8.48 t=40s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='east') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=door(target_id='door_2', door_state='close') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 +ep=0168 [medium] steps=010 reward= +15.810 evac=1 hp=100.0 suc30=0.77 r30= +8.52 t=40s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='east') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=door(target_id='door_2', door_state='close') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='west') hp=100.0 +ep=0169 [medium] steps=016 reward= +16.260 evac=1 hp=100.0 suc30=0.80 r30= +9.75 t=41s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='east') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='east') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='east') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=move(direction='west') hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=move(direction='west') hp=100.0 +ep=0170 [medium] steps=021 reward= +16.360 evac=1 hp=100.0 suc30=0.80 r30= +9.77 t=41s + >> PPO update samples=flushed pi_loss=-0.0023 v_loss=2.6779 entropy=1.0867 kl=0.0001 clip%=0.00 lr=7.05e-05 + step=001 action=move(direction='west') hp=100.0 +ep=0171 [medium] steps=001 reward= +14.260 evac=1 hp=100.0 suc30=0.80 r30= +9.74 t=41s + step=001 action=move(direction='north') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=door(target_id='door_1', door_state='open') hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=move(direction='east') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=door(target_id='door_1', door_state='close') hp=100.0 + step=010 action=door(target_id='door_1', door_state='open') hp=100.0 + step=011 action=door(target_id='door_1', door_state='close') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=wait() hp=100.0 + step=014 action=wait() hp=100.0 + step=015 action=move(direction='south') hp=100.0 + step=016 action=door(target_id='door_1', door_state='open') hp=100.0 + step=017 action=move(direction='west') hp=100.0 +ep=0172 [medium] steps=017 reward= +14.690 evac=1 hp=100.0 suc30=0.83 r30= +10.63 t=41s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='south') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=move(direction='south') hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=move(direction='north') hp=100.0 + step=021 action=wait() hp=100.0 + step=022 action=move(direction='west') hp=100.0 +ep=0173 [medium] steps=022 reward= +17.940 evac=1 hp=100.0 suc30=0.83 r30= +10.56 t=41s + step=001 action=wait() hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='north') hp=100.0 +ep=0174 [medium] steps=008 reward= +15.990 evac=1 hp=100.0 suc30=0.83 r30= +10.58 t=41s + step=001 action=move(direction='west') hp=100.0 + step=002 action=door(target_id='door_8', door_state='close') hp=100.0 + step=003 action=door(target_id='door_8', door_state='close') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp= 99.5 + step=008 action=move(direction='north') hp= 94.5 + step=009 action=move(direction='west') hp= 79.5 + step=010 action=move(direction='north') hp= 64.5 + step=011 action=move(direction='south') hp= 64.0 + step=012 action=wait() hp= 49.0 + step=013 action=move(direction='west') hp= 34.0 + step=014 action=move(direction='west') hp= 19.0 + step=015 action=door(target_id='door_2', door_state='close') hp= 17.0 + step=016 action=move(direction='north') hp= 2.0 +ep=0175 [medium] steps=016 reward= -17.260 evac=0 hp= 0.0 suc30=0.83 r30= +10.48 t=41s + >> PPO update samples=flushed pi_loss=-0.0026 v_loss=56.9066 entropy=1.1764 kl=0.0001 clip%=0.00 lr=6.37e-05 + [ckpt] saved -> artifacts/pyre_ppo_http_ckpt.pt + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='north') hp= 85.0 + step=007 action=move(direction='east') hp= 70.0 + step=008 action=move(direction='west') hp= 65.0 + step=009 action=move(direction='north') hp= 50.0 + step=010 action=move(direction='west') hp= 48.0 + step=011 action=move(direction='west') hp= 47.5 + step=012 action=move(direction='north') hp= 47.5 + step=013 action=wait() hp= 47.5 + step=014 action=move(direction='north') hp= 47.5 + step=015 action=move(direction='north') hp= 47.0 + step=016 action=move(direction='north') hp= 47.0 + step=017 action=move(direction='north') hp= 47.0 + step=018 action=move(direction='north') hp= 47.0 + step=019 action=move(direction='west') hp= 47.0 + step=020 action=move(direction='north') hp= 47.0 + step=021 action=move(direction='west') hp= 47.0 +ep=0176 [medium] steps=021 reward= +12.625 evac=1 hp= 47.0 suc30=0.83 r30= +10.38 t=42s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=door(target_id='door_2', door_state='close') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 +ep=0177 [medium] steps=007 reward= +16.150 evac=1 hp=100.0 suc30=0.83 r30= +10.42 t=42s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='east') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=door(target_id='door_6', door_state='open') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='west') hp= 99.5 + step=011 action=move(direction='west') hp= 99.0 + step=012 action=wait() hp= 99.0 + step=013 action=move(direction='north') hp= 99.0 + step=014 action=move(direction='west') hp= 97.0 + step=015 action=move(direction='north') hp= 96.5 + step=016 action=wait() hp= 96.0 + step=017 action=door(target_id='door_1', door_state='close') hp= 95.5 + step=018 action=move(direction='south') hp= 95.0 + step=019 action=wait() hp= 94.5 + step=020 action=move(direction='west') hp= 94.0 +ep=0178 [medium] steps=020 reward= +13.820 evac=1 hp= 94.0 suc30=0.87 r30= +11.51 t=42s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=wait() hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 +ep=0179 [medium] steps=009 reward= +16.630 evac=1 hp=100.0 suc30=0.87 r30= +11.44 t=43s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 +ep=0180 [medium] steps=002 reward= +14.730 evac=1 hp=100.0 suc30=0.87 r30= +11.41 t=43s + >> PPO update samples=flushed pi_loss=-0.0006 v_loss=11.5171 entropy=1.1012 kl=0.0001 clip%=0.00 lr=5.70e-05 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + ** EVAL [medium] reward=+16.397 success=1.00 steps=8.0 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=door(target_id='door_2', door_state='close') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=door(target_id='door_2', door_state='close') hp=100.0 + step=008 action=door(target_id='door_2', door_state='close') hp=100.0 + step=009 action=move(direction='east') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=door(target_id='door_2', door_state='close') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=door(target_id='door_1', door_state='close') hp=100.0 + step=015 action=wait() hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=wait() hp=100.0 + step=019 action=door(target_id='door_1', door_state='open') hp=100.0 + step=020 action=move(direction='east') hp=100.0 + step=021 action=move(direction='north') hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=move(direction='west') hp=100.0 + step=024 action=door(target_id='door_1', door_state='close') hp=100.0 + step=025 action=move(direction='south') hp=100.0 + step=026 action=move(direction='north') hp=100.0 + step=027 action=wait() hp=100.0 + step=028 action=wait() hp=100.0 + step=029 action=move(direction='east') hp=100.0 + step=030 action=move(direction='west') hp=100.0 + step=031 action=move(direction='east') hp=100.0 + step=032 action=move(direction='west') hp=100.0 + step=033 action=move(direction='south') hp=100.0 + step=034 action=wait() hp=100.0 + step=035 action=move(direction='east') hp=100.0 + step=036 action=door(target_id='door_1', door_state='close') hp=100.0 + step=037 action=move(direction='north') hp=100.0 + step=038 action=wait() hp=100.0 + step=039 action=move(direction='west') hp=100.0 + step=040 action=door(target_id='door_1', door_state='open') hp=100.0 + step=041 action=door(target_id='door_1', door_state='open') hp=100.0 + step=042 action=move(direction='east') hp=100.0 + step=043 action=wait() hp=100.0 + step=044 action=move(direction='west') hp=100.0 + step=045 action=wait() hp=100.0 + step=046 action=move(direction='east') hp=100.0 + step=047 action=wait() hp=100.0 + step=048 action=move(direction='south') hp=100.0 + step=049 action=wait() hp=100.0 + step=050 action=wait() hp=100.0 + step=051 action=door(target_id='door_1', door_state='open') hp=100.0 + step=052 action=move(direction='north') hp=100.0 + step=053 action=door(target_id='door_1', door_state='close') hp=100.0 + step=054 action=wait() hp=100.0 + step=055 action=move(direction='west') hp=100.0 + step=056 action=move(direction='south') hp=100.0 + step=057 action=move(direction='east') hp=100.0 + step=058 action=move(direction='east') hp=100.0 + step=059 action=wait() hp=100.0 + step=060 action=move(direction='west') hp=100.0 + step=061 action=move(direction='north') hp=100.0 + step=062 action=move(direction='east') hp=100.0 + step=063 action=door(target_id='door_1', door_state='close') hp=100.0 + step=064 action=move(direction='south') hp=100.0 + step=065 action=move(direction='west') hp=100.0 + step=066 action=move(direction='south') hp=100.0 + step=067 action=wait() hp=100.0 + step=068 action=move(direction='north') hp=100.0 + step=069 action=move(direction='west') hp=100.0 + step=070 action=move(direction='north') hp=100.0 + step=071 action=door(target_id='door_1', door_state='close') hp=100.0 + step=072 action=wait() hp=100.0 + step=073 action=wait() hp=100.0 + step=074 action=move(direction='east') hp=100.0 + step=075 action=move(direction='west') hp=100.0 + step=076 action=door(target_id='door_1', door_state='close') hp=100.0 + step=077 action=wait() hp=100.0 + step=078 action=wait() hp=100.0 + step=079 action=door(target_id='door_1', door_state='close') hp=100.0 + step=080 action=door(target_id='door_1', door_state='close') hp=100.0 + step=081 action=move(direction='south') hp=100.0 + step=082 action=move(direction='north') hp=100.0 + step=083 action=move(direction='east') hp=100.0 + step=084 action=move(direction='east') hp=100.0 + step=085 action=move(direction='south') hp=100.0 + step=086 action=wait() hp=100.0 + step=087 action=door(target_id='door_1', door_state='close') hp=100.0 + step=088 action=move(direction='north') hp=100.0 + step=089 action=move(direction='west') hp=100.0 + step=090 action=move(direction='west') hp=100.0 + step=091 action=door(target_id='door_1', door_state='close') hp=100.0 + step=092 action=wait() hp=100.0 + step=093 action=wait() hp=100.0 + step=094 action=move(direction='south') hp=100.0 + step=095 action=move(direction='north') hp=100.0 + step=096 action=wait() hp=100.0 + step=097 action=door(target_id='door_1', door_state='close') hp=100.0 + step=098 action=wait() hp=100.0 + step=099 action=door(target_id='door_1', door_state='close') hp=100.0 + step=100 action=move(direction='south') hp=100.0 + step=101 action=move(direction='north') hp=100.0 + step=102 action=wait() hp=100.0 + step=103 action=move(direction='south') hp=100.0 + step=104 action=door(target_id='door_1', door_state='close') hp=100.0 + step=105 action=move(direction='east') hp=100.0 + step=106 action=wait() hp=100.0 + step=107 action=move(direction='north') hp=100.0 + step=108 action=move(direction='east') hp=100.0 + step=109 action=door(target_id='door_1', door_state='close') hp=100.0 + step=110 action=move(direction='west') hp=100.0 + step=111 action=move(direction='west') hp=100.0 + step=112 action=wait() hp=100.0 + step=113 action=move(direction='south') hp=100.0 + step=114 action=move(direction='east') hp=100.0 + step=115 action=move(direction='north') hp=100.0 + step=116 action=move(direction='west') hp=100.0 + step=117 action=move(direction='south') hp=100.0 + step=118 action=move(direction='south') hp=100.0 + step=119 action=door(target_id='door_1', door_state='close') hp=100.0 + step=120 action=move(direction='north') hp=100.0 + step=121 action=wait() hp=100.0 + step=122 action=door(target_id='door_1', door_state='open') hp=100.0 + step=123 action=move(direction='south') hp=100.0 + step=124 action=door(target_id='door_1', door_state='open') hp=100.0 + step=125 action=wait() hp=100.0 + step=126 action=door(target_id='door_1', door_state='close') hp=100.0 + step=127 action=move(direction='east') hp=100.0 + step=128 action=wait() hp=100.0 + step=129 action=door(target_id='door_1', door_state='open') hp=100.0 + step=130 action=move(direction='south') hp=100.0 + step=131 action=wait() hp=100.0 + step=132 action=wait() hp=100.0 + step=133 action=wait() hp=100.0 + step=134 action=move(direction='north') hp=100.0 + step=135 action=move(direction='north') hp=100.0 + step=136 action=door(target_id='door_1', door_state='close') hp=100.0 + step=137 action=wait() hp=100.0 + step=138 action=move(direction='east') hp=100.0 + step=139 action=move(direction='north') hp=100.0 + step=140 action=move(direction='west') hp=100.0 + step=141 action=door(target_id='door_1', door_state='open') hp=100.0 + step=142 action=move(direction='south') hp=100.0 + step=143 action=move(direction='south') hp=100.0 + step=144 action=door(target_id='door_1', door_state='open') hp=100.0 + step=145 action=move(direction='north') hp=100.0 + step=146 action=move(direction='west') hp=100.0 + step=147 action=move(direction='south') hp=100.0 + step=148 action=door(target_id='door_1', door_state='close') hp=100.0 + step=149 action=move(direction='north') hp=100.0 + step=150 action=move(direction='north') hp=100.0 +ep=0181 [medium] steps=150 reward= -18.260 evac=0 hp=100.0 suc30=0.83 r30= +10.31 t=43s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='west') hp=100.0 +ep=0182 [medium] steps=005 reward= +15.900 evac=1 hp=100.0 suc30=0.83 r30= +10.27 t=43s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 +ep=0183 [medium] steps=003 reward= +15.160 evac=1 hp=100.0 suc30=0.83 r30= +10.29 t=43s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='south') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=door(target_id='door_1', door_state='close') hp=100.0 + step=009 action=door(target_id='door_5', door_state='close') hp=100.0 + step=010 action=door(target_id='door_1', door_state='close') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=door(target_id='door_5', door_state='close') hp=100.0 + step=014 action=move(direction='west') hp=100.0 +ep=0184 [medium] steps=014 reward= +15.980 evac=1 hp=100.0 suc30=0.87 r30= +11.14 t=43s + step=001 action=move(direction='north') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='east') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=wait() hp= 85.0 + step=019 action=move(direction='south') hp= 70.0 + step=020 action=wait() hp= 69.5 + step=021 action=wait() hp= 69.0 + step=022 action=move(direction='south') hp= 69.0 + step=023 action=move(direction='north') hp= 69.0 + step=024 action=move(direction='south') hp= 69.0 + step=025 action=move(direction='north') hp= 69.0 + step=026 action=wait() hp= 69.0 + step=027 action=wait() hp= 69.0 + step=028 action=wait() hp= 69.0 + step=029 action=move(direction='south') hp= 69.0 + step=030 action=wait() hp= 69.0 + step=031 action=wait() hp= 69.0 + step=032 action=move(direction='south') hp= 69.0 + step=033 action=wait() hp= 69.0 + step=034 action=move(direction='north') hp= 69.0 + step=035 action=wait() hp= 69.0 + step=036 action=move(direction='north') hp= 69.0 + step=037 action=wait() hp= 69.0 + step=038 action=move(direction='south') hp= 69.0 + step=039 action=move(direction='east') hp= 69.0 + step=040 action=move(direction='east') hp= 69.0 + step=041 action=wait() hp= 69.0 + step=042 action=wait() hp= 69.0 + step=043 action=move(direction='east') hp= 69.0 + step=044 action=move(direction='west') hp= 69.0 + step=045 action=move(direction='west') hp= 69.0 + step=046 action=move(direction='west') hp= 69.0 + step=047 action=move(direction='north') hp= 69.0 + step=048 action=wait() hp= 69.0 + step=049 action=wait() hp= 69.0 + step=050 action=move(direction='east') hp= 69.0 + step=051 action=move(direction='west') hp= 69.0 + step=052 action=wait() hp= 69.0 + step=053 action=wait() hp= 69.0 + step=054 action=move(direction='east') hp= 69.0 + step=055 action=move(direction='south') hp= 69.0 + step=056 action=move(direction='west') hp= 69.0 + step=057 action=move(direction='north') hp= 69.0 + step=058 action=move(direction='south') hp= 69.0 + step=059 action=wait() hp= 69.0 + step=060 action=move(direction='north') hp= 69.0 + step=061 action=wait() hp= 69.0 + step=062 action=move(direction='east') hp= 69.0 + step=063 action=move(direction='west') hp= 69.0 + step=064 action=wait() hp= 69.0 + step=065 action=move(direction='east') hp= 69.0 + step=066 action=move(direction='west') hp= 69.0 + step=067 action=move(direction='east') hp= 69.0 + step=068 action=wait() hp= 69.0 + step=069 action=move(direction='west') hp= 69.0 + step=070 action=move(direction='south') hp= 69.0 + step=071 action=move(direction='south') hp= 69.0 + step=072 action=wait() hp= 69.0 + step=073 action=move(direction='east') hp= 69.0 + step=074 action=move(direction='west') hp= 69.0 + step=075 action=move(direction='south') hp= 69.0 + step=076 action=move(direction='east') hp= 69.0 + step=077 action=move(direction='west') hp= 69.0 + step=078 action=move(direction='north') hp= 69.0 + step=079 action=move(direction='south') hp= 69.0 + step=080 action=move(direction='east') hp= 69.0 + step=081 action=move(direction='north') hp= 69.0 + step=082 action=wait() hp= 69.0 + step=083 action=move(direction='west') hp= 69.0 + step=084 action=wait() hp= 69.0 + step=085 action=wait() hp= 69.0 + step=086 action=wait() hp= 69.0 + step=087 action=move(direction='south') hp= 69.0 + step=088 action=move(direction='north') hp= 69.0 + step=089 action=wait() hp= 69.0 + step=090 action=move(direction='north') hp= 69.0 + step=091 action=move(direction='south') hp= 69.0 + step=092 action=move(direction='north') hp= 69.0 + step=093 action=move(direction='south') hp= 69.0 + step=094 action=move(direction='south') hp= 69.0 + step=095 action=wait() hp= 69.0 + step=096 action=wait() hp= 69.0 + step=097 action=move(direction='north') hp= 69.0 + step=098 action=move(direction='south') hp= 69.0 + step=099 action=move(direction='east') hp= 69.0 + step=100 action=move(direction='west') hp= 69.0 + step=101 action=move(direction='south') hp= 69.0 + step=102 action=move(direction='east') hp= 69.0 + step=103 action=move(direction='north') hp= 69.0 + step=104 action=move(direction='west') hp= 69.0 + step=105 action=move(direction='east') hp= 69.0 + step=106 action=move(direction='north') hp= 69.0 + step=107 action=move(direction='north') hp= 69.0 + step=108 action=wait() hp= 69.0 + step=109 action=move(direction='north') hp= 69.0 + step=110 action=move(direction='west') hp= 69.0 + step=111 action=wait() hp= 69.0 + step=112 action=move(direction='east') hp= 69.0 + step=113 action=move(direction='west') hp= 69.0 + step=114 action=move(direction='south') hp= 69.0 + step=115 action=move(direction='north') hp= 69.0 + step=116 action=move(direction='east') hp= 69.0 + step=117 action=move(direction='west') hp= 69.0 + step=118 action=wait() hp= 69.0 + step=119 action=move(direction='east') hp= 69.0 + step=120 action=move(direction='west') hp= 69.0 + step=121 action=wait() hp= 69.0 + step=122 action=wait() hp= 69.0 + step=123 action=move(direction='east') hp= 69.0 + step=124 action=move(direction='south') hp= 69.0 + step=125 action=move(direction='south') hp= 69.0 + step=126 action=move(direction='west') hp= 69.0 + step=127 action=move(direction='south') hp= 69.0 + step=128 action=move(direction='south') hp= 69.0 + step=129 action=move(direction='south') hp= 69.0 + step=130 action=move(direction='north') hp= 69.0 + step=131 action=move(direction='north') hp= 69.0 + step=132 action=move(direction='south') hp= 69.0 + step=133 action=move(direction='east') hp= 69.0 + step=134 action=wait() hp= 69.0 + step=135 action=move(direction='north') hp= 69.0 + step=136 action=move(direction='south') hp= 69.0 + step=137 action=move(direction='west') hp= 69.0 + step=138 action=move(direction='north') hp= 69.0 + step=139 action=move(direction='south') hp= 69.0 + step=140 action=wait() hp= 69.0 + step=141 action=move(direction='south') hp= 69.0 + step=142 action=move(direction='east') hp= 69.0 + step=143 action=move(direction='east') hp= 69.0 + step=144 action=wait() hp= 69.0 + step=145 action=move(direction='east') hp= 69.0 + step=146 action=move(direction='west') hp= 69.0 + step=147 action=move(direction='north') hp= 69.0 + step=148 action=move(direction='west') hp= 69.0 + step=149 action=move(direction='east') hp= 69.0 + step=150 action=move(direction='west') hp= 69.0 +ep=0185 [medium] steps=150 reward= -20.690 evac=0 hp= 69.0 suc30=0.87 r30= +10.95 t=44s + >> PPO update samples=flushed pi_loss=-0.0119 v_loss=12.5926 entropy=1.3605 kl=0.0001 clip%=0.00 lr=5.02e-05 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=move(direction='south') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='west') hp=100.0 +ep=0186 [medium] steps=012 reward= +16.000 evac=1 hp=100.0 suc30=0.87 r30= +11.05 t=44s + step=001 action=door(target_id='door_8', door_state='close') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='south') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=door(target_id='door_3', door_state='close') hp=100.0 + step=012 action=door(target_id='door_3', door_state='close') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='west') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=door(target_id='door_3', door_state='close') hp=100.0 + step=017 action=move(direction='west') hp=100.0 + step=018 action=door(target_id='door_3', door_state='close') hp=100.0 + step=019 action=move(direction='west') hp=100.0 + step=020 action=move(direction='west') hp= 99.5 + step=021 action=move(direction='east') hp= 99.5 + step=022 action=wait() hp= 99.0 + step=023 action=move(direction='west') hp= 98.5 + step=024 action=move(direction='west') hp= 98.0 + step=025 action=door(target_id='door_1', door_state='close') hp= 98.0 + step=026 action=move(direction='north') hp= 98.0 + step=027 action=move(direction='east') hp= 98.0 + step=028 action=wait() hp= 97.5 + step=029 action=wait() hp= 97.0 + step=030 action=wait() hp= 96.5 + step=031 action=move(direction='east') hp= 96.0 + step=032 action=move(direction='west') hp= 94.0 + step=033 action=door(target_id='door_1', door_state='close') hp= 93.5 + step=034 action=move(direction='west') hp= 91.5 + step=035 action=move(direction='west') hp= 91.0 + step=036 action=door(target_id='door_1', door_state='close') hp= 90.5 + step=037 action=move(direction='west') hp= 90.0 + step=038 action=door(target_id='door_5', door_state='close') hp= 90.0 + step=039 action=move(direction='west') hp= 90.0 + step=040 action=door(target_id='door_1', door_state='open') hp= 90.0 + step=041 action=door(target_id='door_1', door_state='close') hp= 90.0 + step=042 action=door(target_id='door_1', door_state='open') hp= 90.0 + step=043 action=wait() hp= 90.0 + step=044 action=wait() hp= 90.0 + step=045 action=move(direction='east') hp= 90.0 + step=046 action=move(direction='west') hp= 89.5 + step=047 action=wait() hp= 89.5 + step=048 action=wait() hp= 89.5 + step=049 action=move(direction='east') hp= 89.5 + step=050 action=move(direction='west') hp= 89.0 + step=051 action=wait() hp= 89.0 + step=052 action=door(target_id='door_1', door_state='close') hp= 89.0 + step=053 action=door(target_id='door_1', door_state='open') hp= 89.0 + step=054 action=wait() hp= 89.0 + step=055 action=door(target_id='door_1', door_state='close') hp= 89.0 + step=056 action=door(target_id='door_1', door_state='open') hp= 89.0 + step=057 action=door(target_id='door_1', door_state='close') hp= 89.0 + step=058 action=wait() hp= 89.0 + step=059 action=door(target_id='door_1', door_state='open') hp= 89.0 + step=060 action=move(direction='south') hp= 89.0 + step=061 action=wait() hp= 89.0 + step=062 action=move(direction='west') hp= 89.0 +ep=0187 [medium] steps=062 reward= +11.835 evac=1 hp= 89.0 suc30=0.87 r30= +10.92 t=45s + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 +ep=0188 [medium] steps=004 reward= +15.550 evac=1 hp=100.0 suc30=0.87 r30= +11.05 t=45s + step=001 action=move(direction='north') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='north') hp=100.0 +ep=0189 [medium] steps=007 reward= +16.100 evac=1 hp=100.0 suc30=0.87 r30= +11.06 t=45s + step=001 action=move(direction='north') hp=100.0 + step=002 action=door(target_id='door_3', door_state='open') hp=100.0 + step=003 action=move(direction='east') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=door(target_id='door_2', door_state='open') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=door(target_id='door_1', door_state='close') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='east') hp=100.0 + step=014 action=door(target_id='door_1', door_state='open') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=door(target_id='door_1', door_state='close') hp=100.0 + step=017 action=move(direction='east') hp=100.0 + step=018 action=door(target_id='door_5', door_state='close') hp=100.0 + step=019 action=door(target_id='door_2', door_state='open') hp=100.0 + step=020 action=door(target_id='door_5', door_state='close') hp=100.0 + step=021 action=move(direction='west') hp=100.0 + step=022 action=wait() hp=100.0 + step=023 action=door(target_id='door_1', door_state='open') hp=100.0 + step=024 action=wait() hp=100.0 + step=025 action=move(direction='south') hp=100.0 + step=026 action=move(direction='west') hp=100.0 +ep=0190 [medium] steps=026 reward= +15.130 evac=1 hp=100.0 suc30=0.87 r30= +11.01 t=45s + >> PPO update samples=flushed pi_loss=-0.0017 v_loss=6.0652 entropy=1.2936 kl=0.0001 clip%=0.00 lr=4.35e-05 + step=001 action=move(direction='north') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=wait() hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='south') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='south') hp=100.0 + step=017 action=move(direction='south') hp=100.0 + step=018 action=move(direction='east') hp=100.0 + step=019 action=move(direction='north') hp= 99.5 + step=020 action=move(direction='west') hp= 99.5 + step=021 action=wait() hp= 99.5 + step=022 action=move(direction='north') hp= 99.5 + step=023 action=wait() hp= 99.5 + step=024 action=move(direction='east') hp= 99.5 + step=025 action=move(direction='west') hp= 99.5 + step=026 action=move(direction='north') hp= 99.5 + step=027 action=wait() hp= 99.5 + step=028 action=move(direction='south') hp= 99.5 + step=029 action=move(direction='south') hp= 99.5 + step=030 action=move(direction='north') hp= 97.5 + step=031 action=move(direction='north') hp= 97.0 + step=032 action=move(direction='north') hp= 97.0 + step=033 action=move(direction='east') hp= 97.0 + step=034 action=move(direction='west') hp= 97.0 + step=035 action=move(direction='west') hp= 97.0 +ep=0191 [medium] steps=035 reward= +15.075 evac=1 hp= 97.0 suc30=0.90 r30= +12.05 t=45s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='south') hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='north') hp=100.0 +ep=0192 [medium] steps=017 reward= +15.030 evac=1 hp=100.0 suc30=0.90 r30= +12.01 t=45s + step=001 action=move(direction='north') hp=100.0 + step=002 action=door(target_id='door_2', door_state='open') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='south') hp=100.0 + step=006 action=door(target_id='door_6', door_state='close') hp=100.0 + step=007 action=move(direction='east') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='south') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='west') hp=100.0 + step=014 action=move(direction='north') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=move(direction='west') hp=100.0 +ep=0193 [medium] steps=016 reward= +16.800 evac=1 hp=100.0 suc30=0.90 r30= +12.10 t=45s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='south') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='south') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=wait() hp=100.0 + step=011 action=wait() hp=100.0 + step=012 action=move(direction='north') hp=100.0 + step=013 action=move(direction='south') hp= 89.5 + step=014 action=move(direction='north') hp= 89.5 + step=015 action=wait() hp= 74.5 + step=016 action=wait() hp= 59.5 + step=017 action=wait() hp= 44.5 + step=018 action=wait() hp= 39.5 + step=019 action=wait() hp= 34.5 + step=020 action=wait() hp= 29.5 + step=021 action=move(direction='south') hp= 24.5 + step=022 action=move(direction='south') hp= 19.5 + step=023 action=wait() hp= 7.5 +ep=0194 [medium] steps=023 reward= -14.930 evac=0 hp= 0.0 suc30=0.87 r30= +10.96 t=45s + step=001 action=move(direction='west') hp=100.0 +ep=0195 [medium] steps=001 reward= +14.260 evac=1 hp=100.0 suc30=0.87 r30= +10.91 t=45s + >> PPO update samples=flushed pi_loss=-0.0001 v_loss=51.1310 entropy=1.0334 kl=0.0000 clip%=0.00 lr=3.67e-05 + step=001 action=door(target_id='door_4', door_state='close') hp=100.0 + step=002 action=door(target_id='door_4', door_state='close') hp=100.0 + step=003 action=door(target_id='door_4', door_state='close') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=door(target_id='door_4', door_state='close') hp=100.0 + step=007 action=door(target_id='door_7', door_state='close') hp=100.0 + step=008 action=door(target_id='door_8', door_state='close') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=door(target_id='door_4', door_state='close') hp=100.0 + step=012 action=door(target_id='door_3', door_state='open') hp=100.0 + step=013 action=door(target_id='door_3', door_state='open') hp=100.0 + step=014 action=door(target_id='door_4', door_state='close') hp=100.0 + step=015 action=move(direction='east') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=move(direction='east') hp=100.0 + step=018 action=move(direction='west') hp=100.0 + step=019 action=move(direction='north') hp=100.0 + step=020 action=door(target_id='door_8', door_state='close') hp=100.0 + step=021 action=move(direction='west') hp=100.0 + step=022 action=move(direction='west') hp=100.0 + step=023 action=move(direction='north') hp=100.0 + step=024 action=door(target_id='door_4', door_state='close') hp=100.0 + step=025 action=move(direction='west') hp=100.0 + step=026 action=move(direction='west') hp=100.0 + step=027 action=move(direction='west') hp=100.0 + step=028 action=move(direction='west') hp=100.0 + step=029 action=door(target_id='door_3', door_state='open') hp=100.0 + step=030 action=door(target_id='door_2', door_state='close') hp=100.0 + step=031 action=move(direction='west') hp=100.0 + step=032 action=wait() hp=100.0 + step=033 action=wait() hp=100.0 + step=034 action=move(direction='west') hp=100.0 + step=035 action=move(direction='east') hp=100.0 + step=036 action=door(target_id='door_2', door_state='open') hp=100.0 + step=037 action=move(direction='west') hp=100.0 + step=038 action=move(direction='south') hp=100.0 + step=039 action=move(direction='west') hp= 99.5 + step=040 action=move(direction='west') hp= 97.5 + step=041 action=move(direction='north') hp= 82.5 + step=042 action=move(direction='west') hp= 80.5 + step=043 action=move(direction='west') hp= 80.0 + step=044 action=wait() hp= 79.5 + step=045 action=wait() hp= 79.0 + step=046 action=door(target_id='door_1', door_state='open') hp= 78.5 + step=047 action=door(target_id='door_1', door_state='close') hp= 78.0 + step=048 action=wait() hp= 76.0 + step=049 action=wait() hp= 74.0 + step=050 action=door(target_id='door_1', door_state='open') hp= 72.0 + step=051 action=move(direction='south') hp= 70.0 + step=052 action=move(direction='west') hp= 68.0 +ep=0196 [medium] steps=052 reward= +10.780 evac=1 hp= 68.0 suc30=0.87 r30= +10.74 t=46s + step=001 action=move(direction='north') hp=100.0 + step=002 action=wait() hp=100.0 + step=003 action=move(direction='south') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='south') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='north') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='south') hp=100.0 + step=012 action=wait() hp=100.0 + step=013 action=move(direction='north') hp=100.0 + step=014 action=move(direction='east') hp=100.0 + step=015 action=move(direction='west') hp=100.0 + step=016 action=move(direction='north') hp=100.0 + step=017 action=move(direction='north') hp=100.0 + step=018 action=move(direction='north') hp=100.0 + step=019 action=move(direction='west') hp=100.0 +ep=0197 [medium] steps=019 reward= +16.220 evac=1 hp=100.0 suc30=0.87 r30= +10.80 t=46s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='east') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=door(target_id='door_3', door_state='open') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=door(target_id='door_3', door_state='open') hp=100.0 + step=007 action=wait() hp=100.0 + step=008 action=door(target_id='door_3', door_state='open') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=wait() hp= 85.0 + step=011 action=move(direction='east') hp= 70.0 + step=012 action=move(direction='east') hp= 58.0 + step=013 action=wait() hp= 58.0 + step=014 action=door(target_id='door_3', door_state='open') hp= 57.5 + step=015 action=door(target_id='door_3', door_state='open') hp= 57.0 + step=016 action=move(direction='west') hp= 56.5 + step=017 action=move(direction='south') hp= 51.5 + step=018 action=door(target_id='door_3', door_state='open') hp= 36.5 + step=019 action=door(target_id='door_3', door_state='close') hp= 31.5 + step=020 action=wait() hp= 26.5 + step=021 action=door(target_id='door_3', door_state='open') hp= 21.5 + step=022 action=wait() hp= 16.5 + step=023 action=wait() hp= 11.5 + step=024 action=wait() hp= 6.5 + step=025 action=wait() hp= 1.5 +ep=0198 [medium] steps=025 reward= -14.820 evac=0 hp= 0.0 suc30=0.83 r30= +9.78 t=46s + step=001 action=move(direction='west') hp=100.0 + step=002 action=door(target_id='door_4', door_state='close') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=door(target_id='door_2', door_state='close') hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=door(target_id='door_3', door_state='close') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=wait() hp=100.0 + step=010 action=move(direction='north') hp=100.0 + step=011 action=move(direction='north') hp=100.0 + step=012 action=door(target_id='door_3', door_state='open') hp=100.0 + step=013 action=door(target_id='door_3', door_state='open') hp=100.0 + step=014 action=move(direction='south') hp=100.0 + step=015 action=move(direction='north') hp=100.0 + step=016 action=wait() hp=100.0 + step=017 action=move(direction='east') hp=100.0 + step=018 action=move(direction='east') hp=100.0 + step=019 action=move(direction='west') hp=100.0 + step=020 action=move(direction='west') hp=100.0 + step=021 action=door(target_id='door_3', door_state='open') hp=100.0 + step=022 action=move(direction='south') hp=100.0 + step=023 action=move(direction='north') hp=100.0 + step=024 action=move(direction='east') hp=100.0 + step=025 action=door(target_id='door_3', door_state='open') hp=100.0 + step=026 action=move(direction='west') hp=100.0 + step=027 action=door(target_id='door_3', door_state='open') hp=100.0 + step=028 action=wait() hp=100.0 + step=029 action=door(target_id='door_3', door_state='open') hp=100.0 + step=030 action=move(direction='south') hp=100.0 + step=031 action=move(direction='north') hp=100.0 + step=032 action=move(direction='east') hp=100.0 + step=033 action=move(direction='west') hp=100.0 + step=034 action=move(direction='south') hp=100.0 + step=035 action=wait() hp=100.0 + step=036 action=door(target_id='door_3', door_state='open') hp=100.0 + step=037 action=move(direction='east') hp=100.0 + step=038 action=wait() hp=100.0 + step=039 action=move(direction='west') hp=100.0 + step=040 action=move(direction='north') hp=100.0 + step=041 action=wait() hp=100.0 + step=042 action=wait() hp=100.0 + step=043 action=door(target_id='door_3', door_state='open') hp=100.0 + step=044 action=wait() hp=100.0 + step=045 action=wait() hp=100.0 + step=046 action=door(target_id='door_3', door_state='open') hp=100.0 + step=047 action=wait() hp=100.0 + step=048 action=move(direction='south') hp=100.0 + step=049 action=door(target_id='door_3', door_state='open') hp=100.0 + step=050 action=door(target_id='door_3', door_state='open') hp=100.0 + step=051 action=door(target_id='door_3', door_state='open') hp=100.0 + step=052 action=move(direction='south') hp=100.0 + step=053 action=move(direction='north') hp=100.0 + step=054 action=move(direction='north') hp=100.0 + step=055 action=wait() hp=100.0 + step=056 action=move(direction='east') hp=100.0 + step=057 action=move(direction='east') hp=100.0 + step=058 action=move(direction='west') hp=100.0 + step=059 action=door(target_id='door_3', door_state='open') hp=100.0 + step=060 action=move(direction='west') hp=100.0 + step=061 action=door(target_id='door_3', door_state='open') hp=100.0 + step=062 action=wait() hp=100.0 + step=063 action=move(direction='south') hp=100.0 + step=064 action=move(direction='north') hp=100.0 + step=065 action=move(direction='east') hp=100.0 + step=066 action=wait() hp=100.0 + step=067 action=wait() hp=100.0 + step=068 action=wait() hp=100.0 + step=069 action=wait() hp=100.0 + step=070 action=door(target_id='door_3', door_state='open') hp=100.0 + step=071 action=wait() hp=100.0 + step=072 action=move(direction='west') hp=100.0 + step=073 action=wait() hp=100.0 + step=074 action=wait() hp=100.0 + step=075 action=move(direction='south') hp=100.0 + step=076 action=wait() hp=100.0 + step=077 action=door(target_id='door_3', door_state='open') hp=100.0 + step=078 action=move(direction='north') hp=100.0 + step=079 action=move(direction='east') hp=100.0 + step=080 action=wait() hp=100.0 + step=081 action=door(target_id='door_3', door_state='open') hp=100.0 + step=082 action=move(direction='south') hp=100.0 + step=083 action=move(direction='east') hp=100.0 + step=084 action=move(direction='north') hp=100.0 + step=085 action=move(direction='west') hp=100.0 + step=086 action=wait() hp=100.0 + step=087 action=move(direction='west') hp=100.0 + step=088 action=move(direction='south') hp=100.0 + step=089 action=move(direction='north') hp=100.0 + step=090 action=door(target_id='door_3', door_state='open') hp=100.0 + step=091 action=wait() hp=100.0 + step=092 action=wait() hp=100.0 + step=093 action=wait() hp=100.0 + step=094 action=move(direction='east') hp=100.0 + step=095 action=move(direction='east') hp=100.0 + step=096 action=wait() hp=100.0 + step=097 action=move(direction='south') hp=100.0 + step=098 action=door(target_id='door_3', door_state='open') hp=100.0 + step=099 action=move(direction='west') hp=100.0 + step=100 action=move(direction='east') hp=100.0 + step=101 action=move(direction='west') hp=100.0 + step=102 action=move(direction='east') hp=100.0 + step=103 action=move(direction='south') hp=100.0 + step=104 action=wait() hp=100.0 + step=105 action=move(direction='west') hp=100.0 + step=106 action=move(direction='west') hp=100.0 + step=107 action=move(direction='east') hp=100.0 + step=108 action=wait() hp=100.0 + step=109 action=move(direction='east') hp=100.0 + step=110 action=wait() hp=100.0 + step=111 action=move(direction='north') hp=100.0 + step=112 action=wait() hp=100.0 + step=113 action=move(direction='south') hp=100.0 + step=114 action=move(direction='west') hp=100.0 + step=115 action=wait() hp=100.0 + step=116 action=door(target_id='door_3', door_state='open') hp=100.0 + step=117 action=door(target_id='door_3', door_state='close') hp=100.0 + step=118 action=move(direction='east') hp=100.0 + step=119 action=move(direction='west') hp=100.0 + step=120 action=move(direction='east') hp=100.0 + step=121 action=move(direction='north') hp=100.0 + step=122 action=move(direction='west') hp=100.0 + step=123 action=move(direction='west') hp=100.0 + step=124 action=wait() hp=100.0 + step=125 action=wait() hp=100.0 + step=126 action=move(direction='north') hp=100.0 + step=127 action=move(direction='east') hp=100.0 + step=128 action=door(target_id='door_3', door_state='open') hp=100.0 + step=129 action=move(direction='south') hp=100.0 + step=130 action=move(direction='west') hp=100.0 + step=131 action=move(direction='north') hp=100.0 + step=132 action=wait() hp=100.0 + step=133 action=move(direction='east') hp=100.0 + step=134 action=move(direction='west') hp=100.0 + step=135 action=move(direction='south') hp=100.0 + step=136 action=door(target_id='door_3', door_state='open') hp=100.0 + step=137 action=door(target_id='door_3', door_state='open') hp=100.0 + step=138 action=door(target_id='door_3', door_state='open') hp=100.0 + step=139 action=move(direction='east') hp=100.0 + step=140 action=move(direction='west') hp=100.0 + step=141 action=move(direction='south') hp=100.0 + step=142 action=move(direction='east') hp=100.0 + step=143 action=move(direction='east') hp=100.0 + step=144 action=move(direction='west') hp=100.0 + step=145 action=move(direction='west') hp=100.0 + step=146 action=wait() hp=100.0 + step=147 action=wait() hp=100.0 + step=148 action=wait() hp=100.0 + step=149 action=door(target_id='door_3', door_state='open') hp=100.0 + step=150 action=move(direction='east') hp=100.0 +ep=0199 [medium] steps=150 reward= -22.310 evac=0 hp=100.0 suc30=0.80 r30= +8.49 t=47s + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=wait() hp=100.0 + step=005 action=move(direction='north') hp=100.0 + step=006 action=wait() hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='north') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='north') hp= 88.0 + step=013 action=move(direction='north') hp= 88.0 + step=014 action=move(direction='west') hp= 88.0 + step=015 action=move(direction='east') hp= 88.0 + step=016 action=move(direction='west') hp= 88.0 + step=017 action=move(direction='north') hp= 88.0 + step=018 action=wait() hp= 88.0 + step=019 action=move(direction='north') hp= 88.0 + step=020 action=move(direction='north') hp= 88.0 + step=021 action=move(direction='south') hp= 88.0 + step=022 action=move(direction='south') hp= 88.0 + step=023 action=move(direction='north') hp= 87.5 + step=024 action=move(direction='north') hp= 87.5 + step=025 action=move(direction='north') hp= 87.5 + step=026 action=move(direction='west') hp= 87.5 +ep=0200 [medium] steps=026 reward= +14.982 evac=1 hp= 87.5 suc30=0.80 r30= +8.45 t=47s + >> PPO update samples=flushed pi_loss=-0.0949 v_loss=24.6195 entropy=1.3600 kl=0.0001 clip%=0.00 lr=3.00e-05 + step=001 action=move(direction='north') hp=100.0 + step=002 action=move(direction='north') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='west') hp=100.0 + step=004 action=move(direction='west') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='north') hp=100.0 + step=007 action=move(direction='north') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=001 action=move(direction='west') hp=100.0 + step=002 action=move(direction='west') hp=100.0 + step=003 action=move(direction='north') hp=100.0 + step=004 action=move(direction='north') hp=100.0 + step=005 action=move(direction='west') hp=100.0 + step=006 action=move(direction='west') hp=100.0 + step=007 action=move(direction='west') hp=100.0 + step=008 action=move(direction='west') hp=100.0 + step=009 action=move(direction='west') hp=100.0 + step=010 action=move(direction='west') hp=100.0 + step=011 action=move(direction='west') hp=100.0 + step=012 action=move(direction='west') hp=100.0 + step=013 action=move(direction='west') hp= 98.0 + step=014 action=move(direction='north') hp= 93.0 + step=015 action=wait() hp= 88.0 + step=016 action=wait() hp= 83.0 + step=017 action=wait() hp= 78.0 + step=018 action=wait() hp= 73.0 + step=019 action=wait() hp= 68.0 + step=020 action=wait() hp= 63.0 + step=021 action=wait() hp= 58.0 + step=022 action=wait() hp= 53.0 + step=023 action=wait() hp= 48.0 + step=024 action=wait() hp= 43.0 + step=025 action=wait() hp= 38.0 + step=026 action=wait() hp= 33.0 + step=027 action=wait() hp= 28.0 + step=028 action=wait() hp= 23.0 + step=029 action=wait() hp= 18.0 + step=030 action=wait() hp= 13.0 + step=031 action=wait() hp= 8.0 + step=032 action=wait() hp= 3.0 + ** EVAL [medium] reward=+6.807 success=0.67 steps=14.7 + [ckpt] saved -> artifacts/pyre_ppo_http_ckpt.pt + +[done] Model saved -> artifacts/pyre_ppo_fixed.pti +[done] Metrics CSV -> artifacts/pyre_ppo_fixed.csv +[done] Eval CSV -> artifacts/pyre_ppo_fixed_eval.csv +[done] Graph PNG -> artifacts/pyre_ppo_fixed.png + +[summary] 200 episodes in 48.2s (4.2 eps/s) +[summary] Final success rate (last 30): 0.80 +[summary] Final reward mean (last 30): +8.446