| !!python/object/apply:collections.OrderedDict | |
| - - - alive_bonus_offset | |
| - -1 | |
| - - delta_std | |
| - 0.025 | |
| - - learning_rate | |
| - 0.01 | |
| - - n_delta | |
| - 8 | |
| - - n_envs | |
| - 16 | |
| - - n_timesteps | |
| - 7000000.0 | |
| - - n_top | |
| - 4 | |
| - - normalize | |
| - dict(norm_obs=True, norm_reward=False) | |
| - - policy | |
| - LinearPolicy | |