| !!python/object/apply:collections.OrderedDict | |
| - - - alive_bonus_offset | |
| - -1 | |
| - - delta_std | |
| - 0.025 | |
| - - learning_rate | |
| - 0.03 | |
| - - n_delta | |
| - 40 | |
| - - n_envs | |
| - 16 | |
| - - n_timesteps | |
| - 75000000.0 | |
| - - n_top | |
| - 30 | |
| - - normalize | |
| - dict(norm_obs=True, norm_reward=False) | |
| - - policy | |
| - LinearPolicy | |