| !!python/object/apply:collections.OrderedDict |
| - - - batch_size |
| - 512 |
| - - buffer_size |
| - 1000000 |
| - - env_wrapper |
| - sb3_contrib.common.wrappers.TimeFeatureWrapper |
| - - gamma |
| - 0.98 |
| - - learning_rate |
| - 0.001 |
| - - n_timesteps |
| - 3000000.0 |
| - - policy |
| - MultiInputPolicy |
| - - policy_kwargs |
| - dict(net_arch=[512, 512, 512], n_critics=2) |
| - - replay_buffer_class |
| - HerReplayBuffer |
| - - replay_buffer_kwargs |
| - dict( online_sampling=True, goal_selection_strategy='future', n_sampled_goal=4, |
| max_episode_length=100 ) |
| - - tau |
| - 0.005 |
|
|