seed: 42
num_steps_per_env: 24
max_iterations: 10001
obs_groups:
  policy: !!python/tuple
  - policy
  critic: !!python/tuple
  - critic
save_interval: 100
experiment_name: go2_velocity
run_name: ''
logger: tensorboard
wandb_project: mjlab
wandb_tags: !!python/tuple []
resume: false
load_run: .*
load_checkpoint: model_.*.pt
clip_actions: null
class_name: OnPolicyRunner
policy:
  init_noise_std: 1.0
  noise_std_type: scalar
  actor_obs_normalization: true
  critic_obs_normalization: true
  actor_hidden_dims: !!python/tuple
  - 512
  - 256
  - 128
  critic_hidden_dims: !!python/tuple
  - 512
  - 256
  - 128
  activation: elu
algorithm:
  num_learning_epochs: 5
  num_mini_batches: 4
  learning_rate: 0.001
  schedule: adaptive
  gamma: 0.99
  lam: 0.95
  entropy_coef: 0.01
  desired_kl: 0.01
  max_grad_norm: 1.0
  value_loss_coef: 1.0
  use_clipped_value_loss: true
  clip_param: 0.2
  normalize_advantage_per_mini_batch: false