| behaviors: | |
| Pyramids: | |
| trainer_type: sac | |
| hyperparameters: | |
| learning_rate: 0.0003 | |
| learning_rate_schedule: constant | |
| batch_size: 128 | |
| buffer_size: 2000000 | |
| buffer_init_steps: 1000 | |
| tau: 0.01 | |
| steps_per_update: 10.0 | |
| save_replay_buffer: false | |
| init_entcoef: 0.01 | |
| reward_signal_steps_per_update: 10.0 | |
| network_settings: | |
| normalize: false | |
| hidden_units: 512 | |
| num_layers: 3 | |
| vis_encode_type: simple | |
| reward_signals: | |
| extrinsic: | |
| gamma: 0.995 | |
| strength: 2.0 | |
| gail: | |
| gamma: 0.99 | |
| strength: 0.01 | |
| learning_rate: 0.0003 | |
| use_actions: true | |
| use_vail: false | |
| demo_path: Project/Assets/ML-Agents/Examples/Pyramids/Demos/ExpertPyramid.demo | |
| keep_checkpoints: 5 | |
| max_steps: 3000000 | |
| time_horizon: 128 | |
| summary_freq: 30000 | |