params:
  seed: 42

  env:
    clip_observations: 5.0
    clip_actions: 1.0

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: false
    space:
      continuous:
        mu_activation: None
        sigma_activation: None
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: true
    mlp:
      units:
        - 512
        - 256
        - 128
      activation: elu
      d2rl: false
      initializer:
        name: default
      regularizer:
        name: None

  load_checkpoint: false
  load_path: ''

  config:
    name: Galaxea-LongTrajectoryAssembly-Direct-v0
    full_experiment_name: LongTrajectoryAssembly
    env_name: rlgpu
    device: cuda:0
    device_name: cuda:0
    multi_gpu: false
    ppo: true
    mixed_precision: false
    normalize_input: true
    normalize_value: true
    num_actors: -1
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: true
    gamma: 0.99
    tau: 0.95
    learning_rate: 3e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 100000
    max_epochs: 3000
    save_best_after: 100
    save_frequency: 100
    print_stats: true
    grad_norm: 1.0
    entropy_coef: 0.001
    truncate_grads: true
    e_clip: 0.2
    clip_value: true
    horizon_length: 32
    minibatch_size: 16384
    mini_epochs: 8
    critic_coef: 2
    bounds_loss_coef: 0.0001
    games_to_track: 100
    player:
      deterministic: true
      games_num: 1000000
      print_stats: true
    train_dir: /home/ubuntu/gearboxAssembly/logs/rl_games/Galaxea-LongTrajectoryAssembly-Direct-v0