---
# rl_games PPO agent configuration (a2c_continuous) for the
# Galaxea-LongTrajectoryAssembly-Direct-v0 task.
# NOTE(review): the original file had all structure collapsed onto one line;
# nesting below is reconstructed from the standard rl_games config schema.
params:
  seed: 42

  # Observation/action clipping applied by the rl_games env wrapper.
  env:
    clip_observations: 5.0
    clip_actions: 1.0

  algo:
    name: a2c_continuous

  model:
    name: continuous_a2c_logstd

  network:
    name: actor_critic
    separate: false
    space:
      continuous:
        # rl_games expects the literal string 'None' (not YAML null) to mean
        # "no activation"; quoted to make the string type explicit.
        mu_activation: 'None'
        sigma_activation: 'None'
        mu_init:
          name: default
        sigma_init:
          name: const_initializer
          val: 0
        fixed_sigma: true
    mlp:
      units: [512, 256, 128]
      activation: elu
      d2rl: false
      initializer:
        name: default
      # 'None' is the rl_games sentinel string for "no regularizer".
      regularizer:
        name: 'None'

  load_checkpoint: false
  load_path: ''

  config:
    name: Galaxea-LongTrajectoryAssembly-Direct-v0
    full_experiment_name: LongTrajectoryAssembly
    env_name: rlgpu
    device: 'cuda:0'
    device_name: 'cuda:0'
    multi_gpu: false
    ppo: true
    mixed_precision: false
    normalize_input: true
    normalize_value: true
    # -1 lets the launcher substitute the actual number of parallel envs.
    num_actors: -1
    reward_shaper:
      scale_value: 1.0
    normalize_advantage: true
    gamma: 0.99
    tau: 0.95
    # Written as 3.0e-4 (not 3e-4): YAML 1.1 loaders such as PyYAML resolve
    # bare 3e-4 as the string '3e-4', while 3.0e-4 is a proper float.
    learning_rate: 3.0e-4
    lr_schedule: adaptive
    kl_threshold: 0.008
    score_to_win: 100000
    max_epochs: 3000
    save_best_after: 100
    save_frequency: 100
    print_stats: true
    grad_norm: 1.0
    entropy_coef: 0.001
    truncate_grads: true
    e_clip: 0.2
    clip_value: true
    horizon_length: 32
    minibatch_size: 16384
    mini_epochs: 8
    critic_coef: 2
    bounds_loss_coef: 0.0001
    games_to_track: 100
    player:
      deterministic: true
      games_num: 1000000
      print_stats: true
    # NOTE(review): train_dir is placed under `config`, where rl_games reads
    # it (a2c_common). The mangled original made its nesting ambiguous —
    # confirm it was not intended as a top-level key.
    train_dir: /home/ubuntu/gearboxAssembly/logs/rl_games/Galaxea-LongTrajectoryAssembly-Direct-v0