| env_id: RBC2D-medium-v0 | |
| env_kwargs: {} | |
| eval_env_kwargs: {} | |
| seed: 2 | |
| rl_mode: sarl | |
| total_timesteps: 50000.0 | |
| n_eval_steps: 10 | |
| eval_freq: ${eval:${total_timesteps} // ${n_eval_steps}} | |
| n_eval_episodes: 1 | |
| continue_training: false | |
| checkpoint_latest: true | |
| rl_device: cpu | |
| eval_callback: | |
| _target_: fluidgym.integration.sb3.EvalCallback | |
| eval_freq: ${eval_freq} | |
| n_eval_episodes: ${n_eval_episodes} | |
| use_wandb: ${wandb.enable} | |
| checkpoint_latest: ${checkpoint_latest} | |
| wandb: | |
| enable: true | |
| entity: safe-autonomous-systems | |
| project: jbecktepe-fluidgym | |
| algorithm: | |
| name: PPO | |
| obj: | |
| _target_: stable_baselines3.ppo.ppo.PPO | |
| policy: MlpPolicy | |
| learning_rate: 0.0003 | |
| n_steps: 2048 | |
| batch_size: 64 | |
| n_epochs: 10 | |
| gamma: 0.99 | |
| gae_lambda: 0.95 | |
| clip_range: 0.2 | |
| clip_range_vf: null | |
| normalize_advantage: true | |
| ent_coef: 0.01 | |
| vf_coef: 0.5 | |
| max_grad_norm: 0.5 | |
| use_sde: false | |
| sde_sample_freq: -1 | |
| rollout_buffer_class: null | |
| rollout_buffer_kwargs: null | |
| target_kl: null | |
| stats_window_size: 100 | |
| tensorboard_log: null | |
| verbose: 0 | |
| seed: ${seed} | |
| device: ${rl_device} | |
| _init_setup_model: true | |
| cluster: {} | |
| test_env_id: ${env_id} | |
| test_env_kwargs: ${env_kwargs} | |
| test_rl_mode: ${rl_mode} | |
| n_test_episodes: 10 | |
| deterministic: true | |
| save_frames: true | |
| render_3d: true | |