# Experiment configuration: PPO on the RBC2D-medium-v0 environment.
# Values of the form ${...} are interpolations that reference other keys in
# this file (e.g. ${seed} -> top-level `seed`).

# --- Environment -----------------------------------------------------------
env_id: RBC2D-medium-v0
env_kwargs: {}
eval_env_kwargs: {}

# --- Training run ----------------------------------------------------------
seed: 2
rl_mode: sarl
# NOTE(review): this is a float, so the `//` in `eval_freq` below may yield a
# float as well (e.g. 5000.0) - confirm the consumers accept that.
total_timesteps: 50000.0
n_eval_steps: 10
# Derived value: total_timesteps floor-divided by n_eval_steps.
# `eval` is not a built-in resolver; presumably it is registered by the
# project before this config is resolved - TODO confirm.
eval_freq: ${eval:${total_timesteps} // ${n_eval_steps}}
n_eval_episodes: 1
continue_training: false
checkpoint_latest: true
rl_device: cpu

# --- Evaluation callback ---------------------------------------------------
# `_target_` names the class to build; the remaining keys mirror the
# top-level settings of the same name via interpolation.
eval_callback:
  _target_: fluidgym.integration.sb3.EvalCallback
  eval_freq: ${eval_freq}
  n_eval_episodes: ${n_eval_episodes}
  use_wandb: ${wandb.enable}
  checkpoint_latest: ${checkpoint_latest}

# --- Weights & Biases logging ----------------------------------------------
wandb:
  enable: true
  entity: safe-autonomous-systems
  project: jbecktepe-fluidgym

# --- Algorithm -------------------------------------------------------------
algorithm:
  name: PPO
  # `obj` lists the target class followed by the arguments configured for it.
  obj:
    _target_: stable_baselines3.ppo.ppo.PPO
    policy: MlpPolicy
    learning_rate: 0.0003
    n_steps: 2048
    batch_size: 64
    n_epochs: 10
    gamma: 0.99
    gae_lambda: 0.95
    clip_range: 0.2
    clip_range_vf: null
    normalize_advantage: true
    ent_coef: 0.01
    vf_coef: 0.5
    max_grad_norm: 0.5
    use_sde: false
    sde_sample_freq: -1
    rollout_buffer_class: null
    rollout_buffer_kwargs: null
    target_kl: null
    stats_window_size: 100
    tensorboard_log: null
    verbose: 0
    # Seed and device follow the top-level `seed` / `rl_device` settings.
    seed: ${seed}
    device: ${rl_device}
    _init_setup_model: true

# --- Cluster ---------------------------------------------------------------
cluster: {}

# --- Testing ---------------------------------------------------------------
# The test environment, its kwargs and the RL mode default to the training
# values through interpolation.
test_env_id: ${env_id}
test_env_kwargs: ${env_kwargs}
test_rl_mode: ${rl_mode}
n_test_episodes: 10
deterministic: true
save_frames: true
render_3d: true