| env_id: RBC2D-medium-v0 | |
| env_kwargs: {} | |
| eval_env_kwargs: {} | |
| seed: 2 | |
| rl_mode: sarl | |
| total_timesteps: 50000.0 | |
| n_eval_steps: 10 | |
| eval_freq: ${eval:${total_timesteps} // ${n_eval_steps}} | |
| n_eval_episodes: 1 | |
| continue_training: false | |
| checkpoint_latest: true | |
| rl_device: cpu | |
| eval_callback: | |
| _target_: fluidgym.integration.sb3.EvalCallback | |
| eval_freq: ${eval_freq} | |
| n_eval_episodes: ${n_eval_episodes} | |
| use_wandb: ${wandb.enable} | |
| checkpoint_latest: ${checkpoint_latest} | |
| wandb: | |
| enable: true | |
| entity: safe-autonomous-systems | |
| project: jbecktepe-fluidgym | |
| algorithm: | |
| name: SAC | |
| obj: | |
| _target_: stable_baselines3.sac.sac.SAC | |
| policy: MlpPolicy | |
| learning_rate: 0.0003 | |
| buffer_size: 1000000 | |
| learning_starts: 100 | |
| batch_size: 256 | |
| tau: 0.005 | |
| gamma: 0.99 | |
| train_freq: 1 | |
| gradient_steps: -1 | |
| action_noise: null | |
| replay_buffer_class: null | |
| replay_buffer_kwargs: null | |
| optimize_memory_usage: false | |
| ent_coef: auto | |
| target_update_interval: 1 | |
| target_entropy: auto | |
| use_sde: false | |
| sde_sample_freq: -1 | |
| use_sde_at_warmup: false | |
| stats_window_size: 100 | |
| tensorboard_log: null | |
| policy_kwargs: {} | |
| verbose: 0 | |
| seed: ${seed} | |
| device: cuda | |
| _init_setup_model: true | |
| cluster: {} | |
| test_env_id: ${env_id} | |
| test_env_kwargs: ${env_kwargs} | |
| test_rl_mode: ${rl_mode} | |
| n_test_episodes: 10 | |
| deterministic: true | |
| save_frames: true | |
| render_3d: true | |