{
  "env_id": "CartPole-v1",
  "algo_id": "ppo",
  "n_steps": 32,
  "batch_size": 256,
  "n_epochs": 20,
  "max_epochs": null,
  "n_timesteps": 100000.0,
  "seed": 42,
  "n_envs": 8,
  "subproc": null,
  "env_wrappers": [],
  "env_kwargs": {},
  "normalize_obs": false,
  "normalize_reward": false,
  "frame_stack": 1,
  "obs_type": "rgb",
  "hidden_dims": [
    64,
    64
  ],
  "activation": "tanh",
  "policy": "MlpPolicy",
  "policy_kwargs": {},
  "policy_lr": 0.001,
  "learning_rate": 0.001,
  "learning_rate_schedule": "linear",
  "max_grad_norm": 0.5,
  "gamma": 0.98,
  "gae_lambda": 0.8,
  "ent_coef": 0.0,
  "vf_coef": 0.5,
  "clip_range": 0.2,
  "clip_range_schedule": "linear",
  "normalize_advantages": "batch",
  "use_baseline": false,
  "eval_freq_epochs": 100,
  "eval_warmup_epochs": 0,
  "eval_episodes": 10,
  "eval_recording_freq_epochs": 100,
  "eval_async": false,
  "eval_deterministic": true,
  "reward_threshold": null,
  "early_stop_on_eval_threshold": true,
  "early_stop_on_train_threshold": false,
  "log_per_env_eval_metrics": false,
  "project_id": "CartPole-v1",
  "checkpoint_dir": "checkpoints",
  "resume": false,
  "accelerator": "cpu",
  "devices": null,
  "normalize": null
}