{
  "episode_dir": "./recorded_episodes/train",
  "output_dir": "./ppo_checkpoints_r2",
  "device": "cuda:0",
  "total_steps": 2000000,
  "n_rollout": 2048,
  "n_epochs": 4,
  "batch_size": 256,
  "lr": 0.0003,
  "gamma": 0.99,
  "gae_lambda": 0.95,
  "clip_eps": 0.2,
  "value_coef": 0.5,
  "entropy_coef": 0.01,
  "max_grad_norm": 0.5,
  "save_every": 50,
  "resume_from": null,
  "seed": 42,
  "hf_repo_id": "vshwanilgv/wenavigatecontroller-ppo",
  "hf_best_only": true
}