{
  "episode_dir": "./recorded_episodes/train",
  "output_dir": "./ppo_checkpoints_r2",
  "device": "cuda:0",
  "total_steps": 2000000,
  "n_rollout": 2048,
  "n_epochs": 4,
  "batch_size": 256,
  "lr": 0.0003,
  "gamma": 0.99,
  "gae_lambda": 0.95,
  "clip_eps": 0.2,
  "value_coef": 0.5,
  "entropy_coef": 0.01,
  "max_grad_norm": 0.5,
  "save_every": 50,
  "resume_from": null,
  "seed": 42,
  "hf_repo_id": "vshwanilgv/wenavigatecontroller-ppo",
  "hf_best_only": true
}