1) model = PPO(policy="MlpPolicy",
               env=env,
               n_steps=1024,
               batch_size=64,
               n_epochs=4,
               gamma=0.999,
               gae_lambda=0.98,
               ent_coef=0.01,
               verbose=1)
   model.learn(total_timesteps=500000)
   mean_reward=193.60 +/- 21.33
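Neither the construction of env nor the evaluation call appears in this log. Below is a minimal sketch of the assumed setup, using a Gymnasium LunarLander-v2 environment and Stable-Baselines3's evaluate_policy helper; the environment id and n_eval_episodes value are assumptions, not taken from the log:

# Sketch of the assumed setup for run 1; the env id and the
# evaluation settings are assumptions, not shown in the log.
import gymnasium as gym
from stable_baselines3 import PPO
from stable_baselines3.common.evaluation import evaluate_policy

env = gym.make("LunarLander-v2")  # assumed environment

model = PPO(policy="MlpPolicy", env=env, n_steps=1024, batch_size=64,
            n_epochs=4, gamma=0.999, gae_lambda=0.98, ent_coef=0.01,
            verbose=1)
model.learn(total_timesteps=500000)

# Likely source of the mean/std pair reported above (assumption):
eval_env = gym.make("LunarLander-v2")
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")

evaluate_policy runs the policy deterministically by default and returns a (mean_reward, std_reward) pair, which matches the format of the numbers logged here.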
2) model = PPO(policy="MlpPolicy",
               env=env,
               n_steps=1024,
               batch_size=64,
               n_epochs=8,
               gamma=0.999,
               gae_lambda=0.98,
               ent_coef=0.01,
               verbose=1)
   model.learn(total_timesteps=500000)
   mean_reward=235.09 +/- 21.88
3) model = PPO(policy="MlpPolicy",
               env=env,
               n_steps=1024,
               batch_size=64,
               n_epochs=8,
               gamma=0.999,
               gae_lambda=0.98,
               ent_coef=0.01,
               verbose=1)
   model.learn(total_timesteps=1000000)
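No evaluation score is recorded for run 3. Assuming the same evaluate_policy routine as in the sketch above, the missing number could be produced and the model saved as follows; the filename is illustrative, not from the log:

# Hypothetical follow-up for run 3; evaluation settings and
# filename are assumptions, not shown in the log.
from stable_baselines3.common.evaluation import evaluate_policy

mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10)
print(f"mean_reward={mean_reward:.2f} +/- {std_reward:.2f}")
model.save("ppo-lunarlander-run3")  # illustrative filename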