| import gymnasium as gym | |
| from stable_baselines3 import PPO | |
| # Create the environment (CartPole) | |
| env = gym.make("CartPole-v1") | |
| # Initialize PPO agent | |
| model = PPO("MlpPolicy", env, verbose=1) | |
| # Train the model for a few timesteps | |
| model.learn(total_timesteps=10000) | |
| # Save the trained model | |
| model.save("ppo_cartpole") | |
| # Test the trained model | |
| obs, info = env.reset() # This will return both obs and info | |
| while True: | |
| action, _states = model.predict(obs) | |
| obs, reward, done, truncated, info = env.step(action) # Return updated values for each step | |
| if done: | |
| obs, info = env.reset() # Reset the environment if done | |
| env.close() | |