| import gym | |
| import numpy as np | |
| from stable_baselines3 import PPO | |
| from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv | |
| from stable_baselines3.common.env_util import make_vec_env | |
| from stable_baselines3.common.utils import set_random_seed | |
| def make_env(env_id, rank, seed=0): | |
| """ | |
| Utility function for multiprocessed env. | |
| :param env_id: (str) the environment ID | |
| :param num_env: (int) the number of environments you wish to have in subprocesses | |
| :param seed: (int) the inital seed for RNG | |
| :param rank: (int) index of the subprocess | |
| """ | |
| def _init(): | |
| env = gym.make(env_id) | |
| env.seed(seed + rank) | |
| return env | |
| set_random_seed(seed) | |
| return _init | |
| if __name__ == '__main__': | |
| env_id = "CartPole-v1" | |
| num_cpu = 4 # Number of processes to use | |
| # Create the vectorized environment | |
| env = SubprocVecEnv([make_env(env_id, i) for i in range(num_cpu)]) | |
| # Stable Baselines provides you with make_vec_env() helper | |
| # which does exactly the previous steps for you. | |
| # You can choose between `DummyVecEnv` (usually faster) and `SubprocVecEnv` | |
| # env = make_vec_env(env_id, n_envs=num_cpu, seed=0, vec_env_cls=SubprocVecEnv) | |
| model = PPO('MlpPolicy', env, verbose=1) | |
| model.learn(total_timesteps=25_000) | |
| obs = env.reset() | |
| for _ in range(1000): | |
| action, _states = model.predict(obs) | |
| obs, rewards, dones, info = env.step(action) | |
| env.render() |