import numpy as np

# Training configuration constants.
# NOTE(review): the meanings of GAMMA / CLIP_RATIO / *_LR / TRAIN_EPOCHS are
# inferred from their names only -- confirm against the training loop that
# consumes them.

# Environment and rollout settings
ENV_NAME = "MountainCarContinuous-v0"
NUM_ENVS = 15  # 15 asynchronous environments running in parallel
ROLLOUT_STEPS = 200  # Steps collected per environment per iteration
# Iteration cycles replace episode loops:
# 1500 iterations * 15 envs * 200 steps = 4.5 million timesteps in total.
TOTAL_ITERATIONS = 1500

# Optimisation hyperparameters
GAMMA = 0.99
CLIP_RATIO = 0.2
POLICY_LR = 3e-4
VALUE_LR = 1e-3
TRAIN_EPOCHS = 5
# Increased to handle the larger per-iteration batch (15 * 200 = 3000 steps).
BATCH_SIZE = 128
# One-element float32 array holding -0.5; presumably the starting log standard
# deviation of the policy's action distribution -- confirm against the model.
INITIAL_LOG_STD = -0.5 * np.ones(1, dtype=np.float32)

# Directory Management
LOG_DIR = "./ContinuousMountainCar_logs/"
CHECKPOINT_DIR = "./ContinuousMountainCar/"