import numpy as np

# Environment and rollout settings
ENV_NAME = "MountainCarContinuous-v0"
NUM_ENVS = 15  # 15 asynchronous environments running in parallel
ROLLOUT_STEPS = 200  # Steps collected per environment per iteration
# Iteration cycles replace episode loops.
# NUM_ENVS * ROLLOUT_STEPS * TOTAL_ITERATIONS = 15 * 200 * 1500
# = 4.5 million timesteps in total.
TOTAL_ITERATIONS = 1500

# Optimisation hyperparameters
GAMMA = 0.99
CLIP_RATIO = 0.2
POLICY_LR = 3e-4
VALUE_LR = 1e-3
TRAIN_EPOCHS = 5
# Increased to handle the larger per-iteration batch
# (NUM_ENVS * ROLLOUT_STEPS = 15 * 200 = 3000 steps).
BATCH_SIZE = 128
# One-element float32 array holding the starting log standard deviation (-0.5).
INITIAL_LOG_STD = -0.5 * np.ones(1, dtype=np.float32)

# Directory management
LOG_DIR = "./ContinuousMountainCar_logs/"
CHECKPOINT_DIR = "./ContinuousMountainCar/"