Update utils.py
Browse files
utils.py
CHANGED
|
@@ -6,10 +6,8 @@ import json
|
|
| 6 |
import os
|
| 7 |
import time
|
| 8 |
from reward_shaping import LunarLanderRewardShaping
|
| 9 |
-
|
| 10 |
-
from config import * # Import constants like ENV_ID, N_STEPS, NUM_ENVS, GAMMA, RESUME_FILE
|
| 11 |
|
| 12 |
-
# --- Helper Functions for Checkpointing ---
|
| 13 |
def save_resume_data(filepath, timesteps, episodes):
|
| 14 |
"""
|
| 15 |
Saves the current training state to a JSON file located at the specified filepath.
|
|
@@ -41,29 +39,21 @@ def load_resume_data(filepath):
|
|
| 41 |
print(f"Error loading resume data from {filepath}: {e}. Starting from scratch.")
|
| 42 |
return (0, 0)
|
| 43 |
|
| 44 |
-
# --- Environment Setup for Parallel Execution ---
|
| 45 |
def make_env(env_id, seed, idx, **kwargs):
    """Build a zero-argument factory ("thunk") for one wrapped, seeded env.

    Vectorized constructors (e.g. AsyncVectorEnv) expect callables rather
    than live environment instances, so the construction work is deferred
    into the returned closure.

    Args:
        env_id: Gymnasium environment id passed to ``gym.make``.
        seed: Base seed shared by the whole batch of parallel envs.
        idx: Index of this env within the batch; offsets the seed so
            parallel environments stay decorrelated.
        **kwargs: Extra keyword arguments forwarded to ``gym.make``.

    Returns:
        A callable that, when invoked, returns the fully wrapped env.
    """
    def thunk():
        base = gym.make(env_id, **kwargs)

        # Project-local shaping wrapper (reward_shaping module) — provides
        # the dense reward structure used for LunarLander training.
        shaped = LunarLanderRewardShaping(base)

        # RecordEpisodeStatistics injects the episode reward ('r') and
        # length ('l') into the `infos` dict when an episode finishes.
        wrapped = gym.wrappers.RecordEpisodeStatistics(shaped)

        # Seed the spaces with a per-index offset.
        # NOTE(review): only the action/observation spaces are seeded here;
        # the env itself is not (no reset(seed=...)) — confirm the caller
        # passes a seed on the first reset if full determinism is required.
        wrapped.action_space.seed(seed + idx)
        wrapped.observation_space.seed(seed + idx)

        # `wrapped` is the final, fully wrapped environment instance.
        return wrapped

    return thunk
|
| 69 |
|
|
@@ -74,7 +64,6 @@ def make_parallel_envs(env_id, num_envs, seed):
|
|
| 74 |
env_fns = [make_env(env_id, seed, i) for i in range(num_envs)]
|
| 75 |
return AsyncVectorEnv(env_fns)
|
| 76 |
|
| 77 |
-
# --- GAE Calculation ---
|
| 78 |
def calculate_gae(rewards, values, terminated, truncated, next_value, gamma=GAMMA, gae_lambda=GAE_LAMBDA):
|
| 79 |
"""
|
| 80 |
Calculates Generalized Advantage Estimation (GAE) and Returns (R) from rollout data.
|
|
|
|
| 6 |
import os
|
| 7 |
import time
|
| 8 |
from reward_shaping import LunarLanderRewardShaping
|
| 9 |
+
from config import *
|
|
|
|
| 10 |
|
|
|
|
| 11 |
def save_resume_data(filepath, timesteps, episodes):
|
| 12 |
"""
|
| 13 |
Saves the current training state to a JSON file located at the specified filepath.
|
|
|
|
| 39 |
print(f"Error loading resume data from {filepath}: {e}. Starting from scratch.")
|
| 40 |
return (0, 0)
|
| 41 |
|
|
|
|
| 42 |
def make_env(env_id, seed, idx, **kwargs):
|
| 43 |
"""
|
| 44 |
Creates a single environment instance with a unique seed and applies necessary wrappers.
|
| 45 |
"""
|
| 46 |
def thunk():
|
| 47 |
+
|
| 48 |
env = gym.make(env_id, **kwargs)
|
| 49 |
|
|
|
|
|
|
|
| 50 |
env = LunarLanderRewardShaping(env)
|
| 51 |
|
|
|
|
|
|
|
|
|
|
| 52 |
env = gym.wrappers.RecordEpisodeStatistics(env)
|
| 53 |
|
|
|
|
| 54 |
env.action_space.seed(seed + idx)
|
| 55 |
env.observation_space.seed(seed + idx)
|
| 56 |
|
|
|
|
| 57 |
return env
|
| 58 |
return thunk
|
| 59 |
|
|
|
|
| 64 |
env_fns = [make_env(env_id, seed, i) for i in range(num_envs)]
|
| 65 |
return AsyncVectorEnv(env_fns)
|
| 66 |
|
|
|
|
| 67 |
def calculate_gae(rewards, values, terminated, truncated, next_value, gamma=GAMMA, gae_lambda=GAE_LAMBDA):
|
| 68 |
"""
|
| 69 |
Calculates Generalized Advantage Estimation (GAE) and Returns (R) from rollout data.
|