privateboss committed on
Commit
07fb5e6
·
verified ·
1 Parent(s): bbc63fc

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +2 -13
utils.py CHANGED
@@ -6,10 +6,8 @@ import json
6
  import os
7
  import time
8
  from reward_shaping import LunarLanderRewardShaping
9
- # ---------------------------------------------
10
- from config import * # Import constants like ENV_ID, N_STEPS, NUM_ENVS, GAMMA, RESUME_FILE
11
 
12
- # --- Helper Functions for Checkpointing ---
13
  def save_resume_data(filepath, timesteps, episodes):
14
  """
15
  Saves the current training state to a JSON file located at the specified filepath.
@@ -41,29 +39,21 @@ def load_resume_data(filepath):
41
  print(f"Error loading resume data from {filepath}: {e}. Starting from scratch.")
42
  return (0, 0)
43
 
44
- # --- Environment Setup for Parallel Execution ---
45
def make_env(env_id, seed, idx, **kwargs):
    """
    Build a zero-argument factory ("thunk") that constructs one fully
    wrapped, uniquely seeded environment instance.

    Args:
        env_id: Environment id forwarded to ``gym.make``.
        seed: Base seed shared by all parallel workers.
        idx: Worker index; added to ``seed`` so each instance seeds uniquely.
        **kwargs: Extra keyword arguments forwarded to ``gym.make``.

    Returns:
        A callable that, when invoked, returns the wrapped environment.
    """
    def thunk():
        # Wrap in order: base env -> dense reward shaping -> episode
        # statistics logging (records episode reward 'r' and length 'l'
        # into the infos dict on episode end).
        base = gym.make(env_id, **kwargs)
        shaped = LunarLanderRewardShaping(base)
        wrapped = gym.wrappers.RecordEpisodeStatistics(shaped)

        # Seed the spaces of the final wrapped env, offset by worker index.
        for space in (wrapped.action_space, wrapped.observation_space):
            space.seed(seed + idx)

        return wrapped
    return thunk
69
 
@@ -74,7 +64,6 @@ def make_parallel_envs(env_id, num_envs, seed):
74
  env_fns = [make_env(env_id, seed, i) for i in range(num_envs)]
75
  return AsyncVectorEnv(env_fns)
76
 
77
- # --- GAE Calculation ---
78
  def calculate_gae(rewards, values, terminated, truncated, next_value, gamma=GAMMA, gae_lambda=GAE_LAMBDA):
79
  """
80
  Calculates Generalized Advantage Estimation (GAE) and Returns (R) from rollout data.
 
6
  import os
7
  import time
8
  from reward_shaping import LunarLanderRewardShaping
9
+ from config import *
 
10
 
 
11
  def save_resume_data(filepath, timesteps, episodes):
12
  """
13
  Saves the current training state to a JSON file located at the specified filepath.
 
39
  print(f"Error loading resume data from {filepath}: {e}. Starting from scratch.")
40
  return (0, 0)
41
 
 
42
def make_env(env_id, seed, idx, **kwargs):
    """
    Creates a single environment instance with a unique seed and applies necessary wrappers.

    Args:
        env_id: Environment id forwarded to ``gym.make``.
        seed: Base seed shared by all parallel workers.
        idx: Worker index; added to ``seed`` so each instance seeds uniquely.
        **kwargs: Extra keyword arguments forwarded to ``gym.make``.

    Returns:
        A zero-argument callable ("thunk") that builds and returns the
        wrapped environment.
    """
    def thunk():

        # Base environment.
        env = gym.make(env_id, **kwargs)

        # Project-local wrapper providing a dense (shaped) reward signal.
        env = LunarLanderRewardShaping(env)

        # Tracks episode reward ('r') and length ('l') and puts them into
        # the 'infos' dictionary when the episode is done.
        env = gym.wrappers.RecordEpisodeStatistics(env)

        # Seed the final wrapped environment's spaces, offset per worker.
        env.action_space.seed(seed + idx)
        env.observation_space.seed(seed + idx)

        return env
    return thunk
59
 
 
64
  env_fns = [make_env(env_id, seed, i) for i in range(num_envs)]
65
  return AsyncVectorEnv(env_fns)
66
 
 
67
  def calculate_gae(rewards, values, terminated, truncated, next_value, gamma=GAMMA, gae_lambda=GAE_LAMBDA):
68
  """
69
  Calculates Generalized Advantage Estimation (GAE) and Returns (R) from rollout data.