Update utils.py
Browse files
utils.py
CHANGED
|
@@ -6,10 +6,8 @@ import json
|
|
| 6 |
import os
|
| 7 |
import time
|
| 8 |
from reward_shaping import LunarLanderRewardShaping
|
| 9 |
-
|
| 10 |
-
from config import * # Import constants like ENV_ID, N_STEPS, NUM_ENVS, GAMMA, RESUME_FILE
|
| 11 |
|
| 12 |
-
# --- Helper Functions for Checkpointing ---
|
| 13 |
def save_resume_data(filepath, timesteps, episodes):
|
| 14 |
"""
|
| 15 |
Saves the current training state to a JSON file located at the specified filepath.
|
|
@@ -41,29 +39,21 @@ def load_resume_data(filepath):
|
|
| 41 |
print(f"Error loading resume data from {filepath}: {e}. Starting from scratch.")
|
| 42 |
return (0, 0)
|
| 43 |
|
| 44 |
-
# --- Environment Setup for Parallel Execution ---
|
| 45 |
def make_env(env_id, seed, idx, **kwargs):
    """Build a zero-argument factory ("thunk") for one wrapped, seeded env.

    Vectorized constructors (e.g. AsyncVectorEnv) expect callables rather
    than live environment instances, so the construction work is deferred
    into the returned closure.

    Args:
        env_id: Gymnasium environment id passed to ``gym.make``.
        seed: Base seed shared by the whole batch of parallel envs.
        idx: Index of this env within the batch; offsets the seed so
            parallel environments stay decorrelated.
        **kwargs: Extra keyword arguments forwarded to ``gym.make``.

    Returns:
        A callable that, when invoked, returns the fully wrapped env.
    """
    def thunk():
        base = gym.make(env_id, **kwargs)

        # Project-local shaping wrapper (reward_shaping module) — provides
        # the dense reward structure used for LunarLander training.
        shaped = LunarLanderRewardShaping(base)

        # RecordEpisodeStatistics injects the episode reward ('r') and
        # length ('l') into the `infos` dict when an episode finishes.
        wrapped = gym.wrappers.RecordEpisodeStatistics(shaped)

        # Seed the spaces with a per-index offset.
        # NOTE(review): only the action/observation spaces are seeded here;
        # the env itself is not (no reset(seed=...)) — confirm the caller
        # passes a seed on the first reset if full determinism is required.
        wrapped.action_space.seed(seed + idx)
        wrapped.observation_space.seed(seed + idx)

        # `wrapped` is the final, fully wrapped environment instance.
        return wrapped

    return thunk
|
| 69 |
|
|
@@ -74,7 +64,6 @@ def make_parallel_envs(env_id, num_envs, seed):
|
|
| 74 |
env_fns = [make_env(env_id, seed, i) for i in range(num_envs)]
|
| 75 |
return AsyncVectorEnv(env_fns)
|
| 76 |
|
| 77 |
-
# --- GAE Calculation ---
|
| 78 |
def calculate_gae(rewards, values, terminated, truncated, next_value, gamma=GAMMA, gae_lambda=GAE_LAMBDA):
|
| 79 |
"""
|
| 80 |
Calculates Generalized Advantage Estimation (GAE) and Returns (R) from rollout data.
|
|
|
|
| 6 |
import os
|
| 7 |
import time
|
| 8 |
from reward_shaping import LunarLanderRewardShaping
|
| 9 |
+
from config import *
|
|
|
|
| 10 |
|
|
|
|
| 11 |
def save_resume_data(filepath, timesteps, episodes):
|
| 12 |
"""
|
| 13 |
Saves the current training state to a JSON file located at the specified filepath.
|
|
|
|
| 39 |
print(f"Error loading resume data from {filepath}: {e}. Starting from scratch.")
|
| 40 |
return (0, 0)
|
| 41 |
|
|
|
|
| 42 |
def make_env(env_id, seed, idx, **kwargs):
|
| 43 |
"""
|
| 44 |
Creates a single environment instance with a unique seed and applies necessary wrappers.
|
| 45 |
"""
|
| 46 |
def thunk():
|
| 47 |
+
|
| 48 |
env = gym.make(env_id, **kwargs)
|
| 49 |
|
|
|
|
|
|
|
| 50 |
env = LunarLanderRewardShaping(env)
|
| 51 |
|
|
|
|
|
|
|
|
|
|
| 52 |
env = gym.wrappers.RecordEpisodeStatistics(env)
|
| 53 |
|
|
|
|
| 54 |
env.action_space.seed(seed + idx)
|
| 55 |
env.observation_space.seed(seed + idx)
|
| 56 |
|
|
|
|
| 57 |
return env
|
| 58 |
return thunk
|
| 59 |
|
|
|
|
| 64 |
env_fns = [make_env(env_id, seed, i) for i in range(num_envs)]
|
| 65 |
return AsyncVectorEnv(env_fns)
|
| 66 |
|
|
|
|
| 67 |
def calculate_gae(rewards, values, terminated, truncated, next_value, gamma=GAMMA, gae_lambda=GAE_LAMBDA):
|
| 68 |
"""
|
| 69 |
Calculates Generalized Advantage Estimation (GAE) and Returns (R) from rollout data.
|