privateboss
/

Lunar_Lander-V3_Discrete

Model card · Files and versions

privateboss committed on Nov 25, 2025

Commit

d1a7480

·

verified ·

1 Parent(s): b84c2e1

Update config.py

Files changed (1) hide show

config.py +3 -7

config.py CHANGED Viewed

@@ -1,12 +1,10 @@
 import tensorflow as tf
 import os
-# --- Environment Configuration ---
 ENV_ID = "LunarLander-v3"
 SEED = 123
 NUM_ENVS = 12
-# --- PPO Hyperparameters ---
 TOTAL_TIMESTEPS = 15_000_000
 N_STEPS = 4096
 GAMMA = 0.99
@@ -17,12 +15,11 @@ CLIP_RANGE = 0.1
 LEARNING_RATE = 3e-4
 RMS_WARMUP_STEPS = 5000
-# --- Loss Coefficients ---
 VALUE_COEF = 0.5
 ENTROPY_COEF = 0.1
 MAX_GRAD_NORM = 0.5
-# --- Hardware and Logging ---
 DEVICE = 'GPU' if tf.config.list_physical_devices('GPU') else 'CPU'
 if DEVICE == 'GPU':
     gpus = tf.config.experimental.list_physical_devices('GPU')
@@ -34,9 +31,8 @@ if DEVICE == 'GPU':
             print(e)
 LOG_DIR = f"./Lunar_Lander_Discrete_logs/ppo_{ENV_ID.lower()}"
-# --- Checkpointing and Resuming ---
 SAVE_PATH_ROOT = "./Lunar_Lander_Discrete_models"
 SAVE_PATH = os.path.join(SAVE_PATH_ROOT, f"ppo_{ENV_ID.lower()}")
 RESUME_FILE = f"ppo_{ENV_ID.lower()}_resume.json"
-# We save the checkpoint every 21 rollouts
-CHECKPOINT_FREQ = N_STEPS * NUM_ENVS * 21 # Current value: 1,081,344 timesteps, after initial 49,152 that include RMS_WARMUP_STEPS

 import tensorflow as tf
 import os
 ENV_ID = "LunarLander-v3"
 SEED = 123
 NUM_ENVS = 12
 TOTAL_TIMESTEPS = 15_000_000
 N_STEPS = 4096
 GAMMA = 0.99
 LEARNING_RATE = 3e-4
 RMS_WARMUP_STEPS = 5000
+-
 VALUE_COEF = 0.5
 ENTROPY_COEF = 0.1
 MAX_GRAD_NORM = 0.5
 DEVICE = 'GPU' if tf.config.list_physical_devices('GPU') else 'CPU'
 if DEVICE == 'GPU':
     gpus = tf.config.experimental.list_physical_devices('GPU')
             print(e)
 LOG_DIR = f"./Lunar_Lander_Discrete_logs/ppo_{ENV_ID.lower()}"
 SAVE_PATH_ROOT = "./Lunar_Lander_Discrete_models"
 SAVE_PATH = os.path.join(SAVE_PATH_ROOT, f"ppo_{ENV_ID.lower()}")
 RESUME_FILE = f"ppo_{ENV_ID.lower()}_resume.json"
+CHECKPOINT_FREQ = N_STEPS * NUM_ENVS * 21