privateboss committed on
Commit
d1a7480
·
verified ·
1 Parent(s): b84c2e1

Update config.py

Browse files
Files changed (1) hide show
  1. config.py +3 -7
config.py CHANGED
@@ -1,12 +1,10 @@
1
  import tensorflow as tf
2
  import os
3
 
4
- # --- Environment Configuration ---
5
  ENV_ID = "LunarLander-v3"
6
  SEED = 123
7
  NUM_ENVS = 12
8
 
9
- # --- PPO Hyperparameters ---
10
  TOTAL_TIMESTEPS = 15_000_000
11
  N_STEPS = 4096
12
  GAMMA = 0.99
@@ -17,12 +15,11 @@ CLIP_RANGE = 0.1
17
  LEARNING_RATE = 3e-4
18
  RMS_WARMUP_STEPS = 5000
19
 
20
- # --- Loss Coefficients ---
21
  VALUE_COEF = 0.5
22
  ENTROPY_COEF = 0.1
23
  MAX_GRAD_NORM = 0.5
24
 
25
- # --- Hardware and Logging ---
26
  DEVICE = 'GPU' if tf.config.list_physical_devices('GPU') else 'CPU'
27
  if DEVICE == 'GPU':
28
  gpus = tf.config.experimental.list_physical_devices('GPU')
@@ -34,9 +31,8 @@ if DEVICE == 'GPU':
34
  print(e)
35
  LOG_DIR = f"./Lunar_Lander_Discrete_logs/ppo_{ENV_ID.lower()}"
36
 
37
- # --- Checkpointing and Resuming ---
38
  SAVE_PATH_ROOT = "./Lunar_Lander_Discrete_models"
39
  SAVE_PATH = os.path.join(SAVE_PATH_ROOT, f"ppo_{ENV_ID.lower()}")
40
  RESUME_FILE = f"ppo_{ENV_ID.lower()}_resume.json"
41
- # We save the checkpoint every 21 rollouts
42
- CHECKPOINT_FREQ = N_STEPS * NUM_ENVS * 21 # Current value: 1,081,344 timesteps, after initial 49,152 that include RMS_WARMUP_STEPS
 
1
  import tensorflow as tf
2
  import os
3
 
 
4
  ENV_ID = "LunarLander-v3"
5
  SEED = 123
6
  NUM_ENVS = 12
7
 
 
8
  TOTAL_TIMESTEPS = 15_000_000
9
  N_STEPS = 4096
10
  GAMMA = 0.99
 
15
  LEARNING_RATE = 3e-4
16
  RMS_WARMUP_STEPS = 5000
17
 
18
+ -
19
  VALUE_COEF = 0.5
20
  ENTROPY_COEF = 0.1
21
  MAX_GRAD_NORM = 0.5
22
 
 
23
  DEVICE = 'GPU' if tf.config.list_physical_devices('GPU') else 'CPU'
24
  if DEVICE == 'GPU':
25
  gpus = tf.config.experimental.list_physical_devices('GPU')
 
31
  print(e)
32
  LOG_DIR = f"./Lunar_Lander_Discrete_logs/ppo_{ENV_ID.lower()}"
33
 
34
+
35
  SAVE_PATH_ROOT = "./Lunar_Lander_Discrete_models"
36
  SAVE_PATH = os.path.join(SAVE_PATH_ROOT, f"ppo_{ENV_ID.lower()}")
37
  RESUME_FILE = f"ppo_{ENV_ID.lower()}_resume.json"
38
+ CHECKPOINT_FREQ = N_STEPS * NUM_ENVS * 21