Upload config.yaml with huggingface_hub
Browse files- config.yaml +48 -0
config.yaml
ADDED
# High GPU utilization configuration
env_id: 'ALE/Pacman-v5' # Gymnasium environment to train on
total_episodes: 100 # Total number of episodes to train for
num_envs: 256 # Number of parallel environments for vectorized training
num_steps: 128 # Number of steps to run per update
seed: 42 # Random seed for reproducibility
torch_deterministic: true # Whether to use deterministic PyTorch operations
cuda: true # Whether to use GPU acceleration
max_episode_steps: 1000 # Maximum steps per episode (truncates long episodes)
trajectory_save_every_n_frames: 4 # Save every Nth frame
save_trajectories: false # Whether to save episode trajectories
save_every_n_updates: 1 # Save trajectories every N updates
n_envs_to_save: 12 # Number of parallel environments to save trajectories for
save_gradients: false # Whether to save gradients

# agent.py
learning_rate: 1.0e-3 # Learning rate for the Adam optimizer
gamma: 0.99 # Discount factor for future rewards
gae_lambda: 0.95 # GAE (Generalized Advantage Estimation) lambda parameter
minibatch_size: 512 # Manual minibatch size
update_epochs: 8 # Number of epochs to update the policy per batch
clip_coef: 0.2 # PPO clipping coefficient (epsilon). Default 0.2
ent_coef: 0.01 # Entropy coefficient for exploration bonus
vf_coef: 0.5 # Value function loss coefficient
max_grad_norm: 0.5 # Maximum gradient norm for gradient clipping
target_kl: null # Target KL divergence (null = no early stopping)
optimizer_eps: 1.0e-4 # Epsilon parameter for Adam optimizer
hidden_sizes: [256] # List of hidden layer sizes for MLP (will override hidden_size if specified)
cnn_layers:
- { out_channels: 16, kernel_size: 8, stride: 4 }
- { out_channels: 32, kernel_size: 4, stride: 2 }
- { out_channels: 64, kernel_size: 3, stride: 2 }
layer_init_std: 1.4142135623730951 # Standard deviation for layer weight initialization (sqrt(2))
actor_std: 0.01 # Standard deviation for actor head initialization
critic_std: 1 # Standard deviation for critic head initialization

# logging
wandb_project_name: "pacman-ppo" # Weights & Biases project name for logging
wandb_entity: null # Weights & Biases entity/username (null = default)

# video saving (not used)
capture_video: false # Whether to capture videos of agent performance
save_video_freq: 5 # Save video every N updates
video_length: 1000 # Maximum number of frames to record per video

# huggingface
upload_to_hf: true
hf_repo_id: "jacobcd52/pacman_ppo" # replace with your huggingface username and repo name