Upload config.yaml with huggingface_hub
Browse files- config.yaml +48 -0
config.yaml
ADDED
# High GPU utilization configuration
env_id: 'ALE/Pacman-v5' # Gymnasium environment to train on
total_episodes: 100 # Total number of episodes to train for
num_envs: 256 # Number of parallel environments for vectorized training
num_steps: 128 # Number of steps to run per update
seed: 42 # Random seed for reproducibility
torch_deterministic: true # Whether to use deterministic PyTorch operations
cuda: true # Whether to use GPU acceleration
max_episode_steps: 1000 # Maximum steps per episode (truncates long episodes)
trajectory_save_every_n_frames: 4 # Save every Nth frame
save_trajectories: false # Whether to save episode trajectories
save_every_n_updates: 1 # Save trajectories every N updates
n_envs_to_save: 12 # Number of parallel environments to save trajectories for
save_gradients: false # Whether to save gradients

# agent.py
learning_rate: 1.0e-3 # Learning rate for the Adam optimizer
gamma: 0.99 # Discount factor for future rewards
gae_lambda: 0.95 # GAE (Generalized Advantage Estimation) lambda parameter
minibatch_size: 512 # Manual minibatch size
update_epochs: 8 # Number of epochs to update the policy per batch
clip_coef: 0.2 # PPO clipping coefficient (epsilon). Default 0.2
ent_coef: 0.01 # Entropy coefficient for exploration bonus
vf_coef: 0.5 # Value function loss coefficient
max_grad_norm: 0.5 # Maximum gradient norm for gradient clipping
target_kl: null # Target KL divergence (null = no early stopping)
optimizer_eps: 1.0e-4 # Epsilon parameter for Adam optimizer
hidden_sizes: [256] # List of hidden layer sizes for MLP (will override hidden_size if specified)
cnn_layers:
- { out_channels: 16, kernel_size: 8, stride: 4 }
- { out_channels: 32, kernel_size: 4, stride: 2 }
- { out_channels: 64, kernel_size: 3, stride: 2 }
layer_init_std: 1.4142135623730951 # Standard deviation for layer weight initialization (sqrt(2))
actor_std: 0.01 # Standard deviation for actor head initialization
critic_std: 1 # Standard deviation for critic head initialization

# logging
wandb_project_name: "pacman-ppo" # Weights & Biases project name for logging
wandb_entity: null # Weights & Biases entity/username (null = default)

# video saving (not used)
capture_video: false # Whether to capture videos of agent performance
save_video_freq: 5 # Save video every N updates
video_length: 1000 # Maximum number of frames to record per video

# huggingface
upload_to_hf: true
hf_repo_id: "jacobcd52/pacman_ppo" # replace with your huggingface username and repo name