# pacman_ppo / config.yaml
# jacobcd52's picture
# Upload config.yaml with huggingface_hub
# c956b18 verified
# Configuration tuned for high GPU utilization
env_id: 'ALE/Pacman-v5'  # Gymnasium environment used for training
total_episodes: 100  # How many episodes to train for in total
num_envs: 256  # Count of parallel environments (vectorized training)
num_steps: 128  # Steps run per update
seed: 42  # Random seed, for reproducibility
torch_deterministic: true  # Use deterministic PyTorch operations
cuda: true  # Enable GPU acceleration
# Per-episode step cap; episodes longer than this are truncated
max_episode_steps: 1000
trajectory_save_every_n_frames: 4  # Frame stride: save every Nth frame
save_trajectories: false  # Toggle saving of episode trajectories
save_every_n_updates: 1  # Trajectory save interval, in updates
# How many of the parallel environments have their trajectories saved
n_envs_to_save: 12
save_gradients: false  # Toggle saving of gradients
# agent.py
learning_rate: 0.001  # Adam optimizer learning rate (1e-3)
gamma: 0.99  # Discount applied to future rewards
gae_lambda: 0.95  # Lambda for Generalized Advantage Estimation (GAE)
minibatch_size: 512  # Minibatch size, set manually
update_epochs: 8  # Policy update epochs per batch
clip_coef: 0.2  # PPO clipping coefficient (epsilon); default is 0.2
ent_coef: 0.01  # Weight of the entropy bonus (exploration)
vf_coef: 0.5  # Weight of the value-function loss
max_grad_norm: 0.5  # Gradient-clipping threshold (max gradient norm)
target_kl: null  # KL-divergence target; null disables early stopping
optimizer_eps: 0.0001  # Adam epsilon (1e-4)
# Hidden layer sizes for the MLP (overrides hidden_size when specified)
hidden_sizes: [256]
# Convolutional layer specs, one entry per layer; each entry sets
# out_channels, kernel_size and stride.
# NOTE(review): presumably consumed in listed order by agent.py — confirm.
cnn_layers:
  - out_channels: 16
    kernel_size: 8
    stride: 4
  - out_channels: 32
    kernel_size: 4
    stride: 2
  - out_channels: 64
    kernel_size: 3
    stride: 2
# Std used for layer weight initialization; the value is sqrt(2)
layer_init_std: 1.4142135623730951
actor_std: 0.01  # Std used for actor head initialization
critic_std: 1  # Std used for critic head initialization
# logging
wandb_project_name: 'pacman-ppo'  # Weights & Biases project to log under
wandb_entity: null  # W&B entity/username; null falls back to the default
# video saving (not used)
capture_video: false  # Toggle video capture of agent performance
save_video_freq: 5  # Video save interval, in updates
video_length: 1000  # Frame cap for each recorded video
# huggingface
# NOTE(review): presumably enables uploading to hf_repo_id — confirm in consumer
upload_to_hf: true
# Substitute your own Hugging Face username and repo name
hf_repo_id: 'jacobcd52/pacman_ppo'