jacobcd52 commited on
Commit
c956b18
·
verified ·
1 Parent(s): 8eca16b

Upload config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.yaml +48 -0
config.yaml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# High GPU utilization configuration
env_id: 'ALE/Pacman-v5' # Gymnasium environment to train on
total_episodes: 100 # Total number of episodes to train for
num_envs: 256 # Number of parallel environments for vectorized training
num_steps: 128 # Number of steps to run per update
seed: 42 # Random seed for reproducibility
torch_deterministic: true # Whether to use deterministic PyTorch operations
cuda: true # Whether to use GPU acceleration
max_episode_steps: 1000 # Maximum steps per episode (truncates long episodes)
trajectory_save_every_n_frames: 4 # Save every Nth frame
save_trajectories: false # Whether to save episode trajectories
save_every_n_updates: 1 # Save trajectories every N updates
n_envs_to_save: 12 # Number of parallel environments to save trajectories for
save_gradients: false # Whether to save gradients

# agent.py
learning_rate: 1.0e-3 # Learning rate for the Adam optimizer
gamma: 0.99 # Discount factor for future rewards
gae_lambda: 0.95 # GAE (Generalized Advantage Estimation) lambda parameter
minibatch_size: 512 # Manual minibatch size
update_epochs: 8 # Number of epochs to update the policy per batch
clip_coef: 0.2 # PPO clipping coefficient (epsilon). Default 0.2
ent_coef: 0.01 # Entropy coefficient for exploration bonus
vf_coef: 0.5 # Value function loss coefficient
max_grad_norm: 0.5 # Maximum gradient norm for gradient clipping
target_kl: null # Target KL divergence (null = no early stopping)
optimizer_eps: 1.0e-4 # Epsilon parameter for Adam optimizer
hidden_sizes: [256] # List of hidden layer sizes for MLP (will override hidden_size if specified)
cnn_layers:
  - { out_channels: 16, kernel_size: 8, stride: 4 }
  - { out_channels: 32, kernel_size: 4, stride: 2 }
  - { out_channels: 64, kernel_size: 3, stride: 2 }
layer_init_std: 1.4142135623730951 # Standard deviation for layer weight initialization (sqrt(2))
actor_std: 0.01 # Standard deviation for actor head initialization
critic_std: 1 # Standard deviation for critic head initialization

# logging
wandb_project_name: "pacman-ppo" # Weights & Biases project name for logging
wandb_entity: null # Weights & Biases entity/username (null = default)

# video saving (not used)
capture_video: false # Whether to capture videos of agent performance
save_video_freq: 5 # Save video every N updates
video_length: 1000 # Maximum number of frames to record per video

# huggingface
upload_to_hf: true
hf_repo_id: "jacobcd52/pacman_ppo" # replace with your huggingface username and repo name