jacobcd52
/

pacman_ppo

Model card Files Files and versions

pacman_ppo / config.yaml

jacobcd52's picture

Upload config.yaml with huggingface_hub

c956b18 verified 10 months ago

history blame contribute delete

3.16 kB

	# High GPU utilization configuration

	# --- environment and run size ---
	# Gymnasium environment to train on
	env_id: 'ALE/Pacman-v5'
	# Total number of episodes to train for
	total_episodes: 100
	# Number of parallel environments for vectorized training
	num_envs: 256
	# Number of steps to run per update
	num_steps: 128
	# Maximum steps per episode (longer episodes are truncated)
	max_episode_steps: 1000

	# --- reproducibility and device ---
	# Random seed for reproducibility
	seed: 42
	# Whether to use deterministic PyTorch operations
	torch_deterministic: true
	# Whether to use GPU acceleration
	cuda: true

	# --- trajectory / gradient saving ---
	# Save every Nth frame
	trajectory_save_every_n_frames: 4
	# Whether to save episode trajectories
	save_trajectories: false
	# Save trajectories every N updates
	save_every_n_updates: 1
	# Number of parallel environments to save trajectories for
	n_envs_to_save: 12
	# Whether to save gradients
	save_gradients: false

	# agent.py

	# --- optimizer ---
	# Learning rate for the Adam optimizer
	learning_rate: 1.0e-3
	# Epsilon parameter for the Adam optimizer
	optimizer_eps: 1.0e-4
	# Maximum gradient norm for gradient clipping
	max_grad_norm: 0.5

	# --- PPO objective ---
	# Discount factor for future rewards
	gamma: 0.99
	# Lambda parameter for GAE (Generalized Advantage Estimation)
	gae_lambda: 0.95
	# Manual minibatch size
	minibatch_size: 512
	# Number of epochs to update the policy per batch
	update_epochs: 8
	# PPO clipping coefficient (epsilon). Default 0.2
	clip_coef: 0.2
	# Entropy coefficient for the exploration bonus
	ent_coef: 0.01
	# Value function loss coefficient
	vf_coef: 0.5
	# Target KL divergence (null = no early stopping)
	target_kl: null

	# --- network architecture and initialization ---
	# List of hidden layer sizes for the MLP
	# (will override hidden_size if specified)
	hidden_sizes: [256]
	# One entry per CNN layer: out_channels, kernel_size, stride
	cnn_layers:
	  - out_channels: 16
	    kernel_size: 8
	    stride: 4
	  - out_channels: 32
	    kernel_size: 4
	    stride: 2
	  - out_channels: 64
	    kernel_size: 3
	    stride: 2
	# Standard deviation for layer weight initialization (sqrt(2))
	layer_init_std: 1.4142135623730951
	# Standard deviation for actor head initialization
	actor_std: 0.01
	# Standard deviation for critic head initialization
	critic_std: 1

	# logging

	# Weights & Biases project name for logging
	wandb_project_name: 'pacman-ppo'
	# Weights & Biases entity/username (null = default)
	wandb_entity: null

	# video saving (not used)

	# Whether to capture videos of agent performance
	capture_video: false
	# Save a video every N updates
	save_video_freq: 5
	# Maximum number of frames to record per video
	video_length: 1000

	# huggingface

	# NOTE(review): presumably toggles uploading to the repo named in
	# hf_repo_id; confirm against the code that reads this key.
	upload_to_hf: true
	# Replace with your huggingface username and repo name
	hf_repo_id: 'jacobcd52/pacman_ppo'