# This file is used to configure logging and agent behaviour.
#
# The first part consists of Wandb info used to log experiments.
# Changing it adjusts the way logging is stored and displayed.
#
# The second part (config) is used to change hyperparameter settings of agents.
# Changing it adjusts the way agents behave and learn.
project: "Hopper-v5"
name: "PPO"
dir: "../logs"
notes: "Training Hopper-v5 using PPO"
mode: "online"
monitor_gym: false  # real boolean — the quoted string "False" is truthy in Python
config:
  # Environment, logging and saving control
  environment: "Hopper-v5"  # Environment to use
  algorithm: "PPO Continuous"  # What kind of algorithm to use?
  save_dir: "../models/"  # Where to save model?
  save_name: "ppo_hopper"  # Model name
  save_interval: 50  # How many previous episodes will be used to calculate mean reward?
  # NOTE(review): the key name suggests a save frequency, but the comment above
  # describes a reward-averaging window — confirm which the consumer expects.
  total_steps: 1000000  # For how many steps will the agent train?
  episode_steps: 1250  # How many steps before the episode is terminated?
  # Algorithm hyperparameters
  gamma: 0.999  # Discount factor for future rewards
  lambda: 0.99  # GAE tradeoff parameter
  ppo_epochs: 8  # How many epochs to train on each batch?
  rollout_length: 512  # How many steps to collect before updating?
  batch_size: 32  # How many steps are in each batch?
  clip_epsilon: 0.2  # How much is the policy clipped?
  learning_rate_actor: 0.0001  # Learning rate for actor head
  learning_rate_critic: 0.0001  # Learning rate for critic head
  value_loss_coef: 0.8  # How much is the value loss weighted?
  entropy_coef: 0.0025  # How much is the entropy loss weighted?
  max_grad_norm: 0.5  # Maximum norm for gradient clipping
  network_size: 256  # Number of neurons in each hidden layer