# This file configures experiment logging and agent behaviour.
#
# The first part consists of Weights & Biases (wandb) settings used to log
# experiments. Changing it adjusts how logs are stored and displayed.
#
# The second part (the `config` mapping) holds the agent's hyperparameter
# settings. Changing it adjusts how the agent behaves and learns.
# Weights & Biases run settings.
# NOTE(review): these key names match keyword arguments of `wandb.init`;
# presumably they are forwarded to it unchanged — confirm in the loading code.
project: "Hopper-v5"
name: "PPO"
dir: "../logs"
notes: "Training Hopper-v5 using PPO"
mode: "online"
# Written as a real boolean: the quoted string "False" is a non-empty string
# and is therefore truthy for any consumer that checks `if monitor_gym:`.
monitor_gym: false
config:
  # Environment, logging and saving control
  environment: "Hopper-v5"                  # Environment to use
  algorithm: "PPO Continuous"               # What kind of algorithm to use?
  save_dir: "../models/"                    # Where to save model?
  save_name: "ppo_hopper"                   # Model name
  # NOTE(review): the key name suggests a save frequency, but the description
  # below refers to a reward-averaging window — confirm which one the code uses.
  save_interval: 50                         # How many previous episodes will be used to calculate mean reward?
  # Written without digit separators: `1_000_000` is an integer only under
  # YAML 1.1; YAML 1.2 core-schema parsers load it as a string.
  total_steps: 1000000                      # For how many steps will the agent train?
  episode_steps: 1250                       # How many steps before the episode is terminated?
  # Algorithm hyperparameters
  gamma: 0.999                              # Discount factor for future rewards
  lambda: 0.99                              # GAE tradeoff parameter
  ppo_epochs: 8                             # How many epochs to train on each batch?
  rollout_length: 512                       # How many steps to collect before updating?
  batch_size: 32                            # How many steps are in each batch?
  clip_epsilon: 0.2                         # How much is the policy clipped?
  learning_rate_actor: 0.0001               # Learning rate for actor head
  learning_rate_critic: 0.0001              # Learning rate for critic head
  value_loss_coef: 0.8                      # How much is the value loss weighted?
  entropy_coef: 0.0025                      # How much is the entropy loss weighted?
  max_grad_norm: 0.5                        # Maximum norm for gradient clipping
  network_size: 256                         # Number of neurons in each hidden layer