---
tags:
- deep-reinforcement-learning
- reinforcement-learning
library_name: pytorch
model-index:
- name: Swimmer-v5
results:
- task:
type: reinforcement-learning
name: reinforcement-learning
dataset:
name: Swimmer-v5
type: Swimmer-v5
metrics:
- type: mean_reward
value: 341.476147499577
name: mean_reward
---
# RoboDRL
## Model details
- **Algorithm:** PPO Continuous
- **Environment:** Swimmer-v5
- **Framework:** PyTorch + custom implementation
## Used config
```yaml
# This file is used to configure logging and agents behaviour.
#
# The first part consists of Wandb info used to log experiments.
# Changing it adjusts the way logging is stored and displayed.
#
# The second part (config) is used to change hyperparameter settings of agents.
# Changing it adjusts the way agents behave and learn.
project: "Swimmer-v5"
name: "PPO"
dir: "logs"
notes: "Training Swimmer-v5 using PPO"
monitor_gym: "False"
config:
# Environment, logging and saving control
environment: "Swimmer-v5" # Environment to use
algorithm: "PPO Continuous" # What kind of algorithm to use?
save_dir: "models" # Where to save model?
save_name: "ppo_swimmer" # Model name
save_interval: 25 # How many previous episodes will be used to calculate mean reward?
total_steps: 500_000 # For how many steps will the agent train?
# Algorithm hyperparameters
gamma: 0.999 # Discount factor for future rewards
lambda: 0.98 # GAE tradeoff parameter
ppo_epochs: 10 # How many epochs to train on each batch?
rollout_length: 2048 # How many steps to collect before updating?
batch_size: 64 # How many steps are in each batch?
clip_epsilon: 0.2 # How much is the policy clipped?
learning_rate_actor: 0.0003 # Learning rate for actor head
learning_rate_critic: 0.0003 # Learning rate for critic head
value_loss_coef: 0.5 # How much is the value loss weighted?
entropy_coef: 0.001 # How much is the entropy loss weighted?
max_grad_norm: 0.5 # Maximum norm for gradient clipping
network_size: 128 # Number of neurons in each hidden layer
normalize_rewards: 0 # Whether to normalize rewards or not (1 = True, 0 = False)
init_method: "default" # How will be the neural networks initialized?
```