 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
---
tags:
  - deep-reinforcement-learning
  - reinforcement-learning
library_name: pytorch
model-index:
  - name: Swimmer-v5
    results:
      - task:
          type: reinforcement-learning
          name: reinforcement-learning
        dataset:
          name: Swimmer-v5
          type: Swimmer-v5
        metrics:
          - type: mean_reward
            value: 341.476147499577
            name: mean_reward
---
# RoboDRL

## Model details
Algorithm: PPO Continuous

Environment: Swimmer-v5

Framework: PyTorch + custom implementation

## Used config
```yaml
# This file is used to configure logging and agents' behaviour.
#
# The first part consists of Wandb info used to log experiments.
# Changing it adjusts the way logging is stored and displayed.
#
# The second part (config) is used to change hyperparameter settings of agents.
# Changing it adjusts the way agents behave and learn.
project: "Swimmer-v5"
name: "PPO"
dir: "logs"
notes: "Training Swimmer-v5 using PPO"
monitor_gym: "False"
config:
  # Environment, logging and saving control
  environment: "Swimmer-v5"                 # Environment to use
  algorithm: "PPO Continuous"               # What kind of algorithm to use?
  save_dir: "models"                        # Where to save model?
  save_name: "ppo_swimmer"                  # Model name
  save_interval: 25                         # How often (in episodes) will the model be saved?
  total_steps: 500_000                      # For how many steps will the agent train?
  # Algorithm hyperparameters
  gamma: 0.999                              # Discount factor for future rewards
  lambda: 0.98                              # GAE tradeoff parameter
  ppo_epochs: 10                            # How many epochs to train on each batch?
  rollout_length: 2048                      # How many steps to collect before updating?
  batch_size: 64                            # How many steps are in each batch?
  clip_epsilon: 0.2                         # How much is the policy clipped?
  learning_rate_actor: 0.0003               # Learning rate for actor head
  learning_rate_critic: 0.0003              # Learning rate for critic head
  value_loss_coef: 0.5                      # How much is the value loss weighted?
  entropy_coef: 0.001                       # How much is the entropy loss weighted?
  max_grad_norm: 0.5                        # Maximum norm for gradient clipping
  network_size: 128                         # Number of neurons in each hidden layer
  normalize_rewards: 0                      # Whether to normalize rewards or not (1 = True, 0 = False)
  init_method: "default"                    # How will the neural networks be initialized?