arkadyark committed on
Commit
44fac71
·
1 Parent(s): 2a89a17

Initial commit

Browse files
README.md CHANGED
@@ -6,7 +6,7 @@ tags:
6
  - reinforcement-learning
7
  - stable-baselines3
8
  model-index:
9
- - name: PPO
10
  results:
11
  - task:
12
  type: reinforcement-learning
@@ -16,13 +16,13 @@ model-index:
16
  type: SpaceInvadersNoFrameskip-v4
17
  metrics:
18
  - type: mean_reward
19
- value: 317.50 +/- 95.66
20
  name: mean_reward
21
  verified: false
22
  ---
23
 
24
- # **PPO** Agent playing **SpaceInvadersNoFrameskip-v4**
25
- This is a trained model of a **PPO** agent playing **SpaceInvadersNoFrameskip-v4**
26
  using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3)
27
  and the [RL Zoo](https://github.com/DLR-RM/rl-baselines3-zoo).
28
 
@@ -43,37 +43,39 @@ pip install rl_zoo3
43
 
44
  ```
45
  # Download model and save it into the logs/ folder
46
- python -m rl_zoo3.load_from_hub --algo ppo --env SpaceInvadersNoFrameskip-v4 -orga arkadyark -f logs/
47
- python -m rl_zoo3.enjoy --algo ppo --env SpaceInvadersNoFrameskip-v4 -f logs/
48
  ```
49
 
50
  If you installed the RL Zoo3 via pip (`pip install rl_zoo3`), from anywhere you can do:
51
  ```
52
- python -m rl_zoo3.load_from_hub --algo ppo --env SpaceInvadersNoFrameskip-v4 -orga arkadyark -f logs/
53
- python -m rl_zoo3.enjoy --algo ppo --env SpaceInvadersNoFrameskip-v4 -f logs/
54
  ```
55
 
56
  ## Training (with the RL Zoo)
57
  ```
58
- python -m rl_zoo3.train --algo ppo --env SpaceInvadersNoFrameskip-v4 -f logs/
59
  # Upload the model and generate video (when possible)
60
- python -m rl_zoo3.push_to_hub --algo ppo --env SpaceInvadersNoFrameskip-v4 -f logs/ -orga arkadyark
61
  ```
62
 
63
  ## Hyperparameters
64
  ```python
65
- OrderedDict([('batch_size', 256),
66
- ('clip_range', 'lin_0.1'),
67
- ('ent_coef', 0.01),
68
  ('env_wrapper',
69
  ['stable_baselines3.common.atari_wrappers.AtariWrapper']),
 
 
70
  ('frame_stack', 4),
71
- ('learning_rate', 'lin_2.5e-4'),
72
- ('n_envs', 8),
73
- ('n_epochs', 4),
74
- ('n_steps', 128),
75
- ('n_timesteps', 1000000.0),
76
  ('policy', 'CnnPolicy'),
77
- ('vf_coef', 0.5),
 
78
  ('normalize', False)])
79
  ```
 
6
  - reinforcement-learning
7
  - stable-baselines3
8
  model-index:
9
+ - name: DQN
10
  results:
11
  - task:
12
  type: reinforcement-learning
 
16
  type: SpaceInvadersNoFrameskip-v4
17
  metrics:
18
  - type: mean_reward
19
+ value: 862.50 +/- 470.60
20
  name: mean_reward
21
  verified: false
22
  ---
23
 
24
+ # **DQN** Agent playing **SpaceInvadersNoFrameskip-v4**
25
+ This is a trained model of a **DQN** agent playing **SpaceInvadersNoFrameskip-v4**
26
  using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3)
27
  and the [RL Zoo](https://github.com/DLR-RM/rl-baselines3-zoo).
28
 
 
43
 
44
  ```
45
  # Download model and save it into the logs/ folder
46
+ python -m rl_zoo3.load_from_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -orga arkadyark -f logs/
47
+ python -m rl_zoo3.enjoy --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/
48
  ```
49
 
50
  If you installed the RL Zoo3 via pip (`pip install rl_zoo3`), from anywhere you can do:
51
  ```
52
+ python -m rl_zoo3.load_from_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -orga arkadyark -f logs/
53
+ python -m rl_zoo3.enjoy --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/
54
  ```
55
 
56
  ## Training (with the RL Zoo)
57
  ```
58
+ python -m rl_zoo3.train --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/
59
  # Upload the model and generate video (when possible)
60
+ python -m rl_zoo3.push_to_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/ -orga arkadyark
61
  ```
62
 
63
  ## Hyperparameters
64
  ```python
65
+ OrderedDict([('batch_size', 32),
66
+ ('buffer_size', 100000),
 
67
  ('env_wrapper',
68
  ['stable_baselines3.common.atari_wrappers.AtariWrapper']),
69
+ ('exploration_final_eps', 0.01),
70
+ ('exploration_fraction', 0.1),
71
  ('frame_stack', 4),
72
+ ('gradient_steps', 1),
73
+ ('learning_rate', 0.0001),
74
+ ('learning_starts', 100000),
75
+ ('n_timesteps', 10000000.0),
76
+ ('optimize_memory_usage', False),
77
  ('policy', 'CnnPolicy'),
78
+ ('target_update_interval', 1000),
79
+ ('train_freq', 4),
80
  ('normalize', False)])
81
  ```
args.yml CHANGED
@@ -1,6 +1,6 @@
1
  !!python/object/apply:collections.OrderedDict
2
  - - - algo
3
- - ppo
4
  - - conf_file
5
  - null
6
  - - device
@@ -18,7 +18,7 @@
18
  - - hyperparams
19
  - null
20
  - - log_folder
21
- - logs/ppo
22
  - - log_interval
23
  - -1
24
  - - max_total_trials
@@ -54,7 +54,7 @@
54
  - - save_replay_buffer
55
  - false
56
  - - seed
57
- - 2105563713
58
  - - storage
59
  - null
60
  - - study_name
 
1
  !!python/object/apply:collections.OrderedDict
2
  - - - algo
3
+ - dqn
4
  - - conf_file
5
  - null
6
  - - device
 
18
  - - hyperparams
19
  - null
20
  - - log_folder
21
+ - logs/default-params
22
  - - log_interval
23
  - -1
24
  - - max_total_trials
 
54
  - - save_replay_buffer
55
  - false
56
  - - seed
57
+ - 1747788574
58
  - - storage
59
  - null
60
  - - study_name
config.yml CHANGED
@@ -1,25 +1,29 @@
1
  !!python/object/apply:collections.OrderedDict
2
  - - - batch_size
3
- - 256
4
- - - clip_range
5
- - lin_0.1
6
- - - ent_coef
7
- - 0.01
8
  - - env_wrapper
9
  - - stable_baselines3.common.atari_wrappers.AtariWrapper
 
 
 
 
10
  - - frame_stack
11
  - 4
 
 
12
  - - learning_rate
13
- - lin_2.5e-4
14
- - - n_envs
15
- - 8
16
- - - n_epochs
17
- - 4
18
- - - n_steps
19
- - 128
20
  - - n_timesteps
21
- - 1000000.0
 
 
22
  - - policy
23
  - CnnPolicy
24
- - - vf_coef
25
- - 0.5
 
 
 
1
  !!python/object/apply:collections.OrderedDict
2
  - - - batch_size
3
+ - 32
4
+ - - buffer_size
5
+ - 100000
 
 
6
  - - env_wrapper
7
  - - stable_baselines3.common.atari_wrappers.AtariWrapper
8
+ - - exploration_final_eps
9
+ - 0.01
10
+ - - exploration_fraction
11
+ - 0.1
12
  - - frame_stack
13
  - 4
14
+ - - gradient_steps
15
+ - 1
16
  - - learning_rate
17
+ - 0.0001
18
+ - - learning_starts
19
+ - 100000
 
 
 
 
20
  - - n_timesteps
21
+ - 10000000.0
22
+ - - optimize_memory_usage
23
+ - false
24
  - - policy
25
  - CnnPolicy
26
+ - - target_update_interval
27
+ - 1000
28
+ - - train_freq
29
+ - 4
dqn-SpaceInvadersNoFrameskip-v4.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:95ff457396e98ae304d554d2fba40fa9a3bc2f3ee5f2e25d37db98537503adec
3
+ size 27225173
dqn-SpaceInvadersNoFrameskip-v4/_stable_baselines3_version ADDED
@@ -0,0 +1 @@
 
 
1
+ 1.8.0
dqn-SpaceInvadersNoFrameskip-v4/data ADDED
The diff for this file is too large to render. See raw diff
 
dqn-SpaceInvadersNoFrameskip-v4/policy.optimizer.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f21656725dd262199a7069a812cde4fa22a7cfe581fe6f27c23ae88f9ee1cd6
3
+ size 13505739
dqn-SpaceInvadersNoFrameskip-v4/policy.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:479056d2a916843d996ef877d29a35c7682b87072e1f69c024d3ba0b0358e02b
3
+ size 13504937
dqn-SpaceInvadersNoFrameskip-v4/pytorch_variables.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d030ad8db708280fcae77d87e973102039acd23a11bdecc3db8eb6c0ac940ee1
3
+ size 431
dqn-SpaceInvadersNoFrameskip-v4/system_info.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ - OS: Linux-5.19.0-38-generic-x86_64-with-glibc2.35 # 39~22.04.1-Ubuntu SMP PREEMPT_DYNAMIC Fri Mar 17 21:16:15 UTC 2
2
+ - Python: 3.9.16
3
+ - Stable-Baselines3: 1.8.0
4
+ - PyTorch: 2.0.0+cu117
5
+ - GPU Enabled: True
6
+ - Numpy: 1.24.2
7
+ - Gym: 0.21.0
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89559496e67928cc4a33b6064356e46b847843893492586495e482716ef52c70
3
- size 253512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8d32bc5208bb2a67c0d27c497e76f0fc82ba8c6551cc30059911ab7fd336d96
3
+ size 182038
results.json CHANGED
@@ -1 +1 @@
1
- {"mean_reward": 317.5, "std_reward": 95.66216598007804, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2023-04-08T19:56:33.399274"}
 
1
+ {"mean_reward": 862.5, "std_reward": 470.59669569600675, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2023-04-09T21:52:11.322268"}
train_eval_metrics.zip CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c588d7177559deafbe98349c748cefb63721112c6b195060997d5634b17123a
3
- size 39717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaaff6d10618715c72c73c32d8c0b427437fde16470b12d14d8719b34b001b4b
3
+ size 268212