seynath committed (verified)
Commit 758a671 · 1 Parent(s): 76780d1

Upload folder using huggingface_hub

README.md CHANGED
@@ -6,7 +6,7 @@ tags:
 - reinforcement-learning
 - stable-baselines3
 model-index:
-- name: QRDQN
+- name: DQN
   results:
   - task:
       type: reinforcement-learning
@@ -16,13 +16,13 @@ model-index:
       type: SpaceInvadersNoFrameskip-v4
     metrics:
     - type: mean_reward
-      value: 1059.00 +/- 439.89
+      value: 1415.00 +/- 538.76
       name: mean_reward
       verified: false
 ---
 
-# **QRDQN** Agent playing **SpaceInvadersNoFrameskip-v4**
-This is a trained model of a **QRDQN** agent playing **SpaceInvadersNoFrameskip-v4**
+# **DQN** Agent playing **SpaceInvadersNoFrameskip-v4**
+This is a trained model of a **DQN** agent playing **SpaceInvadersNoFrameskip-v4**
 using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3)
 and the [RL Zoo](https://github.com/DLR-RM/rl-baselines3-zoo).
 
@@ -43,33 +43,39 @@ pip install rl_zoo3
 
 ```
 # Download model and save it into the logs/ folder
-python -m rl_zoo3.load_from_hub --algo qrdqn --env SpaceInvadersNoFrameskip-v4 -orga Mihail-P -f logs/
-python -m rl_zoo3.enjoy --algo qrdqn --env SpaceInvadersNoFrameskip-v4 -f logs/
+python -m rl_zoo3.load_from_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -orga iblub -f logs/
+python -m rl_zoo3.enjoy --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/
 ```
 
 If you installed the RL Zoo3 via pip (`pip install rl_zoo3`), from anywhere you can do:
 ```
-python -m rl_zoo3.load_from_hub --algo qrdqn --env SpaceInvadersNoFrameskip-v4 -orga Mihail-P -f logs/
-python -m rl_zoo3.enjoy --algo qrdqn --env SpaceInvadersNoFrameskip-v4 -f logs/
+python -m rl_zoo3.load_from_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -orga iblub -f logs/
+python -m rl_zoo3.enjoy --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/
 ```
 
 ## Training (with the RL Zoo)
 ```
-python -m rl_zoo3.train --algo qrdqn --env SpaceInvadersNoFrameskip-v4 -f logs/
+python -m rl_zoo3.train --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/
 # Upload the model and generate video (when possible)
-python -m rl_zoo3.push_to_hub --algo qrdqn --env SpaceInvadersNoFrameskip-v4 -f logs/ -orga Mihail-P
+python -m rl_zoo3.push_to_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -f logs/ -orga iblub
 ```
 
 ## Hyperparameters
 ```python
 OrderedDict([('batch_size', 64),
-             ('buffer_size', 150000),
+             ('buffer_size', 1000000),
              ('env_wrapper',
               ['stable_baselines3.common.atari_wrappers.AtariWrapper']),
-             ('exploration_fraction', 0.025),
+             ('exploration_final_eps', 0.01),
+             ('exploration_fraction', 0.1),
              ('frame_stack', 4),
+             ('gradient_steps', 1),
+             ('learning_rate', 0.0001),
+             ('learning_starts', 100000),
              ('n_timesteps', 10000000.0),
              ('optimize_memory_usage', False),
              ('policy', 'CnnPolicy'),
+             ('target_update_interval', 1000),
+             ('train_freq', 4),
              ('normalize', False)])
 ```
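For context on the card above: the RL Zoo commands are the canonical interface, but the downloaded agent can also be loaded directly with stable-baselines3. A minimal sketch, assuming the zip was already fetched via `load_from_hub`; the local path is hypothetical and depends on the run folder the Zoo created:

```python
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack

# Rebuild the evaluation env as the hyperparameters describe:
# AtariWrapper (applied inside make_atari_env) plus 4-frame stacking.
env = make_atari_env("SpaceInvadersNoFrameskip-v4", n_envs=1)
env = VecFrameStack(env, n_stack=4)

# Hypothetical path; load_from_hub saves under logs/<algo>/<env>_<run_id>/.
model = DQN.load("logs/dqn/SpaceInvadersNoFrameskip-v4_1/SpaceInvadersNoFrameskip-v4.zip")

obs = env.reset()
for _ in range(1000):
    # results.json reports a non-deterministic evaluation, so sample actions.
    action, _states = model.predict(obs, deterministic=False)
    obs, rewards, dones, infos = env.step(action)  # vec env auto-resets
```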
args.yml CHANGED
@@ -1,6 +1,6 @@
 !!python/object/apply:collections.OrderedDict
 - - - algo
-    - qrdqn
+    - dqn
   - - conf_file
     - null
   - - device
@@ -10,15 +10,13 @@
   - - env_kwargs
     - null
   - - eval_episodes
-    - 25
+    - 5
   - - eval_freq
-    - 50000
+    - 25000
   - - gym_packages
     - []
   - - hyperparams
-    - batch_size: 64
-      buffer_size: 150000
-      exploration_fraction: 0.025
+    - null
   - - log_folder
     - logs/
   - - log_interval
@@ -34,7 +32,7 @@
   - - n_startup_trials
     - 10
   - - n_timesteps
-    - 10000000
+    - -1
   - - n_trials
     - 500
   - - no_optim_plots
@@ -52,17 +50,17 @@
   - - sampler
     - tpe
   - - save_freq
-    - 1000000
+    - -1
   - - save_replay_buffer
     - false
   - - seed
-    - 1078690446
+    - 4025715748
   - - storage
     - null
   - - study_name
     - null
   - - tensorboard_log
-    - /tblogs/
+    - ''
   - - track
     - false
   - - trained_agent
@@ -81,3 +79,5 @@
     - sb3
   - - wandb_tags
     - []
+  - - yaml_file
+    - null
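A side note on reading args.yml back: it is serialized with a Python-specific YAML tag, so `yaml.safe_load` rejects it. A sketch using PyYAML's `UnsafeLoader` (appropriate only for files you trust, since the tag invokes a Python callable):

```python
import yaml  # PyYAML

# The file starts with !!python/object/apply:collections.OrderedDict,
# which the safe loader refuses; UnsafeLoader reconstructs the object.
with open("args.yml") as f:
    args = yaml.load(f, Loader=yaml.UnsafeLoader)

print(args["algo"])         # 'dqn' after this commit
print(args["hyperparams"])  # None -- the overrides were dropped here
```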
config.yml CHANGED
@@ -2,16 +2,28 @@
 - - - batch_size
     - 64
   - - buffer_size
-    - 150000
+    - 1000000
   - - env_wrapper
     - - stable_baselines3.common.atari_wrappers.AtariWrapper
+  - - exploration_final_eps
+    - 0.01
   - - exploration_fraction
-    - 0.025
+    - 0.1
   - - frame_stack
     - 4
+  - - gradient_steps
+    - 1
+  - - learning_rate
+    - 0.0001
+  - - learning_starts
+    - 100000
   - - n_timesteps
     - 10000000.0
   - - optimize_memory_usage
     - false
   - - policy
     - CnnPolicy
+  - - target_update_interval
+    - 1000
+  - - train_freq
+    - 4
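The new config.yml mirrors the Hyperparameters block in the README. For readers who want the same setup outside the Zoo, a rough sketch of how these keys map onto the stable-baselines3 `DQN` constructor (`env_wrapper`, `frame_stack`, and `n_timesteps` are Zoo-level keys handled by its scripts, not `DQN` arguments; env construction follows the loading sketch above):

```python
from stable_baselines3 import DQN
from stable_baselines3.common.env_util import make_atari_env
from stable_baselines3.common.vec_env import VecFrameStack

env = VecFrameStack(make_atari_env("SpaceInvadersNoFrameskip-v4", n_envs=1), n_stack=4)

# config.yml keys that are genuine DQN constructor arguments, passed 1:1.
model = DQN(
    "CnnPolicy",
    env,
    batch_size=64,
    buffer_size=1_000_000,
    exploration_final_eps=0.01,
    exploration_fraction=0.1,
    gradient_steps=1,
    learning_rate=0.0001,
    learning_starts=100_000,
    optimize_memory_usage=False,
    target_update_interval=1000,
    train_freq=4,
)

# n_timesteps (1e7) is consumed by the Zoo's train script, not the model.
model.learn(total_timesteps=10_000_000)
```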
dqn-SpaceInvadersNoFrameskip-v4.zip ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:24630ac98b56c5dc42d4aae85747ae6b5fab29381690872dda5ebdbd419d092a
+size 27225058
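The three lines above are a Git LFS pointer, not the model itself. To fetch the actual ~27 MB artifact programmatically, one option is `huggingface_hub` (the repo id below is a placeholder; this page does not show the full repository name):

```python
from huggingface_hub import hf_hub_download

# Placeholder repo_id -- substitute the actual <owner>/<repo> of this model.
path = hf_hub_download(
    repo_id="<owner>/dqn-SpaceInvadersNoFrameskip-v4",
    filename="dqn-SpaceInvadersNoFrameskip-v4.zip",
)
print(path)  # local cache path to the downloaded zip
```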
dqn-SpaceInvadersNoFrameskip-v4/_stable_baselines3_version ADDED
@@ -0,0 +1 @@
+1.7.0
dqn-SpaceInvadersNoFrameskip-v4/data ADDED
The diff for this file is too large to render. See raw diff
 
dqn-SpaceInvadersNoFrameskip-v4/policy.optimizer.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d8c3fd998273dfced9ec8feec6560589ca1e93c5e24ade1c7e314f4db8accd78
+size 13505739
dqn-SpaceInvadersNoFrameskip-v4/policy.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf07d1709c8b2913a90712f213e1122bb396af1b862a3df90158c73d0bc58915
+size 13504937
dqn-SpaceInvadersNoFrameskip-v4/pytorch_variables.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d030ad8db708280fcae77d87e973102039acd23a11bdecc3db8eb6c0ac940ee1
+size 431
dqn-SpaceInvadersNoFrameskip-v4/system_info.txt ADDED
@@ -0,0 +1,7 @@
+- OS: Linux-5.10.147+-x86_64-with-debian-bullseye-sid #1 SMP Sat Oct 15 09:06:22 UTC 2022
+- Python: 3.7.12
+- Stable-Baselines3: 1.7.0
+- PyTorch: 1.12.1+cu113
+- GPU Enabled: True
+- Numpy: 1.21.6
+- Gym: 0.21.0
replay.mp4 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:74e2345a1acfdab0aec4c523b3687dc651e457d5651afaa63b93b3a961116455
-size 558346
+oid sha256:00a2e690c5cfe8966f2a19dddea16690551733c093fea228ede9db056da72f06
+size 207801
results.json CHANGED
@@ -1 +1 @@
-{"mean_reward": 1059.0, "std_reward": 439.89089556388865, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2023-04-17T06:17:02.113019"}
+{"mean_reward": 1415.0, "std_reward": 538.7624708533436, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2023-01-12T09:05:13.918172"}
train_eval_metrics.zip CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c462cb45c3f36e46ef59f3ac324b4caaab97d843b88a0ce88468d2b40dfb97c7
-size 308560
+oid sha256:d3f2c8ce8ee92fc53ec7eb7d1b9a158b8d8090b125824a4538b2f3345f139810
+size 230379