Initial commit
Browse files- README.md +9 -9
- args.yml +1 -1
- config.yml +8 -8
- dqn-SpaceInvadersNoFrameskip-v4.zip +2 -2
- dqn-SpaceInvadersNoFrameskip-v4/data +0 -0
- dqn-SpaceInvadersNoFrameskip-v4/policy.optimizer.pth +1 -1
- dqn-SpaceInvadersNoFrameskip-v4/policy.pth +1 -1
- replay.mp4 +0 -0
- results.json +1 -1
- train_eval_metrics.zip +2 -2
README.md
CHANGED
|
@@ -16,7 +16,7 @@ model-index:
|
|
| 16 |
type: SpaceInvadersNoFrameskip-v4
|
| 17 |
metrics:
|
| 18 |
- type: mean_reward
|
| 19 |
-
value:
|
| 20 |
name: mean_reward
|
| 21 |
verified: false
|
| 22 |
---
|
|
@@ -62,21 +62,21 @@ python -m rl_zoo3.push_to_hub --algo dqn --env SpaceInvadersNoFrameskip-v4 -f lo
|
|
| 62 |
|
| 63 |
## Hyperparameters
|
| 64 |
```python
|
| 65 |
-
OrderedDict([('batch_size',
|
| 66 |
-
('buffer_size',
|
| 67 |
('env_wrapper',
|
| 68 |
['stable_baselines3.common.atari_wrappers.AtariWrapper']),
|
| 69 |
-
('exploration_final_eps', 0.
|
| 70 |
-
('exploration_fraction', 0.
|
| 71 |
('frame_stack', 4),
|
| 72 |
('gradient_steps', 1),
|
| 73 |
-
('learning_rate', 0.
|
| 74 |
-
('learning_starts',
|
| 75 |
('n_timesteps', 1000000.0),
|
| 76 |
('optimize_memory_usage', False),
|
| 77 |
('policy', 'CnnPolicy'),
|
| 78 |
-
('target_update_interval',
|
| 79 |
-
('train_freq',
|
| 80 |
('normalize', False)])
|
| 81 |
```
|
| 82 |
|
|
|
|
| 16 |
type: SpaceInvadersNoFrameskip-v4
|
| 17 |
metrics:
|
| 18 |
- type: mean_reward
|
| 19 |
+
value: 578.00 +/- 133.61
|
| 20 |
name: mean_reward
|
| 21 |
verified: false
|
| 22 |
---
|
|
|
|
| 62 |
|
| 63 |
## Hyperparameters
|
| 64 |
```python
|
| 65 |
+
OrderedDict([('batch_size', 32),
|
| 66 |
+
('buffer_size', 150000),
|
| 67 |
('env_wrapper',
|
| 68 |
['stable_baselines3.common.atari_wrappers.AtariWrapper']),
|
| 69 |
+
('exploration_final_eps', 0.010499732933570786),
|
| 70 |
+
('exploration_fraction', 0.3847312822999899),
|
| 71 |
('frame_stack', 4),
|
| 72 |
('gradient_steps', 1),
|
| 73 |
+
('learning_rate', 0.0005104911831540507),
|
| 74 |
+
('learning_starts', 150000),
|
| 75 |
('n_timesteps', 1000000.0),
|
| 76 |
('optimize_memory_usage', False),
|
| 77 |
('policy', 'CnnPolicy'),
|
| 78 |
+
('target_update_interval', 3418),
|
| 79 |
+
('train_freq', 7),
|
| 80 |
('normalize', False)])
|
| 81 |
```
|
| 82 |
|
args.yml
CHANGED
|
@@ -56,7 +56,7 @@
|
|
| 56 |
- - save_replay_buffer
|
| 57 |
- false
|
| 58 |
- - seed
|
| 59 |
-
-
|
| 60 |
- - storage
|
| 61 |
- null
|
| 62 |
- - study_name
|
|
|
|
| 56 |
- - save_replay_buffer
|
| 57 |
- false
|
| 58 |
- - seed
|
| 59 |
+
- 2452322081
|
| 60 |
- - storage
|
| 61 |
- null
|
| 62 |
- - study_name
|
config.yml
CHANGED
|
@@ -1,22 +1,22 @@
|
|
| 1 |
!!python/object/apply:collections.OrderedDict
|
| 2 |
- - - batch_size
|
| 3 |
-
-
|
| 4 |
- - buffer_size
|
| 5 |
-
-
|
| 6 |
- - env_wrapper
|
| 7 |
- - stable_baselines3.common.atari_wrappers.AtariWrapper
|
| 8 |
- - exploration_final_eps
|
| 9 |
-
- 0.
|
| 10 |
- - exploration_fraction
|
| 11 |
-
- 0.
|
| 12 |
- - frame_stack
|
| 13 |
- 4
|
| 14 |
- - gradient_steps
|
| 15 |
- 1
|
| 16 |
- - learning_rate
|
| 17 |
-
- 0.
|
| 18 |
- - learning_starts
|
| 19 |
-
-
|
| 20 |
- - n_timesteps
|
| 21 |
- 1000000.0
|
| 22 |
- - optimize_memory_usage
|
|
@@ -24,6 +24,6 @@
|
|
| 24 |
- - policy
|
| 25 |
- CnnPolicy
|
| 26 |
- - target_update_interval
|
| 27 |
-
-
|
| 28 |
- - train_freq
|
| 29 |
-
-
|
|
|
|
| 1 |
!!python/object/apply:collections.OrderedDict
|
| 2 |
- - - batch_size
|
| 3 |
+
- 32
|
| 4 |
- - buffer_size
|
| 5 |
+
- 150000
|
| 6 |
- - env_wrapper
|
| 7 |
- - stable_baselines3.common.atari_wrappers.AtariWrapper
|
| 8 |
- - exploration_final_eps
|
| 9 |
+
- 0.010499732933570786
|
| 10 |
- - exploration_fraction
|
| 11 |
+
- 0.3847312822999899
|
| 12 |
- - frame_stack
|
| 13 |
- 4
|
| 14 |
- - gradient_steps
|
| 15 |
- 1
|
| 16 |
- - learning_rate
|
| 17 |
+
- 0.0005104911831540507
|
| 18 |
- - learning_starts
|
| 19 |
+
- 150000
|
| 20 |
- - n_timesteps
|
| 21 |
- 1000000.0
|
| 22 |
- - optimize_memory_usage
|
|
|
|
| 24 |
- - policy
|
| 25 |
- CnnPolicy
|
| 26 |
- - target_update_interval
|
| 27 |
+
- 3418
|
| 28 |
- - train_freq
|
| 29 |
+
- 7
|
dqn-SpaceInvadersNoFrameskip-v4.zip
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c5be37d74918db71dd8fb3e24fd5585468b88ab229dd9d97754925c99eef74a9
|
| 3 |
+
size 27220794
|
dqn-SpaceInvadersNoFrameskip-v4/data
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
dqn-SpaceInvadersNoFrameskip-v4/policy.optimizer.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13506172
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:afc45debfc2ebddadee0f00b1300e157c794e90fe7e827e3537efd482785ed43
|
| 3 |
size 13506172
|
dqn-SpaceInvadersNoFrameskip-v4/policy.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 13505370
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6afbe30a3673007e0111b80e9b60f5dd8881cc537a41f33bfb19222943820476
|
| 3 |
size 13505370
|
replay.mp4
CHANGED
|
Binary files a/replay.mp4 and b/replay.mp4 differ
|
|
|
results.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"mean_reward":
|
|
|
|
| 1 |
+
{"mean_reward": 578.0, "std_reward": 133.6076345124035, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2024-10-03T23:13:22.440239"}
|
train_eval_metrics.zip
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:daecd60378ae84b7387bb087e56a06c2a70226b6c6496df798655d3bb1e53899
|
| 3 |
+
size 39648
|