10m timesteps SpaceInvaders
Browse files
README.md
CHANGED
|
@@ -22,10 +22,13 @@ model-index:
|
|
| 22 |
---
|
| 23 |
|
| 24 |
# **DQN** Agent playing **SpaceInvadersNoFrameskip-v4**
|
|
|
|
| 25 |
This is a trained model of a **DQN** agent playing **SpaceInvadersNoFrameskip-v4**
|
| 26 |
using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3).
|
| 27 |
|
|
|
|
| 28 |
## Usage (with Stable-baselines3)
|
|
|
|
| 29 |
TODO: Add your code
|
| 30 |
|
| 31 |
|
|
@@ -34,4 +37,4 @@ from stable_baselines3 import ...
|
|
| 34 |
from huggingface_sb3 import load_from_hub
|
| 35 |
|
| 36 |
...
|
| 37 |
-
```
|
|
|
|
| 22 |
---
|
| 23 |
|
| 24 |
# **DQN** Agent playing **SpaceInvadersNoFrameskip-v4**
|
| 25 |
+
|
| 26 |
This is a trained model of a **DQN** agent playing **SpaceInvadersNoFrameskip-v4**
|
| 27 |
using the [stable-baselines3 library](https://github.com/DLR-RM/stable-baselines3).
|
| 28 |
|
| 29 |
+
|
| 30 |
## Usage (with Stable-baselines3)
|
| 31 |
+
|
| 32 |
TODO: Add your code
|
| 33 |
|
| 34 |
|
|
|
|
| 37 |
from huggingface_sb3 import load_from_hub
|
| 38 |
|
| 39 |
...
|
| 40 |
+
```
|
config.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
dqn-SpaceInvadersNoFrameskip-v4.zip
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c6311eb6f415d93f7c9c53172878a68276abd230418678ff98468a978d1554e5
|
| 3 |
+
size 27221660
|
dqn-SpaceInvadersNoFrameskip-v4/_stable_baselines3_version
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
2.
|
|
|
|
| 1 |
+
2.5.0
|
dqn-SpaceInvadersNoFrameskip-v4/data
CHANGED
|
@@ -4,9 +4,9 @@
|
|
| 4 |
":serialized:": "gAWVMAAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLmRxbi5wb2xpY2llc5SMCUNublBvbGljeZSTlC4=",
|
| 5 |
"__module__": "stable_baselines3.dqn.policies",
|
| 6 |
"__doc__": "\n Policy class for DQN when using images as input.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
|
| 7 |
-
"__init__": "<function CnnPolicy.__init__ at
|
| 8 |
"__abstractmethods__": "frozenset()",
|
| 9 |
-
"_abc_impl": "<_abc._abc_data object at
|
| 10 |
},
|
| 11 |
"verbose": 1,
|
| 12 |
"policy_kwargs": {},
|
|
@@ -16,7 +16,10 @@
|
|
| 16 |
"seed": 1632485984,
|
| 17 |
"action_noise": null,
|
| 18 |
"start_time": 1738801757428800604,
|
| 19 |
-
"learning_rate":
|
|
|
|
|
|
|
|
|
|
| 20 |
"tensorboard_log": "runs/SpaceInvadersNoFrameskip-v4__dqn__1632485984__1738801755/SpaceInvadersNoFrameskip-v4",
|
| 21 |
"_last_obs": null,
|
| 22 |
"_last_episode_starts": {
|
|
@@ -81,18 +84,18 @@
|
|
| 81 |
"__module__": "stable_baselines3.common.buffers",
|
| 82 |
"__annotations__": "{'observations': <class 'numpy.ndarray'>, 'next_observations': <class 'numpy.ndarray'>, 'actions': <class 'numpy.ndarray'>, 'rewards': <class 'numpy.ndarray'>, 'dones': <class 'numpy.ndarray'>, 'timeouts': <class 'numpy.ndarray'>}",
|
| 83 |
"__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
|
| 84 |
-
"__init__": "<function ReplayBuffer.__init__ at
|
| 85 |
-
"add": "<function ReplayBuffer.add at
|
| 86 |
-
"sample": "<function ReplayBuffer.sample at
|
| 87 |
-
"_get_samples": "<function ReplayBuffer._get_samples at
|
| 88 |
-
"_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at
|
| 89 |
"__abstractmethods__": "frozenset()",
|
| 90 |
-
"_abc_impl": "<_abc._abc_data object at
|
| 91 |
},
|
| 92 |
"replay_buffer_kwargs": {},
|
| 93 |
"train_freq": {
|
| 94 |
":type:": "<class 'stable_baselines3.common.type_aliases.TrainFreq'>",
|
| 95 |
-
":serialized:": "
|
| 96 |
},
|
| 97 |
"use_sde_at_warmup": false,
|
| 98 |
"exploration_initial_eps": 1.0,
|
|
@@ -102,14 +105,14 @@
|
|
| 102 |
"_n_calls": 10000000,
|
| 103 |
"max_grad_norm": 10,
|
| 104 |
"exploration_rate": 0.01,
|
| 105 |
-
"batch_norm_stats": [],
|
| 106 |
-
"batch_norm_stats_target": [],
|
| 107 |
"lr_schedule": {
|
| 108 |
":type:": "<class 'function'>",
|
| 109 |
-
":serialized:": "
|
| 110 |
},
|
|
|
|
|
|
|
| 111 |
"exploration_schedule": {
|
| 112 |
":type:": "<class 'function'>",
|
| 113 |
-
":serialized:": "
|
| 114 |
}
|
| 115 |
}
|
|
|
|
| 4 |
":serialized:": "gAWVMAAAAAAAAACMHnN0YWJsZV9iYXNlbGluZXMzLmRxbi5wb2xpY2llc5SMCUNublBvbGljeZSTlC4=",
|
| 5 |
"__module__": "stable_baselines3.dqn.policies",
|
| 6 |
"__doc__": "\n Policy class for DQN when using images as input.\n\n :param observation_space: Observation space\n :param action_space: Action space\n :param lr_schedule: Learning rate schedule (could be constant)\n :param net_arch: The specification of the policy and value networks.\n :param activation_fn: Activation function\n :param features_extractor_class: Features extractor to use.\n :param normalize_images: Whether to normalize images or not,\n dividing by 255.0 (True by default)\n :param optimizer_class: The optimizer to use,\n ``th.optim.Adam`` by default\n :param optimizer_kwargs: Additional keyword arguments,\n excluding the learning rate, to pass to the optimizer\n ",
|
| 7 |
+
"__init__": "<function CnnPolicy.__init__ at 0x78dc61faa3b0>",
|
| 8 |
"__abstractmethods__": "frozenset()",
|
| 9 |
+
"_abc_impl": "<_abc._abc_data object at 0x78dc61fcc080>"
|
| 10 |
},
|
| 11 |
"verbose": 1,
|
| 12 |
"policy_kwargs": {},
|
|
|
|
| 16 |
"seed": 1632485984,
|
| 17 |
"action_noise": null,
|
| 18 |
"start_time": 1738801757428800604,
|
| 19 |
+
"learning_rate": {
|
| 20 |
+
":type:": "<class 'function'>",
|
| 21 |
+
":serialized:": "gAWV4wIAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLAUsTQwSIAFMAlE6FlCmMAV+UhZSMZC9ob21lL2Jlbmp5L21pbmljb25kYTMvZW52cy9zYjMtY29uZGEvbGliL3B5dGhvbjMuMTAvc2l0ZS1wYWNrYWdlcy9zdGFibGVfYmFzZWxpbmVzMy9jb21tb24vdXRpbHMucHmUjARmdW5jlEuGQwIEAZSMA3ZhbJSFlCl0lFKUfZQojAtfX3BhY2thZ2VfX5SMGHN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbpSMCF9fbmFtZV9flIwec3RhYmxlX2Jhc2VsaW5lczMuY29tbW9uLnV0aWxzlIwIX19maWxlX1+UjGQvaG9tZS9iZW5qeS9taW5pY29uZGEzL2VudnMvc2IzLWNvbmRhL2xpYi9weXRob24zLjEwL3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lHVOTmgAjBBfbWFrZV9lbXB0eV9jZWxslJOUKVKUhZR0lFKUaACMEl9mdW5jdGlvbl9zZXRzdGF0ZZSTlGgffZR9lChoFowEZnVuY5SMDF9fcXVhbG5hbWVfX5SMGWNvbnN0YW50X2ZuLjxsb2NhbHM+LmZ1bmOUjA9fX2Fubm90YXRpb25zX1+UfZSMDl9fa3dkZWZhdWx0c19flE6MDF9fZGVmYXVsdHNfX5ROjApfX21vZHVsZV9flGgXjAdfX2RvY19flE6MC19fY2xvc3VyZV9flGgAjApfbWFrZV9jZWxslJOURz8aNuLrHEMthZRSlIWUjBdfY2xvdWRwaWNrbGVfc3VibW9kdWxlc5RdlIwLX19nbG9iYWxzX1+UfZR1hpSGUjAu"
|
| 22 |
+
},
|
| 23 |
"tensorboard_log": "runs/SpaceInvadersNoFrameskip-v4__dqn__1632485984__1738801755/SpaceInvadersNoFrameskip-v4",
|
| 24 |
"_last_obs": null,
|
| 25 |
"_last_episode_starts": {
|
|
|
|
| 84 |
"__module__": "stable_baselines3.common.buffers",
|
| 85 |
"__annotations__": "{'observations': <class 'numpy.ndarray'>, 'next_observations': <class 'numpy.ndarray'>, 'actions': <class 'numpy.ndarray'>, 'rewards': <class 'numpy.ndarray'>, 'dones': <class 'numpy.ndarray'>, 'timeouts': <class 'numpy.ndarray'>}",
|
| 86 |
"__doc__": "\n Replay buffer used in off-policy algorithms like SAC/TD3.\n\n :param buffer_size: Max number of element in the buffer\n :param observation_space: Observation space\n :param action_space: Action space\n :param device: PyTorch device\n :param n_envs: Number of parallel environments\n :param optimize_memory_usage: Enable a memory efficient variant\n of the replay buffer which reduces by almost a factor two the memory used,\n at a cost of more complexity.\n See https://github.com/DLR-RM/stable-baselines3/issues/37#issuecomment-637501195\n and https://github.com/DLR-RM/stable-baselines3/pull/28#issuecomment-637559274\n Cannot be used in combination with handle_timeout_termination.\n :param handle_timeout_termination: Handle timeout termination (due to timelimit)\n separately and treat the task as infinite horizon task.\n https://github.com/DLR-RM/stable-baselines3/issues/284\n ",
|
| 87 |
+
"__init__": "<function ReplayBuffer.__init__ at 0x78dc6975b910>",
|
| 88 |
+
"add": "<function ReplayBuffer.add at 0x78dc6975b9a0>",
|
| 89 |
+
"sample": "<function ReplayBuffer.sample at 0x78dc6975ba30>",
|
| 90 |
+
"_get_samples": "<function ReplayBuffer._get_samples at 0x78dc6975bac0>",
|
| 91 |
+
"_maybe_cast_dtype": "<staticmethod(<function ReplayBuffer._maybe_cast_dtype at 0x78dc6975bb50>)>",
|
| 92 |
"__abstractmethods__": "frozenset()",
|
| 93 |
+
"_abc_impl": "<_abc._abc_data object at 0x78dc6974fe40>"
|
| 94 |
},
|
| 95 |
"replay_buffer_kwargs": {},
|
| 96 |
"train_freq": {
|
| 97 |
":type:": "<class 'stable_baselines3.common.type_aliases.TrainFreq'>",
|
| 98 |
+
":serialized:": "gAWVYQAAAAAAAACMJXN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi50eXBlX2FsaWFzZXOUjAlUcmFpbkZyZXGUk5RLBGgAjBJUcmFpbkZyZXF1ZW5jeVVuaXSUk5SMBHN0ZXCUhZRSlIaUgZQu"
|
| 99 |
},
|
| 100 |
"use_sde_at_warmup": false,
|
| 101 |
"exploration_initial_eps": 1.0,
|
|
|
|
| 105 |
"_n_calls": 10000000,
|
| 106 |
"max_grad_norm": 10,
|
| 107 |
"exploration_rate": 0.01,
|
|
|
|
|
|
|
| 108 |
"lr_schedule": {
|
| 109 |
":type:": "<class 'function'>",
|
| 110 |
+
":serialized:": "gAWV2AQAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLA0sTQwx0AIgAfACDAYMBUwCUToWUjAVmbG9hdJSFlIwScHJvZ3Jlc3NfcmVtYWluaW5nlIWUjGQvaG9tZS9iZW5qeS9taW5pY29uZGEzL2VudnMvc2IzLW5vLWhmL2xpYi9weXRob24zLjEwL3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwIPGxhbWJkYT6US2JDAgwAlIwOdmFsdWVfc2NoZWR1bGWUhZQpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flIxkL2hvbWUvYmVuankvbWluaWNvbmRhMy9lbnZzL3NiMy1uby1oZi9saWIvcHl0aG9uMy4xMC9zaXRlLXBhY2thZ2VzL3N0YWJsZV9iYXNlbGluZXMzL2NvbW1vbi91dGlscy5weZR1Tk5oAIwQX21ha2VfZW1wdHlfY2VsbJSTlClSlIWUdJRSlGgAjBJfZnVuY3Rpb25fc2V0c3RhdGWUk5RoIX2UfZQoaBiMCDxsYW1iZGE+lIwMX19xdWFsbmFtZV9flIwhZ2V0X3NjaGVkdWxlX2ZuLjxsb2NhbHM+LjxsYW1iZGE+lIwPX19hbm5vdGF0aW9uc19flH2UjA5fX2t3ZGVmYXVsdHNfX5ROjAxfX2RlZmF1bHRzX1+UTowKX19tb2R1bGVfX5RoGYwHX19kb2NfX5ROjAtfX2Nsb3N1cmVfX5RoAIwKX21ha2VfY2VsbJSTlGgCKGgHKEsBSwBLAEsBSwFLE0MEiABTAJROhZQpjAFflIWUjGQvaG9tZS9iZW5qeS9taW5pY29uZGEzL2VudnMvc2IzLWNvbmRhL2xpYi9weXRob24zLjEwL3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwEZnVuY5RLhkMCBAGUjAN2YWyUhZQpdJRSlH2UKGgWjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UaBiMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5RoGoxkL2hvbWUvYmVuankvbWluaWNvbmRhMy9lbnZzL3NiMy1jb25kYS9saWIvcHl0aG9uMy4xMC9zaXRlLXBhY2thZ2VzL3N0YWJsZV9iYXNlbGluZXMzL2NvbW1vbi91dGlscy5weZR1Tk5oHSlSlIWUdJRSlGgjaER9lH2UKGgYjARmdW5jlGgnjBljb25zdGFudF9mbi48bG9jYWxzPi5mdW5jlGgpfZRoK05oLE5oLWg/aC5OaC9oMUc/Gjbi6xxDLYWUUpSFlIwXX2Nsb3VkcGlja2xlX3N1Ym1vZHVsZXOUXZSMC19fZ2xvYmFsc19flH2UdYaUhlIwhZRSlIWUaE1dlGhPfZR1hpSGUjAu"
|
| 111 |
},
|
| 112 |
+
"batch_norm_stats": [],
|
| 113 |
+
"batch_norm_stats_target": [],
|
| 114 |
"exploration_schedule": {
|
| 115 |
":type:": "<class 'function'>",
|
| 116 |
+
":serialized:": "gAWVgwMAAAAAAACMF2Nsb3VkcGlja2xlLmNsb3VkcGlja2xllIwOX21ha2VfZnVuY3Rpb26Uk5QoaACMDV9idWlsdGluX3R5cGWUk5SMCENvZGVUeXBllIWUUpQoSwFLAEsASwFLBEsTQyhkAXwAGACIAWsEcgiIAFMAiAJkAXwAGACIAIgCGAAUAIgBGwAXAFMAlE5LAYaUKYwScHJvZ3Jlc3NfcmVtYWluaW5nlIWUjGQvaG9tZS9iZW5qeS9taW5pY29uZGEzL2VudnMvc2IzLW5vLWhmL2xpYi9weXRob24zLjEwL3NpdGUtcGFja2FnZXMvc3RhYmxlX2Jhc2VsaW5lczMvY29tbW9uL3V0aWxzLnB5lIwEZnVuY5RLdEMGDAEEARgClIwDZW5klIwMZW5kX2ZyYWN0aW9ulIwFc3RhcnSUh5QpdJRSlH2UKIwLX19wYWNrYWdlX1+UjBhzdGFibGVfYmFzZWxpbmVzMy5jb21tb26UjAhfX25hbWVfX5SMHnN0YWJsZV9iYXNlbGluZXMzLmNvbW1vbi51dGlsc5SMCF9fZmlsZV9flIxkL2hvbWUvYmVuankvbWluaWNvbmRhMy9lbnZzL3NiMy1uby1oZi9saWIvcHl0aG9uMy4xMC9zaXRlLXBhY2thZ2VzL3N0YWJsZV9iYXNlbGluZXMzL2NvbW1vbi91dGlscy5weZR1Tk5oAIwQX21ha2VfZW1wdHlfY2VsbJSTlClSlGgdKVKUaB0pUpSHlHSUUpRoAIwSX2Z1bmN0aW9uX3NldHN0YXRllJOUaCN9lH2UKGgYjARmdW5jlIwMX19xdWFsbmFtZV9flIwbZ2V0X2xpbmVhcl9mbi48bG9jYWxzPi5mdW5jlIwPX19hbm5vdGF0aW9uc19flH2UKGgKjAhidWlsdGluc5SMBWZsb2F0lJOUjAZyZXR1cm6UaC91jA5fX2t3ZGVmYXVsdHNfX5ROjAxfX2RlZmF1bHRzX1+UTowKX19tb2R1bGVfX5RoGYwHX19kb2NfX5ROjAtfX2Nsb3N1cmVfX5RoAIwKX21ha2VfY2VsbJSTlEc/hHrhR64Ue4WUUpRoN0c/uZmZmZmZmoWUUpRoN0c/8AAAAAAAAIWUUpSHlIwXX2Nsb3VkcGlja2xlX3N1Ym1vZHVsZXOUXZSMC19fZ2xvYmFsc19flH2UdYaUhlIwLg=="
|
| 117 |
}
|
| 118 |
}
|
dqn-SpaceInvadersNoFrameskip-v4/system_info.txt
CHANGED
|
@@ -1,8 +1,8 @@
|
|
| 1 |
-
- OS: Linux-
|
| 2 |
-
- Python: 3.
|
| 3 |
-
- Stable-Baselines3: 2.
|
| 4 |
-
- PyTorch: 2.
|
| 5 |
- GPU Enabled: True
|
| 6 |
- Numpy: 2.2.2
|
| 7 |
-
- Cloudpickle: 3.1.
|
| 8 |
- Gymnasium: 1.0.0
|
|
|
|
| 1 |
+
- OS: Linux-6.8.0-52-generic-x86_64-with-glibc2.39 # 53-Ubuntu SMP PREEMPT_DYNAMIC Sat Jan 11 00:06:25 UTC 2025
|
| 2 |
+
- Python: 3.10.12
|
| 3 |
+
- Stable-Baselines3: 2.5.0
|
| 4 |
+
- PyTorch: 2.6.0+cu124
|
| 5 |
- GPU Enabled: True
|
| 6 |
- Numpy: 2.2.2
|
| 7 |
+
- Cloudpickle: 3.1.1
|
| 8 |
- Gymnasium: 1.0.0
|
results.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"mean_reward": 739.5, "std_reward": 253.18422146729444, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2025-02-06T13:
|
|
|
|
| 1 |
+
{"mean_reward": 739.5, "std_reward": 253.18422146729444, "is_deterministic": false, "n_eval_episodes": 10, "eval_datetime": "2025-02-06T13:35:28.511766"}
|