pushing model
Browse files
- README.md +4 -4
- events.out.tfevents.1675614244.ip-26-0-128-121.2245265.0 → events.out.tfevents.1675616236.ip-26-0-142-109.1351408.0 +2 -2
- replay.mp4 +0 -0
- sebulba_ppo_envpool.cleanrl_model +2 -2
- sebulba_ppo_envpool.py +9 -2
- videos/Pong-v5__sebulba_ppo_envpool__1__074d5429-3b5b-459e-9b88-8db711fce3f2-eval/0.mp4 +0 -0
- videos/Pong-v5__sebulba_ppo_envpool__1__859fb3f0-0595-46e5-afd6-dda7445846f6-eval/0.mp4 +0 -0
README.md
CHANGED
|
@@ -16,7 +16,7 @@ model-index:
|
|
| 16 |
type: Pong-v5
|
| 17 |
metrics:
|
| 18 |
- type: mean_reward
|
| 19 |
-
value:
|
| 20 |
name: mean_reward
|
| 21 |
verified: false
|
| 22 |
---
|
|
@@ -46,7 +46,7 @@ curl -OL https://huggingface.co/cleanrl/Pong-v5-sebulba_ppo_envpool-seed1/raw/ma
|
|
| 46 |
curl -OL https://huggingface.co/cleanrl/Pong-v5-sebulba_ppo_envpool-seed1/raw/main/pyproject.toml
|
| 47 |
curl -OL https://huggingface.co/cleanrl/Pong-v5-sebulba_ppo_envpool-seed1/raw/main/poetry.lock
|
| 48 |
poetry install --all-extras
|
| 49 |
-
python sebulba_ppo_envpool.py --actor-device-ids 0 --learner-device-ids 1 2 3 4 --params-queue-timeout 0.02 --track --save-model --upload-model --hf-entity cleanrl --
|
| 50 |
```
|
| 51 |
|
| 52 |
# Hyperparameters
|
|
@@ -74,7 +74,7 @@ python sebulba_ppo_envpool.py --actor-device-ids 0 --learner-device-ids 1 2 3 4
|
|
| 74 |
'num_envs': 64,
|
| 75 |
'num_minibatches': 4,
|
| 76 |
'num_steps': 128,
|
| 77 |
-
'num_updates':
|
| 78 |
'params_queue_timeout': 0.02,
|
| 79 |
'profile': False,
|
| 80 |
'save_model': True,
|
|
@@ -82,7 +82,7 @@ python sebulba_ppo_envpool.py --actor-device-ids 0 --learner-device-ids 1 2 3 4
|
|
| 82 |
'target_kl': None,
|
| 83 |
'test_actor_learner_throughput': False,
|
| 84 |
'torch_deterministic': True,
|
| 85 |
-
'total_timesteps':
|
| 86 |
'track': True,
|
| 87 |
'update_epochs': 4,
|
| 88 |
'upload_model': True,
|
|
|
|
| 16 |
type: Pong-v5
|
| 17 |
metrics:
|
| 18 |
- type: mean_reward
|
| 19 |
+
value: 17.90 +/- 1.97
|
| 20 |
name: mean_reward
|
| 21 |
verified: false
|
| 22 |
---
|
|
|
|
| 46 |
curl -OL https://huggingface.co/cleanrl/Pong-v5-sebulba_ppo_envpool-seed1/raw/main/pyproject.toml
|
| 47 |
curl -OL https://huggingface.co/cleanrl/Pong-v5-sebulba_ppo_envpool-seed1/raw/main/poetry.lock
|
| 48 |
poetry install --all-extras
|
| 49 |
+
python sebulba_ppo_envpool.py --actor-device-ids 0 --learner-device-ids 1 2 3 4 --params-queue-timeout 0.02 --track --save-model --upload-model --hf-entity cleanrl --env-id Pong-v5 --seed 1
|
| 50 |
```
|
| 51 |
|
| 52 |
# Hyperparameters
|
|
|
|
| 74 |
'num_envs': 64,
|
| 75 |
'num_minibatches': 4,
|
| 76 |
'num_steps': 128,
|
| 77 |
+
'num_updates': 6103,
|
| 78 |
'params_queue_timeout': 0.02,
|
| 79 |
'profile': False,
|
| 80 |
'save_model': True,
|
|
|
|
| 82 |
'target_kl': None,
|
| 83 |
'test_actor_learner_throughput': False,
|
| 84 |
'torch_deterministic': True,
|
| 85 |
+
'total_timesteps': 50000000,
|
| 86 |
'track': True,
|
| 87 |
'update_epochs': 4,
|
| 88 |
'upload_model': True,
|
events.out.tfevents.1675614244.ip-26-0-128-121.2245265.0 → events.out.tfevents.1675616236.ip-26-0-142-109.1351408.0
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ccfb749271751f36fb56879919b9ff0978aab3d403b28b67b8c575302c29f4a
|
| 3 |
+
size 9358611
|
replay.mp4
CHANGED
|
Binary files a/replay.mp4 and b/replay.mp4 differ
|
|
|
sebulba_ppo_envpool.cleanrl_model
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ce9a36ebe3d00a58f91122dc6e4cafc62de05a884ad7a03513d93cb9b6c9846b
|
| 3 |
+
size 4378338
|
sebulba_ppo_envpool.py
CHANGED
|
@@ -749,7 +749,6 @@ if __name__ == "__main__":
|
|
| 749 |
),
|
| 750 |
),
|
| 751 |
)
|
| 752 |
-
print(devices)
|
| 753 |
learner_devices = [devices[d_id] for d_id in args.learner_device_ids]
|
| 754 |
actor_devices = [devices[d_id] for d_id in args.actor_device_ids]
|
| 755 |
agent_state = flax.jax_utils.replicate(agent_state, devices=learner_devices)
|
|
@@ -876,7 +875,15 @@ if __name__ == "__main__":
|
|
| 876 |
|
| 877 |
repo_name = f"{args.env_id}-{args.exp_name}-seed{args.seed}"
|
| 878 |
repo_id = f"{args.hf_entity}/{repo_name}" if args.hf_entity else repo_name
|
| 879 |
-
push_to_hub(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 880 |
|
| 881 |
envs.close()
|
| 882 |
writer.close()
|
|
|
|
| 749 |
),
|
| 750 |
),
|
| 751 |
)
|
|
|
|
| 752 |
learner_devices = [devices[d_id] for d_id in args.learner_device_ids]
|
| 753 |
actor_devices = [devices[d_id] for d_id in args.actor_device_ids]
|
| 754 |
agent_state = flax.jax_utils.replicate(agent_state, devices=learner_devices)
|
|
|
|
| 875 |
|
| 876 |
repo_name = f"{args.env_id}-{args.exp_name}-seed{args.seed}"
|
| 877 |
repo_id = f"{args.hf_entity}/{repo_name}" if args.hf_entity else repo_name
|
| 878 |
+
push_to_hub(
|
| 879 |
+
args,
|
| 880 |
+
episodic_returns,
|
| 881 |
+
repo_id,
|
| 882 |
+
"PPO",
|
| 883 |
+
f"runs/{run_name}",
|
| 884 |
+
f"videos/{run_name}-eval",
|
| 885 |
+
extra_dependencies=["jax", "envpool", "atari"],
|
| 886 |
+
)
|
| 887 |
|
| 888 |
envs.close()
|
| 889 |
writer.close()
|
videos/Pong-v5__sebulba_ppo_envpool__1__074d5429-3b5b-459e-9b88-8db711fce3f2-eval/0.mp4
ADDED
|
Binary file (161 kB). View file
|
|
|
videos/Pong-v5__sebulba_ppo_envpool__1__859fb3f0-0595-46e5-afd6-dda7445846f6-eval/0.mp4
DELETED
|
Binary file (42.2 kB)
|
|
|