pushing model
Browse files- README.md +77 -0
- ddpg.cleanrl_model +0 -0
- ddpg_eval.py +141 -0
- events.out.tfevents.1732697106.DESKTOP-3BC7099.18000.0 +3 -0
- poetry.lock +0 -0
- pyproject.toml +33 -0
- replay.mp4 +0 -0
- videos/Hopper-v5__ddpg__1__1732697106-eval/rl-video-episode-0.mp4 +0 -0
- videos/Hopper-v5__ddpg__1__1732697106-eval/rl-video-episode-1.mp4 +0 -0
- videos/Hopper-v5__ddpg__1__1732697106-eval/rl-video-episode-8.mp4 +0 -0
README.md
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
tags:
|
| 3 |
+
- Hopper-v5
|
| 4 |
+
- deep-reinforcement-learning
|
| 5 |
+
- reinforcement-learning
|
| 6 |
+
- custom-implementation
|
| 7 |
+
library_name: cleanrl
|
| 8 |
+
model-index:
|
| 9 |
+
- name: DDPG
|
| 10 |
+
results:
|
| 11 |
+
- task:
|
| 12 |
+
type: reinforcement-learning
|
| 13 |
+
name: reinforcement-learning
|
| 14 |
+
dataset:
|
| 15 |
+
name: Hopper-v5
|
| 16 |
+
type: Hopper-v5
|
| 17 |
+
metrics:
|
| 18 |
+
- type: mean_reward
|
| 19 |
+
value: 521.49 +/- 197.82
|
| 20 |
+
name: mean_reward
|
| 21 |
+
verified: false
|
| 22 |
+
---
|
| 23 |
+
|
| 24 |
+
# (CleanRL) **DDPG** Agent Playing **Hopper-v5**
|
| 25 |
+
|
| 26 |
+
This is a trained model of a DDPG agent playing Hopper-v5.
|
| 27 |
+
The model was trained by using [CleanRL](https://github.com/vwxyzjn/cleanrl) and the most up-to-date training code can be
|
| 28 |
+
found [here](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_eval.py).
|
| 29 |
+
|
| 30 |
+
## Get Started
|
| 31 |
+
|
| 32 |
+
To use this model, please install the `cleanrl` package with the following command:
|
| 33 |
+
|
| 34 |
+
```
|
| 35 |
+
pip install "cleanrl[ddpg_eval]"
|
| 36 |
+
python -m cleanrl_utils.enjoy --exp-name ddpg_eval --env-id Hopper-v5
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
Please refer to the [documentation](https://docs.cleanrl.dev/get-started/zoo/) for more detail.
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
## Command to reproduce the training
|
| 43 |
+
|
| 44 |
+
```bash
|
| 45 |
+
curl -OL https://huggingface.co/jacksonhack/Hopper-v5-ddpg_eval-seed1/raw/main/ddpg_eval.py
|
| 46 |
+
curl -OL https://huggingface.co/jacksonhack/Hopper-v5-ddpg_eval-seed1/raw/main/pyproject.toml
|
| 47 |
+
curl -OL https://huggingface.co/jacksonhack/Hopper-v5-ddpg_eval-seed1/raw/main/poetry.lock
|
| 48 |
+
poetry install --all-extras
|
| 49 |
+
python ddpg_eval.py
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
# Hyperparameters
|
| 53 |
+
```python
|
| 54 |
+
{'batch_size': 256,
|
| 55 |
+
'buffer_size': 1000000,
|
| 56 |
+
'capture_video': False,
|
| 57 |
+
'cuda': True,
|
| 58 |
+
'env_id': 'Hopper-v5',
|
| 59 |
+
'exp_name': 'ddpg_eval',
|
| 60 |
+
'exploration_noise': 0.1,
|
| 61 |
+
'gamma': 0.99,
|
| 62 |
+
'hf_entity': 'jacksonhack',
|
| 63 |
+
'learning_rate': 0.0003,
|
| 64 |
+
'learning_starts': 25000.0,
|
| 65 |
+
'noise_clip': 0.5,
|
| 66 |
+
'policy_frequency': 2,
|
| 67 |
+
'save_model': False,
|
| 68 |
+
'seed': 1,
|
| 69 |
+
'tau': 0.005,
|
| 70 |
+
'torch_deterministic': True,
|
| 71 |
+
'total_timesteps': 1000000,
|
| 72 |
+
'track': False,
|
| 73 |
+
'upload_model': False,
|
| 74 |
+
'wandb_entity': None,
|
| 75 |
+
'wandb_project_name': 'cleanRL'}
|
| 76 |
+
```
|
| 77 |
+
|
ddpg.cleanrl_model
ADDED
|
Binary file (562 kB). View file
|
|
|
ddpg_eval.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Callable
|
| 2 |
+
|
| 3 |
+
import gymnasium as gym
|
| 4 |
+
import torch
|
| 5 |
+
import torch.nn as nn
|
| 6 |
+
|
| 7 |
+
from dataclasses import dataclass
|
| 8 |
+
|
| 9 |
+
import os
|
| 10 |
+
import tyro
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
@dataclass
|
| 14 |
+
class Args:
|
| 15 |
+
exp_name: str = os.path.basename(__file__)[: -len(".py")]
|
| 16 |
+
"""the name of this experiment"""
|
| 17 |
+
seed: int = 1
|
| 18 |
+
"""seed of the experiment"""
|
| 19 |
+
torch_deterministic: bool = True
|
| 20 |
+
"""if toggled, `torch.backends.cudnn.deterministic=False`"""
|
| 21 |
+
cuda: bool = True
|
| 22 |
+
"""if toggled, cuda will be enabled by default"""
|
| 23 |
+
track: bool = False
|
| 24 |
+
"""if toggled, this experiment will be tracked with Weights and Biases"""
|
| 25 |
+
wandb_project_name: str = "cleanRL"
|
| 26 |
+
"""the wandb's project name"""
|
| 27 |
+
wandb_entity: str = None
|
| 28 |
+
"""the entity (team) of wandb's project"""
|
| 29 |
+
capture_video: bool = False
|
| 30 |
+
"""whether to capture videos of the agent performances (check out `videos` folder)"""
|
| 31 |
+
save_model: bool = False
|
| 32 |
+
"""whether to save model into the `runs/{run_name}` folder"""
|
| 33 |
+
upload_model: bool = False
|
| 34 |
+
"""whether to upload the saved model to huggingface"""
|
| 35 |
+
hf_entity: str = "jacksonhack"
|
| 36 |
+
"""the user or org name of the model repository from the Hugging Face Hub"""
|
| 37 |
+
|
| 38 |
+
# Algorithm specific arguments
|
| 39 |
+
env_id: str = "Hopper-v5"
|
| 40 |
+
"""the environment id of the Atari game"""
|
| 41 |
+
total_timesteps: int = 1000000
|
| 42 |
+
"""total timesteps of the experiments"""
|
| 43 |
+
learning_rate: float = 3e-4
|
| 44 |
+
"""the learning rate of the optimizer"""
|
| 45 |
+
buffer_size: int = int(1e6)
|
| 46 |
+
"""the replay memory buffer size"""
|
| 47 |
+
gamma: float = 0.99
|
| 48 |
+
"""the discount factor gamma"""
|
| 49 |
+
tau: float = 0.005
|
| 50 |
+
"""target smoothing coefficient (default: 0.005)"""
|
| 51 |
+
batch_size: int = 256
|
| 52 |
+
"""the batch size of sample from the reply memory"""
|
| 53 |
+
exploration_noise: float = 0.1
|
| 54 |
+
"""the scale of exploration noise"""
|
| 55 |
+
learning_starts: int = 25e3
|
| 56 |
+
"""timestep to start learning"""
|
| 57 |
+
policy_frequency: int = 2
|
| 58 |
+
"""the frequency of training policy (delayed)"""
|
| 59 |
+
noise_clip: float = 0.5
|
| 60 |
+
"""noise clip parameter of the Target Policy Smoothing Regularization"""
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def evaluate(
    model_path: str,
    make_env: Callable,
    env_id: str,
    eval_episodes: int,
    run_name: str,
    Model: nn.Module,
    device: torch.device = torch.device("cpu"),
    capture_video: bool = True,
    exploration_noise: float = 0.1,
):
    """Roll out a trained DDPG actor and collect episodic returns.

    Args:
        model_path: path to a checkpoint holding ``(actor_state_dict, qf_state_dict)``.
        make_env: factory ``(env_id, seed, idx, capture_video, run_name) -> thunk``
            producing a single environment callable.
        env_id: Gymnasium environment id to evaluate on.
        eval_episodes: number of completed episodes to collect before returning.
        run_name: folder name used for captured videos.
        Model: a ``(ActorClass, QNetworkClass)`` pair (despite the ``nn.Module``
            annotation — it is indexed, not instantiated directly).
        device: torch device used for inference.
        capture_video: whether to record evaluation videos.
        exploration_noise: stddev scale for the Gaussian action noise.

    Returns:
        A list of episodic returns (floats), one entry per finished episode.
    """
    envs = gym.vector.SyncVectorEnv([make_env(env_id, 0, 0, capture_video, run_name)])
    actor = Model[0](envs).to(device)
    qf = Model[1](envs).to(device)
    actor_params, qf_params = torch.load(model_path, map_location=device)
    actor.load_state_dict(actor_params)
    actor.eval()
    qf.load_state_dict(qf_params)
    qf.eval()
    # note: qf is loaded so the checkpoint unpacks cleanly, but it is not used here

    obs, _ = envs.reset()
    episodic_returns = []
    while len(episodic_returns) < eval_episodes:
        with torch.no_grad():
            actions = actor(torch.Tensor(obs).to(device))
            actions += torch.normal(0, actor.action_scale * exploration_noise)
            actions = actions.cpu().numpy().clip(envs.single_action_space.low, envs.single_action_space.high)

        next_obs, _, _, _, infos = envs.step(actions)

        if "episode" in infos:
            # `infos["_episode"]` masks which sub-envs finished this step.
            # The original appended the whole `r` vector (one array per step);
            # record only the returns of the envs that actually finished.
            finished = infos["_episode"]
            for r in infos["episode"]["r"][finished]:
                print(f"eval_episode={len(episodic_returns)}, episodic_return={r}")
                episodic_returns.append(float(r))

        obs = next_obs

    print(f"episodic_returns: {episodic_returns}")
    return episodic_returns
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
if __name__ == "__main__":
    from rl.ddpg import Actor, QNetwork, make_env
    from rl_utils.huggingface import push_to_hub

    # Parse CLI flags first so invalid arguments fail fast, instead of after
    # the 10-episode rollout (the original called tyro.cli at the very end).
    args = tyro.cli(Args)

    # NOTE(review): the evaluated env/model are pinned to this specific run,
    # while the repo name below uses args.env_id/exp_name/seed — confirm the
    # CLI values match this checkpoint before pushing.
    run_name = "Hopper-v5__ddpg__1__1732697106"
    model_path = f"runs/{run_name}/ddpg.cleanrl_model"
    episodic_returns = evaluate(
        model_path,
        make_env,
        "Hopper-v5",
        eval_episodes=10,
        run_name=f"{run_name}-eval",
        Model=(Actor, QNetwork),
        device="cpu",
        capture_video=True,
    )

    repo_name = f"{args.env_id}-{args.exp_name}-seed{args.seed}"
    repo_id = f"{args.hf_entity}/{repo_name}" if args.hf_entity else repo_name
    push_to_hub(args, episodic_returns, repo_id, "DDPG", f"runs/{run_name}", f"videos/{run_name}-eval")
|
events.out.tfevents.1732697106.DESKTOP-3BC7099.18000.0
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e465d5a0a6fa6e0ba25c042077b5772bd45d4f8f0460b8e53e1bb2380257d118
|
| 3 |
+
size 2912083
|
poetry.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pyproject.toml
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[tool.poetry]
|
| 2 |
+
name = "rl"
|
| 3 |
+
version = "0.1.0"
|
| 4 |
+
description = ""
|
| 5 |
+
authors = ["jackson <1666825283@qq.com>"]
|
| 6 |
+
readme = "README.md"
|
| 7 |
+
packages = [
|
| 8 |
+
{include = "rl"},
|
| 9 |
+
{include = "rl_utils"},
|
| 10 |
+
]
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
[tool.poetry.dependencies]
|
| 14 |
+
python = "^3.10"
|
| 15 |
+
gymnasium = {extras = ["box2d"], version = "^1.0.0"}
|
| 16 |
+
tensorboard = "^2.18.0"
|
| 17 |
+
huggingface-hub = "^0.26.2"
|
| 18 |
+
tyro = "^0.8.14"
|
| 19 |
+
torch = "^2.5.1"
|
| 20 |
+
stable-baselines3 = "^2.3.2"
|
| 21 |
+
numpy = "^1.21.6"
|
| 22 |
+
tenacity = "^9.0.0"
|
| 23 |
+
mujoco = "2.3.3"
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
[tool.poetry.group.dev.dependencies]
|
| 27 |
+
black = "^24.10.0"
|
| 28 |
+
wandb = "^0.18.7"
|
| 29 |
+
moviepy = "^2.1.1"
|
| 30 |
+
|
| 31 |
+
[build-system]
|
| 32 |
+
requires = ["poetry-core"]
|
| 33 |
+
build-backend = "poetry.core.masonry.api"
|
replay.mp4
ADDED
|
Binary file (166 kB). View file
|
|
|
videos/Hopper-v5__ddpg__1__1732697106-eval/rl-video-episode-0.mp4
ADDED
|
Binary file (179 kB). View file
|
|
|
videos/Hopper-v5__ddpg__1__1732697106-eval/rl-video-episode-1.mp4
ADDED
|
Binary file (204 kB). View file
|
|
|
videos/Hopper-v5__ddpg__1__1732697106-eval/rl-video-episode-8.mp4
ADDED
|
Binary file (166 kB). View file
|
|
|