jacksonhack committed on
Commit
1d8092a
·
verified ·
1 Parent(s): 4847bbd

pushing model

Browse files
README.md ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - Hopper-v5
4
+ - deep-reinforcement-learning
5
+ - reinforcement-learning
6
+ - custom-implementation
7
+ library_name: cleanrl
8
+ model-index:
9
+ - name: DDPG
10
+ results:
11
+ - task:
12
+ type: reinforcement-learning
13
+ name: reinforcement-learning
14
+ dataset:
15
+ name: Hopper-v5
16
+ type: Hopper-v5
17
+ metrics:
18
+ - type: mean_reward
19
+ value: 521.49 +/- 197.82
20
+ name: mean_reward
21
+ verified: false
22
+ ---
23
+
24
+ # (CleanRL) **DDPG** Agent Playing **Hopper-v5**
25
+
26
+ This is a trained model of a DDPG agent playing Hopper-v5.
27
+ The model was trained by using [CleanRL](https://github.com/vwxyzjn/cleanrl) and the most up-to-date training code can be
28
+ found [here](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_eval.py).
29
+
30
+ ## Get Started
31
+
32
+ To use this model, please install the `cleanrl` package with the following command:
33
+
34
+ ```
35
+ pip install "cleanrl[ddpg_eval]"
36
+ python -m cleanrl_utils.enjoy --exp-name ddpg_eval --env-id Hopper-v5
37
+ ```
38
+
39
+ Please refer to the [documentation](https://docs.cleanrl.dev/get-started/zoo/) for more detail.
40
+
41
+
42
+ ## Command to reproduce the training
43
+
44
+ ```bash
45
+ curl -OL https://huggingface.co/jacksonhack/Hopper-v5-ddpg_eval-seed1/raw/main/ddpg_eval.py
46
+ curl -OL https://huggingface.co/jacksonhack/Hopper-v5-ddpg_eval-seed1/raw/main/pyproject.toml
47
+ curl -OL https://huggingface.co/jacksonhack/Hopper-v5-ddpg_eval-seed1/raw/main/poetry.lock
48
+ poetry install --all-extras
49
+ python ddpg_eval.py
50
+ ```
51
+
52
+ # Hyperparameters
53
+ ```python
54
+ {'batch_size': 256,
55
+ 'buffer_size': 1000000,
56
+ 'capture_video': False,
57
+ 'cuda': True,
58
+ 'env_id': 'Hopper-v5',
59
+ 'exp_name': 'ddpg_eval',
60
+ 'exploration_noise': 0.1,
61
+ 'gamma': 0.99,
62
+ 'hf_entity': 'jacksonhack',
63
+ 'learning_rate': 0.0003,
64
+ 'learning_starts': 25000.0,
65
+ 'noise_clip': 0.5,
66
+ 'policy_frequency': 2,
67
+ 'save_model': False,
68
+ 'seed': 1,
69
+ 'tau': 0.005,
70
+ 'torch_deterministic': True,
71
+ 'total_timesteps': 1000000,
72
+ 'track': False,
73
+ 'upload_model': False,
74
+ 'wandb_entity': None,
75
+ 'wandb_project_name': 'cleanRL'}
76
+ ```
77
+
ddpg.cleanrl_model ADDED
Binary file (562 kB). View file
 
ddpg_eval.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Callable
2
+
3
+ import gymnasium as gym
4
+ import torch
5
+ import torch.nn as nn
6
+
7
+ from dataclasses import dataclass
8
+
9
+ import os
10
+ import tyro
11
+
12
+
13
+ @dataclass
14
+ class Args:
15
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
16
+ """the name of this experiment"""
17
+ seed: int = 1
18
+ """seed of the experiment"""
19
+ torch_deterministic: bool = True
20
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
21
+ cuda: bool = True
22
+ """if toggled, cuda will be enabled by default"""
23
+ track: bool = False
24
+ """if toggled, this experiment will be tracked with Weights and Biases"""
25
+ wandb_project_name: str = "cleanRL"
26
+ """the wandb's project name"""
27
+ wandb_entity: str = None
28
+ """the entity (team) of wandb's project"""
29
+ capture_video: bool = False
30
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
31
+ save_model: bool = False
32
+ """whether to save model into the `runs/{run_name}` folder"""
33
+ upload_model: bool = False
34
+ """whether to upload the saved model to huggingface"""
35
+ hf_entity: str = "jacksonhack"
36
+ """the user or org name of the model repository from the Hugging Face Hub"""
37
+
38
+ # Algorithm specific arguments
39
+ env_id: str = "Hopper-v5"
40
+ """the environment id of the Atari game"""
41
+ total_timesteps: int = 1000000
42
+ """total timesteps of the experiments"""
43
+ learning_rate: float = 3e-4
44
+ """the learning rate of the optimizer"""
45
+ buffer_size: int = int(1e6)
46
+ """the replay memory buffer size"""
47
+ gamma: float = 0.99
48
+ """the discount factor gamma"""
49
+ tau: float = 0.005
50
+ """target smoothing coefficient (default: 0.005)"""
51
+ batch_size: int = 256
52
+ """the batch size of sample from the reply memory"""
53
+ exploration_noise: float = 0.1
54
+ """the scale of exploration noise"""
55
+ learning_starts: int = 25e3
56
+ """timestep to start learning"""
57
+ policy_frequency: int = 2
58
+ """the frequency of training policy (delayed)"""
59
+ noise_clip: float = 0.5
60
+ """noise clip parameter of the Target Policy Smoothing Regularization"""
61
+
62
+
63
def evaluate(
    model_path: str,
    make_env: Callable,
    env_id: str,
    eval_episodes: int,
    run_name: str,
    Model: nn.Module,
    device: torch.device = torch.device("cpu"),
    capture_video: bool = True,
    exploration_noise: float = 0.1,
):
    """Roll out a trained DDPG actor and collect per-episode returns.

    Loads `(actor_params, qf_params)` from `model_path`, runs the actor in a
    single-env `SyncVectorEnv` until `eval_episodes` episodes finish, and
    returns the list of scalar episodic returns.

    Args:
        model_path: path to the `.cleanrl_model` checkpoint (a 2-tuple of state dicts).
        make_env: factory `(env_id, seed, idx, capture_video, run_name) -> thunk`.
        env_id: Gymnasium environment id.
        eval_episodes: number of completed episodes to evaluate.
        run_name: directory name used for recorded videos.
        Model: pair `(ActorClass, QNetworkClass)` (despite the `nn.Module` annotation).
        device: torch device for inference.
        capture_video: whether the env factory should record videos.
        exploration_noise: std scale of Gaussian action noise (matches training).

    Returns:
        list[float]: episodic returns of the finished episodes.
    """
    envs = gym.vector.SyncVectorEnv([make_env(env_id, 0, 0, capture_video, run_name)])
    actor = Model[0](envs).to(device)
    qf = Model[1](envs).to(device)
    actor_params, qf_params = torch.load(model_path, map_location=device)
    actor.load_state_dict(actor_params)
    actor.eval()
    qf.load_state_dict(qf_params)
    qf.eval()
    # note: qf is loaded for checkpoint completeness but never queried here

    obs, _ = envs.reset()
    episodic_returns = []
    while len(episodic_returns) < eval_episodes:
        with torch.no_grad():
            actions = actor(torch.Tensor(obs).to(device))
            # keep the same exploration noise as training for a comparable policy
            actions += torch.normal(0, actor.action_scale * exploration_noise)
            actions = actions.cpu().numpy().clip(envs.single_action_space.low, envs.single_action_space.high)

        next_obs, _, _, _, infos = envs.step(actions)

        if "episode" in infos:
            # bug fix: record only the scalar returns of envs that actually
            # finished this step (masked by "_episode"); the original appended
            # the whole per-env array, including stale slots.
            for r in infos["episode"]["r"][infos["_episode"]]:
                print(f"eval_episode={len(episodic_returns)}, episodic_return={r}")
                episodic_returns.append(float(r))

        obs = next_obs

    print(f"episodic_returns: {episodic_returns}")
    return episodic_returns
112
+
113
+
114
+ if __name__ == "__main__":
115
+ # from huggingface_hub import hf_hub_download
116
+
117
+ from rl.ddpg import Actor, QNetwork, make_env
118
+
119
+ # model_path = hf_hub_download(
120
+ # repo_id="cleanrl/HalfCheetah-v4-ddpg_continuous_action-seed1", filename="ddpg_continuous_action.cleanrl_model"
121
+ # )
122
+ run_name = "Hopper-v5__ddpg__1__1732697106"
123
+ model_path = "runs/Hopper-v5__ddpg__1__1732697106/ddpg.cleanrl_model"
124
+ episodic_returns = evaluate(
125
+ model_path,
126
+ make_env,
127
+ "Hopper-v5",
128
+ eval_episodes=10,
129
+ run_name=f"{run_name}-eval",
130
+ Model=(Actor, QNetwork),
131
+ device="cpu",
132
+ capture_video=True,
133
+ )
134
+
135
+ from rl_utils.huggingface import push_to_hub
136
+
137
+ args = tyro.cli(Args)
138
+
139
+ repo_name = f"{args.env_id}-{args.exp_name}-seed{args.seed}"
140
+ repo_id = f"{args.hf_entity}/{repo_name}" if args.hf_entity else repo_name
141
+ push_to_hub(args, episodic_returns, repo_id, "DDPG", f"runs/{run_name}", f"videos/{run_name}-eval")
events.out.tfevents.1732697106.DESKTOP-3BC7099.18000.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e465d5a0a6fa6e0ba25c042077b5772bd45d4f8f0460b8e53e1bb2380257d118
3
+ size 2912083
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "rl"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["jackson <1666825283@qq.com>"]
6
+ readme = "README.md"
7
+ packages = [
8
+ {include = "rl"},
9
+ {include = "rl_utils"},
10
+ ]
11
+
12
+
13
+ [tool.poetry.dependencies]
14
+ python = "^3.10"
15
+ gymnasium = {extras = ["box2d"], version = "^1.0.0"}
16
+ tensorboard = "^2.18.0"
17
+ huggingface-hub = "^0.26.2"
18
+ tyro = "^0.8.14"
19
+ torch = "^2.5.1"
20
+ stable-baselines3 = "^2.3.2"
21
+ numpy = "^1.21.6"
22
+ tenacity = "^9.0.0"
23
+ mujoco = "2.3.3"
24
+
25
+
26
+ [tool.poetry.group.dev.dependencies]
27
+ black = "^24.10.0"
28
+ wandb = "^0.18.7"
29
+ moviepy = "^2.1.1"
30
+
31
+ [build-system]
32
+ requires = ["poetry-core"]
33
+ build-backend = "poetry.core.masonry.api"
replay.mp4 ADDED
Binary file (166 kB). View file
 
videos/Hopper-v5__ddpg__1__1732697106-eval/rl-video-episode-0.mp4 ADDED
Binary file (179 kB). View file
 
videos/Hopper-v5__ddpg__1__1732697106-eval/rl-video-episode-1.mp4 ADDED
Binary file (204 kB). View file
 
videos/Hopper-v5__ddpg__1__1732697106-eval/rl-video-episode-8.mp4 ADDED
Binary file (166 kB). View file