jacksonhack committed on
Commit
1d8092a
·
verified ·
1 Parent(s): 4847bbd

pushing model

Browse files
README.md ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - Hopper-v5
4
+ - deep-reinforcement-learning
5
+ - reinforcement-learning
6
+ - custom-implementation
7
+ library_name: cleanrl
8
+ model-index:
9
+ - name: DDPG
10
+ results:
11
+ - task:
12
+ type: reinforcement-learning
13
+ name: reinforcement-learning
14
+ dataset:
15
+ name: Hopper-v5
16
+ type: Hopper-v5
17
+ metrics:
18
+ - type: mean_reward
19
+ value: 521.49 +/- 197.82
20
+ name: mean_reward
21
+ verified: false
22
+ ---
23
+
24
+ # (CleanRL) **DDPG** Agent Playing **Hopper-v5**
25
+
26
+ This is a trained model of a DDPG agent playing Hopper-v5.
27
+ The model was trained by using [CleanRL](https://github.com/vwxyzjn/cleanrl) and the most up-to-date training code can be
28
+ found [here](https://github.com/vwxyzjn/cleanrl/blob/master/cleanrl/ddpg_eval.py).
29
+
30
+ ## Get Started
31
+
32
+ To use this model, please install the `cleanrl` package with the following command:
33
+
34
+ ```
35
+ pip install "cleanrl[ddpg_eval]"
36
+ python -m cleanrl_utils.enjoy --exp-name ddpg_eval --env-id Hopper-v5
37
+ ```
38
+
39
+ Please refer to the [documentation](https://docs.cleanrl.dev/get-started/zoo/) for more detail.
40
+
41
+
42
+ ## Command to reproduce the training
43
+
44
+ ```bash
45
+ curl -OL https://huggingface.co/jacksonhack/Hopper-v5-ddpg_eval-seed1/raw/main/ddpg_eval.py
46
+ curl -OL https://huggingface.co/jacksonhack/Hopper-v5-ddpg_eval-seed1/raw/main/pyproject.toml
47
+ curl -OL https://huggingface.co/jacksonhack/Hopper-v5-ddpg_eval-seed1/raw/main/poetry.lock
48
+ poetry install --all-extras
49
+ python ddpg_eval.py
50
+ ```
51
+
52
+ # Hyperparameters
53
+ ```python
54
+ {'batch_size': 256,
55
+ 'buffer_size': 1000000,
56
+ 'capture_video': False,
57
+ 'cuda': True,
58
+ 'env_id': 'Hopper-v5',
59
+ 'exp_name': 'ddpg_eval',
60
+ 'exploration_noise': 0.1,
61
+ 'gamma': 0.99,
62
+ 'hf_entity': 'jacksonhack',
63
+ 'learning_rate': 0.0003,
64
+ 'learning_starts': 25000.0,
65
+ 'noise_clip': 0.5,
66
+ 'policy_frequency': 2,
67
+ 'save_model': False,
68
+ 'seed': 1,
69
+ 'tau': 0.005,
70
+ 'torch_deterministic': True,
71
+ 'total_timesteps': 1000000,
72
+ 'track': False,
73
+ 'upload_model': False,
74
+ 'wandb_entity': None,
75
+ 'wandb_project_name': 'cleanRL'}
76
+ ```
77
+
ddpg.cleanrl_model ADDED
Binary file (562 kB). View file
 
ddpg_eval.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Callable
2
+
3
+ import gymnasium as gym
4
+ import torch
5
+ import torch.nn as nn
6
+
7
+ from dataclasses import dataclass
8
+
9
+ import os
10
+ import tyro
11
+
12
+
13
+ @dataclass
14
+ class Args:
15
+ exp_name: str = os.path.basename(__file__)[: -len(".py")]
16
+ """the name of this experiment"""
17
+ seed: int = 1
18
+ """seed of the experiment"""
19
+ torch_deterministic: bool = True
20
+ """if toggled, `torch.backends.cudnn.deterministic=False`"""
21
+ cuda: bool = True
22
+ """if toggled, cuda will be enabled by default"""
23
+ track: bool = False
24
+ """if toggled, this experiment will be tracked with Weights and Biases"""
25
+ wandb_project_name: str = "cleanRL"
26
+ """the wandb's project name"""
27
+ wandb_entity: str = None
28
+ """the entity (team) of wandb's project"""
29
+ capture_video: bool = False
30
+ """whether to capture videos of the agent performances (check out `videos` folder)"""
31
+ save_model: bool = False
32
+ """whether to save model into the `runs/{run_name}` folder"""
33
+ upload_model: bool = False
34
+ """whether to upload the saved model to huggingface"""
35
+ hf_entity: str = "jacksonhack"
36
+ """the user or org name of the model repository from the Hugging Face Hub"""
37
+
38
+ # Algorithm specific arguments
39
+ env_id: str = "Hopper-v5"
40
+ """the environment id of the Atari game"""
41
+ total_timesteps: int = 1000000
42
+ """total timesteps of the experiments"""
43
+ learning_rate: float = 3e-4
44
+ """the learning rate of the optimizer"""
45
+ buffer_size: int = int(1e6)
46
+ """the replay memory buffer size"""
47
+ gamma: float = 0.99
48
+ """the discount factor gamma"""
49
+ tau: float = 0.005
50
+ """target smoothing coefficient (default: 0.005)"""
51
+ batch_size: int = 256
52
+ """the batch size of sample from the reply memory"""
53
+ exploration_noise: float = 0.1
54
+ """the scale of exploration noise"""
55
+ learning_starts: int = 25e3
56
+ """timestep to start learning"""
57
+ policy_frequency: int = 2
58
+ """the frequency of training policy (delayed)"""
59
+ noise_clip: float = 0.5
60
+ """noise clip parameter of the Target Policy Smoothing Regularization"""
61
+
62
+
63
def evaluate(
    model_path: str,
    make_env: Callable,
    env_id: str,
    eval_episodes: int,
    run_name: str,
    Model: nn.Module,
    device: torch.device = torch.device("cpu"),
    capture_video: bool = True,
    exploration_noise: float = 0.1,
):
    """Roll out a trained DDPG actor and collect per-episode returns.

    Loads `(actor_params, qf_params)` from `model_path`, runs the actor in a
    single-env `SyncVectorEnv` until `eval_episodes` episodes finish, and
    returns the list of scalar episodic returns.

    Args:
        model_path: path to the `.cleanrl_model` checkpoint (a 2-tuple of state dicts).
        make_env: factory `(env_id, seed, idx, capture_video, run_name) -> thunk`.
        env_id: Gymnasium environment id.
        eval_episodes: number of completed episodes to evaluate.
        run_name: directory name used for recorded videos.
        Model: pair `(ActorClass, QNetworkClass)` (despite the `nn.Module` annotation).
        device: torch device for inference.
        capture_video: whether the env factory should record videos.
        exploration_noise: std scale of Gaussian action noise (matches training).

    Returns:
        list[float]: episodic returns of the finished episodes.
    """
    envs = gym.vector.SyncVectorEnv([make_env(env_id, 0, 0, capture_video, run_name)])
    actor = Model[0](envs).to(device)
    qf = Model[1](envs).to(device)
    actor_params, qf_params = torch.load(model_path, map_location=device)
    actor.load_state_dict(actor_params)
    actor.eval()
    qf.load_state_dict(qf_params)
    qf.eval()
    # note: qf is loaded for checkpoint completeness but never queried here

    obs, _ = envs.reset()
    episodic_returns = []
    while len(episodic_returns) < eval_episodes:
        with torch.no_grad():
            actions = actor(torch.Tensor(obs).to(device))
            # keep the same exploration noise as training for a comparable policy
            actions += torch.normal(0, actor.action_scale * exploration_noise)
            actions = actions.cpu().numpy().clip(envs.single_action_space.low, envs.single_action_space.high)

        next_obs, _, _, _, infos = envs.step(actions)

        if "episode" in infos:
            # bug fix: record only the scalar returns of envs that actually
            # finished this step (masked by "_episode"); the original appended
            # the whole per-env array, including stale slots.
            for r in infos["episode"]["r"][infos["_episode"]]:
                print(f"eval_episode={len(episodic_returns)}, episodic_return={r}")
                episodic_returns.append(float(r))

        obs = next_obs

    print(f"episodic_returns: {episodic_returns}")
    return episodic_returns
112
+
113
+
114
+ if __name__ == "__main__":
115
+ # from huggingface_hub import hf_hub_download
116
+
117
+ from rl.ddpg import Actor, QNetwork, make_env
118
+
119
+ # model_path = hf_hub_download(
120
+ # repo_id="cleanrl/HalfCheetah-v4-ddpg_continuous_action-seed1", filename="ddpg_continuous_action.cleanrl_model"
121
+ # )
122
+ run_name = "Hopper-v5__ddpg__1__1732697106"
123
+ model_path = "runs/Hopper-v5__ddpg__1__1732697106/ddpg.cleanrl_model"
124
+ episodic_returns = evaluate(
125
+ model_path,
126
+ make_env,
127
+ "Hopper-v5",
128
+ eval_episodes=10,
129
+ run_name=f"{run_name}-eval",
130
+ Model=(Actor, QNetwork),
131
+ device="cpu",
132
+ capture_video=True,
133
+ )
134
+
135
+ from rl_utils.huggingface import push_to_hub
136
+
137
+ args = tyro.cli(Args)
138
+
139
+ repo_name = f"{args.env_id}-{args.exp_name}-seed{args.seed}"
140
+ repo_id = f"{args.hf_entity}/{repo_name}" if args.hf_entity else repo_name
141
+ push_to_hub(args, episodic_returns, repo_id, "DDPG", f"runs/{run_name}", f"videos/{run_name}-eval")
events.out.tfevents.1732697106.DESKTOP-3BC7099.18000.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e465d5a0a6fa6e0ba25c042077b5772bd45d4f8f0460b8e53e1bb2380257d118
3
+ size 2912083
poetry.lock ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [tool.poetry]
2
+ name = "rl"
3
+ version = "0.1.0"
4
+ description = ""
5
+ authors = ["jackson <1666825283@qq.com>"]
6
+ readme = "README.md"
7
+ packages = [
8
+ {include = "rl"},
9
+ {include = "rl_utils"},
10
+ ]
11
+
12
+
13
+ [tool.poetry.dependencies]
14
+ python = "^3.10"
15
+ gymnasium = {extras = ["box2d"], version = "^1.0.0"}
16
+ tensorboard = "^2.18.0"
17
+ huggingface-hub = "^0.26.2"
18
+ tyro = "^0.8.14"
19
+ torch = "^2.5.1"
20
+ stable-baselines3 = "^2.3.2"
21
+ numpy = "^1.21.6"
22
+ tenacity = "^9.0.0"
23
+ mujoco = "2.3.3"
24
+
25
+
26
+ [tool.poetry.group.dev.dependencies]
27
+ black = "^24.10.0"
28
+ wandb = "^0.18.7"
29
+ moviepy = "^2.1.1"
30
+
31
+ [build-system]
32
+ requires = ["poetry-core"]
33
+ build-backend = "poetry.core.masonry.api"
replay.mp4 ADDED
Binary file (166 kB). View file
 
videos/Hopper-v5__ddpg__1__1732697106-eval/rl-video-episode-0.mp4 ADDED
Binary file (179 kB). View file
 
videos/Hopper-v5__ddpg__1__1732697106-eval/rl-video-episode-1.mp4 ADDED
Binary file (204 kB). View file
 
videos/Hopper-v5__ddpg__1__1732697106-eval/rl-video-episode-8.mp4 ADDED
Binary file (166 kB). View file