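"""Evaluate a trained DDPG checkpoint and push the results to the Hugging Face Hub.

The checkpoint is expected to be a `(actor_state_dict, qf_state_dict)` tuple saved with
`torch.save`, matching the Actor and QNetwork classes imported from `rl.ddpg`.
"""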
import os
from dataclasses import dataclass
from typing import Callable

import gymnasium as gym
import torch
import torch.nn as nn
import tyro


@dataclass
class Args:
    exp_name: str = os.path.basename(__file__)[: -len(".py")]
    """the name of this experiment"""
    seed: int = 1
    """seed of the experiment"""
    torch_deterministic: bool = True
    """if toggled, `torch.backends.cudnn.deterministic=True`"""
    cuda: bool = True
    """if toggled, cuda will be enabled by default"""
    track: bool = False
    """if toggled, this experiment will be tracked with Weights and Biases"""
    wandb_project_name: str = "cleanRL"
    """the wandb's project name"""
    wandb_entity: str | None = None
    """the entity (team) of wandb's project"""
    capture_video: bool = False
    """whether to capture videos of the agent performances (check out the `videos` folder)"""
    save_model: bool = False
    """whether to save the model into the `runs/{run_name}` folder"""
    upload_model: bool = False
    """whether to upload the saved model to the Hugging Face Hub"""
    hf_entity: str = "jacksonhack"
    """the user or org name of the model repository on the Hugging Face Hub"""

    # Algorithm specific arguments
    env_id: str = "Hopper-v5"
    """the id of the MuJoCo environment"""
    total_timesteps: int = 1000000
    """total timesteps of the experiment"""
    learning_rate: float = 3e-4
    """the learning rate of the optimizer"""
    buffer_size: int = int(1e6)
    """the replay memory buffer size"""
    gamma: float = 0.99
    """the discount factor gamma"""
    tau: float = 0.005
    """target smoothing coefficient (default: 0.005)"""
    batch_size: int = 256
    """the batch size of samples drawn from the replay memory"""
    exploration_noise: float = 0.1
    """the scale of exploration noise"""
    learning_starts: int = 25_000
    """timestep to start learning"""
    policy_frequency: int = 2
    """the frequency of training the policy (delayed)"""
    noise_clip: float = 0.5
    """noise clip parameter of the Target Policy Smoothing Regularization"""


def evaluate(
    model_path: str,
    make_env: Callable,
    env_id: str,
    eval_episodes: int,
    run_name: str,
    Model: tuple[type[nn.Module], type[nn.Module]],
    device: torch.device = torch.device("cpu"),
    capture_video: bool = True,
    exploration_noise: float = 0.1,
):
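    """Roll out a saved DDPG actor in `env_id` until `eval_episodes` episodes have finished,
    and return the list of episodic returns.

    `Model` is the (Actor, QNetwork) class pair used at training time; the critic weights
    are loaded from the checkpoint but unused during evaluation.
    """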
    envs = gym.vector.SyncVectorEnv([make_env(env_id, 0, 0, capture_video, run_name)])
    actor = Model[0](envs).to(device)
    qf = Model[1](envs).to(device)
    actor_params, qf_params = torch.load(model_path, map_location=device)
    actor.load_state_dict(actor_params)
    actor.eval()
    qf.load_state_dict(qf_params)
    qf.eval()
    # note: qf is not used in this script

    obs, _ = envs.reset()
    episodic_returns = []
    while len(episodic_returns) < eval_episodes:
        with torch.no_grad():
            actions = actor(torch.Tensor(obs).to(device))
            # apply the same Gaussian exploration noise used during training
            actions += torch.normal(0, actor.action_scale * exploration_noise)
            actions = actions.cpu().numpy().clip(envs.single_action_space.low, envs.single_action_space.high)
        next_obs, _, _, _, infos = envs.step(actions)
        if "episode" in infos:
            # `infos["_episode"]` masks which sub-environments finished an episode this step
            for i, finished in enumerate(infos["_episode"]):
                if not finished:
                    continue
                print(f"eval_episode={len(episodic_returns)}, episodic_return={infos['episode']['r'][i]}")
                episodic_returns.append(infos["episode"]["r"][i])
        obs = next_obs

    print(f"episodic_returns: {episodic_returns}")
    return episodic_returns


if __name__ == "__main__":
    from rl.ddpg import Actor, QNetwork, make_env
    from rl_utils.huggingface import push_to_hub

    args = tyro.cli(Args)

    # To evaluate a published checkpoint instead, download it from the Hugging Face Hub:
    # from huggingface_hub import hf_hub_download
    # model_path = hf_hub_download(
    #     repo_id="cleanrl/HalfCheetah-v4-ddpg_continuous_action-seed1", filename="ddpg_continuous_action.cleanrl_model"
    # )
    run_name = "Hopper-v5__ddpg__1__1732697106"
    model_path = f"runs/{run_name}/ddpg.cleanrl_model"
    episodic_returns = evaluate(
        model_path,
        make_env,
        "Hopper-v5",
        eval_episodes=10,
        run_name=f"{run_name}-eval",
        Model=(Actor, QNetwork),
        device="cpu",
        capture_video=True,
    )

    # push the model, evaluation returns, and recorded videos to the Hugging Face Hub
    repo_name = f"{args.env_id}-{args.exp_name}-seed{args.seed}"
    repo_id = f"{args.hf_entity}/{repo_name}" if args.hf_entity else repo_name
    push_to_hub(args, episodic_returns, repo_id, "DDPG", f"runs/{run_name}", f"videos/{run_name}-eval")