|
|
import os
from dataclasses import dataclass
from typing import Callable, Optional

import gymnasium as gym
import torch
import torch.nn as nn
import tyro
|
|
@dataclass
class Args:
    """CLI configuration (parsed by tyro) for DDPG evaluation and model upload."""

    exp_name: str = os.path.basename(__file__)[: -len(".py")]
    """the name of this experiment"""
    seed: int = 1
    """seed of the experiment"""
    torch_deterministic: bool = True
    """if toggled, `torch.backends.cudnn.deterministic=False`"""
    cuda: bool = True
    """if toggled, cuda will be enabled by default"""
    track: bool = False
    """if toggled, this experiment will be tracked with Weights and Biases"""
    wandb_project_name: str = "cleanRL"
    """the wandb's project name"""
    wandb_entity: Optional[str] = None
    """the entity (team) of wandb's project"""
    capture_video: bool = False
    """whether to capture videos of the agent performances (check out `videos` folder)"""
    save_model: bool = False
    """whether to save model into the `runs/{run_name}` folder"""
    upload_model: bool = False
    """whether to upload the saved model to huggingface"""
    hf_entity: str = "jacksonhack"
    """the user or org name of the model repository from the Hugging Face Hub"""

    env_id: str = "Hopper-v5"
    """the environment id of the Gymnasium environment"""
    total_timesteps: int = 1000000
    """total timesteps of the experiments"""
    learning_rate: float = 3e-4
    """the learning rate of the optimizer"""
    buffer_size: int = int(1e6)
    """the replay memory buffer size"""
    gamma: float = 0.99
    """the discount factor gamma"""
    tau: float = 0.005
    """target smoothing coefficient (default: 0.005)"""
    batch_size: int = 256
    """the batch size of sample from the reply memory"""
    exploration_noise: float = 0.1
    """the scale of exploration noise"""
    learning_starts: int = 25000
    """timestep to start learning"""
    policy_frequency: int = 2
    """the frequency of training policy (delayed)"""
    noise_clip: float = 0.5
    """noise clip parameter of the Target Policy Smoothing Regularization"""
|
|
|
|
|
|
|
|
def evaluate(
    model_path: str,
    make_env: Callable,
    env_id: str,
    eval_episodes: int,
    run_name: str,
    Model: tuple,
    device: torch.device = torch.device("cpu"),
    capture_video: bool = True,
    exploration_noise: float = 0.1,
):
    """Roll out a saved DDPG actor until `eval_episodes` episodes complete.

    Args:
        model_path: checkpoint saved as a `(actor_state_dict, qf_state_dict)` tuple.
        make_env: factory returning a thunk that builds one (wrapped) environment;
            the wrapper is expected to emit `infos["episode"]` statistics on episode end.
        env_id: Gymnasium environment id to evaluate on.
        eval_episodes: number of completed episodes to collect.
        run_name: name used for the video folder when `capture_video` is set.
        Model: pair `(ActorClass, QNetworkClass)`; each is constructed from the vector env.
        device: torch device used for inference.
        capture_video: whether to record evaluation videos.
        exploration_noise: scale of the Gaussian action noise added during evaluation.

    Returns:
        List of episodic returns, one float per completed episode.
    """
    envs = gym.vector.SyncVectorEnv([make_env(env_id, 0, 0, capture_video, run_name)])
    actor = Model[0](envs).to(device)
    qf = Model[1](envs).to(device)
    # Checkpoint layout: (actor_state_dict, qf_state_dict).
    actor_params, qf_params = torch.load(model_path, map_location=device)
    actor.load_state_dict(actor_params)
    actor.eval()
    qf.load_state_dict(qf_params)  # loaded for completeness; not used during the rollout
    qf.eval()

    obs, _ = envs.reset()
    episodic_returns = []
    while len(episodic_returns) < eval_episodes:
        with torch.no_grad():
            actions = actor(torch.Tensor(obs).to(device))
            # Keep the same exploration noise as training so eval matches the behavior policy.
            actions += torch.normal(0, actor.action_scale * exploration_noise)
            actions = actions.cpu().numpy().clip(envs.single_action_space.low, envs.single_action_space.high)

        next_obs, _, _, _, infos = envs.step(actions)
        if "episode" in infos:
            # `infos["_episode"]` masks which sub-envs finished on this step; record
            # only those scalar returns (the unmasked arrays cover every sub-env).
            for i, finished in enumerate(infos["_episode"]):
                if finished:
                    episodic_return = float(infos["episode"]["r"][i])
                    print(f"eval_episode={len(episodic_returns)}, episodic_return={episodic_return}")
                    episodic_returns.append(episodic_return)
        obs = next_obs

    print(f"episodic_returns: {episodic_returns}")
    return episodic_returns
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    from rl.ddpg import Actor, QNetwork, make_env
    from rl_utils.huggingface import push_to_hub

    # Parse CLI flags before the (slow) evaluation so bad flags / --help fail fast,
    # and so the flags can gate the optional upload below.
    args = tyro.cli(Args)

    # Evaluate a specific pre-trained checkpoint.
    run_name = "Hopper-v5__ddpg__1__1732697106"
    model_path = "runs/Hopper-v5__ddpg__1__1732697106/ddpg.cleanrl_model"
    episodic_returns = evaluate(
        model_path,
        make_env,
        "Hopper-v5",
        eval_episodes=10,
        run_name=f"{run_name}-eval",
        Model=(Actor, QNetwork),
        device="cpu",
        capture_video=True,
    )

    # Honor the upload_model flag instead of pushing unconditionally.
    if args.upload_model:
        repo_name = f"{args.env_id}-{args.exp_name}-seed{args.seed}"
        repo_id = f"{args.hf_entity}/{repo_name}" if args.hf_entity else repo_name
        push_to_hub(args, episodic_returns, repo_id, "DDPG", f"runs/{run_name}", f"videos/{run_name}-eval")
|
|
|