"""Evaluate a trained DDPG actor on a Gymnasium environment.

Loads an (actor, qf) checkpoint, rolls out a fixed number of evaluation
episodes with Gaussian exploration noise, and (when run as a script)
pushes the results to the Hugging Face Hub.
"""

import os
from dataclasses import dataclass
from typing import Callable, Optional

import gymnasium as gym
import torch
import torch.nn as nn
import tyro


@dataclass
class Args:
    exp_name: str = os.path.basename(__file__)[: -len(".py")]
    """the name of this experiment"""
    seed: int = 1
    """seed of the experiment"""
    torch_deterministic: bool = True
    """if toggled, `torch.backends.cudnn.deterministic=False`"""
    cuda: bool = True
    """if toggled, cuda will be enabled by default"""
    track: bool = False
    """if toggled, this experiment will be tracked with Weights and Biases"""
    wandb_project_name: str = "cleanRL"
    """the wandb's project name"""
    # was `str = None`: None default requires an Optional annotation
    wandb_entity: Optional[str] = None
    """the entity (team) of wandb's project"""
    capture_video: bool = False
    """whether to capture videos of the agent performances (check out `videos` folder)"""
    save_model: bool = False
    """whether to save model into the `runs/{run_name}` folder"""
    upload_model: bool = False
    """whether to upload the saved model to huggingface"""
    hf_entity: str = "jacksonhack"
    """the user or org name of the model repository from the Hugging Face Hub"""

    # Algorithm specific arguments
    env_id: str = "Hopper-v5"
    """the environment id of the MuJoCo task"""
    total_timesteps: int = 1000000
    """total timesteps of the experiments"""
    learning_rate: float = 3e-4
    """the learning rate of the optimizer"""
    buffer_size: int = int(1e6)
    """the replay memory buffer size"""
    gamma: float = 0.99
    """the discount factor gamma"""
    tau: float = 0.005
    """target smoothing coefficient (default: 0.005)"""
    batch_size: int = 256
    """the batch size of sample from the reply memory"""
    exploration_noise: float = 0.1
    """the scale of exploration noise"""
    # was the bare float literal 25e3 on an int-annotated field
    learning_starts: int = int(25e3)
    """timestep to start learning"""
    policy_frequency: int = 2
    """the frequency of training policy (delayed)"""
    noise_clip: float = 0.5
    """noise clip parameter of the Target Policy Smoothing Regularization"""


def evaluate(
    model_path: str,
    make_env: Callable,
    env_id: str,
    eval_episodes: int,
    run_name: str,
    Model: tuple,  # was annotated nn.Module; the code indexes Model[0]/Model[1]
    device: torch.device = torch.device("cpu"),
    capture_video: bool = True,
    exploration_noise: float = 0.1,
):
    """Roll out a saved DDPG checkpoint for a number of evaluation episodes.

    Args:
        model_path: path to a checkpoint holding (actor_state_dict, qf_state_dict).
        make_env: factory returning a thunk that builds a single environment.
        env_id: Gymnasium environment id.
        eval_episodes: number of completed episodes to collect before returning.
        run_name: name used by the env factory (e.g. for the videos folder).
        Model: pair of classes (Actor, QNetwork) instantiated against the env.
        device: torch device used for inference.
        capture_video: forwarded to the env factory.
        exploration_noise: std scale of the Gaussian action noise added at eval.

    Returns:
        List of episodic returns, one scalar per finished episode.
    """
    envs = gym.vector.SyncVectorEnv([make_env(env_id, 0, 0, capture_video, run_name)])
    actor = Model[0](envs).to(device)
    qf = Model[1](envs).to(device)
    actor_params, qf_params = torch.load(model_path, map_location=device)
    actor.load_state_dict(actor_params)
    actor.eval()
    qf.load_state_dict(qf_params)
    qf.eval()
    # note: qf is loaded to consume the checkpoint tuple but is not used below

    obs, _ = envs.reset()
    episodic_returns = []
    while len(episodic_returns) < eval_episodes:
        with torch.no_grad():
            actions = actor(torch.Tensor(obs).to(device))
            actions += torch.normal(0, actor.action_scale * exploration_noise)
            actions = actions.cpu().numpy().clip(envs.single_action_space.low, envs.single_action_space.high)

        next_obs, _, _, _, infos = envs.step(actions)
        if "episode" in infos:
            # `_episode` masks which sub-envs finished this step; record only
            # those returns (the original appended the whole unmasked array,
            # inconsistent with the masked value it printed).
            finished = infos["episode"]["r"][infos["_episode"]]
            print(f"eval_episode={len(episodic_returns)}, episodic_return={finished}")
            episodic_returns.extend(finished.tolist())
        obs = next_obs

    print(f"episodic_returns: {episodic_returns}")
    return episodic_returns


if __name__ == "__main__":
    from rl.ddpg import Actor, QNetwork, make_env

    run_name = "Hopper-v5__ddpg__1__1732697106"
    model_path = "runs/Hopper-v5__ddpg__1__1732697106/ddpg.cleanrl_model"
    episodic_returns = evaluate(
        model_path,
        make_env,
        "Hopper-v5",
        eval_episodes=10,
        run_name=f"{run_name}-eval",
        Model=(Actor, QNetwork),
        device="cpu",
        capture_video=True,
    )

    from rl_utils.huggingface import push_to_hub

    args = tyro.cli(Args)
    repo_name = f"{args.env_id}-{args.exp_name}-seed{args.seed}"
    repo_id = f"{args.hf_entity}/{repo_name}" if args.hf_entity else repo_name
    push_to_hub(args, episodic_returns, repo_id, "DDPG", f"runs/{run_name}", f"videos/{run_name}-eval")