# Hugging Face model-page residue (kept for provenance, commented so the file parses):
# jacksonhack — pushing model — commit 1d8092a (verified)
from typing import Callable
import gymnasium as gym
import torch
import torch.nn as nn
from dataclasses import dataclass
import os
import tyro
@dataclass
class Args:
exp_name: str = os.path.basename(__file__)[: -len(".py")]
"""the name of this experiment"""
seed: int = 1
"""seed of the experiment"""
torch_deterministic: bool = True
"""if toggled, `torch.backends.cudnn.deterministic=False`"""
cuda: bool = True
"""if toggled, cuda will be enabled by default"""
track: bool = False
"""if toggled, this experiment will be tracked with Weights and Biases"""
wandb_project_name: str = "cleanRL"
"""the wandb's project name"""
wandb_entity: str = None
"""the entity (team) of wandb's project"""
capture_video: bool = False
"""whether to capture videos of the agent performances (check out `videos` folder)"""
save_model: bool = False
"""whether to save model into the `runs/{run_name}` folder"""
upload_model: bool = False
"""whether to upload the saved model to huggingface"""
hf_entity: str = "jacksonhack"
"""the user or org name of the model repository from the Hugging Face Hub"""
# Algorithm specific arguments
env_id: str = "Hopper-v5"
"""the environment id of the Atari game"""
total_timesteps: int = 1000000
"""total timesteps of the experiments"""
learning_rate: float = 3e-4
"""the learning rate of the optimizer"""
buffer_size: int = int(1e6)
"""the replay memory buffer size"""
gamma: float = 0.99
"""the discount factor gamma"""
tau: float = 0.005
"""target smoothing coefficient (default: 0.005)"""
batch_size: int = 256
"""the batch size of sample from the reply memory"""
exploration_noise: float = 0.1
"""the scale of exploration noise"""
learning_starts: int = 25e3
"""timestep to start learning"""
policy_frequency: int = 2
"""the frequency of training policy (delayed)"""
noise_clip: float = 0.5
"""noise clip parameter of the Target Policy Smoothing Regularization"""
def evaluate(
    model_path: str,
    make_env: Callable,
    env_id: str,
    eval_episodes: int,
    run_name: str,
    Model: tuple,  # annotation fixed: a (ActorClass, QNetworkClass) pair, not an nn.Module instance
    device: torch.device = torch.device("cpu"),
    capture_video: bool = True,
    exploration_noise: float = 0.1,
):
    """Roll out a saved DDPG actor in `env_id` until `eval_episodes` episodes finish.

    Loads an (actor, qf) state-dict pair from `model_path`, steps a single
    vectorized env with Gaussian exploration noise added to the actor's
    actions, and returns the list of collected episodic returns.

    :param model_path: path to a checkpoint holding (actor_params, qf_params)
    :param make_env: factory `make_env(env_id, seed, idx, capture_video, run_name)`
    :param Model: `(ActorClass, QNetworkClass)` — each is constructed with `envs`
    :return: list of episodic returns (each entry is the array reported by the
        env's episode-statistics wrapper — assumed length-1 here since only one
        env is run; TODO confirm callers expect the array rather than a scalar)
    """
    envs = gym.vector.SyncVectorEnv([make_env(env_id, 0, 0, capture_video, run_name)])
    actor = Model[0](envs).to(device)
    qf = Model[1](envs).to(device)
    # checkpoint stores both networks; qf is loaded only so the tuple unpacks,
    # it is never used for action selection below
    actor_params, qf_params = torch.load(model_path, map_location=device)
    actor.load_state_dict(actor_params)
    actor.eval()
    qf.load_state_dict(qf_params)
    qf.eval()

    obs, _ = envs.reset()
    episodic_returns = []
    while len(episodic_returns) < eval_episodes:
        with torch.no_grad():
            actions = actor(torch.Tensor(obs).to(device))
            # same exploration noise as training; scale by the actor's action range
            actions += torch.normal(0, actor.action_scale * exploration_noise)
            actions = actions.cpu().numpy().clip(envs.single_action_space.low, envs.single_action_space.high)
        next_obs, _, _, _, infos = envs.step(actions)
        if "episode" in infos:
            print(f"eval_episode={len(episodic_returns)}, episodic_return={infos['episode']['r'][infos['_episode']]}")
            episodic_returns += [infos["episode"]["r"]]
        obs = next_obs
    print(f"episodic_returns: {episodic_returns}")
    return episodic_returns
if __name__ == "__main__":
    from rl.ddpg import Actor, QNetwork, make_env
    from rl_utils.huggingface import push_to_hub

    # Parse CLI args first so the evaluation and the repo id agree on the env id
    # (previously args were parsed only after evaluate() ran with a hard-coded id).
    args = tyro.cli(Args)

    # NOTE(review): run_name/model_path are pinned to one training run's
    # timestamp; they will not match checkpoints produced by other runs.
    run_name = "Hopper-v5__ddpg__1__1732697106"
    model_path = "runs/Hopper-v5__ddpg__1__1732697106/ddpg.cleanrl_model"

    episodic_returns = evaluate(
        model_path,
        make_env,
        args.env_id,
        eval_episodes=10,
        run_name=f"{run_name}-eval",
        Model=(Actor, QNetwork),
        device="cpu",
        capture_video=True,
    )

    repo_name = f"{args.env_id}-{args.exp_name}-seed{args.seed}"
    repo_id = f"{args.hf_entity}/{repo_name}" if args.hf_entity else repo_name
    push_to_hub(args, episodic_returns, repo_id, "DDPG", f"runs/{run_name}", f"videos/{run_name}-eval")