# Hugging Face Hub page header (not part of the program):
#   Sami94's picture
#   Upload folder using huggingface_hub
#   064e771 verified
from __future__ import annotations
import datetime as dt
import json
import shutil
from pathlib import Path
import gymnasium as gym
import imageio
import numpy as np
from huggingface_hub import HfApi
from huggingface_hub.errors import HfHubHTTPError
from huggingface_hub.repocard import metadata_eval_result, metadata_save
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor
# Who is submitting: Hub account name and the student's display name.
USERNAME = "Sami94"
STUDENT_NAME = "Sami Chellia"

# Which environment is trained on and what the resulting model is called.
ENV_ID = "LunarLander-v2"
MODEL_NAME = "ppo-LunarLander-v2"

# Derived locations: the Hub repository id and the local artifact folder.
REPO_ID = "/".join((USERNAME, MODEL_NAME))
OUTPUT_DIR = Path("artifacts", "unit1", MODEL_NAME)
def evaluate(model: PPO, n_eval_episodes: int = 10) -> tuple[float, float]:
    """Evaluate ``model`` on a freshly created ``ENV_ID`` environment.

    The environment is wrapped in ``Monitor`` and the policy is run
    deterministically through ``evaluate_policy``.

    Args:
        model: The PPO agent to evaluate.
        n_eval_episodes: Number of evaluation episodes to run.

    Returns:
        ``(mean_reward, std_reward)`` converted to plain Python floats.
    """
    eval_env = Monitor(gym.make(ENV_ID, render_mode="rgb_array"))
    try:
        mean_reward, std_reward = evaluate_policy(
            model,
            eval_env,
            n_eval_episodes=n_eval_episodes,
            deterministic=True,
        )
    finally:
        # Release the environment even if evaluation raises.
        eval_env.close()
    return float(mean_reward), float(std_reward)
def record_video(model: PPO, out_path: Path, max_steps: int = 1000) -> None:
    """Roll out one deterministic episode and write its frames to ``out_path``.

    A new ``ENV_ID`` environment is created with ``render_mode="rgb_array"``
    and reset with seed 42. One frame is captured after the reset and one
    after every step, until the episode terminates, is truncated, or
    ``max_steps`` steps have been taken.

    Args:
        model: The PPO agent whose policy drives the episode.
        out_path: Destination file handed to ``imageio.mimsave``.
        max_steps: Upper bound on the number of environment steps.
    """
    env = gym.make(ENV_ID, render_mode="rgb_array")
    try:
        obs, _ = env.reset(seed=42)
        frames = [env.render()]
        for _ in range(max_steps):
            action, _ = model.predict(obs, deterministic=True)
            obs, _, terminated, truncated, _ = env.step(action)
            frames.append(env.render())
            if terminated or truncated:
                break
    finally:
        # Close the environment even if the rollout fails part-way.
        env.close()
    imageio.mimsave(out_path, [np.asarray(frame) for frame in frames], fps=30)
def save_artifacts(
    model: PPO,
    mean_reward: float,
    std_reward: float,
    timesteps: int,
    n_eval_episodes: int = 10,
) -> None:
    """Write the model, replay video, results and model card to ``OUTPUT_DIR``.

    Any existing ``OUTPUT_DIR`` is deleted first, so the folder only ever
    holds the artifacts of the most recent call. The files written are the
    saved model (``MODEL_NAME``), ``replay.mp4``, ``results.json`` and a
    ``README.md`` whose metadata is attached with ``metadata_save``.

    Args:
        model: The trained PPO agent to save and record.
        mean_reward: Mean evaluation reward to report.
        std_reward: Standard deviation of the evaluation reward to report.
        timesteps: Total number of training timesteps completed so far.
        n_eval_episodes: Number of evaluation episodes behind
            ``mean_reward``/``std_reward``; recorded in ``results.json``.
            Defaults to 10, the value that used to be hard-coded here.
    """
    # Start from a clean folder so stale files are never uploaded.
    if OUTPUT_DIR.exists():
        shutil.rmtree(OUTPUT_DIR)
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    model.save(OUTPUT_DIR / MODEL_NAME)
    record_video(model, OUTPUT_DIR / "replay.mp4")

    results = {
        "env_id": ENV_ID,
        "mean_reward": mean_reward,
        "std_reward": std_reward,
        "n_eval_episodes": n_eval_episodes,
        "total_timesteps": timesteps,
        # Naive local time, kept in the same format as before.
        "eval_datetime": dt.datetime.now().isoformat(),
        "student": STUDENT_NAME,
        "hf_username": USERNAME,
    }
    (OUTPUT_DIR / "results.json").write_text(json.dumps(results, indent=2), encoding="utf-8")

    metadata = {
        "tags": [
            ENV_ID,
            "ppo",
            "stable-baselines3",
            "reinforcement-learning",
            "huggingface-deep-rl-course",
        ]
    }
    eval_metadata = metadata_eval_result(
        model_pretty_name=MODEL_NAME,
        task_pretty_name="reinforcement-learning",
        task_id="reinforcement-learning",
        metrics_pretty_name="mean_reward",
        metrics_id="mean_reward",
        metrics_value=f"{mean_reward:.2f} +/- {std_reward:.2f}",
        dataset_pretty_name=ENV_ID,
        dataset_id=ENV_ID,
    )
    # Tags and evaluation results are merged into one metadata mapping.
    metadata = {**metadata, **eval_metadata}

    readme_path = OUTPUT_DIR / "README.md"
    # The README text is flush-left on purpose: the lines are file content.
    readme_path.write_text(
        f"""# PPO Agent for {ENV_ID}
Student: {STUDENT_NAME}
Hugging Face username: {USERNAME}
This repository contains a PPO agent trained for the Hugging Face Deep RL course.
Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}
Total timesteps: {timesteps}
```python
from huggingface_hub import hf_hub_download
from stable_baselines3 import PPO
model_path = hf_hub_download(repo_id="{REPO_ID}", filename="{MODEL_NAME}.zip")
model = PPO.load(model_path)
```
""",
        encoding="utf-8",
    )
    metadata_save(readme_path, metadata)
def push_artifacts() -> str:
    """Upload ``OUTPUT_DIR`` to the ``REPO_ID`` model repository on the Hub.

    The repository is created if it does not exist yet. If the Hub rejects
    a request with ``HfHubHTTPError``, the failure is printed and the
    artifacts are left on disk.

    Returns:
        The repository URL as a string on success, otherwise
        ``"LOCAL_ONLY:<absolute path of OUTPUT_DIR>"``.
    """
    hub = HfApi()
    try:
        created = hub.create_repo(repo_id=REPO_ID, repo_type="model", exist_ok=True)
        hub.upload_folder(
            repo_id=REPO_ID,
            repo_type="model",
            folder_path=OUTPUT_DIR,
            path_in_repo=".",
        )
    except HfHubHTTPError as exc:
        print(f"Hub push failed for {REPO_ID}: {exc}")
        local_dir = OUTPUT_DIR.resolve()
        print(f"Local artifacts saved in {local_dir}")
        return f"LOCAL_ONLY:{local_dir}"
    return str(created)
def main() -> None:
    """Train PPO on ``ENV_ID`` in chunks, then publish the latest artifacts.

    Training runs in up to five chunks of 200,000 timesteps on 16 parallel
    environments. After each chunk the model is evaluated and the artifacts
    in ``OUTPUT_DIR`` are regenerated. Training stops early once the mean
    evaluation reward reaches 200. The artifacts are then pushed to the Hub
    and a summary is printed as JSON.

    Raises:
        RuntimeError: If no evaluation was performed.
    """
    env = make_vec_env(ENV_ID, n_envs=16)
    # Holds the most recent evaluation, which matches the saved artifacts.
    latest_eval: tuple[float, float] | None = None
    try:
        model = PPO(
            policy="MlpPolicy",
            env=env,
            n_steps=1024,
            batch_size=64,
            n_epochs=4,
            gamma=0.999,
            gae_lambda=0.98,
            ent_coef=0.01,
            verbose=1,
        )
        total_timesteps = 0
        for chunk in [200_000] * 5:
            # Keep the timestep counter running across successive learn() calls.
            model.learn(total_timesteps=chunk, reset_num_timesteps=False)
            total_timesteps += chunk
            mean_reward, std_reward = evaluate(model)
            latest_eval = (mean_reward, std_reward)
            print(f"Evaluation after {total_timesteps} timesteps: {mean_reward:.2f} +/- {std_reward:.2f}")
            save_artifacts(model, mean_reward, std_reward, total_timesteps)
            if mean_reward >= 200:
                print("Certification threshold reached for Unit 1.")
                break
    finally:
        # Close the training environments even if a step above fails.
        env.close()

    if latest_eval is None:
        raise RuntimeError("Training finished without evaluation.")

    url = push_artifacts()
    (OUTPUT_DIR / "pushed_repo.json").write_text(
        json.dumps({"repo_id": REPO_ID, "url": url}, indent=2),
        encoding="utf-8",
    )
    print(
        json.dumps(
            {
                "repo_id": REPO_ID,
                "url": url,
                "mean_reward": latest_eval[0],
                "std_reward": latest_eval[1],
            },
            indent=2,
        )
    )


if __name__ == "__main__":
    main()