# Train a PPO agent on LunarLander, evaluate it, write the model, a replay
# video, a results file and a model card to OUTPUT_DIR, then try to upload that
# folder to the Hugging Face Hub.
from __future__ import annotations

import datetime as dt
import json
import shutil
from pathlib import Path

import gymnasium as gym
import imageio
import numpy as np
from huggingface_hub import HfApi
from huggingface_hub.errors import HfHubHTTPError
from huggingface_hub.repocard import metadata_eval_result, metadata_save
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.monitor import Monitor

USERNAME = "Sami94"
STUDENT_NAME = "Sami Chellia"
ENV_ID = "LunarLander-v2"
MODEL_NAME = "ppo-LunarLander-v2"
REPO_ID = f"{USERNAME}/{MODEL_NAME}"
OUTPUT_DIR = Path("artifacts/unit1") / MODEL_NAME

# Number of episodes used for every evaluation; also recorded in results.json.
N_EVAL_EPISODES = 10
# Training is done in chunks so the agent can be evaluated between them.
TIMESTEPS_PER_CHUNK = 200_000
MAX_CHUNKS = 5
# Training stops early once the mean evaluation reward reaches this value.
REWARD_THRESHOLD = 200


def evaluate(model: PPO, n_eval_episodes: int = N_EVAL_EPISODES) -> tuple[float, float]:
    """Evaluate *model* on a fresh ``ENV_ID`` environment.

    Actions are chosen deterministically. The environment is wrapped in
    ``Monitor`` and is always closed, even if evaluation raises.

    Args:
        model: The agent to evaluate.
        n_eval_episodes: Number of episodes to run.

    Returns:
        ``(mean_reward, std_reward)`` as plain floats.
    """
    eval_env = Monitor(gym.make(ENV_ID, render_mode="rgb_array"))
    try:
        mean_reward, std_reward = evaluate_policy(
            model,
            eval_env,
            n_eval_episodes=n_eval_episodes,
            deterministic=True,
        )
    finally:
        eval_env.close()
    return float(mean_reward), float(std_reward)


def record_video(model: PPO, out_path: Path, max_steps: int = 1000) -> None:
    """Record one deterministic episode of *model* and write it to *out_path*.

    The episode is seeded with 42 and ends when the environment reports
    termination/truncation or after *max_steps* steps, whichever comes first.
    Frames are written at 30 fps.

    Args:
        model: The agent whose behaviour is recorded.
        out_path: Destination video file.
        max_steps: Upper bound on the number of environment steps.
    """
    env = gym.make(ENV_ID, render_mode="rgb_array")
    try:
        obs, _ = env.reset(seed=42)
        frames = [np.asarray(env.render())]
        for _ in range(max_steps):
            action, _ = model.predict(obs, deterministic=True)
            obs, _, terminated, truncated, _ = env.step(action)
            frames.append(np.asarray(env.render()))
            if terminated or truncated:
                break
    finally:
        # Release the environment even when stepping or rendering fails.
        env.close()
    imageio.mimsave(out_path, frames, fps=30)


def _render_model_card(mean_reward: float, std_reward: float, timesteps: int) -> str:
    """Return the Markdown body of the README (without YAML metadata)."""
    lines = [
        f"# PPO Agent for {ENV_ID}",
        "",
        f"Student: {STUDENT_NAME}",
        "",
        f"Hugging Face username: {USERNAME}",
        "",
        "This repository contains a PPO agent trained for the Hugging Face Deep RL course.",
        "",
        f"Mean reward: {mean_reward:.2f} +/- {std_reward:.2f}",
        "",
        f"Total timesteps: {timesteps}",
        "",
        "```python",
        "from huggingface_hub import hf_hub_download",
        "from stable_baselines3 import PPO",
        "",
        f'model_path = hf_hub_download(repo_id="{REPO_ID}", filename="{MODEL_NAME}.zip")',
        "model = PPO.load(model_path)",
        "```",
        "",
    ]
    return "\n".join(lines)


def _build_card_metadata(mean_reward: float, std_reward: float) -> dict:
    """Return the model-card metadata: tags merged with the evaluation result."""
    tags = {
        "tags": [
            ENV_ID,
            "ppo",
            "stable-baselines3",
            "reinforcement-learning",
            "huggingface-deep-rl-course",
        ]
    }
    eval_metadata = metadata_eval_result(
        model_pretty_name=MODEL_NAME,
        task_pretty_name="reinforcement-learning",
        task_id="reinforcement-learning",
        metrics_pretty_name="mean_reward",
        metrics_id="mean_reward",
        metrics_value=f"{mean_reward:.2f} +/- {std_reward:.2f}",
        dataset_pretty_name=ENV_ID,
        dataset_id=ENV_ID,
    )
    return {**tags, **eval_metadata}


def save_artifacts(
    model: PPO,
    mean_reward: float,
    std_reward: float,
    timesteps: int,
    n_eval_episodes: int = N_EVAL_EPISODES,
) -> None:
    """Write all publishable artifacts for *model* into ``OUTPUT_DIR``.

    Any existing ``OUTPUT_DIR`` is deleted first, then the function writes the
    saved model, ``replay.mp4``, ``results.json`` and ``README.md`` (with
    metadata attached via ``metadata_save``).

    Args:
        model: The trained agent to save and record.
        mean_reward: Mean evaluation reward to report.
        std_reward: Standard deviation of the evaluation reward to report.
        timesteps: Number of training timesteps to report.
        n_eval_episodes: Number of evaluation episodes behind the reported
            rewards; recorded in ``results.json``.
    """
    # Start from a clean directory so stale files are never uploaded.
    if OUTPUT_DIR.exists():
        shutil.rmtree(OUTPUT_DIR)
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    model.save(OUTPUT_DIR / MODEL_NAME)
    record_video(model, OUTPUT_DIR / "replay.mp4")

    results = {
        "env_id": ENV_ID,
        "mean_reward": mean_reward,
        "std_reward": std_reward,
        "n_eval_episodes": n_eval_episodes,
        "total_timesteps": timesteps,
        # Local wall-clock time with its UTC offset, so the value is unambiguous.
        "eval_datetime": dt.datetime.now().astimezone().isoformat(),
        "student": STUDENT_NAME,
        "hf_username": USERNAME,
    }
    (OUTPUT_DIR / "results.json").write_text(json.dumps(results, indent=2), encoding="utf-8")

    readme_path = OUTPUT_DIR / "README.md"
    readme_path.write_text(
        _render_model_card(mean_reward, std_reward, timesteps),
        encoding="utf-8",
    )
    metadata_save(readme_path, _build_card_metadata(mean_reward, std_reward))


def push_artifacts() -> str:
    """Upload ``OUTPUT_DIR`` to the ``REPO_ID`` model repository on the Hub.

    The repository is created if it does not exist yet. An ``HfHubHTTPError``
    is not fatal: it is reported on stdout and the local artifacts are kept.

    Returns:
        The repository URL on success, otherwise ``"LOCAL_ONLY:<path>"`` where
        ``<path>`` is the resolved ``OUTPUT_DIR``.
    """
    api = HfApi()
    try:
        repo_url = api.create_repo(repo_id=REPO_ID, repo_type="model", exist_ok=True)
        api.upload_folder(
            repo_id=REPO_ID,
            repo_type="model",
            folder_path=OUTPUT_DIR,
            path_in_repo=".",
        )
        return str(repo_url)
    except HfHubHTTPError as exc:
        print(f"Hub push failed for {REPO_ID}: {exc}")
        print(f"Local artifacts saved in {OUTPUT_DIR.resolve()}")
        return f"LOCAL_ONLY:{OUTPUT_DIR.resolve()}"


def main() -> None:
    """Train in chunks, keep the best evaluated checkpoint, and publish it.

    After each chunk the agent is evaluated. Artifacts are (re)written only
    when the mean reward improves on the best seen so far, so the uploaded
    model and its reported metrics always belong to the same checkpoint.
    Training stops early once the mean reward reaches ``REWARD_THRESHOLD``.

    Raises:
        RuntimeError: If no evaluation was performed.
    """
    env = make_vec_env(ENV_ID, n_envs=16)
    best: tuple[float, float] | None = None
    try:
        model = PPO(
            policy="MlpPolicy",
            env=env,
            n_steps=1024,
            batch_size=64,
            n_epochs=4,
            gamma=0.999,
            gae_lambda=0.98,
            ent_coef=0.01,
            verbose=1,
        )
        for _ in range(MAX_CHUNKS):
            model.learn(total_timesteps=TIMESTEPS_PER_CHUNK, reset_num_timesteps=False)
            # PPO collects whole rollouts (n_steps * n_envs), so the real number
            # of steps can exceed the requested budget; report the real count.
            total_timesteps = int(model.num_timesteps)

            mean_reward, std_reward = evaluate(model)
            print(
                f"Evaluation after {total_timesteps} timesteps: "
                f"{mean_reward:.2f} +/- {std_reward:.2f}"
            )

            # Only overwrite the artifacts when this checkpoint is better, so a
            # later, worse evaluation cannot replace a good saved model.
            if best is None or mean_reward > best[0]:
                best = (mean_reward, std_reward)
                save_artifacts(model, mean_reward, std_reward, total_timesteps)

            # NOTE(review): the course leaderboard presumably scores
            # mean_reward - std_reward; confirm whether the threshold should be
            # applied to that value instead of the mean alone.
            if mean_reward >= REWARD_THRESHOLD:
                print("Certification threshold reached for Unit 1.")
                break
    finally:
        env.close()

    if best is None:
        raise RuntimeError("Training finished without evaluation.")

    url = push_artifacts()
    # Written after the upload, so this file stays local only.
    (OUTPUT_DIR / "pushed_repo.json").write_text(
        json.dumps({"repo_id": REPO_ID, "url": url}, indent=2),
        encoding="utf-8",
    )
    print(
        json.dumps(
            {"repo_id": REPO_ID, "url": url, "mean_reward": best[0], "std_reward": best[1]},
            indent=2,
        )
    )


if __name__ == "__main__":
    main()