Vishand03
/

lunarlander-ppo

Reinforcement Learning

stable-baselines3

deep-reinforcement-learning

Eval Results (legacy)

Model card · Files and versions

Vishand03 committed on Aug 25, 2025

Commit

56dced8

·

verified ·

1 Parent(s): eff10f3

Update README.md

Files changed (1) hide show

README.md +12 -5

README.md CHANGED Viewed

@@ -41,7 +41,7 @@ This is a trained **PPO agent** for the **LunarLander-v2** environment using Sta
 - Optimizer: Adam
 ## 🎥 Demo
-![LunarLander](lunarlander_loop.gif)
 ## 🛠 Usage
@@ -52,19 +52,24 @@ from stable_baselines3.common.monitor import Monitor
 from stable_baselines3.common.evaluation import evaluate_policy
 from huggingface_hub import hf_hub_download
 # Environment for human rendering
 env = gym.make("LunarLander-v2", render_mode="human")
 # Environment for evaluation (no render)
 eval_env = Monitor(gym.make("LunarLander-v2"))
-# Download the model from Hugging Face Hub
 model_path = hf_hub_download("Vishand03/lunarlander-ppo", "model.zip")
-# Load the model
 model = PPO.load(model_path)
 # Run a single episode
 obs, _ = env.reset()
 done = False
 while not done:
@@ -72,6 +77,8 @@ while not done:
     obs, reward, terminated, truncated, _ = env.step(action)
     done = terminated or truncated
-# Evaluate the policy
 mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
 print(f"Mean Reward: {mean_reward:.2f} +/- {std_reward:.2f}")

 - Optimizer: Adam
 ## 🎥 Demo
+![LunarLander](lunarlander.gif)
 ## 🛠 Usage
 from stable_baselines3.common.evaluation import evaluate_policy
 from huggingface_hub import hf_hub_download
+# -------------------------
+# Environment Setup
+# -------------------------
 # Environment for human rendering
 env = gym.make("LunarLander-v2", render_mode="human")
 # Environment for evaluation (no render)
 eval_env = Monitor(gym.make("LunarLander-v2"))
+# -------------------------
+# Load pretrained model from Hugging Face Hub
+# -------------------------
 model_path = hf_hub_download("Vishand03/lunarlander-ppo", "model.zip")
 model = PPO.load(model_path)
+# -------------------------
 # Run a single episode
+# -------------------------
 obs, _ = env.reset()
 done = False
 while not done:
     obs, reward, terminated, truncated, _ = env.step(action)
     done = terminated or truncated
+# -------------------------
+# Evaluate policy
+# -------------------------
 mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
 print(f"Mean Reward: {mean_reward:.2f} +/- {std_reward:.2f}")