Update README.md
Browse files
README.md
CHANGED
|
@@ -41,7 +41,7 @@ This is a trained **PPO agent** for the **LunarLander-v2** environment using Sta
|
|
| 41 |
- Optimizer: Adam
|
| 42 |
|
| 43 |
## 🎥 Demo
|
| 44 |
-

|
| 57 |
|
| 58 |
# Environment for evaluation (no render)
|
| 59 |
eval_env = Monitor(gym.make("LunarLander-v2"))
|
| 60 |
|
| 61 |
-
#
|
|
|
|
|
|
|
| 62 |
model_path = hf_hub_download("Vishand03/lunarlander-ppo", "model.zip")
|
| 63 |
-
|
| 64 |
-
# Load the model
|
| 65 |
model = PPO.load(model_path)
|
| 66 |
|
|
|
|
| 67 |
# Run a single episode
|
|
|
|
| 68 |
obs, _ = env.reset()
|
| 69 |
done = False
|
| 70 |
while not done:
|
|
@@ -72,6 +77,8 @@ while not done:
|
|
| 72 |
obs, reward, terminated, truncated, _ = env.step(action)
|
| 73 |
done = terminated or truncated
|
| 74 |
|
| 75 |
-
#
|
|
|
|
|
|
|
| 76 |
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
|
| 77 |
print(f"Mean Reward: {mean_reward:.2f} +/- {std_reward:.2f}")
|
|
|
|
| 41 |
- Optimizer: Adam
|
| 42 |
|
| 43 |
## 🎥 Demo
|
| 44 |
+

|
| 45 |
|
| 46 |
## 🛠 Usage
|
| 47 |
|
|
|
|
| 52 |
from stable_baselines3.common.evaluation import evaluate_policy
|
| 53 |
from huggingface_hub import hf_hub_download
|
| 54 |
|
| 55 |
+
# -------------------------
|
| 56 |
+
# Environment Setup
|
| 57 |
+
# -------------------------
|
| 58 |
# Environment for human rendering
|
| 59 |
env = gym.make("LunarLander-v2", render_mode="human")
|
| 60 |
|
| 61 |
# Environment for evaluation (no rendering; wrapped in Monitor)
|
| 62 |
eval_env = Monitor(gym.make("LunarLander-v2"))
|
| 63 |
|
| 64 |
+
# -------------------------
|
| 65 |
+
# Load the pretrained model from the Hugging Face Hub
|
| 66 |
+
# -------------------------
|
| 67 |
model_path = hf_hub_download("Vishand03/lunarlander-ppo", "model.zip")
|
|
|
|
|
|
|
| 68 |
model = PPO.load(model_path)
|
| 69 |
|
| 70 |
+
# -------------------------
|
| 71 |
# Run a single episode
|
| 72 |
+
# -------------------------
|
| 73 |
obs, _ = env.reset()
|
| 74 |
done = False
|
| 75 |
while not done:
|
|
|
|
| 77 |
obs, reward, terminated, truncated, _ = env.step(action)
|
| 78 |
done = terminated or truncated
|
| 79 |
|
| 80 |
+
# -------------------------
|
| 81 |
+
# Evaluate the policy over 10 deterministic episodes
|
| 82 |
+
# -------------------------
|
| 83 |
mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
|
| 84 |
print(f"Mean Reward: {mean_reward:.2f} +/- {std_reward:.2f}")
|