Vishand03 committed on
Commit
56dced8
·
verified ·
1 Parent(s): eff10f3

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +12 -5
README.md CHANGED
@@ -41,7 +41,7 @@ This is a trained **PPO agent** for the **LunarLander-v2** environment using Sta
41
  - Optimizer: Adam
42
 
43
  ## 🎥 Demo
44
- ![LunarLander](lunarlander_loop.gif)
45
 
46
  ## 🛠 Usage
47
 
@@ -52,19 +52,24 @@ from stable_baselines3.common.monitor import Monitor
52
  from stable_baselines3.common.evaluation import evaluate_policy
53
  from huggingface_hub import hf_hub_download
54
 
 
 
 
55
  # Environment for human rendering
56
  env = gym.make("LunarLander-v2", render_mode="human")
57
 
58
  # Environment for evaluation (no render)
59
  eval_env = Monitor(gym.make("LunarLander-v2"))
60
 
61
- # Download the model from Hugging Face Hub
 
 
62
  model_path = hf_hub_download("Vishand03/lunarlander-ppo", "model.zip")
63
-
64
- # Load the model
65
  model = PPO.load(model_path)
66
 
 
67
  # Run a single episode
 
68
  obs, _ = env.reset()
69
  done = False
70
  while not done:
@@ -72,6 +77,8 @@ while not done:
72
  obs, reward, terminated, truncated, _ = env.step(action)
73
  done = terminated or truncated
74
 
75
- # Evaluate the policy
 
 
76
  mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
77
  print(f"Mean Reward: {mean_reward:.2f} +/- {std_reward:.2f}")
 
41
  - Optimizer: Adam
42
 
43
  ## 🎥 Demo
44
+ ![LunarLander](lunarlander.gif)
45
 
46
  ## 🛠 Usage
47
 
 
52
  from stable_baselines3.common.evaluation import evaluate_policy
53
  from huggingface_hub import hf_hub_download
54
 
55
+ # -------------------------
56
+ # Environment Setup
57
+ # -------------------------
58
  # Environment for human rendering
59
  env = gym.make("LunarLander-v2", render_mode="human")
60
 
61
  # Environment for evaluation (no render)
62
  eval_env = Monitor(gym.make("LunarLander-v2"))
63
 
64
+ # -------------------------
65
+ # Load pretrained model from Hugging Face Hub
66
+ # -------------------------
67
  model_path = hf_hub_download("Vishand03/lunarlander-ppo", "model.zip")
 
 
68
  model = PPO.load(model_path)
69
 
70
+ # -------------------------
71
  # Run a single episode
72
+ # -------------------------
73
  obs, _ = env.reset()
74
  done = False
75
  while not done:
 
77
  obs, reward, terminated, truncated, _ = env.step(action)
78
  done = terminated or truncated
79
 
80
+ # -------------------------
81
+ # Evaluate policy
82
+ # -------------------------
83
  mean_reward, std_reward = evaluate_policy(model, eval_env, n_eval_episodes=10, deterministic=True)
84
  print(f"Mean Reward: {mean_reward:.2f} +/- {std_reward:.2f}")