| |
| import gymnasium as gym |
| import os |
| import numpy as np |
| import config |
| from agent import PPOAgent |
|
|
def evaluate_agent(
    weights_path: str = "ppo_mountain_car_weights.weights.h5",
    num_episodes: int = 15,
) -> None:
    """Replay a trained PPO policy in a human-rendered environment.

    Verifies that ``weights_path`` exists, creates the environment named by
    ``config.ENV_NAME`` with ``render_mode="human"``, loads the weights into
    the agent's ``ac`` network, and rolls out ``num_episodes`` episodes,
    printing a one-line summary (steps and summed reward) after each one.

    Args:
        weights_path: Path to the saved weights file to load.
        num_episodes: Number of episodes to run.

    Returns:
        None. When the weights file does not exist, an error message is
        printed and the function returns before any environment is created.
    """
    if not os.path.exists(weights_path):
        print(f"❌ Error: Weights file '{weights_path}' not found!")
        print(" Please run 'python main.py' first to train the agent and generate weights.")
        return

    print("\n🎬 Initializing evaluation sandbox with visual rendering...")

    env = gym.make(config.ENV_NAME, render_mode="human")
    # Everything after the environment exists runs under try/finally so the
    # render window is released even if loading or stepping raises.
    try:
        # Assumes a Box action space; the first upper bound is handed to the
        # agent as its action scale -- TODO confirm against PPOAgent.
        action_bounds = env.action_space.high[0]

        agent = PPOAgent(action_bounds)

        # One forward pass on a sampled observation before loading weights.
        # Presumably this makes the network create its variables first
        # (verify against PPOAgent). Uses the same float32 batch layout as
        # the rollout loop below.
        dummy_state = env.observation_space.sample()
        agent.get_vector_actions(np.array([dummy_state], dtype=np.float32))

        print(f"📥 Loading trained policy weights from: {weights_path}")
        agent.ac.load_weights(weights_path)
        print("🎯 Weights successfully bound to the Actor-Critic network.")

        print(f"\n🚀 Running {num_episodes} evaluation episodes...")

        for ep in range(1, num_episodes + 1):
            state, _ = env.reset()
            done = False
            total_raw_reward = 0.0
            step_count = 0

            while not done:
                # The agent API takes a batch, so wrap the single observation.
                batched_state = np.array([state], dtype=np.float32)

                actions, _, _ = agent.get_vector_actions(batched_state)
                action = actions[0]

                state, reward, terminated, truncated, _ = env.step(action)
                # An episode ends on either termination or truncation.
                done = terminated or truncated

                total_raw_reward += reward
                step_count += 1

            print(f"🔹 Episode {ep:2d}/{num_episodes} completed | Steps: {step_count:3d} | Total Env Score: {total_raw_reward:7.2f}")

        print("\n🧹 Evaluation complete. Closing display window safely.")
    finally:
        env.close()
|
|
# Script entry point: when this file is executed directly (not imported),
# run the evaluation with the default weights path and episode count.
if __name__ == "__main__":
    evaluate_agent()