# trained_agent.py
import os

import gymnasium as gym
import numpy as np

import config
from agent import PPOAgent


def _run_episode(env, agent):
    """Play one episode to completion and return ``(step_count, total_reward)``.

    Each action is obtained by calling ``agent.get_vector_actions`` on a batch
    holding the single current observation; only the first element of the
    first returned value is forwarded to ``env.step``.
    """
    state, _ = env.reset()
    total_raw_reward = 0.0
    step_count = 0
    done = False
    while not done:
        # The agent is queried with a batch, so wrap the lone observation.
        batched_state = np.array([state], dtype=np.float32)
        actions, _, _ = agent.get_vector_actions(batched_state)
        state, reward, terminated, truncated, _ = env.step(actions[0])
        # The episode ends on either signal reported by the environment.
        done = terminated or truncated
        total_raw_reward += reward
        step_count += 1
    return step_count, total_raw_reward


def evaluate_agent(
    weights_path: str = "ppo_mountain_car_weights.weights.h5",
    num_episodes: int = 15,
) -> None:
    """Load saved policy weights and run human-rendered evaluation episodes.

    Args:
        weights_path: Path of the weights file passed to
            ``agent.ac.load_weights``. If the path does not exist, an error
            message is printed and the function returns without creating an
            environment.
        num_episodes: Number of episodes to play. Steps and the summed
            environment reward are printed after each one.

    Returns:
        None. Results are reported via ``print`` only.
    """
    if not os.path.exists(weights_path):
        print(f"❌ Error: Weights file '{weights_path}' not found!")
        print(" Please run 'python main.py' first to train the agent and generate weights.")
        return

    print("\n🎬 Initializing evaluation sandbox with visual rendering...")
    env = gym.make(config.ENV_NAME, render_mode="human")
    try:
        # The agent is constructed from the first upper bound of the action space.
        action_bounds = env.action_space.high[0]

        # Reconstruct the PPO Agent architecture.
        agent = PPOAgent(action_bounds)
        # One throwaway call on a sampled observation before loading weights.
        # NOTE(review): presumably this creates the network's variables so that
        # load_weights has something to bind to -- confirm against PPOAgent.
        dummy_state = env.observation_space.sample()
        agent.get_vector_actions(np.array([dummy_state]))

        print(f"📥 Loading trained policy weights from: {weights_path}")
        agent.ac.load_weights(weights_path)
        print("🎯 Weights successfully bound to the Actor-Critic network.")

        print(f"\n🚀 Running {num_episodes} evaluation episodes...")
        for ep in range(1, num_episodes + 1):
            step_count, total_raw_reward = _run_episode(env, agent)
            print(f"🔹 Episode {ep:2d}/{num_episodes} completed | Steps: {step_count:3d} | Total Env Score: {total_raw_reward:7.2f}")

        print("\n🧹 Evaluation complete. Closing display window safely.")
    finally:
        # Always release the render window, even if loading or a rollout fails
        # or the user interrupts the run.
        env.close()


if __name__ == "__main__":
    evaluate_agent()