privateboss's picture
Upload 8 files
a063d15 verified
Raw
History Blame Contribute Delete
2.08 kB
# trained_agent.py
import gymnasium as gym
import os
import numpy as np
import config
from agent import PPOAgent
def evaluate_agent(weights_path="ppo_mountain_car_weights.weights.h5", num_episodes=15):
    """Replay a trained PPO policy in a human-rendered environment.

    Builds the environment named by ``config.ENV_NAME`` with
    ``render_mode="human"``, constructs a ``PPOAgent``, loads the saved
    weights into ``agent.ac`` and runs ``num_episodes`` episodes, printing
    the step count and summed environment reward of each one.

    Args:
        weights_path: Path of the weights file handed to
            ``agent.ac.load_weights``.
        num_episodes: Number of evaluation episodes to run.

    Returns:
        None. All results are reported on stdout. If ``weights_path`` does
        not exist, an error message is printed and the function returns
        before any environment is created.
    """
    if not os.path.exists(weights_path):
        print(f"❌ Error: Weights file '{weights_path}' not found!")
        print(" Please run 'python main.py' first to train the agent and generate weights.")
        return

    print("\n🎬 Initializing evaluation sandbox with visual rendering...")
    env = gym.make(config.ENV_NAME, render_mode="human")

    # Everything after gym.make runs under try/finally so the render window
    # and environment are released even if weight loading, the agent, or a
    # step raises (or the user interrupts the run).
    try:
        action_bounds = env.action_space.high[0]

        # Reconstruct the PPO Agent architecture
        agent = PPOAgent(action_bounds)

        # One throwaway forward pass on a sampled observation before loading
        # weights -- presumably so the network's variables exist when
        # load_weights runs (TODO confirm against PPOAgent). The batch uses
        # the same float32 dtype as the evaluation loop below.
        dummy_state = env.observation_space.sample()
        agent.get_vector_actions(np.array([dummy_state], dtype=np.float32))

        print(f"📥 Loading trained policy weights from: {weights_path}")
        agent.ac.load_weights(weights_path)
        print("🎯 Weights successfully bound to the Actor-Critic network.")

        print(f"\n🚀 Running {num_episodes} evaluation episodes...")
        for ep in range(1, num_episodes + 1):
            state, _ = env.reset()
            done = False
            total_raw_reward = 0
            step_count = 0

            while not done:
                # The agent API is batched: wrap the single observation in a
                # batch of one and take the first returned action.
                batched_state = np.array([state], dtype=np.float32)
                actions, _, _ = agent.get_vector_actions(batched_state)
                action = actions[0]

                state, reward, terminated, truncated, _ = env.step(action)
                # An episode ends on either a terminal state or truncation.
                done = terminated or truncated
                total_raw_reward += reward
                step_count += 1

            print(f"🔹 Episode {ep:2d}/{num_episodes} completed | Steps: {step_count:3d} | Total Env Score: {total_raw_reward:7.2f}")

        print("\n🧹 Evaluation complete. Closing display window safely.")
    finally:
        env.close()
# Script entry point: run an evaluation session with the default weights
# path and episode count when this file is executed directly (not imported).
if __name__ == "__main__":
    evaluate_agent()