File size: 2,079 Bytes
a063d15
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# trained_agent.py
import gymnasium as gym
import os
import numpy as np
import config
from agent import PPOAgent

def evaluate_agent(weights_path="ppo_mountain_car_weights.weights.h5", num_episodes=15):
    """
    Load saved policy weights and run human-rendered evaluation episodes.

    Creates the environment named by ``config.ENV_NAME`` with
    ``render_mode="human"``, rebuilds a ``PPOAgent``, loads the weights at
    ``weights_path`` into ``agent.ac`` and plays ``num_episodes`` episodes,
    printing the step count and summed environment reward for each one.

    Args:
        weights_path: Path to the saved weights file. If the file does not
            exist, an error message is printed and the function returns
            without creating an environment.
        num_episodes: Number of episodes to play. Values <= 0 play none.

    Returns:
        None. Results are reported via ``print`` only.

    The environment is always closed, even if loading the weights or an
    episode raises (or the user interrupts the run), so the render window
    is not left open.
    """
    if not os.path.exists(weights_path):
        print(f"❌ Error: Weights file '{weights_path}' not found!")
        print("   Please run 'python main.py' first to train the agent and generate weights.")
        return

    print("\n🎬 Initializing evaluation sandbox with visual rendering...")

    env = gym.make(config.ENV_NAME, render_mode="human")
    try:
        # The agent is constructed from the upper bound of the first action dimension.
        action_bounds = env.action_space.high[0]

        # Reconstruct the PPO Agent architecture
        agent = PPOAgent(action_bounds)

        # One forward pass on a sampled observation before loading weights.
        # NOTE(review): presumably this builds the network variables so that
        # load_weights has something to bind to — confirm against PPOAgent.
        dummy_state = env.observation_space.sample()
        agent.get_vector_actions(np.array([dummy_state]))

        print(f"📥 Loading trained policy weights from: {weights_path}")
        agent.ac.load_weights(weights_path)
        print("🎯 Weights successfully bound to the Actor-Critic network.")

        print(f"\n🚀 Running {num_episodes} evaluation episodes...")

        for ep in range(1, num_episodes + 1):
            state, _ = env.reset()
            done = False
            total_raw_reward = 0
            step_count = 0

            while not done:
                # The agent API takes a batch, so wrap the single observation.
                batched_state = np.array([state], dtype=np.float32)

                # Only the first of the three returned values (the actions) is used.
                actions, _, _ = agent.get_vector_actions(batched_state)
                action = actions[0]

                state, reward, terminated, truncated, _ = env.step(action)
                # An episode ends on either natural termination or truncation.
                done = terminated or truncated

                total_raw_reward += reward
                step_count += 1

            print(f"🔹 Episode {ep:2d}/{num_episodes} completed | Steps: {step_count:3d} | Total Env Score: {total_raw_reward:7.2f}")

        print("\n🧹 Evaluation complete. Closing display window safely.")
    finally:
        # Release the render window on every exit path, including errors.
        env.close()

# Script entry point: run the evaluation with the default weights path and
# episode count when this file is executed directly (not when imported).
if __name__ == "__main__":
    evaluate_agent()