""" Evaluator — assesses a trained agent over multiple episodes without exploration. Produces summary statistics: mean/std reward, mean waiting time, mean queue length, mean throughput. """ from __future__ import annotations import numpy as np try: from tqdm import tqdm as _tqdm _TQDM = True except ImportError: _TQDM = False from utils.logger import setup_logger class Evaluator: """ Runs a trained agent in greedy (no exploration) mode. Args: env: Environment instance (reset between episodes). agent: Trained agent (select_action called with training=False). config: Project config module. """ def __init__(self, env, agent, config): self.env = env self.agent = agent self.config = config self.logger = setup_logger("evaluator") def evaluate(self, num_episodes: int, render: bool = False) -> dict: """ Evaluate the agent for *num_episodes* episodes. Args: num_episodes: Number of evaluation episodes. render: Call env.render() at each step if True. Returns: Dictionary with mean/std reward and mean performance metrics. """ self.logger.info(f"Evaluating for {num_episodes} episodes …") rewards, waits, queues, thrus = [], [], [], [] iterator = ( _tqdm(range(num_episodes), desc="Eval", unit="ep") if _TQDM else range(num_episodes) ) for _ in iterator: ep_reward, info = self._run_episode(render=render) rewards.append(ep_reward) waits.append(info.get("average_waiting_time", 0.0)) queues.append(info.get("total_queue_length", 0.0)) thrus.append(info.get("vehicles_passed", 0)) results = { "mean_reward": float(np.mean(rewards)), "std_reward": float(np.std(rewards)), "best_reward": float(np.max(rewards)), "mean_waiting_time": float(np.mean(waits)), "mean_queue_length": float(np.mean(queues)), "mean_throughput": float(np.mean(thrus)), } self.logger.info(f" mean reward : {results['mean_reward']:.2f}") self.logger.info(f" best reward : {results['best_reward']:.2f}") self.logger.info(f" mean wait : {results['mean_waiting_time']:.2f}") return results # ------------------------------------------------------------------ # Internal # ------------------------------------------------------------------ def _run_episode(self, render: bool = False) -> tuple[float, dict]: state, _ = self.env.reset() ep_reward = 0.0 done = False info: dict = {} while not done: action = self.agent.select_action(state, training=False) next_state, reward, terminated, truncated, info = self.env.step(action) done = terminated or truncated if render: self.env.render() state = next_state ep_reward += reward return ep_reward, info