Spaces:
Sleeping
Sleeping
| """ | |
| Evaluator — assesses a trained agent over multiple episodes without exploration. | |
| Produces summary statistics: | |
| mean/std reward, mean waiting time, mean queue length, mean throughput. | |
| """ | |
| from __future__ import annotations | |
| import numpy as np | |
| try: | |
| from tqdm import tqdm as _tqdm | |
| _TQDM = True | |
| except ImportError: | |
| _TQDM = False | |
| from utils.logger import setup_logger | |
class Evaluator:
    """
    Runs a trained agent in evaluation mode and aggregates per-episode metrics.

    Actions are always requested with ``agent.select_action(state, training=False)``.

    Args:
        env:    Environment instance; ``reset()`` is called at the start of
                every episode and ``step(action)`` must return the 5-tuple
                ``(next_state, reward, terminated, truncated, info)``.
        agent:  Trained agent exposing ``select_action(state, training=False)``.
        config: Project config module (stored on the instance; not read here).
    """

    def __init__(self, env, agent, config):
        self.env = env
        self.agent = agent
        self.config = config
        self.logger = setup_logger("evaluator")

    def evaluate(self, num_episodes: int, render: bool = False) -> dict:
        """
        Evaluate the agent for *num_episodes* episodes.

        Args:
            num_episodes: Number of evaluation episodes (must be >= 1).
            render:       Call env.render() after each step if True.

        Returns:
            Dictionary with keys ``mean_reward``, ``std_reward``,
            ``best_reward``, ``mean_waiting_time``, ``mean_queue_length``
            and ``mean_throughput`` (all Python floats).

        Raises:
            ValueError: If *num_episodes* is less than 1.
        """
        # Without this guard the reductions below run on empty lists:
        # np.mean warns and yields NaN, then np.max raises an opaque
        # "zero-size array" ValueError. Fail early with a clear message,
        # keeping the same exception type.
        if num_episodes < 1:
            raise ValueError(
                f"num_episodes must be >= 1, got {num_episodes!r}"
            )

        self.logger.info(f"Evaluating for {num_episodes} episodes …")
        rewards, waits, queues, thrus = [], [], [], []

        # Show a progress bar only when tqdm was importable at module load.
        iterator = (
            _tqdm(range(num_episodes), desc="Eval", unit="ep")
            if _TQDM
            else range(num_episodes)
        )

        for _ in iterator:
            ep_reward, info = self._run_episode(render=render)
            rewards.append(ep_reward)
            # The metrics are read from the info dict of the episode's final
            # step; a missing key counts as zero.
            waits.append(info.get("average_waiting_time", 0.0))
            queues.append(info.get("total_queue_length", 0.0))
            thrus.append(info.get("vehicles_passed", 0))

        results = {
            "mean_reward": float(np.mean(rewards)),
            "std_reward": float(np.std(rewards)),
            "best_reward": float(np.max(rewards)),
            "mean_waiting_time": float(np.mean(waits)),
            "mean_queue_length": float(np.mean(queues)),
            "mean_throughput": float(np.mean(thrus)),
        }

        self.logger.info(f" mean reward : {results['mean_reward']:.2f}")
        self.logger.info(f" best reward : {results['best_reward']:.2f}")
        self.logger.info(f" mean wait : {results['mean_waiting_time']:.2f}")
        return results

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------
    def _run_episode(self, render: bool = False) -> tuple[float, dict]:
        """
        Play one episode from ``env.reset()`` until it terminates or truncates.

        Args:
            render: Call env.render() after each step if True.

        Returns:
            ``(ep_reward, info)`` where *ep_reward* is the sum of all step
            rewards and *info* is the dict returned by the last ``env.step``.
        """
        state, _ = self.env.reset()
        ep_reward = 0.0
        done = False
        info: dict = {}

        while not done:
            action = self.agent.select_action(state, training=False)
            next_state, reward, terminated, truncated, info = self.env.step(action)
            # Either flag ends the episode.
            done = terminated or truncated
            if render:
                self.env.render()
            state = next_state
            ep_reward += reward

        return ep_reward, info