# Traffic-Control / training / evaluator.py
# Dhaerya's picture
# Add files
# b00d5d5
"""
Evaluator — assesses a trained agent over multiple episodes without exploration.
Produces summary statistics:
mean/std reward, mean waiting time, mean queue length, mean throughput.
"""
from __future__ import annotations
import numpy as np
try:
from tqdm import tqdm as _tqdm
_TQDM = True
except ImportError:
_TQDM = False
from utils.logger import setup_logger
class Evaluator:
    """
    Runs a trained agent in greedy (no exploration) mode.

    Args:
        env:    Environment instance (reset between episodes). It must provide
                ``reset()`` returning ``(state, info)``, ``step(action)``
                returning ``(next_state, reward, terminated, truncated, info)``
                and, when rendering is requested, ``render()``.
        agent:  Trained agent (select_action called with training=False).
        config: Project config module. It is stored on the instance; none of
                the methods in this class read it.
    """

    def __init__(self, env, agent, config):
        self.env = env
        self.agent = agent
        self.config = config
        self.logger = setup_logger("evaluator")

    def evaluate(self, num_episodes: int, render: bool = False) -> dict:
        """
        Evaluate the agent for *num_episodes* episodes.

        The per-episode waiting time, queue length and throughput are read
        from the ``info`` dict returned by the *last* step of each episode
        (keys ``average_waiting_time``, ``total_queue_length`` and
        ``vehicles_passed``); a missing key counts as zero.

        Args:
            num_episodes: Number of evaluation episodes (must be >= 1).
            render:       Call env.render() at each step if True.

        Returns:
            Dictionary with the keys ``mean_reward``, ``std_reward``,
            ``best_reward``, ``mean_waiting_time``, ``mean_queue_length``
            and ``mean_throughput``, all plain Python floats.

        Raises:
            ValueError: If *num_episodes* is smaller than 1.
        """
        # With zero episodes every list below stays empty: np.mean would
        # warn and return NaN, and np.max would raise an opaque ValueError.
        # Fail early with the same exception type but a useful message.
        if num_episodes < 1:
            raise ValueError(
                f"num_episodes must be at least 1, got {num_episodes!r}"
            )

        self.logger.info(f"Evaluating for {num_episodes} episodes …")
        rewards, waits, queues, thrus = [], [], [], []

        # Show a progress bar only when tqdm could be imported.
        iterator = (
            _tqdm(range(num_episodes), desc="Eval", unit="ep")
            if _TQDM
            else range(num_episodes)
        )
        for _ in iterator:
            ep_reward, info = self._run_episode(render=render)
            rewards.append(ep_reward)
            waits.append(info.get("average_waiting_time", 0.0))
            queues.append(info.get("total_queue_length", 0.0))
            thrus.append(info.get("vehicles_passed", 0))

        results = {
            "mean_reward": float(np.mean(rewards)),
            "std_reward": float(np.std(rewards)),
            "best_reward": float(np.max(rewards)),
            "mean_waiting_time": float(np.mean(waits)),
            "mean_queue_length": float(np.mean(queues)),
            "mean_throughput": float(np.mean(thrus)),
        }
        self.logger.info(f" mean reward : {results['mean_reward']:.2f}")
        self.logger.info(f" best reward : {results['best_reward']:.2f}")
        self.logger.info(f" mean wait : {results['mean_waiting_time']:.2f}")
        return results

    # ------------------------------------------------------------------
    # Internal
    # ------------------------------------------------------------------
    def _run_episode(self, render: bool = False) -> tuple[float, dict]:
        """
        Play one episode with the agent acting greedily.

        Args:
            render: Call env.render() after every step if True.

        Returns:
            ``(ep_reward, info)`` where *ep_reward* is the sum of the step
            rewards and *info* is the dict returned by the final step.
        """
        state, _ = self.env.reset()
        ep_reward = 0.0
        done = False
        info: dict = {}
        while not done:
            action = self.agent.select_action(state, training=False)
            next_state, reward, terminated, truncated, info = self.env.step(action)
            # The episode ends on either a terminal state or a truncation.
            done = terminated or truncated
            if render:
                self.env.render()
            state = next_state
            ep_reward += reward
        return ep_reward, info