""" Wildfire Containment Simulator — Evaluation Script. Runs both agents (random + heuristic) on all 3 difficulty tiers, reports scores, and saves results to JSON. """ import json import sys import os import time sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "..")) from agents.random_agent import RandomAgent from agents.heuristic_agent import HeuristicAgent from graders.grader_easy import grade as grade_easy from graders.grader_medium import grade as grade_medium from graders.grader_hard import grade as grade_hard def run_evaluation(num_runs: int = 5) -> dict: graders = { "easy": grade_easy, "medium": grade_medium, "hard": grade_hard, } agents = { "random": lambda seed: RandomAgent(seed=seed), "heuristic": lambda seed: HeuristicAgent(), } results = {} print("=" * 80) print("WILDFIRE CONTAINMENT SIMULATOR — Evaluation") print("=" * 80) print() for agent_name, agent_factory in agents.items(): results[agent_name] = {} for tier_name, grader_fn in graders.items(): scores = [] detail_rows = [] times = [] for run in range(num_runs): seed = 42 + run agent = agent_factory(seed) start = time.time() score, details = grader_fn(agent, seed=seed) elapsed = time.time() - start scores.append(score) detail_rows.append(details) times.append(elapsed) mean_score = sum(scores) / len(scores) std_score = (sum((s - mean_score) ** 2 for s in scores) / len(scores)) ** 0.5 mean_containment = sum(d["containment_pct"] for d in detail_rows) / len(detail_rows) mean_pop_saved = sum(d["pop_saved_pct"] for d in detail_rows) / len(detail_rows) mean_steps = sum(d["steps"] for d in detail_rows) / len(detail_rows) casualty_rate = sum(1 for d in detail_rows if d["crew_casualty"]) / len(detail_rows) results[agent_name][tier_name] = { "scores": [round(s, 4) for s in scores], "mean": round(mean_score, 4), "std": round(std_score, 4), "mean_containment_pct": round(mean_containment, 4), "mean_pop_saved_pct": round(mean_pop_saved, 4), "mean_steps": round(mean_steps, 1), "crew_casualty_rate": round(casualty_rate, 2), "mean_time_s": round(sum(times) / len(times), 3), } print(f" {agent_name:12s} | {tier_name:8s} | " f"reward={mean_score:+.2f}+-{std_score:.2f} | " f"contain={mean_containment*100:.0f}% | " f"pop_saved={mean_pop_saved*100:.0f}% | " f"steps={mean_steps:.0f}") print() print("=" * 80) print(f"{'Agent':>12s} | {'Easy':>10s} | {'Medium':>10s} | {'Hard':>10s}") print("-" * 80) for agent_name in agents: easy = results[agent_name]["easy"]["mean"] medium = results[agent_name]["medium"]["mean"] hard = results[agent_name]["hard"]["mean"] print(f"{agent_name:>12s} | {easy:>+10.2f} | {medium:>+10.2f} | {hard:>+10.2f}") print("=" * 80) output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "results.json") with open(output_path, "w") as f: json.dump(results, f, indent=2) print(f"\nResults saved to {output_path}") return results if __name__ == "__main__": num_runs = int(sys.argv[1]) if len(sys.argv) > 1 else 3 run_evaluation(num_runs=num_runs)