Spaces:

Eshit
/

Wildfire-Containment-Simulator

Running

App Files Files Community

Wildfire-Containment-Simulator / scripts /evaluate.py

Eshit

Deploy to HF Space

363abf3 13 days ago

raw

history blame contribute delete

3.69 kB

	"""
	Wildfire Containment Simulator — Evaluation Script.

	Runs both agents (random + heuristic) on all 3 difficulty tiers,
	reports scores, and saves results to JSON.
	"""

	import json
	import sys
	import os
	import time

	sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))

	from agents.random_agent import RandomAgent
	from agents.heuristic_agent import HeuristicAgent
	from graders.grader_easy import grade as grade_easy
	from graders.grader_medium import grade as grade_medium
	from graders.grader_hard import grade as grade_hard


	def run_evaluation(num_runs: int = 5) -> dict:
	graders = {
	"easy": grade_easy,
	"medium": grade_medium,
	"hard": grade_hard,
	}

	agents = {
	"random": lambda seed: RandomAgent(seed=seed),
	"heuristic": lambda seed: HeuristicAgent(),
	}

	results = {}

	print("=" * 80)
	print("WILDFIRE CONTAINMENT SIMULATOR — Evaluation")
	print("=" * 80)
	print()

	for agent_name, agent_factory in agents.items():
	results[agent_name] = {}
	for tier_name, grader_fn in graders.items():
	scores = []
	detail_rows = []
	times = []

	for run in range(num_runs):
	seed = 42 + run
	agent = agent_factory(seed)

	start = time.time()
	score, details = grader_fn(agent, seed=seed)
	elapsed = time.time() - start

	scores.append(score)
	detail_rows.append(details)
	times.append(elapsed)

	mean_score = sum(scores) / len(scores)
	std_score = (sum((s - mean_score) 2 for s in scores) / len(scores)) 0.5
	mean_containment = sum(d["containment_pct"] for d in detail_rows) / len(detail_rows)
	mean_pop_saved = sum(d["pop_saved_pct"] for d in detail_rows) / len(detail_rows)
	mean_steps = sum(d["steps"] for d in detail_rows) / len(detail_rows)
	casualty_rate = sum(1 for d in detail_rows if d["crew_casualty"]) / len(detail_rows)

	results[agent_name][tier_name] = {
	"scores": [round(s, 4) for s in scores],
	"mean": round(mean_score, 4),
	"std": round(std_score, 4),
	"mean_containment_pct": round(mean_containment, 4),
	"mean_pop_saved_pct": round(mean_pop_saved, 4),
	"mean_steps": round(mean_steps, 1),
	"crew_casualty_rate": round(casualty_rate, 2),
	"mean_time_s": round(sum(times) / len(times), 3),
	}

	print(f" {agent_name:12s} \| {tier_name:8s} \| "
	f"reward={mean_score:+.2f}+-{std_score:.2f} \| "
	f"contain={mean_containment*100:.0f}% \| "
	f"pop_saved={mean_pop_saved*100:.0f}% \| "
	f"steps={mean_steps:.0f}")

	print()

	print("=" * 80)
	print(f"{'Agent':>12s} \| {'Easy':>10s} \| {'Medium':>10s} \| {'Hard':>10s}")
	print("-" * 80)
	for agent_name in agents:
	easy = results[agent_name]["easy"]["mean"]
	medium = results[agent_name]["medium"]["mean"]
	hard = results[agent_name]["hard"]["mean"]
	print(f"{agent_name:>12s} \| {easy:>+10.2f} \| {medium:>+10.2f} \| {hard:>+10.2f}")
	print("=" * 80)

	output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "results.json")
	with open(output_path, "w") as f:
	json.dump(results, f, indent=2)
	print(f"\nResults saved to {output_path}")

	return results


	if __name__ == "__main__":
	num_runs = int(sys.argv[1]) if len(sys.argv) > 1 else 3
	run_evaluation(num_runs=num_runs)