Spaces:

Eshit
/

Wildfire-Containment-Simulator

Running

App Files Files Community

Wildfire-Containment-Simulator / tests /test_reward.py

Eshit

Deploy to HF Space

363abf3 about 1 month ago

raw

history blame contribute delete

3.04 kB

	from env import WildfireEnv
	from env.models import Action, ActionType
	from env.reward import RewardCalculator
	from env.models import TIER_EASY
	from agents.heuristic_agent import HeuristicAgent


	def test_successful_episode_scores_high(fresh_env):
	    """Run the heuristic agent through one full "easy" episode (seed 42).

	    The rewards returned by every step are summed, and the episode total
	    is required to exceed 3.0.
	    """
	    policy = HeuristicAgent()
	    observation = fresh_env.reset(task_id="easy", seed=42)
	    total_reward = 0.0
	    # Keep stepping until the environment reports the episode as done.
	    while True:
	        outcome = fresh_env.step(policy.act(observation))
	        total_reward += outcome.reward
	        observation = outcome.observation
	        if outcome.done:
	            break
	    assert total_reward > 3.0, f"Expected > 3.0, got {total_reward:.3f}"


	def test_all_pop_lost_scores_negative():
	    """Terminal reward must fall below -2.0 when the whole population is lost.

	    The final state has zero containment, 100 of 100 population lost, no
	    crew casualty and no invalid actions, evaluated at step 80 of 80.
	    """
	    final_state = dict(
	        containment_pct=0.0,
	        pop_lost=100,
	        total_pop=100,
	        crew_casualty_occurred=False,
	        invalid_action_count=0,
	    )
	    calculator = RewardCalculator(TIER_EASY)
	    terminal = calculator.compute_terminal_reward(
	        final_state,
	        episode_steps=80,
	        max_steps=80,
	    )
	    assert terminal < -2.0, f"Expected < -2.0, got {terminal:.3f}"


	def test_crew_casualty_stacks():
	    """Terminal reward must fall below -3.0 when both penalties apply.

	    The final state combines a 50% population loss with a crew casualty.
	    Per the original author's note the expected breakdown is
	    -3.0 * 0.5 = -1.5 for the population loss plus -2.0 for the casualty,
	    i.e. -3.5 in total.
	    """
	    # Population loss AND crew casualty in the same final state.
	    final_state = dict(
	        containment_pct=0.0,
	        pop_lost=50,
	        total_pop=100,
	        crew_casualty_occurred=True,
	        invalid_action_count=0,
	    )
	    calculator = RewardCalculator(TIER_EASY)
	    terminal = calculator.compute_terminal_reward(
	        final_state,
	        episode_steps=80,
	        max_steps=80,
	    )
	    assert terminal < -3.0, f"Expected < -3.0 (both penalties stacked), got {terminal:.3f}"


	def test_redundant_action_penalty(fresh_env):
	    """Check that a redundant action lowers the step reward by 0.1.

	    Two checks are performed:

	    1. Smoke check: the same DEPLOY_CREW action is sent to the environment
	       twice in a row and the second step must return a result.
	    2. Unit check: ``RewardCalculator.compute_step_reward`` is called twice
	       with identical states, differing only in the last positional flag
	       (presumably the "redundant" flag -- confirm against env.reward).
	       The flagged reward must be 0.1 lower than the unflagged one.

	    The comparison uses ``math.isclose`` rather than ``==`` because
	    ``reward_normal - 0.1`` is a binary floating-point result; exact
	    equality can fail from rounding alone even when the penalty is
	    applied correctly.
	    """
	    import math

	    # Function-scope imports kept from the original so the unit check below
	    # does not depend on how the module-level imports are arranged.
	    from env.reward import RewardCalculator
	    from env.models import TIER_EASY

	    obs = fresh_env.reset(task_id="easy", seed=42)
	    rows = len(obs.grid)
	    cols = len(obs.grid[0])
	    tr, tc = rows // 2, cols // 2  # centre cell of the grid

	    def _deploy_to_centre():
	        # Build a fresh Action per step so the two steps never share an object.
	        return Action(
	            action_type=ActionType.DEPLOY_CREW,
	            crew_id="crew_0",
	            target_row=tr,
	            target_col=tc,
	        )

	    # First deploy -- not redundant. Its result is not inspected.
	    fresh_env.step(_deploy_to_centre())

	    # Same action again -- redundant, step reward should include -0.1 penalty.
	    result2 = fresh_env.step(_deploy_to_centre())

	    # The penalty cannot be isolated from the other reward terms through the
	    # environment, so only verify that the redundant step is accepted.
	    assert result2 is not None  # basic smoke check

	    # Direct unit test on compute_step_reward.
	    calc = RewardCalculator(TIER_EASY)
	    state = {"containment_pct": 0.5, "pop_lost": 0, "total_pop": 10}
	    reward_normal = calc.compute_step_reward(state, state, True, False)
	    reward_redundant = calc.compute_step_reward(state, state, True, True)
	    # abs_tol guards the case where the expected value is at or near zero,
	    # where a purely relative tolerance would be too strict.
	    assert math.isclose(reward_redundant, reward_normal - 0.1, abs_tol=1e-9), (
	        f"Redundant penalty missing: {reward_normal:.3f} vs {reward_redundant:.3f}"
	    )