| from env import WildfireEnv |
| from env.models import Action, ActionType |
| from env.reward import RewardCalculator |
| from env.models import TIER_EASY |
| from agents.heuristic_agent import HeuristicAgent |
|
|
|
|
def test_successful_episode_scores_high(fresh_env):
    """Play one full ``easy`` episode with the heuristic agent and check its return.

    The environment is reset with ``task_id="easy"`` and ``seed=42``. The
    agent's action is stepped repeatedly until the environment reports
    ``done``; the per-step rewards are summed and the sum must exceed 3.0.
    """
    agent = HeuristicAgent()
    obs = fresh_env.reset(task_id="easy", seed=42)
    total_reward = 0.0

    # Always take at least one step; stop as soon as the env signals the end.
    while True:
        result = fresh_env.step(agent.act(obs))
        total_reward += result.reward
        obs = result.observation
        if result.done:
            break

    assert total_reward > 3.0, f"Expected > 3.0, got {total_reward:.3f}"
|
|
|
|
def test_all_pop_lost_scores_negative():
    """Terminal reward must be below -2.0 when the whole population is lost.

    The final state has zero containment, ``pop_lost == total_pop == 100``,
    no crew casualty and no invalid actions; the episode is reported as
    having used all 80 of its 80 steps.
    """
    everyone_lost = dict(
        containment_pct=0.0,
        pop_lost=100,
        total_pop=100,
        crew_casualty_occurred=False,
        invalid_action_count=0,
    )
    terminal = RewardCalculator(TIER_EASY).compute_terminal_reward(
        everyone_lost, episode_steps=80, max_steps=80
    )
    assert terminal < -2.0, f"Expected < -2.0, got {terminal:.3f}"
|
|
|
|
def test_crew_casualty_stacks():
    """Terminal reward must drop below -3.0 when two penalties apply at once.

    The final state combines a 50-of-100 population loss with
    ``crew_casualty_occurred=True`` (zero containment, no invalid actions),
    for an episode that used all 80 of its 80 steps. The assertion message
    describes the expectation as both penalties being stacked.
    """
    final_state = {
        "containment_pct": 0.0,
        "pop_lost": 50,
        "total_pop": 100,
        "crew_casualty_occurred": True,
        "invalid_action_count": 0,
    }
    step_budget = {"episode_steps": 80, "max_steps": 80}

    reward_calc = RewardCalculator(TIER_EASY)
    terminal = reward_calc.compute_terminal_reward(final_state, **step_budget)

    assert terminal < -3.0, f"Expected < -3.0 (both penalties stacked), got {terminal:.3f}"
|
|
|
|
def test_redundant_action_penalty(fresh_env):
    """Check that a redundant action lowers the step reward by 0.1.

    The test has two parts:

    1. Smoke check: after resetting the ``easy`` task with ``seed=42``, the
       same ``DEPLOY_CREW`` action (``crew_0`` sent to the grid's centre
       cell) is stepped twice in a row, and the second step must still
       return a result.
    2. Unit check: ``RewardCalculator.compute_step_reward`` is called twice
       with identical previous/current state, differing only in the last
       positional flag, and the two rewards must differ by 0.1.
    """
    # Local import keeps this block self-contained; the file has no
    # module-level ``import math``.
    import math

    obs = fresh_env.reset(task_id="easy", seed=42)
    centre_row = len(obs.grid) // 2
    centre_col = len(obs.grid[0]) // 2

    def deploy_to_centre():
        # Build a fresh Action for each step, as the environment is handed a
        # distinct object per call.
        return Action(
            action_type=ActionType.DEPLOY_CREW,
            crew_id="crew_0",
            target_row=centre_row,
            target_col=centre_col,
        )

    # First deployment; its result is not inspected.
    fresh_env.step(deploy_to_centre())

    # Identical deployment repeated immediately afterwards.
    result2 = fresh_env.step(deploy_to_centre())
    assert result2 is not None

    calc = RewardCalculator(TIER_EASY)
    state = {"containment_pct": 0.5, "pop_lost": 0, "total_pop": 10}
    # NOTE(review): the 3rd/4th positional args are presumably the
    # "action valid" and "action redundant" flags -- confirm against
    # RewardCalculator.compute_step_reward.
    reward_normal = calc.compute_step_reward(state, state, True, False)
    reward_redundant = calc.compute_step_reward(state, state, True, True)

    # Compare with a tolerance: exact ``==`` on floats can fail purely from
    # rounding when the penalty is applied inside a larger sum.
    assert math.isclose(reward_redundant, reward_normal - 0.1, abs_tol=1e-9), (
        f"Redundant penalty missing: {reward_normal:.3f} vs {reward_redundant:.3f}"
    )
|
|