import math

from env import WildfireEnv
from env.models import Action, ActionType
from env.reward import RewardCalculator
from env.models import TIER_EASY
from agents.heuristic_agent import HeuristicAgent


def test_successful_episode_scores_high(fresh_env):
    """Run a full heuristic-agent episode and require a cumulative reward above 3.0.

    The environment is reset on the "easy" task with seed 42, then stepped
    with the agent's actions until a step result reports ``done``. The
    per-step rewards are summed and the total must exceed 3.0.
    """
    policy = HeuristicAgent()
    observation = fresh_env.reset(task_id="easy", seed=42)
    episode_return = 0.0
    while True:
        outcome = fresh_env.step(policy.act(observation))
        episode_return += outcome.reward
        # Feed the fresh observation back to the agent on the next turn.
        observation = outcome.observation
        if outcome.done:
            break
    assert episode_return > 3.0, f"Expected > 3.0, got {episode_return:.3f}"


def test_all_pop_lost_scores_negative():
    """Terminal reward must fall below -2.0 when the whole population is lost.

    The final state describes zero containment, 100 of 100 population lost,
    no crew casualty and no invalid actions, evaluated at the step limit
    (80 of 80 steps).
    """
    calculator = RewardCalculator(TIER_EASY)
    score = calculator.compute_terminal_reward(
        {
            "containment_pct": 0.0,
            "pop_lost": 100,
            "total_pop": 100,
            "crew_casualty_occurred": False,
            "invalid_action_count": 0,
        },
        episode_steps=80,
        max_steps=80,
    )
    assert score < -2.0, f"Expected < -2.0, got {score:.3f}"


def test_crew_casualty_stacks():
    """Terminal reward must fall below -3.0 when pop loss and a casualty coincide.

    The final state combines half the population lost (50 of 100) with a
    crew casualty, zero containment and no invalid actions, evaluated at the
    step limit (80 of 80 steps).
    """
    calculator = RewardCalculator(TIER_EASY)
    score = calculator.compute_terminal_reward(
        {
            "containment_pct": 0.0,
            "pop_lost": 50,
            "total_pop": 100,
            "crew_casualty_occurred": True,
            "invalid_action_count": 0,
        },
        episode_steps=80,
        max_steps=80,
    )
    # Expected breakdown assumed by this test (weights live in env.reward --
    # TODO confirm): -3.0 * 0.5 = -1.5 for population loss plus -2.0 for the
    # casualty, i.e. about -3.5, so the -3.0 bound is only crossed when both
    # penalties are applied.
    assert score < -3.0, f"Expected < -3.0 (both penalties stacked), got {score:.3f}"


def test_redundant_action_penalty(fresh_env):
    """Check that a redundant action costs 0.1 of step reward.

    The test has two parts:

    1. Smoke check: the same DEPLOY_CREW action (crew "crew_0" sent to the
       grid centre) is stepped twice on the "easy" task with seed 42, and the
       second step must still return a result.
    2. Unit check: ``compute_step_reward`` is called twice with identical
       before/after states, differing only in the last positional flag, and
       the two rewards must differ by 0.1.
    """
    obs = fresh_env.reset(task_id="easy", seed=42)
    # Target the middle cell of the observed grid.
    tr, tc = len(obs.grid) // 2, len(obs.grid[0]) // 2

    def deploy_to_centre():
        # Build a fresh Action per call so both steps receive equal but
        # independent objects, exactly as two literal constructions would.
        return fresh_env.step(Action(
            action_type=ActionType.DEPLOY_CREW,
            crew_id="crew_0",
            target_row=tr,
            target_col=tc,
        ))

    # First deploy -- not redundant; its result is not inspected.
    deploy_to_centre()

    # Same action again -- redundant. The penalty cannot be isolated from the
    # rest of the env's step reward here, so this is only a smoke check; the
    # exact amount is verified on the calculator below.
    result2 = deploy_to_centre()
    assert result2 is not None  # basic smoke check

    # Direct unit test on compute_step_reward.
    calc = RewardCalculator(TIER_EASY)
    state = {"containment_pct": 0.5, "pop_lost": 0, "total_pop": 10}
    # The third/fourth positional flags are presumably "action valid" and
    # "action redundant" -- TODO confirm against env.reward.
    reward_normal = calc.compute_step_reward(state, state, True, False)
    reward_redundant = calc.compute_step_reward(state, state, True, True)
    # Compare with a tolerance: 0.1 has no exact binary representation, so an
    # exact == can fail on rounding even when the penalty is applied correctly.
    assert math.isclose(reward_redundant, reward_normal - 0.1, abs_tol=1e-9), (
        f"Redundant penalty missing: {reward_normal:.3f} vs {reward_redundant:.3f}"
    )