"""
Quick smoke test — run locally before pushing to HF Spaces.
Tests: reset, a scripted episode of up to 10 steps, crisis triggers, reward signals, all action types, and termination conditions.

Usage:
    python tests/test_env.py
"""

import sys
import os
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from revops_gym.env import RevOpsEnv
from revops_gym.models import RevOpsAction


def test_episode(difficulty="normal", seed=42):
    """Run a short scripted episode and sanity-check reset and step output.

    Builds a ``RevOpsEnv`` with ``crisis_every=3``, resets it, then feeds a
    fixed list of ten actions. One summary line is printed per step, and the
    loop stops early if the observation reports ``terminated`` or
    ``truncated``.

    Args:
        difficulty: Difficulty label forwarded to ``RevOpsEnv``.
        seed: Seed passed to both the ``RevOpsEnv`` constructor and ``reset``.

    Returns:
        ``True`` once every assertion has held.

    Raises:
        AssertionError: If the reset observation is not at step 0, its MRR
            is not positive, or no reward was collected.
    """
    print(f"\n=== Smoke test | difficulty={difficulty} seed={seed} ===")
    env = RevOpsEnv(crisis_every=3, seed=seed, difficulty=difficulty)
    obs = env.reset(seed=seed)
    assert obs.step_number == 0, "Reset should start at step 0"
    assert obs.mrr > 0, "MRR should be positive after reset"
    print(env.render())

    # Fixed (action_type, magnitude) script so runs are comparable per seed.
    actions = [
        ("increase_marketing", 0.6),
        ("hire_support", 0.8),
        ("negotiate_contracts", 0.5),
        ("raise_prices", 0.4),
        ("feature_investment", 0.7),
        ("cut_costs", 0.3),
        ("discount_campaign", 0.5),
        ("increase_marketing", 0.7),
        ("hire_support", 0.5),
        ("pivot_segment", 0.6),
    ]

    rewards = []
    crises_seen = []
    for action_type, magnitude in actions:
        obs = env.step({"action_type": action_type, "magnitude": magnitude})
        rewards.append(obs.reward_last_step)

        # "NONE" is the value the observation uses when no crisis is active.
        crisis_active = obs.active_crisis != "NONE"
        if crisis_active:
            crises_seen.append(obs.active_crisis)
        crisis_note = f" | ⚠️ {obs.active_crisis}" if crisis_active else ""

        print(
            f"  Step {obs.step_number:2d} | {action_type:<22} mag={magnitude} "
            f"| reward={obs.reward_last_step:+.3f} | MRR=${obs.mrr:,.0f} "
            f"| LTV/CAC={obs.ltv_cac_ratio:.2f}x{crisis_note}"
        )
        if obs.terminated or obs.truncated:
            print(f"\n  Episode ended at step {obs.step_number} "
                  f"({'terminated' if obs.terminated else 'truncated'})")
            break

    # Guard before the statistics: mean/min/max on an empty list would raise
    # ZeroDivisionError/ValueError and hide this clearer assertion message.
    assert rewards, "Should have at least one reward"

    print(f"\n  Total steps: {obs.step_number}")
    print(f"  Mean reward: {sum(rewards)/len(rewards):.4f}")
    print(f"  Min reward:  {min(rewards):.4f}")
    print(f"  Max reward:  {max(rewards):.4f}")
    print(f"  Crises seen: {crises_seen or ['none triggered yet']}")
    print("\n✅ Smoke test passed!")
    return True


def test_all_actions():
    """Step one environment through each listed action type in turn.

    A single ``RevOpsEnv`` seeded with 0 is reset once, then stepped with
    every action name at magnitude 0.5. Each step must report a reward that
    is not ``None``.

    Raises:
        AssertionError: If any step yields ``reward_last_step`` of ``None``.
    """
    print("\n=== Testing all action types ===")
    env = RevOpsEnv(seed=0)
    env.reset(seed=0)

    action_names = (
        "increase_marketing",
        "decrease_marketing",
        "hire_support",
        "fire_support",
        "discount_campaign",
        "raise_prices",
        "feature_investment",
        "cut_costs",
        "negotiate_contracts",
        "pivot_segment",
    )
    for name in action_names:
        payload = {"action_type": name, "magnitude": 0.5}
        result = env.step(payload)
        assert result.reward_last_step is not None
        print(f"  ✓ {name:<24} reward={result.reward_last_step:+.3f}")

    print("✅ All actions tested!")


def test_termination():
    """Check ``RevOpsState.is_terminal`` and the rubric's termination penalty.

    Two hand-built states are examined:

    * MRR 5,000 at step 5 must be terminal, and scoring it with
      ``terminated=True`` must give a ``terminated_penalty`` of -2.0.
    * MRR 100,000 at step 30 must also be terminal.

    Raises:
        AssertionError: If either state is not terminal or the penalty
            differs from -2.0.
    """
    print("\n=== Testing termination conditions ===")
    # Imported locally, as in the rest of this script's termination check.
    from revops_gym.models import RevOpsState
    from revops_gym.reward import RewardRubric

    scorer = RewardRubric()

    # Case 1: MRR under the floor.
    low_mrr_state = RevOpsState(mrr=5_000, step_number=5)
    assert low_mrr_state.is_terminal, "Should terminate when MRR < floor"
    breakdown = scorer.compute(low_mrr_state, terminated=True)
    assert breakdown.terminated_penalty == -2.0, "Should get termination penalty"
    print("  ✓ MRR floor termination works")

    # Case 2: step limit reached with a healthy MRR.
    final_step_state = RevOpsState(mrr=100_000, step_number=30)
    assert final_step_state.is_terminal, "Should truncate at step 30"
    print("  ✓ Step limit truncation works")

    print("✅ Termination tests passed!")


if __name__ == "__main__":
    # Episode smoke test at each difficulty, then the remaining checks.
    for level in ("easy", "normal", "hard"):
        test_episode(difficulty=level)
    test_all_actions()
    test_termination()
    print("\n🎉 All tests passed! Ready to push to HF Spaces.")