"""Unit tests for the parsing and scoring helpers in inference.py.

These guard the two-line action-format contract that the trained policy
emits and the reward function consumes.
"""
from __future__ import annotations

import importlib.util
import os
import sys

ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
spec = importlib.util.spec_from_file_location("inference", os.path.join(ROOT, "inference.py"))
inf = importlib.util.module_from_spec(spec)
sys.modules["inference"] = inf
spec.loader.exec_module(inf)

OPTIONS = ["full cooperation", "limited disclosure", "seek legal delay"]


def test_parses_well_formed_two_line_completion():
    completion = "DECISION: full cooperation\nPITCH: A clean record protects long-term reputation."
    decision, pitch, ok = inf.parse_completion(completion, OPTIONS)
    assert decision == "full cooperation"
    assert "clean record" in pitch
    assert ok is True


def test_falls_back_to_substring_match_when_format_partially_broken():
    completion = "I think we should go with limited disclosure to manage risk."
    decision, _, ok = inf.parse_completion(completion, OPTIONS)
    assert decision == "limited disclosure"
    assert ok is False


def test_returns_first_option_when_completion_unintelligible():
    completion = "blah blah blah"
    decision, pitch, ok = inf.parse_completion(completion, OPTIONS)
    assert decision == OPTIONS[0]
    assert pitch == ""
    assert ok is False


def test_pitch_clipped_to_first_line():
    completion = "DECISION: seek legal delay\nPITCH: buying time preserves runway.\nExtra trailing line."
    _, pitch, ok = inf.parse_completion(completion, OPTIONS)
    assert "buying time preserves runway." in pitch
    assert "Extra trailing line" not in pitch
    assert ok is True


def test_keyword_pitch_score_rewards_role_alignment():
    cfo_text = "runway discipline and burn control protect compliance"
    cto_text = "engineering quality and team morale make velocity sustainable"
    assert inf.keyword_pitch_score(cfo_text, "CFO") >= 0.5
    assert inf.keyword_pitch_score(cto_text, "CTO") >= 0.5
    assert inf.keyword_pitch_score("totally unrelated text about cats", "CFO") == 0.0


def test_keyword_pitch_score_zero_on_empty_pitch():
    assert inf.keyword_pitch_score("", "CTO") == 0.0
    assert inf.keyword_pitch_score("   ", "CFO") == 0.0


def test_random_policy_act_returns_valid_option():
    class FakeObs:
        options = OPTIONS
        npc_statements = []
    pol = inf.RandomPolicy()
    decision, pitch, ok = pol.act(FakeObs())
    assert decision in OPTIONS
    assert pitch == ""
    assert ok is False


def test_run_summary_aggregates_metrics_correctly():
    eps = [
        inf.EpisodeMetrics(seed=1, total_reward=2.0, final_profitability=50.0,
                           survived=True, votes_won=7, votes_total=10,
                           pitches_written=4, avg_pitch_score=0.3),
        inf.EpisodeMetrics(seed=2, total_reward=4.0, final_profitability=70.0,
                           survived=True, votes_won=9, votes_total=10,
                           pitches_written=8, avg_pitch_score=0.5),
    ]
    s = inf.summarise("trained-test", eps)
    assert s.policy == "trained-test"
    assert s.n_episodes == 2
    assert abs(s.mean_reward - 3.0) < 1e-6
    assert abs(s.mean_profitability - 60.0) < 1e-6
    assert s.survival_rate == 1.0
    assert abs(s.win_rate_per_round - 0.8) < 1e-6
    assert abs(s.pitch_usage_rate - 0.6) < 1e-6
    assert abs(s.mean_pitch_score - 0.4) < 1e-6