"""SchemaShiftEnvironment acceptance tests — Phase 6 (end-to-end RL loop)."""
from __future__ import annotations

import pytest

from models import (
    Action,
    CompleteParams,
    DriftReportParams,
    InspectParams,
    RetryParams,
    ToolCallParams,
)
from server.environment import SchemaShiftEnvironment


# ──────────────────────────────────────────────────────────────────
# Test 1 — step() before reset() must raise (Round 1 bug prevention)
# ──────────────────────────────────────────────────────────────────

def test_step_before_reset_raises() -> None:
    """Stepping a never-reset environment raises RuntimeError mentioning 'reset'."""
    fresh_env = SchemaShiftEnvironment()
    noop_action = Action(
        type="complete_task",
        complete=CompleteParams(summary="noop"),
    )

    with pytest.raises(RuntimeError) as caught:
        fresh_env.step(noop_action)

    # Case-insensitive check so the exact wording of the message stays free.
    message = str(caught.value).lower()
    assert "reset" in message


# ──────────────────────────────────────────────────────────────────
# Test 2 — reset returns a valid observation
# ──────────────────────────────────────────────────────────────────

def test_reset_returns_valid_observation() -> None:
    """reset() on E1 returns a step-0 observation with mail and calendar but no crm."""
    task = "E1_onboard_new_hire"
    first_obs = SchemaShiftEnvironment().reset(task)

    # Episode bookkeeping.
    assert first_obs.task_id == task
    assert first_obs.step == 0
    assert first_obs.max_steps == 8

    # Tools visible for this task.
    schemas = first_obs.tool_schemas
    assert "mail" in schemas
    assert "calendar" in schemas
    assert "crm" not in schemas

    # Task metadata.
    assert first_obs.done is False
    assert first_obs.difficulty == "easy"
    assert len(first_obs.success_criteria) >= 1


# ──────────────────────────────────────────────────────────────────
# Test 3 — reset on unknown task raises
# ──────────────────────────────────────────────────────────────────

def test_reset_unknown_task_raises() -> None:
    """reset() with a task id that does not exist raises ValueError."""
    unknown_task = "nonexistent_task"
    env = SchemaShiftEnvironment()

    with pytest.raises(ValueError):
        env.reset(unknown_task)


# ──────────────────────────────────────────────────────────────────
# Test 4 — call_tool success updates agent_state
# ──────────────────────────────────────────────────────────────────

def test_call_tool_success_updates_state() -> None:
    """A successful mail.send_message call shows up in the observation's known_state."""
    recipient = "priya@company.com"
    env = SchemaShiftEnvironment()
    env.reset("E1_onboard_new_hire")

    send_welcome = Action(
        type="call_tool",
        tool_call=ToolCallParams(
            tool="mail",
            endpoint="send_message",
            params={
                "to": recipient,
                "subject": "Welcome!",
                "body": "Welcome to the team.",
            },
        ),
    )
    # The reward is not under test here; only the resulting observation is.
    obs, _ = env.step(send_welcome)

    response = obs.last_response
    assert response is not None
    assert response.ok is True

    tracked = obs.known_state
    assert tracked["mail.sent_count"] == 1
    assert tracked["mail.last_sent_to"] == recipient
    assert tracked["mail.last_subject_contains_welcome"] is True


# ──────────────────────────────────────────────────────────────────
# Test 5 — FULL E1 episode with drift → inspect → retry → report → complete
# ──────────────────────────────────────────────────────────────────

def test_e1_full_episode_with_adaptation() -> None:
    """Play a full E1 episode: send, inspect, hit drift, re-inspect, retry, report, complete.

    After the final step the test checks the environment's internal state and
    the reward returned by the ``complete_task`` step.
    """
    env = SchemaShiftEnvironment()
    env.reset("E1_onboard_new_hire")

    def step_expecting(action, ok_flag):
        """Run one step and assert the response exists with the given ``ok`` value."""
        observation, _ = env.step(action)
        assert (
            observation.last_response is not None
            and observation.last_response.ok is ok_flag
        )
        return observation

    # Fields shared by the stale and the adapted create_event payloads.
    orientation = {
        "title": "New Hire Orientation",
        "start": "2026-04-27T10:00:00Z",
        "end": "2026-04-27T11:00:00Z",
    }
    inspect_calendar = InspectParams(tool="calendar")

    # Step 1: welcome email goes out before any drift.
    step_expecting(
        Action(
            type="call_tool",
            tool_call=ToolCallParams(
                tool="mail",
                endpoint="send_message",
                params={
                    "to": "priya@company.com",
                    "subject": "Welcome aboard!",
                    "body": "Welcome to the team.",
                },
            ),
        ),
        True,
    )

    # Step 2: look at the calendar schema while it is still pre-drift.
    step_expecting(Action(type="inspect_schema", inspect=inspect_calendar), True)

    # Step 3: drift fires at state.step=3, so the stale 'attendees' payload fails.
    step_expecting(
        Action(
            type="call_tool",
            tool_call=ToolCallParams(
                tool="calendar",
                endpoint="create_event",
                params={
                    **orientation,
                    "attendees": ["priya@company.com", "alex@company.com"],
                },
            ),
        ),
        False,
    )

    # Step 4: re-inspect; the schema must now advertise 'participants'.
    obs = step_expecting(
        Action(type="inspect_schema", inspect=InspectParams(tool="calendar")),
        True,
    )
    create_event_params = obs.tool_schemas["calendar"]["create_event"]["params"]
    assert "participants" in create_event_params

    # Step 5: retry the event using the new participants format.
    step_expecting(
        Action(
            type="retry_with_variant",
            retry=RetryParams(
                tool="calendar",
                endpoint="create_event",
                params={
                    **orientation,
                    "participants": [
                        {"email": "priya@company.com", "role": "required"},
                        {"email": "alex@company.com", "role": "required"},
                    ],
                },
            ),
        ),
        True,
    )

    # Step 6: report the drift (no per-step assertion; checked via state below).
    env.step(
        Action(
            type="report_drift",
            report=DriftReportParams(
                tool="calendar",
                drift_kind="field_rename",
                description="create_event attendees renamed to participants",
            ),
        )
    )

    # Step 7: finish the task and keep the final observation and reward.
    obs, final_reward = env.step(
        Action(
            type="complete_task",
            complete=CompleteParams(
                summary="Onboarded Priya with welcome email and orientation event.",
            ),
        )
    )

    # Internal state is read directly to verify what the episode recorded.
    state = env._state
    assert state is not None
    assert obs.done is True

    recorded = state.agent_state
    assert recorded["mail.sent_count"] == 1
    assert recorded["calendar.events_count"] == 1
    assert recorded["calendar.last_event_has_both_attendees"] is True
    assert state.drift_plan[0].detected_by_agent is True

    assert final_reward.task_completion == 1.0
    assert final_reward.drift_detection == 1.0
    assert final_reward.adaptation_quality == 1.0
    assert final_reward.shaped_total > 0.5
    assert final_reward.binary == 1.0


# ──────────────────────────────────────────────────────────────────
# Test 6 — max_steps terminates episode
# ──────────────────────────────────────────────────────────────────

def test_max_steps_terminates_episode() -> None:
    """Six inspect_schema steps on E2 leave the episode done at step 6."""
    total_steps = 6
    env = SchemaShiftEnvironment()
    env.reset("E2_meeting_invite_blast")
    probe = Action(type="inspect_schema", inspect=InspectParams(tool="mail"))

    final_obs = None
    for _attempt in range(total_steps):
        final_obs, _reward = env.step(probe)

    assert final_obs is not None
    assert final_obs.done is True
    assert final_obs.step == total_steps


# ──────────────────────────────────────────────────────────────────
# Test 7 — step_shaping +0.10 for inspect after failure
# ──────────────────────────────────────────────────────────────────

def test_step_shaping_applied_correctly() -> None:
    """inspect_schema right after a bad call earns +0.10 step shaping."""
    env = SchemaShiftEnvironment()
    env.reset("E1_onboard_new_hire")

    # Step 1: send_message without the required 'body' (expected to fail with 400).
    incomplete_send = Action(
        type="call_tool",
        tool_call=ToolCallParams(
            tool="mail",
            endpoint="send_message",
            params={"to": "x@y.com", "subject": "hi"},
        ),
    )
    env.step(incomplete_send)

    # Step 2: inspecting the schema after that failure should be rewarded.
    follow_up = Action(type="inspect_schema", inspect=InspectParams(tool="mail"))
    _, shaping_reward = env.step(follow_up)

    assert shaping_reward.step_shaping == pytest.approx(0.10)


# ──────────────────────────────────────────────────────────────────
# Test 8 — dumb retry penalty
# ──────────────────────────────────────────────────────────────────

def test_dumb_retry_penalty() -> None:
    """Repeating an identical call_tool step yields -0.05 step shaping."""
    env = SchemaShiftEnvironment()
    env.reset("E1_onboard_new_hire")

    def bad_send():
        """Build a fresh send_message action carrying only the 'to' field."""
        return Action(
            type="call_tool",
            tool_call=ToolCallParams(
                tool="mail",
                endpoint="send_message",
                params={"to": "x@y.com"},
            ),
        )

    # Step 1: mail.send_message with only {"to": "x@y.com"} (expected to fail with 400).
    env.step(bad_send())

    # Step 2: the identical call again counts as a dumb retry.
    _, retry_reward = env.step(bad_send())

    assert retry_reward.step_shaping == pytest.approx(-0.05)