"""Behavioural tests for ``SupportTicketEnvironment``: tool results, rewards, and resets."""

import pytest

from support_ticket_env import SupportTicketEnvironment
from support_ticket_env.models import LookupAccountAction, SearchKBAction, SendReplyAction

# Task identifiers and the search query shared by several tests below.
PASSWORD_RESET_TASK = "password_reset_guidance"
DUPLICATE_CHARGE_TASK = "duplicate_charge_refund"
PASSWORD_RESET_QUERY = "password reset email not arriving"


def _started_env(task_name: str) -> SupportTicketEnvironment:
    """Return a new environment that has already been reset to *task_name*."""
    env = SupportTicketEnvironment()
    env.reset(task_name)
    return env


def _password_reset_search() -> SearchKBAction:
    """Build a new ``search_kb`` action carrying the shared password-reset query."""
    return SearchKBAction(action_type="search_kb", query=PASSWORD_RESET_QUERY)


def test_search_returns_expected_article_and_progress_reward() -> None:
    """A KB search reports its tool name, ranks KB-PW-RESET first, and yields ~0.19 reward."""
    env = _started_env(PASSWORD_RESET_TASK)

    outcome = env.step(_password_reset_search())

    tool_result = outcome.observation.last_tool_result
    assert tool_result.tool_name == "search_kb"
    assert tool_result.article_ids[0] == "KB-PW-RESET"
    assert outcome.reward == pytest.approx(0.19)


def test_lookup_populates_account_facts() -> None:
    """An account lookup stores the plan and duplicate-charge amount under ``known_facts``."""
    env = _started_env(DUPLICATE_CHARGE_TASK)
    lookup = LookupAccountAction(
        action_type="lookup_account",
        customer_id="cust_bill_002",
    )

    outcome = env.step(lookup)

    account_facts = outcome.observation.known_facts["account"]
    assert account_facts["plan"] == "Business"
    assert account_facts["duplicate_charge_amount_cents"] == 4900


def test_redundant_search_is_penalized() -> None:
    """Repeating the same search gives ~-0.03 reward and reports a ~0.02 redundancy penalty."""
    env = _started_env(PASSWORD_RESET_TASK)
    env.step(_password_reset_search())

    repeated = env.step(_password_reset_search())

    assert repeated.reward == pytest.approx(-0.03)
    assert repeated.info["redundancy_penalty"] == pytest.approx(0.02)


def test_refund_before_lookup_is_invalid() -> None:
    """A refund sent as the first step is flagged with an action error and ~-0.11 reward."""
    env = _started_env(DUPLICATE_CHARGE_TASK)
    # This action is passed to ``step`` as a plain dict rather than a model instance.
    raw_refund = {
        "action_type": "issue_refund",
        "amount_cents": 4900,
        "reason_code": "duplicate_charge",
    }

    outcome = env.step(raw_refund)

    assert outcome.observation.last_action_error == "lookup_required_before_refund"
    assert outcome.reward == pytest.approx(-0.11)


def test_reset_clears_previous_state() -> None:
    """Resetting after a step gives zero steps taken, empty facts, and a one-entry history."""
    env = _started_env(PASSWORD_RESET_TASK)
    env.step(_password_reset_search())

    after_reset = env.reset(PASSWORD_RESET_TASK)

    observation = after_reset.observation
    assert observation.steps_taken == 0
    assert observation.known_facts == {}
    assert len(observation.conversation_history) == 1


def test_max_steps_timeout_is_deterministic() -> None:
    """After eight ``send_reply`` steps the episode is done with reason ``max_steps_exceeded``."""
    env = SupportTicketEnvironment()
    outcome = env.reset(PASSWORD_RESET_TASK)

    for _ in range(8):
        holding_reply = SendReplyAction(
            action_type="send_reply",
            message="Still investigating.",
        )
        outcome = env.step(holding_reply)

    assert outcome.done is True
    assert outcome.info["terminal_reason"] == "max_steps_exceeded"