"""
End-to-end test through ClarifyClient (the path TRL training actually uses).

The server keeps a single env instance per WebSocket session, so reset, step,
and state all share that instance's state. Start the server with uvicorn
first, then run this script.
"""

import json
import sys
from pathlib import Path

# Parent of the directory that contains this script, as a resolved absolute path.
ROOT = Path(__file__).resolve().parent.parent
# Prepend ROOT to the import search path before the `client` import below runs;
# presumably that is where the `client` module lives (confirm against repo layout).
sys.path.insert(0, str(ROOT))

from client import ClarifyClient


def main() -> int:
    """Drive one scripted episode against a locally running server.

    Builds a ``ClarifyClient`` for ``http://127.0.0.1:7860``, takes its
    ``.sync()`` wrapper, and inside that context: lists the tools, resets
    with ``seed=7`` / ``task_id="medium"``, sends four ``ask_question`` calls
    followed by one ``propose_plan`` call, and finally prints the state.
    Every response is printed as-is; nothing is asserted.

    Returns:
        ``0`` once the scripted run has completed. Errors raised by the
        client are not caught here.
    """
    # (section banner, question text) pairs, sent in exactly this order.
    scripted_questions = (
        ("\n--- ask_question: order id ---", "what is the order id?"),
        ("\n--- ask_question: item issue ---", "what's wrong with the order?"),
        ("\n--- ask_question: refund/replace ---", "refund or replace?"),
        ("\n--- ask_question: urgency ---", "when do you need this?"),
    )
    # Plan payload; serialized to a JSON string right before it is sent.
    proposed_plan = {
        "order_id": "#4521",
        "item_issue": "wrong-item",
        "refund_or_replace": "replace",
        "urgency": "high",
    }

    session = ClarifyClient(base_url="http://127.0.0.1:7860").sync()
    with session:
        print("--- list tools ---")
        tool_names = [tool.name for tool in session.list_tools()]
        print(tool_names)

        print("\n--- reset(seed=7, task_id=medium) ---")
        first_obs = session.reset(seed=7, task_id="medium")
        print(f"reward={first_obs.reward}  done={first_obs.done}")
        print("observation:", first_obs.observation)

        for banner, question in scripted_questions:
            print(banner)
            reply = session.call_tool("ask_question", question=question)
            print("result:", reply)

        print("\n--- propose_plan ---")
        reply = session.call_tool("propose_plan", plan=json.dumps(proposed_plan))
        print("result:", reply)

        print("\n--- final state ---")
        print(session.state())

    return 0


if __name__ == "__main__":
    # Hand main()'s return value to the interpreter as the process exit status.
    raise SystemExit(main())