"""
MCP ReAct Agent - Enhanced Generalist

Key improvements over v6:
  - Richer system prompt with strategy patterns for different game types
  - Stuck detection + automatic recovery (suggest_exploration, try new verbs)
  - Smarter history: shows failed actions to avoid repetition
  - Exit registration from game text (auto-detects mentioned directions)
  - Multi-phase play: explore → collect → solve → backtrack
  - Robust parsing with multiple fallback strategies
"""

import json
import os
import re
from dataclasses import dataclass, field
from typing import Optional

from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load environment variables
load_dotenv()

# Set USE_LOCAL_MODEL=1 in your .env to use a locally downloaded model.
# The flag is compared case-insensitively, so "1", "true", "True", "YES" and
# "on" all enable local mode; any other value (or no value) keeps the hosted API.
USE_LOCAL_MODEL = os.getenv("USE_LOCAL_MODEL", "0").strip().lower() in (
    "1",
    "true",
    "yes",
    "on",
)
# Model id handed to the local text-generation pipeline when local mode is on.
LOCAL_MODEL_ID = os.getenv("LOCAL_MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")

# =============================================================================
# LLM Configuration - DO NOT MODIFY
# =============================================================================

# Model to use (fixed for fair evaluation).
# Only the hosted path below sends this id; local mode loads LOCAL_MODEL_ID.
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

# Initialize the LLM client based on mode.
# Exactly one backend is created at import time: `_local_pipeline` when
# USE_LOCAL_MODEL is set, otherwise `LLM_CLIENT`. The other one stays None.
_local_pipeline = None

if USE_LOCAL_MODEL:
    # Imported here so torch/transformers are only needed in local mode.
    import torch
    from transformers import pipeline as _hf_pipeline

    _local_pipeline = _hf_pipeline(
        "text-generation",
        model=LOCAL_MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    LLM_CLIENT = None
else:
    # Hosted mode needs an access token; fail fast at import if it is missing.
    _hf_token = os.getenv("HF_TOKEN")
    if not _hf_token:
        raise ValueError("HF_TOKEN not found. Set it in your .env file.")
    LLM_CLIENT = InferenceClient(token=_hf_token)


def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """
    Call the LLM with the given prompt. Use this function in your agent.

    The request is a two-message chat (system + user). It is served by the
    local pipeline when local mode is active, otherwise by the hosted client.

    Args:
        prompt: The user prompt (current game state, history, etc.)
        system_prompt: The system prompt (instructions for the agent)
        seed: Random seed for reproducibility. Only forwarded on the hosted
            path; the local pipeline call below does not receive it.
        max_tokens: Maximum tokens in response (default: 300)

    Returns:
        The LLM's response text
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    # Local path: only taken when the pipeline was actually built at import time.
    if USE_LOCAL_MODEL and _local_pipeline is not None:
        outputs = _local_pipeline(
            messages,
            max_new_tokens=max_tokens,
            temperature=0.0001,  # Near-deterministic (0.0 unsupported by some backends)
            do_sample=True,
        )
        # Takes the content of the last message in the returned conversation —
        # presumably the newly generated assistant turn; verify against the
        # transformers chat-pipeline output format.
        return outputs[0]["generated_text"][-1]["content"]

    # Hosted path: uses the fixed evaluation model and forwards the seed.
    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.0,  # Deterministic for reproducibility
        max_tokens=max_tokens,
        seed=seed,
    )

    return response.choices[0].message.content


@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class."""
    final_score: int  # score reported by the agent at the end of the run
    max_score: int  # maximum score reported for the game
    moves: int  # number of agent steps taken
    locations_visited: set[str]  # names of the locations seen during the run
    game_completed: bool  # whether the run ended in a game-over state
    error: Optional[str] = None  # error description, or None when there is none
    # One entry per step; the agent in this file fills it with
    # (tool name, JSON-encoded args, truncated result text).
    history: list[tuple[str, str, str]] = field(default_factory=list)


# ─── System Prompt ─────────────────────────────────────────────────────────────
# Sent as the system message on every LLM call. It lists the tools and their
# argument shapes, gives play strategy, and fixes the THOUGHT/TOOL/ARGS reply
# format that the agent's response parser looks for.
SYSTEM_PROMPT = """You are an expert text adventure game player. You are methodical, curious, and never give up.

AVAILABLE TOOLS:
- play_action: Send a command to the game.
  ARGS: {"action": "your command"}
  For movement use direction words: north, south, east, west, up, down, in, out, ne, nw, se, sw
  For interactions: examine <thing>, take <item>, drop <item>, open <thing>, close <thing>,
    read <thing>, push <thing>, pull <thing>, turn <thing>, light <thing>, put <item> in <container>,
    unlock <door> with <key>, give <item> to <npc>, attack <enemy> with <weapon>, tie <item> to <thing>,
    climb <thing>, enter <thing>, search <thing>, listen, smell, wave <item>, eat <item>, drink <item>

- think: Plan your strategy. ARGS: {"goal": "...", "thought": "..."}

- notebook_write: Save clues, codes, puzzle info permanently.
  ARGS: {"text": "...", "category": "Clue|Puzzle|Item|Danger|NPC|Code|Goal|Map"}

- notebook_read: Read your saved notes. ARGS: {"keyword": "optional filter"}

- memory: Full status dump (location, inventory, notes, map). ARGS: {}

- get_map: View explored map and unexplored exits. ARGS: {}

- find_path: Get directions to a known room. ARGS: {"target_room": "room name"}

- suggest_exploration: Get suggestion for nearest unexplored area. ARGS: {}

- register_exits: Record exits visible in current room.
  ARGS: {"directions": "north, south, up"}

STRATEGY — How to play well:
1. EXPLORE SYSTEMATICALLY: When you enter a new room, ALWAYS do "look" first, then register visible exits with register_exits. Explore every exit.
2. EXAMINE EVERYTHING: If the game describes objects, furniture, or features — examine them. Things hide under rugs, inside containers, behind paintings.
3. TAKE EVERYTHING: Collect all portable items. You'll need them later for puzzles.
4. READ CAREFULLY: The game text contains ALL clues. Unusual descriptions often hint at puzzles.
5. SAVE CLUES: If you notice a code, inscription, locked door, NPC request, or puzzle — write it in notebook_write immediately.
6. DON'T REPEAT FAILURES: Check your recent history. If a command didn't work, try a DIFFERENT approach. Use synonyms: get/take, look/examine, push/move.
7. BACKTRACK SMARTLY: If stuck, call suggest_exploration to find unexplored exits, or find_path to return to a room with unsolved puzzles.
8. USE ITEMS: When you have items and encounter obstacles, think about which item might help. Try "use X", "put X in Y", "unlock Y with X".
9. LISTEN AND SEARCH: "listen", "search", "look under X", "look behind X" often reveal hidden things.
10. CHECK SCORE: If your score increases, you're making progress. If not for a while, try a new area.

RESPONSE FORMAT (strict):
THOUGHT: <brief reasoning about what you observe and your plan>
TOOL: <exactly one tool name>
ARGS: <valid JSON for that tool>

Example:
THOUGHT: I see a rusty door to the north and a brass lamp on the ground. I should take the lamp first.
TOOL: play_action
ARGS: {"action": "take lamp"}"""


# ─── Directions mentioned in text ──────────────────────────────────────────────
# Whole-word, case-insensitive match for spelled-out compass/vertical directions
# appearing in game prose. Abbreviations and in/out are intentionally not part
# of this pattern; they only appear in DIRECTION_SET below.
EXIT_PATTERN = re.compile(
    r"\b(north|south|east|west|up|down|northeast|northwest|southeast|southwest)\b",
    flags=re.IGNORECASE,
)

# Every token accepted as a bare movement command, grouped by kind.
DIRECTION_SET = {
    # single-letter and two-letter abbreviations
    "n", "s", "e", "w", "u", "d",
    "ne", "nw", "se", "sw",
    # spelled-out cardinal and vertical directions
    "north", "south", "east", "west", "up", "down",
    # spelled-out diagonals
    "northeast", "northwest", "southeast", "southwest",
    # entering / leaving
    "in", "out", "enter", "exit",
}


class StudentAgent:
    """ReAct-style agent that plays a text adventure through MCP tools.

    Each step builds a prompt from the latest observation and recent history,
    asks the LLM for a THOUGHT/TOOL/ARGS reply, parses that reply defensively
    and calls the chosen tool on the MCP client. Score, location and rejected
    commands are tracked so the prompt can warn about repetition and stalls.
    """

    # Status fields looked for in tool output by ``_update_state``.
    _SCORE_RE = re.compile(r"Score:\s*(\d+)/(\d+)", re.IGNORECASE)
    _LOCATION_RE = re.compile(r"\[Location:\s*([^|\]]+)")

    # Lower-case substrings that mark a rejected command. "you can't" also
    # covers the more specific "you can't go / do / see" wordings.
    _FAIL_PHRASES = (
        "you can't",
        "i don't understand",
        "that's not a verb",
        "you don't see",
        "there's no",
        "nothing happens",
        "is locked",
        "is closed",
        "won't budge",
        "doesn't seem to",
        "you aren't",
    )

    # Lower-case substrings that mark the end of the game. The starred
    # "*** you have died/won ***" banners contain the plain phrases below.
    _GAME_OVER_PHRASES = (
        "game over",
        "you have won",
        "you have died",
        "would you like to restart",
    )

    def __init__(self):
        self.history: list[dict] = []
        self.score: int = 0
        self.max_score: int = 0
        self.location: str = "Unknown"
        self.locations_visited: set[str] = set()
        self.failed_actions: set[str] = set()  # track "location:action" that failed
        self.consecutive_no_score: int = 0
        self.last_score: int = 0

    async def run(
        self, client, game: str, max_steps: int, seed: int, verbose: bool = False
    ) -> "RunResult":
        """Play for up to ``max_steps`` LLM-driven steps and report the outcome.

        Args:
            client: MCP client exposing ``list_tools()`` and ``call_tool()``.
            game: Game identifier; accepted for interface compatibility and
                not used by this implementation.
            max_steps: Upper bound on the number of LLM/tool steps.
            seed: Base seed; step ``k`` calls the LLM with ``seed + k``.
            verbose: Print observations, thoughts and tool calls when True.

        Returns:
            A ``RunResult`` with the final score, step count, visited
            locations and the per-step (tool, args JSON, result) history.
        """
        tools = await client.list_tools()
        tool_names = [t.name for t in tools]

        # Initial look
        result = await client.call_tool("play_action", {"action": "look"})
        observation = self._extract_result(result)
        self._update_state(observation)
        await self._register_exits(client, observation, tool_names)

        if verbose:
            print(f"\n{'=' * 60}\nINITIAL OBSERVATION:\n{observation}\n{'=' * 60}")

        step = 0
        for step in range(1, max_steps + 1):
            prompt = self._build_prompt(observation, step)
            response = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=400)
            thought, tool_name, tool_args = self._parse_response(response, tool_names)

            if verbose:
                print(f"\n--- Step {step} ---")
                print(f"  THOUGHT: {thought}")
                print(f"  TOOL: {tool_name}({json.dumps(tool_args)})")

            try:
                result = await client.call_tool(tool_name, tool_args)
                observation = self._extract_result(result)
            except Exception as e:
                # Surface tool errors to the LLM as the next observation so it
                # can react, instead of aborting the whole run.
                observation = f"Error: {e}"

            if verbose:
                obs_preview = observation[:400].replace("\n", "\n    ")
                print(f"  RESULT: {obs_preview}")

            self._update_state(observation)

            if tool_name == "play_action":
                # Auto-register exits mentioned in the game's reply.
                await self._register_exits(client, observation, tool_names)

                # Remember any rejected command (not only movement) against
                # the current location so the prompt can warn about it.
                action = tool_args.get("action", "").lower()
                if self._is_failure(observation):
                    self.failed_actions.add(f"{self.location}:{action}")

            # Track score progress. last_score always follows the current
            # score so that regaining points after a drop counts as progress.
            if self.score > self.last_score:
                self.consecutive_no_score = 0
            else:
                self.consecutive_no_score += 1
            self.last_score = self.score

            self.history.append(
                {
                    "step": step,
                    "thought": thought,
                    "tool": tool_name,
                    "args": tool_args,
                    "result": observation[:200],
                    "location": self.location,
                    "score": self.score,
                }
            )

            if self._is_game_over(observation):
                break

        return RunResult(
            final_score=self.score,
            max_score=self.max_score,
            moves=step,
            locations_visited=self.locations_visited,
            game_completed=self._is_game_over(observation),
            error=None,
            history=[
                (h["tool"], json.dumps(h["args"]), h["result"]) for h in self.history
            ],
        )

    async def _register_exits(self, client, observation: str, tool_names) -> None:
        """Best-effort: send directions mentioned in ``observation`` to register_exits."""
        if "register_exits" not in tool_names:
            return
        exits = self._detect_exits(observation)
        if not exits:
            return
        try:
            await client.call_tool("register_exits", {"directions": ", ".join(exits)})
        except Exception:
            # Exit registration is only a mapping aid; a failure here must not
            # interrupt play, so it is deliberately ignored.
            pass

    def _build_prompt(self, observation: str, step: int) -> str:
        """Assemble the user prompt: status line, history, warnings, game output."""
        parts = []

        # Status line
        parts.append(
            f"[Step {step} | Score: {self.score}/{self.max_score} | "
            f"Location: {self.location} | Rooms visited: {len(self.locations_visited)}]"
        )

        # Recent history (last 7 for better context)
        if self.history:
            parts.append("\nRecent history:")
            for h in self.history[-7:]:
                action_str = json.dumps(h["args"])
                loc = h.get("location", "?")
                result_short = h["result"].replace("\n", " ")[:80]
                parts.append(f"  [{loc}] {h['tool']}({action_str}) -> {result_short}")

        # Failed actions at current location (helps avoid repetition).
        # Sorted because set iteration order varies between processes, which
        # would make the prompt non-reproducible. The prefix is sliced off
        # rather than split on ":" so location names containing a colon work.
        prefix = f"{self.location}:"
        loc_failures = sorted(
            entry[len(prefix):]
            for entry in self.failed_actions
            if entry.startswith(prefix)
        )
        if loc_failures:
            parts.append(f"\nActions that FAILED here: {', '.join(loc_failures)}")

        # Stuck hint
        if self.consecutive_no_score > 8:
            parts.append(
                "\n[HINT: Score hasn't changed in a while. Consider: "
                "call suggest_exploration, check memory, examine objects more carefully, "
                "or try using inventory items on things you've seen.]"
            )

        # Current game output
        parts.append(f"\nGame output:\n{observation}")
        parts.append("\nWhat do you do next?")

        return "\n".join(parts)

    @staticmethod
    def _as_text(value) -> str:
        """Return ``value`` as a stripped string, mapping None to ""."""
        return "" if value is None else str(value).strip()

    @staticmethod
    def _match_tool(raw_tool: str, valid_tools: list[str]) -> Optional[str]:
        """Map a (possibly misspelled) tool name to a known one, or None."""
        if raw_tool in valid_tools:
            return raw_tool
        # Handle common LLM mistakes via keyword matching.
        if "play" in raw_tool or "action" in raw_tool:
            return "play_action"
        if "note" in raw_tool and "write" in raw_tool:
            return "notebook_write"
        if "note" in raw_tool and "read" in raw_tool:
            return "notebook_read"
        if "note" in raw_tool:
            return "notebook_write"
        if "map" in raw_tool:
            return "get_map"
        if "path" in raw_tool:
            return "find_path"
        if "suggest" in raw_tool or "explor" in raw_tool:
            return "suggest_exploration"
        if "register" in raw_tool or "exit" in raw_tool:
            return "register_exits"
        return None

    def _parse_response(
        self, response: str, valid_tools: list[str]
    ) -> tuple[str, str, dict]:
        """Parse a THOUGHT/TOOL/ARGS reply into ``(thought, tool_name, tool_args)``.

        Parsing never raises on malformed replies: a missing or unknown tool
        falls back to ``play_action``, ``play_action`` without a usable action
        falls back to ``"look"``, and other tools without usable ARGS get an
        empty argument dict. ``tool_args`` is always a dict.

        Args:
            response: Raw LLM text (``None`` is treated as empty).
            valid_tools: Tool names advertised by the MCP server.
        """
        thought = "..."
        tool_name = "play_action"

        args_lines: list[str] = []
        collecting_args = False

        for line in (response or "").split("\n"):
            clean = line.strip()
            up = clean.upper()

            if up.startswith("THOUGHT:"):
                thought = clean.split(":", 1)[1].strip()
                collecting_args = False
            elif up.startswith("TOOL:"):
                raw_tool = clean.split(":", 1)[1].strip().lower().strip("`").strip()
                raw_tool = raw_tool.replace(" ", "_")
                matched = self._match_tool(raw_tool, valid_tools)
                if matched is not None:
                    tool_name = matched
                collecting_args = False
            elif up.startswith("ARGS:"):
                raw = clean.split(":", 1)[1].strip()
                args_lines = [raw]
                collecting_args = True
            elif collecting_args and clean:
                # ARGS may span several lines (pretty-printed JSON).
                args_lines.append(clean)

        # Parse ARGS
        parsed = None
        if args_lines:
            raw_args = " ".join(args_lines)
            try:
                parsed = json.loads(raw_args)
            except json.JSONDecodeError:
                # Try extracting a flat JSON object from surrounding noise.
                m = re.search(r"\{[^{}]+\}", raw_args)
                if m:
                    try:
                        parsed = json.loads(m.group())
                    except json.JSONDecodeError:
                        parsed = None
                # Fallback: pull the action string out of almost-JSON.
                if tool_name == "play_action":
                    m = re.search(r'"action"\s*:\s*"([^"]+)"', raw_args)
                    if m:
                        parsed = {"action": m.group(1)}

        if isinstance(parsed, dict):
            tool_args = parsed
        elif isinstance(parsed, str) and tool_name == "play_action":
            # ARGS was a bare JSON string such as "take lamp": use it as the command.
            tool_args = {"action": parsed}
        else:
            # Missing, unparseable or non-object ARGS (number, list, null).
            # Start empty so no stray "action" key is sent to other tools.
            tool_args = {}

        # ─── Fix play_action args ───
        if tool_name == "play_action":
            action = self._as_text(tool_args.get("action"))
            # Merge split args (action + target/object)
            for extra_key in ("target", "object", "item", "direction"):
                extra = self._as_text(tool_args.get(extra_key))
                if extra and extra.lower() not in action.lower():
                    action = f"{action} {extra}".strip()

            # Strip "go " prefix for bare directions
            if action.lower().startswith("go "):
                rest = action[3:].strip().lower()
                if rest in DIRECTION_SET:
                    action = rest

            tool_args = {"action": action or "look"}

        # ─── Fix find_path args ───
        if tool_name == "find_path":
            # Normalize: the tool expects "target_room" not "to" or "room"
            for key in ("to", "room", "destination", "target"):
                if key in tool_args and "target_room" not in tool_args:
                    tool_args["target_room"] = tool_args.pop(key)

        # Final validation: an alias may have resolved to a tool that this
        # server does not actually offer.
        if tool_name not in valid_tools:
            tool_name = "play_action"
            if "action" not in tool_args:
                tool_args = {"action": "look"}

        return thought, tool_name, tool_args

    def _extract_result(self, result) -> str:
        """Return the text carried by a tool result.

        Accepts an object with a ``content`` sequence or a plain list/tuple of
        content parts. All parts that have a string ``text`` attribute are
        joined with newlines; if there are none, ``str(result)`` is returned.
        """
        if isinstance(result, (list, tuple)):
            parts = result
        else:
            parts = getattr(result, "content", None)

        texts = [
            part.text
            for part in (parts or ())
            if isinstance(getattr(part, "text", None), str)
        ]
        if texts:
            return "\n".join(texts)
        return str(result)

    def _update_state(self, text: str):
        """Update score and location from ``Score: a/b`` / ``[Location: X`` markers."""
        m = self._SCORE_RE.search(text)
        if m:
            self.score = int(m.group(1))
            self.max_score = int(m.group(2))
        m_loc = self._LOCATION_RE.search(text)
        if m_loc:
            loc = m_loc.group(1).strip()
            if loc and loc != "Unknown":
                self.location = loc
                self.locations_visited.add(loc)

    def _detect_exits(self, text: str) -> list[str]:
        """Extract direction words mentioned in game text.

        Returns lower-cased, de-duplicated directions in first-seen order, so
        the result is identical from run to run.
        """
        return list(dict.fromkeys(EXIT_PATTERN.findall(text.lower())))

    def _is_failure(self, text: str) -> bool:
        """Detect if the game rejected our action."""
        lower = text.lower()
        return any(phrase in lower for phrase in self._FAIL_PHRASES)

    def _is_game_over(self, text: str) -> bool:
        """Detect a win, death or restart prompt in the game text."""
        lower = text.lower()
        return any(phrase in lower for phrase in self._GAME_OVER_PHRASES)


# =============================================================================
# For local testing
# =============================================================================

async def test_agent():
    """Run a short local session against ``mcp_server.py`` and print a summary."""
    from fastmcp import Client

    player = StudentAgent()
    run_options = {"game": "zork1", "max_steps": 10, "seed": 42, "verbose": True}

    async with Client("mcp_server.py") as session:
        outcome = await player.run(client=session, **run_options)

        # Summary is printed while the client session is still open.
        print(f"\nFinal Score: {outcome.final_score}")
        print(f"Moves: {outcome.moves}")
        print(f"Locations: {outcome.locations_visited}")


# Script entry point: run the local test session when executed directly.
if __name__ == "__main__":
    import asyncio
    asyncio.run(test_agent())