"""
Example: MCP ReAct Agent

A complete ReAct agent that uses MCP tools to play text adventure games.
This is a working example students can learn from.
"""

import json
import os
import re
from dataclasses import dataclass, field
from typing import Optional

from dotenv import load_dotenv
from huggingface_hub import InferenceClient

# Load variables from a local .env file into the process environment so that
# HF_TOKEN can be read with os.getenv below.
load_dotenv()

# =============================================================================
# LLM Configuration - DO NOT MODIFY
# =============================================================================

# Model identifier sent as `model=` with every chat-completion request.
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

# Read the Hugging Face token from the environment and fail fast at import
# time when it is missing or empty, instead of failing on the first request.
_hf_token = os.getenv("HF_TOKEN")
if not _hf_token:
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")

# Single module-level client shared by every call_llm() invocation.
LLM_CLIENT = InferenceClient(token=_hf_token)


def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """
    Send one system + user message pair to the configured chat model.

    The request goes through the module-level ``LLM_CLIENT`` using
    ``LLM_MODEL``, with the temperature pinned to 0.0 and the given seed so
    that runs are as reproducible as the backend allows.

    Args:
        prompt: The user message (current game state, history, etc.)
        system_prompt: The system message (instructions for the agent)
        seed: Random seed forwarded to the completion request
        max_tokens: Upper bound on the number of generated tokens (default: 300)

    Returns:
        The message content of the first choice in the completion response

    Example:
        response = call_llm(
            prompt="You are in a forest. What do you do?",
            system_prompt=SYSTEM_PROMPT,
            seed=42,
        )
    """
    request = {
        "model": LLM_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        # Zero temperature: deterministic decoding for reproducibility.
        "temperature": 0.0,
        "max_tokens": max_tokens,
        "seed": seed,
    }

    completion = LLM_CLIENT.chat.completions.create(**request)
    first_choice = completion.choices[0]
    return first_choice.message.content


@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class."""
    # Score reported for the run (StudentAgent.run passes its tracked score).
    final_score: int
    # Maximum score attainable in the game.
    max_score: int
    # Number of actions the agent sent to the game.
    moves: int
    # Distinct location names seen during the run.
    locations_visited: set[str]
    # True when the run ended because the game reported it was over.
    game_completed: bool
    # Optional error description; None when nothing was reported.
    error: Optional[str] = None
    # Per-step records as 3-string tuples; presumably
    # (thought, action, observation) - confirm with the evaluation harness.
    # default_factory gives every instance its own list.
    history: list[tuple[str, str, str]] = field(default_factory=list)


# =============================================================================
# System Prompt
# =============================================================================

# System message passed to call_llm() on every step of StudentAgent.run().
# The "THOUGHT:" / "ACTION:" line prefixes requested here are what
# StudentAgent._parse_response() looks for, so keep the response format in
# sync with that parser when editing this text.
SYSTEM_PROMPT = """You are an expert text adventure game player. Your goal is to explore, collect treasures, and interact with your environment.

VALID GAME COMMANDS:
- Movement: north, south, east, west, northeast, northwest, southwest, southeast, up, down, enter, exit
- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
- Light: turn on lamp, turn off lamp
- Combat: attack <enemy> with <weapon>, hit <thing>
- Other: read <thing>, wait, shout
- Communication: ask <person> about <thing>, tell <person> about <thing>

FORBIDDEN (will NOT work): check, inspect, search, grab, use, help, turn on torch

RESPOND IN THIS EXACT FORMAT (no markdown):
POSSIBLE ACTIONS: <list all reasonable actions possible in this situation>
THOUGHT: <brief reasoning about what to do next>
ACTION: <action_name>

Examples:
POSSIBLE ACTIONS: take sword, examine sword, east, south, ask villager about dragon
THOUGHT: I need to take the sword.
ACTION: take sword

POSSIBLE ACTIONS: examine mailbox, open mailbox, take mailbox, north, east, shout
THOUGHT: The mailbox might contain something useful.
ACTION: open mailbox

STRATEGY:
- Pick up any useful items (lamp, sword, pig, etc.). You do not have to take an item if it is already in your inventory.
- Explore as much as possible : prioritize moving to examining
- Examine objects only when they seem very interesting and if you are stuck
- Open containers (mailbox, window, etc.)
- Make sure you have a light source if you need to explore dark areas
- Prioritize movements over examining random things
- PAY ATTENTION to actions you have already done in your situation (for instance, try every possible direction mentioned in the situation)

DO NOT repeat the same action endlessly."""


# =============================================================================
# Student Agent Implementation
# =============================================================================

class StudentAgent:
    """
    MCP ReAct Agent - A complete working example.

    This agent demonstrates:
    - ReAct loop (Thought -> Action -> Observation)
    - Loop detection (a prompt warning once the same action repeats three times)
    - Action validation (rewriting verbs the system prompt lists as unsupported)
    - Score tracking from game text and the ``get_score`` tool
    """

    # First-word rewrites applied to the LLM's action before it is sent.
    _VERB_REWRITES = {
        "check": "examine",
        "inspect": "examine",
        "search": "look",
        "grab": "take",
        "pick": "take",
        "use": "examine",
        "investigate": "examine",
    }

    # Each pattern is searched independently; the highest captured value wins.
    _SCORE_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'Score:\s*(\d+)',
            r'score[:\s]+(\d+)',
            r'\[Score:\s*(\d+)',
        )
    )

    # Lower-case substrings that mark the end of the game.
    _GAME_OVER_PHRASES = (
        "game over",
        "you have died",
        "you are dead",
        "*** you have died ***",
    )

    # How many validated actions are remembered for loop detection, and how
    # many identical ones in a row trigger the prompt warning.
    _RECENT_ACTIONS_KEPT = 5
    _REPEAT_WARNING_COUNT = 3

    def __init__(self):
        """Initialize the agent state."""
        # One dict per executed step: step, thought, action, result (truncated).
        self.history: list[dict] = []
        # Most recent validated actions, oldest first (bounded in run()).
        self.recent_actions: list[str] = []
        # Highest score seen so far.
        self.score: int = 0
        # Actions already tried, keyed by the full text of the `look` output.
        self.location_moves: dict[str, list[str]] = {}

    async def _play(self, client, action: str) -> str:
        """Send one game command through the `play_action` tool and return its text."""
        result = await client.call_tool("play_action", {"action": action})
        return self._extract_result(result)

    async def run(
        self,
        client,
        game: str,
        max_steps: int,
        seed: int,
        verbose: bool = False,
    ) -> "RunResult":
        """
        Run the agent for a game session.

        Args:
            client: MCP client exposing ``call_tool(name, arguments)``; the
                tools ``play_action`` and ``get_score`` are used.
            game: Name of the game. Currently unused by the agent itself.
            max_steps: Maximum number of LLM-chosen actions to execute.
            seed: Base seed; step ``n`` calls the LLM with ``seed + n``.
            verbose: Print observations, thoughts and actions when True.

        Returns:
            A RunResult with the tracked score, move count, visited
            locations, completion flag and per-step history.
        """
        locations_visited: set[str] = set()
        history: list[tuple[str, str, str]] = []
        moves = 0

        # Get initial observation
        observation = await self._play(client, "look")

        # NOTE(review): the first line of every observation is recorded as a
        # location, so replies such as "Taken." are counted as well.
        locations_visited.add(observation.split("\n")[0] if observation else "Unknown")

        if verbose:
            print(f"\n{observation}")

        # Main ReAct loop
        for step in range(1, max_steps + 1):
            # Refresh the context shown to the LLM (same tool-call order every step).
            inventory = await self._play(client, "inventory")
            look = await self._play(client, "look")
            listen = await self._play(client, "listen")
            prompt = self._build_prompt(
                observation, inventory, look, listen, self.location_moves.get(look, [])
            )

            # Assumes get_score returns text containing e.g. "Score: N" -
            # TODO confirm against the server; text without such a pattern
            # leaves the tracked score untouched.
            score_result = await client.call_tool("get_score", {})
            self._update_score(self._extract_result(score_result))

            # Call LLM for reasoning (use step-based seed for variety)
            response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
            thought, action = self._parse_response(response)

            if verbose:
                print(f"\n--- Step {step} ---")
                print(f"[THOUGHT] {thought}")
                print(f"[ACTION] {action}")

            action = self._validate_action_call(action)

            # Feed loop detection in _build_prompt; keep only a short window.
            self.recent_actions.append(action)
            del self.recent_actions[:-self._RECENT_ACTIONS_KEPT]

            moves += 1

            try:
                observation = await self._play(client, action)
                self.location_moves.setdefault(look, []).append(action)

                if verbose:
                    print(f"[RESULT] {observation[:200]}...")
            except Exception as e:
                # Best effort: report the failure to the LLM as the next
                # observation instead of aborting the whole run.
                observation = f"Error: {e}"
                if verbose:
                    print(f"[ERROR] {e}")

            # Track location
            locations_visited.add(observation.split("\n")[0] if observation else "Unknown")

            # Update history (prompt context and the returned run history)
            self.history.append({
                "step": step,
                "thought": thought,
                "action": action,
                "result": observation[:200],
            })
            history.append((thought, action, observation[:200]))

            # Track score from observation
            self._update_score(observation)

            # Check for game over
            if self._is_game_over(observation):
                if verbose:
                    print("\n*** GAME OVER ***")
                break

        return RunResult(
            final_score=self.score,
            # Hard-coded regardless of `game`.
            max_score=350,
            moves=moves,
            locations_visited=locations_visited,
            game_completed=self._is_game_over(observation),
            history=history,
        )

    def _build_prompt(self, observation: str, inventory: str, look: str, listen: str, location_moves: list[str]) -> str:
        """
        Build the user prompt for the LLM from the current context.

        Args:
            observation: Game text returned by the most recent action.
            inventory: Output of the ``inventory`` command.
            look: Output of the ``look`` command (current situation).
            listen: Output of the ``listen`` command.
            location_moves: Actions already tried in the current situation.

        Returns:
            The prompt sections joined with newlines.
        """
        base_reason_size = 100     # result characters shown for the newest entries
        short_reason_size = 40     # result characters shown for older entries
        base_size_threshold = 3    # entries that get the long result
        short_size_threshold = 10  # beyond this only the action is listed
        action_threshold = 30      # last zero-based index that is still listed

        parts = [f"Inventory :{inventory}"]

        if self.history:
            parts.append("\nRecent actions -> Recent Results:")
            # Newest first, skipping the latest entry: its full result is
            # shown separately at the end of the prompt.
            older_entries = self.history[-2::-1][: action_threshold + 1]
            for rank, entry in enumerate(older_entries):
                action = entry["action"]
                if rank >= short_size_threshold:
                    parts.append(f"> {action}")
                    continue
                limit = base_reason_size if rank < base_size_threshold else short_reason_size
                result = entry["result"]
                result_short = result[:limit] + "..." if len(result) > limit else result
                parts.append(f"> {action} -> {result_short}")

            # Warn only once the same action fills the whole detection window.
            window = self.recent_actions[-self._REPEAT_WARNING_COUNT:]
            if len(window) == self._REPEAT_WARNING_COUNT and len(set(window)) == 1:
                parts.append(f"\n[WARNING: You've been doing '{window[-1]}' repeatedly. TRY SOMETHING DIFFERENT!]")

        parts.append(f"Here is your current situation :{look}")

        parts.append(f"Here is what you hear in this situation :{listen}")

        # Computed outside the f-strings: reusing the enclosing quote character
        # inside a replacement field is a SyntaxError before Python 3.12.
        tried = ",".join(location_moves) if location_moves else "none"
        parts.append(f"Here are the actions you have already tried in this situation, avoid repeating if possible (but do not hallucinate directions or objects) : {tried}")

        last_action = self.history[-1].get("action") if self.history else ""
        parts.append(f"\nResult of your most recent action ({last_action}):\n{observation}")
        parts.append("\nWhat do you do next?")

        return "\n".join(parts)

    def _parse_response(self, response: str) -> tuple[str, str]:
        """
        Extract the thought and the action from the LLM response.

        Lines are matched case-insensitively on the ``THOUGHT:`` and
        ``ACTION:`` prefixes, ignoring leading markdown markers. When a prefix
        occurs several times the last occurrence wins.

        Args:
            response: Raw LLM output; None or empty yields the defaults.

        Returns:
            ``(thought, action)``; defaults are "No reasoning provided" and "look".
        """
        thought = "No reasoning provided"
        action = "look"

        for line in (response or "").strip().split("\n"):
            # Tolerate "**ACTION:** ..." even though the prompt forbids markdown.
            line_clean = line.strip().lstrip("*`# ")
            line_upper = line_clean.upper()

            if line_upper.startswith("THOUGHT:"):
                thought = line_clean.split(":", 1)[1].strip().lstrip("*`").strip()

            elif line_upper.startswith("ACTION:"):
                raw_action = line_clean.split(":", 1)[1]
                raw_action = raw_action.replace("*", "").replace("`", "").strip().lower()
                action = raw_action if raw_action else "look"

        return thought, action

    def _validate_action_call(self, action: str) -> str:
        """
        Normalize an action and rewrite verbs the game does not understand.

        Markdown markers are removed and whitespace collapsed before the first
        word is looked up, so "**check** mailbox" becomes "examine mailbox".
        "pick up <item>" becomes "take <item>".

        Args:
            action: Action text proposed by the LLM.

        Returns:
            The cleaned, lower-case command; "look" when nothing is left.
        """
        cleaned = (action or "").lower().replace("*", "").replace("`", "")
        words = cleaned.split()
        if not words:
            return "look"

        if words[:2] == ["pick", "up"] and len(words) > 2:
            words = ["take", *words[2:]]
        elif words[0] in self._VERB_REWRITES:
            words[0] = self._VERB_REWRITES[words[0]]

        return " ".join(words)

    def _extract_result(self, result) -> str:
        """
        Extract text from an MCP tool result.

        Accepts an object with a non-empty ``content`` sequence, a non-empty
        list of content items, or anything else (converted with ``str``).
        Items without a ``text`` attribute are converted with ``str``.
        """
        if hasattr(result, 'content') and result.content:
            items = result.content
        elif isinstance(result, list) and result:
            items = result
        else:
            return str(result)

        first = items[0]
        return first.text if hasattr(first, 'text') else str(first)

    def _update_score(self, text: str) -> None:
        """Raise ``self.score`` to the highest score found in ``text``; never lower it."""
        for pattern in self._SCORE_PATTERNS:
            match = pattern.search(text)
            if match:
                self.score = max(self.score, int(match.group(1)))

    def _is_game_over(self, text: str) -> bool:
        """Return True when ``text`` contains one of the game-over phrases (case-insensitive)."""
        text_lower = text.lower()
        return any(phrase in text_lower for phrase in self._GAME_OVER_PHRASES)


# =============================================================================
# Local Testing
# =============================================================================

async def test_agent():
    """Smoke-test the agent against the local MCP server and print a summary."""
    # Imported here so fastmcp is only required when running the local test.
    from fastmcp import Client

    student = StudentAgent()

    async with Client("mcp_server.py") as mcp_client:
        outcome = await student.run(
            client=mcp_client,
            game="zork1",
            max_steps=20,
            seed=42,
            verbose=True,
        )

        summary_lines = (
            f"\n{'=' * 50}",
            f"Final Score: {outcome.final_score}",
            f"Moves: {outcome.moves}",
            f"Locations: {len(outcome.locations_visited)}",
        )
        for summary_line in summary_lines:
            print(summary_line)


# Run the local smoke test only when this file is executed as a script;
# importing the module does not start a game session.
if __name__ == "__main__":
    import asyncio
    asyncio.run(test_agent())