""" Example: MCP ReAct Agent A complete ReAct agent that uses MCP tools to play text adventure games. This is a working example students can learn from. """ import json import os import re from dataclasses import dataclass, field from typing import Optional from dotenv import load_dotenv from huggingface_hub import InferenceClient load_dotenv() # ============================================================================= # LLM Configuration - DO NOT MODIFY # ============================================================================= LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct" _hf_token = os.getenv("HF_TOKEN") if not _hf_token: raise ValueError("HF_TOKEN not found. Set it in your .env file.") LLM_CLIENT = InferenceClient(token=_hf_token) def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str: """ Call the LLM with the given prompt. Use this function in your agent. Args: prompt: The user prompt (current game state, history, etc.) system_prompt: The system prompt (instructions for the agent) seed: Random seed for reproducibility max_tokens: Maximum tokens in response (default: 300) Returns: The LLM's response text Example: response = call_llm( prompt="You are in a forest. What do you do?", system_prompt=SYSTEM_PROMPT, seed=42, ) """ messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}, ] response = LLM_CLIENT.chat.completions.create( model=LLM_MODEL, messages=messages, temperature=0.0, # Deterministic for reproducibility max_tokens=max_tokens, seed=seed, ) return response.choices[0].message.content @dataclass class RunResult: """Result of running the agent. Do not modify this class.""" final_score: int max_score: int moves: int locations_visited: set[str] game_completed: bool error: Optional[str] = None history: list[tuple[str, str, str]] = field(default_factory=list) # ============================================================================= # System Prompt # ============================================================================= SYSTEM_PROMPT = """You are an expert text adventure game player. Your goal is to explore, collect treasures, and interact with your environment. VALID GAME COMMANDS: - Movement: north, south, east, west, northeast, northwest, southwest, southeast, up, down, enter, exit - Objects: take , drop , open , close , examine - Light: turn on lamp, turn off lamp - Combat: attack with , hit - Other: read , wait, shout - Communication: ask about , tell about FORBIDDEN (will NOT work): check, inspect, search, grab, use, help, turn on torch RESPOND IN THIS EXACT FORMAT (no markdown): POSSIBLE ACTIONS: THOUGHT: ACTION: Examples: POSSIBLE ACTIONS: take sword, examine sword, east, south, ask villager about dragon THOUGHT: I need to take the sword. ACTION: take sword POSSIBLE ACTIONS: examine mailbox, open mailbox, take mailbox, north, east, shout THOUGHT: The mailbox might contain something useful. ACTION: open mailbox STRATEGY: - Pick up any useful items (lamp, sword, pig, etc.). You do not have to take an item if it is already in your inventory. - Explore as much as possible : prioritize moving to examining - Examine objects only when they seem very interesting and if you are stuck - Open containers (mailbox, window, etc.) - Make sure you have a light source if you need to explore dark areas - Prioritize movements over examining random things - PAY ATTENTION to actions you have already done in your situation (for instance, try every possible direction mentioned in the situation) DO NOT repeat the same action endlessly.""" # ============================================================================= # Student Agent Implementation # ============================================================================= class StudentAgent: """ MCP ReAct Agent - A complete working example. This agent demonstrates: - ReAct loop (Thought -> Tool -> Observation) - Loop detection - Action validation - Score tracking via memory tool """ def __init__(self): """Initialize the agent state.""" self.history: list[dict] = [] self.recent_actions: list[str] = [] self.score: int = 0 self.location_moves : dict = {} async def run( self, client, game: str, max_steps: int, seed: int, verbose: bool = False, ) -> RunResult: """Run the agent for a game session.""" locations_visited = set() history = [] moves = 0 # Get initial observation result = await client.call_tool("play_action", {"action": "look"}) observation = self._extract_result(result) # Track initial location location = observation.split("\n")[0] if observation else "Unknown" locations_visited.add(location) if verbose: print(f"\n{observation}") # Main ReAct loop for step in range(1, max_steps + 1): inventory_result = await client.call_tool("play_action", {"action": "inventory"}) inventory = self._extract_result(inventory_result) look_result = await client.call_tool("play_action", {"action": "look"}) look = self._extract_result(look_result) listen_result = await client.call_tool("play_action", {"action": "listen"}) listen = self._extract_result(listen_result) prompt = self._build_prompt(observation, inventory, look, listen, self.location_moves.get(look, [])) score_result = await client.call_tool("get_score", {}) score = self._extract_result(score_result) # print(f"SCORE : {score}") # print("-"*10) # print(prompt) # print("-"*10) # Call LLM for reasoning (use step-based seed for variety) response = call_llm(prompt, SYSTEM_PROMPT, seed + step) # Parse the response thought, action = self._parse_response(response) if verbose: print(f"\n--- Step {step} ---") print(f"[THOUGHT] {thought}") print(f"[ACTION] {action}") action = self._validate_action_call(action) moves += 1 try: result = await client.call_tool("play_action", {"action": action}) observation = self._extract_result(result) self.location_moves.setdefault(look, []).append(action) if verbose: print(f"[RESULT] {observation[:200]}...") except Exception as e: observation = f"Error: {e}" if verbose: print(f"[ERROR] {e}") # Track location location = observation.split("\n")[0] if observation else "Unknown" locations_visited.add(location) # Update history self.history.append({ "step": step, "thought": thought, "action": action, "result": observation[:200] }) # Track score from observation self._update_score(observation) # Check for game over if self._is_game_over(observation): if verbose: print("\n*** GAME OVER ***") break return RunResult( final_score=self.score, max_score=350, moves=moves, locations_visited=locations_visited, game_completed=self._is_game_over(observation), history=history, ) def _build_prompt(self, observation: str, inventory:str, look:str, listen:str, location_moves : list[str]) -> str: """Build the prompt for the LLM with context.""" parts = [] parts.append(f"Inventory :{inventory}") base_reason_size = 100 short_reason_size = 40 base_size_threshold = 3 short_size_threshold = 10 action_threshold = 30 # Recent history if self.history: nb = 0 parts.append("\nRecent actions -> Recent Results:") for entry in self.history[-2::-1]: reason_size = base_reason_size if nb >= base_size_threshold: reason_size = short_reason_size action = entry.get("action") result_short = entry["result"][:reason_size] + "..." if len(entry["result"]) > reason_size else entry["result"] if nb >= short_size_threshold: parts.append(f"> {action}") else: parts.append(f"> {action} -> {result_short}") if nb >= action_threshold: break nb += 1 if self.recent_actions and len(set(self.recent_actions[-3:])) == 1: parts.append(f"\n[WARNING: You've been doing '{self.recent_actions[-1]}' repeatedly. TRY SOMETHING DIFFERENT!]") parts.append(f"Here is your current situation :{look}") parts.append(f"Here is what you hear in this situation :{listen}") parts.append(f"Here are the actions you have already tried in this situation, avoid repeating if possible (but do not hallucinate directions or objects) : {",".join(location_moves) if len(location_moves) > 0 else "none"}") parts.append(f"\nResult of your most recent action ({self.history[-1].get("action") if self.history else ""}):\n{observation}") parts.append("\nWhat do you do next?") return "\n".join(parts) def _parse_response(self, response: str) -> tuple[str, str, dict]: """Parse the LLM response to extract thought, tool, and arguments.""" thought = "No reasoning provided" action = "look" lines = response.strip().split("\n") for line in lines: line_clean = line.strip() line_upper = line_clean.upper() if line_upper.startswith("THOUGHT:"): thought = line_clean.split(":", 1)[1].strip() elif line_upper.startswith("ACTION:"): raw_action = line_clean.split(":", 1)[1].strip().lower() raw_action = raw_action.replace("**", "").replace("*", "").replace("`", "") action = raw_action if raw_action else "look" return thought, action def _validate_action_call(self, action: str) -> tuple[str, dict]: """Validate and fix common tool call issues.""" invalid_verb_map = { "check": "examine", "inspect": "examine", "search": "look", "grab": "take", "pick": "take", "use": "examine", "investigate": "examine", } words = action.lower().split() if words and words[0] in invalid_verb_map: words[0] = invalid_verb_map[words[0]] action = " ".join(words) action = action.lower().strip() action = action.replace("**", "").replace("*", "").replace("`", "") action = " ".join(action.split()) return action def _extract_result(self, result) -> str: """Extract text from MCP tool result.""" if hasattr(result, 'content') and result.content: return result.content[0].text if isinstance(result, list) and result: return result[0].text if hasattr(result[0], 'text') else str(result[0]) return str(result) def _update_score(self, text: str) -> None: """Update score from game text.""" patterns = [ r'Score:\s*(\d+)', r'score[:\s]+(\d+)', r'\[Score:\s*(\d+)', ] for pattern in patterns: match = re.search(pattern, text, re.IGNORECASE) if match: self.score = max(self.score, int(match.group(1))) def _is_game_over(self, text: str) -> bool: """Check if the game is over.""" game_over_phrases = [ "game over", "you have died", "you are dead", "*** you have died ***", ] text_lower = text.lower() return any(phrase in text_lower for phrase in game_over_phrases) # ============================================================================= # Local Testing # ============================================================================= async def test_agent(): """Test the agent locally.""" from fastmcp import Client agent = StudentAgent() async with Client("mcp_server.py") as client: result = await agent.run( client=client, game="zork1", max_steps=20, seed=42, verbose=True, ) print(f"\n{'=' * 50}") print(f"Final Score: {result.final_score}") print(f"Moves: {result.moves}") print(f"Locations: {len(result.locations_visited)}") if __name__ == "__main__": import asyncio asyncio.run(test_agent())