""" MCP ReAct Agent - Enhanced Generalist Key improvements over v6: - Richer system prompt with strategy patterns for different game types - Stuck detection + automatic recovery (suggest_exploration, try new verbs) - Smarter history: shows failed actions to avoid repetition - Exit registration from game text (auto-detects mentioned directions) - Multi-phase play: explore → collect → solve → backtrack - Robust parsing with multiple fallback strategies """ import json import os import re from dataclasses import dataclass, field from typing import Optional from dotenv import load_dotenv from huggingface_hub import InferenceClient # Load environment variables load_dotenv() # Set USE_LOCAL_MODEL=1 in your .env to use a locally downloaded model USE_LOCAL_MODEL = os.getenv("USE_LOCAL_MODEL", "0").strip() in ("1", "true", "yes") LOCAL_MODEL_ID = os.getenv("LOCAL_MODEL_ID", "Qwen/Qwen2.5-3B-Instruct") # ============================================================================= # LLM Configuration - DO NOT MODIFY # ============================================================================= # Model to use (fixed for fair evaluation) LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct" # Initialize the LLM client based on mode _local_pipeline = None if USE_LOCAL_MODEL: import torch from transformers import pipeline as _hf_pipeline _local_pipeline = _hf_pipeline( "text-generation", model=LOCAL_MODEL_ID, torch_dtype=torch.bfloat16, device_map="auto", ) LLM_CLIENT = None else: _hf_token = os.getenv("HF_TOKEN") if not _hf_token: raise ValueError("HF_TOKEN not found. Set it in your .env file.") LLM_CLIENT = InferenceClient(token=_hf_token) def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str: """ Call the LLM with the given prompt. Use this function in your agent. Args: prompt: The user prompt (current game state, history, etc.) system_prompt: The system prompt (instructions for the agent) seed: Random seed for reproducibility max_tokens: Maximum tokens in response (default: 300) Returns: The LLM's response text """ messages = [ {"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}, ] if USE_LOCAL_MODEL and _local_pipeline is not None: outputs = _local_pipeline( messages, max_new_tokens=max_tokens, temperature=0.0001, # Near-deterministic (0.0 unsupported by some backends) do_sample=True, ) return outputs[0]["generated_text"][-1]["content"] response = LLM_CLIENT.chat.completions.create( model=LLM_MODEL, messages=messages, temperature=0.0, # Deterministic for reproducibility max_tokens=max_tokens, seed=seed, ) return response.choices[0].message.content @dataclass class RunResult: """Result of running the agent. Do not modify this class.""" final_score: int max_score: int moves: int locations_visited: set[str] game_completed: bool error: Optional[str] = None history: list[tuple[str, str, str]] = field(default_factory=list) # ─── System Prompt ───────────────────────────────────────────────────────────── SYSTEM_PROMPT = """You are an expert text adventure game player. You are methodical, curious, and never give up. AVAILABLE TOOLS: - play_action: Send a command to the game. ARGS: {"action": "your command"} For movement use direction words: north, south, east, west, up, down, in, out, ne, nw, se, sw For interactions: examine , take , drop , open , close , read , push , pull , turn , light , put in , unlock with , give to , attack with , tie to , climb , enter , search , listen, smell, wave , eat , drink - think: Plan your strategy. ARGS: {"goal": "...", "thought": "..."} - notebook_write: Save clues, codes, puzzle info permanently. ARGS: {"text": "...", "category": "Clue|Puzzle|Item|Danger|NPC|Code|Goal|Map"} - notebook_read: Read your saved notes. ARGS: {"keyword": "optional filter"} - memory: Full status dump (location, inventory, notes, map). ARGS: {} - get_map: View explored map and unexplored exits. ARGS: {} - find_path: Get directions to a known room. ARGS: {"target_room": "room name"} - suggest_exploration: Get suggestion for nearest unexplored area. ARGS: {} - register_exits: Record exits visible in current room. ARGS: {"directions": "north, south, up"} STRATEGY — How to play well: 1. EXPLORE SYSTEMATICALLY: When you enter a new room, ALWAYS do "look" first, then register visible exits with register_exits. Explore every exit. 2. EXAMINE EVERYTHING: If the game describes objects, furniture, or features — examine them. Things hide under rugs, inside containers, behind paintings. 3. TAKE EVERYTHING: Collect all portable items. You'll need them later for puzzles. 4. READ CAREFULLY: The game text contains ALL clues. Unusual descriptions often hint at puzzles. 5. SAVE CLUES: If you notice a code, inscription, locked door, NPC request, or puzzle — write it in notebook_write immediately. 6. DON'T REPEAT FAILURES: Check your recent history. If a command didn't work, try a DIFFERENT approach. Use synonyms: get/take, look/examine, push/move. 7. BACKTRACK SMARTLY: If stuck, call suggest_exploration to find unexplored exits, or find_path to return to a room with unsolved puzzles. 8. USE ITEMS: When you have items and encounter obstacles, think about which item might help. Try "use X", "put X in Y", "unlock Y with X". 9. LISTEN AND SEARCH: "listen", "search", "look under X", "look behind X" often reveal hidden things. 10. CHECK SCORE: If your score increases, you're making progress. If not for a while, try a new area. RESPONSE FORMAT (strict): THOUGHT: TOOL: ARGS: Example: THOUGHT: I see a rusty door to the north and a brass lamp on the ground. I should take the lamp first. TOOL: play_action ARGS: {"action": "take lamp"}""" # ─── Directions mentioned in text ────────────────────────────────────────────── EXIT_PATTERN = re.compile( r"\b(north|south|east|west|up|down|northeast|northwest|southeast|southwest)\b", re.IGNORECASE, ) DIRECTION_SET = { "n", "s", "e", "w", "u", "d", "ne", "nw", "se", "sw", "north", "south", "east", "west", "up", "down", "northeast", "northwest", "southeast", "southwest", "in", "out", "enter", "exit", } class StudentAgent: def __init__(self): self.history: list[dict] = [] self.score: int = 0 self.max_score: int = 0 self.location: str = "Unknown" self.locations_visited: set[str] = set() self.failed_actions: set[str] = set() # track "location:action" that failed self.consecutive_no_score: int = 0 self.last_score: int = 0 async def run( self, client, game: str, max_steps: int, seed: int, verbose: bool = False ) -> RunResult: tools = await client.list_tools() tool_names = [t.name for t in tools] # Initial look result = await client.call_tool("play_action", {"action": "look"}) observation = self._extract_result(result) self._update_state(observation) # Register initial exits exits = self._detect_exits(observation) if exits: try: await client.call_tool( "register_exits", {"directions": ", ".join(exits)} ) except Exception: pass if verbose: print(f"\n{'=' * 60}\nINITIAL OBSERVATION:\n{observation}\n{'=' * 60}") step = 0 for step in range(1, max_steps + 1): prompt = self._build_prompt(observation, step) response = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=400) thought, tool_name, tool_args = self._parse_response(response, tool_names) if verbose: print(f"\n--- Step {step} ---") print(f" THOUGHT: {thought}") print(f" TOOL: {tool_name}({json.dumps(tool_args)})") try: result = await client.call_tool(tool_name, tool_args) observation = self._extract_result(result) except Exception as e: observation = f"Error: {e}" if verbose: obs_preview = observation[:400].replace("\n", "\n ") print(f" RESULT: {obs_preview}") self._update_state(observation) # Auto-register exits when we get a play_action result if tool_name == "play_action": exits = self._detect_exits(observation) if exits: try: await client.call_tool( "register_exits", {"directions": ", ".join(exits)} ) except Exception: pass # Track failed movement action = tool_args.get("action", "").lower() if self._is_failure(observation): self.failed_actions.add(f"{self.location}:{action}") # Track score progress if self.score > self.last_score: self.consecutive_no_score = 0 self.last_score = self.score else: self.consecutive_no_score += 1 self.history.append( { "step": step, "thought": thought, "tool": tool_name, "args": tool_args, "result": observation[:200], "location": self.location, "score": self.score, } ) if self._is_game_over(observation): break return RunResult( final_score=self.score, max_score=self.max_score, moves=step, locations_visited=self.locations_visited, game_completed=self._is_game_over(observation), error=None, history=[ (h["tool"], json.dumps(h["args"]), h["result"]) for h in self.history ], ) def _build_prompt(self, observation: str, step: int) -> str: parts = [] # Status line parts.append( f"[Step {step} | Score: {self.score}/{self.max_score} | " f"Location: {self.location} | Rooms visited: {len(self.locations_visited)}]" ) # Recent history (last 7 for better context) if self.history: parts.append("\nRecent history:") for h in self.history[-7:]: action_str = json.dumps(h["args"]) loc = h.get("location", "?") result_short = h["result"].replace("\n", " ")[:80] parts.append(f" [{loc}] {h['tool']}({action_str}) -> {result_short}") # Failed actions at current location (helps avoid repetition) loc_failures = [ a.split(":", 1)[1] for a in self.failed_actions if a.startswith(f"{self.location}:") ] if loc_failures: parts.append(f"\nActions that FAILED here: {', '.join(loc_failures)}") # Stuck hint if self.consecutive_no_score > 8: parts.append( "\n[HINT: Score hasn't changed in a while. Consider: " "call suggest_exploration, check memory, examine objects more carefully, " "or try using inventory items on things you've seen.]" ) # Current game output parts.append(f"\nGame output:\n{observation}") parts.append("\nWhat do you do next?") return "\n".join(parts) def _parse_response( self, response: str, valid_tools: list[str] ) -> tuple[str, str, dict]: thought = "..." tool_name = "play_action" tool_args = {"action": "look"} lines = response.split("\n") args_lines = [] collecting_args = False for line in lines: clean = line.strip() up = clean.upper() if up.startswith("THOUGHT:"): thought = clean.split(":", 1)[1].strip() collecting_args = False elif up.startswith("TOOL:"): raw_tool = clean.split(":", 1)[1].strip().lower().strip("`").strip() # Handle common LLM mistakes raw_tool = raw_tool.replace(" ", "_") if raw_tool in valid_tools: tool_name = raw_tool elif "play" in raw_tool or "action" in raw_tool: tool_name = "play_action" elif "note" in raw_tool and "write" in raw_tool: tool_name = "notebook_write" elif "note" in raw_tool and "read" in raw_tool: tool_name = "notebook_read" elif "note" in raw_tool: tool_name = "notebook_write" elif "map" in raw_tool: tool_name = "get_map" elif "path" in raw_tool: tool_name = "find_path" elif "suggest" in raw_tool or "explor" in raw_tool: tool_name = "suggest_exploration" elif "register" in raw_tool or "exit" in raw_tool: tool_name = "register_exits" collecting_args = False elif up.startswith("ARGS:"): raw = clean.split(":", 1)[1].strip() args_lines = [raw] collecting_args = True elif collecting_args and clean: args_lines.append(clean) # Parse ARGS if args_lines: raw_args = " ".join(args_lines) # Try direct JSON parse try: tool_args = json.loads(raw_args) except json.JSONDecodeError: # Try extracting JSON object m = re.search(r"\{[^{}]+\}", raw_args) if m: try: tool_args = json.loads(m.group()) except json.JSONDecodeError: pass # Fallback: try extracting action string if tool_name == "play_action": m = re.search(r'"action"\s*:\s*"([^"]+)"', raw_args) if m: tool_args = {"action": m.group(1)} # ─── Fix play_action args ─── if tool_name == "play_action": action = str(tool_args.get("action", "")).strip() # Merge split args (action + target/object) for extra_key in ("target", "object", "item", "direction"): extra = str(tool_args.get(extra_key, "")).strip() if extra and extra.lower() not in action.lower(): action = f"{action} {extra}".strip() # Strip "go " prefix for bare directions if action.lower().startswith("go "): rest = action[3:].strip().lower() if rest in DIRECTION_SET: action = rest tool_args = {"action": action or "look"} # ─── Fix find_path args ─── if tool_name == "find_path": # Normalize: the tool expects "target_room" not "to" or "room" for key in ("to", "room", "destination", "target"): if key in tool_args and "target_room" not in tool_args: tool_args["target_room"] = tool_args.pop(key) # Final validation if tool_name not in valid_tools: tool_name = "play_action" if "action" not in tool_args: tool_args = {"action": "look"} return thought, tool_name, tool_args def _extract_result(self, result) -> str: if hasattr(result, "content") and result.content: return result.content[0].text return str(result) def _update_state(self, text: str): m = re.search(r"Score:\s*(\d+)/(\d+)", text, re.IGNORECASE) if m: self.score = int(m.group(1)) self.max_score = int(m.group(2)) m_loc = re.search(r"\[Location:\s*([^|\]]+)", text) if m_loc: loc = m_loc.group(1).strip() if loc and loc != "Unknown": self.location = loc self.locations_visited.add(loc) def _detect_exits(self, text: str) -> list[str]: """Extract direction words mentioned in game text.""" return list(set(EXIT_PATTERN.findall(text.lower()))) def _is_failure(self, text: str) -> bool: """Detect if the game rejected our action.""" fail_phrases = [ "you can't go", "you can't do", "i don't understand", "that's not a verb", "you don't see", "you can't see", "there's no", "you can't", "nothing happens", "is locked", "is closed", "won't budge", "doesn't seem to", "you aren't", ] lower = text.lower() return any(f in lower for f in fail_phrases) def _is_game_over(self, text: str) -> bool: return any( x in text.lower() for x in [ "*** you have died ***", "*** you have won ***", "game over", "you have won", "you have died", "would you like to restart", ] ) # ============================================================================= # For local testing # ============================================================================= async def test_agent(): """Test the agent locally.""" from fastmcp import Client server_path = "mcp_server.py" agent = StudentAgent() async with Client(server_path) as client: result = await agent.run( client=client, game="zork1", max_steps=10, seed=42, verbose=True, ) print(f"\nFinal Score: {result.final_score}") print(f"Moves: {result.moves}") print(f"Locations: {result.locations_visited}") if __name__ == "__main__": import asyncio asyncio.run(test_agent())