Spaces:
Sleeping
Sleeping
"""
MCP ReAct Agent - Enhanced Generalist
Key improvements over v6:
- Richer system prompt with strategy patterns for different game types
- Stuck detection + automatic recovery (suggest_exploration, try new verbs)
- Smarter history: shows failed actions to avoid repetition
- Exit registration from game text (auto-detects mentioned directions)
- Multi-phase play: explore → collect → solve → backtrack
- Robust parsing with multiple fallback strategies
"""
| import json | |
| import os | |
| import re | |
| from dataclasses import dataclass, field | |
| from typing import Optional | |
| from dotenv import load_dotenv | |
| from huggingface_hub import InferenceClient | |
# Load environment variables (e.g. from a local .env file) before any of the
# os.getenv() lookups below run.
load_dotenv()
def _env_flag(name: str, default: str = "0") -> bool:
    """Return True when environment variable *name* holds a truthy flag.

    Accepted truthy values are ``1``, ``true`` and ``yes``. Surrounding
    whitespace is ignored and the comparison is case-insensitive, so
    ``True`` or ``YES`` in a .env file behave the same as ``1``.

    Args:
        name: Environment variable to read.
        default: Value used when the variable is not set.

    Returns:
        Whether the (defaulted) value is one of the truthy spellings.
    """
    return os.getenv(name, default).strip().lower() in ("1", "true", "yes")


# Set USE_LOCAL_MODEL=1 in your .env to use a locally downloaded model
USE_LOCAL_MODEL = _env_flag("USE_LOCAL_MODEL")
# Hugging Face model id loaded by the local pipeline when USE_LOCAL_MODEL is on.
LOCAL_MODEL_ID = os.getenv("LOCAL_MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")
# =============================================================================
# LLM Configuration - DO NOT MODIFY
# =============================================================================
# Model to use (fixed for fair evaluation)
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

# Initialize the LLM client based on mode.
# Local mode builds `_local_pipeline` and leaves `LLM_CLIENT` as None;
# remote mode leaves `_local_pipeline` as None and builds `LLM_CLIENT`.
_local_pipeline = None
if USE_LOCAL_MODEL:
    # Imported only on this branch, so remote mode never imports torch or
    # transformers.
    import torch
    from transformers import pipeline as _hf_pipeline

    # Note: the local pipeline loads LOCAL_MODEL_ID, not LLM_MODEL.
    _local_pipeline = _hf_pipeline(
        "text-generation",
        model=LOCAL_MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    LLM_CLIENT = None
else:
    # Remote mode requires a Hugging Face token; fail fast at import time
    # rather than on the first LLM call.
    _hf_token = os.getenv("HF_TOKEN")
    if not _hf_token:
        raise ValueError("HF_TOKEN not found. Set it in your .env file.")
    LLM_CLIENT = InferenceClient(token=_hf_token)
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """
    Call the LLM with the given prompt. Use this function in your agent.

    Dispatches to the local transformers pipeline when USE_LOCAL_MODEL is set
    and the pipeline was created; otherwise calls the remote chat-completions
    endpoint through LLM_CLIENT with model LLM_MODEL.

    Args:
        prompt: The user prompt (current game state, history, etc.)
        system_prompt: The system prompt (instructions for the agent)
        seed: Random seed for reproducibility. Only forwarded on the remote
            path; the local pipeline call does not receive it.
        max_tokens: Maximum tokens in response (default: 300)

    Returns:
        The LLM's response text
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    if USE_LOCAL_MODEL and _local_pipeline is not None:
        outputs = _local_pipeline(
            messages,
            max_new_tokens=max_tokens,
            temperature=0.0001,  # Near-deterministic (0.0 unsupported by some backends)
            do_sample=True,
        )
        # Takes the content of the last message in the returned conversation
        # (presumably the newly generated assistant turn - confirm against the
        # transformers chat pipeline output format).
        return outputs[0]["generated_text"][-1]["content"]
    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.0,  # Deterministic for reproducibility
        max_tokens=max_tokens,
        seed=seed,
    )
    return response.choices[0].message.content
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class."""

    # The @dataclass decorator is required: the class relies on
    # field(default_factory=...) and is constructed with keyword arguments.
    final_score: int  # score when the run ended
    max_score: int  # maximum score reported by the game
    moves: int  # number of agent steps taken
    locations_visited: set[str]  # distinct location names seen
    game_completed: bool  # True when the last observation signalled game over
    error: Optional[str] = None  # error description, if the run failed
    # One (tool name, JSON-encoded args, result text) tuple per step.
    history: list[tuple[str, str, str]] = field(default_factory=list)
# ─── System Prompt ────────────────────────────────────────────────────────────
# System prompt sent with every LLM call: lists the MCP tools and their ARGS
# schema, the playing strategy, and the strict THOUGHT/TOOL/ARGS reply format
# that StudentAgent._parse_response expects.
SYSTEM_PROMPT = """You are an expert text adventure game player. You are methodical, curious, and never give up.
AVAILABLE TOOLS:
- play_action: Send a command to the game.
ARGS: {"action": "your command"}
For movement use direction words: north, south, east, west, up, down, in, out, ne, nw, se, sw
For interactions: examine <thing>, take <item>, drop <item>, open <thing>, close <thing>,
read <thing>, push <thing>, pull <thing>, turn <thing>, light <thing>, put <item> in <container>,
unlock <door> with <key>, give <item> to <npc>, attack <enemy> with <weapon>, tie <item> to <thing>,
climb <thing>, enter <thing>, search <thing>, listen, smell, wave <item>, eat <item>, drink <item>
- think: Plan your strategy. ARGS: {"goal": "...", "thought": "..."}
- notebook_write: Save clues, codes, puzzle info permanently.
ARGS: {"text": "...", "category": "Clue|Puzzle|Item|Danger|NPC|Code|Goal|Map"}
- notebook_read: Read your saved notes. ARGS: {"keyword": "optional filter"}
- memory: Full status dump (location, inventory, notes, map). ARGS: {}
- get_map: View explored map and unexplored exits. ARGS: {}
- find_path: Get directions to a known room. ARGS: {"target_room": "room name"}
- suggest_exploration: Get suggestion for nearest unexplored area. ARGS: {}
- register_exits: Record exits visible in current room.
ARGS: {"directions": "north, south, up"}
STRATEGY — How to play well:
1. EXPLORE SYSTEMATICALLY: When you enter a new room, ALWAYS do "look" first, then register visible exits with register_exits. Explore every exit.
2. EXAMINE EVERYTHING: If the game describes objects, furniture, or features — examine them. Things hide under rugs, inside containers, behind paintings.
3. TAKE EVERYTHING: Collect all portable items. You'll need them later for puzzles.
4. READ CAREFULLY: The game text contains ALL clues. Unusual descriptions often hint at puzzles.
5. SAVE CLUES: If you notice a code, inscription, locked door, NPC request, or puzzle — write it in notebook_write immediately.
6. DON'T REPEAT FAILURES: Check your recent history. If a command didn't work, try a DIFFERENT approach. Use synonyms: get/take, look/examine, push/move.
7. BACKTRACK SMARTLY: If stuck, call suggest_exploration to find unexplored exits, or find_path to return to a room with unsolved puzzles.
8. USE ITEMS: When you have items and encounter obstacles, think about which item might help. Try "use X", "put X in Y", "unlock Y with X".
9. LISTEN AND SEARCH: "listen", "search", "look under X", "look behind X" often reveal hidden things.
10. CHECK SCORE: If your score increases, you're making progress. If not for a while, try a new area.
RESPONSE FORMAT (strict):
THOUGHT: <brief reasoning about what you observe and your plan>
TOOL: <exactly one tool name>
ARGS: <valid JSON for that tool>
Example:
THOUGHT: I see a rusty door to the north and a brass lamp on the ground. I should take the lamp first.
TOOL: play_action
ARGS: {"action": "take lamp"}"""
# ─── Directions mentioned in text ─────────────────────────────────────────────
# Matches full compass/vertical direction words as whole words, ignoring case.
# Abbreviations and in/out are intentionally not part of this pattern.
EXIT_PATTERN = re.compile(
    r"\b(north|south|east|west|up|down|northeast|northwest|southeast|southwest)\b",
    flags=re.IGNORECASE,
)

# Every token accepted as a bare movement command (used to strip a leading
# "go " from actions such as "go north").
DIRECTION_SET = {
    # single-letter abbreviations
    "n", "s", "e", "w", "u", "d",
    # diagonal abbreviations
    "ne", "nw", "se", "sw",
    # full cardinal and vertical words
    "north", "south", "east", "west", "up", "down",
    # full diagonal words
    "northeast", "northwest", "southeast", "southwest",
    # entering / leaving
    "in", "out", "enter", "exit",
}
class StudentAgent:
    """ReAct-style agent that plays a text adventure through MCP tools.

    On every step the agent builds a prompt from its tracked state, asks the
    LLM for a ``THOUGHT / TOOL / ARGS`` reply, runs the chosen tool on the MCP
    client, and folds the result back into its state (score, location,
    failed actions, history).
    """

    # Number of trailing history entries shown to the LLM.
    _HISTORY_WINDOW = 7
    # Steps without a score increase before the "stuck" hint is added.
    _STUCK_THRESHOLD = 8
    # Keys LLMs sometimes use instead of putting the whole command in "action".
    _EXTRA_ACTION_KEYS = ("target", "object", "item", "direction")
    # Keys LLMs sometimes use instead of find_path's "target_room".
    _FIND_PATH_ALIASES = ("to", "room", "destination", "target")
    # Lower-case substrings that mark a rejected command. "you can't" also
    # covers the longer "you can't go / do / see" variants.
    _FAIL_PHRASES = (
        "you can't",
        "i don't understand",
        "that's not a verb",
        "you don't see",
        "there's no",
        "nothing happens",
        "is locked",
        "is closed",
        "won't budge",
        "doesn't seem to",
        "you aren't",
    )
    # Lower-case substrings that mark the end of the game. The starred
    # "*** you have died ***" banners are covered by the plain phrases.
    _GAME_OVER_PHRASES = (
        "game over",
        "you have won",
        "you have died",
        "would you like to restart",
    )

    def __init__(self):
        self.history: list[dict] = []
        self.score: int = 0
        self.max_score: int = 0
        self.location: str = "Unknown"
        self.locations_visited: set[str] = set()
        self.failed_actions: set[str] = set()  # track "location:action" that failed
        self.consecutive_no_score: int = 0
        self.last_score: int = 0

    async def run(
        self, client, game: str, max_steps: int, seed: int, verbose: bool = False
    ) -> "RunResult":
        """Play the game for at most *max_steps* LLM-driven steps.

        Args:
            client: MCP client exposing ``list_tools`` and ``call_tool``.
            game: Game identifier (accepted for interface compatibility; not
                used by this method).
            max_steps: Upper bound on the number of agent steps.
            seed: Base seed; step ``i`` calls the LLM with ``seed + i``.
            verbose: Print thoughts, tool calls and results while playing.

        Returns:
            A RunResult summarising score, moves, visited locations and the
            per-step history.
        """
        tools = await client.list_tools()
        tool_names = [t.name for t in tools]

        # Initial look
        result = await client.call_tool("play_action", {"action": "look"})
        observation = self._extract_result(result)
        self._update_state(observation)
        await self._auto_register_exits(client, observation)

        if verbose:
            print(f"\n{'=' * 60}\nINITIAL OBSERVATION:\n{observation}\n{'=' * 60}")

        step = 0  # stays 0 when max_steps < 1 so `moves` is still defined
        for step in range(1, max_steps + 1):
            prompt = self._build_prompt(observation, step)
            response = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=400)
            thought, tool_name, tool_args = self._parse_response(response, tool_names)

            if verbose:
                print(f"\n--- Step {step} ---")
                print(f" THOUGHT: {thought}")
                print(f" TOOL: {tool_name}({json.dumps(tool_args)})")

            location_before = self.location
            try:
                result = await client.call_tool(tool_name, tool_args)
                observation = self._extract_result(result)
            except Exception as e:
                # Feed the tool error back to the LLM instead of aborting the run.
                observation = f"Error: {e}"

            if verbose:
                obs_preview = observation[:400].replace("\n", "\n ")
                print(f" RESULT: {obs_preview}")

            self._update_state(observation)

            if tool_name == "play_action":
                # Auto-register exits when we get a play_action result
                await self._auto_register_exits(client, observation)
                # Track failed actions. A command that moved the player did
                # not fail, even if the new room's text happens to contain a
                # failure phrase such as "is closed".
                action = str(tool_args.get("action", "")).lower()
                if self.location == location_before and self._is_failure(observation):
                    self.failed_actions.add(f"{location_before}:{action}")

            # Track score progress
            if self.score > self.last_score:
                self.consecutive_no_score = 0
                self.last_score = self.score
            else:
                self.consecutive_no_score += 1

            self.history.append(
                {
                    "step": step,
                    "thought": thought,
                    "tool": tool_name,
                    "args": tool_args,
                    "result": observation[:200],
                    "location": self.location,
                    "score": self.score,
                }
            )

            if self._is_game_over(observation):
                break

        return RunResult(
            final_score=self.score,
            max_score=self.max_score,
            moves=step,
            locations_visited=self.locations_visited,
            game_completed=self._is_game_over(observation),
            error=None,
            history=[
                (h["tool"], json.dumps(h["args"]), h["result"]) for h in self.history
            ],
        )

    async def _auto_register_exits(self, client, observation: str) -> None:
        """Register the directions mentioned in *observation*, best effort."""
        exits = self._detect_exits(observation)
        if not exits:
            return
        try:
            await client.call_tool("register_exits", {"directions": ", ".join(exits)})
        except Exception:
            # Deliberately best-effort: exit registration is only a mapping
            # aid and must never interrupt play.
            pass

    def _build_prompt(self, observation: str, step: int) -> str:
        """Assemble the user prompt for one step.

        The prompt holds a status line, the most recent history entries, the
        actions that already failed at the current location, an optional
        "stuck" hint and the latest game output.
        """
        parts = [
            f"[Step {step} | Score: {self.score}/{self.max_score} | "
            f"Location: {self.location} | Rooms visited: {len(self.locations_visited)}]"
        ]

        # Recent history (last 7 for better context)
        recent = self.history[-self._HISTORY_WINDOW:]
        if recent:
            parts.append("\nRecent history:")
            for h in recent:
                action_str = json.dumps(h["args"])
                loc = h.get("location", "?")
                result_short = h["result"].replace("\n", " ")[:80]
                parts.append(f" [{loc}] {h['tool']}({action_str}) -> {result_short}")

        # Failed actions at current location (helps avoid repetition).
        # Strip the prefix by length so a ":" inside the location name cannot
        # mis-split the key, and sort so the prompt is reproducible (set
        # iteration order is not stable across runs).
        prefix = f"{self.location}:"
        loc_failures = sorted(
            key[len(prefix):] for key in self.failed_actions if key.startswith(prefix)
        )
        if loc_failures:
            parts.append(f"\nActions that FAILED here: {', '.join(loc_failures)}")

        # Stuck hint
        if self.consecutive_no_score > self._STUCK_THRESHOLD:
            parts.append(
                "\n[HINT: Score hasn't changed in a while. Consider: "
                "call suggest_exploration, check memory, examine objects more carefully, "
                "or try using inventory items on things you've seen.]"
            )

        # Current game output
        parts.append(f"\nGame output:\n{observation}")
        parts.append("\nWhat do you do next?")
        return "\n".join(parts)

    def _parse_response(
        self, response: str, valid_tools: list[str]
    ) -> tuple[str, str, dict]:
        """Parse an LLM reply into ``(thought, tool_name, tool_args)``.

        Tolerates a missing or empty reply, misspelled tool names, ARGS spread
        over several lines, JSON embedded in surrounding text and JSON that is
        not an object. Anything unusable falls back to
        ``play_action {"action": "look"}``.

        Args:
            response: Raw LLM text (``None`` is treated as empty).
            valid_tools: Tool names offered by the MCP server.

        Returns:
            The thought text, a tool name from *valid_tools* (or
            ``"play_action"``), and the argument dict for that tool.
        """
        thought = "..."
        tool_name = "play_action"
        args_lines: list[str] = []
        collecting_args = False

        for line in (response or "").split("\n"):
            clean = line.strip()
            up = clean.upper()
            if up.startswith("THOUGHT:"):
                thought = clean.split(":", 1)[1].strip()
                collecting_args = False
            elif up.startswith("TOOL:"):
                raw_tool = clean.split(":", 1)[1].strip().lower().strip("`").strip()
                # Handle common LLM mistakes
                resolved = self._resolve_tool(raw_tool.replace(" ", "_"), valid_tools)
                if resolved is not None:
                    tool_name = resolved
                collecting_args = False
            elif up.startswith("ARGS:"):
                args_lines = [clean.split(":", 1)[1].strip()]
                collecting_args = True
            elif collecting_args and clean:
                # JSON continued on following lines
                args_lines.append(clean)

        parsed = self._parse_args(" ".join(args_lines), tool_name) if args_lines else None
        if parsed is not None:
            tool_args = parsed
        elif tool_name == "play_action":
            tool_args = {"action": "look"}
        else:
            # Never send play_action's default arguments to a different tool.
            tool_args = {}

        # Fix play_action args
        if tool_name == "play_action":
            action = str(tool_args.get("action") or "").strip()
            # Merge split args (action + target/object)
            for extra_key in self._EXTRA_ACTION_KEYS:
                extra = str(tool_args.get(extra_key) or "").strip()
                if extra and extra.lower() not in action.lower():
                    action = f"{action} {extra}".strip()
            # Strip "go " prefix for bare directions
            if action.lower().startswith("go "):
                rest = action[3:].strip().lower()
                if rest in DIRECTION_SET:
                    action = rest
            tool_args = {"action": action or "look"}

        # Fix find_path args
        if tool_name == "find_path":
            # Normalize: the tool expects "target_room" not "to" or "room"
            for key in self._FIND_PATH_ALIASES:
                if key in tool_args and "target_room" not in tool_args:
                    tool_args["target_room"] = tool_args.pop(key)

        # Final validation
        if tool_name not in valid_tools:
            tool_name = "play_action"
            if "action" not in tool_args:
                tool_args = {"action": "look"}

        return thought, tool_name, tool_args

    @staticmethod
    def _resolve_tool(raw_tool: str, valid_tools: list[str]) -> Optional[str]:
        """Map a (possibly misspelled) tool name to a known one, or None."""
        if raw_tool in valid_tools:
            return raw_tool
        if "play" in raw_tool or "action" in raw_tool:
            return "play_action"
        if "note" in raw_tool:
            # Writing wins unless the name clearly asks for a read only.
            if "read" in raw_tool and "write" not in raw_tool:
                return "notebook_read"
            return "notebook_write"
        if "map" in raw_tool:
            return "get_map"
        if "path" in raw_tool:
            return "find_path"
        if "suggest" in raw_tool or "explor" in raw_tool:
            return "suggest_exploration"
        if "register" in raw_tool or "exit" in raw_tool:
            return "register_exits"
        return None

    @staticmethod
    def _parse_args(raw_args: str, tool_name: str) -> Optional[dict]:
        """Return the ARGS text as a dict, or None when nothing usable is found."""
        # Try the whole text first, then the first flat {...} object inside it.
        candidates = [raw_args]
        m = re.search(r"\{[^{}]+\}", raw_args)
        if m:
            candidates.append(m.group())
        for candidate in candidates:
            try:
                value = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            # Valid JSON that is not an object (list, string, number) cannot
            # be used as tool arguments.
            if isinstance(value, dict):
                return value
        # Fallback: try extracting action string
        if tool_name == "play_action":
            m = re.search(r'"action"\s*:\s*"([^"]+)"', raw_args)
            if m:
                return {"action": m.group(1)}
        return None

    def _extract_result(self, result) -> str:
        """Return the text of the first content item, else ``str(result)``."""
        content = getattr(result, "content", None)
        if content:
            text = getattr(content[0], "text", None)
            if text is not None:
                return text
        return str(result)

    def _update_state(self, text: str):
        """Update score and location from the status markers found in *text*."""
        m = re.search(r"Score:\s*(\d+)/(\d+)", text, re.IGNORECASE)
        if m:
            self.score = int(m.group(1))
            self.max_score = int(m.group(2))
        m_loc = re.search(r"\[Location:\s*([^|\]]+)", text)
        if m_loc:
            loc = m_loc.group(1).strip()
            if loc and loc != "Unknown":
                self.location = loc
                self.locations_visited.add(loc)

    def _detect_exits(self, text: str) -> list[str]:
        """Extract direction words mentioned in game text.

        Returns lower-case directions, de-duplicated in order of first
        appearance so the result is the same on every run.
        """
        return list(dict.fromkeys(d.lower() for d in EXIT_PATTERN.findall(text)))

    def _is_failure(self, text: str) -> bool:
        """Detect if the game rejected our action."""
        # Normalise the typographic apostrophe so "can’t" matches "can't".
        lower = text.lower().replace("\u2019", "'")
        return any(phrase in lower for phrase in self._FAIL_PHRASES)

    def _is_game_over(self, text: str) -> bool:
        """Return True when *text* contains a death, victory or restart message."""
        lower = text.lower()
        return any(phrase in lower for phrase in self._GAME_OVER_PHRASES)
| # ============================================================================= | |
| # For local testing | |
| # ============================================================================= | |
async def test_agent():
    """Smoke-test the agent: play ten verbose steps of zork1 and print a summary.

    Connects a fastmcp client to ``mcp_server.py``, runs a fresh StudentAgent
    with seed 42, then prints the final score, move count and the set of
    visited locations.
    """
    # Imported lazily so fastmcp is only needed when this helper is run.
    from fastmcp import Client

    student = StudentAgent()
    run_options = {"game": "zork1", "max_steps": 10, "seed": 42, "verbose": True}

    async with Client("mcp_server.py") as mcp_client:
        result = await student.run(client=mcp_client, **run_options)

        print(f"\nFinal Score: {result.final_score}")
        print(f"Moves: {result.moves}")
        print(f"Locations: {result.locations_visited}")
# Script entry point: run the local smoke test when executed directly.
if __name__ == "__main__":
    # asyncio is only needed here, to drive the async test helper.
    import asyncio

    asyncio.run(test_agent())