Spaces:
Sleeping
Sleeping
| """ | |
| Student Agent for Text Adventure Games | |
| This is your submission file. Implement the StudentAgent class to play | |
| text adventure games using the MCP server you also implement. | |
| Your agent should: | |
| 1. Connect to the MCP server via the provided client | |
| 2. Use the ReAct pattern (Thought -> Action -> Observation) | |
| 3. Call MCP tools to interact with the game | |
| 4. Maximize the game score within the step limit | |
| Required method: | |
| async def run(self, client, game, max_steps, seed, verbose) -> RunResult | |
| The 'client' is a FastMCP Client already connected to your MCP server. | |
| Use it to call tools like: await client.call_tool("play_action", {"action": "look"}) | |
| Tips: | |
| - Start by looking around and understanding your environment | |
| - Keep track of visited locations to avoid loops | |
| - Pick up useful items (lamp, sword, etc.) | |
| - The seed parameter should be used to set your LLM's seed for reproducibility | |
| """ | |
| import json | |
| import os | |
| import re | |
| from dataclasses import dataclass, field | |
| from typing import Optional | |
| from dotenv import load_dotenv | |
| from huggingface_hub import InferenceClient | |
| # Load environment variables | |
| load_dotenv() | |
| # ============================================================================= | |
| # LLM Configuration - DO NOT MODIFY | |
| # ============================================================================= | |
# Model to use (fixed for fair evaluation)
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
# Initialize the LLM client (uses HF_TOKEN from environment)
# The token is read once, at import time. If it is missing or empty the
# import itself aborts with ValueError, so nothing below this point runs.
_hf_token = os.getenv("HF_TOKEN")
if not _hf_token:
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")
# Shared client instance used by call_llm().
LLM_CLIENT = InferenceClient(token=_hf_token)
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """
    Call the LLM with the given prompt. Use this function in your agent.

    Builds a two-message chat (system + user), sends it to LLM_MODEL through
    the module-level LLM_CLIENT, and returns the content of the first choice.
    This is a plain (non-async) function; no exceptions are caught here, so
    any error raised by the client propagates to the caller.

    Args:
        prompt: The user prompt (current game state, history, etc.)
        system_prompt: The system prompt (instructions for the agent)
        seed: Random seed for reproducibility
        max_tokens: Maximum tokens in response (default: 300)

    Returns:
        The LLM's response text

    Example:
        response = call_llm(
            prompt="You are in a forest. What do you do?",
            system_prompt=SYSTEM_PROMPT,
            seed=42,
        )
    """
    # System message first, then the user turn.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.0,  # Deterministic for reproducibility
        max_tokens=max_tokens,
        seed=seed,
    )
    # Only the first completion is used.
    return response.choices[0].message.content
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class.

    The ``@dataclass`` decorator is required: the agent constructs this type
    with keyword arguments, and ``history`` relies on ``field(default_factory=...)``
    to give every instance its own list.
    """

    # Score reported by the game when the run ended.
    final_score: int
    # Maximum achievable score for the game.
    max_score: int
    # Number of steps the agent executed.
    moves: int
    # Distinct location labels seen during the run.
    locations_visited: set[str]
    # True when the run ended because the game itself ended.
    game_completed: bool
    # Optional error description; None when the run finished normally.
    error: Optional[str] = None
    # (thought, action, result) tuples, one per step.
    history: list[tuple[str, str, str]] = field(default_factory=list)
| # ============================================================================= | |
| # System Prompt - Customize this for your agent | |
| # ============================================================================= | |
| SYSTEM_PROMPT = """You are an expert AI agent playing a classic text adventure game. Your mission: explore the world, solve puzzles, collect treasures, and maximize your score through careful observation and strategic play. | |
| AVAILABLE TOOLS (use via MCP): | |
| - play_action: Execute a game command (the primary tool for interacting with the game) | |
| - memory: Get current game state summary (location, score, recent actions, failed actions) | |
| - inventory: Check what you're carrying | |
| - get_map: See explored locations and connections (use to avoid getting lost) | |
| - get_valid_actions: Get a list of likely valid actions in the current state | |
| VALID GAME COMMANDS for play_action: | |
| - Movement: north, south, east, west, up, down, enter, exit, climb | |
| - Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing> | |
| - Light: turn on lamp, extinguish candle | |
| - Combat: attack <enemy> with <weapon>, kill <enemy> with <weapon> | |
| - Item use: put <item> in <container>, give <item> to <npc>, turn on <item> | |
| - Other: look, inventory, read <thing>, wait, push <thing>, pull <thing> | |
| - Multi-object: take all, drop all, take lamp, sword | |
| - NPC: give <item> to <npc>, ask <npc> about <topic> | |
| FORBIDDEN COMMANDS (parser won't recognize): check, inspect, search, grab, use, help | |
| CRITICAL RULES: | |
| 1. Distinguish failure types: | |
| - Hard failure ("can't go", "wall", "I don't understand"): STOP retrying after 2 attempts | |
| - Puzzle feedback (unusual responses, state changes): Continue with DIFFERENT approaches | |
| - Soft rejection ("too dark", "locked"): Solve the prerequisite first | |
| 2. One command per turn: Issue a single game command | |
| 3. Discovery-based play: Solve through observation and experimentation | |
| 4. Combat priority: During combat, ONLY use combat actions. No examining! | |
| ITEM STRATEGY (VERY IMPORTANT): | |
| - After picking up an item, THINK about what it could be used for: | |
| * Light sources (lamp, lantern, torch) -> turn on before dark areas | |
| * Weapons (sword, knife, axe) -> attack enemies with them | |
| * Keys/tools -> open locked doors/containers | |
| * Food/drink -> give to NPCs or eat/drink when needed | |
| * Treasures (gold, jewels, trophy) -> may need to be placed somewhere for points | |
| * Rope/ladder -> climb or tie to access new areas | |
| - When you encounter an obstacle, ALWAYS check your inventory for a relevant item: | |
| * Locked door? -> Do I have a key? | |
| * Dark room? -> Do I have a lamp? Turn it on! | |
| * Enemy? -> Do I have a weapon? Attack with it! | |
| * NPC wants something? -> Do I have it in inventory? | |
| * Container/receptacle? -> Try putting relevant items in it | |
| - EXAMINE items you pick up - the description often hints at their use | |
| - Try using items on things in the environment: "put X in Y", "give X to Y", "unlock door with key" | |
| EXPLORATION STRATEGY: | |
| 1. New location -> look -> note features -> check exits -> try promising directions | |
| 2. Examine interesting objects (every noun could be interactive) | |
| 3. Pick up useful items (light sources, weapons, keys, treasures) | |
| 4. Open containers (mailbox, chest, door, window) | |
| 5. Use get_map to avoid getting lost | |
| 6. Turn on lamp before entering dark areas! | |
| 7. When stuck: check inventory for unused items, then try get_valid_actions | |
| PUZZLE-SOLVING: | |
| - FIRST check inventory - do you have an item that could help? | |
| - Standard actions first (examine, take, open) | |
| - Try items on obstacles: "unlock X with key", "cut X with sword", "light X with lamp" | |
| - Environmental clues: read room descriptions for hints about what items to use | |
| - Multi-step chains: get item -> prepare it -> use it at the right location | |
| RESPOND IN THIS EXACT FORMAT (no markdown, no code blocks): | |
| THOUGHT: <your reasoning - what you observe, plan, and why. If you have items, consider how they might help.> | |
| TOOL: <tool_name> | |
| ARGS: <JSON arguments> | |
| Examples: | |
| THOUGHT: I just arrived at a new location. I should look around to understand my surroundings. | |
| TOOL: play_action | |
| ARGS: {"action": "look"} | |
| THOUGHT: It's dark here and I have a lamp in my inventory. I need to turn it on to see. | |
| TOOL: play_action | |
| ARGS: {"action": "turn on lamp"} | |
| THOUGHT: There's a locked door and I picked up a key earlier. Let me try using it. | |
| TOOL: play_action | |
| ARGS: {"action": "unlock door with key"} | |
| THOUGHT: The troll is blocking my way and I have a sword. I should attack it. | |
| TOOL: play_action | |
| ARGS: {"action": "attack troll with sword"} | |
| THOUGHT: I'm stuck and haven't used several items. Let me check what I'm carrying. | |
| TOOL: inventory | |
| ARGS: {} | |
| ANTI-PATTERNS TO AVOID: | |
| - Picking up items and NEVER using them | |
| - Ignoring inventory when stuck at a puzzle | |
| - Repeating the EXACT same action after a hard failure | |
| - Checking inventory during combat | |
| - Using forbidden verbs (check, inspect, search, grab, use) | |
| - Staying in one location too long without making progress | |
| DO NOT repeat the same action multiple times. If stuck, CHECK YOUR INVENTORY for items that might help, then try something different or move to a new area.""" | |
| # ============================================================================= | |
| # Student Agent - IMPLEMENT THIS CLASS | |
| # ============================================================================= | |
| class StudentAgent: | |
| """ | |
| ReAct agent implementation inspired by ZorkGPT architecture. | |
| Features: | |
| - ReAct loop (Thought -> Tool -> Observation) | |
| - Loop detection (repeated actions, action cycling) | |
| - Action validation and cleaning | |
| - Score tracking from game responses | |
| - Contextual prompt building with history | |
| - Game-agnostic design | |
| """ | |
| def __init__(self): | |
| """Initialize agent state tracking.""" | |
| self.history: list[dict] = [] # Full action history | |
| self.recent_actions: list[str] = [] # Last N actions for loop detection | |
| self.score: int = 0 | |
| self.max_score: int = 0 | |
| self.tool_names: list[str] = [] | |
| # Per-location tracking | |
| self.actions_by_location: dict[str, list[str]] = {} # location -> [actions tried] | |
| self.failed_actions_by_location: dict[str, set[str]] = {} # location -> {failed actions} | |
| self.current_location: str = "" | |
| self.turns_at_location: int = 0 | |
| self.turns_since_score_change: int = 0 | |
| # Inventory tracking | |
| self.known_inventory: list[str] = [] # Items we know we're carrying | |
| self.last_inventory_check: int = 0 # Step when we last checked inventory | |
| self.items_used: set[str] = set() # Items we've tried using | |
| self.items_examined: set[str] = set() # Items we've examined | |
async def run(
    self,
    client,  # FastMCP Client connected to your MCP server
    game: str,
    max_steps: int,
    seed: int,
    verbose: bool = False,
) -> RunResult:
    """
    Run the agent for a game session using the ReAct pattern.

    Flow: list the server's tools, issue an initial "look", then loop for at
    most ``max_steps`` iterations. Each iteration optionally refreshes the
    inventory, builds a prompt, asks the LLM for THOUGHT/TOOL/ARGS, validates
    the call, applies the loop/stagnation guards, executes the tool, and
    updates location, failure, score and history bookkeeping. The loop ends
    early when ``_is_game_over`` reports the game has ended.

    Args:
        client: Object exposing awaitable ``list_tools()`` and
            ``call_tool(name, args)``.
        game: Game name; only used in verbose output here.
        max_steps: Upper bound on loop iterations.
        seed: Base LLM seed; each step calls the LLM with ``seed + step``.
        verbose: When True, print a per-step trace and a final summary.

    Returns:
        RunResult with the tracked score, the number of executed tool calls,
        the set of text-based location labels, and the per-step history.
        ``max_score`` falls back to 350 when no maximum was detected, and
        ``error`` is never populated by this method.

    NOTE(review): the indentation of this method was reconstructed from a
    flattened listing. In particular the forced-movement guard is placed
    inside the ``play_action`` branch because it overwrites ``tool_args``
    with a game command -- confirm against the original file.
    NOTE(review): ``call_llm`` is a synchronous call that is not wrapped in
    try/except, so an LLM error aborts the whole run.
    """
    locations_visited = set()  # Text-based (unique first lines) - for professor's metric
    game_locations_visited = set()  # Jericho real rooms - for debugging
    history = []  # (thought, action, result) tuples for RunResult
    moves = 0
    game_over = False
    # Get available tools from the MCP server
    tools = await client.list_tools()
    self.tool_names = [t.name for t in tools]
    # Get initial observation
    result = await client.call_tool("play_action", {"action": "look"})
    # _extract_result / _extract_location / _extract_observation_location /
    # _update_score / _is_game_over are defined outside the visible part of
    # the file; their exact parsing rules are not documented here.
    observation = self._extract_result(result)
    # Track initial location (both systems)
    location = self._extract_location(observation)
    obs_location = self._extract_observation_location(observation)
    game_locations_visited.add(location)
    locations_visited.add(obs_location)
    self.current_location = location
    self._update_score(observation)
    if verbose:
        print(f"\n=== Starting {game} ===")
        print(f"{observation}\n")
    # Main ReAct loop
    for step in range(1, max_steps + 1):
        # Periodically refresh inventory (every 10 steps or when we just picked something up)
        if step - self.last_inventory_check >= 10 or self._just_picked_up_item(observation):
            try:
                inv_result = await client.call_tool("inventory", {})
                inv_text = self._extract_result(inv_result)
                self._parse_inventory(inv_text)
                self.last_inventory_check = step
            except Exception:
                # Best effort: a failed refresh keeps the previous inventory.
                pass
        # Track item pickups from observation
        # (this may reset last_inventory_check to 0 right after the refresh above)
        self._track_item_changes(observation)
        # Build contextual prompt
        prompt = self._build_prompt(observation)
        # Call LLM with step-varied seed for diversity
        response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
        # Parse response to get thought, tool, args
        thought, tool_name, tool_args = self._parse_response(response)
        # Validate and fix tool call
        tool_name, tool_args = self._validate_tool_call(tool_name, tool_args)
        if verbose:
            print(f"--- Step {step} ---")
            print(f"[THOUGHT] {thought}")
            print(f"[TOOL] {tool_name}({tool_args})")
        # Loop detection for play_action
        if tool_name == "play_action":
            action = tool_args.get("action", "look")
            # The LLM's proposed action is recorded even if a guard below
            # replaces it before execution.
            self.recent_actions.append(action)
            if len(self.recent_actions) > 10:
                self.recent_actions = self.recent_actions[-10:]
            # Track actions at current location
            if self.current_location not in self.actions_by_location:
                self.actions_by_location[self.current_location] = []
            self.actions_by_location[self.current_location].append(action)
            # Check if this action (or a semantic variant) was already tried and failed here
            action_key = self._normalize_action_key(action)
            failed_here = self.failed_actions_by_location.get(self.current_location, set())
            if action_key in failed_here:
                if verbose:
                    print(f"[BLOCKED] '{action}' already failed at '{self.current_location}', skipping")
                tool_args = self._break_loop(action)
            # Detect immediate repetition (same action 3+ times)
            elif len(self.recent_actions) >= 3 and len(set(self.recent_actions[-3:])) == 1:
                if verbose:
                    print(f"[LOOP] Immediate repetition detected: '{action}'")
                tool_args = self._break_loop(action)
            # Detect action cycling (A->B->A->B pattern)
            elif self._detect_cycling():
                if verbose:
                    print(f"[LOOP] Action cycling detected")
                tool_args = self._break_loop(action)
            # Detect semantic repetition (push statue / push the statue / push statue north)
            # (the current action was already appended above, so it counts itself)
            elif self._is_semantic_repeat(action):
                if verbose:
                    print(f"[LOOP] Semantic repetition of '{action}' at this location")
                tool_args = self._break_loop(action)
            # Force movement if stuck at same location too long without score progress
            if self.turns_at_location >= 8 and self.turns_since_score_change >= 8:
                if not self._is_movement_action(tool_args.get("action", "")):
                    if verbose:
                        print(f"[STUCK] {self.turns_at_location} turns here with no score. Forcing movement.")
                    tool_args = self._force_movement()
        # Counts every executed tool call, not only game commands.
        moves += 1
        # Execute the tool
        try:
            result = await client.call_tool(tool_name, tool_args)
            observation = self._extract_result(result)
            if verbose:
                obs_preview = observation[:200] + "..." if len(observation) > 200 else observation
                print(f"[RESULT] {obs_preview}")
        except Exception as e:
            # A failed tool call becomes the next observation instead of aborting the run.
            observation = f"Error: {e}"
            if verbose:
                print(f"[ERROR] {e}")
        # Track location (real game location for agent reasoning)
        location = self._extract_location(observation)
        # Track text-based observation header for professor's location metric
        obs_location = self._extract_observation_location(observation)
        # "New" is detected by comparing set sizes before and after the add.
        prev_loc_count = len(locations_visited)
        locations_visited.add(obs_location)
        new_text_discovered = len(locations_visited) > prev_loc_count
        prev_game_loc_count = len(game_locations_visited)
        game_locations_visited.add(location)
        new_game_loc_discovered = len(game_locations_visited) > prev_game_loc_count
        # Track location stagnation
        if location != self.current_location:
            self.current_location = location
            self.turns_at_location = 1
        else:
            self.turns_at_location += 1
        # Track failed actions at this location
        # (keyed by the location computed AFTER the action ran)
        if tool_name == "play_action":
            executed_action = tool_args.get("action", "")
            if self._action_failed(observation):
                if self.current_location not in self.failed_actions_by_location:
                    self.failed_actions_by_location[self.current_location] = set()
                self.failed_actions_by_location[self.current_location].add(
                    self._normalize_action_key(executed_action)
                )
            # Track item examinations and uses
            self._track_item_usage(executed_action)
        # Update score
        prev_score = self.score
        self._update_score(observation)
        score_changed = self.score != prev_score
        if score_changed:
            self.turns_since_score_change = 0
        else:
            self.turns_since_score_change += 1
        # Print progress summary
        if verbose:
            status_parts = [f"Score: {self.score}"]
            if score_changed:
                status_parts.append(f"(+{self.score - prev_score}!)")
            status_parts.append(f"Texts: {len(locations_visited)}")
            if new_text_discovered:
                status_parts.append(f"(NEW text: {obs_location[:50]})")
            status_parts.append(f"Rooms: {len(game_locations_visited)}")
            if new_game_loc_discovered:
                status_parts.append(f"(NEW room: {location})")
            status_parts.append(f"Moves: {moves}")
            print(f"[PROGRESS] {' | '.join(status_parts)}")
        # Update history
        # NOTE(review): action_str is assigned but never read afterwards.
        action_str = tool_args.get("action", tool_name) if tool_name == "play_action" else tool_name
        self.history.append({
            "step": step,
            "thought": thought,
            "tool": tool_name,
            "args": tool_args,
            "result": observation[:300],
            "location": location,
        })
        # Keep history bounded
        if len(self.history) > 15:
            self.history = self.history[-15:]
        # Record in result history
        history.append((thought, f"{tool_name}({tool_args})", observation[:150]))
        # Check for game over
        if self._is_game_over(observation):
            game_over = True
            if verbose:
                print("\n*** GAME OVER ***")
            break
    # Combine text-based locations (for professor's metric) into locations_visited
    # Store game rooms count in a verbose-only summary at the end
    if verbose:
        print(f"\n--- Location Summary ---")
        print(f" Unique text observations: {len(locations_visited)}")
        print(f" Unique game rooms: {len(game_locations_visited)}")
        print(f" Game rooms: {sorted(game_locations_visited)}")
    return RunResult(
        final_score=self.score,
        # 350 is the fallback used when no maximum score was detected.
        max_score=self.max_score if self.max_score > 0 else 350,
        moves=moves,
        locations_visited=locations_visited,
        game_completed=game_over,
        history=history,
    )
| def _build_prompt(self, observation: str) -> str: | |
| """ | |
| Build a contextual prompt for the LLM with game state and history. | |
| Includes failed-action context so the LLM avoids retrying useless actions. | |
| """ | |
| parts = [] | |
| # Score context | |
| parts.append(f"Current Score: {self.score}") | |
| if self.max_score > 0: | |
| parts.append(f"Max Possible Score: {self.max_score}") | |
| parts.append(f"Current Location: {self.current_location}") | |
| parts.append(f"Turns at this location: {self.turns_at_location}") | |
| parts.append(f"Turns since last score change: {self.turns_since_score_change}") | |
| # Inventory context - critical for item-usage reasoning | |
| if self.known_inventory: | |
| parts.append(f"\nYOUR INVENTORY: {', '.join(self.known_inventory)}") | |
| # Highlight unused items | |
| unused = [item for item in self.known_inventory if item.lower() not in self.items_used] | |
| unexamined = [item for item in self.known_inventory if item.lower() not in self.items_examined] | |
| if unexamined: | |
| parts.append(f" Items NOT YET EXAMINED (examine these!): {', '.join(unexamined)}") | |
| if unused and self.turns_since_score_change >= 3: | |
| parts.append(f" Items NOT YET USED (try using these!): {', '.join(unused)}") | |
| parts.append(f" HINT: Try commands like 'put <item> in <thing>', 'give <item> to <npc>', " | |
| f"'unlock <thing> with <item>', 'turn on <item>', 'attack <enemy> with <item>'") | |
| else: | |
| parts.append("\nYOUR INVENTORY: (empty or unknown - try 'inventory' to check)") | |
| # Recent history for continuity | |
| if self.history: | |
| parts.append("\nRecent actions and results:") | |
| for entry in self.history[-5:]: | |
| action = entry.get("args", {}).get("action", entry["tool"]) | |
| loc = entry.get("location", "") | |
| result_short = entry["result"][:100] | |
| if len(entry["result"]) > 100: | |
| result_short += "..." | |
| parts.append(f" [{loc}] {action} -> {result_short}") | |
| # Failed actions at current location - critical for avoiding retries | |
| failed_here = self.failed_actions_by_location.get(self.current_location, set()) | |
| if failed_here: | |
| parts.append(f"\n[ACTIONS THAT ALREADY FAILED AT THIS LOCATION - DO NOT RETRY THESE]:") | |
| parts.append(f" {', '.join(sorted(failed_here))}") | |
| # Actions already tried at this location | |
| tried_here = self.actions_by_location.get(self.current_location, []) | |
| if len(tried_here) > 3: | |
| unique_tried = sorted(set(tried_here[-10:])) | |
| parts.append(f"\n[ACTIONS ALREADY TRIED HERE (try something new!)]:") | |
| parts.append(f" {', '.join(unique_tried)}") | |
| # Loop warning | |
| if self.recent_actions and len(self.recent_actions) >= 3: | |
| if len(set(self.recent_actions[-3:])) <= 2: | |
| parts.append( | |
| f"\n[WARNING: You are REPEATING actions: {self.recent_actions[-3:]}. " | |
| f"You MUST try something completely different! Move to a new area with " | |
| f"north/south/east/west, or use get_map to find unexplored exits.]" | |
| ) | |
| # Stagnation warning with escalating urgency | |
| if self.turns_at_location >= 4 and self.turns_since_score_change >= 4: | |
| parts.append( | |
| f"\n[CRITICAL: You have been at '{self.current_location}' for {self.turns_at_location} turns " | |
| f"with NO score progress for {self.turns_since_score_change} turns. " | |
| f"LEAVE THIS AREA NOW. Try: north, south, east, west, up, down, enter, exit. " | |
| f"Use get_map to see where you've been and find NEW areas to explore.]" | |
| ) | |
| elif self.turns_at_location >= 3: | |
| parts.append( | |
| f"\n[NOTE: You've been at '{self.current_location}' for {self.turns_at_location} turns. " | |
| f"Consider moving on if you're not making progress.]" | |
| ) | |
| # Current observation | |
| parts.append(f"\nCurrent situation:\n{observation}") | |
| parts.append("\nWhat do you do next?") | |
| return "\n".join(parts) | |
| def _just_picked_up_item(self, observation: str) -> bool: | |
| """Check if the last observation indicates we picked up an item.""" | |
| pickup_indicators = ["taken", "picked up", "you now have", "added to", | |
| "you take", "you get", "you pick up"] | |
| obs_lower = observation.lower() | |
| return any(ind in obs_lower for ind in pickup_indicators) | |
| def _parse_inventory(self, inv_text: str) -> None: | |
| """Parse inventory text to extract item names.""" | |
| inv_lower = inv_text.lower() | |
| if "empty" in inv_lower or "nothing" in inv_lower or "not carrying" in inv_lower: | |
| self.known_inventory = [] | |
| return | |
| # Try to parse "Inventory: item1, item2, item3" format | |
| if "inventory:" in inv_lower: | |
| after_colon = inv_text.split(":", 1)[1].strip() | |
| if after_colon: | |
| items = [item.strip() for item in after_colon.split(",") if item.strip()] | |
| if items: | |
| self.known_inventory = items | |
| return | |
| # Parse line-by-line (common Infocom format: " A brass lantern") | |
| lines = inv_text.strip().split("\n") | |
| items = [] | |
| for line in lines: | |
| line = line.strip() | |
| # Skip header lines | |
| if not line or "carrying" in line.lower() or "inventory" in line.lower(): | |
| continue | |
| # Skip score lines | |
| if line.startswith("[") or line.startswith("+"): | |
| continue | |
| # Strip leading articles and punctuation | |
| cleaned = line.lstrip("- *•") | |
| cleaned = cleaned.strip() | |
| if cleaned: | |
| items.append(cleaned) | |
| if items: | |
| self.known_inventory = items | |
| def _track_item_changes(self, observation: str) -> None: | |
| """Track item pickups/drops from game observation text.""" | |
| obs_lower = observation.lower() | |
| # Detect pickups | |
| pickup_patterns = [ | |
| r"(?:taken|you take|you pick up|you get)\b", | |
| ] | |
| if any(re.search(p, obs_lower) for p in pickup_patterns): | |
| # We picked up something - force an inventory refresh soon | |
| self.last_inventory_check = 0 # Will trigger refresh next step | |
| def _track_item_usage(self, action: str) -> None: | |
| """Track when items are examined or used in commands.""" | |
| action_lower = action.lower().strip() | |
| words = action_lower.split() | |
| if not words: | |
| return | |
| verb = words[0] | |
| target = " ".join(words[1:]) if len(words) > 1 else "" | |
| # Track examinations | |
| if verb in ("examine", "look", "read"): | |
| for item in self.known_inventory: | |
| if item.lower() in target or target in item.lower(): | |
| self.items_examined.add(item.lower()) | |
| # Track usage (any verb that's not examine/take/drop/look) | |
| if verb not in ("examine", "take", "drop", "look", "inventory", "i", | |
| "north", "south", "east", "west", "up", "down", | |
| "n", "s", "e", "w", "u", "d", "enter", "exit"): | |
| for item in self.known_inventory: | |
| if item.lower() in action_lower: | |
| self.items_used.add(item.lower()) | |
| def _parse_response(self, response: str) -> tuple[str, str, dict]: | |
| """ | |
| Parse LLM response to extract thought, tool name, and arguments. | |
| Handles various formatting quirks from the LLM. | |
| """ | |
| thought = "No reasoning provided" | |
| tool_name = "play_action" | |
| tool_args = {"action": "look"} | |
| lines = response.strip().split("\n") | |
| for line in lines: | |
| line_clean = line.strip() | |
| line_upper = line_clean.upper() | |
| if line_upper.startswith("THOUGHT:"): | |
| thought = line_clean.split(":", 1)[1].strip() | |
| elif line_upper.startswith("TOOL:"): | |
| raw_tool = line_clean.split(":", 1)[1].strip().lower() | |
| # Clean markdown artifacts | |
| raw_tool = raw_tool.replace("**", "").replace("*", "").replace("`", "") | |
| raw_tool = raw_tool.split()[0] if raw_tool else "play_action" | |
| tool_name = raw_tool | |
| elif line_upper.startswith("ARGS:"): | |
| args_part = line_clean.split(":", 1)[1].strip() | |
| try: | |
| # Handle single quotes | |
| args_part = args_part.replace("'", '"') | |
| tool_args = json.loads(args_part) | |
| except json.JSONDecodeError: | |
| # Try to extract action from malformed JSON | |
| match = re.search(r'"action"\s*:\s*"([^"]+)"', args_part) | |
| if match: | |
| tool_args = {"action": match.group(1)} | |
| else: | |
| # Last resort: treat the whole thing as an action | |
| cleaned = args_part.strip('{}" ') | |
| if cleaned: | |
| tool_args = {"action": cleaned} | |
| else: | |
| tool_args = {"action": "look"} | |
| return thought, tool_name, tool_args | |
| def _validate_tool_call(self, tool_name: str, tool_args: dict) -> tuple[str, dict]: | |
| """ | |
| Validate and fix common tool call issues. | |
| Maps invalid tool names and cleans action text. | |
| """ | |
| # Fix tool name aliases | |
| if tool_name not in self.tool_names: | |
| tool_alias_map = { | |
| "action": "play_action", "do": "play_action", "command": "play_action", | |
| "execute": "play_action", "game": "play_action", | |
| "map": "get_map", "location": "get_map", "locations": "get_map", | |
| "mem": "memory", "state": "memory", "status": "memory", "info": "memory", | |
| "inv": "inventory", "items": "inventory", | |
| "valid": "get_valid_actions", "actions": "get_valid_actions", | |
| "valid_actions": "get_valid_actions", | |
| } | |
| tool_name = tool_alias_map.get(tool_name, "play_action") | |
| # Clean action text for play_action | |
| if tool_name == "play_action": | |
| action = tool_args.get("action", "look") | |
| action = self._clean_action(action) | |
| tool_args["action"] = action | |
| return tool_name, tool_args | |
| def _clean_action(self, action: str) -> str: | |
| """ | |
| Clean and validate a game action command. | |
| Fixes common invalid verbs and removes formatting artifacts. | |
| """ | |
| # Remove markdown/formatting | |
| action = action.replace("**", "").replace("*", "").replace("`", "") | |
| action = action.strip().lower() | |
| action = action.strip(".,!?;:") | |
| action = " ".join(action.split()) # Normalize whitespace | |
| # Fix invalid verbs that parsers don't recognize | |
| invalid_verb_map = { | |
| "check": "examine", | |
| "inspect": "examine", | |
| "search": "look", | |
| "grab": "take", | |
| "pick up": "take", | |
| "pick": "take", | |
| "use": "examine", | |
| "investigate": "examine", | |
| "observe": "look at", | |
| "collect": "take", | |
| "get": "take", | |
| } | |
| words = action.split() | |
| if words: | |
| # Check single-word verb | |
| if words[0] in invalid_verb_map: | |
| words[0] = invalid_verb_map[words[0]] | |
| action = " ".join(words) | |
| # Check two-word verb | |
| elif len(words) >= 2: | |
| two_word = f"{words[0]} {words[1]}" | |
| if two_word in invalid_verb_map: | |
| action = invalid_verb_map[two_word] + " " + " ".join(words[2:]) | |
| action = action.strip() | |
| if not action: | |
| action = "look" | |
| return action | |
| def _detect_cycling(self) -> bool: | |
| """ | |
| Detect action cycling patterns (A->B->A->B or low diversity over many turns). | |
| """ | |
| # Check for exact 2-step cycle in last 4 actions | |
| if len(self.recent_actions) >= 4: | |
| last4 = self.recent_actions[-4:] | |
| if last4[0] == last4[2] and last4[1] == last4[3]: | |
| return True | |
| # Check for low diversity over last 6 actions | |
| if len(self.recent_actions) >= 6: | |
| recent = self.recent_actions[-6:] | |
| unique = set(recent) | |
| if len(unique) <= 2: | |
| return True | |
| # Check for semantic cycling (normalized keys) | |
| if len(self.recent_actions) >= 4: | |
| last4_keys = [self._normalize_action_key(a) for a in self.recent_actions[-4:]] | |
| if len(set(last4_keys)) <= 2: | |
| return True | |
| return False | |
| def _normalize_action_key(self, action: str) -> str: | |
| """ | |
| Normalize an action to a canonical key for dedup. | |
| 'push statue', 'push the statue', 'push statue north' all become 'push statue'. | |
| """ | |
| action = action.lower().strip() | |
| # Remove articles | |
| for article in [" the ", " a ", " an "]: | |
| action = action.replace(article, " ") | |
| # Remove directional suffixes | |
| for suffix in [" north", " south", " east", " west", " up", " down", | |
| " here", " again", " carefully", " closely"]: | |
| if action.endswith(suffix): | |
| action = action[:-len(suffix)] | |
| # Normalize whitespace | |
| action = " ".join(action.split()) | |
| return action | |
| def _is_semantic_repeat(self, action: str) -> bool: | |
| """ | |
| Check if this action is a semantic repeat of something already tried | |
| at this location 2+ times. | |
| """ | |
| tried_here = self.actions_by_location.get(self.current_location, []) | |
| if len(tried_here) < 2: | |
| return False | |
| action_key = self._normalize_action_key(action) | |
| count = sum(1 for a in tried_here[-8:] if self._normalize_action_key(a) == action_key) | |
| return count >= 2 | |
| def _action_failed(self, observation: str) -> bool: | |
| """ | |
| Check if a game response indicates the action failed/was useless. | |
| """ | |
| obs_lower = observation.lower() | |
| failure_indicators = [ | |
| "can't go that way", "you can't go", "there is no way", | |
| "wall there", "you cannot go", "not a direction", | |
| "can't see any such thing", "doesn't work", "don't understand", | |
| "blocked", "too dark", "there is a wall", | |
| "you can't", "impossible", "nothing happens", | |
| "that doesn't seem to work", "i don't understand", | |
| "that's not something you can", "you don't see", | |
| "i don't know the word", "not something you can", | |
| "already", "can't do that", "won't budge", | |
| "that doesn't make sense", "that's not a verb", | |
| ] | |
| return any(indicator in obs_lower for indicator in failure_indicators) | |
| def _is_movement_action(self, action: str) -> bool: | |
| """Check if an action is a movement command.""" | |
| movements = { | |
| "north", "south", "east", "west", "up", "down", | |
| "n", "s", "e", "w", "u", "d", | |
| "northeast", "northwest", "southeast", "southwest", | |
| "enter", "exit", "in", "out", "climb", | |
| } | |
| return action.strip().lower().split()[0] in movements if action.strip() else False | |
| def _force_movement(self) -> dict: | |
| """ | |
| Force a movement action to escape a stuck location. | |
| Avoids directions that already failed here. | |
| """ | |
| failed_here = self.failed_actions_by_location.get(self.current_location, set()) | |
| tried_here = set(self.actions_by_location.get(self.current_location, [])) | |
| # Prioritize untried directions, then tried-but-not-failed | |
| all_directions = ["north", "south", "east", "west", "up", "down", | |
| "enter", "exit", "northeast", "northwest", "southeast", "southwest"] | |
| # First: directions never tried here | |
| for d in all_directions: | |
| if d not in failed_here and d not in tried_here: | |
| return {"action": d} | |
| # Second: directions tried but not failed (might work for movement) | |
| for d in all_directions: | |
| if d not in failed_here: | |
| return {"action": d} | |
| # All directions failed? Try going back the way we came | |
| return {"action": "look"} | |
| def _break_loop(self, stuck_action: str) -> dict: | |
| """ | |
| Generate a loop-breaking action when the agent is stuck. | |
| Prefers untried directions at the current location. | |
| """ | |
| failed_here = self.failed_actions_by_location.get(self.current_location, set()) | |
| tried_here = set(self.actions_by_location.get(self.current_location, [])) | |
| recent_set = set(self.recent_actions[-5:]) if self.recent_actions else set() | |
| # Priority 1: Try untried movement directions at this location | |
| directions = ["north", "south", "east", "west", "up", "down", "enter", "exit"] | |
| for d in directions: | |
| if d not in failed_here and d not in tried_here and d not in recent_set: | |
| return {"action": d} | |
| # Priority 2: Movement directions not recently used and not failed | |
| for d in directions: | |
| if d not in failed_here and d not in recent_set: | |
| return {"action": d} | |
| # Priority 3: Non-movement fallbacks | |
| fallbacks = ["look", "inventory", "examine room"] | |
| for action in fallbacks: | |
| if action not in recent_set and action != stuck_action: | |
| return {"action": action} | |
| # Priority 4: Any direction not failed | |
| for d in directions: | |
| if d not in failed_here: | |
| return {"action": d} | |
| return {"action": "look"} | |
| def _extract_result(self, result) -> str: | |
| """Extract text from MCP tool result.""" | |
| if hasattr(result, 'content') and result.content: | |
| return result.content[0].text | |
| if isinstance(result, list) and result: | |
| return result[0].text if hasattr(result[0], 'text') else str(result[0]) | |
| return str(result) | |
| def _extract_location(self, observation: str) -> str: | |
| """Extract real game location from observation. | |
| The server appends [Location: X] to every play_action response. | |
| Falls back to first line if not found.""" | |
| # Look for server-injected location tag | |
| match = re.search(r'\[Location:\s*(.+?)\]', observation) | |
| if match: | |
| loc = match.group(1).strip() | |
| if loc and loc != "Unknown": | |
| return loc | |
| # Fallback: first non-empty, non-metadata line | |
| lines = observation.strip().split("\n") | |
| for line in lines: | |
| line = line.strip() | |
| if line and not line.startswith("[") and not line.startswith("+"): | |
| return line | |
| return "Unknown" | |
| def _extract_observation_location(self, observation: str) -> str: | |
| """Extract the text-based location header from observation. | |
| Used for the locations_visited set in RunResult (professor's metric | |
| counts unique text headers, not unique game rooms).""" | |
| lines = observation.strip().split("\n") | |
| for line in lines: | |
| line = line.strip() | |
| if (line and not line.startswith("[") and not line.startswith("+") | |
| and not line.startswith("GAME OVER")): | |
| return line | |
| return "Unknown" | |
| def _update_score(self, text: str) -> None: | |
| """Update score from game text output.""" | |
| patterns = [ | |
| r'\[Score:\s*(\d+)', # [Score: 10 | Moves: 5] | |
| r'Score:\s*(\d+)', # Score: 10 | |
| r'score[:\s]+(\d+)', # score 10 or score: 10 | |
| r'\+(\d+)\s+points?!.*Total:\s*(\d+)', # +5 points! (Total: 15) | |
| ] | |
| for pattern in patterns: | |
| match = re.search(pattern, text, re.IGNORECASE) | |
| if match: | |
| # Use the last group (total score if available) | |
| score_val = int(match.group(match.lastindex)) | |
| self.score = max(self.score, score_val) | |
| # Track max score | |
| max_match = re.search(r'Max Possible Score:\s*(\d+)', text) | |
| if max_match: | |
| self.max_score = int(max_match.group(1)) | |
| def _is_game_over(self, text: str) -> bool: | |
| """Check if the game is over from response text.""" | |
| game_over_phrases = [ | |
| "game over", | |
| "you have died", | |
| "you are dead", | |
| "*** you have died ***", | |
| "you have won", | |
| "*** you have won ***", | |
| "\ngame over", | |
| ] | |
| text_lower = text.lower() | |
| return any(phrase in text_lower for phrase in game_over_phrases) | |
def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
    """
    Forward a prompt to the module-level ``call_llm`` helper.

    Args:
        prompt: Prompt text.
        system_prompt: System prompt text.
        seed: Seed value handed through to ``call_llm``.

    Returns:
        Whatever ``call_llm`` returns for these arguments.
    """
    reply = call_llm(prompt, system_prompt, seed)
    return reply
| # ============================================================================= | |
| # For local testing | |
| # ============================================================================= | |
async def test_agent():
    """Run the agent for a short local session and print a summary."""
    from fastmcp import Client

    # Path to your MCP server
    mcp_script = "mcp_server.py"
    student = StudentAgent()

    async with Client(mcp_script) as client:
        outcome = await student.run(
            client=client,
            game="zork1",
            max_steps=10,
            seed=42,
            verbose=True,
        )

        # Summarize the fields of the returned result.
        print(f"\nFinal Score: {outcome.final_score}")
        print(f"Moves: {outcome.moves}")
        print(f"Locations: {outcome.locations_visited}")
if __name__ == "__main__":
    # Only run the local test session when executed as a script.
    import asyncio

    session = test_agent()
    asyncio.run(session)