Spaces:
Sleeping
Sleeping
Clarelec
feat: Enhance agent strategy with a detailed system prompt, new state tracking, and update the README to a French implementation report.
e97ef73 | """ | |
| Student Agent for Text Adventure Games | |
| This is your submission file. Implement the StudentAgent class to play | |
| text adventure games using the MCP server you also implement. | |
| Your agent should: | |
| 1. Connect to the MCP server via the provided client | |
| 2. Use the ReAct pattern (Thought -> Action -> Observation) | |
| 3. Call MCP tools to interact with the game | |
| 4. Maximize the game score within the step limit | |
| Required method: | |
| async def run(self, client, game, max_steps, seed, verbose) -> RunResult | |
| The 'client' is a FastMCP Client already connected to your MCP server. | |
| Use it to call tools like: await client.call_tool("play_action", {"action": "look"}) | |
| Tips: | |
| - Start by looking around and understanding your environment | |
| - Keep track of visited locations to avoid loops | |
| - Pick up useful items (lamp, sword, etc.) | |
| - The seed parameter should be used to set your LLM's seed for reproducibility | |
| """ | |
| import json | |
| import os | |
| import re | |
| from dataclasses import dataclass, field | |
| from typing import Optional | |
| from dotenv import load_dotenv | |
| from huggingface_hub import InferenceClient | |
| # Load environment variables | |
| load_dotenv() | |
| # ============================================================================= | |
| # LLM Configuration - DO NOT MODIFY | |
| # ============================================================================= | |
# Model to use (fixed for fair evaluation)
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
# Initialize the LLM client (uses HF_TOKEN from environment)
# The token check runs at import time, so a missing token fails fast here
# rather than on the first LLM call.
_hf_token = os.getenv("HF_TOKEN")
if not _hf_token:
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")
# Shared client instance used by call_llm().
LLM_CLIENT = InferenceClient(token=_hf_token)
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """
    Call the LLM with the given prompt. Use this function in your agent.

    Sends a two-message chat (system + user) to LLM_MODEL through the shared
    LLM_CLIENT and returns the content of the first returned choice.

    Args:
        prompt: The user prompt (current game state, history, etc.)
        system_prompt: The system prompt (instructions for the agent)
        seed: Random seed for reproducibility
        max_tokens: Maximum tokens in response (default: 300)

    Returns:
        The LLM's response text

    Example:
        response = call_llm(
            prompt="You are in a forest. What do you do?",
            system_prompt=SYSTEM_PROMPT,
            seed=42,
        )
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.0,  # Deterministic for reproducibility
        max_tokens=max_tokens,
        seed=seed,
    )
    # Only the first choice is used.
    return response.choices[0].message.content
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class.

    The ``@dataclass`` decorator is required: the fields below rely on
    ``field(default_factory=list)`` and the agent constructs this class with
    keyword arguments, both of which only work on a dataclass.
    """
    # Score reported at the end of the run.
    final_score: int
    # Maximum score used as the reference for this game.
    max_score: int
    # Number of moves counted by the agent.
    moves: int
    # Names of the locations the agent recorded as visited.
    locations_visited: set[str]
    # True when the run ended on a game-over or win message.
    game_completed: bool
    # Error description when the run aborted, otherwise None.
    error: Optional[str] = None
    # (thought, action, observation) entries recorded per step.
    history: list[tuple[str, str, str]] = field(default_factory=list)
| # ============================================================================= | |
| # System Prompt | |
| # ============================================================================= | |
| SYSTEM_PROMPT_TEMPLATE = """You are an expert text adventure game player. You are playing: {game_name}. | |
| {game_description} | |
| Your MISSION: Explore the world, collect items, solve puzzles, and MAXIMIZE your score. | |
| CRITICAL RULES: | |
| 1. ONLY use information from the game's actual responses. NEVER invent rooms, items, or exits. | |
| 2. ALWAYS read the observation carefully — it contains clues about what to do next. | |
| 3. If an action fails, try something COMPLETELY DIFFERENT. Never repeat failed actions. | |
| 4. Pay attention to sounds, smells, and descriptions — they hint at what to do. | |
| 5. If something is described as "already on fire" or "already lit", it IS working. Move on. | |
| AVAILABLE TOOLS: | |
| - play_action: Execute game commands (movement, look, examine, take, open, etc.) | |
| - save_checkpoint: Save before risky actions | |
| - load_checkpoint: Restore a saved state | |
| VALID GAME COMMANDS for play_action: | |
| - Movement: north, south, east, west, up, down, enter, exit, northeast, northwest, southeast, southwest | |
| - Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing> | |
| - Interaction: push <thing>, pull <thing>, turn <thing>, move <thing>, read <thing> | |
| - Senses: look, listen, smell, search <thing> | |
| - Light: light <thing>, turn on <thing>, turn off <thing> | |
| - Other: wait, take all, put <thing> in <container>, give <thing> to <person>, talk to <person> | |
| RESPONSE FORMAT (follow exactly, no markdown): | |
| THOUGHT: <brief reasoning> | |
| TOOL: <tool_name> | |
| ARGS: <JSON arguments> | |
| EXAMPLE 1 — See items in a room: | |
| THOUGHT: I see a lantern and a chest. I should take the lantern first. | |
| TOOL: play_action | |
| ARGS: {{"action": "take lantern"}} | |
| EXAMPLE 2 — If take fails, try open or examine: | |
| THOUGHT: The chest is too heavy to take. I should open it to see what's inside. | |
| TOOL: play_action | |
| ARGS: {{"action": "open chest"}} | |
| EXAMPLE 3 — Exploring: | |
| THOUGHT: The description mentions a path to the east. I should go there. | |
| TOOL: play_action | |
| ARGS: {{"action": "east"}} | |
| STRATEGY (in priority order): | |
| 1. TAKE portable items first (key, lantern, sword, food, pole). If 'take' fails, try 'open' or 'examine' instead. | |
| 2. If an action fails or has no effect, do NOT retry it. Try a completely different verb. | |
| 3. Look at KNOWN ACTION TEMPLATES to find the exact syntax the game understands. | |
| 4. In a NEW room: always try listen, smell, or 'search <object>' to find hidden things. | |
| 5. OPEN containers (chests, boxes, drawers, mailbox) — they often have items inside. | |
| 6. Try LIKELY EXITS from the room description — prioritize unexplored directions. | |
| 7. DARKNESS IS DANGEROUS. If it's dark, immediately 'turn on lamp' or 'light lantern' or go back. | |
| 8. DO NOT repeat an action that already failed (marked ✗ in history). | |
| 9. Prioritize EXPLORATION — move to new rooms over staying in explored ones. | |
| 10. Use SIMPLE object names: 'take lantern' not 'take brass lantern'. Use 1-2 words max. | |
| 11. If blocked by weight or 'carrying too much', DROP an item first, then retry. | |
| 12. If 'open X' fails, try 'pull', 'push', or 'turn' parts of it (lever, knob, button). | |
| 13. If you've visited a room 3+ times with no progress, go somewhere NEW. | |
| Now analyze the current game state and choose the BEST next action.""" | |
| # Game descriptions for common games | |
# Short blurbs injected into the system prompt, keyed by lower-case game id.
# Each blurb must describe the real game: it is shown to the LLM as fact.
GAME_DESCRIPTIONS = {
    "zork1": "Zork I: The Great Underground Empire. Explore the ruins of an ancient underground empire, collect treasures, and deposit them in the trophy case in the white house. Beware of the grue in dark places — always carry a lamp!",
    "zork2": "Zork II: The Wizard of Frobozz. Continue exploring the Great Underground Empire while avoiding the Wizard of Frobozz.",
    "zork3": "Zork III: The Dungeon Master. The final chapter of the Zork trilogy. Face the Dungeon Master and prove your worth.",
    "lostpig": "Lost Pig. You are Grunk, an orc. Your pig has run away, and you must find it. Explore, solve puzzles, and find that pig!",
    "advent": "Colossal Cave Adventure. Explore Colossal Cave, collect treasures and bring them to the building at the surface.",
    "enchanter": "Enchanter. You are a novice enchanter on a quest to defeat the evil warlock Krill.",
    "detective": "Detective. Solve a murder mystery by examining clues and questioning suspects.",
    "pentari": "Pentari. A fantasy adventure with magic and exploration.",
    "library": "The Library. Explore a mysterious library and discover its secrets.",
    # "ztuu" is Zork: The Undiscovered Underground, not an invented "university".
    "ztuu": "Zork: The Undiscovered Underground. Explore a newly discovered section of the Great Underground Empire, solve puzzles, and beware of grues in the dark.",
}
def get_system_prompt(game: str) -> str:
    """Return the system prompt template filled in for *game*.

    The game id is looked up case-insensitively in GAME_DESCRIPTIONS; games
    without an entry get a generic one-sentence description instead.
    """
    lookup_key = game.lower()
    generic_blurb = f"A text adventure game called '{game}'. Explore, solve puzzles, and maximize your score."
    blurb = GAME_DESCRIPTIONS.get(lookup_key, generic_blurb)
    return SYSTEM_PROMPT_TEMPLATE.format(game_name=game, game_description=blurb)
| # ============================================================================= | |
| # Student Agent - IMPLEMENT THIS CLASS | |
| # ============================================================================= | |
| class StudentAgent: | |
| """ | |
| ReAct agent implementation for text adventure games. | |
| Uses the Thought -> Action -> Observation loop to: | |
| 1. Reason about the current game state | |
| 2. Select and execute appropriate MCP tool actions | |
| 3. Learn from observations to make better decisions | |
| Features: | |
| - MCP resources for always-available context (inventory, state, map, history) | |
| - Observation-hash loop detection | |
| - Progressive strategy escalation | |
| """ | |
| # Invalid verb corrections — only correct truly invalid verbs | |
| VERB_CORRECTIONS = { | |
| "check": "examine", | |
| "inspect": "examine", | |
| "grab": "take", | |
| "pick": "take", | |
| "investigate": "examine", | |
| } | |
def __init__(self):
    """Create an agent with empty per-game tracking state."""
    # --- Session identity and scoring ---
    self.history: list[tuple[str, str, str]] = []  # (thought, action, observation)
    self.visited_locations: set[str] = set()
    self.current_location: str = ""
    self.failed_actions: dict[str, list[str]] = {}  # failed actions, keyed by location
    self.score: int = 0
    self.moves: int = 0
    self.max_score: int = 350  # default until the game reports its own maximum
    self.last_actions: list[str] = []  # most recent action strings
    self.scoring_actions: list[str] = []  # actions that raised the score
    self.game_name: str = ""  # name of the game being played

    # --- Loop / stagnation detection ---
    self.observation_hashes: list[int] = []
    self._loop_warning: bool = False
    self.steps_in_location: int = 0  # steps spent at the current location
    self._prev_location_for_counter: str = ""

    # --- Per-room bookkeeping ---
    self.sensory_done_rooms: dict[str, set[str]] = {}  # {room: {"listen", "smell", ...}}
    self.last_observation: str = ""  # latest observation, cached for object extraction
async def run(
    self,
    client,  # FastMCP Client connected to your MCP server
    game: str,
    max_steps: int,
    seed: int,
    verbose: bool = False,
) -> RunResult:
    """
    Run the agent for a game session using the ReAct pattern.

    Each step reads the MCP context resources, asks the LLM for the next
    tool call, sanitises and loop-checks that call, executes it (or answers
    it locally when a guardian blocks it) and records the outcome.

    Args:
        client: FastMCP Client connected to your MCP server
        game: Name of the game being played (e.g., "zork1")
        max_steps: Maximum number of steps to take
        seed: Random seed for reproducibility (use for LLM calls)
        verbose: Whether to print detailed output

    Returns:
        RunResult with final score and statistics. Failures are reported
        through ``RunResult.error`` instead of being raised.
    """

    def _resource_text(res_result) -> str:
        """Normalise a read_resource() result to plain text.

        Handles an object exposing ``.content``, a list/tuple of items
        exposing ``.text``, or anything else (stringified). An empty list
        yields "" so no "[]" noise reaches the prompt.
        """
        payload = res_result.content if hasattr(res_result, "content") else res_result
        if isinstance(payload, str):
            return payload
        if isinstance(payload, (list, tuple)):
            if not payload:
                return ""
            first = payload[0]
            return first.text if hasattr(first, "text") else str(first)
        return str(payload)

    # Reset state for new game
    self.history = []
    self.visited_locations = set()
    self.current_location = ""
    self.failed_actions = {}  # {location: [action1, action2, ...]}
    self.score = 0
    self.moves = 0
    self.last_actions = []
    self.scoring_actions = []
    self.game_name = game
    self.observation_hashes = []
    self._loop_warning = False
    self.steps_in_location = 0
    self._prev_location_for_counter = ""
    self.sensory_done_rooms = {}
    self.last_observation = ""

    game_completed = False
    error_msg = None

    # Generate game-specific system prompt
    system_prompt = get_system_prompt(game)

    # Resource URI -> keyword name passed to _build_prompt
    resource_keys = (
        ("game://inventory", "inventory"),
        ("game://state", "game_state"),
        ("game://history", "action_history"),
        ("game://map", "map_info"),
        ("game://unexplored_exits", "unexplored_exits"),
        ("game://rooms", "rooms_knowledge"),
        ("game://actions", "suggested_actions"),
        ("game://valid_directions", "valid_directions"),
    )
    # Bare movement commands the Exit Guardian is allowed to veto
    compass_moves = {
        "north", "south", "east", "west", "northeast", "northwest",
        "southeast", "southwest", "up", "down",
    }

    try:
        # Step 1: Get initial observation
        result = await client.call_tool("play_action", {"action": "look"})
        observation = result.content[0].text if result and result.content else "No response from game."
        self.last_observation = observation

        if verbose:
            print(f"\n{'='*60}")
            print(f"INITIAL OBSERVATION:\n{observation}")
            print(f"{'='*60}\n")

        # Step 2: Main ReAct loop
        for step in range(max_steps):
            if verbose:
                print(f"\n--- Step {step + 1}/{max_steps} ---")

            # Defined up front so later references are never unbound or stale,
            # even when the tool is already the internal block marker.
            action = ""
            action_key = ""

            # Pre-fetch MCP resources (free — no game step consumed)
            resource_ctx = {}
            for res_uri, res_key in resource_keys:
                try:
                    resource_ctx[res_key] = _resource_text(await client.read_resource(res_uri))
                except Exception:
                    # Best effort: a missing resource just means less context.
                    resource_ctx[res_key] = ""

            # Sync location from server-side tracking
            loc_match = re.search(r'Location:\s*(.+)', resource_ctx.get("game_state", ""))
            if loc_match:
                server_loc = loc_match.group(1).strip()
                if server_loc and server_loc != "Unknown":
                    if self.current_location != server_loc:
                        self.steps_in_location = 0
                    self.current_location = server_loc
                    self.visited_locations.add(server_loc)

            # Build the prompt with context from MCP resources
            prompt = self._build_prompt(observation, self.history, **resource_ctx)

            # Call LLM to get thought and action
            # Use seed + step to vary outputs and prevent loops from identical responses
            try:
                llm_response = call_llm(
                    prompt=prompt,
                    system_prompt=system_prompt,
                    seed=seed + step,
                    max_tokens=300,
                )
            except Exception as e:
                if verbose:
                    print(f"LLM Error: {e}")
                error_msg = f"LLM call failed: {str(e)}"
                break

            # Parse the response
            thought, tool_name, tool_args = self._parse_response(llm_response)

            if verbose:
                print(f"THOUGHT: {thought}")
                print(f"TOOL: {tool_name}")
                print(f"ARGS: {tool_args}")

            # Validate tool and args - default to 'look' if parsing failed
            if not tool_name:
                tool_name = "play_action"
                tool_args = {"action": "look"}
                if verbose:
                    print("⚠️ Parsing failed, defaulting to look")

            # Validate and fix tool calls (verb corrections, tool name fixes)
            tool_name, tool_args = self._validate_tool_call(tool_name, tool_args)

            # === SMART SANITIZER (Round 9) ===
            if tool_name == "play_action" and "action" in tool_args:
                action = tool_args["action"]
                original_action = action
                # Syntax Fix: "bird's nest" -> "nest"
                action = re.sub(r"\b\w+'s\s+(\w+)\b", r"\1", action)
                # Syntax Fix: "pile of leaves" -> "leaves"
                action = re.sub(r"\bpile of\s+(\w+)\b", r"\1", action)
                # Remove "examine" if it's "examine examine" (rare but possible)
                if action.startswith("examine examine "):
                    action = action.replace("examine examine ", "examine ")
                if action != original_action and verbose:
                    print(f"🧹 Sanitized action: '{original_action}' -> '{action}'")
                tool_args["action"] = action

            # Block an action that was already issued within the last 3 steps
            if tool_name == "play_action" and "action" in tool_args:
                action = tool_args["action"]
                action_key = f"play_action({{'action': '{action}'}})"
                if action_key in self.last_actions[-3:]:
                    # INSTANT BLOCK: do not silently modify the action.
                    # Swap in an internal tool name that is answered locally below.
                    if verbose:
                        print(f"⚠️ Loop detected explicitly on: '{action}'")
                    tool_name = "local_guardian_block"
                    tool_args = {"reason": f"Loop detected: You just tried '{action}'. You MUST try a completely different verb from the KNOWN ACTION TEMPLATES."}

            # Loop detection: if same play_action 3+ times in last 8 (even interleaved)
            if tool_name == "play_action":
                action = tool_args.get("action", "look")
                action_key = f"play_action({{'action': '{action}'}})"
                recent_count = sum(1 for a in self.last_actions[-8:] if a == action_key)
                if recent_count >= 3:
                    if verbose:
                        print(f"⚠️ Heavy Loop detected — '{action}' attempted {recent_count}x")
                    tool_name = "local_guardian_block"
                    tool_args = {"reason": f"Heavy Loop detected: You tried '{action}' {recent_count} times recently. Stop this immediately. Try an unexplored exit or a new verb!"}

            # Create action string for tracking
            action_str = f"{tool_name}({tool_args})"

            # Track recent actions (keep last 10)
            if tool_name != "local_guardian_block":
                self.last_actions.append(action_str)
            else:
                self.last_actions.append(f"blocked({action_key})")
            if len(self.last_actions) > 10:
                self.last_actions.pop(0)

            # Execute the tool
            try:
                # Exit Guardian (Round 9): Block invalid moves client-side
                mock_observation = None
                if tool_name == "play_action" and "action" in tool_args:
                    action = tool_args["action"]
                    valid_dirs = resource_ctx.get("valid_directions", "")
                    # Only block if we have valid_dirs AND we've been here > 1 step (allow explore)
                    if valid_dirs and self.steps_in_location > 1 and action in compass_moves:
                        # valid_dirs is expected to look like "east, north, up"
                        if action not in [d.strip() for d in valid_dirs.split(',')]:
                            mock_observation = f"🚫 [Exit Guardian] You can't go '{action}'. Valid exits: {valid_dirs}. Try one of those."

                if tool_name == "local_guardian_block":
                    mock_observation = f"🚫 [Loop Guardian] {tool_args.get('reason', 'Action blocked.')}"

                if mock_observation:
                    # Answered locally: the MCP server is not called for this step.
                    observation = mock_observation
                    if verbose:
                        print(f"🛡️ GUARDIAN BLOCKED: {action}")
                else:
                    result = await client.call_tool(tool_name, tool_args)
                    observation = result.content[0].text if result and result.content else "No response."
            except Exception as e:
                observation = f"Error executing {tool_name}: {str(e)}"
                if verbose:
                    print(f"Tool Error: {e}")

            if verbose:
                print(f"OBSERVATION: {observation[:500]}...")

            # Update state from observation
            self.last_observation = observation
            self._update_state_from_observation(observation, tool_name, tool_args)

            # Record in history
            self.history.append((thought, action_str, observation[:300]))

            # Check for game over
            if "GAME OVER" in observation.upper() or "*** YOU HAVE DIED ***" in observation.upper():
                game_completed = True
                if verbose:
                    print("\n🏁 GAME OVER DETECTED")
                break

            # Check for winning
            if "CONGRATULATIONS" in observation.upper():
                game_completed = True
                if verbose:
                    print("\n🏆 GAME WON!")
                break

        # Get final state from resource, normalised the same way as in the loop
        # so the score is parsed from text rather than from a list repr.
        try:
            state_result = await client.read_resource("game://state")
            self._extract_final_score(_resource_text(state_result))
        except Exception as e:
            # Best effort: keep the last known score.
            if verbose:
                print(f"Final state unavailable, using last known score: {e}")

    except Exception as e:
        error_msg = f"Agent error: {str(e)}"
        if verbose:
            print(f"ERROR: {e}")

    return RunResult(
        final_score=self.score,
        max_score=self.max_score,  # Dynamic based on game
        moves=self.moves,
        locations_visited=self.visited_locations,
        game_completed=game_completed,
        error=error_msg,
        history=self.history,
    )
| # Patterns to extract visible objects/creatures from observations | |
| _OBJECT_PATTERNS = [ | |
| # Standard IF: "You can see X here" | |
| re.compile(r'(?:can see|see|there)\s+(?:a\s+|an\s+|the\s+)?(.+?)\s+(?:here|too)', re.IGNORECASE), | |
| # "X is here", "X sits here", "X stands here", "X leans here" | |
| re.compile(r'^(.+?)\s+(?:is|are|sit|stand|lies?|lean)\s+here', re.IGNORECASE | re.MULTILINE), | |
| # "There is a X" | |
| re.compile(r'there\s+(?:is|are)\s+(?:a\s+|an\s+|the\s+)?(.+?)(?:\.|\n|$)', re.IGNORECASE), | |
| # Exclamatory: "There X!" (informal IF) | |
| re.compile(r'there\s+([\w]+)!', re.IGNORECASE), | |
| # "There X at/in/on/near Y" — catches grammars without articles | |
| # e.g. "There stone chest at back of room" or "There fountain in middle" | |
| re.compile(r'[Tt]here\s+(.+?)\s+(?:at|in|on|near|next to|against|over)', re.IGNORECASE), | |
| # Standalone item lines: "Long green pole lean against wall" | |
| # Starts with adjective/article + noun phrase + posture verb | |
| re.compile(r'^(?:A|An|The|Some|Long|Old|Big|Small|Tall|Short|Heavy|Thin)\s+(.+?)\s+(?:lean|sit|stand|lie|rest|hang)', re.IGNORECASE | re.MULTILINE), | |
| # "X next to Y" or "X over in corner" | |
| re.compile(r'^(.+?)\s+(?:next to|over in|in the corner|on the floor|on the table|on the ground)', re.IGNORECASE | re.MULTILINE), | |
| ] | |
def _is_combat_active(self, observation: str) -> bool:
    """Guess from keywords whether the observation describes ongoing combat.

    Keywords are matched case-insensitively at the START of a word, so
    inflected forms still count ("attacks", "wounded") while keywords buried
    inside unrelated words do not ("hit" in "white", "kill" in "skill").

    Args:
        observation: Latest game text.

    Returns:
        True when a combat keyword is present and the text is shorter than
        1000 characters; longer texts are never classified as combat.
    """
    if len(observation) >= 1000:
        return False
    combat_keywords = (
        'attack', 'fight', 'kill', 'hit', 'miss', 'wound', 'blood',
        'smash', 'dodge', 'lunge', 'slash', 'stab', 'bite', 'claw',
        'enemy', 'monster', 'opponent', 'threat', 'weapon', 'sword',
    )
    # \b anchors the keyword to a word start; the tail is left open for suffixes.
    pattern = r"\b(?:" + "|".join(combat_keywords) + r")"
    return re.search(pattern, observation, re.IGNORECASE) is not None
def _extract_visible_objects(self, observation: str) -> list[str]:
    """Collect candidate object/creature names mentioned in *observation*.

    Every pattern in ``self._OBJECT_PATTERNS`` is applied; the first capture
    group of each match is trimmed of surrounding whitespace and trailing
    punctuation. Empty names, names of 30+ characters and common scenery or
    pronoun words are discarded. The result keeps first-seen order without
    duplicates.
    """
    noise_words = (
        'it', 'that', 'this', 'here', 'there', 'you',
        'nothing', 'something', 'anything', 'everything',
        'light', 'way', 'noise', 'room', 'darkness',
        'lots', 'wall', 'door', 'doorway', 'floor', 'ceiling',
        'tunnel', 'path', 'passage', 'ground', 'stairs',
    )
    found: dict[str, None] = {}  # insertion-ordered set
    for matcher in self._OBJECT_PATTERNS:
        for hit in matcher.finditer(observation):
            candidate = hit.group(1).strip().rstrip('.!,;')
            if not candidate or len(candidate) >= 30:
                continue
            if candidate.lower() in noise_words:
                continue
            found.setdefault(candidate, None)
    return list(found)
@staticmethod
def _clean_object_name(name: str) -> str:
    """Simplify an object name into a short noun for game commands.

    Declared as a static method because it takes no ``self`` yet is invoked
    through the instance (``self._clean_object_name(...)``).

    Rules, applied to the lower-cased name:
      * a parenthetical suffix is dropped: 'torch (black and sooty)' -> 'torch'
      * three or more words keep only the last: 'long green pole' -> 'pole'
        (so an "X of Y" phrase also reduces to its final word)
      * two words drop a leading common adjective: 'stone chest' -> 'chest';
        otherwise both words are kept: 'brass lantern' -> 'brass lantern'
      * a single word is returned lower-cased; blank input yields ''
    """
    # Remove parenthetical suffixes (only when text precedes the parenthesis)
    paren_idx = name.find('(')
    if paren_idx > 0:
        name = name[:paren_idx].strip()

    words = name.lower().split()
    if len(words) >= 3:
        # Multi-word: assume the last word is the noun
        return words[-1]
    if len(words) == 2:
        adjectives = {'big', 'small', 'long', 'short', 'old', 'new', 'little',
                      'large', 'green', 'red', 'blue', 'black', 'white', 'dark',
                      'heavy', 'light', 'broken', 'stone', 'wooden', 'metal'}
        if words[0] in adjectives:
            return words[1]
    # Zero, one, or two non-adjective words: re-join, which also trims
    # stray whitespace so blank names become '' and are filtered by callers.
    return ' '.join(words)
def _build_prompt(self, observation: str, history: list,
                  inventory: str = "", game_state: str = "",
                  action_history: str = "", map_info: str = "",
                  unexplored_exits: str = "", rooms_knowledge: str = "",
                  suggested_actions: str = "",
                  **_ignored) -> str:
    """
    Assemble the user prompt sent to the LLM for the next step.

    Sections are appended in a fixed order (game, combat banner, server
    state, inventory, recent actions, failures, exits, room knowledge,
    suggested actions, object hints, sense hints, warnings, observation,
    instruction) and joined with newlines. Exploration-only sections are
    suppressed while combat is detected. ``history``, ``map_info`` and any
    extra keyword arguments are accepted but not used.
    """
    in_combat = self._is_combat_active(observation)
    exploring = not in_combat

    sections = [f"GAME: {self.game_name}"]
    emit = sections.append

    # --- Combat banner with a weapon suggestion ---
    if in_combat:
        emit("\n🚨 COMBAT ACTIVE! FOCUS ON SURVIVAL: ATTACK, HEAL, OR FLEE. DO NOT EXPLORE.")
        carried = inventory.lower() if inventory else ""
        known_weapons = ('sword', 'axe', 'knife', 'dagger', 'spear', 'bow',
                         'blade', 'mace', 'gladius', 'elven')
        # First listed weapon word found in the inventory text wins
        wielded = next((w for w in known_weapons if w in carried), None)
        if wielded is not None:
            emit(f"⚔️ YOU HAVE A WEAPON: {wielded}. USE IT: 'attack <enemy> with {wielded}'")
        else:
            emit("⚠️ NO WEAPON FOUND! Consider 'flee' or 'run' if health is low.")

    # --- Server-provided state and inventory ---
    if game_state:
        emit(game_state)
    emit(f"\n🎒 INVENTORY: {inventory or 'Empty-handed.'}")

    # --- Last eight lines of the action history resource ---
    if action_history:
        tail = action_history.strip().split('\n')[-8:]
        emit("\nRECENT ACTIONS:\n" + '\n'.join(tail))

    # --- Failures recorded for the current location ---
    here = self.current_location or "Unknown"
    failures_here = self.failed_actions.get(here, [])
    if failures_here:
        emit(f"\n❌ FAILED HERE (don't repeat): {', '.join(failures_here[-5:])}")

    # --- Exploration context (hidden during combat) ---
    if exploring and unexplored_exits:
        emit(f"\n🚪 EXITS:\n{unexplored_exits}")
    if exploring and rooms_knowledge and len(rooms_knowledge) > 20:
        emit(f"\n🗺️ ROOM KNOWLEDGE:\n{rooms_knowledge}")

    if suggested_actions:
        emit(f"\n{suggested_actions}")

    # --- Object detection: split into still-takeable vs. take-already-failed ---
    spotted = []
    for raw_name in self._extract_visible_objects(observation):
        short_name = self._clean_object_name(raw_name)
        if short_name:
            spotted.append(short_name)

    takeable, refused = [], []
    for thing in spotted:
        bucket = refused if f"take {thing}" in failures_here else takeable
        bucket.append(thing)

    if takeable:
        emit(f"\n👀 VISIBLE OBJECTS: {', '.join(takeable[:5])}")
        emit(f"💡 TRY: {' or '.join(f'take {thing}' for thing in takeable[:3])}")
    if refused:
        alternatives = [f"open {thing}" for thing in refused[:2]]
        alternatives += [f"examine {thing}" for thing in refused[:1]]
        emit(f"📦 Can't take these, try: {' or '.join(alternatives)}")

    # --- One verb hint for the first failed 'open X' at this location ---
    failed_open = next((fa for fa in failures_here if fa.startswith("open ")), None)
    if failed_open is not None:
        target = failed_open[5:]
        emit(f"🔧 'open {target}' failed — try: pull {target}, push {target}, turn {target}, pull lever")

    # --- Sense tracking: which of listen/smell/search remain for this room ---
    tried_senses = self.sensory_done_rooms.get(here, set())
    untried = [sense for sense in ("listen", "smell", "search") if sense not in tried_senses]
    if untried:
        if "listen" in tried_senses and "smell" not in tried_senses:
            emit("\n⚡ You listened — now try SMELL for more clues!")
        elif self.steps_in_location <= 1 and len(untried) >= 2:
            emit(f"\n🔴 NEW ROOM! Try {untried[0].upper()} before exploring exits!")
        else:
            emit(f"\n⚡ Still untried here: {', '.join(untried)}")

    # --- Reinforce what scored ---
    if self.scoring_actions:
        emit(f"✅ SCORED POINTS WITH: {', '.join(self.scoring_actions[-3:])}")

    # --- Repetition and loop warnings ---
    if len(self.last_actions) >= 4 and len(set(self.last_actions[-4:])) <= 2:
        emit("\n⚠️ YOU ARE REPEATING YOURSELF! You MUST consult KNOWN ACTION TEMPLATES to find a valid syntax.")
    if "LOOP DETECTED" in observation or "LOOP DETECTED" in game_state:
        emit("\n⚠️ LOOP DETECTED! Change strategy NOW.")
    if self._loop_warning:
        emit("\n🔄 STUCK IN LOOP! Try something COMPLETELY different.")

    emit("\n⚠️ INTERACT ONLY WITH VISIBLE OBJECTS. Do not invent items.")

    # --- Stuck breaker (exploration only) ---
    if exploring and self.steps_in_location >= 4:
        emit(f"\n🚨 STUCK {self.steps_in_location} steps! LEAVE this room — try an unexplored exit NOW.")

    # --- Anti-revisit: count history entries that mention this location ---
    mentions = sum(1 for entry in self.history if here in str(entry))
    if mentions >= 6:
        emit(f"\n⚠️ You've been in '{here}' many times! Go to a DIFFERENT room. Explore somewhere new.")

    # --- Weight hint when the last observation mentions being overloaded ---
    if self.last_observation:
        last_lower = self.last_observation.lower()
        weight_phrases = ("carrying too", "carry stuff", "too heavy", "hands are full",
                          "can't carry", "overburdened", "too hard when")
        if any(phrase in last_lower for phrase in weight_phrases):
            emit("\n💡 BLOCKED BY WEIGHT! Try: DROP an item first, then retry the action.")

    # --- Escalation: history entries since the last scoring marker ---
    idle_moves = 0
    for _thought, _action, past_obs in reversed(self.history):
        if "🎉" in past_obs or "+points" in past_obs.lower():
            break
        idle_moves += 1
    if idle_moves > 15:
        emit("\n🚨 15+ moves without scoring! Try a completely new area.")
    elif idle_moves > 8:
        emit("\n⚠️ 8+ moves without scoring. Explore somewhere new.")

    # --- Observation and closing instruction ---
    emit(f"\nCURRENT OBSERVATION:\n{observation}")
    emit("\nWhat is your next action? Respond: THOUGHT, TOOL, ARGS")

    return "\n".join(sections)
def _parse_response(self, response: str) -> tuple[str, str, dict]:
    """
    Parse LLM response to extract thought, tool name, and arguments.

    The expected layout is ``THOUGHT: ... / TOOL: ... / ARGS: {json}``.
    When ARGS is missing or unusable but a tool was named, the action is
    recovered from the free text: "go <dir>" yields the direction, while
    "take <x>" / "open <x>" yield the full verb phrase so the verb is kept.

    Args:
        response: Raw LLM response string

    Returns:
        Tuple of (thought, tool_name, args_dict). ``tool_name`` is "" when
        no TOOL line was found. A response that cannot be processed at all
        (e.g. ``None``) yields a default ``play_action`` / ``look`` call.
    """
    thought = ""
    tool_name = ""
    args_dict = {}

    try:
        # Extract THOUGHT (everything up to the TOOL line)
        thought_match = re.search(r'THOUGHT:\s*(.+?)(?=TOOL:|$)', response, re.DOTALL | re.IGNORECASE)
        if thought_match:
            thought = thought_match.group(1).strip()

        # Extract TOOL
        tool_match = re.search(r'TOOL:\s*(\w+)', response, re.IGNORECASE)
        if tool_match:
            tool_name = tool_match.group(1).strip()

        # Extract ARGS - try to find JSON
        args_match = re.search(r'ARGS:\s*(\{.*?\})', response, re.DOTALL | re.IGNORECASE)
        if args_match:
            try:
                args_dict = json.loads(args_match.group(1))
            except json.JSONDecodeError:
                # Try to extract action from malformed JSON
                action_match = re.search(r'"action"\s*:\s*"([^"]+)"', args_match.group(1))
                if action_match:
                    args_dict = {"action": action_match.group(1)}

        # Fallback: a tool was named but no args were recovered
        if tool_name and not args_dict:
            lowered = response.lower()
            # (pattern, group): group 0 keeps the verb, group 1 keeps only the capture
            fallback_patterns = (
                (r'\bgo\s+(\w+)', 1),          # "go north"  -> "north"
                (r'\btake\s+\w+', 0),          # "take lamp" -> "take lamp"
                (r'\bopen\s+\w+', 0),          # "open door" -> "open door"
                (r'"(\w+(?:\s+\w+)*)"', 1),    # any quoted phrase
            )
            for pattern, group in fallback_patterns:
                match = re.search(pattern, lowered)
                if match:
                    # Collapse internal whitespace/newlines into single spaces
                    args_dict = {"action": " ".join(match.group(group).split())}
                    break

        # Default to "look" if still no args
        if not args_dict and tool_name == "play_action":
            args_dict = {"action": "look"}

    except (TypeError, AttributeError, ValueError):
        # Non-string or otherwise unusable response: default to look action
        thought = "Parsing failed, defaulting to look"
        tool_name = "play_action"
        args_dict = {"action": "look"}

    return thought, tool_name, args_dict
# Parameterless tools: whatever arguments the LLM attaches to a call to one
# of these are discarded during validation.
NO_ARG_TOOLS = set(("get_valid_actions",))
# Tool names the LLM may use for the inventory; validation reroutes them to
# the "inventory" game command sent through play_action.
INVENTORY_ALIASES = set(("inventory", "inv", "items"))
| def _validate_tool_call(self, tool_name: str, tool_args: dict) -> tuple[str, dict]: | |
| """ | |
| Validate and fix common tool call issues. | |
| Corrects invalid verbs, normalizes tool names, strips bad args. | |
| """ | |
| # Fix tool name aliases | |
| tool_name_fixes = { | |
| "action": "play_action", | |
| "do": "play_action", | |
| "command": "play_action", | |
| # Removed tools — redirect gracefully | |
| "map": "play_action", | |
| "location": "play_action", | |
| "mem": "play_action", | |
| "memory": "play_action", | |
| "state": "play_action", | |
| "status": "play_action", | |
| "get_hints": "play_action", | |
| "get_next_step": "play_action", | |
| "get_map": "play_action", | |
| "get_dictionary": "play_action", | |
| } | |
| # Handle 'examine' / 'look' used as tool name instead of play_action | |
| if tool_name == "examine": | |
| thing = tool_args.get("thing", tool_args.get("object", tool_args.get("item", ""))) | |
| tool_name = "play_action" | |
| tool_args = {"action": f"examine {thing}".strip()} | |
| elif tool_name == "look": | |
| tool_name = "play_action" | |
| tool_args = {"action": "look"} | |
| elif tool_name in self.INVENTORY_ALIASES: | |
| # inventory tool removed — redirect to game command | |
| tool_name = "play_action" | |
| tool_args = {"action": "inventory"} | |
| elif tool_name in tool_name_fixes: | |
| tool_name = tool_name_fixes[tool_name] | |
| # Ensure redirected tools have a valid action arg | |
| if tool_name == "play_action" and "action" not in tool_args: | |
| tool_args = {"action": "look"} | |
| # Strip unexpected args from parameterless tools | |
| if tool_name in self.NO_ARG_TOOLS: | |
| tool_args = {} | |
| # Fix invalid verbs in play_action | |
| if tool_name == "play_action" and "action" in tool_args: | |
| action = tool_args["action"] | |
| words = action.lower().split() | |
| if words and words[0] in self.VERB_CORRECTIONS: | |
| words[0] = self.VERB_CORRECTIONS[words[0]] | |
| action = " ".join(words) | |
| # Clean up formatting artifacts | |
| action = action.lower().strip() | |
| action = action.replace("**", "").replace("*", "").replace("`", "") | |
| action = " ".join(action.split()) | |
| # Fix 5: Block redundant 'look' in same room (< 2 steps) | |
| if action == "look" and self.steps_in_location <= 2 and self.steps_in_location > 0: | |
| action = "listen" # More useful than repeating room description | |
| tool_args["action"] = action | |
| return tool_name, tool_args | |
| # _update_location removed — we rely on server-side tracking | |
| # via game://state resource which uses Jericho's get_player_location() | |
# Leading words of commands treated as movement-style actions, grouped by
# kind: full compass names, compass abbreviations, vertical moves, and
# generic verbs (this last group also lists "look").
MOVEMENT_VERBS = (
    {"north", "south", "east", "west",
     "northeast", "northwest", "southeast", "southwest"}
    | {"n", "s", "e", "w", "ne", "nw", "se", "sw"}
    | {"up", "down", "u", "d"}
    | {"enter", "exit", "climb", "go", "look"}
)
| def _update_state_from_observation(self, observation: str, tool_name: str, tool_args: dict): | |
| """Update agent state based on observation.""" | |
| # Location tracking is done server-side via game://state resource | |
| # Extract score — use most specific match, avoid double-counting | |
| # Priority: "X/Y" format > "Total: X" > "Score: X" | |
| max_score_pattern1 = re.search(r'(\d+)\s*/\s*(\d+)', observation) | |
| total_match = re.search(r'Total:\s*(\d+)', observation) | |
| score_match = re.search(r'Score:\s*(\d+)', observation) | |
| if max_score_pattern1: | |
| current = int(max_score_pattern1.group(1)) | |
| max_val = int(max_score_pattern1.group(2)) | |
| if max_val >= current and max_val > 0: | |
| self.max_score = max_val | |
| self.score = current | |
| elif total_match: | |
| self.score = int(total_match.group(1)) | |
| elif score_match: | |
| self.score = int(score_match.group(1)) | |
| # Extract max score from resource text (e.g., "max achieved: 50") | |
| max_score_pattern2 = re.search(r'max[_\s]?(?:score|achieved)[:\s]+(\d+)', observation, re.IGNORECASE) | |
| if max_score_pattern2: | |
| potential_max = int(max_score_pattern2.group(1)) | |
| if potential_max > 0: | |
| self.max_score = potential_max | |
| # Extract moves | |
| moves_match = re.search(r'Moves:\s*(\d+)', observation) | |
| if moves_match: | |
| self.moves = int(moves_match.group(1)) | |
| # Observation-hash loop detection (lowered threshold: 4 recent, ≥2 matches) | |
| obs_hash = hash(observation[:200]) | |
| self.observation_hashes.append(obs_hash) | |
| if len(self.observation_hashes) >= 4: | |
| recent = self.observation_hashes[-4:] | |
| self._loop_warning = recent.count(obs_hash) >= 2 | |
| else: | |
| self._loop_warning = False | |
| # Track steps in current location | |
| self.steps_in_location += 1 | |
| # Track failed actions (including no-op / "already done" responses) | |
| failure_indicators = [ | |
| "can't go", "can't do", "don't understand", "doesn't work", | |
| "impossible", "can't see", "nothing happens", "you can't", | |
| "i don't know", "what do you want", "there is no", | |
| "already", "no effect", "that doesn't", "doesn't seem", | |
| "isn't here", "not here", "you don't have", | |
| # Informal parser style (covers many Z-machine games) | |
| "that not", "not turn", "not see that", "not allowed", | |
| "not know", "not do that", "not go that way", "not open", "not work", | |
| # Evasion / creature avoidance | |
| "walk right away", "run away", "runs away", "run after", "get dizzy", | |
| # Heavy/immovable objects | |
| "too heavy", "stuck to", "fixed in place", "can't be moved", | |
| # Disambiguation / needs target | |
| "what do you want", | |
| ] | |
| observation_lower = observation.lower() | |
| for indicator in failure_indicators: | |
| if indicator in observation_lower: | |
| if tool_name == "play_action" and "action" in tool_args: | |
| action = tool_args["action"] | |
| loc = self.current_location or "Unknown" | |
| if loc not in self.failed_actions: | |
| self.failed_actions[loc] = [] | |
| if action not in self.failed_actions[loc]: | |
| self.failed_actions[loc].append(action) | |
| break | |
| # Track scoring actions (look for point gain messages) | |
| if tool_name == "play_action" and "action" in tool_args: | |
| action = tool_args["action"] | |
| if "+points" in observation_lower or "🎉" in observation: | |
| if action not in self.scoring_actions: | |
| self.scoring_actions.append(action) | |
| # Mark individual sensory actions as done for current room | |
| if tool_name == "play_action" and "action" in tool_args: | |
| action_word = tool_args["action"].split()[0] if tool_args["action"] else "" | |
| if action_word in ("listen", "smell", "search"): | |
| loc = self.current_location or "Unknown" | |
| if loc not in self.sensory_done_rooms: | |
| self.sensory_done_rooms[loc] = set() | |
| self.sensory_done_rooms[loc].add(action_word) | |
| def _extract_final_score(self, memory_output: str): | |
| """Extract final score from game://state resource text.""" | |
| score_match = re.search(r'Score:\s*(\d+)', memory_output) | |
| if score_match: | |
| self.score = int(score_match.group(1)) | |
| moves_match = re.search(r'Moves:\s*(\d+)', memory_output) | |
| if moves_match: | |
| self.moves = int(moves_match.group(1)) | |
def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
    """
    Send a prompt to the LLM and return its reply.

    Thin convenience wrapper: the three arguments are forwarded positionally,
    unchanged, to call_llm(), which may also be called directly.
    """
    reply = call_llm(prompt, system_prompt, seed)
    return reply
| # ============================================================================= | |
| # For local testing | |
| # ============================================================================= | |
async def test_agent():
    """Run a short local smoke test: 10 steps of zork1 with seed 42."""
    from fastmcp import Client

    # Script that implements the MCP server
    mcp_server_script = "mcp_server.py"
    student = StudentAgent()

    # Fixed settings for the local run
    run_settings = {
        "game": "zork1",
        "max_steps": 10,
        "seed": 42,
        "verbose": True,
    }

    async with Client(mcp_server_script) as client:
        outcome = await student.run(client=client, **run_settings)

        summary_lines = (
            f"\nFinal Score: {outcome.final_score}",
            f"Moves: {outcome.moves}",
            f"Locations: {outcome.locations_visited}",
        )
        for line in summary_lines:
            print(line)
if __name__ == "__main__":
    # Script entry point: drive the local smoke test to completion on a
    # fresh event loop.
    import asyncio

    smoke_test = test_agent()
    asyncio.run(smoke_test)