Spaces:
Sleeping
Sleeping
"""
ReAct Agent for Text Adventure Games

Uses MCP tools (including Jericho-powered valid actions) to play
text adventure games with reasoning, loop detection, and exploration strategy.
"""
| import json | |
| import os | |
| import re | |
| from collections import deque | |
| from dataclasses import dataclass, field | |
| from typing import Optional | |
| from dotenv import load_dotenv | |
| from huggingface_hub import InferenceClient | |
| load_dotenv() | |
| # ============================================================================= | |
| # LLM Configuration - DO NOT MODIFY | |
| # ============================================================================= | |
# Model identifier passed as ``model=`` to every chat-completion request.
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
# Hugging Face token read from the environment (load_dotenv() has already run).
_hf_token = os.getenv("HF_TOKEN")
if not _hf_token:
    # Abort at import time when the token is missing or empty.
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")
# Module-wide inference client, created with provider="novita".
LLM_CLIENT = InferenceClient(token=_hf_token, provider="novita")
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """Call the LLM with the given prompt.

    Args:
        prompt: Text sent as the ``user`` message.
        system_prompt: Text sent as the ``system`` message.
        seed: Forwarded unchanged as the request's ``seed``.
        max_tokens: Forwarded unchanged as the request's ``max_tokens``.

    Returns:
        The ``content`` of the first choice's message.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    # The temperature is fixed at 0.15 for every request.
    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.15,
        max_tokens=max_tokens,
        seed=seed,
    )
    # NOTE(review): content is returned as-is; confirm it can never be None.
    return response.choices[0].message.content
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class.

    The ``@dataclass`` decorator is required: the fields below are bare
    annotations plus a ``field(default_factory=...)`` default, and the agent
    builds instances with keyword arguments, which only works when the
    dataclass machinery generates ``__init__``.
    """

    # Score when the run ended.
    final_score: int
    # Maximum achievable score reported for the game.
    max_score: int
    # Number of game commands issued.
    moves: int
    # Names of the locations seen during the run.
    locations_visited: set[str]
    # Whether the final observation indicated the game had ended.
    game_completed: bool
    # Optional error description; None when nothing was recorded.
    error: Optional[str] = None
    # One (thought, tool call, truncated observation) tuple per step.
    history: list[tuple[str, str, str]] = field(default_factory=list)
| # ============================================================================= | |
| # Constants | |
| # ============================================================================= | |
# Every command string the agent classifies as a movement attempt.
MOVEMENT_COMMANDS = {
    # Full direction names, both bare and with a "go " prefix.
    *(
        prefix + direction
        for direction in (
            "north", "south", "east", "west", "up", "down",
            "northeast", "northwest", "southeast", "southwest",
        )
        for prefix in ("", "go ")
    ),
    # One- and two-letter abbreviations.
    "n", "s", "e", "w", "u", "d",
    "ne", "nw", "se", "sw",
    # Movement verbs that are not compass directions.
    "enter", "exit", "in", "out",
}
| # ============================================================================= | |
| # System Prompt | |
| # ============================================================================= | |
# System message sent with every LLM call made from the agent's run loop.
# It lists the tools, the scoring hints, the hard rules, and the exact
# THOUGHT / TOOL / ARGS reply layout that the response parser looks for.
SYSTEM_PROMPT = """You play text adventure games. Your goal: maximize score.
Rooms are auto-explored for you (items taken, containers opened, objects examined).
Your job: solve puzzles the auto-explorer can't.
AVAILABLE TOOLS:
- play_action: Run a game command. Example: ARGS: {"action": "put gem in slot"}
- get_valid_actions: See what commands work here. FREE, no move cost.
- get_state_info: Check score, inventory, history. FREE.
- get_map: See room connections. FREE.
- get_inventory: Check items carried. FREE.
HOW TO SCORE POINTS:
1. Use items on things: "put X in Y", "give X to Y", "insert X in Y", "unlock Y with X"
2. Try Key actions from the list (copy exactly)
3. Read clues, follow instructions found in game text
4. Move to new rooms you haven't visited
RULES:
- NEVER drop items. NEVER "put X down". NEVER throw items away.
- If an action had NO_EFFECT, do NOT repeat it.
- One action per turn.
RESPONSE FORMAT (follow exactly):
THOUGHT: <your reasoning>
TOOL: play_action
ARGS: {"action": "<command>"}"""
| # ============================================================================= | |
| # Agent Implementation | |
| # ============================================================================= | |
# Tool names accepted as-is by the agent; any other name the LLM produces is
# mapped through the alias table in StudentAgent._validate_tool.
VALID_TOOLS = {
    "auto_explore_room",
    "get_inventory",
    "get_map",
    "get_state_info",
    "get_valid_actions",
    "play_action",
}
| class StudentAgent: | |
| """ReAct agent with auto-exploration, auto-navigation, and loop detection.""" | |
| def __init__(self): | |
| self.history: list[dict] = [] | |
| self.recent_actions: list[str] = [] | |
| self.recent_tools: list[str] = [] | |
| self.score: int = 0 | |
| self.max_score: int = 0 | |
| self.current_location: str = "" | |
| self.no_effect_count: int = 0 | |
| self.steps_since_score_change: int = 0 | |
| self.visited_rooms: set[str] = set() | |
| # Navigation tracking | |
| self.tried_exits: dict[str, set[str]] = {} # room -> tried directions | |
| self.room_exits: dict[str, list[str]] = {} # room -> available exits | |
| self.room_graph: dict[str, dict[str, str]] = {} # room -> {dir -> dest} | |
| self.failed_exits: dict[str, set[str]] = {} # room -> directions that don't change room | |
| self.steps_in_room: int = 0 | |
| # Inventory tracking for smart re-exploration | |
| self.inventory_version: int = 0 # increments on inventory change | |
| self.room_explored_at_inv: dict[str, int] = {} # room -> inv_version when last explored | |
| self.last_inventory_str: str = "" | |
| # Key action tracking for smarter stagnation handling | |
| self.key_actions_by_room: dict[str, list[str]] = {} | |
| self.tried_actions_by_room: dict[str, set[str]] = {} | |
| self.room_visit_count: dict[str, int] = {} | |
async def run(
    self,
    client,
    game: str,
    max_steps: int,
    seed: int,
    verbose: bool = False,
) -> RunResult:
    """Run the agent for a game session.

    The session starts with a ``look`` and an auto-exploration of the
    starting room. Each following step is either scripted (see
    ``_plan_scripted_step``) or chosen by the LLM; LLM choices are passed
    through ``_validate_tool`` and ``_anti_loop_check`` before execution.

    Args:
        client: MCP client; only ``await client.call_tool(name, args)`` is used.
        game: Name of the game. Not read by this method.
        max_steps: Maximum number of loop iterations.
        seed: Base seed; step ``i`` calls the LLM with ``seed + i``.
        verbose: When true, print observations, decisions and results.

    Returns:
        A RunResult with the final score, the number of ``play_action``
        calls issued inside the loop, the locations seen, whether the last
        observation looked like a game-over message, and the step history.
        ``max_score`` falls back to 350 when no maximum was ever parsed.
    """
    locations_visited: set[str] = set()
    history: list[tuple[str, str, str]] = []
    moves = 0

    # Step 0: initial look to learn the starting location and score.
    result = await client.call_tool("play_action", {"action": "look"})
    observation = self._extract_result(result)
    self._parse_status(observation)
    if self.current_location:
        # An unparsed (empty) location is not a place and is not counted.
        locations_visited.add(self.current_location)
    if verbose:
        print(f"\n{observation}")

    # Step 0.5: auto-explore the starting room. A failure here propagates.
    explore_text = await self._auto_explore_current_room(client)
    observation = f"[Room auto-explored]\n{explore_text}"
    last_valid_actions = explore_text
    if verbose:
        print(f"\n[AUTO-EXPLORE]\n{explore_text}")

    for step in range(1, max_steps + 1):
        old_location = self.current_location

        # --- Decide what to do this step ---
        scripted = self._plan_scripted_step()
        is_auto_nav = scripted is not None
        if scripted is not None:
            tool_name, tool_args, thought, label = scripted
            if verbose:
                print(f"\n--- Step {step}/{max_steps} [{label}] ---")
                if label == "AUTO-NAV":
                    print(f"[ACTION] {tool_args['action']}")
        else:
            # Nothing scripted applies: ask the LLM for a puzzle-solving move.
            prompt = self._build_prompt(observation, step, max_steps, last_valid_actions)
            response = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=250)
            thought, tool_name, tool_args = self._parse_response(response)
            if verbose:
                print(f"\n--- Step {step}/{max_steps} ---")
                print(f"[THOUGHT] {thought}")
                print(f"[TOOL] {tool_name}({tool_args})")
            # Only LLM choices are sanitised and loop-checked; scripted
            # steps are sent to the server unchanged.
            tool_name, tool_args = self._validate_tool(tool_name, tool_args)
            tool_name, tool_args = self._anti_loop_check(tool_name, tool_args)

        # --- Bookkeeping before execution ---
        self.recent_tools.append(tool_name)
        if len(self.recent_tools) > 5:
            self.recent_tools = self.recent_tools[-5:]

        if tool_name == "play_action":
            action = tool_args.get("action", "look")
            self.recent_actions.append(action)
            if len(self.recent_actions) > 10:
                self.recent_actions = self.recent_actions[-10:]
            moves += 1
            lowered = action.lower()
            if lowered in MOVEMENT_COMMANDS:
                self.tried_exits.setdefault(self.current_location, set()).add(lowered)
            # Remembered so stagnation recovery does not retry the same action.
            self.tried_actions_by_room.setdefault(self.current_location, set()).add(lowered)

        # --- Execute the tool ---
        try:
            result = await client.call_tool(tool_name, tool_args)
            observation = self._extract_result(result)
        except Exception as e:
            # Best effort: the error text becomes the observation for the next step.
            observation = f"Error: {e}. Try a different action."
        if verbose:
            print(f"[RESULT] {observation[:300]}")

        # --- Update state from the observation ---
        self._parse_status(observation)
        if self.current_location:
            locations_visited.add(self.current_location)

        # Scripted steps must not count towards the LLM's no-effect streak.
        if is_auto_nav:
            self.no_effect_count = 0

        if self.current_location != old_location:
            self.steps_in_room = 0
            self.room_visit_count[self.current_location] = (
                self.room_visit_count.get(self.current_location, 0) + 1
            )
        else:
            self.steps_in_room += 1

        if tool_name == "play_action":
            lowered = tool_args.get("action", "").lower()
            if lowered in MOVEMENT_COMMANDS:
                if self.current_location != old_location:
                    self.room_graph.setdefault(old_location, {})[lowered] = self.current_location
                else:
                    # The movement left us in the same room: remember it as a dead end.
                    self.failed_exits.setdefault(old_location, set()).add(lowered)
            # Only game text is scanned for direction words; the output of
            # the informational tools is not treated as room description.
            self._scan_for_new_exits(observation)

        # --- Follow-up exploration / caching of action lists ---
        if "NEW_ROOM" in observation:
            refreshed = await self._try_auto_explore(client, verbose, "[AUTO-EXPLORE]")
            if refreshed is not None:
                last_valid_actions = refreshed
                observation = f"[New room auto-explored]\n{refreshed}"
        elif "REVISITED" in observation and tool_name == "play_action":
            # Re-explore a known room only if the inventory changed since
            # the last time it was explored.
            last_inv = self.room_explored_at_inv.get(self.current_location, -1)
            if self.inventory_version > last_inv:
                refreshed = await self._try_auto_explore(client, verbose, "[RE-EXPLORE]")
                if refreshed is not None:
                    last_valid_actions = refreshed
                    observation = f"[Revisited room re-explored]\n{refreshed}"
        elif tool_name == "get_valid_actions":
            last_valid_actions = observation
            self._store_exits(observation)
        elif tool_name == "auto_explore_room":
            last_valid_actions = observation
            self._store_exits(observation)
            self._update_inventory_from_text(observation)

        # --- Record the step ---
        self.history.append({
            "step": step,
            "thought": thought,
            "tool": tool_name,
            "args": tool_args,
            "result": observation[:200],
        })
        history.append((thought, f"{tool_name}({tool_args})", observation[:100]))

        # _is_game_over is case-insensitive, so it also covers "GAME OVER".
        if self._is_game_over(observation):
            if verbose:
                print("\n*** GAME OVER ***")
            break

    return RunResult(
        final_score=self.score,
        max_score=self.max_score if self.max_score > 0 else 350,
        moves=moves,
        locations_visited=locations_visited,
        game_completed=self._is_game_over(observation),
        history=history,
    )

def _plan_scripted_step(self) -> tuple[str, dict, str, str] | None:
    """Return a scripted (tool, args, thought, verbose label) or None for an LLM step.

    Priority: an untried exit (here or reachable through known rooms), then
    a forced move when stuck, then stagnation recovery on every sixth step
    without a score increase.
    """
    direction = self._find_unexplored_exit()

    # Stuck in this room without scoring: force a move through a usable exit.
    if not direction and self.steps_in_room >= 3 and self.steps_since_score_change >= 3:
        failed = self.failed_exits.get(self.current_location, set())
        usable = [e for e in self.room_exits.get(self.current_location, []) if e not in failed]
        if usable:
            direction = self._pick_least_visited_exit(usable) or direction

    if direction:
        return "play_action", {"action": direction}, f"Auto-navigating: {direction}", "AUTO-NAV"

    stagnant = self.steps_since_score_change
    if stagnant > 0 and stagnant % 6 == 0:
        tried = self.tried_actions_by_room.get(self.current_location, set())
        for candidate in self.key_actions_by_room.get(self.current_location, []):
            lowered = candidate.lower()
            if lowered in tried or lowered in MOVEMENT_COMMANDS:
                continue
            if lowered.startswith(("examine ", "look ", "read ", "search ")):
                continue
            return (
                "play_action",
                {"action": candidate},
                "Stagnation: trying untried key action",
                f"STAGNATION KEY-ACTION: {candidate}",
            )
        # No separate "stagnation move" is needed here: with six or more
        # stagnant steps, the forced-move check above has already tried every
        # usable exit whenever steps_in_room >= 3, so re-exploring the room
        # is the only option left.
        return "auto_explore_room", {}, "Re-exploring room after stagnation", "STAGNATION RE-EXPLORE"

    return None

async def _auto_explore_current_room(self, client) -> str:
    """Call auto_explore_room, fold its output into agent state, and return the text."""
    result = await client.call_tool("auto_explore_room", {})
    text = self._extract_result(result)
    self._parse_status(text)
    self._store_exits(text)
    self._update_inventory_from_text(text)
    self.room_explored_at_inv[self.current_location] = self.inventory_version
    return text

async def _try_auto_explore(self, client, verbose: bool, tag: str) -> str | None:
    """Best-effort auto-explore: return the text, or None (reported when verbose) on failure."""
    try:
        text = await self._auto_explore_current_room(client)
    except Exception as exc:
        # Exploration is optional; keep playing, but do not hide the failure.
        if verbose:
            print(f"{tag} failed: {exc}")
        return None
    if verbose:
        print(f"{tag}\n{text[:300]}")
    return text
| def _update_inventory_from_text(self, text: str) -> None: | |
| """Track inventory changes from explore/action results.""" | |
| inv_match = re.search(r'Inventory:\s*(.+)', text) | |
| if inv_match: | |
| inv_str = inv_match.group(1).strip() | |
| if inv_str != self.last_inventory_str: | |
| self.last_inventory_str = inv_str | |
| self.inventory_version += 1 | |
| def _store_exits(self, text: str) -> None: | |
| """Parse and store available exits and key actions from responses.""" | |
| exits = [] | |
| for line in text.split("\n"): | |
| line_stripped = line.strip() | |
| if line_stripped.startswith("Exits:") or line_stripped.startswith("Movement:"): | |
| exits_str = line_stripped.split(":", 1)[1].strip() | |
| if exits_str and exits_str != "none": | |
| exits = [e.strip().lower() for e in exits_str.split(",") if e.strip()] | |
| elif line_stripped.startswith("Key actions:"): | |
| actions_str = line_stripped.split(":", 1)[1].strip() | |
| if actions_str and self.current_location: | |
| actions = [a.strip() for a in actions_str.split(",") if a.strip()] | |
| if actions: | |
| self.key_actions_by_room[self.current_location] = actions | |
| # Also extract directions from Key actions (e.g. "get in southwest") | |
| for action in actions_str.split(","): | |
| action = action.strip().lower() | |
| for prefix in ("get in ", "go "): | |
| if action.startswith(prefix): | |
| dir_part = action[len(prefix):].strip() | |
| if dir_part in MOVEMENT_COMMANDS: | |
| exits.append(dir_part) | |
| if exits and self.current_location: | |
| self.room_exits[self.current_location] = exits | |
| def _find_unexplored_exit(self) -> str | None: | |
| """Find an untried exit from current room, or BFS navigate toward one.""" | |
| # Direct unexplored exit from current room | |
| available = self.room_exits.get(self.current_location, []) | |
| tried = self.tried_exits.get(self.current_location, set()) | |
| failed = self.failed_exits.get(self.current_location, set()) | |
| for exit_dir in available: | |
| if exit_dir not in tried and exit_dir not in failed: | |
| return exit_dir | |
| # BFS to find nearest room with unexplored exits | |
| visited_bfs = {self.current_location} | |
| queue = deque() | |
| # Seed with known connections from current room | |
| for direction, dest in self.room_graph.get(self.current_location, {}).items(): | |
| if dest not in visited_bfs: | |
| visited_bfs.add(dest) | |
| queue.append((dest, direction)) # (room, first_step_to_get_there) | |
| while queue: | |
| room, first_step = queue.popleft() | |
| # Check if this room has unexplored exits | |
| room_available = self.room_exits.get(room, []) | |
| room_tried = self.tried_exits.get(room, set()) | |
| room_failed = self.failed_exits.get(room, set()) | |
| for exit_dir in room_available: | |
| if exit_dir not in room_tried and exit_dir not in room_failed: | |
| return first_step # Navigate toward this room | |
| # Expand through known connections | |
| for direction, dest in self.room_graph.get(room, {}).items(): | |
| if dest not in visited_bfs: | |
| visited_bfs.add(dest) | |
| queue.append((dest, first_step)) | |
| return None | |
| def _pick_least_visited_exit(self, valid_exits: list[str]) -> str | None: | |
| """Pick exit leading to the least-visited room.""" | |
| graph = self.room_graph.get(self.current_location, {}) | |
| best_exit = None | |
| min_visits = float('inf') | |
| last_action = self.recent_actions[-1] if self.recent_actions else "" | |
| for e in valid_exits: | |
| if e == last_action: | |
| continue # Don't go back immediately | |
| dest = graph.get(e) | |
| if dest: | |
| visits = self.room_visit_count.get(dest, 0) | |
| if visits < min_visits: | |
| min_visits = visits | |
| best_exit = e | |
| else: | |
| # Unknown destination - prefer this (unexplored) | |
| return e | |
| return best_exit or (valid_exits[0] if valid_exits else None) | |
| def _scan_for_new_exits(self, text: str) -> None: | |
| """Scan game text for direction words and add new ones as potential exits.""" | |
| all_dirs = { | |
| "north", "south", "east", "west", | |
| "northeast", "northwest", "southeast", "southwest", | |
| "up", "down", | |
| } | |
| current_exits = set(self.room_exits.get(self.current_location, [])) | |
| failed = self.failed_exits.get(self.current_location, set()) | |
| for word in text.lower().split(): | |
| clean = word.strip(".,;:!?\"'()[]") | |
| if clean in all_dirs and clean not in current_exits and clean not in failed: | |
| self.room_exits.setdefault(self.current_location, []).append(clean) | |
| current_exits.add(clean) | |
| def _build_prompt(self, observation: str, step: int, max_steps: int, valid_actions: str = "") -> str: | |
| """Build the prompt for the LLM.""" | |
| parts = [] | |
| remaining = max_steps - step | |
| if remaining < 15: | |
| parts.append(f"!!! Only {remaining} steps left! Use items to score! !!!") | |
| parts.append(f"Score: {self.score}/{self.max_score} | Step: {step}/{max_steps}") | |
| if self.visited_rooms: | |
| parts.append(f"Rooms visited: {len(self.visited_rooms)}") | |
| # Recent history (compact) | |
| if self.history: | |
| parts.append("\nRecent:") | |
| for entry in self.history[-5:]: | |
| if isinstance(entry["args"], dict) and "action" in entry["args"]: | |
| args_str = entry["args"]["action"] | |
| else: | |
| args_str = entry["tool"] | |
| flags = "" | |
| result_text = entry['result'][:60] | |
| if "SCORE_CHANGE" in result_text: | |
| flags = " [SCORED!]" | |
| elif "NO_EFFECT" in result_text: | |
| flags = " [NO_EFFECT]" | |
| parts.append(f" {args_str} -> {result_text}{flags}") | |
| # Warnings | |
| if len(self.recent_actions) >= 3: | |
| last3 = self.recent_actions[-3:] | |
| if len(set(last3)) == 1: | |
| parts.append(f"\n!!! STOP repeating '{last3[0]}'! Do something DIFFERENT! !!!") | |
| elif len(self.recent_actions) >= 4: | |
| last4 = self.recent_actions[-4:] | |
| if last4[0] == last4[2] and last4[1] == last4[3]: | |
| parts.append(f"\n!!! Back-and-forth loop. Go to a NEW room! !!!") | |
| if self.no_effect_count >= 2: | |
| parts.append(f"\n!!! {self.no_effect_count} actions had NO EFFECT. Try Key actions or move! !!!") | |
| if self.steps_since_score_change > 10: | |
| parts.append(f"\n!!! No score in {self.steps_since_score_change} steps! Move to new rooms or try new items! !!!") | |
| if self.steps_in_room >= 3: | |
| parts.append(f"\n!!! Stuck in this room for {self.steps_in_room} turns. Move to a DIFFERENT room! !!!") | |
| # Current observation | |
| parts.append(f"\n--- Current ---\n{observation}") | |
| # Valid actions (if not in observation) | |
| if valid_actions and "Exits:" not in observation and "Key actions:" in valid_actions: | |
| parts.append(f"\n--- Available ---\n{valid_actions}") | |
| return "\n".join(parts) | |
| def _parse_response(self, response: str) -> tuple[str, str, dict]: | |
| """Parse LLM response to extract thought, tool, and arguments. | |
| Handles multiple formats for robustness with smaller models: | |
| - Standard: THOUGHT: / TOOL: / ARGS: {"action": "..."} | |
| - Bare action: ARGS: go north (no JSON) | |
| - ACTION: format: ACTION: go north | |
| - Fallback: extract any quoted action from response | |
| """ | |
| thought = "No reasoning" | |
| tool_name = "play_action" | |
| tool_args = {"action": "look"} | |
| found_tool = False | |
| found_args = False | |
| lines = response.strip().split("\n") | |
| for line in lines: | |
| line_clean = line.strip() | |
| line_upper = line_clean.upper() | |
| if line_upper.startswith("THOUGHT:"): | |
| thought = line_clean.split(":", 1)[1].strip() | |
| elif line_upper.startswith("TOOL:"): | |
| raw_tool = line_clean.split(":", 1)[1].strip() | |
| raw_tool = raw_tool.replace("**", "").replace("*", "").replace("`", "").strip() | |
| raw_tool = raw_tool.split("(")[0].strip() # Handle tool(args) format | |
| raw_tool = raw_tool.split()[0] if raw_tool else "play_action" | |
| tool_name = raw_tool.lower() | |
| found_tool = True | |
| elif line_upper.startswith("ARGS:") or line_upper.startswith("ARG:") or line_upper.startswith("ARGUMENTS:"): | |
| args_part = line_clean.split(":", 1)[1].strip() | |
| found_args = True | |
| try: | |
| args_part_json = args_part.replace("'", '"') | |
| tool_args = json.loads(args_part_json) | |
| except json.JSONDecodeError: | |
| match = re.search(r'"action"\s*:\s*"([^"]+)"', args_part) | |
| if match: | |
| tool_args = {"action": match.group(1)} | |
| else: | |
| # Bare action string (e.g. "ARGS: go north") | |
| clean = args_part.strip().strip('"').strip("'").strip("{}") | |
| # Remove JSON-like remnants | |
| clean = re.sub(r'^action\s*:\s*', '', clean, flags=re.IGNORECASE) | |
| clean = clean.strip().strip('"').strip("'") | |
| if clean: | |
| tool_args = {"action": clean} | |
| elif line_upper.startswith("ACTION:") or line_upper.startswith("COMMAND:"): | |
| # Alternative format some smaller models use | |
| action_str = line_clean.split(":", 1)[1].strip() | |
| action_str = action_str.strip('"').strip("'").strip("`") | |
| if action_str: | |
| tool_name = "play_action" | |
| tool_args = {"action": action_str} | |
| found_tool = True | |
| found_args = True | |
| # Fallback: if no structured output found, try to extract an action | |
| if not found_args: | |
| # Try to find a quoted command in the response | |
| quoted = re.findall(r'"([^"]{2,40})"', response) | |
| if quoted: | |
| # Use the last quoted string as the action (usually the command) | |
| candidate = quoted[-1].lower().strip() | |
| if not any(w in candidate for w in ("thought", "tool", "args", "action")): | |
| tool_args = {"action": candidate} | |
| elif not found_tool: | |
| # Last resort: if response is just a bare game command (1-4 words) | |
| stripped = response.strip().split("\n")[-1].strip() | |
| stripped = stripped.strip('"').strip("'").strip("`").strip("*") | |
| words = stripped.split() | |
| if 1 <= len(words) <= 5 and len(stripped) < 50: | |
| tool_args = {"action": stripped.lower()} | |
| return thought, tool_name, tool_args | |
def _validate_tool(self, tool_name: str, tool_args: dict) -> tuple[str, dict]:
    """Fix common LLM mistakes in tool names and normalise the game command.

    Unknown tool names are mapped through an alias table and default to
    ``play_action``. Tools other than ``play_action`` always get empty
    arguments. For ``play_action`` the command is lowercased, stripped of
    markdown markers and has its whitespace collapsed; a missing, empty or
    non-string command never raises and falls back to ``"look"`` (non-string
    values other than None are converted with ``str``).

    Returns:
        The corrected ``(tool_name, tool_args)`` pair.
    """
    tool_aliases = {
        "action": "play_action", "do": "play_action", "command": "play_action",
        "play": "play_action", "execute": "play_action", "game": "play_action",
        "send": "play_action", "act": "play_action",
        "valid_actions": "get_valid_actions", "validactions": "get_valid_actions",
        "actions": "get_valid_actions", "available": "get_valid_actions",
        "state_info": "get_state_info", "stateinfo": "get_state_info",
        "state": "get_state_info", "memory": "get_state_info", "status": "get_state_info",
        "info": "get_state_info", "check": "get_state_info",
        "map": "get_map", "navigation": "get_map", "rooms": "get_map",
        "inventory": "get_inventory", "inv": "get_inventory",
        "items": "get_inventory", "carrying": "get_inventory",
        "explore": "auto_explore_room", "explore_room": "auto_explore_room",
        "auto_explore": "auto_explore_room", "search": "auto_explore_room",
    }
    if tool_name not in VALID_TOOLS:
        tool_name = tool_aliases.get(tool_name, "play_action")

    if tool_name != "play_action":
        return tool_name, {}

    # The parser may hand over something other than {"action": "<str>"}.
    if isinstance(tool_args, dict):
        raw_action = tool_args.get("action", "look")
    elif isinstance(tool_args, str):
        raw_action = tool_args
    else:
        raw_action = "look"
    if raw_action is None:
        raw_action = "look"
    elif not isinstance(raw_action, str):
        raw_action = str(raw_action)

    action = raw_action.lower().strip()
    action = action.replace("**", "").replace("*", "").replace("`", "")
    action = " ".join(action.split())
    # Never send an empty command to the game.
    return tool_name, {"action": action or "look"}
| def _anti_loop_check(self, tool_name: str, tool_args: dict) -> tuple[str, dict]: | |
| """Override the LLM's choice if a loop is detected.""" | |
| # Info-tool loop: 2+ non-action tools in a row | |
| if tool_name != "play_action" and len(self.recent_tools) >= 2: | |
| if all(t != "play_action" for t in self.recent_tools[-2:]): | |
| return "play_action", {"action": "look"} | |
| if tool_name != "play_action": | |
| return tool_name, tool_args | |
| action = tool_args.get("action", "look") | |
| # Block dropping items | |
| if action.startswith("drop ") or action.startswith("throw "): | |
| return "get_valid_actions", {} | |
| if action.startswith("put ") and action.endswith(" down"): | |
| return "get_valid_actions", {} | |
| # Block rubbing torch/fire on things | |
| if " across " in action and ("torch" in action or "fire" in action): | |
| return "get_valid_actions", {} | |
| # Block "again" / "g" (repeat last) - can cause hidden loops | |
| if action in ("again", "g"): | |
| return "get_valid_actions", {} | |
| # Exact repeat (except look) | |
| if self.recent_actions and action == self.recent_actions[-1] and action != "look": | |
| return "get_valid_actions", {} | |
| # Back-and-forth: A, B, A, about to do B | |
| if len(self.recent_actions) >= 3: | |
| last3 = self.recent_actions[-3:] | |
| if last3[0] == last3[2] and action == last3[1]: | |
| return "get_state_info", {} | |
| # Too many no-effect actions | |
| if self.no_effect_count >= 3: | |
| self.no_effect_count = 0 | |
| return "get_valid_actions", {} | |
| # Too many NPC conversation turns | |
| npc_keywords = ("ask ", "tell ", "talk ", "say ") | |
| if action.startswith(npc_keywords): | |
| npc_count = sum(1 for a in self.recent_actions[-5:] if a.startswith(npc_keywords)) | |
| if npc_count >= 3: | |
| return "get_map", {} | |
| return tool_name, tool_args | |
| def _parse_status(self, text: str) -> None: | |
| """Parse status info from tool responses.""" | |
| loc_match = re.search(r'Location:\s*(.+)', text) | |
| if loc_match: | |
| new_loc = loc_match.group(1).strip() | |
| self.current_location = new_loc | |
| self.visited_rooms.add(new_loc) | |
| score_match = re.search(r'Score:\s*(\d+)/(\d+)', text) | |
| if score_match: | |
| new_score = int(score_match.group(1)) | |
| self.max_score = int(score_match.group(2)) | |
| if new_score > self.score: | |
| self.steps_since_score_change = 0 | |
| else: | |
| self.steps_since_score_change += 1 | |
| self.score = new_score | |
| if "NO_EFFECT" in text: | |
| self.no_effect_count += 1 | |
| else: | |
| self.no_effect_count = 0 | |
| def _extract_result(self, result) -> str: | |
| """Extract text from MCP tool result.""" | |
| if hasattr(result, 'content') and result.content: | |
| return result.content[0].text | |
| if isinstance(result, list) and result: | |
| return result[0].text if hasattr(result[0], 'text') else str(result[0]) | |
| return str(result) | |
| def _is_game_over(self, text: str) -> bool: | |
| """Check if the game is over.""" | |
| game_over_phrases = [ | |
| "game over", "you have died", "you are dead", | |
| "*** you have died ***", | |
| ] | |
| return any(phrase in text.lower() for phrase in game_over_phrases) | |
| # ============================================================================= | |
| # Local Testing | |
| # ============================================================================= | |
async def test_agent():
    """Play one 50-step session of "lostpig" against the local MCP server and print a summary."""
    from fastmcp import Client

    student = StudentAgent()
    async with Client("mcp_server.py") as mcp_client:
        outcome = await student.run(
            client=mcp_client,
            game="lostpig",
            max_steps=50,
            seed=42,
            verbose=True,
        )
        # Summary block.
        divider = "=" * 50
        print(f"\n{divider}")
        print(f"Final Score: {outcome.final_score}/{outcome.max_score}")
        print(f"Moves: {outcome.moves}")
        print(f"Locations: {len(outcome.locations_visited)}")
# Script entry point: when executed directly, drive the test_agent coroutine
# with asyncio.run.
if __name__ == "__main__":
    import asyncio
    asyncio.run(test_agent())