Spaces:
Sleeping
Sleeping
| """ | |
| Student Agent for Text Adventure Games | |
| This is your submission file. Implement the StudentAgent class to play | |
| text adventure games using the MCP server you also implement. | |
| Your agent should: | |
| 1. Connect to the MCP server via the provided client | |
| 2. Use the ReAct pattern (Thought -> Action -> Observation) | |
| 3. Call MCP tools to interact with the game | |
| 4. Maximize the game score within the step limit | |
| Required method: | |
| async def run(self, client, game, max_steps, seed, verbose) -> RunResult | |
| The 'client' is a FastMCP Client already connected to your MCP server. | |
| Use it to call tools like: await client.call_tool("play_action", {"action": "look"}) | |
| Tips: | |
| - Start by looking around and understanding your environment | |
| - Keep track of visited locations to avoid loops | |
| - Pick up useful items (lamp, sword, etc.) | |
| - The seed parameter should be used to set your LLM's seed for reproducibility | |
| """ | |
| import json | |
| import os | |
| import re | |
| from dataclasses import dataclass, field | |
| from typing import Optional | |
| from dotenv import load_dotenv | |
| from huggingface_hub import InferenceClient | |
# Pull settings from a local .env file into the process environment
load_dotenv()

# =============================================================================
# LLM Configuration - DO NOT MODIFY
# =============================================================================

# The model is pinned so that every submission is evaluated on equal terms
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

# The Hugging Face token is mandatory: importing this module fails without it
_hf_token = os.environ.get("HF_TOKEN")
if not _hf_token:
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")

# Shared inference client used by call_llm()
LLM_CLIENT = InferenceClient(token=_hf_token)
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """
    Send one system + user exchange to the configured model and return its reply.

    The request is issued through the module-level LLM_CLIENT against
    LLM_MODEL with temperature 0.0, so the only sources of variation are the
    inputs and the seed.

    Args:
        prompt: User message (current game state, history, etc.).
        system_prompt: System message (instructions for the agent).
        seed: Seed forwarded to the completion request for reproducibility.
        max_tokens: Upper bound on the length of the reply (default: 300).

    Returns:
        The message content of the first choice in the completion.

    Example:
        response = call_llm(
            prompt="You are in a forest. What do you do?",
            system_prompt=SYSTEM_PROMPT,
            seed=42,
        )
    """
    completion = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        temperature=0.0,  # Deterministic for reproducibility
        max_tokens=max_tokens,
        seed=seed,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
# The decorator is required: the agent builds RunResult with keyword arguments
# and `history` relies on dataclasses.field(default_factory=...).
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class."""

    final_score: int  # score reported when the run ended
    max_score: int  # maximum score reported by the agent
    moves: int  # number of game commands the agent issued
    locations_visited: set[str]  # names of the rooms seen during the run
    game_completed: bool  # True when the run ended on a game-over message
    error: Optional[str] = None  # optional error description
    # One (thought, tool call, observation) tuple per step; each instance
    # gets its own list.
    history: list[tuple[str, str, str]] = field(default_factory=list)
| # ============================================================================= | |
| # System Prompt - Customize this for your agent | |
| # ============================================================================= | |
# Instructions sent as the system message on every LLM call made by
# StudentAgent.run. The reply format requested here (THOUGHT /
# ACTION_REASONING / TOOL / ARGS, one per line) is what
# StudentAgent._parse_response scans for, and the six tool names listed below
# are the same ones that method accepts; keep both in sync when editing.
SYSTEM_PROMPT = """You are an expert text adventure game player. Your goal is to explore, collect treasures, and maximize your score.
AVAILABLE TOOLS:
1. get_valid_actions - Get list of valid actions at current location (USE THIS FIRST!)
2. play_action - Execute a game command
3. memory - Get current game state, score, and recent history
4. get_map - See explored locations and connections
5. inventory - Check what you're carrying
6. get_location_id - Get current room ID and name
WORKFLOW:
1. First, call get_valid_actions to see what's possible
2. Consider each valid action based on:
- Exploration potential (new rooms, unexplored directions)
- Item collection opportunities
- Puzzle solving possibilities
- Actions not yet tried at this room
3. Choose the most promising action and execute it
RESPOND IN THIS EXACT FORMAT (no markdown):
THOUGHT: <reasoning about the situation and valid actions>
ACTION_REASONING: <evaluate each valid action and why it might be good or bad>
TOOL: <tool_name>
ARGS: <JSON arguments>
Example:
THOUGHT: I'm in a new room and should see what actions are available here.
ACTION_REASONING: Getting valid actions will help me make an informed decision.
TOOL: get_valid_actions
ARGS: {}
Example 2:
THOUGHT: Valid actions are: north, south, take lamp, examine mailbox. The lamp could be useful for dark areas.
ACTION_REASONING: 'take lamp' - lamp is essential for exploring dark areas (HIGH PRIORITY). 'north' - unexplored direction (MEDIUM). 'examine mailbox' - already examined (LOW). 'south' - leads back (LOW).
TOOL: play_action
ARGS: {"action": "take lamp"}
STRATEGY:
- Prioritize taking useful items (lamp, torch, sword, keys)
- Explore systematically, trying all directions from each room
- Avoid repeating failed or useless actions
- Open containers and examine interesting objects
- Track what you've tried and focus on unexplored actions
- Use room IDs to detect when you've revisited the same room"""
| # ============================================================================= | |
| # Student Agent - IMPLEMENT THIS CLASS | |
| # ============================================================================= | |
class StudentAgent:
    """
    ReAct agent that uses get_valid_actions as the core decision-making mechanism.

    Workflow:
    1. Get valid actions at current location
    2. Reason about each action considering history, map, and exploration
    3. Pick the best action and execute it
    """

    # Tools the LLM is allowed to request; any other name falls back to play_action.
    VALID_TOOLS = (
        "play_action",
        "get_valid_actions",
        "memory",
        "get_map",
        "inventory",
        "get_location_id",
    )
    # Command used whenever no usable action can be recovered from the LLM reply.
    DEFAULT_ACTION = "look"
    # Marker that precedes the comma-separated list in get_valid_actions output.
    VALID_ACTIONS_MARKER = "Valid actions:"

    def __init__(self):
        """Initialize agent state tracking."""
        self.history: list[dict] = []  # Recent step records (trimmed to the last 15)
        self.explored_rooms: set[int] = set()  # Visited room IDs
        self.room_unexplored: dict[int, list[str]] = {}  # room_id -> unexplored actions
        self.room_actions_taken: dict[int, list[str]] = {}  # room_id -> actions taken
        self.room_names: dict[int, str] = {}  # room_id -> room name (for display)
        self.current_room_id: int = -1
        self.previous_room_id: int = -1
        self.last_action: str = "look"  # Last action taken
        self.steps_since_map_check: int = 0
        self.valid_actions: list[str] = []  # Valid actions at the current room
        self.score: int = 0
        self.should_get_valid_actions: bool = True  # Fetch valid actions before the next step

    async def run(
        self,
        client,  # FastMCP Client connected to your MCP server
        game: str,
        max_steps: int,
        seed: int,
        verbose: bool = False,
    ) -> "RunResult":
        """
        Run the agent for a game session.

        Each step optionally refreshes the map and the valid-action list,
        asks the LLM for the next tool call, executes it, and records the
        outcome. The loop stops after max_steps steps or as soon as an
        observation contains a game-over phrase.

        Args:
            client: FastMCP Client connected to your MCP server
            game: Name of the game being played (e.g., "zork1"); not used by
                the loop itself
            max_steps: Maximum number of steps to take
            seed: Random seed for reproducibility (seed + step is used per LLM call)
            verbose: Whether to print detailed output

        Returns:
            RunResult with final score and statistics
        """
        locations_visited = set()
        history = []
        moves = 0

        # Get initial observation
        result = await client.call_tool("play_action", {"action": "look"})
        observation = self._extract_result(result)

        # Extract initial room ID from response
        room_id, room_name = self._extract_room_info(observation)
        self.current_room_id = room_id
        self.previous_room_id = room_id
        self.room_names[room_id] = room_name
        self.explored_rooms.add(room_id)
        locations_visited.add(room_name)  # RunResult reports room names, not IDs

        if verbose:
            print(f"\n{observation}\n")

        # Main ReAct loop
        for step in range(1, max_steps + 1):
            if verbose:
                print(f"\n--- Step {step} ---")

            # Check map periodically; the text is only printed, never fed to the LLM
            self.steps_since_map_check += 1
            if self.steps_since_map_check >= 5:
                map_result = await client.call_tool("get_map", {})
                map_text = self._extract_result(map_result)
                if verbose:
                    print(f"[MAP]\n{map_text}\n")
                self.steps_since_map_check = 0

            # Get valid actions when needed (room changed or flag still set)
            if self.should_get_valid_actions:
                try:
                    valid_result = await client.call_tool("get_valid_actions", {})
                    self._store_valid_actions(self._extract_result(valid_result), verbose)
                except Exception as e:
                    # Best effort: the step continues without a valid-action list
                    if verbose:
                        print(f"[WARNING] Could not get valid actions: {e}")

            # Build prompt with context
            prompt = self._build_prompt(observation, step)

            # Call LLM for reasoning (use step-based seed)
            response = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=400)

            # Parse the response
            thought, action_reasoning, tool_name, tool_args = self._parse_response(response)
            if verbose:
                print(f"[THOUGHT] {thought}")
                if action_reasoning:
                    print(f"[ACTION_REASONING] {action_reasoning[:150]}...")
                print(f"[TOOL] {tool_name}({tool_args})")

            # Execute the tool
            try:
                result = await client.call_tool(tool_name, tool_args)
                observation = self._extract_result(result)
                if verbose:
                    print(f"[RESULT] {observation[:200]}...")
            except Exception as e:
                # The error text becomes the observation shown to the LLM next step
                observation = f"Error: {e}"
                if verbose:
                    print(f"[ERROR] {e}")

            if tool_name == "play_action":
                moves += 1
                self._track_play_action(
                    tool_args.get("action", self.DEFAULT_ACTION),
                    observation,
                    locations_visited,
                    verbose,
                )
            elif tool_name == "get_valid_actions":
                # Keep the list the LLM asked for instead of discarding it
                self._store_valid_actions(observation)

            # Update score tracking
            self._update_score(observation)

            # Update history
            self.history.append({
                "step": step,
                "thought": thought,
                "tool": tool_name,
                "args": tool_args,
                "result": observation[:200],
                "room_id": self.current_room_id,
                "room_name": self.room_names.get(self.current_room_id, "Unknown"),
                "score": self.score,
            })
            # Keep only recent history
            if len(self.history) > 15:
                self.history = self.history[-15:]

            # Record for result
            history.append((thought, f"{tool_name}({tool_args})", observation[:100]))

            # Check for game over
            if self._is_game_over(observation):
                if verbose:
                    print("\n*** GAME OVER ***")
                break

        return RunResult(
            final_score=self.score,
            # NOTE(review): hard-coded; presumably the maximum for zork1 (the
            # game used by test_agent). Other games are not distinguished.
            max_score=350,
            moves=moves,
            locations_visited=locations_visited,
            game_completed=self._is_game_over(observation),
            history=history,
        )

    def _store_valid_actions(self, text: str, verbose: bool = False) -> None:
        """
        Parse get_valid_actions output and remember it for the current room.

        Nothing changes when the "Valid actions:" marker is absent, so the
        refresh flag stays set and the fetch is retried on the next step.
        """
        if self.VALID_ACTIONS_MARKER not in text:
            return
        actions_str = text.split(self.VALID_ACTIONS_MARKER, 1)[1].strip()
        # Drop empty entries produced by trailing or doubled commas
        self.valid_actions = [a.strip() for a in actions_str.split(",") if a.strip()]
        # Initialize unexplored actions the first time this room is listed
        if self.current_room_id not in self.room_unexplored:
            self.room_unexplored[self.current_room_id] = self.valid_actions.copy()
        if verbose:
            print(f"[VALID ACTIONS] {', '.join(self.valid_actions[:10])}")
        self.should_get_valid_actions = False

    def _track_play_action(
        self,
        action: str,
        observation: str,
        locations_visited: set,
        verbose: bool,
    ) -> None:
        """Record a game command against the current room and detect room changes."""
        # Track action at room
        self.room_actions_taken.setdefault(self.current_room_id, []).append(action)

        # Remove from unexplored
        pending = self.room_unexplored.get(self.current_room_id)
        if pending and action in pending:
            pending.remove(action)

        # Extract room info from observation
        new_room_id, new_room_name = self._extract_room_info(observation)

        # Check if we moved to a different room
        if new_room_id != self.current_room_id and new_room_id != -1:
            self.previous_room_id = self.current_room_id
            self.current_room_id = new_room_id
            self.room_names[new_room_id] = new_room_name
            locations_visited.add(new_room_name)
            # The cached list belongs to the room we just left, so drop it and
            # refetch even when this room was visited before.
            self.valid_actions = []
            self.should_get_valid_actions = True
            if new_room_id not in self.explored_rooms:
                self.explored_rooms.add(new_room_id)
                if verbose:
                    print(f"[NEW ROOM] #{new_room_id}: {new_room_name}")

        self.last_action = action

    def _build_prompt(self, observation: str, step: int) -> str:
        """
        Build the prompt for the LLM with context about valid actions and exploration.

        Args:
            observation: Text returned by the most recent tool call.
            step: Current step number (not included in the prompt text).

        Returns:
            The sections joined with newlines: room summary, valid/unexplored/
            tried actions, the last three history entries, the observation,
            and a task instruction.
        """
        parts = []
        room_name = self.room_names.get(self.current_room_id, "Unknown")
        parts.append(f"Current Room: #{self.current_room_id} ({room_name})")
        parts.append(f"Explored: {len(self.explored_rooms)} rooms")

        # Show valid actions if available
        if self.valid_actions:
            parts.append("\n[VALID ACTIONS AT THIS ROOM]")
            parts.append(f"{', '.join(self.valid_actions[:15])}")
            if len(self.valid_actions) > 15:
                parts.append(f"... and {len(self.valid_actions) - 15} more")

        # Show unexplored actions at current room
        unexplored = self.room_unexplored.get(self.current_room_id, [])
        if unexplored:
            parts.append(f"\n[UNEXPLORED ACTIONS HERE] {', '.join(unexplored[:10])}")

        # Show actions already taken at current room
        taken = self.room_actions_taken.get(self.current_room_id, [])
        if taken:
            parts.append(f"[ALREADY TRIED HERE] {', '.join(taken[-5:])}")

        # Recent history
        if self.history:
            parts.append("\n[RECENT HISTORY]")
            for entry in self.history[-3:]:
                action = entry.get("args", {}).get("action", entry["tool"])
                room = f"#{entry.get('room_id', '?')}"
                score = entry.get("score", 0)
                result_short = entry["result"][:60] + "..." if len(entry["result"]) > 60 else entry["result"]
                parts.append(f" {action} @ Room {room} (score:{score}) -> {result_short}")

        parts.append(f"\n[CURRENT SITUATION]\n{observation}")
        parts.append("\n[YOUR TASK]")
        if self.should_get_valid_actions:
            parts.append("Call get_valid_actions to see what's possible at this new room.")
        elif self.valid_actions:
            parts.append(f"Analyze the {len(self.valid_actions)} valid actions above. Consider:")
            parts.append("- Actions that explore new rooms")
            parts.append("- Actions that interact with items (take, examine, open)")
            parts.append("- Actions you haven't tried here yet")
            parts.append("Reason about each action, then pick the BEST one.")
        else:
            parts.append("Take an action to continue playing.")
        return "\n".join(parts)

    def _parse_response(self, response: str) -> tuple[str, str, str, dict]:
        """
        Parse LLM response to extract thought, action reasoning, tool name, and arguments.

        The reply is scanned line by line for THOUGHT:, ACTION_REASONING:,
        TOOL: and ARGS: prefixes (case-insensitive); when a prefix occurs
        more than once the last occurrence wins. A missing or empty reply
        yields the defaults.

        Returns:
            Tuple of (thought, action_reasoning, tool_name, args_dict).
            tool_name is always one of VALID_TOOLS. For play_action the dict
            always carries a cleaned, non-empty string under "action"; for
            other tools it is the parsed ARGS object, or {} when none was given.
        """
        thought = "No reasoning provided"
        action_reasoning = ""
        tool_name = "play_action"
        tool_args = None  # None = no usable ARGS line seen

        for line in (response or "").strip().split("\n"):
            line_clean = line.strip()
            line_upper = line_clean.upper()
            if line_upper.startswith("THOUGHT:"):
                thought = line_clean.split(":", 1)[1].strip()
            elif line_upper.startswith("ACTION_REASONING:"):
                action_reasoning = line_clean.split(":", 1)[1].strip()
            elif line_upper.startswith("TOOL:"):
                raw_tool = line_clean.split(":", 1)[1].strip().lower()
                # Strip markdown emphasis the model sometimes adds
                raw_tool = raw_tool.replace("**", "").replace("*", "").replace("`", "")
                tool_name = raw_tool.split()[0] if raw_tool.split() else "play_action"
            elif line_upper.startswith("ARGS:"):
                tool_args = self._parse_args(line_clean.split(":", 1)[1].strip())

        # Validate tool name
        if tool_name not in self.VALID_TOOLS:
            tool_name = "play_action"

        if tool_name == "play_action":
            tool_args = dict(tool_args or {})
            action = tool_args.get("action")
            if not isinstance(action, str):
                action = self.DEFAULT_ACTION
            # Clean up action: lowercase, no markdown, single spaces
            action = action.lower().strip()
            action = action.replace("**", "").replace("*", "").replace("`", "")
            action = " ".join(action.split())
            tool_args["action"] = action or self.DEFAULT_ACTION
        elif tool_args is None:
            # Do not leak a play_action default into tools that take no action
            tool_args = {}

        return thought, action_reasoning, tool_name, tool_args

    @staticmethod
    def _parse_args(args_text: str) -> Optional[dict]:
        """
        Turn the text after "ARGS:" into a dict, or None when nothing usable is found.

        The text is tried as JSON exactly as written first, so apostrophes
        inside valid JSON strings survive. Only if that fails are single
        quotes swapped for double quotes, and as a last resort an "action"
        value is pulled out of malformed JSON with a regex.
        """
        candidates = [args_text]
        if "'" in args_text:
            candidates.append(args_text.replace("'", '"'))

        for candidate in candidates:
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            if isinstance(parsed, dict):
                return parsed

        # Try to extract action from malformed JSON
        for candidate in candidates:
            match = re.search(r'"action"\s*:\s*"([^"]+)"', candidate)
            if match:
                return {"action": match.group(1)}
        return None

    def _extract_result(self, result) -> str:
        """
        Extract text from MCP tool result.

        Accepts an object with a non-empty `content` sequence or a non-empty
        list and returns the first item's text; anything else is stringified.
        """
        if hasattr(result, 'content') and result.content:
            return result.content[0].text
        if isinstance(result, list) and result:
            return result[0].text if hasattr(result[0], 'text') else str(result[0])
        return str(result)

    def _extract_room_info(self, text: str) -> tuple[int, str]:
        """
        Extract room ID and name from MCP server response.

        Returns the current room's ID and stored name when the text carries
        no "[Room #<id>: <name>]" tag, so callers see "no change".
        """
        # Look for pattern: [Room #123: Room Name]
        match = re.search(r'\[Room #(\d+):\s*([^\]]+)\]', text)
        if match:
            room_id = int(match.group(1))
            room_name = match.group(2).strip()
            return room_id, room_name
        return self.current_room_id, self.room_names.get(self.current_room_id, "Unknown")

    def _update_score(self, text: str) -> None:
        """
        Update score from game text.

        Each pattern is tried case-insensitively and the stored score only
        ever increases: it becomes the largest number matched so far.
        """
        patterns = [
            r'Score:\s*(\d+)',
            r'score[:\s]+(\d+)',
            r'\[Score:\s*(\d+)',
            r'Total:\s*(\d+)',
        ]
        for pattern in patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                self.score = max(self.score, int(match.group(1)))

    def _is_game_over(self, text: str) -> bool:
        """Check if the game is over (case-insensitive search for an end-of-game phrase)."""
        game_over_phrases = [
            "game over",
            "you have died",
            "you are dead",
            "*** you have died ***",
            "*** you have won ***",
        ]
        text_lower = text.lower()
        return any(phrase in text_lower for phrase in game_over_phrases)
| # ============================================================================= | |
| # For local testing | |
| # ============================================================================= | |
async def test_agent():
    """Play a short, verbose zork1 session against the local MCP server and print a summary."""
    from fastmcp import Client
    import os

    # The MCP server script is expected next to this file
    here = os.path.dirname(os.path.abspath(__file__))
    server_script = os.path.join(here, "mcp_server.py")

    student = StudentAgent()
    async with Client(server_script) as client:
        outcome = await student.run(
            client=client,
            game="zork1",
            max_steps=20,
            seed=42,
            verbose=True,
        )

        # Summary of the finished session
        print(f"\n{'=' * 50}")
        print(f"Final Score: {outcome.final_score}")
        print(f"Moves: {outcome.moves}")
        print(f"Locations: {len(outcome.locations_visited)}")


if __name__ == "__main__":
    import asyncio

    asyncio.run(test_agent())