Spaces:
Sleeping
Sleeping
| """ | |
| Student Agent for Text Adventure Games | |
| This is your submission file. Implement the StudentAgent class to play | |
| text adventure games using the MCP server you also implement. | |
| Your agent should: | |
| 1. Connect to the MCP server via the provided client | |
| 2. Use the ReAct pattern (Thought -> Action -> Observation) | |
| 3. Call MCP tools to interact with the game | |
| 4. Maximize the game score within the step limit | |
| Required method: | |
| async def run(self, client, game, max_steps, seed, verbose) -> RunResult | |
| The 'client' is a FastMCP Client already connected to your MCP server. | |
| Use it to call tools like: await client.call_tool("play_action", {"action": "look"}) | |
| Tips: | |
| - Start by looking around and understanding your environment | |
| - Keep track of visited locations to avoid loops | |
| - Pick up useful items (lamp, sword, etc.) | |
| - The seed parameter should be used to set your LLM's seed for reproducibility | |
| """ | |
| import json | |
| import os | |
| import re | |
| from dataclasses import dataclass, field | |
| from typing import Optional | |
| from dotenv import load_dotenv | |
| from huggingface_hub import InferenceClient | |
# Load environment variables (e.g. HF_TOKEN) from a local .env file, if any.
load_dotenv()

# =============================================================================
# LLM Configuration - DO NOT MODIFY
# =============================================================================

# Model to use (fixed for fair evaluation)
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

# The inference client authenticates with HF_TOKEN taken from the environment;
# importing this module fails fast when the token is missing or empty.
_hf_token = os.environ.get("HF_TOKEN")
if _hf_token is None or _hf_token == "":
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")

LLM_CLIENT = InferenceClient(token=_hf_token)
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """
    Send one system + user message pair to the configured chat model.

    Args:
        prompt: The user prompt (current game state, history, etc.)
        system_prompt: The system prompt (instructions for the agent)
        seed: Random seed forwarded to the completion request
        max_tokens: Maximum tokens in the response (default: 300)

    Returns:
        The content of the first choice returned by the model.

    Example:
        response = call_llm(
            prompt="You are in a forest. What do you do?",
            system_prompt=SYSTEM_PROMPT,
            seed=42,
        )
    """
    completion = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        temperature=0.0,  # Deterministic for reproducibility
        max_tokens=max_tokens,
        seed=seed,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class.

    The ``@dataclass`` decorator is required: the agent constructs this class
    with keyword arguments and ``history`` relies on ``field(default_factory)``
    so that every result gets its own list.
    """

    # Score at the moment the run stopped.
    final_score: int
    # Maximum score achievable in the game, as known to the agent.
    max_score: int
    # Number of moves taken.
    moves: int
    # Names of the locations seen during the run.
    locations_visited: set[str]
    # True when the game reported that it is over.
    game_completed: bool
    # Human-readable failure description, or None when the run had no error.
    error: Optional[str] = None
    # (thought, tool call, observation excerpt) triples, one per step.
    history: list[tuple[str, str, str]] = field(default_factory=list)
| # ============================================================================= | |
| # System Prompt - Customize this for your agent | |
| # ============================================================================= | |
# The agent loop only ever dispatches `play_action`: any other tool named by
# the model is coerced to `play_action`, and a reply without an "action"
# argument degrades to "look". The prompt therefore advertises that single
# tool, so the model does not waste game moves asking for state/memory/
# inventory that the loop already puts in every user prompt.
SYSTEM_PROMPT = """You are playing a classic text adventure game.

GOAL: Explore the world, solve puzzles, and maximize your score.

AVAILABLE TOOL (use via MCP):
- play_action: Execute a game command (north, take lamp, open mailbox, etc.)

Every turn you are given the current observation, the score, your recent
actions and the valid actions when they are known. You never need to request
them yourself.

RULES:
- Prefer actions from the listed valid actions.
- Avoid repeating the same action in the same place.
- If stuck, try a different valid action.

RESPOND IN THIS EXACT FORMAT (no markdown):
THOUGHT: <your reasoning about what to do next>
TOOL: play_action
ARGS: <JSON arguments, e.g., {"action": "look"}>
"""
| # ============================================================================= | |
| # Student Agent - IMPLEMENT THIS CLASS | |
| # ============================================================================= | |
class StudentAgent:
    """
    ReAct agent that plays a text adventure through an MCP client.

    Each step the agent builds a prompt from the latest observation, asks the
    LLM for a THOUGHT / TOOL / ARGS reply, normalises the proposed action and
    sends it to the server's ``play_action`` tool. When the server exposes a
    ``state`` tool, its JSON payload (keys read here: observation, moves,
    score, max_score, done, valid_actions, location) is preferred over
    scraping the text observation.
    """

    # How many of the most recent actions are kept for loop detection.
    _RECENT_ACTION_WINDOW = 8
    # How many step records are kept in ``self.history``.
    _HISTORY_LIMIT = 50
    # Consecutive LLM failures tolerated before the run is aborted.
    _MAX_LLM_FAILURES = 3
    # Whole-command abbreviations expanded before matching valid actions.
    _DIRECTION_ALIASES = {
        "n": "north",
        "s": "south",
        "e": "east",
        "w": "west",
        "u": "up",
        "d": "down",
        "ne": "northeast",
        "nw": "northwest",
        "se": "southeast",
        "sw": "southwest",
    }

    def __init__(self):
        """Create an agent with empty memory and a default max score of 350."""
        self.history: list[dict] = []
        self.visited_locations: set[str] = set()
        self.recent_actions: list[str] = []
        self.current_valid_actions: list[str] = []
        self.score = 0
        self.max_score = 350

    async def run(
        self,
        client,  # FastMCP Client connected to your MCP server
        game: str,
        max_steps: int,
        seed: int,
        verbose: bool = False,
    ) -> "RunResult":
        """
        Run the agent for a game session.

        Args:
            client: FastMCP Client connected to your MCP server
            game: Name of the game being played (e.g., "zork1"); not used by
                the loop itself
            max_steps: Maximum number of steps to take
            seed: Random seed; ``seed + step`` is passed to each LLM call
            verbose: Whether to print the thought and action of every step

        Returns:
            RunResult with final score and statistics. ``error`` is set when
            the initial server call fails or when the LLM fails
            ``_MAX_LLM_FAILURES`` times in a row.
        """
        # Reset per-run memory so a reused agent instance does not carry
        # actions or locations from a previous game into this one.
        self.history = []
        self.visited_locations = set()
        self.recent_actions = []
        self.current_valid_actions = []
        self.score = 0

        locations_visited: set[str] = set()
        history: list[tuple[str, str, str]] = []
        final_score = 0
        moves = 0
        game_completed = False
        error = None

        try:
            tools = await client.list_tools()
            has_state = any(getattr(tool, "name", None) == "state" for tool in tools)
            result = await client.call_tool("play_action", {"action": "look"})
            observation = self._extract_result(result)
        except Exception as exc:
            return RunResult(
                final_score=0,
                max_score=self.max_score,
                moves=0,
                locations_visited=set(),
                game_completed=False,
                error=f"Failed initial action: {exc}",
                history=[],
            )

        state: dict = {}
        if has_state:
            state = await self._get_state(client)
        if state:
            # _get_state has already refreshed self.score / self.max_score.
            observation = str(state.get("observation") or observation)
            moves = self._to_int(state.get("moves"), 0)
            final_score = self._to_int(state.get("score"), 0)
            game_completed = bool(state.get("done", False))
            self.current_valid_actions = self._valid_actions_from(state)
        else:
            # No structured state: the initial "look" counts as move 1 unless
            # the observation text carries its own score/moves banner.
            final_score, moves = self._parse_score_and_moves(observation, final_score, 1)
        self.score = final_score

        location = self._extract_location(state, observation)
        locations_visited.add(location)
        self.visited_locations.add(location)

        llm_failures = 0
        for step in range(max_steps):
            if game_completed or moves >= max_steps:
                break

            prompt = self._build_prompt(observation, history)
            try:
                response = self._call_llm(prompt, SYSTEM_PROMPT, seed + step)
                llm_failures = 0
            except Exception as exc:
                # A transient LLM failure should not throw away the progress
                # made so far: fall back to the default action and only give
                # up after several failures in a row.
                llm_failures += 1
                if llm_failures >= self._MAX_LLM_FAILURES:
                    error = f"LLM call failed {llm_failures} times in a row: {exc}"
                    break
                if verbose:
                    print(f"[LLM ERROR] {exc}")
                response = ""

            # Only play_action advances the game, so whatever tool the model
            # named, its "action" argument is what gets executed.
            thought, _requested_tool, args = self._parse_response(response)
            action = str(args.get("action") or "look").strip() or "look"
            action = self._canonical_action(action, self.current_valid_actions)
            action = self._avoid_simple_loop(action)

            if verbose:
                print(f"\n--- Step {step + 1} ---")
                print(f"[THOUGHT] {thought}")
                print(f"[ACTION] {action}")

            try:
                result = await client.call_tool("play_action", {"action": action})
                observation = self._extract_result(result)
            except Exception as exc:
                observation = f"Error: {exc}"

            self.recent_actions.append(action.lower())
            self.recent_actions = self.recent_actions[-self._RECENT_ACTION_WINDOW:]

            latest_state = await self._get_state(client) if has_state else {}
            if latest_state:
                state = latest_state
                observation = str(state.get("observation") or observation)
                self.current_valid_actions = self._valid_actions_from(state)
                moves = self._to_int(state.get("moves"), moves + 1)
                final_score = self._to_int(state.get("score"), final_score)
                game_completed = bool(state.get("done", False))
            else:
                final_score, moves = self._parse_score_and_moves(
                    observation, final_score, moves + 1
                )
                game_completed = self._is_game_over(observation)
            self.score = final_score

            # Record the location reached by this action, so the place the
            # very last step ends in is counted too.
            location = self._extract_location(state, observation)
            locations_visited.add(location)
            self.visited_locations.add(location)

            history.append((thought, f"play_action({action})", observation[:100]))
            self.history.append(
                {
                    "step": step + 1,
                    "thought": thought,
                    "action": action,
                    "observation": observation[:200],
                }
            )
            self.history = self.history[-self._HISTORY_LIMIT:]

        return RunResult(
            final_score=final_score,
            max_score=self.max_score,
            moves=moves,
            locations_visited=locations_visited,
            game_completed=game_completed,
            error=error,
            history=history,
        )

    def _build_prompt(self, observation: str, history: list) -> str:
        """
        Build the user prompt for one step.

        Args:
            observation: Latest game text.
            history: (thought, tool call, observation excerpt) triples; only
                the last four are shown to the model.

        Returns:
            Prompt text with score, visit count, recent actions, valid
            actions, recent history and the current observation.
        """
        recent = history[-4:] if history else []
        recent_text = "\n".join(f"- {tool} -> {obs}" for _, tool, obs in recent) or "- none"
        valid_actions = (
            ", ".join(self.current_valid_actions) if self.current_valid_actions else "(unknown)"
        )
        recent_actions = ", ".join(self.recent_actions[-5:]) if self.recent_actions else "(none)"
        return (
            f"Score: {self.score}/{self.max_score}\n"
            f"Visited locations: {len(self.visited_locations)}\n"
            f"Recent actions: {recent_actions}\n"
            f"Valid actions: {valid_actions}\n\n"
            f"Recent history:\n{recent_text}\n\n"
            f"Current observation:\n{observation}\n\n"
            "Pick the single best next action."
        )

    def _parse_response(self, response: str) -> tuple[str, str, dict]:
        """
        Parse an LLM reply into thought, tool name and arguments.

        Labels are matched case-insensitively and markdown decoration around
        them (``**THOUGHT:**``, backticks, list bullets) is ignored. Missing
        or unusable parts keep their defaults, so the result is always safe
        to use: ``args`` is guaranteed to be a dict.

        Returns:
            Tuple of (thought, tool_name, args_dict); defaults are
            ("No thought provided.", "play_action", {"action": "look"}).
        """
        thought = "No thought provided."
        tool_name = "play_action"
        args: dict = {"action": "look"}

        for line in (response or "").strip().splitlines():
            clean = line.strip().lstrip("*#->` ").strip()
            label, sep, value = clean.partition(":")
            if not sep:
                continue
            label = label.strip().strip("*`").upper()
            value = value.strip().lstrip("*").strip()

            if label == "THOUGHT":
                thought = value or thought
            elif label == "TOOL":
                words = value.lower().replace("`", "").replace("*", "").split()
                tool_name = words[0] if words else "play_action"
            elif label == "ARGS":
                parsed = self._parse_args(value)
                if parsed:
                    args = parsed

        return thought, tool_name, args

    @staticmethod
    def _parse_args(raw: str) -> Optional[dict]:
        """Turn the text after ``ARGS:`` into an argument dict, or None if unusable."""
        raw = raw.strip().strip("`").strip()

        # Try the text as-is first so apostrophes inside values survive; the
        # quote-swapped variant handles Python-style {'action': 'look'}.
        for candidate in (raw, raw.replace("'", '"')):
            try:
                parsed = json.loads(candidate)
            except json.JSONDecodeError:
                continue
            if isinstance(parsed, dict):
                return parsed
            if isinstance(parsed, str) and parsed.strip():
                # A bare JSON string is taken to be the action itself.
                return {"action": parsed.strip()}
            # Valid JSON of another type (list, number, null) carries no action.
            return None

        # Broken JSON (e.g. a truncated reply): salvage the action value.
        match = re.search(r'"action"\s*:\s*"([^"]+)"', raw)
        if match:
            return {"action": match.group(1)}

        # Plain text such as "ARGS: north" is treated as the command.
        if raw and not any(ch in raw for ch in '{}[]":'):
            return {"action": raw}
        return None

    async def _get_state(self, client) -> dict:
        """
        Fetch and decode the server's ``state`` tool payload.

        Also refreshes ``self.score`` and ``self.max_score`` from it. A score
        of 0 is honoured; a missing or non-positive max score keeps the
        previous value.

        Returns:
            The decoded dict, or {} when the call fails or the payload is not
            a JSON object.
        """
        try:
            result = await client.call_tool("state", {})
            data = json.loads(self._extract_result(result))
        except Exception:
            # Best effort: callers fall back to parsing the observation text.
            return {}
        if not isinstance(data, dict):
            return {}

        self.score = self._to_int(data.get("score"), self.score)
        max_score = self._to_int(data.get("max_score"), self.max_score)
        if max_score > 0:
            self.max_score = max_score
        return data

    @staticmethod
    def _to_int(value, default: int) -> int:
        """Convert ``value`` to int, returning ``default`` when that is impossible."""
        try:
            return int(value)
        except (TypeError, ValueError):
            return default

    @staticmethod
    def _valid_actions_from(state: dict) -> list[str]:
        """Return ``state["valid_actions"]`` as a list of strings ([] if absent)."""
        raw = state.get("valid_actions") or []
        if not isinstance(raw, (list, tuple)):
            return []
        return [str(item) for item in raw]

    def _extract_result(self, result) -> str:
        """
        Pull the text out of a tool-call result.

        Handles an object with a non-empty ``content`` sequence or a plain
        list; the first item's ``text`` is used when it is a string, otherwise
        the item is stringified. Anything else is returned as ``str(result)``.
        """
        items = getattr(result, "content", None)
        if not items and isinstance(result, list):
            items = result
        if items:
            first = items[0]
            text = getattr(first, "text", None)
            return text if isinstance(text, str) else str(first)
        return str(result)

    def _extract_location(self, state: dict, observation: str) -> str:
        """Return ``state["location"]`` if set, else the observation's first line, else "Unknown"."""
        if state and state.get("location"):
            return str(state["location"])
        first_line = observation.strip().split("\n")[0] if observation else ""
        return first_line.strip() or "Unknown"

    def _parse_score_and_moves(self, text: str, current_score: int, current_moves: int) -> tuple[int, int]:
        """
        Read score and moves from observation text.

        Recognises a ``[Score: N | Moves: M]`` banner, or failing that a
        ``Total: N`` score. Values not found keep the ones passed in. The
        resulting score is also stored in ``self.score``.
        """
        score = current_score
        moves = current_moves
        match = re.search(r"\[Score:\s*(-?\d+)\s*\|\s*Moves:\s*(\d+)\]", text)
        if match:
            score = int(match.group(1))
            moves = int(match.group(2))
        else:
            total = re.search(r"Total:\s*(-?\d+)", text)
            if total:
                score = int(total.group(1))
        self.score = score
        return score, moves

    def _canonical_action(self, action: str, valid_actions: list[str]) -> str:
        """
        Normalise the model's action and align it with the valid actions.

        The action is lower-cased, whitespace-collapsed and single-word
        direction abbreviations are expanded. With valid actions available,
        an exact match or a whole-word prefix match (in either direction)
        returns the server's own spelling. Matching is per word so that
        "northeast" is never mistaken for "north". An action that matches
        nothing is sent unchanged: the game's own rejection message is more
        useful feedback than silently executing an unrelated command.
        """
        cleaned = " ".join(str(action or "").lower().split())
        if not cleaned:
            return "look"
        cleaned = self._DIRECTION_ALIASES.get(cleaned, cleaned)
        if not valid_actions:
            return cleaned

        valid_map = {" ".join(str(a).lower().split()): a for a in valid_actions}
        if cleaned in valid_map:
            return valid_map[cleaned]

        wanted = cleaned.split()
        for norm, original in valid_map.items():
            tokens = norm.split()
            overlap = min(len(wanted), len(tokens))
            if overlap and wanted[:overlap] == tokens[:overlap]:
                return original
        return cleaned

    def _avoid_simple_loop(self, action: str) -> str:
        """
        Replace an action that has just been issued twice in a row.

        A single repeat is allowed (walking "north" twice is normal). On the
        third identical request an alternative from the current valid actions
        is returned, preferring one not in the recent-action window. Without
        an alternative the action is returned unchanged.
        """
        if len(self.recent_actions) < 2:
            return action
        wanted = action.lower()
        if self.recent_actions[-1] != wanted or self.recent_actions[-2] != wanted:
            return action

        recently_used = set(self.recent_actions)
        fallback = None
        for candidate in self.current_valid_actions:
            lowered = candidate.lower()
            if lowered == wanted:
                continue
            if lowered not in recently_used:
                return candidate
            if fallback is None:
                fallback = candidate
        return fallback if fallback is not None else action

    def _is_game_over(self, text: str) -> bool:
        """Return True when the text contains one of the known end-of-game markers."""
        text_lower = (text or "").lower()
        return any(
            marker in text_lower
            for marker in ("game over", "you have died", "you are dead", "*** you have died ***")
        )

    def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
        """
        Call the LLM with the given prompt.

        This is a convenience wrapper - you can also use call_llm() directly.
        """
        return call_llm(prompt, system_prompt, seed)
| # ============================================================================= | |
| # For local testing | |
| # ============================================================================= | |
async def test_agent():
    """Play a short zork1 session against the local MCP server and print the outcome."""
    from fastmcp import Client

    student = StudentAgent()

    # Path to your MCP server
    async with Client("mcp_server.py") as mcp_client:
        outcome = await student.run(
            client=mcp_client,
            game="zork1",
            max_steps=10,
            seed=42,
            verbose=True,
        )
        print(f"\nFinal Score: {outcome.final_score}")
        print(f"Moves: {outcome.moves}")
        print(f"Locations: {outcome.locations_visited}")


if __name__ == "__main__":
    import asyncio

    asyncio.run(test_agent())