Spaces:
Sleeping
Sleeping
| """ | |
| Student Agent for Text Adventure Games | |
| This is your submission file. Implement the StudentAgent class to play | |
| text adventure games using the MCP server you also implement. | |
| Your agent should: | |
| 1. Connect to the MCP server via the provided client | |
| 2. Use the ReAct pattern (Thought -> Action -> Observation) | |
| 3. Call MCP tools to interact with the game | |
| 4. Maximize the game score within the step limit | |
| Required method: | |
| async def run(self, client, game, max_steps, seed, verbose) -> RunResult | |
| The 'client' is a FastMCP Client already connected to your MCP server. | |
| Use it to call tools like: await client.call_tool("play_action", {"action": "look"}) | |
| Tips: | |
| - Start by looking around and understanding your environment | |
| - Keep track of visited locations to avoid loops | |
| - Pick up useful items (lamp, sword, etc.) | |
| - The seed parameter should be used to set your LLM's seed for reproducibility | |
| """ | |
| import json | |
| import os | |
| import re | |
| from dataclasses import dataclass, field | |
| from typing import Optional | |
| from dotenv import load_dotenv | |
| from huggingface_hub import InferenceClient | |
# Silence transformers warnings in local mode (prevents repeated
# max_length/max_new_tokens spam).
try:
    import transformers

    transformers.utils.logging.set_verbosity_error()
except Exception:
    # Best-effort only: transformers is optional (hosted mode does not need it),
    # so any import/configuration problem is deliberately ignored here.
    pass

# Load environment variables from a local .env file (if any).
load_dotenv()

# Set USE_LOCAL_MODEL=1 in your .env to use a locally downloaded model.
# The flag is compared case-insensitively so "True" / "YES" behave like "true" / "yes".
USE_LOCAL_MODEL = os.getenv("USE_LOCAL_MODEL", "0").strip().lower() in ("1", "true", "yes")
# Hugging Face model id loaded by the local pipeline when USE_LOCAL_MODEL is on.
LOCAL_MODEL_ID = os.getenv("LOCAL_MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")
| # ============================================================================= | |
| # LLM Configuration - DO NOT MODIFY | |
| # ============================================================================= | |
# Model to use (fixed for fair evaluation)
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

# Initialize the LLM client based on mode.
# Exactly one of `_local_pipeline` / `LLM_CLIENT` is non-None after this block.
_local_pipeline = None
if USE_LOCAL_MODEL:
    # Heavy imports are deferred to this branch so hosted mode does not need
    # torch/transformers to be importable here.
    import torch
    from transformers import pipeline as _hf_pipeline

    _local_pipeline = _hf_pipeline(
        "text-generation",
        model=LOCAL_MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    LLM_CLIENT = None
else:
    # Hosted inference requires a Hugging Face token; fail at import time
    # with a clear message if it is missing.
    _hf_token = os.getenv("HF_TOKEN")
    if not _hf_token:
        raise ValueError("HF_TOKEN not found. Set it in your .env file.")
    LLM_CLIENT = InferenceClient(token=_hf_token)
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """
    Call the LLM with the given prompt. Use this function in your agent.

    The request is sent as a two-message chat (system + user). When
    USE_LOCAL_MODEL is enabled and the local pipeline was created, the local
    pipeline is used; otherwise the hosted InferenceClient is used.

    Args:
        prompt: The user prompt (current game state, history, etc.)
        system_prompt: The system prompt (instructions for the agent)
        seed: Random seed for reproducibility. Only forwarded on the hosted
            path; the local pipeline call below does not receive it.
        max_tokens: Maximum tokens in response (default: 300). The local path
            additionally caps this at 128 new tokens.

    Returns:
        The LLM's response text

    Example:
        response = call_llm(
            prompt="You are in a forest. What do you do?",
            system_prompt=SYSTEM_PROMPT,
            seed=42,
        )
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    if USE_LOCAL_MODEL and _local_pipeline is not None:
        # Keep local generation shorter + quieter
        outputs = _local_pipeline(
            messages,
            max_new_tokens=min(max_tokens, 128),
            temperature=0.0001,  # Near-deterministic (0.0 unsupported by some backends)
            do_sample=True,
        )
        # The pipeline returns the whole conversation; the last message is the
        # newly generated assistant reply.
        return outputs[0]["generated_text"][-1]["content"]
    # Hosted inference (may fail with 402 if credits depleted)
    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.0,  # Deterministic for reproducibility
        max_tokens=max_tokens,
        seed=seed,
    )
    return response.choices[0].message.content
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class.

    The class must be a dataclass: callers construct it with keyword
    arguments and `history` relies on `field(default_factory=list)`, neither
    of which works on a plain class.
    """

    # Score reported at the end of the session.
    final_score: int
    # Maximum score attainable in the game.
    max_score: int
    # Number of game actions the agent performed.
    moves: int
    # Names of the locations the agent recorded during the session.
    locations_visited: set[str]
    # True when the session ended because the game itself finished.
    game_completed: bool
    # Optional error description; None when nothing went wrong.
    error: Optional[str] = None
    # One (thought, tool_name, args_json) tuple per step taken.
    history: list[tuple[str, str, str]] = field(default_factory=list)
| # ============================================================================= | |
| # System Prompt - Customize this for your agent | |
| # ============================================================================= | |
# System prompt sent with every LLM call. It fixes the reply format
# (THOUGHT / TOOL / ARGS) that StudentAgent._parse_response expects.
SYSTEM_PROMPT = """You are playing a classic text adventure game.
GOAL: Explore the world, solve puzzles, and maximize your score.
AVAILABLE TOOLS (use via MCP):
- play_action: Execute a game command (north, take lamp, open mailbox, etc.)
- memory: Get current game state and history (if implemented)
- inventory: Check what you're carrying (if implemented)
VALID GAME COMMANDS for play_action:
- Movement: north, south, east, west, up, down, enter, exit
- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
- Other: look, inventory, read <thing>, turn on lamp
RESPOND IN THIS EXACT FORMAT (no markdown):
THOUGHT: <your reasoning about what to do next>
TOOL: <tool_name>
ARGS: <JSON arguments, e.g., {"action": "look"}>
Example:
THOUGHT: I should look around to see where I am.
TOOL: play_action
ARGS: {"action": "look"}
"""
| # ============================================================================= | |
| # Student Agent - IMPLEMENT THIS CLASS | |
| # ============================================================================= | |
class StudentAgent:
    """
    ReAct-style agent that plays a text adventure through MCP tools.

    Each step the agent builds a prompt from the latest observation, asks the
    LLM for a THOUGHT / TOOL / ARGS reply, extracts the proposed game command,
    applies a few anti-loop heuristics, and sends the command to the
    `play_action` tool.

    State kept on the instance (reset at the start of every `run`):
        score: highest score seen in any observation so far.
        failed_actions: commands whose observation was identical to the one
            before them (treated as "did nothing").
        last_obs_norm: normalised text of the most recent observation.
        recent_actions: the last few commands sent, newest last.
    """

    # Maximum score reported in RunResult (Zork1 max score, adjust if needed).
    DEFAULT_MAX_SCORE = 350
    # Movement commands tried, in this order, when the agent needs a fallback.
    _MOVEMENT_COMMANDS = ("north", "south", "east", "west", "up", "down", "in", "out")
    # Verbs combined with words from the observation once all moves have failed.
    _FALLBACK_VERBS = ("examine", "take", "open")
    # Common words that are never worth interacting with.
    _STOP_WORDS = frozenset({
        "the", "and", "you", "are", "with", "that", "this", "from", "your",
        "have", "here", "there", "into", "over", "under", "would", "could",
        "should", "what", "when", "then", "than", "them", "been", "were",
        "will", "just", "about", "some", "where", "which",
    })
    _MAX_RECENT_ACTIONS = 12
    _MAX_FAILED_IN_PROMPT = 20
    _MAX_FALLBACK_WORDS = 25

    def __init__(self):
        """Create an agent with empty per-session state."""
        self._reset_state()

    def _reset_state(self) -> None:
        """Clear all per-session tracking so one agent can play several runs."""
        self.score = 0
        self.failed_actions = set()
        self.last_obs_norm = ""
        self.recent_actions = []

    async def run(
        self,
        client,  # FastMCP Client connected to your MCP server
        game: str,
        max_steps: int,
        seed: int,
        verbose: bool = False,
    ) -> "RunResult":
        """
        Run the agent for a game session.

        Args:
            client: FastMCP Client connected to your MCP server
            game: Name of the game being played (e.g., "zork1"); currently
                not used by the agent itself.
            max_steps: Maximum number of steps to take
            seed: Random seed for reproducibility; step `i` calls the LLM
                with `seed + i`.
            verbose: Whether to print one status line per step

        Returns:
            RunResult with final score and statistics
        """
        # Start from a clean slate: leftovers from a previous run would
        # otherwise block actions and inflate the score.
        self._reset_state()
        locations_visited = set()
        history = []
        moves = 0
        game_completed = False

        observation = self._result_text(
            await client.call_tool("play_action", {"action": "look"})
        )
        self._update_score(observation)
        self.last_obs_norm = self._norm_obs(observation)
        self._record_location(observation, locations_visited)

        for step in range(max_steps):
            prompt = self._build_prompt(observation, history)
            memory = await self._fetch_memory(client)
            if memory:
                prompt += "\n\nServer memory:\n" + memory
            if self.failed_actions:
                # Sorted so the prompt does not depend on set/hash ordering.
                avoid = sorted(self.failed_actions)[: self._MAX_FAILED_IN_PROMPT]
                prompt += "\n\nAvoid repeating actions that did nothing: " + ", ".join(avoid)
            prompt += "\n\nExploration bias: if stuck, try moving (north/south/east/west/up/down/in/out)."

            response = self._call_llm(prompt, SYSTEM_PROMPT, seed + step)
            thought, _requested_tool, args = self._parse_response(response)
            # Keep it simple: always call play_action
            tool_name = "play_action"
            action = self._choose_action(args, observation)

            new_observation = self._result_text(
                await client.call_tool(tool_name, {"action": action})
            )
            self._update_score(new_observation)

            # Mark failure if nothing changed (but never mark "look" as failed,
            # since looking twice legitimately yields the same text).
            new_norm = self._norm_obs(new_observation)
            if new_norm == self.last_obs_norm and action != "look":
                self.failed_actions.add(action)
            self.last_obs_norm = new_norm

            self.recent_actions.append(action)
            self.recent_actions = self.recent_actions[-self._MAX_RECENT_ACTIONS:]

            history.append((thought, tool_name, json.dumps({"action": action})))
            moves += 1
            observation = new_observation
            # Record after every action so the last place reached is counted too.
            self._record_location(observation, locations_visited)

            if verbose:
                print(f"Step {step+1:03d} | Score {self.score:3d} | {action}")

            if "GAME OVER" in observation:
                game_completed = True
                break

        return RunResult(
            final_score=self.score,
            max_score=self.DEFAULT_MAX_SCORE,
            moves=moves,
            locations_visited=locations_visited,
            game_completed=game_completed,
            history=history,
        )

    @staticmethod
    def _result_text(result) -> str:
        """
        Extract the plain text from a tool-call result.

        The exact return type of `client.call_tool` is not visible in this
        file (presumably a list of content items or an object exposing
        `.content` / `.data`, depending on the FastMCP version - confirm
        against the installed client). Every item with a string `.text` is
        joined with newlines; anything unrecognised falls back to `str()`.
        """
        if result is None:
            return ""
        if isinstance(result, str):
            return result
        content = getattr(result, "content", result)
        if isinstance(content, (list, tuple)):
            texts = [
                item.text
                for item in content
                if isinstance(getattr(item, "text", None), str)
            ]
            if texts:
                return "\n".join(texts)
        data = getattr(result, "data", None)
        if isinstance(data, str):
            return data
        return str(result)

    @staticmethod
    def _record_location(observation: str, locations_visited: set) -> None:
        """Add the first line of the observation (if non-empty) as a location."""
        first_line = observation.split("\n", 1)[0].strip()
        if first_line:
            locations_visited.add(first_line)

    async def _fetch_memory(self, client) -> str:
        """Return the text of the optional `memory` tool, or "" if unavailable."""
        try:
            return self._result_text(await client.call_tool("memory", {}))
        except Exception:
            # The memory tool is optional ("if implemented"); a missing or
            # failing tool must not stop the game loop.
            return ""

    def _choose_action(self, args, observation: str) -> str:
        """
        Turn parsed LLM arguments into the command that will actually be sent.

        Applies, in order: defaulting to "look", replacing known-failed
        actions, breaking a run of repeated "look", and breaking a run of any
        action repeated three times.
        """
        action = "look"
        if isinstance(args, dict):
            # Lower-cased so "North" and "north" are tracked as one action.
            action = str(args.get("action", "look")).strip().lower() or "look"

        # Simple avoidance: don't repeat known-failed actions
        if action in self.failed_actions:
            action = self._fallback_action_from_observation(observation)

        last_two = self.recent_actions[-2:]
        if len(last_two) == 2:
            # Hard anti-stuck rule: a third "look" in a row is replaced. This
            # only applies when the agent is about to look again, so a useful
            # action proposed by the LLM is not discarded.
            if action == "look" and last_two == ["look", "look"]:
                if "inventory" not in self.failed_actions:
                    action = "inventory"
                else:
                    action = self._fallback_action_from_observation(observation)
            # Avoid repeating the exact same action three times in a row.
            if last_two[0] == action and last_two[1] == action:
                action = self._fallback_action_from_observation(observation)
        return action

    def _build_prompt(self, observation: str, history: list) -> str:
        """
        Build the user prompt: the current observation followed by the JSON
        arguments of up to the six most recent history entries.
        """
        recent = "\n".join([f"- {h[2]}" for h in history[-6:]]) if history else "(none)"
        return f"Current observation:\n{observation}\n\nRecent actions:\n{recent}"

    def _parse_response(self, response: str) -> tuple[str, str, dict]:
        """
        Parse LLM response to extract thought, tool name, and arguments.

        Expected format:
            THOUGHT: ...
            TOOL: ...
            ARGS: {...}

        Missing or malformed parts fall back to defaults: an empty thought,
        tool "play_action" and args {"action": "look"}. A non-string response
        (e.g. None) yields all defaults.

        Returns:
            Tuple of (thought, tool_name, args_dict)
        """
        thought = ""
        tool = "play_action"
        args = {"action": "look"}
        if not isinstance(response, str):
            return thought, tool, args

        m = re.search(r"THOUGHT:\s*(.*)", response, flags=re.I)
        if m:
            thought = m.group(1).strip()

        m = re.search(r"TOOL:\s*([A-Za-z_][A-Za-z0-9_]*)", response, flags=re.I)
        if m:
            tool = m.group(1).strip()

        m = re.search(r"ARGS:", response, flags=re.I)
        if m:
            brace = response.find("{", m.end())
            if brace != -1:
                # raw_decode stops at the end of the first JSON value, so
                # trailing text (even text containing braces) is ignored.
                try:
                    parsed, _end = json.JSONDecoder().raw_decode(response, brace)
                except ValueError:
                    parsed = None
                if isinstance(parsed, dict):
                    args = parsed

        if "action" not in args:
            args["action"] = "look"
        return thought, tool, args

    def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
        """
        Call the LLM with the given prompt.

        This is a convenience wrapper - you can also use call_llm() directly.
        """
        return call_llm(prompt, system_prompt, seed)

    def _fallback_action_from_observation(self, observation: str) -> str:
        """
        Pick a heuristic command when the LLM's choice cannot be used.

        Returns the first movement command not yet marked as failed;
        otherwise the first untried "<verb> <word>" built from words of the
        observation; otherwise "look".
        """
        # Try movement first
        for move in self._MOVEMENT_COMMANDS:
            if move not in self.failed_actions:
                return move

        # Try simple object interactions based on words in the observation.
        # dict.fromkeys de-duplicates while keeping first-seen order.
        words = re.findall(r"[A-Za-z]{3,}", observation.lower())
        unique = dict.fromkeys(w for w in words if w not in self._STOP_WORDS)
        for word in list(unique)[: self._MAX_FALLBACK_WORDS]:
            for verb in self._FALLBACK_VERBS:
                cmd = f"{verb} {word}"
                if cmd not in self.failed_actions:
                    return cmd
        return "look"

    def _update_score(self, text: str):
        """Raise `self.score` to the first "Score: N" found in text, if higher."""
        m = re.search(r"Score:\s*(\d+)", text, flags=re.I)
        if m:
            self.score = max(self.score, int(m.group(1)))

    def _norm_obs(self, text: str) -> str:
        """
        Normalise an observation for change detection: drop score/move
        counters, collapse whitespace and keep at most 700 characters.
        """
        s = re.sub(r"\[Score:.*?\]", "", text, flags=re.I)
        s = re.sub(r"Score:\s*\d+|Moves:\s*\d+", "", s, flags=re.I)
        s = re.sub(r"\s+", " ", s).strip()
        return s[:700]
| # ============================================================================= | |
| # For local testing | |
| # ============================================================================= | |
async def test_agent():
    """Play a short local session against the MCP server and print a summary."""
    from fastmcp import Client

    # Fixed settings for the local smoke run.
    run_options = {
        "game": "zork1",
        "max_steps": 10,
        "seed": 42,
        "verbose": True,
    }
    player = StudentAgent()

    # "mcp_server.py" is the path to your MCP server.
    async with Client("mcp_server.py") as session:
        outcome = await player.run(client=session, **run_options)
        summary_lines = (
            f"\nFinal Score: {outcome.final_score}",
            f"Moves: {outcome.moves}",
            f"Locations: {outcome.locations_visited}",
        )
        for line in summary_lines:
            print(line)
# Run the local test session only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    import asyncio

    asyncio.run(test_agent())