"""
Student Agent for Text Adventure Games

This is your submission file. Implement the StudentAgent class to play
text adventure games using the MCP server you also implement.

Your agent should:
1. Connect to the MCP server via the provided client
2. Use the ReAct pattern (Thought -> Action -> Observation)
3. Call MCP tools to interact with the game
4. Maximize the game score within the step limit

Required method:
async def run(self, client, game, max_steps, seed, verbose) -> RunResult

The 'client' is a FastMCP Client already connected to your MCP server.
Use it to call tools like: await client.call_tool("play_action", {"action": "look"})

Tips:
- Start by looking around and understanding your environment
- Keep track of visited locations to avoid loops
- Pick up useful items (lamp, sword, etc.)
- The seed parameter should be used to set your LLM's seed for reproducibility
"""
|
|
| import json |
| import os |
| import re |
| from dataclasses import dataclass, field |
| from typing import Optional, List, Dict |
|
|
| from dotenv import load_dotenv |
| from huggingface_hub import InferenceClient |
| from groq import Groq |
|
|
# Pull variables from a local .env file into the process environment
# before the token lookup below.
load_dotenv()

# Model identifier handed to the chat-completion endpoint by call_llm.
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

# Fail fast at import time when the Hugging Face token is missing or empty.
_hf_token = os.environ.get("HF_TOKEN")
if not _hf_token:
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")

# Shared inference client used by call_llm for every request.
LLM_CLIENT = InferenceClient(token=_hf_token)
|
|
|
|
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """
    Call the LLM with the given prompt. Use this function in your agent.

    Args:
        prompt: The user prompt (current game state, history, etc.)
        system_prompt: The system prompt (instructions for the agent)
        seed: Random seed for reproducibility
        max_tokens: Maximum tokens in response (default: 300)

    Returns:
        The LLM's response text. An empty string is returned when the
        endpoint yields no choices or a message without text content, so
        callers can always treat the result as a ``str``.

    Example:
        response = call_llm(
            prompt="You are in a forest. What do you do?",
            system_prompt=SYSTEM_PROMPT,
            seed=42,
        )
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    # temperature=0.0 together with the seed keeps runs as repeatable as
    # the backend allows.
    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.0,
        max_tokens=max_tokens,
        seed=seed,
    )

    if not response.choices:
        return ""

    # ``content`` is optional in the response schema; never hand None to
    # callers that go on to do string operations on the reply.
    return response.choices[0].message.content or ""
|
|
@dataclass
class RunResult:
    """Summary of a single agent run, as returned by ``StudentAgent.run``."""

    # Score reported by the agent when the run stopped.
    final_score: int
    # Maximum achievable score for the game.
    max_score: int
    # Number of steps the agent performed.
    moves: int
    # Names of the locations recorded as visited.
    locations_visited: set[str]
    # True when the last observation indicated that the game was over.
    game_completed: bool
    # Optional error description; None when nothing was reported.
    error: Optional[str] = None
    # Per-step log of string triples.
    # NOTE(review): presumably (thought, action, observation) - confirm
    # against the consumer of this field.
    history: list[tuple[str, str, str]] = field(default_factory=list)
|
|
| |
| |
| |
|
|
# System prompt sent with every LLM call made by the agent. It lists the two
# MCP tools (play_action, inventory), the command vocabulary, and the
# THOUGHT / TOOL / ARGS reply layout that StudentAgent._parse_response
# splits on. The text is runtime data: changing it changes agent behavior.
SYSTEM_PROMPT = """You are an expert text adventure player.
OBJECTIVE: Explore, collect treasures, and maximize score.

TOOLS
1. play_action: Execute commands (north, take sword, etc.)
2. inventory: Check what you are carrying

VALID COMMANDS for play_action (you must use one of these):
- Move: n, s, e, w, ne, nw, se, sw, up, down, enter, exit
- Perception : look, examine <thing>, look into <thing>, look under <thing>, listen
- Action <thing>: take, drop, open, close, examine, read, break, climb, unlock, push, pull, burn
- Complex: turn on/off <item>, attack <enemy> with <weapon>, get <item> with <item>.

INTERACTION RULES:
1. EXAMINE + LOOK INTO: you MUST 'look into' AND 'examine' EVERY item in the current location (stairs, chest, statue...).
2. TAKE ITEMS: If you see an item, 'take' it immediately.
3. LISTEN: Noise or sound -> 'listen'
4. ANTI-LOOP: if <examine> did not work, try <look into>, and then move on.

EXPLORATION RULES:
1. EXHAUSTIVE SEARCH: Try EVERY direction that is not blocked and not known yet.
2. AWARENESS: If there is a single OBVIOUS direction hinted at, try it even if it was previously blocked.

RESPONSE FORMAT (Strict JSON-like):
THOUGHT: <Reasoning : explain briefly the next logical steps given the observation.>
TOOL: <tool_name>
ARGS: <JSON arguments>

EXAMPLE:
THOUGHT: I see a fountain, a curtain. I will look into the fountain. Then I will examine the curtain.
TOOL: play_action
ARGS: {"action": "look into fountain"}
"""
|
|
|
|
# Maps every movement command the agent recognizes to the command that
# undoes it. The keys double as the set of commands treated as movement.
REVERSE_ACTIONS = {
    # Cardinal directions, vertical moves and enter/exit (long form).
    "north": "south",
    "south": "north",
    "east": "west",
    "west": "east",
    "up": "down",
    "down": "up",
    "enter": "exit",
    "exit": "enter",
    # Single-letter abbreviations.
    "n": "s",
    "s": "n",
    "e": "w",
    "w": "e",
    "u": "d",
    "d": "u",
    # Diagonals (long form).
    "northeast": "southwest",
    "northwest": "southeast",
    "southeast": "northwest",
    "southwest": "northeast",
    # Diagonals (abbreviated).
    "ne": "sw",
    "nw": "se",
    "se": "nw",
    "sw": "ne",
}
|
|
| |
| |
| |
|
|
class StudentAgent:
    """ReAct-style agent that plays a text adventure through MCP tools.

    On every step the agent builds a prompt from the latest observation plus
    its short-term memory (recent actions, look/examine commands already
    tried in the current room, known exits), asks the LLM for a
    THOUGHT / TOOL / ARGS reply, and executes the chosen tool.
    """

    # Sentinel returned by _extract_location when an observation contains no
    # room-title line (for example after a refused move).
    _BLOCKED = "Blocked"
    # Number of most recent steps retained in ``self.history``.
    _HISTORY_LIMIT = 10

    def __init__(self):
        """Create an agent with empty memory."""
        # Rolling log of the last steps (dicts with step/tool/args/result/thought).
        self.history: list[dict] = []
        # Highest score seen in any tool result.
        self.score: int = 0
        # Names of every room the agent has been in.
        self.visited_locations: set[str] = set()
        self.last_observation: str = ""
        # room name -> look/examine commands already issued there.
        self.tried: dict[str, set[str]] = {}
        self.last_action: Optional[str] = None
        self.last_thought: Optional[str] = None
        self.current_location: str = ""
        # room name -> observation text captured on arrival.
        self.descriptions: dict[str, str] = {}
        # room name -> known exits, each formatted as "<command> -> <room>".
        self.explored_locations: dict[str, set[str]] = {}

    async def run(self, client, game: str, max_steps: int, seed: int, verbose: bool = False) -> "RunResult":
        """Play one game and return a summary of the run.

        Args:
            client: Connected MCP client exposing ``call_tool(name, args)``.
            game: Name of the game. Accepted for interface compatibility;
                this method does not read it.
            max_steps: Maximum number of LLM-driven steps.
            seed: Base LLM seed; step ``k`` calls the LLM with ``seed + k``.
            verbose: When true, print the prompt, thought and action of
                every step.

        Returns:
            A RunResult holding the best score seen, the number of steps
            taken and the set of rooms entered. ``max_score`` is reported as
            0 because this agent does not track it.
        """
        moves = 0

        # Prime the loop with an initial "look" so the first prompt has
        # something to reason about.
        result = await client.call_tool("play_action", {"action": "look"})
        raw_result = self._extract_result(result)
        observation = self._clean_observation(raw_result)
        self._update_score(raw_result)
        self.last_observation = observation
        self.last_action = "look"

        start_location = self._extract_location(observation)
        self.current_location = start_location
        self.explored_locations.setdefault(start_location, set())
        if start_location != self._BLOCKED:
            self.visited_locations.add(start_location)
            self.descriptions[start_location] = observation

        if verbose:
            print(f"START: {observation[:100]}...")

        for step in range(1, max_steps + 1):
            # The inventory is only fetched right after an "examine" command.
            show_inventory = self.last_action.startswith("examine")
            inventory = ""
            if show_inventory:
                inventory_result = await client.call_tool("inventory", {})
                inventory = self._extract_result(inventory_result)

            prompt = self._build_prompt(observation, inventory, show_inventory)

            if verbose:
                print("---------------------------------------------------")
                print(prompt)
                print("---------------------------------------------------")

            response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
            thought, tool_name, tool_args = self._parse_response(response)
            self.last_thought = thought

            if verbose:
                print(f"\n--- Step {step} ---")
                print(f"Thought: {thought}")
                print(f"Action: {tool_args}")

            try:
                result = await client.call_tool(tool_name, tool_args)
            except Exception as e:
                # Best effort: surface the failure to the LLM as the next
                # observation instead of aborting the whole run. The
                # previous action is left untouched so a failed call is
                # never mistaken for a blocked move.
                observation = f"System Error: {e}"
            else:
                raw_result = self._extract_result(result)
                observation = self._clean_observation(raw_result)
                self._update_score(raw_result)

                # Normalize for bookkeeping only; the server received the
                # arguments exactly as the LLM produced them.
                action = str(tool_args.get("action", tool_name)).strip().lower()
                self.last_action = action
                if action.startswith(("look ", "examine")):
                    self.tried.setdefault(self.current_location, set()).add(action)
                if action in REVERSE_ACTIONS:
                    self._record_move(action, observation)

            self.last_observation = observation
            self.history.append({
                "step": step,
                "tool": tool_name,
                "args": tool_args,
                "result": observation,
                "thought": thought,
            })
            # Keep only the most recent entries.
            del self.history[:-self._HISTORY_LIMIT]

            moves += 1
            if self._is_game_over(observation):
                break

        return RunResult(
            final_score=self.score,
            max_score=0,
            moves=moves,
            locations_visited=self.visited_locations,
            game_completed=self._is_game_over(observation),
        )

    def _record_move(self, action: str, observation: str) -> None:
        """Update the map after movement command ``action`` produced ``observation``."""
        destination = self._extract_location(observation)
        origin = self.current_location
        if destination == origin:
            return

        # Blocked exits are recorded too, so the LLM sees them in the mini map.
        self.explored_locations.setdefault(origin, set()).add(f"{action} -> {destination}")
        if destination == self._BLOCKED:
            return

        self.explored_locations.setdefault(destination, set()).add(
            f"{REVERSE_ACTIONS[action]} -> {origin}"
        )
        self.current_location = destination
        self.visited_locations.add(destination)
        self.descriptions[destination] = observation

    def _extract_location(self, observation: str, max_length: int = 25) -> str:
        """Return the room title found in ``observation``, or ``"Blocked"``.

        The title is the first non-empty line that is shorter than
        ``max_length`` characters and consists only of ASCII letters, digits
        and spaces (so ordinary sentences with punctuation never qualify).
        """
        for line in observation.strip().split("\n"):
            candidate = line.strip()
            if not candidate or len(candidate) >= max_length:
                continue
            if re.match(r'^[a-zA-Z0-9 ]+$', candidate):
                return candidate
        return self._BLOCKED

    def _smart_truncate(self, text: str, max_length: int = 80) -> str:
        """Shorten ``text`` to at most ``max_length`` characters plus "...".

        Newlines are flattened to spaces first. The cut is made after the
        last sentence terminator (., ! or ?) inside the limit; failing that,
        at the last space; failing that, at the hard limit. Text that
        already fits is returned without the ellipsis.
        """
        clean_text = text.replace("\n", " ").strip()
        if len(clean_text) <= max_length:
            return clean_text

        truncated = clean_text[:max_length]

        last_sentence_end = max(truncated.rfind(mark) for mark in ".!?")
        if last_sentence_end != -1:
            return truncated[:last_sentence_end + 1] + "..."

        last_space = truncated.rfind(" ")
        if last_space != -1:
            return truncated[:last_space] + "..."

        return truncated + "..."

    def get_mini_map(self) -> str:
        """Return the known exits of the current room as prompt text."""
        parts = [f"KNOWN CONNECTIONS FROM {self.current_location}:"]
        # Sorted so the prompt does not depend on set iteration order, which
        # varies between processes and would defeat the LLM seed.
        for known_exit in sorted(self.explored_locations.get(self.current_location, ())):
            parts.append(f" > {known_exit}")
        return "\n".join(parts)

    def _build_prompt(self, current_obs: str, inventory: str, show_inventory: bool = False) -> str:
        """Assemble the user prompt for the next LLM call.

        Sections, in order: the last five actions (with a warning when the
        last two are identical), look/examine commands already tried in the
        current room, the known exits, the inventory (only when
        ``show_inventory`` is true), the previous thought, and the current
        observation.
        """
        parts = []

        if self.history:
            parts.append("\nRECENT HISTORY (Read this to avoid loops!):")
            for entry in self.history[-5:]:
                parts.append(f" > {entry['args'].get('action', 'check')}")

            if len(self.history) >= 2:
                last_action = self.history[-1]['args'].get('action')
                second_last = self.history[-2]['args'].get('action')
                if last_action == second_last:
                    parts.append("\nWARNING: You just repeated an action. TRY SOMETHING DIFFERENT.\n")

        tried_here = self.tried.get(self.current_location)
        if tried_here:
            parts.append(f"\nALREADY TRIED IN {self.current_location}:")
            # Sorted for a reproducible prompt (see get_mini_map).
            parts.append(f"[{', '.join(sorted(tried_here))}]")

        parts.append(f"\n{self.get_mini_map()}")

        if show_inventory:
            parts.append(f"\n{inventory}")

        if self.last_thought:
            parts.append("\nPREVIOUS PLAN:")
            parts.append(self.last_thought)

        parts.append("\nCURRENT OBSERVATION :")
        parts.append(current_obs)

        parts.append("\nBased on the history and observation, what is your next move?")

        return "\n".join(parts)

    def _clean_observation(self, text: str) -> str:
        """Remove the 'Score:' portion of any line to prevent LLM confusion."""
        text = re.sub(r'\[?Score:.*\]?', '', text, flags=re.IGNORECASE)
        return text.strip()

    def _parse_response(self, text: str):
        """Parse an LLM reply into ``(thought, tool_name, tool_args)``.

        The parser is deliberately forgiving:
        * a missing or empty reply yields the default ``play_action`` /
          ``{"action": "look"}`` step;
        * the tool name is the first identifier after ``TOOL:``, lower-cased,
          so back-ticks or trailing chatter are ignored;
        * ``ARGS:`` is decoded as the first JSON object it contains, so text
          after the closing brace is ignored; when that fails, a regex pulls
          out an ``action`` value;
        * ``tool_args`` is always a dict, and tools other than
          ``play_action`` default to no arguments.
        """
        thought = "Deciding next move..."
        tool_name = "play_action"
        tool_args = {"action": "look"}

        if not text:
            return thought, tool_name, tool_args

        if "THOUGHT:" in text:
            thought = text.split("THOUGHT:")[1].split("TOOL:")[0].strip()

        if "TOOL:" in text:
            tool_part = text.split("TOOL:")[1].split("ARGS:")[0]
            name_match = re.search(r'[A-Za-z_][A-Za-z0-9_]*', tool_part)
            if name_match:
                tool_name = name_match.group(0).lower()

        if tool_name != "play_action":
            # The "look" default only makes sense for play_action.
            tool_args = {}

        if "ARGS:" in text:
            args_part = text.split("ARGS:")[1].strip()

            parsed = None
            start = args_part.find("{")
            if start != -1:
                try:
                    # raw_decode stops at the end of the first JSON value.
                    parsed, _ = json.JSONDecoder().raw_decode(args_part[start:])
                except ValueError:
                    parsed = None

            if isinstance(parsed, dict):
                tool_args = parsed
            else:
                action_match = re.search(r'action["\']?\s*:\s*["\']([^"\']+)["\']', args_part)
                if action_match:
                    tool_args = {"action": action_match.group(1)}

        return thought, tool_name, tool_args

    def _extract_result(self, result) -> str:
        """Return the text of an MCP tool result, or ``str(result)`` as fallback."""
        content = getattr(result, "content", None)
        if content:
            text = getattr(content[0], "text", None)
            if text is not None:
                return text
        return str(result)

    def _update_score(self, text: str):
        """Raise ``self.score`` to the value after 'Score:' in ``text``, if larger."""
        match = re.search(r'Score:\s*(\d+)', text, re.IGNORECASE)
        if match:
            self.score = max(self.score, int(match.group(1)))

    def _is_game_over(self, text: str) -> bool:
        """Return True when ``text`` contains a death or game-over marker."""
        lowered = text.lower()
        return "*** you have died ***" in lowered or "game over" in lowered
|
|
|
|
async def test_agent():
    """Run a short zork1 session against the local MCP server and print a summary."""
    # Imported here so the module can be loaded without fastmcp installed
    # unless this helper is actually run.
    from fastmcp import Client

    student = StudentAgent()
    run_options = {
        "game": "zork1",
        "max_steps": 20,
        "seed": 42,
        "verbose": True,
    }

    async with Client("mcp_server.py") as client:
        result = await student.run(client=client, **run_options)

        separator = "=" * 50
        print(f"\n{separator}")
        print(f"Final Score: {result.final_score}")
        print(f"Moves: {result.moves}")
        print(f"Locations: {len(result.locations_visited)}")
|
|
|
|
# Script entry point: run the local smoke test when executed directly.
if __name__ == "__main__":
    from asyncio import run as _run_async

    _run_async(test_agent())