Spaces:
Sleeping
Sleeping
| """ | |
| Student Agent for Text Adventure Games | |
| This is your submission file. Implement the StudentAgent class to play | |
| text adventure games using the MCP server you also implement. | |
| Your agent should: | |
| 1. Connect to the MCP server via the provided client | |
| 2. Use the ReAct pattern (Thought -> Action -> Observation) | |
| 3. Call MCP tools to interact with the game | |
| 4. Maximize the game score within the step limit | |
| """ | |
| import json | |
| import os | |
| import re | |
| from dataclasses import dataclass, field | |
| from typing import Optional | |
| from dotenv import load_dotenv | |
| from huggingface_hub import InferenceClient | |
| # Load environment variables | |
| load_dotenv() | |
| # ============================================================================= | |
| # LLM Configuration | |
| # ============================================================================= | |
| LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct" | |
| _hf_token = os.getenv("HF_TOKEN") | |
| if not _hf_token: | |
| raise ValueError("HF_TOKEN not found. Set it in your .env file.") | |
| LLM_CLIENT = InferenceClient(token=_hf_token) | |
def call_llm(
    prompt: str,
    system_prompt: str,
    seed: int,
    max_tokens: int = 300,
) -> str:
    """Send a system+user prompt pair to the LLM and return the reply text.

    Args:
        prompt: User-turn content sent to the model.
        system_prompt: System-turn instructions for the model.
        seed: Sampling seed forwarded to the API for reproducibility.
        max_tokens: Upper bound on the completion length.

    Returns:
        The content of the first choice of the chat completion.

    Raises:
        Exception: Any error from the inference client is logged to stdout
            and re-raised unchanged.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    try:
        response = LLM_CLIENT.chat.completions.create(
            model=LLM_MODEL,
            messages=messages,
            temperature=0.0,  # deterministic decoding; seed pins remaining randomness
            max_tokens=max_tokens,
            seed=seed,
        )
    except Exception as e:
        print(f"Error calling LLM: {e}")
        # Bug fix: bare `raise` re-raises the active exception with its
        # original traceback intact (`raise e` appends the current frame).
        raise
    else:
        return response.choices[0].message.content
@dataclass
class RunResult:
    """Result of running the agent.

    Bug fix: the original declared dataclass-style annotated fields
    (including ``field(default_factory=list)``) but omitted the
    ``@dataclass`` decorator, so keyword construction such as
    ``RunResult(final_score=..., ...)`` raised ``TypeError`` and the
    ``field(...)`` object was a plain shared class attribute.
    """

    final_score: int                # score when the run ended
    max_score: int                  # highest score observed during the run
    moves: int                      # number of agent steps executed
    locations_visited: set[str]     # distinct location headers seen
    game_completed: bool            # True when a "GAME OVER" observation occurred
    error: Optional[str] = None     # error message if the run aborted, else None
    # (thought, action, observation) tuples; fresh list per instance.
    history: list[tuple[str, str, str]] = field(default_factory=list)
| # ============================================================================= | |
| # Console Formatting | |
| # ============================================================================= | |
class Colors:
    """ANSI escape codes for pretty console output.

    Usage: f"{Colors.BOLD}{Colors.RED}text{Colors.ENDC}" — always close
    with ENDC to reset terminal attributes.
    """

    HEADER = "\033[95m"     # bright magenta
    BLUE = "\033[94m"
    CYAN = "\033[96m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    RED = "\033[91m"
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"
    ENDC = "\033[0m"        # reset all attributes
| # ============================================================================= | |
| # System Prompt | |
| # ============================================================================= | |
# System prompt for the first LLM pass: condenses the raw observation plus
# recent history into a short summary and next-action suggestions.
# NOTE(review): the name "SUMMURIZER" is misspelled ("SUMMARIZER") but is
# kept because it is part of the module's public surface.
SUMMURIZER_SYSTEM_PROMPT = """
You are helping an agent to play a text adventure game. To help this agent you should return a good summary of the current observation and the history of the agent's actions and observations.
If applicable, use the history to give potential next best actions in few sentences (max 250 words). If you think the agent is stuck, you should give a hint to the agent to get unstuck.
Here are some hints to help the agent:
The first hint you should give is: looking in, under, on, or behind stuff, it could be useful to get points and can help get unstuck. Try to make the model look in stuff that could contain coins, keys, etc.
Also if you are in room with a window, you should look out of the window to see if there is a way to get out of the room.
You should make sure to examine the whole room before moving to another direction.
You should tell sometimes to use `get_history` or `get_valid_actions` to get the history of the agent's actions and observations by location or the list of valid actions you can perform in the current location. This could be useful to get unstuck.
YOUR RESPONSE FORMAT SHOULD BE:
SUMMARY: <summary of the prompt>
NEXT BEST ACTIONS: <list of potential next best actions in few sentences (max 250 words)>
"""
# System prompt for the second LLM pass: the ReAct policy that must answer
# in the strict THOUGHT / TOOL / ARGS format parsed by _parse_response().
SYSTEM_PROMPT = """
You are an expert Text Adventure Game Agent.
Your objective is to MAXIMIZE SCORE and COMPLETE THE GAME by navigating the world and solving puzzles.
### TOOL PROTOCOLS AND SYNTAX
#### LIST OF AVAILABLE TOOLS:
1. play_action (TAKES ARGS: {"action": "<command>"}) - Execute game commands (north, take lamp, open mailbox, etc.)
2. get_history (TAKES NO ARGS) - Get the history of your actions and observations by location. Very important!
3. get_valid_actions (TAKES NO ARGS) - Get the list of valid actions you can perform in the current location. Very important!
4. explore_surroundings (TAKES NO ARGS)- Explore the surroundings of the current location. Very important!
* **Strict Syntax:** The game parser is old and strict. Do not use adjectives or articles unless necessary.
* *Bad:* "pick up the shiny egg", "examine the old mailbox"
* *Good:* "take egg", "examine mailbox", "look in fountain"
* **Validator:** Before attempting complex interactions, ALWAYS check `get_valid_actions`.
* If the tool returns "take egg", you must send exactly "take egg" to the `play_action` tool.
* **Execution:** execute commands using `play_action(action="<command>")`.
* **Map & History:** Use `get_history` to track your path and the history of your actions and observations by location.
* **Loop Avoidance:** If you have visited a room 3 times with no progress, navigate a different direction immediately.
* **Dead Ends:** Never retry a direction that previously returned "You can't go that way".
* **Surveying:** Use `explore_surroundings` to identify exits and descriptions.
* Example of Movement commands: `go north`, `go northeast`, `go northwest`, `go south`, `go southeast`, `go southwest`, `go east`, `go west`, `go up`, `go down`, `enter`, `exit`, etc.
* **Kleptomania:** If an item is portable, `take` it. Inventory items are crucial for puzzles.
* **Investigation Loop:**
1. **Examine:** Use `examine <object>` on every new object with the `play_action` tool.
2. **Look In:** Use `look in <object>` for containers with the `play_action` tool. *Note: "Examine" and "Look In" are distinct commands.*
3. **Inventory:** Your inventory is pass to you at each step, you should use it to your advantage.
### OUTPUT FORMAT
You operate in a strict Thought-Action loop. You must output in this format:
THOUGHT: <Step-by-step reasoning. Reference valid actions and history. Do not repeat the same action multiple times in a row.>
TOOL: <tool_name>
ARGS: <JSON arguments>
"""
| # ============================================================================= | |
| # Student Agent | |
| # ============================================================================= | |
class StudentAgent:
    """A robust ReAct agent for text adventure games.

    Features:
    - Loop detection (per-location visit counts drive anti-loop hints)
    - Valid action prioritization
    - Robust parsing of the LLM's THOUGHT / TOOL / ARGS output

    Fixes applied in this revision:
    - bare ``except:`` clauses in ``_parse_response`` narrowed to
      ``json.JSONDecodeError`` (they no longer swallow e.g. KeyboardInterrupt)
    - removed the unused local ``valid_actions_result``
    - removed a redundant ``_extract_result`` call on the summarizer's
      plain-string return value (behavior identical)
    """

    def __init__(self):
        # (thought, action, observation) tuples, in chronological order.
        self.history: list[tuple[str, str, str]] = []
        # First lines of observations seen so far (used as location keys).
        self.visited_locations: set[str] = set()
        # Visit count per location header; drives the anti-loop hint.
        self.location_counts: dict[str, int] = {}

    async def get_score(self, client) -> int:
        """Fetch the current score via the MCP ``get_score`` tool.

        Raises ValueError if the tool's text payload is not an integer.
        """
        response = await client.call_tool("get_score", None)
        return int(self._extract_result(response))

    async def run(
        self,
        client,
        game: str,
        max_steps: int,
        seed: int,
        verbose: bool = False,
    ) -> RunResult:
        """Play the game for up to ``max_steps`` ReAct iterations.

        Args:
            client: MCP client exposing an async ``call_tool`` method.
            game: Game identifier (not used by the loop itself; kept for
                interface compatibility with the harness).
            seed: Seed forwarded to every LLM call for reproducibility.
            verbose: When True, pretty-print each step to the console.

        Returns:
            A RunResult summarizing score, moves, locations and history.
        """
        # 1. Initialization: open with "look" so we have a first observation.
        try:
            result = await client.call_tool("play_action", {"action": "look"})
            observation = self._extract_result(result)
        except Exception as e:
            observation = f"Error starting game: {e}"

        score = await self.get_score(client)
        max_score = score
        moves = 0
        game_completed = False

        for step in range(1, max_steps + 1):
            if verbose:
                print(
                    f"\n{Colors.BOLD}{Colors.CYAN}--- Step {step} | Score: {score} | Moves: {moves} | # Locations Visited: {len(self.visited_locations)} ---{Colors.ENDC}"
                )

            # 2. Track the current location; its header is assumed to be the
            # first line of the observation.
            current_loc_line = observation.strip().split("\n")[0]
            self.location_counts[current_loc_line] = (
                self.location_counts.get(current_loc_line, 0) + 1
            )

            # 3. Construct the dynamic prompt. If we keep returning to the
            # same location, inject an anti-loop hint instead of the default.
            hint = "You could use the `explore_surroundings` tool to explore the surroundings of the current location."
            if self.location_counts[current_loc_line] > 3:
                hint = f"\n[SYSTEM HINT]: You have been in '{current_loc_line}' {self.location_counts[current_loc_line]} times. Stop looping. Go somewhere new."

            prompt = self._build_prompt(observation, self.history, hint)

            # 4. Fetch the inventory and append it for the summarizer.
            inventory_response = await client.call_tool("inventory", {})
            inventory = self._extract_result(inventory_response)
            prompt += f"\n\nCURRENT INVENTORY: {inventory}"

            # 5. First LLM pass: summarize state and hint at next actions.
            # call_llm already returns plain text, so no extraction is needed.
            summary = call_llm(
                prompt,
                SUMMURIZER_SYSTEM_PROMPT,
                seed,
                max_tokens=256,
            )
            if verbose:
                print(
                    f"{Colors.BOLD}{Colors.YELLOW}SYSTEM SUMMARY:{Colors.ENDC}\n{summary}"
                )

            # 6. Second LLM pass: pick the next tool call from the summary.
            response = call_llm(summary, SYSTEM_PROMPT, seed, max_tokens=256)
            thought, tool_name, tool_args = self._parse_response(response)
            if verbose:
                print(f"{Colors.BOLD}{Colors.YELLOW}THOUGHT:{Colors.ENDC} {thought}")
                print(f"{Colors.BOLD}{Colors.GREEN}TOOL:{Colors.ENDC} {tool_name}")
                print(f"{Colors.BOLD}{Colors.GREEN}ARGS:{Colors.ENDC} {tool_args}")

            # 7. Execute the chosen tool; errors become the next observation
            # so the agent can react to them instead of crashing the run.
            try:
                raw_result = await client.call_tool(tool_name, tool_args)
                observation = self._extract_result(raw_result)
                if verbose:
                    print(
                        f"{Colors.BOLD}{Colors.BLUE}OBSERVATION:{Colors.ENDC}\n{observation.strip()}"
                    )
            except Exception as e:
                observation = f"Tool Execution Error: {e}"
                if verbose:
                    print(f"{Colors.BOLD}{Colors.RED}ERROR:{Colors.ENDC} {e}")

            # 8. Score bookkeeping. NOTE(review): the score announcement is
            # intentionally printed regardless of `verbose`, as before.
            current_score = await self.get_score(client)
            if current_score != score:
                print(
                    f"{Colors.BOLD}{Colors.GREEN}SYSTEM:{Colors.ENDC} NEW SCORE: {current_score} (+{current_score - score})"
                )
            score = current_score
            max_score = max(max_score, score)

            # 9. Record the full observation; _build_prompt truncates it
            # later when assembling context (the old comment claiming
            # truncation happened here was wrong).
            self.history.append((thought, f"{tool_name}{tool_args}", observation))
            self.visited_locations.add(current_loc_line)
            moves += 1

            if "GAME OVER" in observation:
                # NOTE(review): "GAME OVER" may also mean the player died;
                # treated as completion here, matching the original logic.
                game_completed = True
                if verbose:
                    print(f"\n{Colors.BOLD}{Colors.RED}*** GAME OVER ***{Colors.ENDC}")
                break

        return RunResult(
            final_score=score,
            max_score=max_score,
            moves=moves,
            locations_visited=self.visited_locations,
            game_completed=game_completed,
            history=self.history,
        )

    def _extract_result(self, result) -> str:
        """Best-effort extraction of the text payload from an MCP result.

        Falls back to ``str(result)`` for objects without a ``content`` list
        (including plain strings).
        """
        if hasattr(result, "content") and result.content:
            return result.content[0].text
        return str(result)

    def _build_prompt(self, observation: str, history: list, hint: str) -> str:
        """Construct a context-aware prompt from the last 5 history entries."""
        hist_str = ""
        # Only the last 5 turns; observations truncated to 200 chars so old
        # turns don't dominate the context window.
        for _thought, action, obs in history[-5:]:
            hist_str += f"- Action: {action}\n Result: {obs[:200]}...\n"
        return f"""
CURRENT GAME STATUS:
{observation}
Last 5 Actions and Observations:
{hist_str}
{hint}
Response Format:
THOUGHT: ...
TOOL: ...
ARGS: ...
"""

    def _parse_response(self, response: str) -> tuple[str, str, dict]:
        """Robustly parse LLM output into (thought, tool_name, tool_args).

        Malformed output degrades gracefully to the safe default
        ``play_action {"action": "look"}``.
        """
        thought = "Thinking..."
        tool_name = "play_action"
        tool_args = {"action": "look"}

        for line in response.strip().split("\n"):
            line = line.strip()
            if not line:
                continue
            if line.upper().startswith("THOUGHT:"):
                thought = line[8:].strip()
            elif line.upper().startswith("TOOL:"):
                tool_name = line[5:].strip()
            elif line.upper().startswith("ARGS:"):
                args_str = line[5:].strip()
                try:
                    # Try strict JSON first.
                    tool_args = json.loads(args_str)
                except json.JSONDecodeError:
                    try:
                        # Fallback for Python-style dicts like {'action': 'north'}.
                        tool_args = json.loads(args_str.replace("'", '"'))
                    except json.JSONDecodeError:
                        # Broken JSON: for play_action, salvage the action
                        # string; otherwise keep the safe default args.
                        if tool_name == "play_action":
                            match = re.search(r':\s*"([^"]+)"', args_str)
                            if match:
                                tool_args = {"action": match.group(1)}
                            else:
                                tool_args = {"action": args_str}
        return thought, tool_name, tool_args
| # ============================================================================= | |
| # Local Testing | |
| # ============================================================================= | |
async def test_agent():
    """Smoke-test the agent against the local MCP server."""
    from fastmcp import Client

    player = StudentAgent()
    server_path = "mcp_server.py"
    async with Client(server_path) as session:
        outcome = await player.run(session, "zork1", 20, 42, True)
        print(
            f"{Colors.BOLD}{Colors.HEADER}\nFinal Score: {outcome.final_score}{Colors.ENDC}"
        )
| if __name__ == "__main__": | |
| import asyncio | |
| asyncio.run(test_agent()) | |