Spaces:
Sleeping
Sleeping
| """ | |
| Example: MCP ReAct Agent | |
| A complete ReAct agent that uses MCP tools to play text adventure games. | |
| This is a working example students can learn from. | |
| """ | |
| import json | |
| import os | |
| import re | |
| from dataclasses import dataclass, field | |
| from typing import Optional | |
| from dotenv import load_dotenv | |
| from huggingface_hub import InferenceClient | |
| load_dotenv() | |
# =============================================================================
# LLM Configuration - DO NOT MODIFY
# =============================================================================
# Model identifier sent with every chat-completion request made by call_llm.
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

# The Hugging Face token is read from the process environment (load_dotenv()
# above may have populated it from a .env file). A missing or empty token
# aborts the import with ValueError instead of failing later on the first call.
_hf_token = os.getenv("HF_TOKEN")
if not _hf_token:
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")

# Single shared inference client used by call_llm.
LLM_CLIENT = InferenceClient(token=_hf_token)
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """
    Send one system + user message pair to the configured LLM and return its reply.

    The request goes through the module-level ``LLM_CLIENT`` using ``LLM_MODEL``
    with ``temperature=0.0`` so that, together with ``seed``, runs are as
    reproducible as the backend allows.

    Args:
        prompt: The user message (current game state, history, etc.).
        system_prompt: The system message (instructions for the agent).
        seed: Sampling seed forwarded to the completion request.
        max_tokens: Upper bound on the length of the reply (default: 300).

    Returns:
        The text content of the first completion choice.

    Example:
        response = call_llm(
            prompt="You are in a forest. What do you do?",
            system_prompt=SYSTEM_PROMPT,
            seed=42,
        )
    """
    completion = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ],
        temperature=0.0,  # Deterministic for reproducibility
        max_tokens=max_tokens,
        seed=seed,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class.

    Must be a dataclass: callers construct it with keyword arguments, and the
    ``history`` default relies on ``field(default_factory=list)`` so that each
    instance gets its own list.
    """

    # Score reached by the end of the run.
    final_score: int
    # Maximum score attainable in the game.
    max_score: int
    # Number of actions the agent sent to the game.
    moves: int
    # Distinct location names seen during the run.
    locations_visited: set[str]
    # True when the run ended because the game itself ended.
    game_completed: bool
    # Error description if the run failed, otherwise None.
    error: Optional[str] = None
    # Per-step record of the run; presumably (thought, action, observation)
    # triples - confirm against the evaluator that consumes it.
    history: list[tuple[str, str, str]] = field(default_factory=list)
# =============================================================================
# System Prompt
# =============================================================================
# Sent as the system message on every LLM call made by StudentAgent.run.
# The "THOUGHT:" and "ACTION:" labels requested below are the ones
# StudentAgent._parse_response looks for, so keep the two in sync.
SYSTEM_PROMPT = """You are an expert text adventure game player. Your goal is to explore, collect treasures, and interact with your environment.
VALID GAME COMMANDS:
- Movement: north, south, east, west, northeast, northwest, southwest, southeast, up, down, enter, exit
- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
- Light: turn on lamp, turn off lamp
- Combat: attack <enemy> with <weapon>, hit <thing>
- Other: read <thing>, wait, shout
- Communication: ask <person> about <thing>, tell <person> about <thing>
FORBIDDEN (will NOT work): check, inspect, search, grab, use, help, turn on torch
RESPOND IN THIS EXACT FORMAT (no markdown):
POSSIBLE ACTIONS: <list all reasonable actions possible in this situation>
THOUGHT: <brief reasoning about what to do next>
ACTION: <action_name>
Examples:
POSSIBLE ACTIONS: take sword, examine sword, east, south, ask villager about dragon
THOUGHT: I need to take the sword.
ACTION: take sword
POSSIBLE ACTIONS: examine mailbox, open mailbox, take mailbox, north, east, shout
THOUGHT: The mailbox might contain something useful.
ACTION: open mailbox
STRATEGY:
- Pick up any useful items (lamp, sword, pig, etc.). You do not have to take an item if it is already in your inventory.
- Explore as much as possible : prioritize moving to examining
- Examine objects only when they seem very interesting and if you are stuck
- Open containers (mailbox, window, etc.)
- Make sure you have a light source if you need to explore dark areas
- Prioritize movements over examining random things
- PAY ATTENTION to actions you have already done in your situation (for instance, try every possible direction mentioned in the situation)
DO NOT repeat the same action endlessly."""
| # ============================================================================= | |
| # Student Agent Implementation | |
| # ============================================================================= | |
class StudentAgent:
    """
    MCP ReAct Agent - A complete working example.

    This agent demonstrates:
    - ReAct loop (Thought -> Tool -> Observation)
    - Loop detection (a warning is injected into the prompt when the same
      action was issued three times in a row)
    - Action validation (unsupported verbs are rewritten before being sent)
    - Score tracking (parsed from the text the game returns)

    State kept on the instance persists across calls to ``run``.
    """

    # Verbs the LLM tends to produce that the game does not accept (see the
    # FORBIDDEN list in SYSTEM_PROMPT), mapped to an accepted replacement.
    _VERB_FIXES = {
        "check": "examine",
        "inspect": "examine",
        "search": "look",
        "grab": "take",
        "pick": "take",
        "use": "examine",
        "investigate": "examine",
    }

    # Substrings (lower-case) that mark the end of the game.
    _GAME_OVER_PHRASES = ("game over", "you have died", "you are dead")

    # Each pattern captures a score value; all are tried and the best is kept.
    _SCORE_PATTERNS = tuple(
        re.compile(pattern, re.IGNORECASE)
        for pattern in (
            r'Score:\s*(\d+)',
            r'score[:\s]+(\d+)',
            r'\[Score:\s*(\d+)',
        )
    )

    # How many of the latest actions are remembered for loop detection.
    _MAX_RECENT_ACTIONS = 5

    # Number of identical consecutive actions that triggers the loop warning.
    _LOOP_WINDOW = 3

    def __init__(self):
        """Initialize the agent state."""
        # One dict per executed step: step, thought, action, truncated result.
        self.history: list[dict] = []
        # Latest actions sent to the game, newest last (bounded).
        self.recent_actions: list[str] = []
        # Highest score seen so far in game output.
        self.score: int = 0
        # Actions already tried, keyed by the "look" text of the location.
        self.location_moves: dict[str, list[str]] = {}

    async def run(
        self,
        client,
        game: str,
        max_steps: int,
        seed: int,
        verbose: bool = False,
    ) -> "RunResult":
        """Run the agent for a game session.

        Args:
            client: MCP client exposing ``call_tool``; the ``play_action`` and
                ``get_score`` tools are used.
            game: Name of the game. Currently not used by this method.
            max_steps: Maximum number of LLM-chosen actions to execute.
            seed: Base seed; step ``i`` calls the LLM with ``seed + i``.
            verbose: Print observations, thoughts and actions when True.

        Returns:
            A ``RunResult`` summarizing score, moves, visited locations and the
            per-step (thought, action, result) history.
        """
        locations_visited = set()
        history = []
        moves = 0

        # Get initial observation
        observation = await self._play(client, "look")
        locations_visited.add(self._location_of(observation))

        if verbose:
            print(f"\n{observation}")

        # Main ReAct loop
        for step in range(1, max_steps + 1):
            # Refresh the context shown to the LLM. These go through the same
            # play_action tool as real moves but are not counted in `moves`.
            inventory = await self._play(client, "inventory")
            look = await self._play(client, "look")
            listen = await self._play(client, "listen")

            prompt = self._build_prompt(
                observation, inventory, look, listen, self.location_moves.get(look, [])
            )

            # NOTE(review): the get_score result was never used by the original
            # code; the call is kept in case the server relies on it, but the
            # score is tracked from observation text in _update_score.
            await client.call_tool("get_score", {})

            # Call LLM for reasoning (use step-based seed for variety)
            response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
            thought, action = self._parse_response(response)

            if verbose:
                print(f"\n--- Step {step} ---")
                print(f"[THOUGHT] {thought}")
                print(f"[ACTION] {action}")

            action = self._validate_action_call(action)
            moves += 1

            try:
                observation = await self._play(client, action)
                # Remember what was tried here so the prompt can discourage repeats.
                self.location_moves.setdefault(look, []).append(action)
                if verbose:
                    print(f"[RESULT] {observation[:200]}...")
            except Exception as e:
                # A failing tool call must not abort the session; the error text
                # becomes the observation the LLM sees next.
                observation = f"Error: {e}"
                if verbose:
                    print(f"[ERROR] {e}")

            # Feed loop detection; keep only the latest few actions.
            self.recent_actions.append(action)
            del self.recent_actions[:-self._MAX_RECENT_ACTIONS]

            # Track location
            locations_visited.add(self._location_of(observation))

            # Update history (agent memory and the record returned to the caller)
            short_result = observation[:200]
            self.history.append({
                "step": step,
                "thought": thought,
                "action": action,
                "result": short_result,
            })
            history.append((thought, action, short_result))

            # Track score from observation
            self._update_score(observation)

            # Check for game over
            if self._is_game_over(observation):
                if verbose:
                    print("\n*** GAME OVER ***")
                break

        return RunResult(
            final_score=self.score,
            # Hard-coded regardless of `game`; presumably the Zork I maximum.
            max_score=350,
            moves=moves,
            locations_visited=locations_visited,
            game_completed=self._is_game_over(observation),
            history=history,
        )

    async def _play(self, client, action: str) -> str:
        """Send one command through the ``play_action`` tool and return its text."""
        result = await client.call_tool("play_action", {"action": action})
        return self._extract_result(result)

    @staticmethod
    def _location_of(observation: str) -> str:
        """Use the first line of an observation as the location name."""
        return observation.split("\n")[0] if observation else "Unknown"

    @staticmethod
    def _strip_markdown(text: str) -> str:
        """Remove the emphasis/code markers (``*`` and backticks) LLMs add."""
        return text.replace("*", "").replace("`", "")

    def _build_prompt(self, observation: str, inventory: str, look: str, listen: str, location_moves: list[str]) -> str:
        """Build the prompt for the LLM with context.

        Args:
            observation: Result text of the most recent action.
            inventory: Output of the ``inventory`` command.
            look: Output of the ``look`` command (current situation).
            listen: Output of the ``listen`` command.
            location_moves: Actions already tried in the current situation.

        Returns:
            The user prompt: inventory, older history (newest first, with
            results shortened the further back they are), an optional loop
            warning, the current situation, and the latest result in full.
        """
        base_reason_size = 100   # result length for the newest entries
        short_reason_size = 40   # result length for older entries
        base_size_threshold = 3  # entries from this index on use the short size
        short_size_threshold = 10  # entries from this index on show the action only
        action_threshold = 30    # stop once the entry with this index has been listed

        parts = [f"Inventory :{inventory}"]

        # Recent history
        if self.history:
            parts.append("\nRecent actions -> Recent Results:")
            # Newest first, skipping the latest entry: its result is shown in
            # full at the end of the prompt.
            for nb, entry in enumerate(self.history[-2::-1]):
                reason_size = short_reason_size if nb >= base_size_threshold else base_reason_size
                action = entry.get("action")
                result = entry["result"]
                result_short = result[:reason_size] + "..." if len(result) > reason_size else result
                if nb >= short_size_threshold:
                    parts.append(f"> {action}")
                else:
                    parts.append(f"> {action} -> {result_short}")
                if nb >= action_threshold:
                    break

        # Warn only once the same action really was issued several times in a row.
        latest = self.recent_actions[-self._LOOP_WINDOW:]
        if len(latest) == self._LOOP_WINDOW and len(set(latest)) == 1:
            parts.append(f"\n[WARNING: You've been doing '{latest[-1]}' repeatedly. TRY SOMETHING DIFFERENT!]")

        # Computed outside the f-strings: nesting the same quote type inside an
        # f-string is a syntax error before Python 3.12.
        tried = ",".join(location_moves) if location_moves else "none"
        last_action = self.history[-1].get("action") if self.history else ""

        parts.append(f"Here is your current situation :{look}")
        parts.append(f"Here is what you hear in this situation :{listen}")
        parts.append(f"Here are the actions you have already tried in this situation, avoid repeating if possible (but do not hallucinate directions or objects) : {tried}")
        parts.append(f"\nResult of your most recent action ({last_action}):\n{observation}")
        parts.append("\nWhat do you do next?")
        return "\n".join(parts)

    def _parse_response(self, response: str) -> tuple[str, str]:
        """Parse the LLM response to extract the thought and the action.

        Lines labelled ``THOUGHT:`` and ``ACTION:`` are recognized
        case-insensitively, also when the label is wrapped in markdown
        (e.g. ``**ACTION:**``). If a label occurs more than once the last one
        wins.

        Returns:
            ``(thought, action)``; defaults are ``"No reasoning provided"`` and
            ``"look"`` when the response is empty or a label is missing.
        """
        thought = "No reasoning provided"
        action = "look"

        # `response` can be None/empty when the model returns no text.
        for line in (response or "").strip().split("\n"):
            label, colon, value = line.strip().partition(":")
            if not colon:
                continue
            key = self._strip_markdown(label).strip().upper()
            if key == "THOUGHT":
                thought = self._strip_markdown(value).strip()
            elif key == "ACTION":
                cleaned = self._strip_markdown(value.lower()).strip()
                action = cleaned or "look"

        return thought, action

    def _validate_action_call(self, action: str) -> str:
        """Normalize an action and replace verbs the game does not accept.

        The action is lower-cased, markdown markers are removed and whitespace
        is collapsed *before* the verb is inspected, so a verb wrapped in
        backticks or asterisks is still rewritten. ``pick up X`` becomes
        ``take X``.

        Returns:
            The cleaned command, or ``"look"`` if nothing is left.
        """
        words = self._strip_markdown(action.lower()).split()
        if words[:2] == ["pick", "up"]:
            words[:2] = ["take"]
        elif words and words[0] in self._VERB_FIXES:
            words[0] = self._VERB_FIXES[words[0]]
        return " ".join(words) or "look"

    def _extract_result(self, result) -> str:
        """Extract text from MCP tool result.

        Accepts an object with a non-empty ``content`` sequence or a plain
        list; the first item's ``text`` is returned when it has one, otherwise
        its ``str()``. Anything else is converted with ``str()``.
        """
        items = getattr(result, "content", None)
        if not items and isinstance(result, list):
            items = result
        if items:
            first = items[0]
            return first.text if hasattr(first, "text") else str(first)
        return str(result)

    def _update_score(self, text: str) -> None:
        """Update score from game text.

        Every known score pattern is tried; ``self.score`` only ever increases.
        """
        for pattern in self._SCORE_PATTERNS:
            match = pattern.search(text)
            if match:
                self.score = max(self.score, int(match.group(1)))

    def _is_game_over(self, text: str) -> bool:
        """Check if the game is over (case-insensitive phrase match)."""
        text_lower = text.lower()
        return any(phrase in text_lower for phrase in self._GAME_OVER_PHRASES)
| # ============================================================================= | |
| # Local Testing | |
| # ============================================================================= | |
async def test_agent():
    """Play a short verbose "zork1" session via mcp_server.py and print a summary."""
    from fastmcp import Client

    student = StudentAgent()
    run_options = {
        "game": "zork1",
        "max_steps": 20,
        "seed": 42,
        "verbose": True,
    }

    async with Client("mcp_server.py") as client:
        outcome = await student.run(client=client, **run_options)

        # Summary of the finished session.
        separator = "=" * 50
        print(f"\n{separator}")
        print(f"Final Score: {outcome.final_score}")
        print(f"Moves: {outcome.moves}")
        print(f"Locations: {len(outcome.locations_visited)}")
# Script entry point: run the local test session when executed directly.
if __name__ == "__main__":
    # Imported here because asyncio is only needed for this entry point.
    import asyncio

    asyncio.run(test_agent())