# mcngnt's picture
# Final submission
# b41e35c
"""
Example: MCP ReAct Agent
A complete ReAct agent that uses MCP tools to play text adventure games.
This is a working example students can learn from.
"""
import json
import os
import re
from dataclasses import dataclass, field
from typing import Optional
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
load_dotenv()
# =============================================================================
# LLM Configuration - DO NOT MODIFY
# =============================================================================
# Model identifier passed as `model` on every chat-completion request in call_llm.
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
# API token read from the environment (load_dotenv() above may populate it from a .env file).
_hf_token = os.getenv("HF_TOKEN")
if not _hf_token:
    # Fail at import time rather than on the first LLM call.
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")
# Shared client instance used by call_llm for all requests.
LLM_CLIENT = InferenceClient(token=_hf_token)
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """
    Call the LLM with the given prompt. Use this function in your agent.

    Sends a two-message chat (system + user) to ``LLM_CLIENT`` using
    ``LLM_MODEL`` with temperature 0.0 and the given seed.

    Args:
        prompt: The user prompt (current game state, history, etc.)
        system_prompt: The system prompt (instructions for the agent)
        seed: Random seed for reproducibility
        max_tokens: Maximum tokens in response (default: 300)

    Returns:
        The message content of the first choice in the LLM response.

    Example:
        response = call_llm(
            prompt="You are in a forest. What do you do?",
            system_prompt=SYSTEM_PROMPT,
            seed=42,
        )
    """
    # The system prompt goes first, followed by the single user turn.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.0,  # Deterministic for reproducibility
        max_tokens=max_tokens,
        seed=seed,
    )
    # Only the first returned choice is used.
    return response.choices[0].message.content
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class."""
    # Score the agent reports at the end of the run.
    final_score: int
    # Maximum score attainable in the game.
    max_score: int
    # Number of actions the agent issued.
    moves: int
    # Distinct location labels recorded during the run.
    locations_visited: set[str]
    # True when the run ended because the game was detected as over.
    game_completed: bool
    # Optional error description; None when no error is reported.
    error: Optional[str] = None
    # Per-step records as 3-string tuples; each instance gets its own empty list by default.
    # NOTE(review): presumably (thought, action, observation) — confirm with the evaluation harness.
    history: list[tuple[str, str, str]] = field(default_factory=list)
# =============================================================================
# System Prompt
# =============================================================================
# System message passed to call_llm on every step of StudentAgent.run.
# It lists the command vocabulary, fixes the reply layout
# (POSSIBLE ACTIONS / THOUGHT / ACTION lines, which StudentAgent._parse_response
# reads back), and gives play strategy. The text is runtime data: any edit
# changes what the model is told.
SYSTEM_PROMPT = """You are an expert text adventure game player. Your goal is to explore, collect treasures, and interact with your environment.
VALID GAME COMMANDS:
- Movement: north, south, east, west, northeast, northwest, southwest, southeast, up, down, enter, exit
- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
- Light: turn on lamp, turn off lamp
- Combat: attack <enemy> with <weapon>, hit <thing>
- Other: read <thing>, wait, shout
- Communication: ask <person> about <thing>, tell <person> about <thing>
FORBIDDEN (will NOT work): check, inspect, search, grab, use, help, turn on torch
RESPOND IN THIS EXACT FORMAT (no markdown):
POSSIBLE ACTIONS: <list all reasonable actions possible in this situation>
THOUGHT: <brief reasoning about what to do next>
ACTION: <action_name>
Examples:
POSSIBLE ACTIONS: take sword, examine sword, east, south, ask villager about dragon
THOUGHT: I need to take the sword.
ACTION: take sword
POSSIBLE ACTIONS: examine mailbox, open mailbox, take mailbox, north, east, shout
THOUGHT: The mailbox might contain something useful.
ACTION: open mailbox
STRATEGY:
- Pick up any useful items (lamp, sword, pig, etc.). You do not have to take an item if it is already in your inventory.
- Explore as much as possible : prioritize moving to examining
- Examine objects only when they seem very interesting and if you are stuck
- Open containers (mailbox, window, etc.)
- Make sure you have a light source if you need to explore dark areas
- Prioritize movements over examining random things
- PAY ATTENTION to actions you have already done in your situation (for instance, try every possible direction mentioned in the situation)
DO NOT repeat the same action endlessly."""
# =============================================================================
# Student Agent Implementation
# =============================================================================
class StudentAgent:
    """
    ReAct-style agent that plays a text adventure through MCP tools.

    On every step the agent gathers context from the game (inventory, look,
    listen, score), asks the LLM for a THOUGHT / ACTION pair, normalizes the
    action and sends it to the ``play_action`` tool.

    This agent demonstrates:
    - ReAct loop (Thought -> Action -> Observation)
    - Loop detection (prompt warning after three identical actions in a row)
    - Action validation (rewriting verbs listed as unsupported)
    - Score tracking from game text and the ``get_score`` tool
    """

    # --- Prompt shaping -----------------------------------------------------
    # Past results are shown with fewer characters the older they are.
    _FULL_RESULT_CHARS = 100     # truncation for the newest past entries
    _SHORT_RESULT_CHARS = 40     # truncation for older past entries
    _FULL_RESULT_ENTRIES = 3     # how many past entries get the long truncation
    _RESULT_ENTRIES = 10         # past entries beyond this show the action only
    _MAX_PAST_ENTRIES = 31       # hard cap on past entries listed in the prompt

    # --- Loop detection -----------------------------------------------------
    _REPEAT_WINDOW = 3           # identical actions in a row that trigger a warning
    _RECENT_ACTIONS_KEPT = 5     # how many recent actions are remembered

    # Verbs the LLM tends to produce, mapped to verbs the prompt lists as valid.
    _VERB_FIXES = {
        "check": "examine",
        "inspect": "examine",
        "search": "look",
        "grab": "take",
        "pick": "take",
        "use": "examine",
        "investigate": "examine",
    }

    _SCORE_PATTERNS = (
        r'Score:\s*(\d+)',
        r'score[:\s]+(\d+)',
        r'\[Score:\s*(\d+)',
    )

    _GAME_OVER_PHRASES = (
        "game over",
        "you have died",
        "you are dead",
        "*** you have died ***",
    )

    def __init__(self):
        """Initialize the agent state."""
        # One dict per executed step: step, thought, action, result (truncated).
        self.history: list[dict] = []
        # Most recent actions, newest last; used only for loop detection.
        self.recent_actions: list[str] = []
        # Highest score seen so far.
        self.score: int = 0
        # Maps the full `look` text of a situation to the actions tried there.
        self.location_moves: dict = {}

    async def run(
        self,
        client,
        game: str,
        max_steps: int,
        seed: int,
        verbose: bool = False,
    ) -> "RunResult":
        """
        Run the agent for a game session.

        Args:
            client: MCP client exposing ``call_tool(name, arguments)``; the
                tools ``play_action`` and ``get_score`` are used.
            game: Name of the game. Currently not used by this method.
            max_steps: Maximum number of LLM-chosen actions to execute.
            seed: Base seed; step ``i`` calls the LLM with ``seed + i``.
            verbose: Print observations, thoughts and actions when True.

        Returns:
            A RunResult with the best score seen, the number of actions
            issued, the set of location labels recorded, whether a game-over
            phrase was detected, and one ``(thought, action, observation)``
            tuple per step.
        """
        locations_visited: set[str] = set()
        history: list[tuple[str, str, str]] = []
        moves = 0

        # Get initial observation
        observation = await self._play(client, "look")
        locations_visited.add(self._first_line(observation))
        if verbose:
            print(f"\n{observation}")

        # Main ReAct loop
        for step in range(1, max_steps + 1):
            # Refresh the context the prompt is built from.
            inventory = await self._play(client, "inventory")
            look = await self._play(client, "look")
            listen = await self._play(client, "listen")
            prompt = self._build_prompt(
                observation, inventory, look, listen,
                self.location_moves.get(look, []),
            )

            # Fold the server-reported score into self.score. _update_score only
            # picks up a number that directly follows the word "score".
            score_result = await client.call_tool("get_score", {})
            self._update_score(self._extract_result(score_result))

            # Call LLM for reasoning (use step-based seed for variety)
            response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
            thought, action = self._parse_response(response)
            if verbose:
                print(f"\n--- Step {step} ---")
                print(f"[THOUGHT] {thought}")
                print(f"[ACTION] {action}")

            action = self._validate_action_call(action)
            moves += 1
            try:
                observation = await self._play(client, action)
                # Remember the action under the situation it was tried in.
                self.location_moves.setdefault(look, []).append(action)
                if verbose:
                    print(f"[RESULT] {observation[:200]}...")
            except Exception as e:
                # Keep playing: the error text becomes the next observation.
                observation = f"Error: {e}"
                if verbose:
                    print(f"[ERROR] {e}")

            # Feed loop detection; only the last few actions are kept.
            self.recent_actions.append(action)
            del self.recent_actions[:-self._RECENT_ACTIONS_KEPT]

            # Track location (first line of the observation)
            locations_visited.add(self._first_line(observation))

            # Update history (agent-internal and the copy returned to the caller)
            self.history.append({
                "step": step,
                "thought": thought,
                "action": action,
                "result": observation[:200],
            })
            history.append((thought, action, observation[:200]))

            # Track score from observation
            self._update_score(observation)

            # Check for game over
            if self._is_game_over(observation):
                if verbose:
                    print("\n*** GAME OVER ***")
                break

        return RunResult(
            final_score=self.score,
            # NOTE(review): hard-coded; presumably the Zork I maximum — confirm for other games.
            max_score=350,
            moves=moves,
            locations_visited=locations_visited,
            game_completed=self._is_game_over(observation),
            history=history,
        )

    async def _play(self, client, action: str) -> str:
        """Send one command to the ``play_action`` tool and return its text."""
        result = await client.call_tool("play_action", {"action": action})
        return self._extract_result(result)

    @staticmethod
    def _first_line(observation: str) -> str:
        """Return the first line of an observation, or "Unknown" if it is empty."""
        return observation.split("\n")[0] if observation else "Unknown"

    @staticmethod
    def _strip_markup(text: str) -> str:
        """Remove markdown emphasis/backticks and collapse runs of whitespace."""
        cleaned = text.replace("**", "").replace("*", "").replace("`", "")
        return " ".join(cleaned.split())

    def _build_prompt(self, observation: str, inventory: str, look: str, listen: str, location_moves: list[str]) -> str:
        """
        Build the prompt for the LLM with context.

        Args:
            observation: Result text of the most recent action.
            inventory: Text returned by the ``inventory`` command.
            look: Text returned by the ``look`` command.
            listen: Text returned by the ``listen`` command.
            location_moves: Actions already tried in the current situation.

        Returns:
            The prompt text: inventory, past actions (newest first, older
            entries progressively shortened), an optional repetition warning,
            the current situation, and the latest result.
        """
        parts = [f"Inventory :{inventory}"]

        # Recent history
        if self.history:
            parts.append("\nRecent actions -> Recent Results:")
            # Newest first, skipping the latest entry: it is reported in full
            # at the end of the prompt.
            older = self.history[-2::-1][:self._MAX_PAST_ENTRIES]
            for age, entry in enumerate(older):
                action = entry.get("action")
                if age >= self._RESULT_ENTRIES:
                    parts.append(f"> {action}")
                    continue
                limit = (
                    self._FULL_RESULT_CHARS
                    if age < self._FULL_RESULT_ENTRIES
                    else self._SHORT_RESULT_CHARS
                )
                result = entry["result"]
                if len(result) > limit:
                    result = result[:limit] + "..."
                parts.append(f"> {action} -> {result}")

        # Warn only once the same action fills the whole detection window.
        window = self.recent_actions[-self._REPEAT_WINDOW:]
        if len(window) == self._REPEAT_WINDOW and len(set(window)) == 1:
            parts.append(f"\n[WARNING: You've been doing '{window[-1]}' repeatedly. TRY SOMETHING DIFFERENT!]")

        parts.append(f"Here is your current situation :{look}")
        parts.append(f"Here is what you hear in this situation :{listen}")

        # Computed outside the f-strings: reusing the enclosing quote character
        # inside a replacement field is a syntax error before Python 3.12.
        tried = ",".join(location_moves) if location_moves else "none"
        parts.append(f"Here are the actions you have already tried in this situation, avoid repeating if possible (but do not hallucinate directions or objects) : {tried}")
        last_action = self.history[-1].get("action") if self.history else ""
        parts.append(f"\nResult of your most recent action ({last_action}):\n{observation}")
        parts.append("\nWhat do you do next?")
        return "\n".join(parts)

    def _parse_response(self, response: str) -> tuple[str, str]:
        """
        Parse the LLM response to extract the thought and the action.

        Lines starting with ``THOUGHT:`` / ``ACTION:`` (case-insensitive,
        optionally preceded by markdown decoration) are read; when a label
        occurs more than once the last occurrence wins.

        Returns:
            ``(thought, action)``. Defaults are "No reasoning provided" and
            "look" when a label is missing, the action is empty, or the
            response itself is empty/None.
        """
        thought = "No reasoning provided"
        action = "look"
        for raw_line in (response or "").strip().split("\n"):
            # Tolerate decoration such as "**ACTION:** ..." or "> ACTION: ...".
            line = raw_line.strip().lstrip("*`#> ")
            label = line.upper()
            if label.startswith("THOUGHT:"):
                thought = line.split(":", 1)[1].strip().lstrip("*").strip()
            elif label.startswith("ACTION:"):
                candidate = self._strip_markup(line.split(":", 1)[1]).lower()
                action = candidate or "look"
        return thought, action

    def _validate_action_call(self, action: str) -> str:
        """
        Normalize an action and rewrite verbs the game is told not to accept.

        Markdown decoration is removed first so the leading verb can be
        recognized, "pick up <item>" is reduced to "take <item>", and an
        action that is empty after cleaning falls back to "look".

        Returns:
            The cleaned, lower-cased command with single spaces.
        """
        words = self._strip_markup(action or "").lower().split()
        if not words:
            return "look"
        # "pick up lamp" must become "take lamp", not "take up lamp".
        if words[0] == "pick" and len(words) > 2 and words[1] == "up":
            del words[1]
        words[0] = self._VERB_FIXES.get(words[0], words[0])
        return " ".join(words)

    def _extract_result(self, result) -> str:
        """
        Extract text from MCP tool result.

        Accepts an object with a non-empty ``content`` sequence, a non-empty
        list, or anything else (converted with ``str``). Items without a
        ``text`` attribute are converted with ``str`` as well.
        """
        content = getattr(result, "content", None)
        if content:
            first = content[0]
            return first.text if hasattr(first, "text") else str(first)
        if isinstance(result, list) and result:
            first = result[0]
            return first.text if hasattr(first, "text") else str(first)
        return str(result)

    def _update_score(self, text: str) -> None:
        """
        Update score from game text.

        Each pattern contributes its first match; ``self.score`` only ever
        increases.
        """
        for pattern in self._SCORE_PATTERNS:
            match = re.search(pattern, text or "", re.IGNORECASE)
            if match:
                self.score = max(self.score, int(match.group(1)))

    def _is_game_over(self, text: str) -> bool:
        """Check if the game is over (case-insensitive phrase search)."""
        text_lower = (text or "").lower()
        return any(phrase in text_lower for phrase in self._GAME_OVER_PHRASES)
# =============================================================================
# Local Testing
# =============================================================================
async def test_agent():
    """Play a short verbose session against the local MCP server and print a summary."""
    # Imported lazily so the module can be loaded without fastmcp installed.
    from fastmcp import Client

    student = StudentAgent()
    session_options = {
        "game": "zork1",
        "max_steps": 20,
        "seed": 42,
        "verbose": True,
    }
    async with Client("mcp_server.py") as client:
        outcome = await student.run(client=client, **session_options)
        separator = "=" * 50
        print(f"\n{separator}")
        print(f"Final Score: {outcome.final_score}")
        print(f"Moves: {outcome.moves}")
        print(f"Locations: {len(outcome.locations_visited)}")


if __name__ == "__main__":
    # Script entry point: drive the async test session to completion.
    import asyncio

    asyncio.run(test_agent())