# Author: Armand BLIN
# feat(server): new logic and report
# Commit: 8f05d7e
"""
Student Agent for Text Adventure Games
This is your submission file. Implement the StudentAgent class to play
text adventure games using the MCP server you also implement.
Your agent should:
1. Connect to the MCP server via the provided client
2. Use the ReAct pattern (Thought -> Action -> Observation)
3. Call MCP tools to interact with the game
4. Maximize the game score within the step limit
"""
import json
import os
import re
from dataclasses import dataclass, field
from typing import Optional
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
# Load environment variables from a local .env file (e.g. HF_TOKEN).
load_dotenv()
# =============================================================================
# LLM Configuration
# =============================================================================
# Model used for both the summarizer and the reasoning calls.
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
_hf_token = os.getenv("HF_TOKEN")
if not _hf_token:
    # Fail fast at import time: nothing works without an inference token.
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")
# Shared Hugging Face inference client for all LLM calls.
LLM_CLIENT = InferenceClient(token=_hf_token)
def call_llm(
    prompt: str,
    system_prompt: str,
    seed: int,
    max_tokens: int = 300,
) -> str:
    """Call the chat LLM and return the assistant's reply text.

    Args:
        prompt: User-turn content.
        system_prompt: System-turn content steering the model.
        seed: Sampling seed forwarded to the API for reproducibility
            (temperature is pinned to 0.0).
        max_tokens: Upper bound on generated tokens.

    Returns:
        The content of the first choice's message.

    Raises:
        Exception: Re-raises any error from the inference client after
            printing it.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    try:
        response = LLM_CLIENT.chat.completions.create(
            model=LLM_MODEL,
            messages=messages,
            temperature=0.0,
            max_tokens=max_tokens,
            seed=seed,
        )
    except Exception as e:
        print(f"Error calling LLM: {e}")
        # Bare `raise` re-raises the active exception with its original
        # traceback (preferred over `raise e`, which re-binds the frame).
        raise
    else:
        return response.choices[0].message.content
@dataclass
class RunResult:
    """Result of running the agent."""

    # Score at the moment the run ended.
    final_score: int
    # Highest score observed at any point during the run.
    max_score: int
    # Number of ReAct steps actually executed.
    moves: int
    # First observation lines seen, used as location identifiers.
    locations_visited: set[str]
    # True when the game emitted "GAME OVER" during the run.
    game_completed: bool
    # Set when the run aborted with an error; None on a clean run.
    error: Optional[str] = None
    # (thought, "tool{args}", observation) triples, oldest first.
    history: list[tuple[str, str, str]] = field(default_factory=list)
# =============================================================================
# Console Formatting
# =============================================================================
class Colors:
    """ANSI Escape codes for pretty console output."""

    # Foreground colors.
    HEADER = "\033[95m"
    BLUE = "\033[94m"
    CYAN = "\033[96m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    RED = "\033[91m"
    # Text attributes.
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"
    # Reset all colors/attributes.
    ENDC = "\033[0m"
# =============================================================================
# System Prompt
# =============================================================================
# System prompt for the first LLM pass: summarizes the game state and
# suggests next actions before the reasoning model picks a tool call.
# NOTE(review): "SUMMURIZER" is a typo for "SUMMARIZER"; renaming would
# require updating the reference in StudentAgent.run().
SUMMURIZER_SYSTEM_PROMPT = """
You are helping an agent to play a text adventure game. To help this agent you should return a good summary of the current observation and the history of the agent's actions and observations.
If applicable, use the history to give potential next best actions in few sentences (max 250 words). If you think the agent is stuck, you should give a hint to the agent to get unstuck.
Here are some hints to help the agent:
The first hint you should give is: looking in, under, on, or behind stuff, it could be useful to get points and can help get unstuck. Try to make the model look in stuff that could contain coins, keys, etc.
Also if you are in room with a window, you should look out of the window to see if there is a way to get out of the room.
You should make sure to examine the whole room before moving to another direction.
You should tell sometimes to use `get_history` or `get_valid_actions` to get the history of the agent's actions and observations by location or the list of valid actions you can perform in the current location. This could be useful to get unstuck.
YOUR RESPONSE FORMAT SHOULD BE:
SUMMARY: <summary of the prompt>
NEXT BEST ACTIONS: <list of potential next best actions in few sentences (max 250 words)>
"""
# System prompt for the second LLM pass: the reasoning model that emits the
# THOUGHT/TOOL/ARGS triple parsed by StudentAgent._parse_response.
# NOTE(review): the in-string phrase "Your inventory is pass to you" is a
# grammar slip, but the prompt text is runtime behavior; fixing it would
# change what the model receives, so it is left untouched here.
SYSTEM_PROMPT = """
You are an expert Text Adventure Game Agent.
Your objective is to MAXIMIZE SCORE and COMPLETE THE GAME by navigating the world and solving puzzles.
### TOOL PROTOCOLS AND SYNTAX
#### LIST OF AVAILABLE TOOLS:
1. play_action (TAKES ARGS: {"action": "<command>"}) - Execute game commands (north, take lamp, open mailbox, etc.)
2. get_history (TAKES NO ARGS) - Get the history of your actions and observations by location. Very important!
3. get_valid_actions (TAKES NO ARGS) - Get the list of valid actions you can perform in the current location. Very important!
4. explore_surroundings (TAKES NO ARGS)- Explore the surroundings of the current location. Very important!
* **Strict Syntax:** The game parser is old and strict. Do not use adjectives or articles unless necessary.
* *Bad:* "pick up the shiny egg", "examine the old mailbox"
* *Good:* "take egg", "examine mailbox", "look in fountain"
* **Validator:** Before attempting complex interactions, ALWAYS check `get_valid_actions`.
* If the tool returns "take egg", you must send exactly "take egg" to the `play_action` tool.
* **Execution:** execute commands using `play_action(action="<command>")`.
* **Map & History:** Use `get_history` to track your path and the history of your actions and observations by location.
* **Loop Avoidance:** If you have visited a room 3 times with no progress, navigate a different direction immediately.
* **Dead Ends:** Never retry a direction that previously returned "You can't go that way".
* **Surveying:** Use `explore_surroundings` to identify exits and descriptions.
* Example of Movement commands: `go north`, `go northeast`, `go northwest`, `go south`, `go southeast`, `go southwest`, `go east`, `go west`, `go up`, `go down`, `enter`, `exit`, etc.
* **Kleptomania:** If an item is portable, `take` it. Inventory items are crucial for puzzles.
* **Investigation Loop:**
1. **Examine:** Use `examine <object>` on every new object with the `play_action` tool.
2. **Look In:** Use `look in <object>` for containers with the `play_action` tool. *Note: "Examine" and "Look In" are distinct commands.*
3. **Inventory:** Your inventory is pass to you at each step, you should use it to your advantage.
### OUTPUT FORMAT
You operate in a strict Thought-Action loop. You must output in this format:
THOUGHT: <Step-by-step reasoning. Reference valid actions and history. Do not repeat the same action multiple times in a row.>
TOOL: <tool_name>
ARGS: <JSON arguments>
"""
# =============================================================================
# Student Agent
# =============================================================================
class StudentAgent:
"""
A robust ReAct agent for text adventure games.
Features:
- Loop detection
- Valid action prioritization
- Robust parsing
"""
def __init__(self):
self.history = []
self.visited_locations = set()
self.location_counts = {} # Track how often we visit a room ID
async def get_score(self, client) -> int:
"""Get the current score."""
response = await client.call_tool("get_score", None)
return int(self._extract_result(response))
async def run(
self,
client,
game: str,
max_steps: int,
seed: int,
verbose: bool = False,
) -> RunResult:
# 1. Initialization
try:
# Start by looking
result = await client.call_tool("play_action", {"action": "look"})
observation = self._extract_result(result)
except Exception as e:
observation = f"Error starting game: {e}"
score = await self.get_score(client)
max_score = score
moves = 0
game_completed = False
for step in range(1, max_steps + 1):
if verbose:
print(
f"\n{Colors.BOLD}{Colors.CYAN}--- Step {step} | Score: {score} | Moves: {moves} | # Locations Visited: {len(self.visited_locations)} ---{Colors.ENDC}"
)
# Get current user location
current_loc_line = observation.strip().split("\n")[0]
self.location_counts[current_loc_line] = (
self.location_counts.get(current_loc_line, 0) + 1
)
# 3. Construct Dynamic Prompt
# If we are visiting a place too often or stuck, inject a hint
hint = "You could use the `explore_surroundings` tool to explore the surroundings of the current location."
if self.location_counts[current_loc_line] > 3:
hint = f"\n[SYSTEM HINT]: You have been in '{current_loc_line}' {self.location_counts[current_loc_line]} times. Stop looping. Go somewhere new."
# Build prompt with context
prompt = self._build_prompt(observation, self.history, hint)
# Get current inventory and pass it to the summurizer
inventory_response = await client.call_tool("inventory", {})
inventory = self._extract_result(inventory_response)
# Let's summurize the prompt with another LLM call that could hint to the next best action
prompt += f"\n\nCURRENT INVENTORY: {inventory}"
summary_response = call_llm(
prompt,
SUMMURIZER_SYSTEM_PROMPT,
seed,
max_tokens=256,
)
summary = self._extract_result(summary_response)
if verbose:
print(
f"{Colors.BOLD}{Colors.YELLOW}SYSTEM SUMMARY:{Colors.ENDC}\n{summary}"
)
# Call reasoning LLM to take the next best action
response = call_llm(summary, SYSTEM_PROMPT, seed, max_tokens=256)
thought, tool_name, tool_args = self._parse_response(response)
if verbose:
# BOLD YELLOW for Thought
print(f"{Colors.BOLD}{Colors.YELLOW}THOUGHT:{Colors.ENDC} {thought}")
# BOLD GREEN for Tool/Args
print(f"{Colors.BOLD}{Colors.GREEN}TOOL:{Colors.ENDC} {tool_name}")
print(f"{Colors.BOLD}{Colors.GREEN}ARGS:{Colors.ENDC} {tool_args}")
# 6. Execute
try:
raw_result = await client.call_tool(tool_name, tool_args)
observation = self._extract_result(raw_result)
if tool_name == "get_valid_actions":
valid_actions_result = observation
if verbose:
print(
f"{Colors.BOLD}{Colors.BLUE}OBSERVATION:{Colors.ENDC}\n{observation.strip()}"
)
except Exception as e:
observation = f"Tool Execution Error: {e}"
if verbose:
print(f"{Colors.BOLD}{Colors.RED}ERROR:{Colors.ENDC} {e}")
current_score = await self.get_score(client)
if current_score != score:
print(
f"{Colors.BOLD}{Colors.GREEN}SYSTEM:{Colors.ENDC} NEW SCORE: {current_score} (+{current_score - score})"
)
score = current_score
max_score = max(max_score, score)
# Update history
self.history.append(
(thought, f"{tool_name}{tool_args}", observation)
) # Truncate obs for history to save context
self.visited_locations.add(current_loc_line)
moves += 1
if "GAME OVER" in observation:
game_completed = True
if verbose:
print(f"\n{Colors.BOLD}{Colors.RED}*** GAME OVER ***{Colors.ENDC}")
break
return RunResult(
final_score=score,
max_score=max_score,
moves=moves,
locations_visited=self.visited_locations,
game_completed=game_completed,
history=self.history,
)
def _extract_result(self, result) -> str:
if hasattr(result, "content") and result.content:
return result.content[0].text
return str(result)
def _build_prompt(self, observation: str, history: list, hint: str) -> str:
"""Constructs a context-aware prompt."""
# Compress history
hist_str = ""
for i, (t, a, o) in enumerate(
history[max(0, len(history) - 5) :]
): # Only last 5
hist_str += f"- Action: {a}\n Result: {o[:200]}...\n"
return f"""
CURRENT GAME STATUS:
{observation}
Last 5 Actions and Observations:
{hist_str}
{hint}
Response Format:
THOUGHT: ...
TOOL: ...
ARGS: ...
"""
def _parse_response(self, response: str) -> tuple[str, str, dict]:
"""Robust parsing of LLM output."""
thought = "Thinking..."
tool_name = "play_action"
tool_args = {"action": "look"}
# Normalize
lines = response.strip().split("\n")
for line in lines:
line = line.strip()
if not line:
continue
if line.upper().startswith("THOUGHT:"):
thought = line[8:].strip()
elif line.upper().startswith("TOOL:"):
tool_name = line[5:].strip()
elif line.upper().startswith("ARGS:"):
args_str = line[5:].strip()
try:
# Try pure JSON
tool_args = json.loads(args_str)
except:
# Fallback for simple dict string like {'action': 'north'}
try:
# Replace single quotes with double for strict JSON
args_str_fixed = args_str.replace("'", '"')
tool_args = json.loads(args_str_fixed)
except:
# Fallback: assume it's just the action string or broken json
# If the tool is play_action, assume the rest of the line is the action
if tool_name == "play_action":
# Regex to extract value from {"action": "value"} or just "value"
match = re.search(r':\s*"([^"]+)"', args_str)
if match:
tool_args = {"action": match.group(1)}
else:
tool_args = {"action": args_str}
return thought, tool_name, tool_args
# =============================================================================
# Local Testing
# =============================================================================
async def test_agent():
    """Smoke-test the agent against the local MCP server script."""
    from fastmcp import Client

    agent = StudentAgent()
    async with Client("mcp_server.py") as client:
        result = await agent.run(client, "zork1", 20, 42, True)

    banner = f"{Colors.BOLD}{Colors.HEADER}\nFinal Score: {result.final_score}{Colors.ENDC}"
    print(banner)
# Entry point for local testing: `python <this file>`.
if __name__ == "__main__":
    import asyncio
    asyncio.run(test_agent())