# Author: Armand BLIN
# feat(server): new logic and report
# Commit: 8f05d7e
"""
Student Agent for Text Adventure Games
This is your submission file. Implement the StudentAgent class to play
text adventure games using the MCP server you also implement.
Your agent should:
1. Connect to the MCP server via the provided client
2. Use the ReAct pattern (Thought -> Action -> Observation)
3. Call MCP tools to interact with the game
4. Maximize the game score within the step limit
"""
import json
import os
import re
from dataclasses import dataclass, field
from typing import Optional
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
# Load environment variables from a local .env file (e.g. HF_TOKEN).
load_dotenv()
# =============================================================================
# LLM Configuration
# =============================================================================
# Model used for both the summarizer and the reasoning calls.
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
_hf_token = os.getenv("HF_TOKEN")
if not _hf_token:
    # Fail fast at import time: nothing works without an inference token.
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")
# Shared Hugging Face inference client for all LLM calls.
LLM_CLIENT = InferenceClient(token=_hf_token)
def call_llm(
    prompt: str,
    system_prompt: str,
    seed: int,
    max_tokens: int = 300,
) -> str:
    """Call the chat LLM and return the assistant's reply text.

    Args:
        prompt: User-turn content.
        system_prompt: System-turn content steering the model.
        seed: Sampling seed forwarded to the API for reproducibility
            (temperature is pinned to 0.0).
        max_tokens: Upper bound on generated tokens.

    Returns:
        The content of the first choice's message.

    Raises:
        Exception: Re-raises any error from the inference client after
            printing it.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    try:
        response = LLM_CLIENT.chat.completions.create(
            model=LLM_MODEL,
            messages=messages,
            temperature=0.0,
            max_tokens=max_tokens,
            seed=seed,
        )
    except Exception as e:
        print(f"Error calling LLM: {e}")
        # Bare `raise` re-raises the active exception with its original
        # traceback (preferred over `raise e`, which re-binds the frame).
        raise
    else:
        return response.choices[0].message.content
@dataclass
class RunResult:
    """Result of running the agent."""

    # Score at the moment the run ended.
    final_score: int
    # Highest score observed at any point during the run.
    max_score: int
    # Number of ReAct steps actually executed.
    moves: int
    # First observation lines seen, used as location identifiers.
    locations_visited: set[str]
    # True when the game emitted "GAME OVER" during the run.
    game_completed: bool
    # Set when the run aborted with an error; None on a clean run.
    error: Optional[str] = None
    # (thought, "tool{args}", observation) triples, oldest first.
    history: list[tuple[str, str, str]] = field(default_factory=list)
# =============================================================================
# Console Formatting
# =============================================================================
class Colors:
    """ANSI Escape codes for pretty console output."""

    # Foreground colors.
    HEADER = "\033[95m"
    BLUE = "\033[94m"
    CYAN = "\033[96m"
    GREEN = "\033[92m"
    YELLOW = "\033[93m"
    RED = "\033[91m"
    # Text attributes.
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"
    # Reset all colors/attributes.
    ENDC = "\033[0m"
# =============================================================================
# System Prompt
# =============================================================================
# System prompt for the first LLM pass: summarizes the game state and
# suggests next actions before the reasoning model picks a tool call.
# NOTE(review): "SUMMURIZER" is a typo for "SUMMARIZER"; renaming would
# require updating the reference in StudentAgent.run().
SUMMURIZER_SYSTEM_PROMPT = """
You are helping an agent to play a text adventure game. To help this agent you should return a good summary of the current observation and the history of the agent's actions and observations.
If applicable, use the history to give potential next best actions in few sentences (max 250 words). If you think the agent is stuck, you should give a hint to the agent to get unstuck.
Here are some hints to help the agent:
The first hint you should give is: looking in, under, on, or behind stuff, it could be useful to get points and can help get unstuck. Try to make the model look in stuff that could contain coins, keys, etc.
Also if you are in room with a window, you should look out of the window to see if there is a way to get out of the room.
You should make sure to examine the whole room before moving to another direction.
You should tell sometimes to use `get_history` or `get_valid_actions` to get the history of the agent's actions and observations by location or the list of valid actions you can perform in the current location. This could be useful to get unstuck.
YOUR RESPONSE FORMAT SHOULD BE:
SUMMARY: <summary of the prompt>
NEXT BEST ACTIONS: <list of potential next best actions in few sentences (max 250 words)>
"""
# System prompt for the second LLM pass: the reasoning model that emits the
# THOUGHT/TOOL/ARGS triple parsed by StudentAgent._parse_response.
# NOTE(review): the in-string phrase "Your inventory is pass to you" is a
# grammar slip, but the prompt text is runtime behavior; fixing it would
# change what the model receives, so it is left untouched here.
SYSTEM_PROMPT = """
You are an expert Text Adventure Game Agent.
Your objective is to MAXIMIZE SCORE and COMPLETE THE GAME by navigating the world and solving puzzles.
### TOOL PROTOCOLS AND SYNTAX
#### LIST OF AVAILABLE TOOLS:
1. play_action (TAKES ARGS: {"action": "<command>"}) - Execute game commands (north, take lamp, open mailbox, etc.)
2. get_history (TAKES NO ARGS) - Get the history of your actions and observations by location. Very important!
3. get_valid_actions (TAKES NO ARGS) - Get the list of valid actions you can perform in the current location. Very important!
4. explore_surroundings (TAKES NO ARGS)- Explore the surroundings of the current location. Very important!
* **Strict Syntax:** The game parser is old and strict. Do not use adjectives or articles unless necessary.
* *Bad:* "pick up the shiny egg", "examine the old mailbox"
* *Good:* "take egg", "examine mailbox", "look in fountain"
* **Validator:** Before attempting complex interactions, ALWAYS check `get_valid_actions`.
* If the tool returns "take egg", you must send exactly "take egg" to the `play_action` tool.
* **Execution:** execute commands using `play_action(action="<command>")`.
* **Map & History:** Use `get_history` to track your path and the history of your actions and observations by location.
* **Loop Avoidance:** If you have visited a room 3 times with no progress, navigate a different direction immediately.
* **Dead Ends:** Never retry a direction that previously returned "You can't go that way".
* **Surveying:** Use `explore_surroundings` to identify exits and descriptions.
* Example of Movement commands: `go north`, `go northeast`, `go northwest`, `go south`, `go southeast`, `go southwest`, `go east`, `go west`, `go up`, `go down`, `enter`, `exit`, etc.
* **Kleptomania:** If an item is portable, `take` it. Inventory items are crucial for puzzles.
* **Investigation Loop:**
1. **Examine:** Use `examine <object>` on every new object with the `play_action` tool.
2. **Look In:** Use `look in <object>` for containers with the `play_action` tool. *Note: "Examine" and "Look In" are distinct commands.*
3. **Inventory:** Your inventory is pass to you at each step, you should use it to your advantage.
### OUTPUT FORMAT
You operate in a strict Thought-Action loop. You must output in this format:
THOUGHT: <Step-by-step reasoning. Reference valid actions and history. Do not repeat the same action multiple times in a row.>
TOOL: <tool_name>
ARGS: <JSON arguments>
"""
# =============================================================================
# Student Agent
# =============================================================================
class StudentAgent:
"""
A robust ReAct agent for text adventure games.
Features:
- Loop detection
- Valid action prioritization
- Robust parsing
"""
def __init__(self):
self.history = []
self.visited_locations = set()
self.location_counts = {} # Track how often we visit a room ID
async def get_score(self, client) -> int:
"""Get the current score."""
response = await client.call_tool("get_score", None)
return int(self._extract_result(response))
async def run(
self,
client,
game: str,
max_steps: int,
seed: int,
verbose: bool = False,
) -> RunResult:
# 1. Initialization
try:
# Start by looking
result = await client.call_tool("play_action", {"action": "look"})
observation = self._extract_result(result)
except Exception as e:
observation = f"Error starting game: {e}"
score = await self.get_score(client)
max_score = score
moves = 0
game_completed = False
for step in range(1, max_steps + 1):
if verbose:
print(
f"\n{Colors.BOLD}{Colors.CYAN}--- Step {step} | Score: {score} | Moves: {moves} | # Locations Visited: {len(self.visited_locations)} ---{Colors.ENDC}"
)
# Get current user location
current_loc_line = observation.strip().split("\n")[0]
self.location_counts[current_loc_line] = (
self.location_counts.get(current_loc_line, 0) + 1
)
# 3. Construct Dynamic Prompt
# If we are visiting a place too often or stuck, inject a hint
hint = "You could use the `explore_surroundings` tool to explore the surroundings of the current location."
if self.location_counts[current_loc_line] > 3:
hint = f"\n[SYSTEM HINT]: You have been in '{current_loc_line}' {self.location_counts[current_loc_line]} times. Stop looping. Go somewhere new."
# Build prompt with context
prompt = self._build_prompt(observation, self.history, hint)
# Get current inventory and pass it to the summurizer
inventory_response = await client.call_tool("inventory", {})
inventory = self._extract_result(inventory_response)
# Let's summurize the prompt with another LLM call that could hint to the next best action
prompt += f"\n\nCURRENT INVENTORY: {inventory}"
summary_response = call_llm(
prompt,
SUMMURIZER_SYSTEM_PROMPT,
seed,
max_tokens=256,
)
summary = self._extract_result(summary_response)
if verbose:
print(
f"{Colors.BOLD}{Colors.YELLOW}SYSTEM SUMMARY:{Colors.ENDC}\n{summary}"
)
# Call reasoning LLM to take the next best action
response = call_llm(summary, SYSTEM_PROMPT, seed, max_tokens=256)
thought, tool_name, tool_args = self._parse_response(response)
if verbose:
# BOLD YELLOW for Thought
print(f"{Colors.BOLD}{Colors.YELLOW}THOUGHT:{Colors.ENDC} {thought}")
# BOLD GREEN for Tool/Args
print(f"{Colors.BOLD}{Colors.GREEN}TOOL:{Colors.ENDC} {tool_name}")
print(f"{Colors.BOLD}{Colors.GREEN}ARGS:{Colors.ENDC} {tool_args}")
# 6. Execute
try:
raw_result = await client.call_tool(tool_name, tool_args)
observation = self._extract_result(raw_result)
if tool_name == "get_valid_actions":
valid_actions_result = observation
if verbose:
print(
f"{Colors.BOLD}{Colors.BLUE}OBSERVATION:{Colors.ENDC}\n{observation.strip()}"
)
except Exception as e:
observation = f"Tool Execution Error: {e}"
if verbose:
print(f"{Colors.BOLD}{Colors.RED}ERROR:{Colors.ENDC} {e}")
current_score = await self.get_score(client)
if current_score != score:
print(
f"{Colors.BOLD}{Colors.GREEN}SYSTEM:{Colors.ENDC} NEW SCORE: {current_score} (+{current_score - score})"
)
score = current_score
max_score = max(max_score, score)
# Update history
self.history.append(
(thought, f"{tool_name}{tool_args}", observation)
) # Truncate obs for history to save context
self.visited_locations.add(current_loc_line)
moves += 1
if "GAME OVER" in observation:
game_completed = True
if verbose:
print(f"\n{Colors.BOLD}{Colors.RED}*** GAME OVER ***{Colors.ENDC}")
break
return RunResult(
final_score=score,
max_score=max_score,
moves=moves,
locations_visited=self.visited_locations,
game_completed=game_completed,
history=self.history,
)
def _extract_result(self, result) -> str:
if hasattr(result, "content") and result.content:
return result.content[0].text
return str(result)
def _build_prompt(self, observation: str, history: list, hint: str) -> str:
"""Constructs a context-aware prompt."""
# Compress history
hist_str = ""
for i, (t, a, o) in enumerate(
history[max(0, len(history) - 5) :]
): # Only last 5
hist_str += f"- Action: {a}\n Result: {o[:200]}...\n"
return f"""
CURRENT GAME STATUS:
{observation}
Last 5 Actions and Observations:
{hist_str}
{hint}
Response Format:
THOUGHT: ...
TOOL: ...
ARGS: ...
"""
def _parse_response(self, response: str) -> tuple[str, str, dict]:
"""Robust parsing of LLM output."""
thought = "Thinking..."
tool_name = "play_action"
tool_args = {"action": "look"}
# Normalize
lines = response.strip().split("\n")
for line in lines:
line = line.strip()
if not line:
continue
if line.upper().startswith("THOUGHT:"):
thought = line[8:].strip()
elif line.upper().startswith("TOOL:"):
tool_name = line[5:].strip()
elif line.upper().startswith("ARGS:"):
args_str = line[5:].strip()
try:
# Try pure JSON
tool_args = json.loads(args_str)
except:
# Fallback for simple dict string like {'action': 'north'}
try:
# Replace single quotes with double for strict JSON
args_str_fixed = args_str.replace("'", '"')
tool_args = json.loads(args_str_fixed)
except:
# Fallback: assume it's just the action string or broken json
# If the tool is play_action, assume the rest of the line is the action
if tool_name == "play_action":
# Regex to extract value from {"action": "value"} or just "value"
match = re.search(r':\s*"([^"]+)"', args_str)
if match:
tool_args = {"action": match.group(1)}
else:
tool_args = {"action": args_str}
return thought, tool_name, tool_args
# =============================================================================
# Local Testing
# =============================================================================
async def test_agent():
    """Smoke-test the agent against the local MCP server script."""
    from fastmcp import Client

    agent = StudentAgent()
    async with Client("mcp_server.py") as client:
        result = await agent.run(client, "zork1", 20, 42, True)

    banner = f"{Colors.BOLD}{Colors.HEADER}\nFinal Score: {result.final_score}{Colors.ENDC}"
    print(banner)
# Entry point for local testing: `python <this file>`.
if __name__ == "__main__":
    import asyncio
    asyncio.run(test_agent())