Spaces:

mcngnt
/

text-adventure-agent

Sleeping

App Files Files Community

text-adventure-agent / agent.py

mcngnt

Final submission

b41e35c 9 days ago

raw

history blame contribute delete

14 kB

	"""
	Example: MCP ReAct Agent

	A complete ReAct agent that uses MCP tools to play text adventure games.
	This is a working example students can learn from.
	"""

	import json
	import os
	import re
	from dataclasses import dataclass, field
	from typing import Optional

	from dotenv import load_dotenv
	from huggingface_hub import InferenceClient

	# Runs at import time, before HF_TOKEN is read below; presumably loads
	# variables from a local .env file (python-dotenv) - see the error message.
	load_dotenv()

	# =============================================================================
	# LLM Configuration - DO NOT MODIFY
	# =============================================================================

	# Model identifier passed as `model=` on every request made by call_llm.
	LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

	# The Hugging Face token is required: importing this module raises
	# ValueError when HF_TOKEN is unset or empty.
	_hf_token = os.getenv("HF_TOKEN")
	if not _hf_token:
	    raise ValueError("HF_TOKEN not found. Set it in your .env file.")

	# Single module-level client, created once at import and reused by call_llm.
	LLM_CLIENT = InferenceClient(token=_hf_token)


	def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
	    """
	    Send one system + user message pair to the chat model and return its reply.

	    The request is made through the module-level LLM_CLIENT with model
	    LLM_MODEL and a fixed temperature of 0.0. Use this function in your agent.

	    Args:
	        prompt: The user prompt (current game state, history, etc.)
	        system_prompt: The system prompt (instructions for the agent)
	        seed: Random seed forwarded to the API for reproducibility
	        max_tokens: Maximum tokens in response (default: 300)

	    Returns:
	        The message content of the first choice in the response.

	    Example:
	        response = call_llm(
	            prompt="You are in a forest. What do you do?",
	            system_prompt=SYSTEM_PROMPT,
	            seed=42,
	        )
	    """
	    # Exactly two messages per call: no conversation state is kept here, so
	    # any history must already be embedded in `prompt` by the caller.
	    messages = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": prompt},
	    ]

	    response = LLM_CLIENT.chat.completions.create(
	        model=LLM_MODEL,
	        messages=messages,
	        temperature=0.0, # Deterministic for reproducibility
	        max_tokens=max_tokens,
	        seed=seed,
	    )

	    # Only the first choice is read.
	    return response.choices[0].message.content


	@dataclass
	class RunResult:
	    """Result of running the agent. Do not modify this class."""
	    # Score reported at the end of the run (StudentAgent.run passes self.score).
	    final_score: int
	    # Maximum achievable score (StudentAgent.run passes the constant 350).
	    max_score: int
	    # Number of actions the agent attempted.
	    moves: int
	    # Distinct location labels seen; StudentAgent.run uses the first line of
	    # each observation as the label.
	    locations_visited: set[str]
	    # True when the final observation contained a game-over phrase.
	    game_completed: bool
	    # Optional error description; defaults to None.
	    error: Optional[str] = None
	    # Per-step records of three strings each; presumably
	    # (thought, action, observation) - confirm against the evaluation harness.
	    history: list[tuple[str, str, str]] = field(default_factory=list)


	# =============================================================================
	# System Prompt
	# =============================================================================

	# Passed as the system message on every call_llm call made by
	# StudentAgent.run. StudentAgent._parse_response relies on the reply using
	# the "THOUGHT:" and "ACTION:" line prefixes requested below, so keep the
	# format section and the parser in sync when editing either one.
	SYSTEM_PROMPT = """You are an expert text adventure game player. Your goal is to explore, collect treasures, and interact with your environment.

	VALID GAME COMMANDS:
	- Movement: north, south, east, west, northeast, northwest, southwest, southeast, up, down, enter, exit
	- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
	- Light: turn on lamp, turn off lamp
	- Combat: attack <enemy> with <weapon>, hit <thing>
	- Other: read <thing>, wait, shout
	- Communication: ask <person> about <thing>, tell <person> about <thing>

	FORBIDDEN (will NOT work): check, inspect, search, grab, use, help, turn on torch

	RESPOND IN THIS EXACT FORMAT (no markdown):
	POSSIBLE ACTIONS: <list all reasonable actions possible in this situation>
	THOUGHT: <brief reasoning about what to do next>
	ACTION: <action_name>

	Examples:
	POSSIBLE ACTIONS: take sword, examine sword, east, south, ask villager about dragon
	THOUGHT: I need to take the sword.
	ACTION: take sword

	POSSIBLE ACTIONS: examine mailbox, open mailbox, take mailbox, north, east, shout
	THOUGHT: The mailbox might contain something useful.
	ACTION: open mailbox

	STRATEGY:
	- Pick up any useful items (lamp, sword, pig, etc.). You do not have to take an item if it is already in your inventory.
	- Explore as much as possible : prioritize moving to examining
	- Examine objects only when they seem very interesting and if you are stuck
	- Open containers (mailbox, window, etc.)
	- Make sure you have a light source if you need to explore dark areas
	- Prioritize movements over examining random things
	- PAY ATTENTION to actions you have already done in your situation (for instance, try every possible direction mentioned in the situation)

	DO NOT repeat the same action endlessly."""


	# =============================================================================
	# Student Agent Implementation
	# =============================================================================

	class StudentAgent:
	    """
	    MCP ReAct agent that plays a text adventure through MCP tools.

	    Each step the agent gathers context from the game (inventory, look,
	    listen), asks the LLM for a THOUGHT/ACTION pair, normalises the action
	    and sends it to the ``play_action`` tool.

	    State kept on the instance:
	    - history: one dict per executed step (step, thought, action, result)
	    - recent_actions: the last few executed actions, used for loop detection
	    - score: highest score number found in any observation so far
	    - location_moves: actions already tried, keyed by the ``look`` text
	    """

	    # Verbs the LLM tends to produce that the prompt lists as not working,
	    # mapped to a verb from the prompt's valid command list.
	    _VERB_FIXES = {
	        "check": "examine",
	        "inspect": "examine",
	        "search": "look",
	        "grab": "take",
	        "pick": "take",
	        "use": "examine",
	        "investigate": "examine",
	    }

	    # Each pattern is searched independently (case-insensitively) and the
	    # largest captured number wins; see _update_score.
	    _SCORE_PATTERNS = (
	        r'Score:\s*(\d+)',
	        r'score[:\s]+(\d+)',
	        r'\[Score:\s*(\d+)',
	    )

	    _GAME_OVER_PHRASES = (
	        "game over",
	        "you have died",
	        "you are dead",
	        "* you have died *",
	    )

	    _RESULT_PREVIEW_CHARS = 200   # observation prefix stored in history
	    _RECENT_ACTIONS_KEPT = 5      # bound on the loop-detection buffer
	    _REPEAT_WINDOW = 3            # identical actions in a row that trigger the warning

	    def __init__(self):
	        """Initialize the agent state."""
	        self.history: list[dict] = []
	        self.recent_actions: list[str] = []
	        self.score: int = 0
	        self.location_moves: dict = {}

	    async def run(
	        self,
	        client,
	        game: str,
	        max_steps: int,
	        seed: int,
	        verbose: bool = False,
	    ) -> "RunResult":
	        """
	        Run the agent for a game session.

	        Args:
	            client: MCP client exposing ``call_tool(name, arguments)``.
	            game: Name of the game; not used inside this method.
	            max_steps: Maximum number of LLM-chosen actions to execute.
	            seed: Base LLM seed; step ``i`` uses ``seed + i``.
	            verbose: When true, print thoughts, actions and results.

	        Returns:
	            A RunResult whose ``history`` holds one
	            (thought, action, result preview) tuple per executed step.
	        """
	        locations_visited = set()
	        history = []
	        moves = 0

	        # Get initial observation
	        result = await client.call_tool("play_action", {"action": "look"})
	        observation = self._extract_result(result)

	        # Track initial location (first line of the observation)
	        location = observation.split("\n")[0] if observation else "Unknown"
	        locations_visited.add(location)

	        if verbose:
	            print(f"\n{observation}")

	        # Main ReAct loop
	        for step in range(1, max_steps + 1):
	            # Context for the prompt. NOTE(review): these are sent as
	            # ordinary game commands; whether they count as game turns
	            # depends on the server - confirm.
	            inventory_result = await client.call_tool("play_action", {"action": "inventory"})
	            inventory = self._extract_result(inventory_result)
	            look_result = await client.call_tool("play_action", {"action": "look"})
	            look = self._extract_result(look_result)
	            listen_result = await client.call_tool("play_action", {"action": "listen"})
	            listen = self._extract_result(listen_result)
	            prompt = self._build_prompt(
	                observation, inventory, look, listen, self.location_moves.get(look, [])
	            )

	            # NOTE(review): the get_score result is fetched every step but
	            # not consumed; final_score comes from _update_score applied to
	            # observations. Wire it in once the tool's output format is known.
	            score_result = await client.call_tool("get_score", {})
	            score = self._extract_result(score_result)

	            # Call LLM for reasoning (use step-based seed for variety)
	            response = call_llm(prompt, SYSTEM_PROMPT, seed + step)

	            # Parse the response
	            thought, action = self._parse_response(response)

	            if verbose:
	                print(f"\n--- Step {step} ---")
	                print(f"[THOUGHT] {thought}")
	                print(f"[ACTION] {action}")

	            action = self._validate_action_call(action)

	            moves += 1

	            try:
	                result = await client.call_tool("play_action", {"action": action})
	                observation = self._extract_result(result)

	                # Remember what was tried from this exact `look` text.
	                self.location_moves.setdefault(look, []).append(action)

	                if verbose:
	                    print(f"[RESULT] {observation[:200]}...")
	            except Exception as e:
	                # Best effort: feed the failure back to the LLM as the
	                # observation instead of aborting the run.
	                observation = f"Error: {e}"
	                if verbose:
	                    print(f"[ERROR] {e}")

	            # Track location
	            location = observation.split("\n")[0] if observation else "Unknown"
	            locations_visited.add(location)

	            # Update prompt history, loop-detection buffer and returned history
	            self._record_step(step, thought, action, observation, history)

	            # Track score from observation
	            self._update_score(observation)

	            # Check for game over
	            if self._is_game_over(observation):
	                if verbose:
	                    print("\n* GAME OVER *")
	                break

	        return RunResult(
	            final_score=self.score,
	            max_score=350,
	            moves=moves,
	            locations_visited=locations_visited,
	            game_completed=self._is_game_over(observation),
	            history=history,
	        )

	    def _record_step(self, step: int, thought: str, action: str, observation: str, history: list) -> None:
	        """Record one executed step in every structure that later code reads.

	        Appends to ``self.history`` (used by _build_prompt), to
	        ``self.recent_actions`` (loop detection, bounded in size) and to
	        ``history`` (the list returned in RunResult).
	        """
	        preview = observation[:self._RESULT_PREVIEW_CHARS]
	        self.history.append({
	            "step": step,
	            "thought": thought,
	            "action": action,
	            "result": preview,
	        })
	        self.recent_actions.append(action)
	        # Keep only the newest entries; a no-op while the list is short.
	        del self.recent_actions[:-self._RECENT_ACTIONS_KEPT]
	        history.append((thought, action, preview))

	    def _build_prompt(self, observation: str, inventory: str, look: str, listen: str, location_moves: list[str]) -> str:
	        """
	        Build the user prompt for the LLM.

	        Args:
	            observation: Result text of the most recent action.
	            inventory: Output of the ``inventory`` command.
	            look: Output of the ``look`` command.
	            listen: Output of the ``listen`` command.
	            location_moves: Actions already tried from this ``look`` text.

	        Returns:
	            The prompt sections joined with newlines.
	        """
	        parts = [f"Inventory :{inventory}"]

	        base_reason_size = 100
	        short_reason_size = 40
	        base_size_threshold = 3
	        short_size_threshold = 10
	        action_threshold = 30

	        # Older steps, newest first. The latest step is skipped here because
	        # it is shown in full at the end of the prompt. The further back a
	        # step is, the less of its result is shown.
	        older_steps = self.history[-2::-1][:action_threshold + 1]
	        if older_steps:
	            parts.append("\nRecent actions -> Recent Results:")
	            for nb, entry in enumerate(older_steps):
	                action = entry.get("action")
	                if nb >= short_size_threshold:
	                    parts.append(f"> {action}")
	                    continue
	                reason_size = short_reason_size if nb >= base_size_threshold else base_reason_size
	                result_short = entry["result"]
	                if len(result_short) > reason_size:
	                    result_short = result_short[:reason_size] + "..."
	                parts.append(f"> {action} -> {result_short}")

	        # Warn only when a full window of identical actions exists; a single
	        # recorded action must not count as "repeatedly".
	        last_actions = self.recent_actions[-self._REPEAT_WINDOW:]
	        if len(last_actions) == self._REPEAT_WINDOW and len(set(last_actions)) == 1:
	            parts.append(f"\n[WARNING: You've been doing '{last_actions[-1]}' repeatedly. TRY SOMETHING DIFFERENT!]")

	        parts.append(f"Here is your current situation :{look}")

	        parts.append(f"Here is what you hear in this situation :{listen}")

	        # Values are computed outside the f-strings: reusing the enclosing
	        # quote character inside a replacement field needs Python 3.12+.
	        tried = ",".join(location_moves) if location_moves else "none"
	        parts.append(f"Here are the actions you have already tried in this situation, avoid repeating if possible (but do not hallucinate directions or objects) : {tried}")

	        last_action = self.history[-1].get("action") if self.history else ""
	        parts.append(f"\nResult of your most recent action ({last_action}):\n{observation}")
	        parts.append("\nWhat do you do next?")

	        return "\n".join(parts)

	    def _parse_response(self, response: str) -> tuple[str, str]:
	        """
	        Extract the thought and the action from an LLM reply.

	        Lines starting with ``THOUGHT:`` / ``ACTION:`` (any case) are read;
	        when several are present the last one wins. Asterisks and backticks
	        are removed from the action and it is lower-cased.

	        Returns:
	            (thought, action). Defaults are "No reasoning provided" and
	            "look" when the reply is empty, None, or lacks the line.
	        """
	        thought = "No reasoning provided"
	        action = "look"

	        for line in (response or "").strip().split("\n"):
	            line_clean = line.strip()
	            line_upper = line_clean.upper()

	            if line_upper.startswith("THOUGHT:"):
	                thought = line_clean.split(":", 1)[1].strip()

	            elif line_upper.startswith("ACTION:"):
	                raw_action = line_clean.split(":", 1)[1].lower()
	                # Strip after removing markup so "`open mailbox`" has no
	                # stray surrounding whitespace.
	                raw_action = raw_action.replace("*", "").replace("`", "").strip()
	                action = raw_action if raw_action else "look"

	        return thought, action

	    def _validate_action_call(self, action: str) -> str:
	        """
	        Normalise an action string and replace verbs the game rejects.

	        Lower-cases, removes asterisks/backticks, collapses whitespace, then
	        maps a leading verb through ``_VERB_FIXES``.

	        Returns:
	            The cleaned command, or "look" when nothing is left.
	        """
	        # Clean first so markup glued to the verb ("*check") cannot hide it
	        # from the verb lookup.
	        words = (action or "").lower().replace("*", "").replace("`", "").split()
	        if not words:
	            return "look"

	        if words[0] == "pick" and len(words) > 1 and words[1] == "up":
	            # "pick up lamp" must become "take lamp", not "take up lamp".
	            del words[1]

	        words[0] = self._VERB_FIXES.get(words[0], words[0])
	        return " ".join(words)

	    def _extract_result(self, result) -> str:
	        """
	        Extract text from an MCP tool result.

	        Accepts an object with a non-empty ``content`` sequence, a non-empty
	        list, or anything else (returned via ``str``). For sequences only
	        the first item is read.
	        """
	        items = getattr(result, "content", None)
	        if not items and isinstance(result, list):
	            items = result
	        if items:
	            first = items[0]
	            return first.text if hasattr(first, "text") else str(first)
	        return str(result)

	    def _update_score(self, text: str) -> None:
	        """Raise ``self.score`` to the largest score number found in ``text``.

	        The score never decreases: each pattern's first match is compared
	        with the current value and the maximum is kept.
	        """
	        for pattern in self._SCORE_PATTERNS:
	            match = re.search(pattern, text, re.IGNORECASE)
	            if match:
	                self.score = max(self.score, int(match.group(1)))

	    def _is_game_over(self, text: str) -> bool:
	        """Return True when ``text`` contains a game-over phrase (any case)."""
	        text_lower = text.lower()
	        return any(phrase in text_lower for phrase in self._GAME_OVER_PHRASES)


	# =============================================================================
	# Local Testing
	# =============================================================================

	async def test_agent():
	    """Play a short zork1 session against the local MCP server and print a summary."""
	    from fastmcp import Client

	    student = StudentAgent()
	    session_options = {
	        "game": "zork1",
	        "max_steps": 20,
	        "seed": 42,
	        "verbose": True,
	    }

	    async with Client("mcp_server.py") as mcp_client:
	        outcome = await student.run(client=mcp_client, **session_options)

	        # Summary of the finished session.
	        print(f"\n{'=' * 50}")
	        print(f"Final Score: {outcome.final_score}")
	        print(f"Moves: {outcome.moves}")
	        print(f"Locations: {len(outcome.locations_visited)}")


	# Script entry point: run the local test session only when this file is
	# executed directly, not when it is imported.
	if __name__ == "__main__":
	    import asyncio
	    asyncio.run(test_agent())