# cleaned and added README (VZ22, commit 748ada7)
"""
Example: MCP ReAct Agent
A complete ReAct agent that uses MCP tools to play text adventure games.
This is a working example students can learn from.
"""
import json
import os
import re
from dataclasses import dataclass, field
from typing import Optional
import numpy as np
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
# Load environment variables (HF_TOKEN, USE_LOCAL_MODEL, ...) from a .env file.
load_dotenv()
# Set USE_LOCAL_MODEL=1 in your .env to use a locally downloaded model
USE_LOCAL_MODEL = os.getenv("USE_LOCAL_MODEL", "0").strip() in ("1", "true", "yes")
LOCAL_MODEL_ID = os.getenv("LOCAL_MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")
# =============================================================================
# LLM Configuration - DO NOT MODIFY
# =============================================================================
LLM_MODEL ="Qwen/Qwen2.5-72B-Instruct"
# Initialize the LLM client based on mode
_local_pipeline = None
if USE_LOCAL_MODEL:
    # Local mode: import the heavy dependencies only when actually needed and
    # build a transformers text-generation pipeline from the local model.
    import torch
    from transformers import pipeline as _hf_pipeline
    _local_pipeline = _hf_pipeline(
        "text-generation",
        model=LOCAL_MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    LLM_CLIENT = None
else:
    # Remote mode: use the Hugging Face Inference API (requires HF_TOKEN).
    _hf_token = os.getenv("HF_TOKEN")
    if not _hf_token:
        raise ValueError("HF_TOKEN not found. Set it in your .env file.")
    LLM_CLIENT = InferenceClient(token=_hf_token)
llm_call_count = 0  # For tracking number of LLM calls (optional)
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """Send one chat turn to the configured LLM and return its reply text.

    Args:
        prompt: The user prompt (current game state, history, etc.)
        system_prompt: The system prompt (instructions for the agent)
        seed: Random seed for reproducibility (used by the remote backend)
        max_tokens: Maximum tokens in response (default: 300)

    Returns:
        The LLM's response text

    Example:
        response = call_llm(
            prompt="You are in a forest. What do you do?",
            system_prompt=SYSTEM_PROMPT,
            seed=42,
        )
    """
    global llm_call_count
    llm_call_count += 1
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    # Local path: run the downloaded transformers pipeline.
    if USE_LOCAL_MODEL and _local_pipeline is not None:
        generation = _local_pipeline(
            conversation,
            max_new_tokens=max_tokens,
            temperature=0.0001,  # Near-deterministic (0.0 unsupported by some backends)
            do_sample=True,
            max_length=None,
        )
        last_turn = generation[0]["generated_text"][-1]
        return last_turn["content"]
    # Remote path: Hugging Face Inference API.
    api_response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=conversation,
        temperature=0.0,  # Deterministic for reproducibility
        max_tokens=max_tokens,
        seed=seed,
    )
    return api_response.choices[0].message.content
def levenshtein(a, b, ratio=False, print_matrix=False, lowercase=False):
    """Compute the Levenshtein edit distance between two strings.

    Adapted from https://github.com/jamfromouterspace/levenshtein/blob/master/levenshtein.py

    Args:
        a: First string.
        b: Second string.
        ratio: If True, return a normalized similarity in [0, 1]
            ((len(a)+len(b)-distance) / (len(a)+len(b))) instead of the distance.
        print_matrix: If True, print the full DP matrix (debugging aid).
        lowercase: If True, compare case-insensitively.

    Returns:
        The edit distance as an int, or the similarity ratio as a float.

    Raises:
        TypeError: If either argument is not a string.
    """
    # Idiomatic type checks (was `type(a) != type('')`).
    if not isinstance(a, str):
        raise TypeError('First argument is not a string!')
    if not isinstance(b, str):
        raise TypeError('Second argument is not a string!')
    # Trivial cases: distance to the empty string is the other string's length.
    if a == '':
        return len(b)
    if b == '':
        return len(a)
    if lowercase:
        a = a.lower()
        b = b.lower()
    n = len(a)
    m = len(b)
    # Standard dynamic-programming matrix: lev[i, j] is the distance between
    # the first i characters of `a` and the first j characters of `b`.
    lev = np.zeros((n + 1, m + 1))
    for i in range(0, n + 1):
        lev[i, 0] = i
    for j in range(0, m + 1):
        lev[0, j] = j
    for i in range(1, n + 1):
        for j in range(1, m + 1):
            cost = 1 if a[i - 1] != b[j - 1] else 0
            lev[i, j] = min(lev[i - 1, j] + 1,          # deletion
                            lev[i, j - 1] + 1,          # insertion
                            lev[i - 1, j - 1] + cost)   # substitution
    if print_matrix:
        print(lev)
    if ratio:
        return (n + m - lev[n, m]) / (n + m)
    # Cast to a plain int: the distance is integral, and callers should not
    # receive a numpy scalar from a scalar computation.
    return int(lev[n, m])
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class."""
    final_score: int                # score at the end of the run
    max_score: int                  # maximum achievable score for the game
    moves: int                      # number of game moves executed
    locations_visited: set[str]     # distinct location names seen during the run
    game_completed: bool            # True if a game-over condition was detected
    error: Optional[str] = None     # error message if the run aborted, else None
    history: list[tuple[str, str, str]] = field(default_factory=list)  # (thought, tool_call, observation) per step
# =============================================================================
# System Prompt
# =============================================================================
# NOTE: this string is sent verbatim to the LLM on every call; any edit here
# directly changes agent behavior.
SYSTEM_PROMPT = """You are an expert text adventure game player. Your goal is to explore, collect treasures, and maximize your score.
AVAILABLE TOOLS (use these via MCP):
1. play_action - Execute game commands (north, take lamp, open mailbox, etc.)
2. memory - Get current game state, score, and recent history
3. get_map - See explored locations and connections
4. inventory - Check what you're carrying
VALID GAME COMMANDS for play_action:
- Movement: north, south, east, west, up, down, enter, exit
- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
- Light: turn on lamp, turn off lamp
- Combat: attack <enemy> with <weapon>
- Other: inventory, look, read <thing>, wait, listen, look inside <container>, blow <object>, follow <creature>, climb <object>, drink <liquid>, eat <food>
FORBIDDEN (will NOT work): check, inspect, search, grab, use, help
RESPOND IN THIS EXACT FORMAT (no markdown):
THOUGHT: <brief reasoning about what to do next>
TOOL: <tool_name>
ARGS: <JSON arguments>
Examples:
THOUGHT: I need to see what's around me.
TOOL: play_action
ARGS: {"action": "look"}
THOUGHT: Let me check my current state and score.
TOOL: memory
ARGS: {}
THOUGHT: The mailbox might contain something useful.
TOOL: play_action
ARGS: {"action": "open mailbox"}
STRATEGY:
1. Start by looking around and checking memory
2. Examine everything - look at items, containers, and surroundings
3. Explore systematically - try all directions
4. Pick up useful items (lamp, sword, etc.)
5. Open containers (mailbox, window, etc.)
6. Use get_map to avoid getting lost
"""
# =============================================================================
# Student Agent Implementation
# =============================================================================
class StudentAgent:
    """
    MCP ReAct Agent - A complete working example.
    This agent demonstrates:
    - ReAct loop (Thought -> Tool -> Observation)
    - Loop detection
    - Action validation
    - Score tracking via memory tool
    """
    def __init__(self):
        """Initialize the agent state."""
        self.history: list[dict] = []
        self.score: int = 0
        self.history_state_tried_action = {}
        self.location_state = {}  # to each location, we have a set of every observation made here
        self.idle_actions = ["listen", "wait", "diagnose", "yell", "pray", "launch", "take all"]  # Actions that don't change location
        self.map_size = 20   # x/y extent of the internal map
        self.map_height = 5  # z extent (up/down levels) of the internal map
        self.internal_map = [[["Unknown" for i in range(self.map_height)] for j in range(self.map_size)] for k in range(self.map_size)]  # Internal map representation
        self.position = (self.map_size // 2, self.map_size // 2, 2)
        # Start at the middle in the internal map, we suppose the map is in 3D (taking into account up and down movements)
        self.directions = {"north": (0, -1, 0), "south": (0, 1, 0), "east": (1, 0, 0), "west": (-1, 0, 0), "up": (0, 0, 1), "down": (0, 0, -1),
                           "northeast": (1, -1, 0), "northwest": (-1, -1, 0), "southeast": (1, 1, 0), "southwest": (-1, 1, 0)}

    async def run(
        self,
        client,
        game: str,
        max_steps: int,
        seed: int,
        verbose: bool = False,
    ) -> "RunResult":  # string annotation: forward reference to the module-level dataclass
        """Run the agent for a game session.

        Args:
            client: MCP client exposing play_action / memory / get_map /
                inventory / current_location / last_observation tools.
            game: Name of the game being played (logging only).
            max_steps: Maximum number of ReAct steps.
            seed: Base seed; offset per step for LLM calls.
            verbose: If True, print step-by-step progress.

        Returns:
            A RunResult summarizing score, moves and visited locations.
        """
        global llm_call_count
        locations_visited = set()
        history = []
        moves = 0
        # Get list of available tools
        tools = await client.list_tools()
        tool_names = [t.name for t in tools]
        # Get initial observation
        result = await client.call_tool("play_action", {"action": "look"})
        observation = self._extract_result(result)
        observation = observation.strip() if observation else "No observation"
        # Track initial location
        location = await client.call_tool("current_location", {})
        location = self._extract_result(location)
        locations_visited.add(location)
        if verbose:
            print(f"Starting game: {game}")
            print(f"\n{observation}")
            print(f"\nAvailable tools: {tool_names}")
        last_location = location
        current_location = last_location
        self.internal_map[self.position[0]][self.position[1]][self.position[2]] = current_location
        old_state = await client.call_tool("last_observation", {})
        old_state = self._extract_result(old_state)
        current_state = old_state
        tried_action_in_same_state = [("play_action", {"action": "look"})]
        self.location_state[current_location] = set()
        self.location_state[current_location].add(current_state)
        look_observation = observation.lower().strip()
        # Fallback so comparisons below are defined even if the very first tool
        # call inside the loop raises before current_obs is assigned.
        current_obs = current_state
        # Main ReAct loop
        for step in range(1, max_steps + 1):
            # Budget guard: stop if the LLM is called far more often than
            # 1.5x the step budget (re-prompting can multiply calls).
            if llm_call_count > 1.5 * max_steps:
                if verbose:
                    print(f"[WARNING] You've made {llm_call_count} LLM calls, which is quite high for {step} steps.")
                break
            old_state = current_state
            if current_location != last_location:
                print(f"[DEBUG] Moved to new location: {current_location}. Resetting tried actions for this state.")
                observation += f"\n[INFO] You have moved from {last_location} to a new location: {current_location}."
                if current_location in locations_visited and current_state in self.location_state.get(current_location, set()):
                    observation += " You've been here before, read the observation carefully, is it new? If not return where you came."
                else:
                    observation += " Be thorough, examine everything around you and try to find all treasures and points of interest! Also remember your objective"
            locations_visited.add(current_location)
            # Build prompt with context
            prompt = self._build_prompt(observation)
            prompt += self._look_for_neighboring_locations(prompt)
            prompt = self._add_useless_actions_to_prompt(prompt, tried_action_in_same_state)
            # Call LLM for reasoning (use step-based seed for variety)
            response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
            # Parse the response
            thought, tool_name, tool_args = self._parse_response(response, tool_names, verbose)
            # Validate and fix common issues
            tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)
            loop_count = 0
            # Re-prompt when the chosen action was already tried in this state,
            # or when the model checks memory too early in the game.
            # NOTE: the whole `or` is parenthesized so that loop_count bounds
            # BOTH cases — without the parentheses, `and` bound tighter than
            # `or` and the repeat-action case could loop without limit.
            while (((tool_name, tool_args) in tried_action_in_same_state
                    or (tool_name == "memory" and step < 5))
                   and loop_count < 5):
                loop_count += 1
                if (tool_name, tool_args) in tried_action_in_same_state:
                    if verbose:
                        print(f"[WARNING] You've been trying the same action {tool_name} with args {tool_args} in the same state without success.")
                    new_prompt = prompt + response + "\n[WARNING: You've been trying the same action without success. Try a different approach!]"
                    response = call_llm(new_prompt, SYSTEM_PROMPT, seed + step + 100)
                elif tool_name == "memory" and step < 5:
                    if verbose:
                        print("[INFO] Early in the game, it's better to explore than to check memory. Forcing an idle action to encourage exploration.")
                    new_prompt = prompt + response + "\n[INFO: Early in the game, it's better to explore. Try something else!]"
                    response = call_llm(new_prompt, SYSTEM_PROMPT, seed + step + 100)
                # Parse the response
                thought, tool_name, tool_args = self._parse_response(response, tool_names, verbose)
                # Validate and fix common issues
                tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)
            if verbose:
                print(f"[FINAL DECISION] {tool_name}({tool_args}) after {loop_count} loops to find a new action.")
            # Loop detection
            if tool_name == "play_action":
                action = tool_args.get("action", "look")
                # Detect loops: in a well-explored state, periodically force an
                # untried idle action or direction to break the cycle.
                if (len(tried_action_in_same_state) >= 5) and step % 3 == 0:
                    actions_to_cycle = [a for a in self.idle_actions if ("play_action", {"action": a}) not in tried_action_in_same_state]
                    actions_to_cycle = actions_to_cycle + [direction for direction in self.directions.keys() if ("play_action", {"action": direction}) not in tried_action_in_same_state]
                    if actions_to_cycle:  # guard: everything may already have been tried
                        idx_random = (seed + 571 * step) % len(actions_to_cycle)
                        action_forced = actions_to_cycle[idx_random]
                        if verbose:
                            print("[WARNING] Loop detected - forcing a random action to break the cycle.")
                        tool_args = {"action": action_forced}  # Force an idle action
                moves += 1
            if verbose:
                print(f"\n--- Step {step} ---")
                print(f"[THOUGHT] {thought}")
                print(f"[TOOL] {tool_name}({tool_args})")
            not_new_state = False
            # Execute the tool
            try:
                result = await client.call_tool(tool_name, tool_args)
                observation = self._extract_result(result)
                # Look if we got the same observation as for a "look"
                current_obs = await client.call_tool("last_observation", {})  # observation also has the score
                current_obs = self._extract_result(current_obs)
                tried_action_in_same_state.append((tool_name, tool_args))
                if verbose:
                    print(f"[RESULT] {observation}...")
            except Exception as e:
                observation = f"Error: {e}"
                current_obs = observation  # keep current_obs defined for the checks below
                if verbose:
                    print(f"[ERROR] {e}")
            if tool_args.get("action", "").lower() == "look":
                look_observation = current_obs.lower()
            # Compare lowercase against lowercase (look_observation is stored
            # lowercased; the raw current_obs would artificially lower the ratio).
            elif levenshtein(look_observation, current_obs.lower(), ratio=True) > 0.8:
                not_new_state = True
            # Track location
            location = await client.call_tool("current_location", {})
            location = self._extract_result(location)
            print(f"[DEBUG] Current location: {location}")
            last_location = current_location
            current_location = location
            if current_location != last_location:
                tried_action_in_same_state.pop()  # If we moved, the action is not useless
                # Otherwise we might get stuck
                tried_action_in_same_state, current_state = self._update_history_state(tried_action_in_same_state, current_state, current_obs, verbose)
            # Update position
            action = tool_args.get("action", "").lower()
            direction_curr = ""
            directions_abreviations = {"n": "north", "s": "south", "e": "east", "w": "west", "u": "up", "d": "down",
                                       "ne": "northeast", "nw": "northwest", "se": "southeast", "sw": "southwest"}
            dx, dy, dz = 0, 0, 0
            if action in self.directions:
                dx, dy, dz = self.directions[action]
                direction_curr = action
            elif action in directions_abreviations:
                direction_curr = directions_abreviations[action]
                # Index by the expanded name: the abbreviation itself is not a
                # key of self.directions (indexing with `action` raised KeyError).
                dx, dy, dz = self.directions[direction_curr]
            if direction_curr != "down" and direction_curr != "" and "fall down" in observation.lower():
                dz -= 1
            new_position = (self.position[0] + dx, self.position[1] + dy, self.position[2] + dz)
            if 0 <= new_position[0] < self.map_size and 0 <= new_position[1] < self.map_size and 0 <= new_position[2] < self.map_height:
                if current_location != last_location:
                    if verbose:
                        print(f"[DEBUG] Moving {direction_curr} to new location on new position ({new_position}): {current_location}. Updating internal map.")
                    self.internal_map[new_position[0]][new_position[1]][new_position[2]] = current_location
                    # Only advance our tracked position when the game actually
                    # moved us; previously the position advanced even into a
                    # cell we had just marked "Inaccessible".
                    self.position = new_position
                elif new_position != self.position:
                    # Tried to move but the location did not change: the cell is blocked.
                    self.internal_map[new_position[0]][new_position[1]][new_position[2]] = "Inaccessible"
            else:
                print(f"[DEBUG] New position {new_position} is out of bounds. Not updating position.")
            # Update history
            self.history.append({
                "step": step,
                "thought": thought,
                "tool": tool_name,
                "args": tool_args,
                "result": observation[:200]
            })
            if len(self.history) > 10:
                self.history = self.history[-10:]
            # Track score from observation
            self._update_score(observation)
            # Record in result history
            history.append((thought, f"{tool_name}({tool_args})", observation))
            if "!" in observation.lower() and current_obs not in self.location_state.get(current_location, set()) and not not_new_state:
                # first time seeing this observation in this location and it has an exclamation mark, it might be important
                if verbose:
                    print("[EXCLAMATION] The observation contains an exclamation mark, which might indicate an important event!")
                observation += " Something important just happened! Pay attention to this! If you are unsure of the action just do an idle action (look, listen, wait). "
            tried_action_in_same_state, current_state = self._update_history_state(tried_action_in_same_state, current_state, current_obs, verbose)
            if len(tried_action_in_same_state) > 5:
                observation += f"\n[INFO] You've tried {len(tried_action_in_same_state)} different actions in this state. Consider finding new locations to explore!"
            # Check for game over
            if self._is_game_over(observation):
                if verbose:
                    print("\n*** GAME OVER ***")
                break
            if current_location in self.location_state:
                self.location_state[current_location].add(current_obs)
            else:
                self.location_state[current_location] = set([current_obs])
        print(f"\n[FINAL SCORE] {self.score} after {moves} moves and visiting {len(locations_visited)} locations.")
        print(f"The locations are: {', '.join(locations_visited)}")
        print(f"Have visited states: {len(self.history_state_tried_action)}")
        # Join outside the f-string: a backslash inside an f-string expression
        # is a syntax error before Python 3.12.
        recent_states = "\nState:\n".join(list(self.history_state_tried_action.keys())[-5:])
        print(f"The states are: \n {recent_states}")
        return RunResult(
            final_score=self.score,
            max_score=350,
            moves=moves,
            locations_visited=locations_visited,
            game_completed=self._is_game_over(observation),
            history=history,
        )

    def _build_prompt(self, observation: str) -> str:
        """Build the prompt for the LLM with context."""
        parts = []
        parts.append(f"Current Score: {self.score}")
        # Recent history
        if self.history:
            parts.append("\nRecent actions:")
            for entry in self.history[-3:]:
                action = entry.get("args", {}).get("action", entry["tool"])
                result_short = entry["result"][:80] + "..." if len(entry["result"]) > 80 else entry["result"]
                parts.append(f"  > {action} -> {result_short}")
        parts.append(f"\nCurrent situation:\n{observation}")
        parts.append("\nWhat do you do next?")
        return "\n".join(parts)

    def _update_history_state(self, current_action_state: list, current_state: str, new_state: str, verbose: bool) -> tuple[list, str]:
        """Persist the tried actions of the old state and switch to the new one.

        Returns the tried-action list for the new state (fresh for an unseen
        state, restored from history otherwise) and the new state string.
        """
        if verbose:
            print("[DEBUG] Updating history state.")
        self.history_state_tried_action[current_state] = current_action_state.copy()
        current_state = new_state
        if current_state not in self.history_state_tried_action:
            current_action_state = []
            neigh_coord = [(1, 0, 0), (-1, 0, 0), (0, 1, 0), (0, -1, 0), (0, 0, 1), (0, 0, -1), (1, 1, 0), (1, -1, 0), (-1, 1, 0), (-1, -1, 0)]
            for dx, dy, dz in neigh_coord:
                neighbor_pos = (self.position[0] + dx, self.position[1] + dy, self.position[2] + dz)
                # The z axis is bounded by map_height (5), not map_size (20);
                # the old bound allowed an out-of-range z index (IndexError).
                if 0 <= neighbor_pos[0] < self.map_size and 0 <= neighbor_pos[1] < self.map_size and 0 <= neighbor_pos[2] < self.map_height:
                    neighbor_location = self.internal_map[neighbor_pos[0]][neighbor_pos[1]][neighbor_pos[2]]
                    if neighbor_location != "Inaccessible":
                        # After a new state, the map might be updated with new information
                        self.internal_map[neighbor_pos[0]][neighbor_pos[1]][neighbor_pos[2]] = "Unknown"
        else:
            # Known state: restore the actions already tried in it.
            current_action_state = self.history_state_tried_action[current_state].copy()
        return current_action_state, current_state

    def _find_location(self, observation: str, default: str) -> str:
        """Extract location from observation."""
        # A location name is a non-empty line with no sentence punctuation.
        paragraphs = observation.split("\n")
        for para in paragraphs:
            if not ("." in para or "!" in para or "?" in para or "[" in para) and para.strip() != "":
                return para.strip()
        return default

    def _add_useless_actions_to_prompt(self, prompt: str, useless_actions: list) -> str:
        """Append the list of already-tried actions to the prompt as a warning."""
        s = "You have tried these actions in the same state, DO NOT REPEAT THESE ACTIONS:"
        for t, a in useless_actions:
            s += f"> {t}({a}) "
        new_prompt = prompt + f"\n[INFO: Recent tried actions in this state:{s}] \n You've tried these actions multiple times. BE CREATIVE and consider trying something different!]"
        return new_prompt

    def _look_for_neighboring_locations(self, prompt: str) -> str:
        """Describe what the internal map knows about each adjacent cell."""
        s = "[INFO] Our neighbors are: "
        for direction, (dx, dy, dz) in self.directions.items():
            neighbor_pos = (self.position[0] + dx, self.position[1] + dy, self.position[2] + dz)
            # z axis bounded by map_height (5); map_size (20) allowed bad indices.
            if 0 <= neighbor_pos[0] < self.map_size and 0 <= neighbor_pos[1] < self.map_size and 0 <= neighbor_pos[2] < self.map_height:
                if self.internal_map[neighbor_pos[0]][neighbor_pos[1]][neighbor_pos[2]] != "Unknown":
                    s += f"<{direction}> ({self.internal_map[neighbor_pos[0]][neighbor_pos[1]][neighbor_pos[2]]}), "
                else:
                    s += f"<{direction}> (Unknown), "
        return s

    def _parse_response(self, response: str, valid_tools: list[str], verbose: bool) -> tuple[str, str, dict]:
        """Parse the LLM response to extract thought, tool, and arguments."""
        thought = "No reasoning provided"
        tool_name = "play_action"
        tool_args = {"action": "look"}
        lines = response.strip().split("\n")
        for line in lines:
            line_clean = line.strip()
            line_upper = line_clean.upper()
            if line_upper.startswith("THOUGHT:"):
                thought = line_clean.split(":", 1)[1].strip()
            elif line_upper.startswith("TOOL:"):
                raw_tool = line_clean.split(":", 1)[1].strip().lower()
                raw_tool = raw_tool.replace("**", "").replace("*", "").replace("`", "")
                raw_tool = raw_tool.split()[0] if raw_tool else "play_action"
                tool_name = raw_tool
            elif line_upper.startswith("ARGS:"):
                args_part = line_clean.split(":", 1)[1].strip()
                try:
                    # Tolerate single-quoted pseudo-JSON from the model.
                    args_part = args_part.replace("'", '"')
                    tool_args = json.loads(args_part)
                except json.JSONDecodeError:
                    match = re.search(r'"action"\s*:\s*"([^"]+)"', args_part)
                    if match:
                        tool_args = {"action": match.group(1)}
                    else:
                        tool_args = {"action": "look"}
        return thought, tool_name, tool_args

    def _validate_tool_call(self, tool_name: str, tool_args: dict, valid_tools: list[str]) -> tuple[str, dict]:
        """Validate and fix common tool call issues."""
        # Fix tool name
        if tool_name not in valid_tools:
            if tool_name in ["action", "do", "command"]:
                tool_name = "play_action"
            elif tool_name in ["map", "location"]:
                tool_name = "get_map"
            elif tool_name in ["mem", "state", "status"]:
                tool_name = "memory"
            elif tool_name in ["inv", "items"]:
                tool_name = "inventory"
            else:
                tool_name = "play_action"
        # Fix action verbs
        if tool_name == "play_action":
            action = tool_args.get("action", "look")
            invalid_verb_map = {
                "check": "examine",
                "inspect": "examine",
                "search": "look",
                "grab": "take",
                "pick": "take",
                "use": "examine",
                "investigate": "examine",
                "look around": "look",
            }
            words = action.lower().split()
            if words and words[0] in invalid_verb_map:
                words[0] = invalid_verb_map[words[0]]
            action = " ".join(words)
            if action.startswith("go "):
                # Strip a leading "go" ("go north" -> "north"). The previous
                # substring test ('"go" in action') also mangled unrelated
                # actions such as "take gold" -> "gold".
                action = action.split(" ", 1)[-1]
            action = action.lower().strip()
            action = action.replace("**", "").replace("*", "").replace("`", "")
            action = " ".join(action.split())
            tool_args["action"] = action
        return tool_name, tool_args

    def _extract_result(self, result) -> str:
        """Extract text from MCP tool result."""
        if hasattr(result, 'content') and result.content:
            return result.content[0].text
        if isinstance(result, list) and result:
            return result[0].text if hasattr(result[0], 'text') else str(result[0])
        return str(result)

    def _update_score(self, text: str) -> None:
        """Update score from game text."""
        patterns = [
            r'Score:\s*(\d+)',
            r'score[:\s]+(\d+)',
            r'\[Score:\s*(\d+)',
        ]
        for pattern in patterns:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                # Keep the best score seen so far (score text may lag).
                self.score = max(self.score, int(match.group(1)))

    def _is_game_over(self, text: str) -> bool:
        """Check if the game is over."""
        game_over_phrases = [
            "game over",
            "you have died",
            "you are dead",
            "*** you have died ***",
        ]
        text_lower = text.lower()
        return any(phrase in text_lower for phrase in game_over_phrases)
# =============================================================================
# Local Testing
# =============================================================================
async def test_agent():
    """Run the agent against the local MCP game server as a smoke test."""
    from fastmcp import Client
    agent = StudentAgent()
    async with Client("mcp_server.py") as client:
        run_result = await agent.run(
            client=client,
            game="zork1",
            max_steps=20,
            seed=42,
            verbose=True,
        )
        # Summarize the run.
        print(f"\n{'=' * 50}")
        print(f"Final Score: {run_result.final_score}")
        print(f"Moves: {run_result.moves}")
        print(f"Locations: {len(run_result.locations_visited)}")
# Script entry point: run the local smoke test.
if __name__ == "__main__":
    import asyncio
    asyncio.run(test_agent())