text-adventure-template

Sleeping

App Files Files Community

text-adventure-template / agent.py

flams

soumission zork

4f5b6ec about 2 months ago

raw

history blame contribute delete

19.1 kB

	"""
	MCP ReAct Agent - Enhanced Generalist

	Key improvements over v6:
	- Richer system prompt with strategy patterns for different game types
	- Stuck detection + automatic recovery (suggest_exploration, try new verbs)
	- Smarter history: shows failed actions to avoid repetition
	- Exit registration from game text (auto-detects mentioned directions)
	- Multi-phase play: explore → collect → solve → backtrack
	- Robust parsing with multiple fallback strategies
	"""

	import json
	import os
	import re
	from dataclasses import dataclass, field
	from typing import Optional

	from dotenv import load_dotenv
	from huggingface_hub import InferenceClient

	# Load environment variables
	load_dotenv()

	# Set USE_LOCAL_MODEL=1 in your .env to use a locally downloaded model
	# (the stripped value must be exactly "1", "true" or "yes"; the check is case-sensitive)
	USE_LOCAL_MODEL = os.getenv("USE_LOCAL_MODEL", "0").strip() in ("1", "true", "yes")
	# Model id handed to the local text-generation pipeline below (local mode only)
	LOCAL_MODEL_ID = os.getenv("LOCAL_MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")

	# =============================================================================
	# LLM Configuration - DO NOT MODIFY
	# =============================================================================

	# Model to use (fixed for fair evaluation)
	LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

	# Initialize the LLM client based on mode
	# Local mode fills _local_pipeline and leaves LLM_CLIENT as None;
	# hosted mode leaves _local_pipeline as None and fills LLM_CLIENT.
	_local_pipeline = None

	if USE_LOCAL_MODEL:
	    # torch / transformers are imported only when local mode is enabled
	    import torch
	    from transformers import pipeline as _hf_pipeline

	    _local_pipeline = _hf_pipeline(
	        "text-generation",
	        model=LOCAL_MODEL_ID,
	        torch_dtype=torch.bfloat16,
	        device_map="auto",
	    )
	    LLM_CLIENT = None
	else:
	    # Hosted mode: fail fast at import time when no token is configured
	    _hf_token = os.getenv("HF_TOKEN")
	    if not _hf_token:
	        raise ValueError("HF_TOKEN not found. Set it in your .env file.")
	    LLM_CLIENT = InferenceClient(token=_hf_token)


	def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
	    """
	    Call the LLM with the given prompt. Use this function in your agent.

	    Uses the local pipeline when USE_LOCAL_MODEL is set and the pipeline was
	    created; otherwise sends a chat-completion request for LLM_MODEL through
	    LLM_CLIENT.

	    Args:
	        prompt: The user prompt (current game state, history, etc.)
	        system_prompt: The system prompt (instructions for the agent)
	        seed: Random seed for reproducibility. Only forwarded on the hosted
	            path; the local pipeline call below does not receive it.
	        max_tokens: Maximum tokens in response (default: 300)

	    Returns:
	        The LLM's response text
	    """
	    # Chat transcript shared by both backends: system instructions, then the user turn
	    messages = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": prompt},
	    ]

	    if USE_LOCAL_MODEL and _local_pipeline is not None:
	        outputs = _local_pipeline(
	            messages,
	            max_new_tokens=max_tokens,
	            temperature=0.0001, # Near-deterministic (0.0 unsupported by some backends)
	            do_sample=True,
	        )
	        # NOTE(review): presumably generated_text holds the whole conversation, so the
	        # last entry is the assistant reply - confirm against the transformers version used
	        return outputs[0]["generated_text"][-1]["content"]

	    response = LLM_CLIENT.chat.completions.create(
	        model=LLM_MODEL,
	        messages=messages,
	        temperature=0.0, # Deterministic for reproducibility
	        max_tokens=max_tokens,
	        seed=seed,
	    )

	    # Only the first choice is read
	    return response.choices[0].message.content


	@dataclass
	class RunResult:
	    """Result of running the agent. Do not modify this class."""
	    final_score: int  # score held by the agent when the run ended
	    max_score: int  # maximum score parsed from the game output
	    moves: int  # number of agent steps executed
	    locations_visited: set[str]  # names of the distinct locations seen
	    game_completed: bool  # True when the last observation looked like a game-over message
	    error: Optional[str] = None  # error description, or None when the run finished normally
	    # One (tool name, JSON-encoded args, truncated result) tuple per step, as filled by StudentAgent.run
	    history: list[tuple[str, str, str]] = field(default_factory=list)


	# ─── System Prompt ─────────────────────────────────────────────────────────────
	# Instructions sent as the system message on every LLM call: the tool catalogue,
	# play strategy, and the strict THOUGHT/TOOL/ARGS reply format that
	# StudentAgent._parse_response expects. The notebook categories are separated
	# by plain "|" (a backslash-escaped pipe is an invalid string escape and would
	# leak literal backslashes into the prompt).
	SYSTEM_PROMPT = """You are an expert text adventure game player. You are methodical, curious, and never give up.

	AVAILABLE TOOLS:
	- play_action: Send a command to the game.
	ARGS: {"action": "your command"}
	For movement use direction words: north, south, east, west, up, down, in, out, ne, nw, se, sw
	For interactions: examine <thing>, take <item>, drop <item>, open <thing>, close <thing>,
	read <thing>, push <thing>, pull <thing>, turn <thing>, light <thing>, put <item> in <container>,
	unlock <door> with <key>, give <item> to <npc>, attack <enemy> with <weapon>, tie <item> to <thing>,
	climb <thing>, enter <thing>, search <thing>, listen, smell, wave <item>, eat <item>, drink <item>

	- think: Plan your strategy. ARGS: {"goal": "...", "thought": "..."}

	- notebook_write: Save clues, codes, puzzle info permanently.
	ARGS: {"text": "...", "category": "Clue|Puzzle|Item|Danger|NPC|Code|Goal|Map"}

	- notebook_read: Read your saved notes. ARGS: {"keyword": "optional filter"}

	- memory: Full status dump (location, inventory, notes, map). ARGS: {}

	- get_map: View explored map and unexplored exits. ARGS: {}

	- find_path: Get directions to a known room. ARGS: {"target_room": "room name"}

	- suggest_exploration: Get suggestion for nearest unexplored area. ARGS: {}

	- register_exits: Record exits visible in current room.
	ARGS: {"directions": "north, south, up"}

	STRATEGY — How to play well:
	1. EXPLORE SYSTEMATICALLY: When you enter a new room, ALWAYS do "look" first, then register visible exits with register_exits. Explore every exit.
	2. EXAMINE EVERYTHING: If the game describes objects, furniture, or features — examine them. Things hide under rugs, inside containers, behind paintings.
	3. TAKE EVERYTHING: Collect all portable items. You'll need them later for puzzles.
	4. READ CAREFULLY: The game text contains ALL clues. Unusual descriptions often hint at puzzles.
	5. SAVE CLUES: If you notice a code, inscription, locked door, NPC request, or puzzle — write it in notebook_write immediately.
	6. DON'T REPEAT FAILURES: Check your recent history. If a command didn't work, try a DIFFERENT approach. Use synonyms: get/take, look/examine, push/move.
	7. BACKTRACK SMARTLY: If stuck, call suggest_exploration to find unexplored exits, or find_path to return to a room with unsolved puzzles.
	8. USE ITEMS: When you have items and encounter obstacles, think about which item might help. Try "use X", "put X in Y", "unlock Y with X".
	9. LISTEN AND SEARCH: "listen", "search", "look under X", "look behind X" often reveal hidden things.
	10. CHECK SCORE: If your score increases, you're making progress. If not for a while, try a new area.

	RESPONSE FORMAT (strict):
	THOUGHT: <brief reasoning about what you observe and your plan>
	TOOL: <exactly one tool name>
	ARGS: <valid JSON for that tool>

	Example:
	THOUGHT: I see a rusty door to the north and a brass lamp on the ground. I should take the lamp first.
	TOOL: play_action
	ARGS: {"action": "take lamp"}"""


	# ─── Directions mentioned in text ──────────────────────────────────────────────
	# Matches any full compass/vertical direction word as a whole word, ignoring
	# case. The alternatives are joined with a plain "|": an escaped "\|" would
	# match a literal pipe character and the pattern would never find a direction.
	EXIT_PATTERN = re.compile(
	    r"\b(north|south|east|west|up|down|northeast|northwest|southeast|southwest)\b",
	    re.IGNORECASE,
	)

	# Every token accepted as a bare movement command (used to turn "go <dir>"
	# into just "<dir>").
	DIRECTION_SET = {
	    # one- and two-letter abbreviations
	    "n", "s", "e", "w", "u", "d",
	    "ne", "nw", "se", "sw",
	    # spelled-out compass and vertical directions
	    "north", "south", "east", "west", "up", "down",
	    "northeast", "northwest", "southeast", "southwest",
	    # entering / leaving
	    "in", "out", "enter", "exit",
	}


	class StudentAgent:
	    """ReAct-style agent that plays a text adventure through MCP tools.

	    Every step builds a prompt from the latest observation plus recent
	    history, asks the LLM for a THOUGHT/TOOL/ARGS reply, parses that reply
	    defensively and calls the selected tool on the MCP client.
	    """

	    # Lower-case substrings of the game output that mark a rejected action.
	    _FAIL_PHRASES = (
	        "you can't go",
	        "you can't do",
	        "i don't understand",
	        "that's not a verb",
	        "you don't see",
	        "you can't see",
	        "there's no",
	        "you can't",
	        "nothing happens",
	        "is locked",
	        "is closed",
	        "won't budge",
	        "doesn't seem to",
	        "you aren't",
	    )

	    # Lower-case substrings of the game output that mark the end of the game.
	    _GAME_OVER_PHRASES = (
	        "* you have died *",
	        "* you have won *",
	        "game over",
	        "you have won",
	        "you have died",
	        "would you like to restart",
	    )

	    # Keys an LLM sometimes uses instead of folding the object into "action".
	    _EXTRA_ACTION_KEYS = ("target", "object", "item", "direction")

	    # Argument names an LLM sometimes uses instead of find_path's "target_room".
	    _TARGET_ROOM_ALIASES = ("to", "room", "destination", "target")

	    def __init__(self):
	        self.history: list[dict] = []
	        self.score: int = 0
	        self.max_score: int = 0
	        self.location: str = "Unknown"
	        self.locations_visited: set[str] = set()
	        self.failed_actions: set[str] = set()  # "location:action" entries that failed
	        self.consecutive_no_score: int = 0  # steps since the score last increased
	        self.last_score: int = 0

	    async def run(
	        self, client, game: str, max_steps: int, seed: int, verbose: bool = False
	    ) -> "RunResult":
	        """Play up to *max_steps* steps and return a summary of the run.

	        Args:
	            client: MCP client exposing ``list_tools`` and ``call_tool``.
	            game: Name of the game; accepted for interface compatibility and
	                not read by this method.
	            max_steps: Maximum number of LLM-driven steps.
	            seed: Base seed; step ``k`` calls the LLM with ``seed + k``.
	            verbose: Print the thought, tool call and result of every step.

	        Returns:
	            A ``RunResult`` with the final score, step count, visited
	            locations and the per-step history.
	        """
	        tools = await client.list_tools()
	        tool_names = [t.name for t in tools]

	        # Initial look
	        result = await client.call_tool("play_action", {"action": "look"})
	        observation = self._extract_result(result)
	        self._update_state(observation)
	        await self._auto_register_exits(client, observation)

	        if verbose:
	            print(f"\n{'=' * 60}\nINITIAL OBSERVATION:\n{observation}\n{'=' * 60}")

	        step = 0  # stays 0 when max_steps <= 0 so "moves" is still defined
	        for step in range(1, max_steps + 1):
	            prompt = self._build_prompt(observation, step)
	            response = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=400)
	            thought, tool_name, tool_args = self._parse_response(response, tool_names)

	            if verbose:
	                print(f"\n--- Step {step} ---")
	                print(f" THOUGHT: {thought}")
	                print(f" TOOL: {tool_name}({json.dumps(tool_args)})")

	            try:
	                result = await client.call_tool(tool_name, tool_args)
	                observation = self._extract_result(result)
	            except Exception as e:
	                # Feed the tool error back to the LLM instead of aborting the run.
	                observation = f"Error: {e}"

	            if verbose:
	                obs_preview = observation[:400].replace("\n", "\n ")
	                print(f" RESULT: {obs_preview}")

	            self._update_state(observation)

	            if tool_name == "play_action":
	                # Auto-register exits when we get a play_action result
	                await self._auto_register_exits(client, observation)

	                # Track failed commands per location so the prompt can list them
	                action = str(tool_args.get("action", "")).lower()
	                if self._is_failure(observation):
	                    self.failed_actions.add(f"{self.location}:{action}")

	            # Track score progress
	            if self.score > self.last_score:
	                self.consecutive_no_score = 0
	                self.last_score = self.score
	            else:
	                self.consecutive_no_score += 1

	            self.history.append(
	                {
	                    "step": step,
	                    "thought": thought,
	                    "tool": tool_name,
	                    "args": tool_args,
	                    "result": observation[:200],
	                    "location": self.location,
	                    "score": self.score,
	                }
	            )

	            if self._is_game_over(observation):
	                break

	        return RunResult(
	            final_score=self.score,
	            max_score=self.max_score,
	            moves=step,
	            locations_visited=self.locations_visited,
	            game_completed=self._is_game_over(observation),
	            error=None,
	            history=[
	                (h["tool"], json.dumps(h["args"]), h["result"]) for h in self.history
	            ],
	        )

	    async def _auto_register_exits(self, client, observation: str) -> None:
	        """Best-effort: send the directions mentioned in *observation* to register_exits."""
	        exits = self._detect_exits(observation)
	        if not exits:
	            return
	        try:
	            await client.call_tool("register_exits", {"directions": ", ".join(exits)})
	        except Exception:
	            # Map bookkeeping is optional; a missing or failing tool must not end the run.
	            pass

	    def _build_prompt(self, observation: str, step: int) -> str:
	        """Assemble the user prompt: status line, recent history, local failures, hint, game text."""
	        # Status line
	        parts = [
	            f"[Step {step} | Score: {self.score}/{self.max_score} | "
	            f"Location: {self.location} | Rooms visited: {len(self.locations_visited)}]"
	        ]

	        # Recent history (last 7 for better context)
	        if self.history:
	            parts.append("\nRecent history:")
	            for h in self.history[-7:]:
	                action_str = json.dumps(h["args"])
	                loc = h.get("location", "?")
	                result_short = h["result"].replace("\n", " ")[:80]
	                parts.append(f" [{loc}] {h['tool']}({action_str}) -> {result_short}")

	        # Failed actions at current location (helps avoid repetition).
	        # Slice off the known prefix instead of splitting on ":" so a colon in
	        # the location name cannot corrupt the action; sort because set order
	        # varies between interpreter runs and would make prompts irreproducible.
	        prefix = f"{self.location}:"
	        loc_failures = sorted(
	            entry[len(prefix):]
	            for entry in self.failed_actions
	            if entry.startswith(prefix)
	        )
	        if loc_failures:
	            parts.append(f"\nActions that FAILED here: {', '.join(loc_failures)}")

	        # Stuck hint
	        if self.consecutive_no_score > 8:
	            parts.append(
	                "\n[HINT: Score hasn't changed in a while. Consider: "
	                "call suggest_exploration, check memory, examine objects more carefully, "
	                "or try using inventory items on things you've seen.]"
	            )

	        # Current game output
	        parts.append(f"\nGame output:\n{observation}")
	        parts.append("\nWhat do you do next?")

	        return "\n".join(parts)

	    def _parse_response(
	        self, response: str, valid_tools: list[str]
	    ) -> tuple[str, str, dict]:
	        """Parse a THOUGHT/TOOL/ARGS reply into ``(thought, tool_name, tool_args)``.

	        Never raises on malformed replies: anything unparsable falls back to
	        ``play_action`` with ``{"action": "look"}`` (or ``{}`` for other tools).
	        """
	        thought = "..."
	        tool_name = "play_action"
	        args_lines: list[str] = []
	        collecting_args = False

	        # "response or ''" tolerates a None reply from the LLM backend
	        for line in (response or "").split("\n"):
	            clean = line.strip()
	            up = clean.upper()

	            if up.startswith("THOUGHT:"):
	                thought = clean.split(":", 1)[1].strip()
	                collecting_args = False
	            elif up.startswith("TOOL:"):
	                tool_name = self._resolve_tool_name(
	                    clean.split(":", 1)[1], valid_tools, tool_name
	                )
	                collecting_args = False
	            elif up.startswith("ARGS:"):
	                args_lines = [clean.split(":", 1)[1].strip()]
	                collecting_args = True
	            elif collecting_args and clean:
	                # JSON spread over several lines
	                args_lines.append(clean)

	        parsed = self._parse_args(" ".join(args_lines), tool_name) if args_lines else None
	        if parsed is not None:
	            tool_args = parsed
	        elif tool_name == "play_action":
	            tool_args = {"action": "look"}
	        else:
	            # Do not send a stray "action" argument to tools that do not take one
	            tool_args = {}

	        if tool_name == "play_action":
	            tool_args = {"action": self._normalize_action(tool_args)}

	        if tool_name == "find_path":
	            # Normalize: the tool expects "target_room" not "to" or "room"
	            for key in self._TARGET_ROOM_ALIASES:
	                if key in tool_args and "target_room" not in tool_args:
	                    tool_args["target_room"] = tool_args.pop(key)

	        # Final validation
	        if tool_name not in valid_tools:
	            tool_name = "play_action"
	            if "action" not in tool_args:
	                tool_args = {"action": "look"}

	        return thought, tool_name, tool_args

	    def _resolve_tool_name(self, raw: str, valid_tools: list[str], current: str) -> str:
	        """Map the text after ``TOOL:`` to a tool name, tolerating common LLM slips.

	        Returns *current* unchanged when nothing recognisable is found.
	        """
	        name = raw.strip().lower().strip("`").strip().replace(" ", "_")
	        if name in valid_tools:
	            return name
	        if "play" in name or "action" in name:
	            return "play_action"
	        if "note" in name:
	            # Only an explicit "read" (without "write") selects the reader
	            if "read" in name and "write" not in name:
	                return "notebook_read"
	            return "notebook_write"
	        if "map" in name:
	            return "get_map"
	        if "path" in name:
	            return "find_path"
	        if "suggest" in name or "explor" in name:
	            return "suggest_exploration"
	        if "register" in name or "exit" in name:
	            return "register_exits"
	        return current

	    def _parse_args(self, raw_args: str, tool_name: str) -> Optional[dict]:
	        """Return the ARGS payload as a dict, or None when nothing usable is found."""
	        # Try the whole text first, then the first flat {...} object inside it
	        candidates = [raw_args]
	        embedded = re.search(r"\{[^{}]+\}", raw_args)
	        if embedded:
	            candidates.append(embedded.group())
	        for candidate in candidates:
	            try:
	                parsed = json.loads(candidate)
	            except json.JSONDecodeError:
	                continue
	            # Valid JSON that is not an object (list, string, number) is unusable
	            if isinstance(parsed, dict):
	                return parsed

	        # Fallback: pull the action string out of almost-JSON text
	        if tool_name == "play_action":
	            m = re.search(r'"action"\s*:\s*"([^"]+)"', raw_args)
	            if m:
	                return {"action": m.group(1)}
	        return None

	    def _normalize_action(self, tool_args: dict) -> str:
	        """Build the final game command from play_action arguments."""

	        def as_text(value) -> str:
	            # A JSON null must not become the literal word "None"
	            return "" if value is None else str(value).strip()

	        action = as_text(tool_args.get("action"))
	        # Merge split args (action + target/object)
	        for extra_key in self._EXTRA_ACTION_KEYS:
	            extra = as_text(tool_args.get(extra_key))
	            if extra and extra.lower() not in action.lower():
	                action = f"{action} {extra}".strip()

	        # Strip "go " prefix for bare directions
	        if action.lower().startswith("go "):
	            rest = action[3:].strip().lower()
	            if rest in DIRECTION_SET:
	                action = rest

	        return action or "look"

	    def _extract_result(self, result) -> str:
	        """Return the first text part of a tool result, or ``str(result)`` as a fallback."""
	        content = getattr(result, "content", None)
	        if content:
	            for part in content:
	                text = getattr(part, "text", None)
	                if text is not None:
	                    return text
	        return str(result)

	    def _update_state(self, text: str):
	        """Update score, max score and current location from markers in *text*."""
	        m = re.search(r"Score:\s*(\d+)/(\d+)", text, re.IGNORECASE)
	        if m:
	            self.score = int(m.group(1))
	            self.max_score = int(m.group(2))
	        # The location runs up to the next "|" or the closing "]"
	        m_loc = re.search(r"\[Location:\s*([^|\]]+)", text)
	        if m_loc:
	            loc = m_loc.group(1).strip()
	            if loc and loc != "Unknown":
	                self.location = loc
	                self.locations_visited.add(loc)

	    def _detect_exits(self, text: str) -> list[str]:
	        """Extract the distinct direction words mentioned in game text, sorted."""
	        # Sorted so the register_exits argument does not depend on set ordering
	        return sorted(set(EXIT_PATTERN.findall(text.lower())))

	    def _is_failure(self, text: str) -> bool:
	        """Detect if the game rejected our action."""
	        lower = text.lower()
	        return any(phrase in lower for phrase in self._FAIL_PHRASES)

	    def _is_game_over(self, text: str) -> bool:
	        """Return True when *text* contains a death, victory or restart message."""
	        lower = text.lower()
	        return any(phrase in lower for phrase in self._GAME_OVER_PHRASES)


	# =============================================================================
	# For local testing
	# =============================================================================

	async def test_agent():
	    """Smoke-test the agent: play 10 steps of zork1 via the local MCP server and print a summary."""
	    from fastmcp import Client

	    player = StudentAgent()

	    async with Client("mcp_server.py") as mcp_client:
	        outcome = await player.run(
	            client=mcp_client,
	            game="zork1",
	            max_steps=10,
	            seed=42,
	            verbose=True,
	        )

	    # Summary of the finished run
	    print(f"\nFinal Score: {outcome.final_score}")
	    print(f"Moves: {outcome.moves}")
	    print(f"Locations: {outcome.locations_visited}")


	# Script entry point: run the local smoke test when this file is executed directly.
	if __name__ == "__main__":
	    # asyncio is only needed here, to drive the async test_agent coroutine
	    import asyncio
	    asyncio.run(test_agent())