flams's picture
soumission zork
4f5b6ec
"""
MCP ReAct Agent - Enhanced Generalist
Key improvements over v6:
- Richer system prompt with strategy patterns for different game types
- Stuck detection + automatic recovery (suggest_exploration, try new verbs)
- Smarter history: shows failed actions to avoid repetition
- Exit registration from game text (auto-detects mentioned directions)
- Multi-phase play: explore → collect → solve → backtrack
- Robust parsing with multiple fallback strategies
"""
import json
import os
import re
from dataclasses import dataclass, field
from typing import Optional
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
# Load environment variables
load_dotenv()
# Set USE_LOCAL_MODEL=1 in your .env to use a locally downloaded model
# NOTE: the comparison is case-sensitive; only "1", "true" and "yes"
# (after stripping whitespace) enable local mode.
USE_LOCAL_MODEL = os.getenv("USE_LOCAL_MODEL", "0").strip() in ("1", "true", "yes")
LOCAL_MODEL_ID = os.getenv("LOCAL_MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")
# =============================================================================
# LLM Configuration - DO NOT MODIFY
# =============================================================================
# Model to use (fixed for fair evaluation)
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
# Initialize the LLM client based on mode
# Exactly one of _local_pipeline / LLM_CLIENT is non-None after this block.
_local_pipeline = None
if USE_LOCAL_MODEL:
    # Imported lazily so torch/transformers are only needed in local mode.
    import torch
    from transformers import pipeline as _hf_pipeline

    _local_pipeline = _hf_pipeline(
        "text-generation",
        model=LOCAL_MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    LLM_CLIENT = None
else:
    # Hosted mode: fail fast at import time when no token is configured.
    _hf_token = os.getenv("HF_TOKEN")
    if not _hf_token:
        raise ValueError("HF_TOKEN not found. Set it in your .env file.")
    LLM_CLIENT = InferenceClient(token=_hf_token)
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """
    Call the LLM with the given prompt. Use this function in your agent.

    Uses the local pipeline when USE_LOCAL_MODEL is set and a pipeline was
    created at import time; otherwise sends a chat completion request through
    LLM_CLIENT to LLM_MODEL.

    Args:
        prompt: The user prompt (current game state, history, etc.)
        system_prompt: The system prompt (instructions for the agent)
        seed: Random seed for reproducibility (only forwarded on the hosted
            path; the local pipeline call does not receive it)
        max_tokens: Maximum tokens in response (default: 300)

    Returns:
        The LLM's response text
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    if USE_LOCAL_MODEL and _local_pipeline is not None:
        outputs = _local_pipeline(
            messages,
            max_new_tokens=max_tokens,
            temperature=0.0001,  # Near-deterministic (0.0 unsupported by some backends)
            do_sample=True,
        )
        # The reply is read from the last message of the returned conversation
        # (presumably the assistant turn appended by the pipeline).
        return outputs[0]["generated_text"][-1]["content"]
    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.0,  # Deterministic for reproducibility
        max_tokens=max_tokens,
        seed=seed,
    )
    # NOTE(review): content is returned as-is; confirm the backend never
    # returns None here, since callers treat the result as a str.
    return response.choices[0].message.content
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class."""

    # Score tracked by the agent when the run ended.
    final_score: int
    # Maximum score as parsed from the game's "Score: x/y" output.
    max_score: int
    # Number of agent steps that were executed.
    moves: int
    # Names of every location the agent recorded during the run.
    locations_visited: set[str]
    # True when the final observation matched a game-over phrase.
    game_completed: bool
    # Optional error description; None when the run finished normally.
    error: Optional[str] = None
    # One (tool name, JSON-encoded args, truncated result) tuple per step.
    history: list[tuple[str, str, str]] = field(default_factory=list)
# ─── System Prompt ─────────────────────────────────────────────────────────────
# Sent verbatim as the system message on every LLM call. It lists the MCP
# tools with their argument shapes, the play strategy, and the strict
# THOUGHT / TOOL / ARGS reply format that the agent's response parser expects.
# The em dashes below were previously stored as mis-decoded bytes ("β€”"),
# which put garbage characters into the prompt; they are restored here.
SYSTEM_PROMPT = """You are an expert text adventure game player. You are methodical, curious, and never give up.
AVAILABLE TOOLS:
- play_action: Send a command to the game.
ARGS: {"action": "your command"}
For movement use direction words: north, south, east, west, up, down, in, out, ne, nw, se, sw
For interactions: examine <thing>, take <item>, drop <item>, open <thing>, close <thing>,
read <thing>, push <thing>, pull <thing>, turn <thing>, light <thing>, put <item> in <container>,
unlock <door> with <key>, give <item> to <npc>, attack <enemy> with <weapon>, tie <item> to <thing>,
climb <thing>, enter <thing>, search <thing>, listen, smell, wave <item>, eat <item>, drink <item>
- think: Plan your strategy. ARGS: {"goal": "...", "thought": "..."}
- notebook_write: Save clues, codes, puzzle info permanently.
ARGS: {"text": "...", "category": "Clue|Puzzle|Item|Danger|NPC|Code|Goal|Map"}
- notebook_read: Read your saved notes. ARGS: {"keyword": "optional filter"}
- memory: Full status dump (location, inventory, notes, map). ARGS: {}
- get_map: View explored map and unexplored exits. ARGS: {}
- find_path: Get directions to a known room. ARGS: {"target_room": "room name"}
- suggest_exploration: Get suggestion for nearest unexplored area. ARGS: {}
- register_exits: Record exits visible in current room.
ARGS: {"directions": "north, south, up"}
STRATEGY — How to play well:
1. EXPLORE SYSTEMATICALLY: When you enter a new room, ALWAYS do "look" first, then register visible exits with register_exits. Explore every exit.
2. EXAMINE EVERYTHING: If the game describes objects, furniture, or features — examine them. Things hide under rugs, inside containers, behind paintings.
3. TAKE EVERYTHING: Collect all portable items. You'll need them later for puzzles.
4. READ CAREFULLY: The game text contains ALL clues. Unusual descriptions often hint at puzzles.
5. SAVE CLUES: If you notice a code, inscription, locked door, NPC request, or puzzle — write it in notebook_write immediately.
6. DON'T REPEAT FAILURES: Check your recent history. If a command didn't work, try a DIFFERENT approach. Use synonyms: get/take, look/examine, push/move.
7. BACKTRACK SMARTLY: If stuck, call suggest_exploration to find unexplored exits, or find_path to return to a room with unsolved puzzles.
8. USE ITEMS: When you have items and encounter obstacles, think about which item might help. Try "use X", "put X in Y", "unlock Y with X".
9. LISTEN AND SEARCH: "listen", "search", "look under X", "look behind X" often reveal hidden things.
10. CHECK SCORE: If your score increases, you're making progress. If not for a while, try a new area.
RESPONSE FORMAT (strict):
THOUGHT: <brief reasoning about what you observe and your plan>
TOOL: <exactly one tool name>
ARGS: <valid JSON for that tool>
Example:
THOUGHT: I see a rusty door to the north and a brass lamp on the ground. I should take the lamp first.
TOOL: play_action
ARGS: {"action": "take lamp"}"""
# ─── Directions mentioned in text ──────────────────────────────────────────────
# Matches full compass/vertical direction words as whole words, in any case.
EXIT_PATTERN = re.compile(
    r"\b(north|south|east|west|up|down|northeast|northwest|southeast|southwest)\b",
    flags=re.IGNORECASE,
)

# Every token accepted as a bare movement command, grouped by kind.
DIRECTION_SET = {
    # single-letter and two-letter abbreviations
    "n", "s", "e", "w", "u", "d",
    "ne", "nw", "se", "sw",
    # spelled-out cardinal and vertical directions
    "north", "south", "east", "west", "up", "down",
    # spelled-out diagonals
    "northeast", "northwest", "southeast", "southwest",
    # entering / leaving
    "in", "out", "enter", "exit",
}
class StudentAgent:
    """ReAct-style agent that plays a text adventure through MCP tools.

    Every step builds a prompt from the tracked state, asks the LLM for a
    THOUGHT / TOOL / ARGS reply, parses that reply defensively and calls the
    chosen tool on the client. Score, location and rejected commands are
    scraped from the tool output and fed back into the next prompt.
    """

    # How many past steps are echoed back to the LLM in each prompt.
    _HISTORY_WINDOW = 7
    # Steps without a score increase after which the prompt adds a hint.
    _STUCK_THRESHOLD = 8
    # Lower-case substrings that mark an observation as a rejected command.
    # "you can't" also covers "you can't go / do / see".
    _FAIL_PHRASES = (
        "i don't understand",
        "that's not a verb",
        "you don't see",
        "there's no",
        "you can't",
        "nothing happens",
        "is locked",
        "is closed",
        "won't budge",
        "doesn't seem to",
        "you aren't",
    )
    # Lower-case substrings that mark the end of the game. The starred
    # "*** you have died/won ***" banners contain the plain phrases.
    _GAME_OVER_PHRASES = (
        "game over",
        "you have won",
        "you have died",
        "would you like to restart",
    )

    def __init__(self):
        self.history: list[dict] = []
        self.score: int = 0
        self.max_score: int = 0
        self.location: str = "Unknown"
        self.locations_visited: set[str] = set()
        self.failed_actions: set[str] = set()  # track "location:action" that failed
        self.consecutive_no_score: int = 0
        self.last_score: int = 0

    async def run(
        self, client, game: str, max_steps: int, seed: int, verbose: bool = False
    ) -> "RunResult":
        """Play for up to ``max_steps`` steps and return a RunResult.

        Args:
            client: MCP client exposing ``list_tools`` and ``call_tool``.
            game: Game identifier; not used by the loop itself.
            max_steps: Upper bound on the number of LLM/tool steps.
            seed: Base seed; step ``i`` calls the LLM with ``seed + i``.
            verbose: When true, print each thought, tool call and result.

        Returns:
            A RunResult built from the tracked score, visited locations and
            per-step history.
        """
        tools = await client.list_tools()
        tool_names = [t.name for t in tools]

        # Initial look
        result = await client.call_tool("play_action", {"action": "look"})
        observation = self._extract_result(result)
        self._update_state(observation)
        await self._register_exits(client, observation, verbose)

        if verbose:
            print(f"\n{'=' * 60}\nINITIAL OBSERVATION:\n{observation}\n{'=' * 60}")

        step = 0  # stays 0 when max_steps <= 0, so RunResult.moves is defined
        for step in range(1, max_steps + 1):
            prompt = self._build_prompt(observation, step)
            response = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=400)
            thought, tool_name, tool_args = self._parse_response(response, tool_names)

            if verbose:
                print(f"\n--- Step {step} ---")
                print(f" THOUGHT: {thought}")
                print(f" TOOL: {tool_name}({json.dumps(tool_args)})")

            try:
                result = await client.call_tool(tool_name, tool_args)
                observation = self._extract_result(result)
            except Exception as e:
                # Tool errors become the next observation so the LLM can react.
                observation = f"Error: {e}"

            if verbose:
                obs_preview = observation[:400].replace("\n", "\n ")
                print(f" RESULT: {obs_preview}")

            self._update_state(observation)

            if tool_name == "play_action":
                # Auto-register exits when we get a play_action result
                await self._register_exits(client, observation, verbose)
                # Remember commands the game rejected, keyed by the location
                # the agent is in after the (failed) command.
                action = str(tool_args.get("action", "")).lower()
                if self._is_failure(observation):
                    self.failed_actions.add(f"{self.location}:{action}")

            # Track score progress
            if self.score > self.last_score:
                self.consecutive_no_score = 0
                self.last_score = self.score
            else:
                self.consecutive_no_score += 1

            self.history.append(
                {
                    "step": step,
                    "thought": thought,
                    "tool": tool_name,
                    "args": tool_args,
                    "result": observation[:200],
                    "location": self.location,
                    "score": self.score,
                }
            )

            if self._is_game_over(observation):
                break

        return RunResult(
            final_score=self.score,
            max_score=self.max_score,
            moves=step,
            locations_visited=self.locations_visited,
            game_completed=self._is_game_over(observation),
            error=None,
            history=[
                (h["tool"], json.dumps(h["args"]), h["result"]) for h in self.history
            ],
        )

    async def _register_exits(self, client, observation: str, verbose: bool = False) -> None:
        """Send direction words found in ``observation`` to ``register_exits``."""
        exits = self._detect_exits(observation)
        if not exits:
            return
        try:
            await client.call_tool("register_exits", {"directions": ", ".join(exits)})
        except Exception as exc:
            # Deliberately best-effort: a failed registration must not abort
            # the run. The failure is only surfaced in verbose mode.
            if verbose:
                print(f" (register_exits skipped: {exc})")

    def _build_prompt(self, observation: str, step: int) -> str:
        """Assemble the user prompt: status line, history, failures, hint, output."""
        parts = []
        # Status line
        parts.append(
            f"[Step {step} | Score: {self.score}/{self.max_score} | "
            f"Location: {self.location} | Rooms visited: {len(self.locations_visited)}]"
        )

        # Recent history
        if self.history:
            parts.append("\nRecent history:")
            for h in self.history[-self._HISTORY_WINDOW:]:
                action_str = json.dumps(h["args"])
                loc = h.get("location", "?")
                result_short = h["result"].replace("\n", " ")[:80]
                parts.append(f" [{loc}] {h['tool']}({action_str}) -> {result_short}")

        # Failed actions at current location (helps avoid repetition).
        # Sorted because failed_actions is a set: unsorted iteration would make
        # the prompt text, and so the LLM output, vary between runs.
        prefix = f"{self.location}:"
        loc_failures = sorted(
            a.split(":", 1)[1] for a in self.failed_actions if a.startswith(prefix)
        )
        if loc_failures:
            parts.append(f"\nActions that FAILED here: {', '.join(loc_failures)}")

        # Stuck hint
        if self.consecutive_no_score > self._STUCK_THRESHOLD:
            parts.append(
                "\n[HINT: Score hasn't changed in a while. Consider: "
                "call suggest_exploration, check memory, examine objects more carefully, "
                "or try using inventory items on things you've seen.]"
            )

        # Current game output
        parts.append(f"\nGame output:\n{observation}")
        parts.append("\nWhat do you do next?")
        return "\n".join(parts)

    def _parse_response(
        self, response: str, valid_tools: list[str]
    ) -> tuple[str, str, dict]:
        """Turn an LLM reply into ``(thought, tool_name, tool_args)``.

        Never raises on malformed replies: missing pieces fall back to a
        ``play_action`` "look", and tools that were named without usable ARGS
        get an empty argument dict.
        """
        thought = "..."
        tool_name = "play_action"
        args_lines: list[str] = []
        collecting_args = False

        # ``response or ""`` guards against a None/empty completion.
        for line in (response or "").split("\n"):
            clean = line.strip()
            upper = clean.upper()
            if upper.startswith("THOUGHT:"):
                thought = clean.split(":", 1)[1].strip()
                collecting_args = False
            elif upper.startswith("TOOL:"):
                raw_tool = clean.split(":", 1)[1].strip().lower().strip("`").strip()
                tool_name = self._resolve_tool(
                    raw_tool.replace(" ", "_"), valid_tools, tool_name
                )
                collecting_args = False
            elif upper.startswith("ARGS:"):
                args_lines = [clean.split(":", 1)[1].strip()]
                collecting_args = True
            elif collecting_args and clean:
                # JSON spread over several lines after "ARGS:".
                args_lines.append(clean)

        tool_args = None
        if args_lines:
            tool_args = self._parse_args(" ".join(args_lines), tool_name)
        if tool_args is None:
            # Only play_action has a sensible default command; other tools
            # must not receive an unrelated "action" argument.
            tool_args = {"action": "look"} if tool_name == "play_action" else {}

        # ─── Fix play_action args ───
        if tool_name == "play_action":
            action = self._as_text(tool_args.get("action"))
            # Merge split args (action + target/object)
            for extra_key in ("target", "object", "item", "direction"):
                extra = self._as_text(tool_args.get(extra_key))
                if extra and extra.lower() not in action.lower():
                    action = f"{action} {extra}".strip()
            # Strip "go " prefix for bare directions
            if action.lower().startswith("go "):
                rest = action[3:].strip().lower()
                if rest in DIRECTION_SET:
                    action = rest
            tool_args = {"action": action or "look"}

        # ─── Fix find_path args ───
        if tool_name == "find_path":
            # Normalize: the tool expects "target_room" not "to" or "room"
            for key in ("to", "room", "destination", "target"):
                if key in tool_args and "target_room" not in tool_args:
                    tool_args["target_room"] = tool_args.pop(key)

        # Final validation
        if tool_name not in valid_tools:
            tool_name = "play_action"
            if "action" not in tool_args:
                tool_args = {"action": "look"}
        return thought, tool_name, tool_args

    @staticmethod
    def _as_text(value) -> str:
        """Return ``value`` as a stripped string, mapping None to ""."""
        return "" if value is None else str(value).strip()

    @staticmethod
    def _resolve_tool(raw_tool: str, valid_tools: list[str], current: str) -> str:
        """Map a possibly misspelled tool name to a known one.

        Returns ``current`` unchanged when nothing matches.
        """
        if raw_tool in valid_tools:
            return raw_tool
        # Handle common LLM mistakes
        if "play" in raw_tool or "action" in raw_tool:
            return "play_action"
        if "note" in raw_tool:
            return "notebook_read" if (
                "read" in raw_tool and "write" not in raw_tool
            ) else "notebook_write"
        if "map" in raw_tool:
            return "get_map"
        if "path" in raw_tool:
            return "find_path"
        if "suggest" in raw_tool or "explor" in raw_tool:
            return "suggest_exploration"
        if "register" in raw_tool or "exit" in raw_tool:
            return "register_exits"
        return current

    @staticmethod
    def _parse_args(raw_args: str, tool_name: str) -> "Optional[dict]":
        """Parse the ARGS text into a dict, or return None when unusable."""
        try:
            parsed = json.loads(raw_args)
        except json.JSONDecodeError:
            parsed = None
            # Try extracting a flat JSON object from surrounding prose.
            m = re.search(r"\{[^{}]+\}", raw_args)
            if m:
                try:
                    parsed = json.loads(m.group())
                except json.JSONDecodeError:
                    parsed = None
            # Fallback: try extracting the action string alone.
            if parsed is None and tool_name == "play_action":
                m = re.search(r'"action"\s*:\s*"([^"]+)"', raw_args)
                if m:
                    parsed = {"action": m.group(1)}

        if isinstance(parsed, dict):
            return parsed
        # Valid JSON that is not an object (e.g. ARGS: "north") used to crash
        # the caller; a bare string is accepted as the game command.
        if tool_name == "play_action" and isinstance(parsed, str) and parsed.strip():
            return {"action": parsed.strip()}
        return None

    def _extract_result(self, result) -> str:
        """Return the text of the first content part, else ``str(result)``."""
        content = getattr(result, "content", None)
        if not content and isinstance(result, (list, tuple)):
            # Assumption: some client versions return the content list itself
            # rather than a wrapper object - TODO confirm against the client.
            content = result
        if content:
            text = getattr(content[0], "text", None)
            if isinstance(text, str):
                return text
        return str(result)

    def _update_state(self, text: str):
        """Update score and location from "Score: x/y" / "[Location: ...]" markers."""
        m = re.search(r"Score:\s*(\d+)/(\d+)", text, re.IGNORECASE)
        if m:
            self.score = int(m.group(1))
            self.max_score = int(m.group(2))
        m_loc = re.search(r"\[Location:\s*([^|\]]+)", text)
        if m_loc:
            loc = m_loc.group(1).strip()
            if loc and loc != "Unknown":
                self.location = loc
                self.locations_visited.add(loc)

    def _detect_exits(self, text: str) -> list[str]:
        """Extract direction words mentioned in game text.

        Duplicates are removed and first-occurrence order is kept, so the
        result is the same for the same text on every run.
        """
        return list(dict.fromkeys(EXIT_PATTERN.findall(text.lower())))

    def _is_failure(self, text: str) -> bool:
        """Detect if the game rejected our action."""
        lower = text.lower()
        return any(phrase in lower for phrase in self._FAIL_PHRASES)

    def _is_game_over(self, text: str) -> bool:
        """Return True when the text contains a death, win or restart phrase."""
        lower = text.lower()
        return any(phrase in lower for phrase in self._GAME_OVER_PHRASES)
# =============================================================================
# For local testing
# =============================================================================
async def test_agent():
    """Run a short local smoke test of StudentAgent against ``mcp_server.py``.

    Plays ten verbose steps of "zork1" with seed 42, then prints the final
    score, the number of moves and the set of visited locations.
    """
    from fastmcp import Client

    player = StudentAgent()
    run_options = {"game": "zork1", "max_steps": 10, "seed": 42, "verbose": True}

    async with Client("mcp_server.py") as session:
        outcome = await player.run(client=session, **run_options)

        summary = (
            f"\nFinal Score: {outcome.final_score}",
            f"Moves: {outcome.moves}",
            f"Locations: {outcome.locations_visited}",
        )
        for line in summary:
            print(line)
if __name__ == "__main__":
    # Script entry point: asyncio is imported here because it is only needed
    # to drive the local test coroutine.
    import asyncio

    asyncio.run(test_agent())