# AgenticZork / agent.py
# gbl1357's picture
# Update agent.py
# d86f7b8 verified
"""
Optimized MCP ReAct Agent for Generalized Text Adventures
Designed to maximize score across 51 Jericho games.
"""
import json
import os
import re
import sys
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional, Any
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
# Load variables from a local .env file (if any) into the process environment
# before the token lookup below.
load_dotenv()

# Chat model requested on every completion call.
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

# The client cannot be built without a token, so fail at import time with a
# clear message instead of at the first request.
_hf_token = os.environ.get("HF_TOKEN")
if _hf_token is None or _hf_token == "":
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")

# Shared inference client used by call_llm.
LLM_CLIENT = InferenceClient(token=_hf_token)
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """Send one system + user exchange to the chat model and return the reply text.

    Args:
        prompt: Content of the user message.
        system_prompt: Content of the system message.
        seed: Sampling seed forwarded to the API.
        max_tokens: Upper bound on generated tokens.

    Returns:
        The text of the first returned choice, or an empty string when the
        API returns no choices or a message without text content.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.0,  # greedy decoding; together with the seed keeps runs repeatable
        max_tokens=max_tokens,
        seed=seed,
    )
    choices = response.choices
    if not choices:
        return ""
    # Message content is optional in chat-completion responses; callers expect
    # a str (they call .strip() on it), so never hand back None.
    return choices[0].message.content or ""
@dataclass
class RunResult:
    """Summary of one agent run, as returned by StudentAgent.run."""

    # Score recorded by the agent when the run stopped.
    final_score: int
    # Maximum score reported for the game.
    max_score: int
    # Number of play_action commands issued.
    moves: int
    # Names of every location the agent was in during the run.
    locations_visited: set[str]
    # True when the last observation looked like a game-over message.
    game_completed: bool
    # Optional error description; None when nothing was reported.
    error: Optional[str] = None
    # One (thought, tool call, truncated observation) tuple per step.
    history: list[tuple[str, str, str]] = field(default_factory=list)
# System prompt sent with every LLM request (StudentAgent.run passes it to
# call_llm). It lists the four MCP tools, gives general play heuristics, and
# fixes the THOUGHT / TOOL / ARGS reply layout that
# StudentAgent._parse_response scans for line by line, so the format section
# must stay in sync with that parser.
SYSTEM_PROMPT = """You are an expert AI agent playing a classic text adventure game. Your ultimate goal is to MAXIMIZE YOUR SCORE. To get points, you must explore, interact with objects, solve puzzles, and collect treasures.
AVAILABLE TOOLS:
1. play_action - Execute game commands (e.g., 'north', 'take lamp', 'examine door')
2. memory - Get current game state, score, and recent history
3. get_map - See explored locations and connections
4. inventory - Check what you're carrying
UNIVERSAL HEURISTICS FOR SCORING POINTS:
1. TAKE EVERYTHING: If a room description mentions an item, your FIRST action should be "take <item>" or "take all".
2. EXAMINE NOUNS: If you enter a room and see an object (e.g., a rug, a tree, a button), "examine <object>" to find hidden clues.
3. OPEN CONTAINERS: If you see a door, window, box, chest, or mailbox, try to "open <object>".
4. EXPLORE UNMAPPED AREAS: Try compass directions (n, s, e, w, u, d, ne, nw, se, sw) to find new rooms.
5. NEVER PING-PONG: Do not walk back and forth between two rooms (e.g., going East, then immediately West) unless you hit a dead end.
6. LEARN FROM FAILURE: If a command says "You can't do that" or "I don't understand", NEVER try that exact command again.
7. USE INVENTORY: If you are stuck, check your inventory. Try to "wear", "eat", "turn on", or "unlock <object> with <item>".
RESPOND IN THIS EXACT FORMAT:
THOUGHT: <Identify nouns/objects in the room description to interact with, OR decide which unexplored direction to take>
TOOL: <tool_name>
ARGS: <JSON arguments>
Example of excellent gameplay:
THOUGHT: The description mentions a mailbox. I should open it to see if there is a treasure or clue inside.
TOOL: play_action
ARGS: {"action": "open mailbox"}
"""
class StudentAgent:
    """LLM-driven ReAct agent that plays a text adventure through MCP tools.

    Each step the agent builds a prompt from the latest observation and its
    tracked state, asks the LLM for a THOUGHT / TOOL / ARGS reply, sanitises
    the requested tool call and executes it through the MCP client.
    """

    # Directions handed out when the agent has to be pushed somewhere new.
    _ALL_DIRECTIONS = ("north", "south", "east", "west", "up", "down", "ne", "nw", "se", "sw")
    # Spelling variants mapped onto the canonical names used in _ALL_DIRECTIONS.
    _DIRECTION_ALIASES = {
        "n": "north", "s": "south", "e": "east", "w": "west", "u": "up", "d": "down",
        "northeast": "ne", "northwest": "nw", "southeast": "se", "southwest": "sw",
    }
    # Canonical direction -> the direction that undoes it.
    _REVERSE_DIRECTIONS = {
        "north": "south", "south": "north", "east": "west", "west": "east",
        "up": "down", "down": "up", "ne": "sw", "sw": "ne", "nw": "se", "se": "nw",
    }
    # Leading verb phrases the LLM tends to produce -> the verb sent to the game.
    _VERB_SYNONYMS = {
        "check": "examine", "inspect": "examine", "investigate": "examine",
        "grab": "take", "pick up": "take",
    }
    # Whole-word failure cues. Matching on word boundaries keeps "not" from
    # firing inside words such as "another" or "note".
    _FAILURE_RE = re.compile(
        r"\b(?:can't|cannot|don't|not|nothing|fail\w*|impossible|doesn't work|look dark|no such)\b",
        re.IGNORECASE,
    )
    _GAME_OVER_PHRASES = ("game over", "you have died", "you are dead", "you have won")
    # Reported as RunResult.max_score. NOTE(review): 350 was hard-coded in the
    # original; presumably one specific game's maximum - confirm per game.
    DEFAULT_MAX_SCORE = 350

    def __init__(self, logger: Any = None, enable_logging: bool = False):
        """Create an agent.

        Args:
            logger: Optional logger object; stored on the instance as-is.
            enable_logging: Stored on the instance; not consulted by this class.
        """
        self._reset_state()
        self.logger = logger
        self.enable_logging = enable_logging

    def _reset_state(self) -> None:
        """Put every per-run attribute back to its initial value."""
        self.history: list[dict] = []
        self.recent_actions: list[str] = []
        self.score: int = 0
        self.failed_actions: dict[str, int] = {}
        self.locations_explored: set[str] = set()
        self.unexplored_directions: list[str] = []
        self.steps_since_map_check: int = 0
        self.steps_since_progress: int = 0
        self.current_map: Optional[str] = None
        self.walkthrough_hints: Optional[list[str]] = None
        self.current_inventory: list[str] = []
        self.last_direction_moved: Optional[str] = None

    @classmethod
    def _canonical_direction(cls, action: str) -> Optional[str]:
        """Return the canonical direction named by *action*, or None if it is not a move."""
        words = action.lower().split() if isinstance(action, str) else []
        if len(words) == 2 and words[0] == "go":
            words = words[1:]
        if len(words) != 1:
            return None
        name = cls._DIRECTION_ALIASES.get(words[0], words[0])
        return name if name in cls._REVERSE_DIRECTIONS else None

    def _looks_like_failure(self, observation: str) -> bool:
        """Return True when *observation* contains a whole-word failure cue."""
        return bool(self._FAILURE_RE.search(observation or ""))

    async def run(
        self,
        client,
        game: str,
        max_steps: int,
        seed: int,
        verbose: bool = False,
        walkthrough: Optional[list[str]] = None,
    ) -> "RunResult":
        """Play up to *max_steps* steps and return a summary of the run.

        Args:
            client: MCP client exposing ``list_tools()`` and ``call_tool(name, args)``.
            game: Game identifier; not used by this method.
            max_steps: Maximum number of LLM-decided steps.
            seed: Base seed; step ``k`` calls the LLM with ``seed + k``.
            verbose: Print observations and decisions while playing.
            walkthrough: Optional hints; stored on the agent but not consulted here.

        Returns:
            A RunResult with the score, move count, visited locations and
            per-step history.
        """
        # Start from a clean slate so a reused agent does not carry score,
        # failures or explored rooms over from a previous run.
        self._reset_state()
        self.walkthrough_hints = walkthrough
        locations_visited: set[str] = set()
        history: list[tuple[str, str, str]] = []
        moves = 0

        tools = await client.list_tools()
        tool_names = [t.name for t in tools]

        inv_result = await client.call_tool("inventory", {})
        self.current_inventory = self._parse_inventory(self._extract_result(inv_result))

        result = await client.call_tool("play_action", {"action": "look"})
        observation = self._extract_result(result)
        location = self._extract_location(observation, "Start")
        locations_visited.add(location)
        self.locations_explored.add(location)
        self.unexplored_directions = list(self._ALL_DIRECTIONS)
        if verbose:
            print(f"\n{observation}")

        for step in range(1, max_steps + 1):
            # Refresh the map periodically, and on every step while stuck.
            self.steps_since_map_check += 1
            if self.steps_since_map_check >= 6 or self.steps_since_progress > 3:
                map_result = await client.call_tool("get_map", {})
                self.current_map = self._extract_result(map_result)
                self.steps_since_map_check = 0

            prompt = self._build_prompt(observation)
            # NOTE: call_llm is synchronous and blocks the event loop while it waits.
            response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
            thought, tool_name, tool_args = self._parse_response(response, tool_names)
            if verbose:
                print(f"\n--- Step {step} ---")
                print(f"[THOUGHT] {thought}")
                print(f"[TOOL] {tool_name}({tool_args})")
            tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)

            if tool_name == "play_action":
                action = tool_args.get("action", "look")
                self.recent_actions.append(action)
                if len(self.recent_actions) > 5:
                    self.recent_actions = self.recent_actions[-5:]
                # Severe anti-loop detection: the same command three times in
                # a row is replaced by an untried direction (or "look").
                if len(self.recent_actions) >= 3 and len(set(self.recent_actions[-3:])) == 1:
                    action = self.unexplored_directions.pop(0) if self.unexplored_directions else "look"
                    tool_args = {"action": action}
                    self.recent_actions[-1] = action
                # Remember the last movement (canonical name, None for a
                # non-movement) so _validate_tool_call can veto the reverse.
                self.last_direction_moved = self._canonical_direction(action)
                moves += 1

            try:
                result = await client.call_tool(tool_name, tool_args)
                observation = self._extract_result(result)
                if tool_name == "inventory":
                    self.current_inventory = self._parse_inventory(observation)
            except Exception as e:
                # Step boundary: surface the failure to the LLM as the next
                # observation instead of aborting the whole run.
                observation = f"Error: {e}"

            old_score = self.score
            self._update_score(observation)
            scored = self.score > old_score

            # Only game commands can move the player; output of the helper
            # tools (memory / get_map / inventory) must not be read as a room name.
            if tool_name == "play_action":
                new_location = self._extract_location(observation, location)
            else:
                new_location = location

            # Check for TRUE progress (new room or more points).
            is_new_room = new_location not in self.locations_explored
            if is_new_room or scored:
                self.steps_since_progress = 0
            else:
                self.steps_since_progress += 1

            moved = new_location != location
            if moved:
                location = new_location
                locations_visited.add(location)
                # Failures are only meaningful for the room they happened in.
                self.failed_actions.clear()
                if is_new_room:
                    self.locations_explored.add(location)
                    # A new room gets a fresh list of directions to try.
                    self.unexplored_directions = list(self._ALL_DIRECTIONS)

            # Track failed actions to avoid repeating them. A command that
            # moved the player or scored is not a failure, whatever words the
            # resulting text contains.
            if tool_name == "play_action" and not (moved or scored) and self._looks_like_failure(observation):
                action = tool_args.get("action", "look")
                self.failed_actions[action] = self.failed_actions.get(action, 0) + 1

            if verbose:
                print(f"[LOCATION] {location} | Score: {self.score} | Explored: {len(self.locations_explored)} | Progress Steps: {self.steps_since_progress}")

            self.history.append({
                "step": step, "thought": thought, "tool": tool_name, "args": tool_args,
                "result": observation[:200], "location": location, "score": self.score,
            })
            if len(self.history) > 10:
                self.history = self.history[-10:]
            history.append((thought, f"{tool_name}({tool_args})", observation[:100]))

            if self._is_game_over(observation):
                if verbose:
                    print("\n*** GAME OVER ***")
                break

        return RunResult(
            final_score=self.score,
            max_score=self.DEFAULT_MAX_SCORE,
            moves=moves,
            locations_visited=locations_visited,
            game_completed=self._is_game_over(observation),
            history=history,
        )

    def _extract_location(self, observation: str, current_location: str = "Unknown") -> str:
        """Guess the room name from a game observation.

        Returns the first non-empty line that is not bracketed metadata, does
        not contain a known non-location phrase and does not read like a
        sentence; falls back to *current_location* when no line qualifies.
        """
        if not observation:
            return current_location
        ignore_phrases = ["you can't go", "you cannot go", "impenetrable", "nothing special", "doesn't seem to work", "i don't understand", "it's pitch black", "locked", "closed", "inventory:", "valid actions:", "there is no", "you hear", "you are empty-handed", "already", "that's not", "what do you want to", "i see no", "failed"]
        for raw_line in observation.strip().split('\n'):
            line = raw_line.strip()
            if not line or line.startswith('['):
                continue
            lowered = line.lower()
            if any(phrase in lowered for phrase in ignore_phrases):
                continue
            # Room titles are not sentences: skip anything ending in sentence
            # punctuation, including short replies such as "Taken.".
            if line.endswith(('.', '!', '?', ':')):
                continue
            return line
        return current_location

    def _build_prompt(self, observation: str) -> str:
        """Assemble the user prompt from tracked state and the latest observation."""
        parts = [f"Current Score: {self.score}", f"Locations explored: {len(self.locations_explored)}"]
        if self.history:
            parts.append("\nRecent actions:")
            for entry in self.history[-3:]:
                action = entry.get("args", {}).get("action", entry["tool"])
                res = entry["result"].replace('\n', ' ')
                res_short = res[:80] + "..." if len(res) > 80 else res
                parts.append(f" > {action} -> {res_short}")

        # Dynamic state injection: nudge the model after progress, warn it when stuck.
        if self.steps_since_progress == 0 and observation != "Unknown" and len(self.history) > 0:
            parts.append("\n[TACTICAL ADVICE: You just discovered a new area!]")
            parts.append("1. DO NOT move to another room yet.")
            parts.append("2. Look closely at the description below. Are there any objects mentioned? (e.g., mailbox, chest, sword)")
            parts.append("3. If yes, you MUST try to 'take', 'open', or 'examine' them right now.")
        elif self.steps_since_progress > 3:
            parts.append(f"\n[CRITICAL WARNING: You have made {self.steps_since_progress} moves with NO score increase and NO NEW ROOMS.]")
            parts.append("You are walking in circles through already-explored areas. STOP WANDERING.")
            parts.append("To break out of this loop, you MUST do one of the following:")
            parts.append(" 1. Call the 'get_map' tool to see which directions you haven't tried yet.")
            parts.append(" 2. Move in a completely unexplored direction (n, s, e, w, u, d).")
            parts.append(" 3. Examine or interact with an object you previously ignored.")

        # Warn about actions that failed at least twice.
        if self.failed_actions:
            failed_list = [f"'{k}'" for k, v in self.failed_actions.items() if v >= 2]
            if failed_list:
                parts.append(f"\n[AVOID: These actions do not work here: {', '.join(failed_list)}]")

        # run() refreshes the map periodically; show it so the fetch is useful.
        if self.current_map:
            parts.append(f"\nKnown map:\n{self.current_map}")

        parts.append(f"\nCurrent situation:\n{observation}\n\nWhat do you do next?")
        return "\n".join(parts)

    def _parse_args(self, args_part: str) -> dict:
        """Turn the text after ``ARGS:`` into an argument dict (never raises)."""
        # Try the text as written first so apostrophes inside valid JSON
        # survive, then retry with single quotes converted for Python-style dicts.
        for candidate in (args_part, args_part.replace("'", '"')):
            try:
                parsed = json.loads(candidate)
            except ValueError:  # includes json.JSONDecodeError
                continue
            if isinstance(parsed, dict):
                return parsed
            if isinstance(parsed, str) and parsed.strip():
                # A bare JSON string is taken to be the action itself.
                return {"action": parsed}
        match = re.search(r'"action"\s*:\s*"([^"]+)"', args_part)
        return {"action": match.group(1)} if match else {"action": "look"}

    def _parse_response(self, response: str, valid_tools: list[str]) -> tuple[str, str, dict]:
        """Extract (thought, tool name, tool args) from an LLM reply.

        Lines are matched by their ``THOUGHT:`` / ``TOOL:`` / ``ARGS:`` prefix
        (case-insensitive). Anything missing keeps its default: a placeholder
        thought, the ``play_action`` tool and ``{"action": "look"}``.
        *valid_tools* is accepted for interface compatibility and not used here.
        """
        thought, tool_name, tool_args = "No reasoning provided", "play_action", {"action": "look"}
        for line in (response or "").strip().split("\n"):
            line_clean = line.strip()
            line_upper = line_clean.upper()
            if line_upper.startswith("THOUGHT:"):
                thought = line_clean.split(":", 1)[1].strip()
            elif line_upper.startswith("TOOL:"):
                # Strip markdown emphasis / code ticks the model may add.
                raw = line_clean.split(":", 1)[1].strip().lower().replace("**", "").replace("*", "").replace("`", "")
                tool_name = raw.split()[0] if raw else "play_action"
            elif line_upper.startswith("ARGS:"):
                tool_args = self._parse_args(line_clean.split(":", 1)[1].strip())
        return thought, tool_name, tool_args

    def _validate_tool_call(self, tool_name: str, tool_args: dict, valid_tools: list[str]) -> tuple[str, dict]:
        """Sanitise a requested tool call before it is executed.

        Unknown tools become ``play_action``. For ``play_action`` the action
        text is normalised, common verb synonyms are mapped, an immediate
        reversal of the last move is replaced by an untried direction, and an
        action that already failed twice is replaced as well.
        """
        if tool_name not in valid_tools:
            tool_name = "play_action"
        if not isinstance(tool_args, dict):
            tool_args = {}
        if tool_name == "play_action":
            raw_action = tool_args.get("action")
            text = str(raw_action) if raw_action is not None else "look"
            action = " ".join(text.lower().replace("**", "").split()) or "look"

            # Map bad verbs to Z-Machine standard verbs. Matching on the
            # leading phrase lets two-word entries such as "pick up" work.
            for phrase, replacement in self._VERB_SYNONYMS.items():
                if action == phrase or action.startswith(phrase + " "):
                    action = replacement + action[len(phrase):]
                    break

            # Prevent immediate backtracking (ping-ponging); compared on
            # canonical names so "s" after "north" is caught too.
            last_move = self._canonical_direction(self.last_direction_moved or "")
            blocked = self._REVERSE_DIRECTIONS.get(last_move) if last_move else None
            if blocked and self._canonical_direction(action) == blocked and self.unexplored_directions:
                action = self.unexplored_directions.pop(0)  # force a different direction

            # Prevent repeating failed actions.
            if self.failed_actions.get(action, 0) >= 2:
                action = self.unexplored_directions.pop(0) if self.unexplored_directions else "look"
            tool_args["action"] = action
        return tool_name, tool_args

    def _extract_result(self, result) -> str:
        """Return the text carried by an MCP tool result, whatever its wrapper shape."""
        content = getattr(result, "content", None)
        if content:
            first = content[0]
            text = getattr(first, "text", None)
            # Items without text would otherwise raise AttributeError.
            return text if text is not None else str(first)
        if isinstance(result, list) and result:
            first = result[0]
            return first.text if hasattr(first, "text") else str(first)
        return str(result)

    def _update_score(self, text: str) -> None:
        """Raise ``self.score`` to the largest score figure found in *text*.

        The tracked score never decreases: each pattern's first match is
        folded in with ``max``.
        """
        for pattern in [r'Score:\s*(\d+)', r'score[:\s]+(\d+)', r'\[Score:\s*(\d+)']:
            match = re.search(pattern, text, re.IGNORECASE)
            if match:
                self.score = max(self.score, int(match.group(1)))

    def _is_game_over(self, text: str) -> bool:
        """Return True when *text* contains a death, win or game-over message."""
        lowered = text.lower()
        return any(phrase in lowered for phrase in self._GAME_OVER_PHRASES)

    def _parse_inventory(self, inv_text: str) -> list[str]:
        """Split ``"<label>: a, b, c"`` inventory text into item names.

        Returns an empty list for empty-handed / "nothing" replies and for
        text without a colon.
        """
        lowered = inv_text.lower()
        if "empty-handed" in lowered or "nothing" in lowered:
            return []
        if ":" in inv_text:
            listing = inv_text.split(":", 1)[1].strip()
            return [item.strip() for item in listing.split(",") if item.strip()]
        return []
async def test_agent():
    """Smoke-run the agent against the local MCP server and print a summary."""
    # Imported here so fastmcp is only needed when this helper is actually run.
    from fastmcp import Client

    student = StudentAgent()
    async with Client("mcp_server.py") as client:
        result = await student.run(
            client=client,
            game="zork1",
            max_steps=40,
            seed=42,
            verbose=True,
        )
    print(f"\n{'=' * 50}\nFinal Score: {result.final_score}\nMoves: {result.moves}\nLocations: {len(result.locations_visited)}")
# Script entry point: run the smoke test only when executed directly,
# never on import.
if __name__ == "__main__":
    import asyncio

    asyncio.run(test_agent())