# minhdc's picture
# Add agent submission
# 5f4400e
"""
Student Agent for Text Adventure Games
This is your submission file. Implement the StudentAgent class to play
text adventure games using the MCP server you also implement.
Your agent should:
1. Connect to the MCP server via the provided client
2. Use the ReAct pattern (Thought -> Action -> Observation)
3. Call MCP tools to interact with the game
4. Maximize the game score within the step limit
Required method:
async def run(self, client, game, max_steps, seed, verbose) -> RunResult
The 'client' is a FastMCP Client already connected to your MCP server.
Use it to call tools like: await client.call_tool("play_action", {"action": "look"})
Tips:
- Start by looking around and understanding your environment
- Keep track of visited locations to avoid loops
- Pick up useful items (lamp, sword, etc.)
- The seed parameter should be used to set your LLM's seed for reproducibility
"""
import json
import os
import re
from dataclasses import dataclass, field
from typing import Optional
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
# Load environment variables
load_dotenv()
# =============================================================================
# LLM Configuration - DO NOT MODIFY
# =============================================================================
# Model to use (fixed for fair evaluation)
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
# Initialize the LLM client (uses HF_TOKEN from environment).
# The token is read once, at import time: importing this module without
# HF_TOKEN set raises ValueError before any agent code runs.
_hf_token = os.getenv("HF_TOKEN")
if not _hf_token:
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")
# Shared client instance used by call_llm() for every completion request.
LLM_CLIENT = InferenceClient(token=_hf_token)
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """
    Call the LLM with the given prompt. Use this function in your agent.

    Sends a two-message chat (system message, then user message) to
    LLM_MODEL through the module-level LLM_CLIENT and returns the content
    of the first returned choice.

    Args:
        prompt: The user prompt (current game state, history, etc.)
        system_prompt: The system prompt (instructions for the agent)
        seed: Random seed for reproducibility
        max_tokens: Maximum tokens in response (default: 300)

    Returns:
        The LLM's response text

    Example:
        response = call_llm(
            prompt="You are in a forest. What do you do?",
            system_prompt=SYSTEM_PROMPT,
            seed=42,
        )
    """
    # System message first so the instructions frame the user prompt.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.0,  # Deterministic for reproducibility
        max_tokens=max_tokens,
        seed=seed,
    )
    # Only the first choice is used.
    # NOTE(review): content is returned as-is; confirm it can never be None.
    return response.choices[0].message.content
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class."""
    # Score reported at the end of the run.
    final_score: int
    # Maximum achievable score for the game.
    max_score: int
    # Number of moves counted by the agent during the run.
    moves: int
    # Names/headers of the distinct locations seen during the run.
    locations_visited: set[str]
    # True when the run ended because the game reported an ending.
    game_completed: bool
    # Optional error description; None when the run finished normally.
    error: Optional[str] = None
    # One (thought, action, result) tuple per executed step.
    history: list[tuple[str, str, str]] = field(default_factory=list)
# =============================================================================
# System Prompt - Customize this for your agent
# =============================================================================
SYSTEM_PROMPT = """You are an expert AI agent playing a classic text adventure game. Your mission: explore the world, solve puzzles, collect treasures, and maximize your score through careful observation and strategic play.
AVAILABLE TOOLS (use via MCP):
- play_action: Execute a game command (the primary tool for interacting with the game)
- memory: Get current game state summary (location, score, recent actions, failed actions)
- inventory: Check what you're carrying
- get_map: See explored locations and connections (use to avoid getting lost)
- get_valid_actions: Get a list of likely valid actions in the current state
VALID GAME COMMANDS for play_action:
- Movement: north, south, east, west, up, down, enter, exit, climb
- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
- Light: turn on lamp, extinguish candle
- Combat: attack <enemy> with <weapon>, kill <enemy> with <weapon>
- Item use: put <item> in <container>, give <item> to <npc>, turn on <item>
- Other: look, inventory, read <thing>, wait, push <thing>, pull <thing>
- Multi-object: take all, drop all, take lamp, sword
- NPC: give <item> to <npc>, ask <npc> about <topic>
FORBIDDEN COMMANDS (parser won't recognize): check, inspect, search, grab, use, help
CRITICAL RULES:
1. Distinguish failure types:
- Hard failure ("can't go", "wall", "I don't understand"): STOP retrying after 2 attempts
- Puzzle feedback (unusual responses, state changes): Continue with DIFFERENT approaches
- Soft rejection ("too dark", "locked"): Solve the prerequisite first
2. One command per turn: Issue a single game command
3. Discovery-based play: Solve through observation and experimentation
4. Combat priority: During combat, ONLY use combat actions. No examining!
ITEM STRATEGY (VERY IMPORTANT):
- After picking up an item, THINK about what it could be used for:
* Light sources (lamp, lantern, torch) -> turn on before dark areas
* Weapons (sword, knife, axe) -> attack enemies with them
* Keys/tools -> open locked doors/containers
* Food/drink -> give to NPCs or eat/drink when needed
* Treasures (gold, jewels, trophy) -> may need to be placed somewhere for points
* Rope/ladder -> climb or tie to access new areas
- When you encounter an obstacle, ALWAYS check your inventory for a relevant item:
* Locked door? -> Do I have a key?
* Dark room? -> Do I have a lamp? Turn it on!
* Enemy? -> Do I have a weapon? Attack with it!
* NPC wants something? -> Do I have it in inventory?
* Container/receptacle? -> Try putting relevant items in it
- EXAMINE items you pick up - the description often hints at their use
- Try using items on things in the environment: "put X in Y", "give X to Y", "unlock door with key"
EXPLORATION STRATEGY:
1. New location -> look -> note features -> check exits -> try promising directions
2. Examine interesting objects (every noun could be interactive)
3. Pick up useful items (light sources, weapons, keys, treasures)
4. Open containers (mailbox, chest, door, window)
5. Use get_map to avoid getting lost
6. Turn on lamp before entering dark areas!
7. When stuck: check inventory for unused items, then try get_valid_actions
PUZZLE-SOLVING:
- FIRST check inventory - do you have an item that could help?
- Standard actions first (examine, take, open)
- Try items on obstacles: "unlock X with key", "cut X with sword", "light X with lamp"
- Environmental clues: read room descriptions for hints about what items to use
- Multi-step chains: get item -> prepare it -> use it at the right location
RESPOND IN THIS EXACT FORMAT (no markdown, no code blocks):
THOUGHT: <your reasoning - what you observe, plan, and why. If you have items, consider how they might help.>
TOOL: <tool_name>
ARGS: <JSON arguments>
Examples:
THOUGHT: I just arrived at a new location. I should look around to understand my surroundings.
TOOL: play_action
ARGS: {"action": "look"}
THOUGHT: It's dark here and I have a lamp in my inventory. I need to turn it on to see.
TOOL: play_action
ARGS: {"action": "turn on lamp"}
THOUGHT: There's a locked door and I picked up a key earlier. Let me try using it.
TOOL: play_action
ARGS: {"action": "unlock door with key"}
THOUGHT: The troll is blocking my way and I have a sword. I should attack it.
TOOL: play_action
ARGS: {"action": "attack troll with sword"}
THOUGHT: I'm stuck and haven't used several items. Let me check what I'm carrying.
TOOL: inventory
ARGS: {}
ANTI-PATTERNS TO AVOID:
- Picking up items and NEVER using them
- Ignoring inventory when stuck at a puzzle
- Repeating the EXACT same action after a hard failure
- Checking inventory during combat
- Using forbidden verbs (check, inspect, search, grab, use)
- Staying in one location too long without making progress
DO NOT repeat the same action multiple times. If stuck, CHECK YOUR INVENTORY for items that might help, then try something different or move to a new area."""
# =============================================================================
# Student Agent - IMPLEMENT THIS CLASS
# =============================================================================
class StudentAgent:
"""
ReAct agent implementation inspired by ZorkGPT architecture.
Features:
- ReAct loop (Thought -> Tool -> Observation)
- Loop detection (repeated actions, action cycling)
- Action validation and cleaning
- Score tracking from game responses
- Contextual prompt building with history
- Game-agnostic design
"""
def __init__(self):
"""Initialize agent state tracking."""
self.history: list[dict] = [] # Full action history
self.recent_actions: list[str] = [] # Last N actions for loop detection
self.score: int = 0
self.max_score: int = 0
self.tool_names: list[str] = []
# Per-location tracking
self.actions_by_location: dict[str, list[str]] = {} # location -> [actions tried]
self.failed_actions_by_location: dict[str, set[str]] = {} # location -> {failed actions}
self.current_location: str = ""
self.turns_at_location: int = 0
self.turns_since_score_change: int = 0
# Inventory tracking
self.known_inventory: list[str] = [] # Items we know we're carrying
self.last_inventory_check: int = 0 # Step when we last checked inventory
self.items_used: set[str] = set() # Items we've tried using
self.items_examined: set[str] = set() # Items we've examined
async def run(
    self,
    client,  # FastMCP Client connected to your MCP server
    game: str,
    max_steps: int,
    seed: int,
    verbose: bool = False,
) -> RunResult:
    """
    Run the agent for a game session using the ReAct pattern.

    Each iteration: refresh the inventory when due, build a prompt from the
    tracked state, ask the LLM for a THOUGHT/TOOL/ARGS reply, apply the
    loop/stagnation guards, execute the chosen MCP tool, and fold the
    resulting observation back into the agent's state.

    Args:
        client: MCP client exposing async ``list_tools()`` and ``call_tool()``.
        game: Game name; only used in the verbose banner.
        max_steps: Upper bound on loop iterations (one LLM call per iteration).
        seed: Base LLM seed; the step number is added to it on every call.
        verbose: When True, print a trace of thoughts, tool calls and progress.

    Returns:
        A RunResult carrying the tracked score, the move count, the set of
        text-based location headers, whether a game-over phrase was seen,
        and the per-step (thought, action, result) history.
    """
    locations_visited = set()  # Text-based (unique first lines) - for professor's metric
    game_locations_visited = set()  # Jericho real rooms - for debugging
    history = []  # (thought, action, result) tuples for RunResult
    moves = 0
    game_over = False
    # Get available tools from the MCP server
    tools = await client.list_tools()
    self.tool_names = [t.name for t in tools]
    # Get initial observation
    result = await client.call_tool("play_action", {"action": "look"})
    observation = self._extract_result(result)
    # Track initial location (both systems)
    location = self._extract_location(observation)
    obs_location = self._extract_observation_location(observation)
    game_locations_visited.add(location)
    locations_visited.add(obs_location)
    self.current_location = location
    self._update_score(observation)
    if verbose:
        print(f"\n=== Starting {game} ===")
        print(f"{observation}\n")
    # Main ReAct loop
    for step in range(1, max_steps + 1):
        # Periodically refresh inventory (every 10 steps or when we just picked something up)
        if step - self.last_inventory_check >= 10 or self._just_picked_up_item(observation):
            try:
                inv_result = await client.call_tool("inventory", {})
                inv_text = self._extract_result(inv_result)
                self._parse_inventory(inv_text)
                self.last_inventory_check = step
            except Exception:
                # Best effort: a failed refresh keeps the previous inventory.
                pass
        # Track item pickups from observation
        # (this may reset last_inventory_check to 0 right after the refresh above)
        self._track_item_changes(observation)
        # Build contextual prompt
        prompt = self._build_prompt(observation)
        # Call LLM with step-varied seed for diversity
        response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
        # Parse response to get thought, tool, args
        thought, tool_name, tool_args = self._parse_response(response)
        # Validate and fix tool call
        tool_name, tool_args = self._validate_tool_call(tool_name, tool_args)
        if verbose:
            print(f"--- Step {step} ---")
            print(f"[THOUGHT] {thought}")
            print(f"[TOOL] {tool_name}({tool_args})")
        # Loop detection for play_action
        # NOTE(review): the source's indentation was lost; the nesting of the
        # stuck-check and of `moves += 1` inside this branch is reconstructed -
        # confirm against the original file.
        if tool_name == "play_action":
            action = tool_args.get("action", "look")
            # The LLM's *proposed* action is what gets recorded, even if one
            # of the guards below swaps in a different command.
            self.recent_actions.append(action)
            if len(self.recent_actions) > 10:
                self.recent_actions = self.recent_actions[-10:]
            # Track actions at current location
            if self.current_location not in self.actions_by_location:
                self.actions_by_location[self.current_location] = []
            self.actions_by_location[self.current_location].append(action)
            # Check if this action (or a semantic variant) was already tried and failed here
            action_key = self._normalize_action_key(action)
            failed_here = self.failed_actions_by_location.get(self.current_location, set())
            if action_key in failed_here:
                if verbose:
                    print(f"[BLOCKED] '{action}' already failed at '{self.current_location}', skipping")
                tool_args = self._break_loop(action)
            # Detect immediate repetition (same action 3+ times)
            elif len(self.recent_actions) >= 3 and len(set(self.recent_actions[-3:])) == 1:
                if verbose:
                    print(f"[LOOP] Immediate repetition detected: '{action}'")
                tool_args = self._break_loop(action)
            # Detect action cycling (A->B->A->B pattern)
            elif self._detect_cycling():
                if verbose:
                    print(f"[LOOP] Action cycling detected")
                tool_args = self._break_loop(action)
            # Detect semantic repetition (push statue / push the statue / push statue north)
            elif self._is_semantic_repeat(action):
                if verbose:
                    print(f"[LOOP] Semantic repetition of '{action}' at this location")
                tool_args = self._break_loop(action)
            # Force movement if stuck at same location too long without score progress
            if self.turns_at_location >= 8 and self.turns_since_score_change >= 8:
                if not self._is_movement_action(tool_args.get("action", "")):
                    if verbose:
                        print(f"[STUCK] {self.turns_at_location} turns here with no score. Forcing movement.")
                    tool_args = self._force_movement()
            moves += 1
        # Execute the tool
        try:
            result = await client.call_tool(tool_name, tool_args)
            observation = self._extract_result(result)
            if verbose:
                obs_preview = observation[:200] + "..." if len(observation) > 200 else observation
                print(f"[RESULT] {obs_preview}")
        except Exception as e:
            # A failing tool call becomes the next observation rather than
            # aborting the run.
            observation = f"Error: {e}"
            if verbose:
                print(f"[ERROR] {e}")
        # Track location (real game location for agent reasoning)
        location = self._extract_location(observation)
        # Track text-based observation header for professor's location metric
        obs_location = self._extract_observation_location(observation)
        # Set size before/after the add tells us whether the entry was new.
        prev_loc_count = len(locations_visited)
        locations_visited.add(obs_location)
        new_text_discovered = len(locations_visited) > prev_loc_count
        prev_game_loc_count = len(game_locations_visited)
        game_locations_visited.add(location)
        new_game_loc_discovered = len(game_locations_visited) > prev_game_loc_count
        # Track location stagnation
        if location != self.current_location:
            self.current_location = location
            self.turns_at_location = 1
        else:
            self.turns_at_location += 1
        # Track failed actions at this location
        # (current_location was already updated above, so a failure is filed
        # under the location extracted from the failing observation)
        if tool_name == "play_action":
            executed_action = tool_args.get("action", "")
            if self._action_failed(observation):
                if self.current_location not in self.failed_actions_by_location:
                    self.failed_actions_by_location[self.current_location] = set()
                self.failed_actions_by_location[self.current_location].add(
                    self._normalize_action_key(executed_action)
                )
            # Track item examinations and uses
            self._track_item_usage(executed_action)
        # Update score
        prev_score = self.score
        self._update_score(observation)
        score_changed = self.score != prev_score
        if score_changed:
            self.turns_since_score_change = 0
        else:
            self.turns_since_score_change += 1
        # Print progress summary
        if verbose:
            status_parts = [f"Score: {self.score}"]
            if score_changed:
                status_parts.append(f"(+{self.score - prev_score}!)")
            status_parts.append(f"Texts: {len(locations_visited)}")
            if new_text_discovered:
                status_parts.append(f"(NEW text: {obs_location[:50]})")
            status_parts.append(f"Rooms: {len(game_locations_visited)}")
            if new_game_loc_discovered:
                status_parts.append(f"(NEW room: {location})")
            status_parts.append(f"Moves: {moves}")
            print(f"[PROGRESS] {' | '.join(status_parts)}")
        # Update history
        # NOTE(review): action_str is assigned but never read afterwards.
        action_str = tool_args.get("action", tool_name) if tool_name == "play_action" else tool_name
        self.history.append({
            "step": step,
            "thought": thought,
            "tool": tool_name,
            "args": tool_args,
            "result": observation[:300],
            "location": location,
        })
        # Keep history bounded
        if len(self.history) > 15:
            self.history = self.history[-15:]
        # Record in result history
        history.append((thought, f"{tool_name}({tool_args})", observation[:150]))
        # Check for game over
        if self._is_game_over(observation):
            game_over = True
            if verbose:
                print("\n*** GAME OVER ***")
            break
    # Combine text-based locations (for professor's metric) into locations_visited
    # Store game rooms count in a verbose-only summary at the end
    if verbose:
        print(f"\n--- Location Summary ---")
        print(f" Unique text observations: {len(locations_visited)}")
        print(f" Unique game rooms: {len(game_locations_visited)}")
        print(f" Game rooms: {sorted(game_locations_visited)}")
    return RunResult(
        final_score=self.score,
        # Falls back to 350 when no max score was ever parsed from the output.
        max_score=self.max_score if self.max_score > 0 else 350,
        moves=moves,
        locations_visited=locations_visited,
        game_completed=game_over,
        history=history,
    )
def _build_prompt(self, observation: str) -> str:
"""
Build a contextual prompt for the LLM with game state and history.
Includes failed-action context so the LLM avoids retrying useless actions.
"""
parts = []
# Score context
parts.append(f"Current Score: {self.score}")
if self.max_score > 0:
parts.append(f"Max Possible Score: {self.max_score}")
parts.append(f"Current Location: {self.current_location}")
parts.append(f"Turns at this location: {self.turns_at_location}")
parts.append(f"Turns since last score change: {self.turns_since_score_change}")
# Inventory context - critical for item-usage reasoning
if self.known_inventory:
parts.append(f"\nYOUR INVENTORY: {', '.join(self.known_inventory)}")
# Highlight unused items
unused = [item for item in self.known_inventory if item.lower() not in self.items_used]
unexamined = [item for item in self.known_inventory if item.lower() not in self.items_examined]
if unexamined:
parts.append(f" Items NOT YET EXAMINED (examine these!): {', '.join(unexamined)}")
if unused and self.turns_since_score_change >= 3:
parts.append(f" Items NOT YET USED (try using these!): {', '.join(unused)}")
parts.append(f" HINT: Try commands like 'put <item> in <thing>', 'give <item> to <npc>', "
f"'unlock <thing> with <item>', 'turn on <item>', 'attack <enemy> with <item>'")
else:
parts.append("\nYOUR INVENTORY: (empty or unknown - try 'inventory' to check)")
# Recent history for continuity
if self.history:
parts.append("\nRecent actions and results:")
for entry in self.history[-5:]:
action = entry.get("args", {}).get("action", entry["tool"])
loc = entry.get("location", "")
result_short = entry["result"][:100]
if len(entry["result"]) > 100:
result_short += "..."
parts.append(f" [{loc}] {action} -> {result_short}")
# Failed actions at current location - critical for avoiding retries
failed_here = self.failed_actions_by_location.get(self.current_location, set())
if failed_here:
parts.append(f"\n[ACTIONS THAT ALREADY FAILED AT THIS LOCATION - DO NOT RETRY THESE]:")
parts.append(f" {', '.join(sorted(failed_here))}")
# Actions already tried at this location
tried_here = self.actions_by_location.get(self.current_location, [])
if len(tried_here) > 3:
unique_tried = sorted(set(tried_here[-10:]))
parts.append(f"\n[ACTIONS ALREADY TRIED HERE (try something new!)]:")
parts.append(f" {', '.join(unique_tried)}")
# Loop warning
if self.recent_actions and len(self.recent_actions) >= 3:
if len(set(self.recent_actions[-3:])) <= 2:
parts.append(
f"\n[WARNING: You are REPEATING actions: {self.recent_actions[-3:]}. "
f"You MUST try something completely different! Move to a new area with "
f"north/south/east/west, or use get_map to find unexplored exits.]"
)
# Stagnation warning with escalating urgency
if self.turns_at_location >= 4 and self.turns_since_score_change >= 4:
parts.append(
f"\n[CRITICAL: You have been at '{self.current_location}' for {self.turns_at_location} turns "
f"with NO score progress for {self.turns_since_score_change} turns. "
f"LEAVE THIS AREA NOW. Try: north, south, east, west, up, down, enter, exit. "
f"Use get_map to see where you've been and find NEW areas to explore.]"
)
elif self.turns_at_location >= 3:
parts.append(
f"\n[NOTE: You've been at '{self.current_location}' for {self.turns_at_location} turns. "
f"Consider moving on if you're not making progress.]"
)
# Current observation
parts.append(f"\nCurrent situation:\n{observation}")
parts.append("\nWhat do you do next?")
return "\n".join(parts)
def _just_picked_up_item(self, observation: str) -> bool:
"""Check if the last observation indicates we picked up an item."""
pickup_indicators = ["taken", "picked up", "you now have", "added to",
"you take", "you get", "you pick up"]
obs_lower = observation.lower()
return any(ind in obs_lower for ind in pickup_indicators)
def _parse_inventory(self, inv_text: str) -> None:
"""Parse inventory text to extract item names."""
inv_lower = inv_text.lower()
if "empty" in inv_lower or "nothing" in inv_lower or "not carrying" in inv_lower:
self.known_inventory = []
return
# Try to parse "Inventory: item1, item2, item3" format
if "inventory:" in inv_lower:
after_colon = inv_text.split(":", 1)[1].strip()
if after_colon:
items = [item.strip() for item in after_colon.split(",") if item.strip()]
if items:
self.known_inventory = items
return
# Parse line-by-line (common Infocom format: " A brass lantern")
lines = inv_text.strip().split("\n")
items = []
for line in lines:
line = line.strip()
# Skip header lines
if not line or "carrying" in line.lower() or "inventory" in line.lower():
continue
# Skip score lines
if line.startswith("[") or line.startswith("+"):
continue
# Strip leading articles and punctuation
cleaned = line.lstrip("- *•")
cleaned = cleaned.strip()
if cleaned:
items.append(cleaned)
if items:
self.known_inventory = items
def _track_item_changes(self, observation: str) -> None:
"""Track item pickups/drops from game observation text."""
obs_lower = observation.lower()
# Detect pickups
pickup_patterns = [
r"(?:taken|you take|you pick up|you get)\b",
]
if any(re.search(p, obs_lower) for p in pickup_patterns):
# We picked up something - force an inventory refresh soon
self.last_inventory_check = 0 # Will trigger refresh next step
def _track_item_usage(self, action: str) -> None:
"""Track when items are examined or used in commands."""
action_lower = action.lower().strip()
words = action_lower.split()
if not words:
return
verb = words[0]
target = " ".join(words[1:]) if len(words) > 1 else ""
# Track examinations
if verb in ("examine", "look", "read"):
for item in self.known_inventory:
if item.lower() in target or target in item.lower():
self.items_examined.add(item.lower())
# Track usage (any verb that's not examine/take/drop/look)
if verb not in ("examine", "take", "drop", "look", "inventory", "i",
"north", "south", "east", "west", "up", "down",
"n", "s", "e", "w", "u", "d", "enter", "exit"):
for item in self.known_inventory:
if item.lower() in action_lower:
self.items_used.add(item.lower())
def _parse_response(self, response: str) -> tuple[str, str, dict]:
"""
Parse LLM response to extract thought, tool name, and arguments.
Handles various formatting quirks from the LLM.
"""
thought = "No reasoning provided"
tool_name = "play_action"
tool_args = {"action": "look"}
lines = response.strip().split("\n")
for line in lines:
line_clean = line.strip()
line_upper = line_clean.upper()
if line_upper.startswith("THOUGHT:"):
thought = line_clean.split(":", 1)[1].strip()
elif line_upper.startswith("TOOL:"):
raw_tool = line_clean.split(":", 1)[1].strip().lower()
# Clean markdown artifacts
raw_tool = raw_tool.replace("**", "").replace("*", "").replace("`", "")
raw_tool = raw_tool.split()[0] if raw_tool else "play_action"
tool_name = raw_tool
elif line_upper.startswith("ARGS:"):
args_part = line_clean.split(":", 1)[1].strip()
try:
# Handle single quotes
args_part = args_part.replace("'", '"')
tool_args = json.loads(args_part)
except json.JSONDecodeError:
# Try to extract action from malformed JSON
match = re.search(r'"action"\s*:\s*"([^"]+)"', args_part)
if match:
tool_args = {"action": match.group(1)}
else:
# Last resort: treat the whole thing as an action
cleaned = args_part.strip('{}" ')
if cleaned:
tool_args = {"action": cleaned}
else:
tool_args = {"action": "look"}
return thought, tool_name, tool_args
def _validate_tool_call(self, tool_name: str, tool_args: dict) -> tuple[str, dict]:
"""
Validate and fix common tool call issues.
Maps invalid tool names and cleans action text.
"""
# Fix tool name aliases
if tool_name not in self.tool_names:
tool_alias_map = {
"action": "play_action", "do": "play_action", "command": "play_action",
"execute": "play_action", "game": "play_action",
"map": "get_map", "location": "get_map", "locations": "get_map",
"mem": "memory", "state": "memory", "status": "memory", "info": "memory",
"inv": "inventory", "items": "inventory",
"valid": "get_valid_actions", "actions": "get_valid_actions",
"valid_actions": "get_valid_actions",
}
tool_name = tool_alias_map.get(tool_name, "play_action")
# Clean action text for play_action
if tool_name == "play_action":
action = tool_args.get("action", "look")
action = self._clean_action(action)
tool_args["action"] = action
return tool_name, tool_args
def _clean_action(self, action: str) -> str:
"""
Clean and validate a game action command.
Fixes common invalid verbs and removes formatting artifacts.
"""
# Remove markdown/formatting
action = action.replace("**", "").replace("*", "").replace("`", "")
action = action.strip().lower()
action = action.strip(".,!?;:")
action = " ".join(action.split()) # Normalize whitespace
# Fix invalid verbs that parsers don't recognize
invalid_verb_map = {
"check": "examine",
"inspect": "examine",
"search": "look",
"grab": "take",
"pick up": "take",
"pick": "take",
"use": "examine",
"investigate": "examine",
"observe": "look at",
"collect": "take",
"get": "take",
}
words = action.split()
if words:
# Check single-word verb
if words[0] in invalid_verb_map:
words[0] = invalid_verb_map[words[0]]
action = " ".join(words)
# Check two-word verb
elif len(words) >= 2:
two_word = f"{words[0]} {words[1]}"
if two_word in invalid_verb_map:
action = invalid_verb_map[two_word] + " " + " ".join(words[2:])
action = action.strip()
if not action:
action = "look"
return action
def _detect_cycling(self) -> bool:
"""
Detect action cycling patterns (A->B->A->B or low diversity over many turns).
"""
# Check for exact 2-step cycle in last 4 actions
if len(self.recent_actions) >= 4:
last4 = self.recent_actions[-4:]
if last4[0] == last4[2] and last4[1] == last4[3]:
return True
# Check for low diversity over last 6 actions
if len(self.recent_actions) >= 6:
recent = self.recent_actions[-6:]
unique = set(recent)
if len(unique) <= 2:
return True
# Check for semantic cycling (normalized keys)
if len(self.recent_actions) >= 4:
last4_keys = [self._normalize_action_key(a) for a in self.recent_actions[-4:]]
if len(set(last4_keys)) <= 2:
return True
return False
def _normalize_action_key(self, action: str) -> str:
"""
Normalize an action to a canonical key for dedup.
'push statue', 'push the statue', 'push statue north' all become 'push statue'.
"""
action = action.lower().strip()
# Remove articles
for article in [" the ", " a ", " an "]:
action = action.replace(article, " ")
# Remove directional suffixes
for suffix in [" north", " south", " east", " west", " up", " down",
" here", " again", " carefully", " closely"]:
if action.endswith(suffix):
action = action[:-len(suffix)]
# Normalize whitespace
action = " ".join(action.split())
return action
def _is_semantic_repeat(self, action: str) -> bool:
"""
Check if this action is a semantic repeat of something already tried
at this location 2+ times.
"""
tried_here = self.actions_by_location.get(self.current_location, [])
if len(tried_here) < 2:
return False
action_key = self._normalize_action_key(action)
count = sum(1 for a in tried_here[-8:] if self._normalize_action_key(a) == action_key)
return count >= 2
def _action_failed(self, observation: str) -> bool:
"""
Check if a game response indicates the action failed/was useless.
"""
obs_lower = observation.lower()
failure_indicators = [
"can't go that way", "you can't go", "there is no way",
"wall there", "you cannot go", "not a direction",
"can't see any such thing", "doesn't work", "don't understand",
"blocked", "too dark", "there is a wall",
"you can't", "impossible", "nothing happens",
"that doesn't seem to work", "i don't understand",
"that's not something you can", "you don't see",
"i don't know the word", "not something you can",
"already", "can't do that", "won't budge",
"that doesn't make sense", "that's not a verb",
]
return any(indicator in obs_lower for indicator in failure_indicators)
def _is_movement_action(self, action: str) -> bool:
"""Check if an action is a movement command."""
movements = {
"north", "south", "east", "west", "up", "down",
"n", "s", "e", "w", "u", "d",
"northeast", "northwest", "southeast", "southwest",
"enter", "exit", "in", "out", "climb",
}
return action.strip().lower().split()[0] in movements if action.strip() else False
def _force_movement(self) -> dict:
"""
Force a movement action to escape a stuck location.
Avoids directions that already failed here.
"""
failed_here = self.failed_actions_by_location.get(self.current_location, set())
tried_here = set(self.actions_by_location.get(self.current_location, []))
# Prioritize untried directions, then tried-but-not-failed
all_directions = ["north", "south", "east", "west", "up", "down",
"enter", "exit", "northeast", "northwest", "southeast", "southwest"]
# First: directions never tried here
for d in all_directions:
if d not in failed_here and d not in tried_here:
return {"action": d}
# Second: directions tried but not failed (might work for movement)
for d in all_directions:
if d not in failed_here:
return {"action": d}
# All directions failed? Try going back the way we came
return {"action": "look"}
def _break_loop(self, stuck_action: str) -> dict:
"""
Generate a loop-breaking action when the agent is stuck.
Prefers untried directions at the current location.
"""
failed_here = self.failed_actions_by_location.get(self.current_location, set())
tried_here = set(self.actions_by_location.get(self.current_location, []))
recent_set = set(self.recent_actions[-5:]) if self.recent_actions else set()
# Priority 1: Try untried movement directions at this location
directions = ["north", "south", "east", "west", "up", "down", "enter", "exit"]
for d in directions:
if d not in failed_here and d not in tried_here and d not in recent_set:
return {"action": d}
# Priority 2: Movement directions not recently used and not failed
for d in directions:
if d not in failed_here and d not in recent_set:
return {"action": d}
# Priority 3: Non-movement fallbacks
fallbacks = ["look", "inventory", "examine room"]
for action in fallbacks:
if action not in recent_set and action != stuck_action:
return {"action": action}
# Priority 4: Any direction not failed
for d in directions:
if d not in failed_here:
return {"action": d}
return {"action": "look"}
def _extract_result(self, result) -> str:
"""Extract text from MCP tool result."""
if hasattr(result, 'content') and result.content:
return result.content[0].text
if isinstance(result, list) and result:
return result[0].text if hasattr(result[0], 'text') else str(result[0])
return str(result)
def _extract_location(self, observation: str) -> str:
"""Extract real game location from observation.
The server appends [Location: X] to every play_action response.
Falls back to first line if not found."""
# Look for server-injected location tag
match = re.search(r'\[Location:\s*(.+?)\]', observation)
if match:
loc = match.group(1).strip()
if loc and loc != "Unknown":
return loc
# Fallback: first non-empty, non-metadata line
lines = observation.strip().split("\n")
for line in lines:
line = line.strip()
if line and not line.startswith("[") and not line.startswith("+"):
return line
return "Unknown"
def _extract_observation_location(self, observation: str) -> str:
"""Extract the text-based location header from observation.
Used for the locations_visited set in RunResult (professor's metric
counts unique text headers, not unique game rooms)."""
lines = observation.strip().split("\n")
for line in lines:
line = line.strip()
if (line and not line.startswith("[") and not line.startswith("+")
and not line.startswith("GAME OVER")):
return line
return "Unknown"
def _update_score(self, text: str) -> None:
"""Update score from game text output."""
patterns = [
r'\[Score:\s*(\d+)', # [Score: 10 | Moves: 5]
r'Score:\s*(\d+)', # Score: 10
r'score[:\s]+(\d+)', # score 10 or score: 10
r'\+(\d+)\s+points?!.*Total:\s*(\d+)', # +5 points! (Total: 15)
]
for pattern in patterns:
match = re.search(pattern, text, re.IGNORECASE)
if match:
# Use the last group (total score if available)
score_val = int(match.group(match.lastindex))
self.score = max(self.score, score_val)
# Track max score
max_match = re.search(r'Max Possible Score:\s*(\d+)', text)
if max_match:
self.max_score = int(max_match.group(1))
def _is_game_over(self, text: str) -> bool:
"""Check if the game is over from response text."""
game_over_phrases = [
"game over",
"you have died",
"you are dead",
"*** you have died ***",
"you have won",
"*** you have won ***",
"\ngame over",
]
text_lower = text.lower()
return any(phrase in text_lower for phrase in game_over_phrases)
def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
    """
    Forward a prompt to the module-level ``call_llm`` and return its reply.

    Exists so agent code can reach the LLM through ``self``; the default
    ``max_tokens`` of ``call_llm`` applies.
    """
    reply = call_llm(prompt, system_prompt, seed)
    return reply
# =============================================================================
# For local testing
# =============================================================================
async def test_agent():
    """Run a short local session against the MCP server script.

    Starts a ten-step verbose zork1 run with seed 42 and prints the final
    score, move count and visited locations.
    """
    from fastmcp import Client

    # Path to your MCP server
    mcp_server_script = "mcp_server.py"
    student = StudentAgent()
    run_options = {"game": "zork1", "max_steps": 10, "seed": 42, "verbose": True}

    async with Client(mcp_server_script) as session:
        outcome = await student.run(client=session, **run_options)
        print(f"\nFinal Score: {outcome.final_score}")
        print(f"Moves: {outcome.moves}")
        print(f"Locations: {outcome.locations_visited}")


if __name__ == "__main__":
    import asyncio

    asyncio.run(test_agent())