text-adventure-template

Sleeping

App Files Files Community

text-adventure-template / agent.py

minhdc

Add agent submission

5f4400e 7 days ago

raw

history blame contribute delete

42.9 kB

	"""
	Student Agent for Text Adventure Games

	This is your submission file. Implement the StudentAgent class to play
	text adventure games using the MCP server you also implement.

	Your agent should:
	1. Connect to the MCP server via the provided client
	2. Use the ReAct pattern (Thought -> Action -> Observation)
	3. Call MCP tools to interact with the game
	4. Maximize the game score within the step limit

	Required method:
	async def run(self, client, game, max_steps, seed, verbose) -> RunResult

	The 'client' is a FastMCP Client already connected to your MCP server.
	Use it to call tools like: await client.call_tool("play_action", {"action": "look"})

	Tips:
	- Start by looking around and understanding your environment
	- Keep track of visited locations to avoid loops
	- Pick up useful items (lamp, sword, etc.)
	- The seed parameter should be used to set your LLM's seed for reproducibility
	"""

	import json
	import os
	import re
	from dataclasses import dataclass, field
	from typing import Optional

	from dotenv import load_dotenv
	from huggingface_hub import InferenceClient

	# Load environment variables (python-dotenv) so that HF_TOKEN, read below via
	# os.getenv, can be supplied from a .env file.
	load_dotenv()

	# =============================================================================
	# LLM Configuration - DO NOT MODIFY
	# =============================================================================

	# Model to use (fixed for fair evaluation)
	LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

	# Initialize the LLM client (uses HF_TOKEN from environment)
	_hf_token = os.getenv("HF_TOKEN")
	if not _hf_token:
	    # Fail at import time: without a token no LLM call in this module can work.
	    raise ValueError("HF_TOKEN not found. Set it in your .env file.")

	# Shared client used by call_llm() for every request.
	LLM_CLIENT = InferenceClient(token=_hf_token)


	def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
	    """
	    Call the LLM with the given prompt. Use this function in your agent.

	    Sends a two-message chat (system + user) to LLM_MODEL through the
	    module-level LLM_CLIENT and returns the content of the first choice.
	    The call is synchronous.

	    Args:
	        prompt: The user prompt (current game state, history, etc.)
	        system_prompt: The system prompt (instructions for the agent)
	        seed: Random seed for reproducibility
	        max_tokens: Maximum tokens in response (default: 300)

	    Returns:
	        The LLM's response text

	    Example:
	        response = call_llm(
	            prompt="You are in a forest. What do you do?",
	            system_prompt=SYSTEM_PROMPT,
	            seed=42,
	        )
	    """
	    messages = [
	        {"role": "system", "content": system_prompt},
	        {"role": "user", "content": prompt},
	    ]

	    response = LLM_CLIENT.chat.completions.create(
	        model=LLM_MODEL,
	        messages=messages,
	        temperature=0.0, # Deterministic for reproducibility
	        max_tokens=max_tokens,
	        seed=seed,
	    )

	    # NOTE(review): assumes the API always returns at least one choice whose
	    # message content is a string - confirm against the client library.
	    return response.choices[0].message.content


	@dataclass
	class RunResult:
	    """Result of running the agent. Do not modify this class."""
	    # Score reported at the end of the run.
	    final_score: int
	    # Maximum achievable score for the game.
	    max_score: int
	    # Number of moves/steps the agent used.
	    moves: int
	    # Names/headers of the locations seen during the run.
	    locations_visited: set[str]
	    # True when the run ended because the game itself finished.
	    game_completed: bool
	    # Optional error description; None when no error was recorded.
	    error: Optional[str] = None
	    # Per-step records; StudentAgent.run appends
	    # (thought, tool call, observation excerpt) tuples.
	    history: list[tuple[str, str, str]] = field(default_factory=list)


	# =============================================================================
	# System Prompt - Customize this for your agent
	# =============================================================================
	#
	# Passed as the system message on every call_llm() made by StudentAgent.run.
	# The THOUGHT / TOOL / ARGS reply format requested here is the format that
	# StudentAgent._parse_response reads line by line, so keep the two in sync
	# when editing. The verbs listed as forbidden are also rewritten defensively
	# by StudentAgent._clean_action.

	SYSTEM_PROMPT = """You are an expert AI agent playing a classic text adventure game. Your mission: explore the world, solve puzzles, collect treasures, and maximize your score through careful observation and strategic play.

	AVAILABLE TOOLS (use via MCP):
	- play_action: Execute a game command (the primary tool for interacting with the game)
	- memory: Get current game state summary (location, score, recent actions, failed actions)
	- inventory: Check what you're carrying
	- get_map: See explored locations and connections (use to avoid getting lost)
	- get_valid_actions: Get a list of likely valid actions in the current state

	VALID GAME COMMANDS for play_action:
	- Movement: north, south, east, west, up, down, enter, exit, climb
	- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
	- Light: turn on lamp, extinguish candle
	- Combat: attack <enemy> with <weapon>, kill <enemy> with <weapon>
	- Item use: put <item> in <container>, give <item> to <npc>, turn on <item>
	- Other: look, inventory, read <thing>, wait, push <thing>, pull <thing>
	- Multi-object: take all, drop all, take lamp, sword
	- NPC: give <item> to <npc>, ask <npc> about <topic>

	FORBIDDEN COMMANDS (parser won't recognize): check, inspect, search, grab, use, help

	CRITICAL RULES:
	1. Distinguish failure types:
	- Hard failure ("can't go", "wall", "I don't understand"): STOP retrying after 2 attempts
	- Puzzle feedback (unusual responses, state changes): Continue with DIFFERENT approaches
	- Soft rejection ("too dark", "locked"): Solve the prerequisite first
	2. One command per turn: Issue a single game command
	3. Discovery-based play: Solve through observation and experimentation
	4. Combat priority: During combat, ONLY use combat actions. No examining!

	ITEM STRATEGY (VERY IMPORTANT):
	- After picking up an item, THINK about what it could be used for:
	* Light sources (lamp, lantern, torch) -> turn on before dark areas
	* Weapons (sword, knife, axe) -> attack enemies with them
	* Keys/tools -> open locked doors/containers
	* Food/drink -> give to NPCs or eat/drink when needed
	* Treasures (gold, jewels, trophy) -> may need to be placed somewhere for points
	* Rope/ladder -> climb or tie to access new areas
	- When you encounter an obstacle, ALWAYS check your inventory for a relevant item:
	* Locked door? -> Do I have a key?
	* Dark room? -> Do I have a lamp? Turn it on!
	* Enemy? -> Do I have a weapon? Attack with it!
	* NPC wants something? -> Do I have it in inventory?
	* Container/receptacle? -> Try putting relevant items in it
	- EXAMINE items you pick up - the description often hints at their use
	- Try using items on things in the environment: "put X in Y", "give X to Y", "unlock door with key"

	EXPLORATION STRATEGY:
	1. New location -> look -> note features -> check exits -> try promising directions
	2. Examine interesting objects (every noun could be interactive)
	3. Pick up useful items (light sources, weapons, keys, treasures)
	4. Open containers (mailbox, chest, door, window)
	5. Use get_map to avoid getting lost
	6. Turn on lamp before entering dark areas!
	7. When stuck: check inventory for unused items, then try get_valid_actions

	PUZZLE-SOLVING:
	- FIRST check inventory - do you have an item that could help?
	- Standard actions first (examine, take, open)
	- Try items on obstacles: "unlock X with key", "cut X with sword", "light X with lamp"
	- Environmental clues: read room descriptions for hints about what items to use
	- Multi-step chains: get item -> prepare it -> use it at the right location

	RESPOND IN THIS EXACT FORMAT (no markdown, no code blocks):
	THOUGHT: <your reasoning - what you observe, plan, and why. If you have items, consider how they might help.>
	TOOL: <tool_name>
	ARGS: <JSON arguments>

	Examples:
	THOUGHT: I just arrived at a new location. I should look around to understand my surroundings.
	TOOL: play_action
	ARGS: {"action": "look"}

	THOUGHT: It's dark here and I have a lamp in my inventory. I need to turn it on to see.
	TOOL: play_action
	ARGS: {"action": "turn on lamp"}

	THOUGHT: There's a locked door and I picked up a key earlier. Let me try using it.
	TOOL: play_action
	ARGS: {"action": "unlock door with key"}

	THOUGHT: The troll is blocking my way and I have a sword. I should attack it.
	TOOL: play_action
	ARGS: {"action": "attack troll with sword"}

	THOUGHT: I'm stuck and haven't used several items. Let me check what I'm carrying.
	TOOL: inventory
	ARGS: {}

	ANTI-PATTERNS TO AVOID:
	- Picking up items and NEVER using them
	- Ignoring inventory when stuck at a puzzle
	- Repeating the EXACT same action after a hard failure
	- Checking inventory during combat
	- Using forbidden verbs (check, inspect, search, grab, use)
	- Staying in one location too long without making progress

	DO NOT repeat the same action multiple times. If stuck, CHECK YOUR INVENTORY for items that might help, then try something different or move to a new area."""


	# =============================================================================
	# Student Agent - IMPLEMENT THIS CLASS
	# =============================================================================

	class StudentAgent:
	    """
	    ReAct agent implementation inspired by ZorkGPT architecture.

	    Features:
	    - ReAct loop (Thought -> Tool -> Observation)
	    - Loop detection (repeated actions, action cycling)
	    - Action validation and cleaning
	    - Score tracking from game responses
	    - Contextual prompt building with history
	    - Game-agnostic design

	    All per-game tracking state lives on the instance and is reset at the
	    start of every run(), so one agent object can play several games in a
	    row without score, history or failure records leaking between them.
	    """

	    # Compass/vertical suffixes that _normalize_action_key may drop.
	    _DIRECTION_SUFFIXES = (" north", " south", " east", " west", " up", " down")
	    # Filler suffixes that never change what a command means.
	    _FILLER_SUFFIXES = (" here", " again", " carefully", " closely")
	    # The max-score line; matched separately so it is never read as the current score.
	    _MAX_SCORE_RE = re.compile(r"Max Possible Score:\s*(\d+)", re.IGNORECASE)
	    # Leading article on an inventory line ("A brass lantern" -> "brass lantern").
	    _LEADING_ARTICLE_RE = re.compile(r"^(?:a|an|the|some)\s+", re.IGNORECASE)

	    def __init__(self):
	        """Initialize agent state tracking."""
	        self.tool_names: list[str] = []
	        self._reset_state()

	    def _reset_state(self) -> None:
	        """Reset every piece of per-game state to its initial value."""
	        self.history: list[dict] = []  # Full action history (bounded)
	        self.recent_actions: list[str] = []  # Last N proposed actions for loop detection
	        self.score: int = 0
	        self.max_score: int = 0
	        # Per-location tracking
	        self.actions_by_location: dict[str, list[str]] = {}  # location -> [actions tried]
	        self.failed_actions_by_location: dict[str, set[str]] = {}  # location -> {failed keys}
	        self.current_location: str = ""
	        self.turns_at_location: int = 0
	        self.turns_since_score_change: int = 0
	        # Inventory tracking
	        self.known_inventory: list[str] = []  # Items we know we're carrying
	        self.last_inventory_check: int = 0  # Step when we last checked inventory
	        self.items_used: set[str] = set()  # Items we've tried using
	        self.items_examined: set[str] = set()  # Items we've examined

	    async def run(
	        self,
	        client,  # FastMCP Client connected to your MCP server
	        game: str,
	        max_steps: int,
	        seed: int,
	        verbose: bool = False,
	    ) -> "RunResult":
	        """
	        Run the agent for a game session using the ReAct pattern.

	        Args:
	            client: Connected MCP client providing list_tools() and call_tool().
	            game: Name of the game; only used in verbose output.
	            max_steps: Maximum number of tool calls issued after the initial look.
	            seed: Base LLM seed; the step number is added on every call.
	            verbose: Print a trace of thoughts, tool calls and progress.

	        Returns:
	            RunResult with the highest score seen, the number of tool calls,
	            the location headers observed and the per-step history.
	        """
	        self._reset_state()
	        locations_visited: set[str] = set()  # Text-based (unique first lines) - for professor's metric
	        game_locations_visited: set[str] = set()  # Jericho real rooms - for debugging
	        history: list[tuple[str, str, str]] = []  # (thought, action, result) tuples for RunResult
	        moves = 0
	        game_over = False

	        # Get available tools from the MCP server
	        tools = await client.list_tools()
	        self.tool_names = [t.name for t in tools]

	        # Get initial observation
	        result = await client.call_tool("play_action", {"action": "look"})
	        observation = self._extract_result(result)

	        # Track initial location (both systems)
	        location = self._extract_location(observation)
	        obs_location = self._extract_observation_location(observation)
	        game_locations_visited.add(location)
	        locations_visited.add(obs_location)
	        self.current_location = location
	        self._update_score(observation)

	        if verbose:
	            print(f"\n=== Starting {game} ===")
	            print(f"{observation}\n")

	        # Main ReAct loop
	        for step in range(1, max_steps + 1):
	            # Note pickups first so the refresh below happens in this same step.
	            self._track_item_changes(observation)
	            await self._refresh_inventory(client, step, observation, verbose)

	            # Build contextual prompt
	            prompt = self._build_prompt(observation)

	            # Call LLM with step-varied seed for diversity
	            response = call_llm(prompt, SYSTEM_PROMPT, seed + step)

	            # Parse response, then validate and fix the tool call
	            thought, tool_name, tool_args = self._parse_response(response)
	            tool_name, tool_args = self._validate_tool_call(tool_name, tool_args)

	            if verbose:
	                print(f"--- Step {step} ---")
	                print(f"[THOUGHT] {thought}")
	                print(f"[TOOL] {tool_name}({tool_args})")

	            # Loop detection only makes sense for game commands
	            if tool_name == "play_action":
	                tool_args = self._guard_against_loops(tool_args, verbose)

	            moves += 1

	            # Execute the tool
	            tool_failed = False
	            try:
	                result = await client.call_tool(tool_name, tool_args)
	                observation = self._extract_result(result)

	                if verbose:
	                    obs_preview = observation[:200] + "..." if len(observation) > 200 else observation
	                    print(f"[RESULT] {obs_preview}")
	            except Exception as e:
	                tool_failed = True
	                observation = f"Error: {e}"
	                if verbose:
	                    print(f"[ERROR] {e}")

	            # Only successful game commands describe where the player is.
	            # Helper-tool output and error strings must not be read as rooms.
	            new_text_discovered = False
	            new_game_loc_discovered = False
	            if tool_name == "play_action" and not tool_failed:
	                location = self._extract_location(observation)
	                obs_location = self._extract_observation_location(observation)
	                new_text_discovered = obs_location not in locations_visited
	                locations_visited.add(obs_location)
	                new_game_loc_discovered = location not in game_locations_visited
	                game_locations_visited.add(location)
	            else:
	                location = self.current_location

	            # Track location stagnation
	            if location != self.current_location:
	                self.current_location = location
	                self.turns_at_location = 1
	            else:
	                self.turns_at_location += 1

	            # Track failed actions at this location
	            if tool_name == "play_action":
	                executed_action = tool_args.get("action", "")
	                if not tool_failed and self._action_failed(observation):
	                    self.failed_actions_by_location.setdefault(
	                        self.current_location, set()
	                    ).add(self._normalize_action_key(executed_action))
	                # Track item examinations and uses
	                self._track_item_usage(executed_action)

	            # Update score
	            prev_score = self.score
	            self._update_score(observation)
	            score_changed = self.score != prev_score
	            if score_changed:
	                self.turns_since_score_change = 0
	            else:
	                self.turns_since_score_change += 1

	            # Print progress summary
	            if verbose:
	                status_parts = [f"Score: {self.score}"]
	                if score_changed:
	                    status_parts.append(f"(+{self.score - prev_score}!)")
	                status_parts.append(f"Texts: {len(locations_visited)}")
	                if new_text_discovered:
	                    status_parts.append(f"(NEW text: {obs_location[:50]})")
	                status_parts.append(f"Rooms: {len(game_locations_visited)}")
	                if new_game_loc_discovered:
	                    status_parts.append(f"(NEW room: {location})")
	                status_parts.append(f"Moves: {moves}")
	                print(f"[PROGRESS] {' | '.join(status_parts)}")

	            # Update history (kept bounded for prompt building)
	            self.history.append({
	                "step": step,
	                "thought": thought,
	                "tool": tool_name,
	                "args": tool_args,
	                "result": observation[:300],
	                "location": location,
	            })
	            if len(self.history) > 15:
	                self.history = self.history[-15:]

	            # Record in result history
	            history.append((thought, f"{tool_name}({tool_args})", observation[:150]))

	            # Check for game over
	            if self._is_game_over(observation):
	                game_over = True
	                if verbose:
	                    print("\n* GAME OVER *")
	                break

	        # Verbose-only summary of both location metrics
	        if verbose:
	            print(f"\n--- Location Summary ---")
	            print(f" Unique text observations: {len(locations_visited)}")
	            print(f" Unique game rooms: {len(game_locations_visited)}")
	            print(f" Game rooms: {sorted(game_locations_visited)}")

	        return RunResult(
	            final_score=self.score,
	            max_score=self.max_score if self.max_score > 0 else 350,
	            moves=moves,
	            locations_visited=locations_visited,
	            game_completed=game_over,
	            history=history,
	        )

	    async def _refresh_inventory(self, client, step: int, observation: str, verbose: bool) -> None:
	        """Re-read the inventory every 10 steps or right after a pickup (best effort)."""
	        due = step - self.last_inventory_check >= 10
	        if not (due or self._just_picked_up_item(observation)):
	            return
	        if "inventory" not in self.tool_names:
	            return
	        # Record the attempt up front so a failing tool is not retried every step.
	        self.last_inventory_check = step
	        try:
	            inv_result = await client.call_tool("inventory", {})
	            self._parse_inventory(self._extract_result(inv_result))
	        except Exception as exc:
	            # Inventory is only a prompt aid; never let it abort the game loop.
	            if verbose:
	                print(f"[WARN] Inventory refresh failed: {exc}")

	    def _guard_against_loops(self, tool_args: dict, verbose: bool) -> dict:
	        """
	        Record the proposed game action and replace it if it would loop.

	        Returns the arguments to execute: the original ones, or a substitute
	        from _break_loop() / _force_movement().
	        """
	        action = tool_args.get("action", "look")
	        self.recent_actions.append(action)
	        if len(self.recent_actions) > 10:
	            self.recent_actions = self.recent_actions[-10:]

	        # Track actions at current location
	        tried_here = self.actions_by_location.setdefault(self.current_location, [])
	        tried_here.append(action)

	        # Check if this action (or a semantic variant) was already tried and failed here
	        action_key = self._normalize_action_key(action)
	        failed_here = self.failed_actions_by_location.get(self.current_location, set())
	        if action_key in failed_here:
	            if verbose:
	                print(f"[BLOCKED] '{action}' already failed at '{self.current_location}', skipping")
	            tool_args = self._break_loop(action)
	        # Detect immediate repetition (same action 3+ times)
	        elif len(self.recent_actions) >= 3 and len(set(self.recent_actions[-3:])) == 1:
	            if verbose:
	                print(f"[LOOP] Immediate repetition detected: '{action}'")
	            tool_args = self._break_loop(action)
	        # Detect action cycling (A->B->A->B pattern)
	        elif self._detect_cycling():
	            if verbose:
	                print(f"[LOOP] Action cycling detected")
	            tool_args = self._break_loop(action)
	        # Detect semantic repetition (push statue / push the statue / push statue north)
	        elif self._is_semantic_repeat(action):
	            if verbose:
	                print(f"[LOOP] Semantic repetition of '{action}' at this location")
	            tool_args = self._break_loop(action)

	        # Force movement if stuck at same location too long without score progress
	        if self.turns_at_location >= 8 and self.turns_since_score_change >= 8:
	            if not self._is_movement_action(tool_args.get("action", "")):
	                if verbose:
	                    print(f"[STUCK] {self.turns_at_location} turns here with no score. Forcing movement.")
	                tool_args = self._force_movement()

	        # Remember a substituted action too, otherwise the "untried direction"
	        # logic would keep handing out the same replacement.
	        executed = tool_args.get("action", "")
	        if executed and executed != action:
	            tried_here.append(executed)
	        return tool_args

	    def _build_prompt(self, observation: str) -> str:
	        """
	        Build a contextual prompt for the LLM with game state and history.
	        Includes failed-action context so the LLM avoids retrying useless actions.
	        """
	        parts = []

	        # Score context
	        parts.append(f"Current Score: {self.score}")
	        if self.max_score > 0:
	            parts.append(f"Max Possible Score: {self.max_score}")
	        parts.append(f"Current Location: {self.current_location}")
	        parts.append(f"Turns at this location: {self.turns_at_location}")
	        parts.append(f"Turns since last score change: {self.turns_since_score_change}")

	        # Inventory context - critical for item-usage reasoning
	        if self.known_inventory:
	            parts.append(f"\nYOUR INVENTORY: {', '.join(self.known_inventory)}")
	            # Highlight unused items
	            unused = [item for item in self.known_inventory if item.lower() not in self.items_used]
	            unexamined = [item for item in self.known_inventory if item.lower() not in self.items_examined]
	            if unexamined:
	                parts.append(f" Items NOT YET EXAMINED (examine these!): {', '.join(unexamined)}")
	            if unused and self.turns_since_score_change >= 3:
	                parts.append(f" Items NOT YET USED (try using these!): {', '.join(unused)}")
	                parts.append(f" HINT: Try commands like 'put <item> in <thing>', 'give <item> to <npc>', "
	                             f"'unlock <thing> with <item>', 'turn on <item>', 'attack <enemy> with <item>'")
	        else:
	            parts.append("\nYOUR INVENTORY: (empty or unknown - try 'inventory' to check)")

	        # Recent history for continuity
	        if self.history:
	            parts.append("\nRecent actions and results:")
	            for entry in self.history[-5:]:
	                action = entry.get("args", {}).get("action", entry["tool"])
	                loc = entry.get("location", "")
	                result_short = entry["result"][:100]
	                if len(entry["result"]) > 100:
	                    result_short += "..."
	                parts.append(f" [{loc}] {action} -> {result_short}")

	        # Failed actions at current location - critical for avoiding retries
	        failed_here = self.failed_actions_by_location.get(self.current_location, set())
	        if failed_here:
	            parts.append(f"\n[ACTIONS THAT ALREADY FAILED AT THIS LOCATION - DO NOT RETRY THESE]:")
	            parts.append(f" {', '.join(sorted(failed_here))}")

	        # Actions already tried at this location
	        tried_here = self.actions_by_location.get(self.current_location, [])
	        if len(tried_here) > 3:
	            unique_tried = sorted(set(tried_here[-10:]))
	            parts.append(f"\n[ACTIONS ALREADY TRIED HERE (try something new!)]:")
	            parts.append(f" {', '.join(unique_tried)}")

	        # Loop warning
	        if len(self.recent_actions) >= 3 and len(set(self.recent_actions[-3:])) <= 2:
	            parts.append(
	                f"\n[WARNING: You are REPEATING actions: {self.recent_actions[-3:]}. "
	                f"You MUST try something completely different! Move to a new area with "
	                f"north/south/east/west, or use get_map to find unexplored exits.]"
	            )

	        # Stagnation warning with escalating urgency
	        if self.turns_at_location >= 4 and self.turns_since_score_change >= 4:
	            parts.append(
	                f"\n[CRITICAL: You have been at '{self.current_location}' for {self.turns_at_location} turns "
	                f"with NO score progress for {self.turns_since_score_change} turns. "
	                f"LEAVE THIS AREA NOW. Try: north, south, east, west, up, down, enter, exit. "
	                f"Use get_map to see where you've been and find NEW areas to explore.]"
	            )
	        elif self.turns_at_location >= 3:
	            parts.append(
	                f"\n[NOTE: You've been at '{self.current_location}' for {self.turns_at_location} turns. "
	                f"Consider moving on if you're not making progress.]"
	            )

	        # Current observation
	        parts.append(f"\nCurrent situation:\n{observation}")
	        parts.append("\nWhat do you do next?")

	        return "\n".join(parts)

	    def _just_picked_up_item(self, observation: str) -> bool:
	        """Check if the last observation indicates we picked up an item."""
	        pickup_indicators = ["taken", "picked up", "you now have", "added to",
	                             "you take", "you get", "you pick up"]
	        obs_lower = observation.lower()
	        return any(ind in obs_lower for ind in pickup_indicators)

	    def _clean_item_name(self, raw: str) -> str:
	        """Return an inventory entry without bullets or leading article ('' for metadata lines)."""
	        entry = raw.strip()
	        # Skip score/location metadata such as "[Score: 5]" or "+5 points"
	        if not entry or entry.startswith("[") or entry.startswith("+"):
	            return ""
	        entry = entry.lstrip("- *•").strip()
	        return self._LEADING_ARTICLE_RE.sub("", entry).strip()

	    def _parse_inventory(self, inv_text: str) -> None:
	        """
	        Parse inventory text and store the item names in known_inventory.

	        Understands "Inventory: a, b, c" (comma or line separated) and the
	        Infocom style of one item per line. When nothing can be parsed the
	        previously known inventory is kept.
	        """
	        inv_lower = inv_text.lower()
	        # Loose substring test: it also fires on item names such as
	        # "empty bottle"; kept because the server's exact wording is unknown.
	        if "empty" in inv_lower or "nothing" in inv_lower or "not carrying" in inv_lower:
	            self.known_inventory = []
	            return

	        # Try to parse "Inventory: item1, item2, item3" format
	        header = re.search(r"inventory:", inv_text, re.IGNORECASE)
	        if header:
	            after_colon = inv_text[header.end():]
	            items = [self._clean_item_name(part) for part in re.split(r"[,\n]", after_colon)]
	            items = [item for item in items if item]
	            if items:
	                self.known_inventory = items
	                return

	        # Parse line-by-line (common Infocom format: " A brass lantern")
	        items = []
	        for line in inv_text.strip().split("\n"):
	            stripped = line.strip()
	            # Skip header lines
	            if not stripped or "carrying" in stripped.lower() or "inventory" in stripped.lower():
	                continue
	            name = self._clean_item_name(stripped)
	            if name:
	                items.append(name)

	        if items:
	            self.known_inventory = items

	    def _track_item_changes(self, observation: str) -> None:
	        """Mark the inventory as stale when the observation reports a pickup."""
	        obs_lower = observation.lower()
	        if re.search(r"(?:taken|you take|you pick up|you get)\b", obs_lower):
	            # Zero makes the periodic refresh check treat the inventory as overdue.
	            self.last_inventory_check = 0

	    def _track_item_usage(self, action: str) -> None:
	        """Track when known inventory items are examined or used in commands."""
	        action_lower = action.lower().strip()
	        words = action_lower.split()
	        if not words:
	            return

	        verb = words[0]
	        target = " ".join(words[1:])

	        # Track examinations. A bare "look" has no target and must not match,
	        # because the empty string is a substring of every item name.
	        if verb in ("examine", "look", "read") and target:
	            for item in self.known_inventory:
	                name = item.lower()
	                if name in target or target in name:
	                    self.items_examined.add(name)

	        # Track usage (any verb that is not observation, pickup/drop or movement)
	        if verb not in ("examine", "take", "drop", "look", "inventory", "i",
	                        "north", "south", "east", "west", "up", "down",
	                        "n", "s", "e", "w", "u", "d", "enter", "exit"):
	            for item in self.known_inventory:
	                if item.lower() in action_lower:
	                    self.items_used.add(item.lower())

	    def _parse_args(self, args_part: str) -> dict:
	        """Turn the text after 'ARGS:' into an argument dict, tolerating malformed JSON."""
	        # Try the text as written first; swapping quotes up front would break
	        # valid JSON that contains an apostrophe ("troll's axe").
	        candidates = [args_part]
	        if "'" in args_part:
	            candidates.append(args_part.replace("'", '"'))
	        for candidate in candidates:
	            try:
	                parsed = json.loads(candidate)
	            except json.JSONDecodeError:
	                continue
	            if isinstance(parsed, dict):
	                return parsed
	            if isinstance(parsed, str) and parsed.strip():
	                return {"action": parsed.strip()}

	        # Try to extract action from malformed JSON
	        for candidate in candidates:
	            match = re.search(r'"action"\s*:\s*"([^"]+)"', candidate)
	            if match:
	                return {"action": match.group(1)}

	        # Last resort: treat the whole thing as an action
	        cleaned = args_part.strip("{}[]\"' ")
	        return {"action": cleaned} if cleaned else {"action": "look"}

	    def _parse_response(self, response: str) -> tuple[str, str, dict]:
	        """
	        Parse LLM response to extract thought, tool name, and arguments.

	        Handles various formatting quirks from the LLM (markdown emphasis,
	        backticks, single-quoted or truncated JSON). Missing parts fall back
	        to a play_action "look".
	        """
	        thought = "No reasoning provided"
	        tool_name = "play_action"
	        tool_args = {"action": "look"}

	        for line in (response or "").strip().split("\n"):
	            # Tolerate markdown such as "**TOOL:** play_action"
	            line_clean = line.strip().lstrip("*# ")
	            line_upper = line_clean.upper()

	            if line_upper.startswith("THOUGHT:"):
	                thought = line_clean.split(":", 1)[1].strip().lstrip("*").strip()
	            elif line_upper.startswith("TOOL:"):
	                raw_tool = line_clean.split(":", 1)[1].lower()
	                # Clean markdown artifacts
	                raw_tool = raw_tool.replace("*", "").replace("`", "")
	                tokens = raw_tool.split()
	                tool_name = tokens[0] if tokens else "play_action"
	            elif line_upper.startswith("ARGS:"):
	                args_part = line_clean.split(":", 1)[1].strip().strip("*`").strip()
	                tool_args = self._parse_args(args_part)

	        return thought, tool_name, tool_args

	    def _validate_tool_call(self, tool_name: str, tool_args: dict) -> tuple[str, dict]:
	        """
	        Validate and fix common tool call issues.
	        Maps invalid tool names and cleans action text.
	        """
	        if not isinstance(tool_args, dict):
	            tool_args = {}

	        # Fix tool name aliases
	        if tool_name not in self.tool_names:
	            tool_alias_map = {
	                "action": "play_action", "do": "play_action", "command": "play_action",
	                "execute": "play_action", "game": "play_action",
	                "map": "get_map", "location": "get_map", "locations": "get_map",
	                "mem": "memory", "state": "memory", "status": "memory", "info": "memory",
	                "inv": "inventory", "items": "inventory",
	                "valid": "get_valid_actions", "actions": "get_valid_actions",
	                "valid_actions": "get_valid_actions",
	            }
	            tool_name = tool_alias_map.get(tool_name, "play_action")

	        if tool_name == "play_action":
	            # Clean action text for play_action
	            raw_action = tool_args.get("action", "look")
	            if not isinstance(raw_action, str):
	                raw_action = str(raw_action) if raw_action else "look"
	            tool_args["action"] = self._clean_action(raw_action)
	        else:
	            # "action" belongs to play_action only (it is also the parser's
	            # default); do not forward it to the helper tools.
	            tool_args = {k: v for k, v in tool_args.items() if k != "action"}

	        return tool_name, tool_args

	    def _clean_action(self, action: str) -> str:
	        """
	        Clean and validate a game action command.
	        Fixes common invalid verbs and removes formatting artifacts.
	        """
	        # Remove markdown/formatting
	        action = action.replace("*", "").replace("`", "")
	        action = action.strip().lower()
	        action = action.strip(".,!?;:")

	        # Fix invalid verbs that parsers don't recognize
	        invalid_verb_map = {
	            "check": "examine",
	            "inspect": "examine",
	            "search": "look",
	            "grab": "take",
	            "pick up": "take",
	            "pick": "take",
	            "use": "examine",
	            "investigate": "examine",
	            "observe": "look at",
	            "collect": "take",
	            "get": "take",
	        }

	        words = action.split()  # also normalizes whitespace
	        # Two-word verbs first, otherwise "pick up lamp" becomes "take up lamp".
	        if len(words) >= 2 and f"{words[0]} {words[1]}" in invalid_verb_map:
	            words[:2] = [invalid_verb_map[f"{words[0]} {words[1]}"]]
	        elif words and words[0] in invalid_verb_map:
	            words[0] = invalid_verb_map[words[0]]

	        return " ".join(words) or "look"

	    def _detect_cycling(self) -> bool:
	        """
	        Detect action cycling patterns (A->B->A->B or low diversity over many turns).
	        """
	        # Check for exact 2-step cycle in last 4 actions
	        if len(self.recent_actions) >= 4:
	            last4 = self.recent_actions[-4:]
	            if last4[0] == last4[2] and last4[1] == last4[3]:
	                return True

	        # Check for low diversity over last 6 actions
	        if len(self.recent_actions) >= 6 and len(set(self.recent_actions[-6:])) <= 2:
	            return True

	        # Check for semantic cycling (normalized keys)
	        if len(self.recent_actions) >= 4:
	            last4_keys = {self._normalize_action_key(a) for a in self.recent_actions[-4:]}
	            if len(last4_keys) <= 2:
	                return True

	        return False

	    def _normalize_action_key(self, action: str) -> str:
	        """
	        Normalize an action to a canonical key for dedup.
	        'push statue', 'push the statue', 'push statue north' all become 'push statue'.

	        A direction is only dropped when a verb and an object remain, so
	        movement commands such as 'go north' and 'go south' keep distinct keys.
	        """
	        key = " ".join(action.lower().split())
	        # Remove articles
	        for article in (" the ", " a ", " an "):
	            key = key.replace(article, " ")

	        # Peel suffixes until stable ("push statue north again" -> "push statue")
	        changed = True
	        while changed:
	            changed = False
	            for suffix in self._FILLER_SUFFIXES:
	                if key.endswith(suffix):
	                    key = key[:-len(suffix)]
	                    changed = True
	            for suffix in self._DIRECTION_SUFFIXES:
	                if key.endswith(suffix) and len(key[:-len(suffix)].split()) >= 2:
	                    key = key[:-len(suffix)]
	                    changed = True

	        return " ".join(key.split())

	    def _is_semantic_repeat(self, action: str) -> bool:
	        """
	        Check if this action is a semantic repeat at the current location.

	        Counts matching normalized keys among the last 8 actions recorded
	        here. run() records the proposed action before calling this, so a
	        count of 2 means one earlier attempt plus the current one.
	        """
	        tried_here = self.actions_by_location.get(self.current_location, [])
	        if len(tried_here) < 2:
	            return False

	        action_key = self._normalize_action_key(action)
	        count = sum(1 for a in tried_here[-8:] if self._normalize_action_key(a) == action_key)
	        return count >= 2

	    def _action_failed(self, observation: str) -> bool:
	        """
	        Check if a game response indicates the action failed/was useless.
	        """
	        obs_lower = observation.lower()
	        failure_indicators = [
	            "can't go that way", "you can't go", "there is no way",
	            "wall there", "you cannot go", "not a direction",
	            "can't see any such thing", "doesn't work", "don't understand",
	            "blocked", "too dark", "there is a wall",
	            "you can't", "impossible", "nothing happens",
	            "that doesn't seem to work", "i don't understand",
	            "that's not something you can", "you don't see",
	            "i don't know the word", "not something you can",
	            "already", "can't do that", "won't budge",
	            "that doesn't make sense", "that's not a verb",
	        ]
	        return any(indicator in obs_lower for indicator in failure_indicators)

	    def _is_movement_action(self, action: str) -> bool:
	        """Check if an action is a movement command (judged by its first word)."""
	        movements = {
	            "north", "south", "east", "west", "up", "down",
	            "n", "s", "e", "w", "u", "d",
	            "northeast", "northwest", "southeast", "southwest",
	            "enter", "exit", "in", "out", "climb",
	        }
	        words = action.lower().split()
	        return bool(words) and words[0] in movements

	    def _force_movement(self) -> dict:
	        """
	        Force a movement action to escape a stuck location.
	        Avoids directions that already failed here.
	        """
	        failed_here = self.failed_actions_by_location.get(self.current_location, set())
	        tried_here = set(self.actions_by_location.get(self.current_location, []))

	        # Prioritize untried directions, then tried-but-not-failed
	        all_directions = ["north", "south", "east", "west", "up", "down",
	                          "enter", "exit", "northeast", "northwest", "southeast", "southwest"]

	        # First: directions never tried here
	        for d in all_directions:
	            if d not in failed_here and d not in tried_here:
	                return {"action": d}

	        # Second: directions tried but not failed (might work for movement)
	        for d in all_directions:
	            if d not in failed_here:
	                return {"action": d}

	        # Every direction has failed here: re-read the room instead
	        return {"action": "look"}

	    def _break_loop(self, stuck_action: str) -> dict:
	        """
	        Generate a loop-breaking action when the agent is stuck.
	        Prefers untried directions at the current location.
	        """
	        failed_here = self.failed_actions_by_location.get(self.current_location, set())
	        tried_here = set(self.actions_by_location.get(self.current_location, []))
	        recent_set = set(self.recent_actions[-5:])

	        # Priority 1: Try untried movement directions at this location
	        directions = ["north", "south", "east", "west", "up", "down", "enter", "exit"]
	        for d in directions:
	            if d not in failed_here and d not in tried_here and d not in recent_set:
	                return {"action": d}

	        # Priority 2: Movement directions not recently used and not failed
	        for d in directions:
	            if d not in failed_here and d not in recent_set:
	                return {"action": d}

	        # Priority 3: Non-movement fallbacks
	        for fallback in ("look", "inventory", "examine room"):
	            if fallback not in recent_set and fallback != stuck_action:
	                return {"action": fallback}

	        # Priority 4: Any direction not failed
	        for d in directions:
	            if d not in failed_here:
	                return {"action": d}

	        return {"action": "look"}

	    def _extract_result(self, result) -> str:
	        """Extract text from an MCP tool result (first content item), else str(result)."""
	        content = getattr(result, "content", None)
	        if content:
	            first = content[0]
	            text = getattr(first, "text", None)
	            # Non-text content has no .text; fall back to its repr
	            return text if isinstance(text, str) else str(first)
	        if isinstance(result, list) and result:
	            return result[0].text if hasattr(result[0], 'text') else str(result[0])
	        return str(result)

	    def _extract_location(self, observation: str) -> str:
	        """Extract real game location from observation.
	        The server appends [Location: X] to every play_action response.
	        Falls back to first line if not found."""
	        # Look for server-injected location tag
	        match = re.search(r'\[Location:\s*(.+?)\]', observation)
	        if match:
	            loc = match.group(1).strip()
	            if loc and loc != "Unknown":
	                return loc

	        # Fallback: first non-empty, non-metadata line
	        for line in observation.strip().split("\n"):
	            line = line.strip()
	            if line and not line.startswith(("[", "+")):
	                return line
	        return "Unknown"

	    def _extract_observation_location(self, observation: str) -> str:
	        """Extract the text-based location header from observation.
	        Used for the locations_visited set in RunResult (professor's metric
	        counts unique text headers, not unique game rooms)."""
	        for line in observation.strip().split("\n"):
	            line = line.strip()
	            if line and not line.startswith(("[", "+", "GAME OVER")):
	                return line
	        return "Unknown"

	    def _update_score(self, text: str) -> None:
	        """
	        Update score (and max score) from game text output.

	        The score only ever increases: the highest value matched by any
	        pattern is kept. The "Max Possible Score" line is removed before
	        scanning so its number cannot be read as the current score.
	        """
	        # Track max score
	        max_match = self._MAX_SCORE_RE.search(text)
	        if max_match:
	            self.max_score = int(max_match.group(1))
	        scrubbed = self._MAX_SCORE_RE.sub(" ", text)

	        patterns = [
	            r'\[Score:\s*(\d+)',  # [Score: 10 | Moves: 5]
	            r'Score:\s*(\d+)',  # Score: 10
	            r'score[:\s]+(\d+)',  # score 10 or score: 10
	            r'\+(\d+)\s+points?!.*?Total:\s*(\d+)',  # +5 points! (Total: 15)
	        ]

	        for pattern in patterns:
	            match = re.search(pattern, scrubbed, re.IGNORECASE)
	            if match:
	                # Use the last group (total score if available)
	                score_val = int(match.group(match.lastindex))
	                self.score = max(self.score, score_val)

	    def _is_game_over(self, text: str) -> bool:
	        """Check if the game is over from response text."""
	        # The starred and newline-prefixed variants contain these phrases.
	        game_over_phrases = (
	            "game over",
	            "you have died",
	            "you are dead",
	            "you have won",
	        )
	        text_lower = text.lower()
	        return any(phrase in text_lower for phrase in game_over_phrases)

	    def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
	        """
	        Call the LLM with the given prompt.
	        Convenience wrapper around the module-level call_llm().
	        """
	        return call_llm(prompt, system_prompt, seed)


	# =============================================================================
	# For local testing
	# =============================================================================

	async def test_agent():
	    """Play a short, verbose Zork I session against the local MCP server and print a summary."""
	    from fastmcp import Client

	    # Path to your MCP server
	    mcp_server_script = "mcp_server.py"
	    student = StudentAgent()
	    session_settings = {
	        "game": "zork1",
	        "max_steps": 10,
	        "seed": 42,
	        "verbose": True,
	    }

	    async with Client(mcp_server_script) as mcp_client:
	        outcome = await student.run(client=mcp_client, **session_settings)

	        summary_lines = (
	            f"\nFinal Score: {outcome.final_score}",
	            f"Moves: {outcome.moves}",
	            f"Locations: {outcome.locations_visited}",
	        )
	        for summary_line in summary_lines:
	            print(summary_line)


	# Run the local smoke test only when this file is executed as a script.
	if __name__ == "__main__":
	    import asyncio
	    # test_agent() is a coroutine, so it is run to completion with asyncio.run().
	    asyncio.run(test_agent())