text-adventure-template

Sleeping

App Files Files Community

text-adventure-template / agent.py

Mtanre

Submit text adventure agent

83e130e 2 months ago

raw

history blame contribute delete

33.8 kB

	"""
	ReAct Agent for Text Adventure Games

	Uses MCP tools (including Jericho-powered valid actions) to play
	text adventure games with reasoning, loop detection, and exploration strategy.
	"""

	import json
	import os
	import re
	from collections import deque
	from dataclasses import dataclass, field
	from typing import Optional

	from dotenv import load_dotenv
	from huggingface_hub import InferenceClient

	load_dotenv()

	# =============================================================================
	# LLM Configuration - DO NOT MODIFY
	# =============================================================================

	# Model identifier sent with every completion request made by call_llm().
	LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

	# The API token is read from the environment (load_dotenv() is called above).
	# A missing or empty token aborts at import time with ValueError.
	_hf_token = os.getenv("HF_TOKEN")
	if not _hf_token:
	raise ValueError("HF_TOKEN not found. Set it in your .env file.")

	# Module-wide inference client, configured with the "novita" provider.
	LLM_CLIENT = InferenceClient(token=_hf_token, provider="novita")


	def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
	"""Send one system + user message pair to the LLM and return its reply.

	Args:
	prompt: Text sent as the "user" message.
	system_prompt: Text sent as the "system" message.
	seed: Seed value forwarded to the completion request.
	max_tokens: Token limit forwarded to the completion request.

	Returns:
	The message content of the first choice in the response.
	"""
	# Two-message conversation: system instructions first, then the user prompt.
	messages = [
	{"role": "system", "content": system_prompt},
	{"role": "user", "content": prompt},
	]

	# Fixed temperature of 0.15; model, token limit and seed come from the
	# module constant and the caller.
	response = LLM_CLIENT.chat.completions.create(
	model=LLM_MODEL,
	messages=messages,
	temperature=0.15,
	max_tokens=max_tokens,
	seed=seed,
	)

	return response.choices[0].message.content


	@dataclass
	class RunResult:
	"""Result of running the agent. Do not modify this class."""
	# Score held by the agent when the run ended.
	final_score: int
	# Maximum score parsed from the game (StudentAgent.run() substitutes 350
	# when none was parsed).
	max_score: int
	# Number of play_action calls issued inside the step loop.
	moves: int
	# Names of every location the agent was in during the run.
	locations_visited: set[str]
	# True when the last observation matched a game-over phrase.
	game_completed: bool
	# Optional error description; defaults to None.
	error: Optional[str] = None
	# One (thought, "tool(args)", truncated observation) tuple per step.
	history: list[tuple[str, str, str]] = field(default_factory=list)


	# =============================================================================
	# Constants
	# =============================================================================

	# Commands treated as movement attempts when tracking tried/failed exits.
	MOVEMENT_COMMANDS = {
	    # Every full direction name is accepted bare and with a "go " prefix.
	    *(
	        command
	        for direction in (
	            "north", "south", "east", "west", "up", "down",
	            "northeast", "northwest", "southeast", "southwest",
	        )
	        for command in (direction, f"go {direction}")
	    ),
	    # One- and two-letter abbreviations.
	    "n", "s", "e", "w", "u", "d",
	    "ne", "nw", "se", "sw",
	    # Non-compass movement verbs.
	    "enter", "exit", "in", "out",
	}


	# =============================================================================
	# System Prompt
	# =============================================================================

	# System message passed to call_llm() by StudentAgent.run(). It lists the
	# tools, scoring hints and rules, and ends with the THOUGHT/TOOL/ARGS reply
	# format that StudentAgent._parse_response() looks for.
	SYSTEM_PROMPT = """You play text adventure games. Your goal: maximize score.

	Rooms are auto-explored for you (items taken, containers opened, objects examined).
	Your job: solve puzzles the auto-explorer can't.

	AVAILABLE TOOLS:
	- play_action: Run a game command. Example: ARGS: {"action": "put gem in slot"}
	- get_valid_actions: See what commands work here. FREE, no move cost.
	- get_state_info: Check score, inventory, history. FREE.
	- get_map: See room connections. FREE.
	- get_inventory: Check items carried. FREE.

	HOW TO SCORE POINTS:
	1. Use items on things: "put X in Y", "give X to Y", "insert X in Y", "unlock Y with X"
	2. Try Key actions from the list (copy exactly)
	3. Read clues, follow instructions found in game text
	4. Move to new rooms you haven't visited

	RULES:
	- NEVER drop items. NEVER "put X down". NEVER throw items away.
	- If an action had NO_EFFECT, do NOT repeat it.
	- One action per turn.

	RESPONSE FORMAT (follow exactly):
	THOUGHT: <your reasoning>
	TOOL: play_action
	ARGS: {"action": "<command>"}"""


	# =============================================================================
	# Agent Implementation
	# =============================================================================

	# Tool names accepted as-is; StudentAgent._validate_tool() remaps any other
	# name through its alias table.
	VALID_TOOLS = {
	    "auto_explore_room",
	    "get_inventory",
	    "get_map",
	    "get_state_info",
	    "get_valid_actions",
	    "play_action",
	}


	class StudentAgent:
	"""ReAct agent with auto-exploration, auto-navigation, and loop detection.

	Each step of run() either issues a scripted action (auto-navigation or a
	stagnation fallback) or asks the LLM for a tool call, then updates the
	room, exit, inventory and score bookkeeping from the tool's text response.
	"""

	def __init__(self):
	self.history: list[dict] = []
	self.recent_actions: list[str] = []
	self.recent_tools: list[str] = []
	self.score: int = 0
	self.max_score: int = 0
	self.current_location: str = ""
	self.no_effect_count: int = 0
	self.steps_since_score_change: int = 0
	self.visited_rooms: set[str] = set()
	# Navigation tracking
	self.tried_exits: dict[str, set[str]] = {} # room -> tried directions
	self.room_exits: dict[str, list[str]] = {} # room -> available exits
	self.room_graph: dict[str, dict[str, str]] = {} # room -> {dir -> dest}
	self.failed_exits: dict[str, set[str]] = {} # room -> directions that don't change room
	self.steps_in_room: int = 0
	# Inventory tracking for smart re-exploration
	self.inventory_version: int = 0 # increments on inventory change
	self.room_explored_at_inv: dict[str, int] = {} # room -> inv_version when last explored
	self.last_inventory_str: str = ""
	# Key action tracking for smarter stagnation handling
	self.key_actions_by_room: dict[str, list[str]] = {}
	self.tried_actions_by_room: dict[str, set[str]] = {}
	self.room_visit_count: dict[str, int] = {}

	async def run(
	self,
	client,
	game: str,
	max_steps: int,
	seed: int,
	verbose: bool = False,
	) -> RunResult:
	"""Run the agent for a game session.

	Args:
	client: Object whose awaitable call_tool(name, args) executes a tool.
	game: Game name; not read inside this method.
	max_steps: Number of loop iterations to run at most.
	seed: Base seed; the step number is added to it for each LLM call.
	verbose: When True, print a trace of every step.

	Returns:
	A RunResult built from the agent's score, the move count, the set of
	locations seen, and the per-step history.
	"""
	# Per-run accumulators that end up in the RunResult.
	locations_visited = set()
	history = []
	moves = 0

	# Step 0: Initial look
	result = await client.call_tool("play_action", {"action": "look"})
	observation = self._extract_result(result)
	self._parse_status(observation)
	locations_visited.add(self.current_location)

	if verbose:
	print(f"\n{observation}")

	# Step 0.5: Auto-explore starting room
	result = await client.call_tool("auto_explore_room", {})
	explore_text = self._extract_result(result)
	self._parse_status(explore_text)
	self._store_exits(explore_text)
	self._update_inventory_from_text(explore_text)
	self.room_explored_at_inv[self.current_location] = self.inventory_version
	observation = f"[Room auto-explored]\n{explore_text}"
	# Most recent exits / key-actions listing, passed to _build_prompt().
	last_valid_actions = explore_text

	if verbose:
	print(f"\n[AUTO-EXPLORE]\n{explore_text}")

	for step in range(1, max_steps + 1):
	# Remembered so room changes can be detected after the tool call.
	old_location = self.current_location

	# Check for untried exits -> auto-navigate (including BFS)
	untried = self._find_unexplored_exit()

	# Force movement if stuck in room without scoring
	if not untried and self.steps_in_room >= 3 and self.steps_since_score_change >= 3:
	exits = self.room_exits.get(self.current_location, [])
	failed = self.failed_exits.get(self.current_location, set())
	valid_exits = [e for e in exits if e not in failed]
	if valid_exits:
	# Prefer exits to least-visited rooms
	best_exit = self._pick_least_visited_exit(valid_exits)
	if best_exit:
	untried = best_exit

	# True for scripted (non-LLM) steps; used below to reset no_effect_count.
	is_auto_nav = bool(untried)
	if untried:
	tool_name = "play_action"
	tool_args = {"action": untried}
	thought = f"Auto-navigating: {untried}"
	if verbose:
	print(f"\n--- Step {step}/{max_steps} [AUTO-NAV] ---")
	print(f"[ACTION] {untried}")
	elif self.steps_since_score_change > 0 and self.steps_since_score_change % 6 == 0:
	# Every 6 stagnant steps, try untried key actions, re-explore, or move
	key_acts = self.key_actions_by_room.get(self.current_location, [])
	tried = self.tried_actions_by_room.get(self.current_location, set())
	# Skip movement commands and pure inspection verbs.
	untried_keys = [a for a in key_acts if a.lower() not in tried
	and a.lower() not in MOVEMENT_COMMANDS
	and not a.lower().startswith(("examine ", "look ", "read ", "search "))]
	if untried_keys:
	tool_name = "play_action"
	tool_args = {"action": untried_keys[0]}
	thought = f"Stagnation: trying untried key action"
	is_auto_nav = True
	if verbose:
	print(f"\n--- Step {step}/{max_steps} [STAGNATION KEY-ACTION: {untried_keys[0]}] ---")
	elif self.steps_in_room >= 4:
	# Force move to least-visited adjacent room
	exits = self.room_exits.get(self.current_location, [])
	failed = self.failed_exits.get(self.current_location, set())
	valid_exits = [e for e in exits if e not in failed]
	best = self._pick_least_visited_exit(valid_exits) if valid_exits else None
	if best:
	tool_name = "play_action"
	tool_args = {"action": best}
	thought = f"Stagnation: moving to least-visited room"
	is_auto_nav = True
	if verbose:
	print(f"\n--- Step {step}/{max_steps} [STAGNATION MOVE: {best}] ---")
	else:
	tool_name = "auto_explore_room"
	tool_args = {}
	thought = "Re-exploring room after stagnation"
	is_auto_nav = True
	if verbose:
	print(f"\n--- Step {step}/{max_steps} [STAGNATION RE-EXPLORE] ---")
	else:
	tool_name = "auto_explore_room"
	tool_args = {}
	thought = "Re-exploring room after stagnation"
	is_auto_nav = True
	if verbose:
	print(f"\n--- Step {step}/{max_steps} [STAGNATION RE-EXPLORE] ---")
	else:
	# No unexplored exits - ask LLM for puzzle-solving
	prompt = self._build_prompt(observation, step, max_steps, last_valid_actions)
	response = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=250)
	thought, tool_name, tool_args = self._parse_response(response)

	if verbose:
	print(f"\n--- Step {step}/{max_steps} ---")
	print(f"[THOUGHT] {thought}")
	print(f"[TOOL] {tool_name}({tool_args})")

	# NOTE(review): block nesting is not visible in this copy of the file;
	# confirm whether the two checks below apply only to LLM-chosen calls
	# or to scripted steps as well.
	tool_name, tool_args = self._validate_tool(tool_name, tool_args)
	tool_name, tool_args = self._anti_loop_check(tool_name, tool_args)

	# Track tool calls
	self.recent_tools.append(tool_name)
	if len(self.recent_tools) > 5:
	self.recent_tools = self.recent_tools[-5:]

	# Track play_action
	if tool_name == "play_action":
	action = tool_args.get("action", "look")
	self.recent_actions.append(action)
	if len(self.recent_actions) > 10:
	self.recent_actions = self.recent_actions[-10:]
	moves += 1

	# Record tried exit
	if action.lower() in MOVEMENT_COMMANDS:
	self.tried_exits.setdefault(self.current_location, set()).add(action.lower())
	# Track tried action for stagnation key-action cycling
	self.tried_actions_by_room.setdefault(self.current_location, set()).add(action.lower())

	# Execute tool
	# A failing tool call is turned into observation text instead of ending the run.
	try:
	result = await client.call_tool(tool_name, tool_args)
	observation = self._extract_result(result)
	except Exception as e:
	observation = f"Error: {e}. Try a different action."

	if verbose:
	print(f"[RESULT] {observation[:300]}")

	# Parse status
	self._parse_status(observation)
	locations_visited.add(self.current_location)

	# Don't let auto-nav NO_EFFECTs pollute LLM loop detection
	if is_auto_nav:
	self.no_effect_count = 0

	# Track time spent in current room
	if self.current_location != old_location:
	self.steps_in_room = 0
	self.room_visit_count[self.current_location] = self.room_visit_count.get(self.current_location, 0) + 1
	else:
	self.steps_in_room += 1

	# Update room graph and track failed movements
	if tool_name == "play_action":
	action = tool_args.get("action", "")
	if action.lower() in MOVEMENT_COMMANDS:
	if self.current_location != old_location:
	self.room_graph.setdefault(old_location, {})[action.lower()] = self.current_location
	else:
	# Movement didn't change room - mark as failed exit
	self.failed_exits.setdefault(old_location, set()).add(action.lower())

	# Scan game response for new direction words
	self._scan_for_new_exits(observation)

	# Auto-explore new rooms
	# The "NEW_ROOM" / "REVISITED" markers are matched as plain substrings of
	# the observation text.
	if "NEW_ROOM" in observation:
	try:
	ae_result = await client.call_tool("auto_explore_room", {})
	ae_text = self._extract_result(ae_result)
	self._parse_status(ae_text)
	self._store_exits(ae_text)
	self._update_inventory_from_text(ae_text)
	self.room_explored_at_inv[self.current_location] = self.inventory_version
	last_valid_actions = ae_text
	observation = f"[New room auto-explored]\n{ae_text}"
	if verbose:
	print(f"[AUTO-EXPLORE]\n{ae_text[:300]}")
	except Exception:
	# Best effort: a failed exploration leaves the observation unchanged.
	pass
	elif "REVISITED" in observation and tool_name == "play_action":
	# Only re-explore if inventory changed since last exploration of this room
	last_inv = self.room_explored_at_inv.get(self.current_location, -1)
	if self.inventory_version > last_inv:
	try:
	ae_result = await client.call_tool("auto_explore_room", {})
	ae_text = self._extract_result(ae_result)
	self._parse_status(ae_text)
	self._store_exits(ae_text)
	self._update_inventory_from_text(ae_text)
	self.room_explored_at_inv[self.current_location] = self.inventory_version
	last_valid_actions = ae_text
	observation = f"[Revisited room re-explored]\n{ae_text}"
	if verbose:
	print(f"[RE-EXPLORE]\n{ae_text[:300]}")
	except Exception:
	# Best effort: a failed re-exploration leaves the observation unchanged.
	pass
	elif tool_name == "get_valid_actions":
	last_valid_actions = observation
	self._store_exits(observation)
	elif tool_name == "auto_explore_room":
	last_valid_actions = observation
	self._store_exits(observation)
	self._update_inventory_from_text(observation)

	# Update history
	# self.history keeps 200 characters of each result for prompting; the
	# returned history keeps 100.
	self.history.append({
	"step": step,
	"thought": thought,
	"tool": tool_name,
	"args": tool_args,
	"result": observation[:200],
	})
	history.append((thought, f"{tool_name}({tool_args})", observation[:100]))

	# Check game over
	if "GAME OVER" in observation or self._is_game_over(observation):
	if verbose:
	print("\n* GAME OVER *")
	break

	# max_score falls back to 350 when no maximum was parsed from the game.
	return RunResult(
	final_score=self.score,
	max_score=self.max_score if self.max_score > 0 else 350,
	moves=moves,
	locations_visited=locations_visited,
	game_completed=self._is_game_over(observation),
	history=history,
	)

	def _update_inventory_from_text(self, text: str) -> None:
	"""Track inventory changes from explore/action results."""
	inv_match = re.search(r'Inventory:\s*(.+)', text)
	if inv_match:
	inv_str = inv_match.group(1).strip()
	if inv_str != self.last_inventory_str:
	self.last_inventory_str = inv_str
	self.inventory_version += 1

	def _store_exits(self, text: str) -> None:
	"""Parse and store available exits and key actions from responses."""
	exits = []
	for line in text.split("\n"):
	line_stripped = line.strip()
	if line_stripped.startswith("Exits:") or line_stripped.startswith("Movement:"):
	exits_str = line_stripped.split(":", 1)[1].strip()
	if exits_str and exits_str != "none":
	exits = [e.strip().lower() for e in exits_str.split(",") if e.strip()]
	elif line_stripped.startswith("Key actions:"):
	actions_str = line_stripped.split(":", 1)[1].strip()
	if actions_str and self.current_location:
	actions = [a.strip() for a in actions_str.split(",") if a.strip()]
	if actions:
	self.key_actions_by_room[self.current_location] = actions
	# Also extract directions from Key actions (e.g. "get in southwest")
	for action in actions_str.split(","):
	action = action.strip().lower()
	for prefix in ("get in ", "go "):
	if action.startswith(prefix):
	dir_part = action[len(prefix):].strip()
	if dir_part in MOVEMENT_COMMANDS:
	exits.append(dir_part)
	if exits and self.current_location:
	self.room_exits[self.current_location] = exits

	def _find_unexplored_exit(self) -> str \| None:
	"""Find an untried exit from current room, or BFS navigate toward one."""
	# Direct unexplored exit from current room
	available = self.room_exits.get(self.current_location, [])
	tried = self.tried_exits.get(self.current_location, set())
	failed = self.failed_exits.get(self.current_location, set())
	for exit_dir in available:
	if exit_dir not in tried and exit_dir not in failed:
	return exit_dir

	# BFS to find nearest room with unexplored exits
	visited_bfs = {self.current_location}
	queue = deque()

	# Seed with known connections from current room
	for direction, dest in self.room_graph.get(self.current_location, {}).items():
	if dest not in visited_bfs:
	visited_bfs.add(dest)
	queue.append((dest, direction)) # (room, first_step_to_get_there)

	while queue:
	room, first_step = queue.popleft()
	# Check if this room has unexplored exits
	room_available = self.room_exits.get(room, [])
	room_tried = self.tried_exits.get(room, set())
	room_failed = self.failed_exits.get(room, set())
	for exit_dir in room_available:
	if exit_dir not in room_tried and exit_dir not in room_failed:
	return first_step # Navigate toward this room

	# Expand through known connections
	for direction, dest in self.room_graph.get(room, {}).items():
	if dest not in visited_bfs:
	visited_bfs.add(dest)
	queue.append((dest, first_step))

	return None

	def _pick_least_visited_exit(self, valid_exits: list[str]) -> str \| None:
	"""Pick exit leading to the least-visited room."""
	graph = self.room_graph.get(self.current_location, {})
	best_exit = None
	min_visits = float('inf')
	last_action = self.recent_actions[-1] if self.recent_actions else ""

	for e in valid_exits:
	if e == last_action:
	continue # Don't go back immediately
	dest = graph.get(e)
	if dest:
	visits = self.room_visit_count.get(dest, 0)
	if visits < min_visits:
	min_visits = visits
	best_exit = e
	else:
	# Unknown destination - prefer this (unexplored)
	return e

	return best_exit or (valid_exits[0] if valid_exits else None)

	def _scan_for_new_exits(self, text: str) -> None:
	"""Scan game text for direction words and add new ones as potential exits."""
	all_dirs = {
	"north", "south", "east", "west",
	"northeast", "northwest", "southeast", "southwest",
	"up", "down",
	}
	current_exits = set(self.room_exits.get(self.current_location, []))
	failed = self.failed_exits.get(self.current_location, set())
	for word in text.lower().split():
	clean = word.strip(".,;:!?\"'()[]")
	if clean in all_dirs and clean not in current_exits and clean not in failed:
	self.room_exits.setdefault(self.current_location, []).append(clean)
	current_exits.add(clean)

	def _build_prompt(self, observation: str, step: int, max_steps: int, valid_actions: str = "") -> str:
	"""Build the prompt for the LLM."""
	parts = []

	remaining = max_steps - step
	if remaining < 15:
	parts.append(f"!!! Only {remaining} steps left! Use items to score! !!!")

	parts.append(f"Score: {self.score}/{self.max_score} \| Step: {step}/{max_steps}")

	if self.visited_rooms:
	parts.append(f"Rooms visited: {len(self.visited_rooms)}")

	# Recent history (compact)
	if self.history:
	parts.append("\nRecent:")
	for entry in self.history[-5:]:
	if isinstance(entry["args"], dict) and "action" in entry["args"]:
	args_str = entry["args"]["action"]
	else:
	args_str = entry["tool"]
	flags = ""
	result_text = entry['result'][:60]
	if "SCORE_CHANGE" in result_text:
	flags = " [SCORED!]"
	elif "NO_EFFECT" in result_text:
	flags = " [NO_EFFECT]"
	parts.append(f" {args_str} -> {result_text}{flags}")

	# Warnings
	if len(self.recent_actions) >= 3:
	last3 = self.recent_actions[-3:]
	if len(set(last3)) == 1:
	parts.append(f"\n!!! STOP repeating '{last3[0]}'! Do something DIFFERENT! !!!")
	elif len(self.recent_actions) >= 4:
	last4 = self.recent_actions[-4:]
	if last4[0] == last4[2] and last4[1] == last4[3]:
	parts.append(f"\n!!! Back-and-forth loop. Go to a NEW room! !!!")

	if self.no_effect_count >= 2:
	parts.append(f"\n!!! {self.no_effect_count} actions had NO EFFECT. Try Key actions or move! !!!")

	if self.steps_since_score_change > 10:
	parts.append(f"\n!!! No score in {self.steps_since_score_change} steps! Move to new rooms or try new items! !!!")

	if self.steps_in_room >= 3:
	parts.append(f"\n!!! Stuck in this room for {self.steps_in_room} turns. Move to a DIFFERENT room! !!!")

	# Current observation
	parts.append(f"\n--- Current ---\n{observation}")

	# Valid actions (if not in observation)
	if valid_actions and "Exits:" not in observation and "Key actions:" in valid_actions:
	parts.append(f"\n--- Available ---\n{valid_actions}")

	return "\n".join(parts)

	def _parse_response(self, response: str) -> tuple[str, str, dict]:
	"""Parse LLM response to extract thought, tool, and arguments.

	Handles multiple formats for robustness with smaller models:
	- Standard: THOUGHT: / TOOL: / ARGS: {"action": "..."}
	- Bare action: ARGS: go north (no JSON)
	- ACTION: format: ACTION: go north
	- Fallback: extract any quoted action from response
	"""
	thought = "No reasoning"
	tool_name = "play_action"
	tool_args = {"action": "look"}
	found_tool = False
	found_args = False

	lines = response.strip().split("\n")

	for line in lines:
	line_clean = line.strip()
	line_upper = line_clean.upper()

	if line_upper.startswith("THOUGHT:"):
	thought = line_clean.split(":", 1)[1].strip()

	elif line_upper.startswith("TOOL:"):
	raw_tool = line_clean.split(":", 1)[1].strip()
	raw_tool = raw_tool.replace("*", "").replace("", "").replace("`", "").strip()
	raw_tool = raw_tool.split("(")[0].strip() # Handle tool(args) format
	raw_tool = raw_tool.split()[0] if raw_tool else "play_action"
	tool_name = raw_tool.lower()
	found_tool = True

	elif line_upper.startswith("ARGS:") or line_upper.startswith("ARG:") or line_upper.startswith("ARGUMENTS:"):
	args_part = line_clean.split(":", 1)[1].strip()
	found_args = True
	try:
	args_part_json = args_part.replace("'", '"')
	tool_args = json.loads(args_part_json)
	except json.JSONDecodeError:
	match = re.search(r'"action"\s:\s"([^"]+)"', args_part)
	if match:
	tool_args = {"action": match.group(1)}
	else:
	# Bare action string (e.g. "ARGS: go north")
	clean = args_part.strip().strip('"').strip("'").strip("{}")
	# Remove JSON-like remnants
	clean = re.sub(r'^action\s:\s', '', clean, flags=re.IGNORECASE)
	clean = clean.strip().strip('"').strip("'")
	if clean:
	tool_args = {"action": clean}

	elif line_upper.startswith("ACTION:") or line_upper.startswith("COMMAND:"):
	# Alternative format some smaller models use
	action_str = line_clean.split(":", 1)[1].strip()
	action_str = action_str.strip('"').strip("'").strip("`")
	if action_str:
	tool_name = "play_action"
	tool_args = {"action": action_str}
	found_tool = True
	found_args = True

	# Fallback: if no structured output found, try to extract an action
	if not found_args:
	# Try to find a quoted command in the response
	quoted = re.findall(r'"([^"]{2,40})"', response)
	if quoted:
	# Use the last quoted string as the action (usually the command)
	candidate = quoted[-1].lower().strip()
	if not any(w in candidate for w in ("thought", "tool", "args", "action")):
	tool_args = {"action": candidate}
	elif not found_tool:
	# Last resort: if response is just a bare game command (1-4 words)
	stripped = response.strip().split("\n")[-1].strip()
	stripped = stripped.strip('"').strip("'").strip("`").strip("*")
	words = stripped.split()
	if 1 <= len(words) <= 5 and len(stripped) < 50:
	tool_args = {"action": stripped.lower()}

	return thought, tool_name, tool_args

	def _validate_tool(self, tool_name: str, tool_args: dict) -> tuple[str, dict]:
	    """Normalise a parsed tool call so it can be sent to the client.

	    Args:
	        tool_name: Tool name as parsed from the LLM reply.
	        tool_args: Arguments as parsed from the LLM reply. Expected to be a
	            dict, but any value is tolerated.

	    Returns:
	        ``(tool_name, tool_args)`` where ``tool_name`` is in ``VALID_TOOLS``
	        (unknown names go through the alias table and default to
	        ``play_action``). For ``play_action`` the args are
	        ``{"action": <command>}``, with the command lower-cased, stripped
	        of ``*`` and backticks, and whitespace-collapsed. A missing,
	        non-string or empty action becomes ``"look"``. Every other tool
	        gets empty args.
	    """
	    tool_aliases = {
	        "action": "play_action", "do": "play_action", "command": "play_action",
	        "play": "play_action", "execute": "play_action", "game": "play_action",
	        "send": "play_action", "act": "play_action",
	        "valid_actions": "get_valid_actions", "validactions": "get_valid_actions",
	        "actions": "get_valid_actions", "available": "get_valid_actions",
	        "state_info": "get_state_info", "stateinfo": "get_state_info",
	        "state": "get_state_info", "memory": "get_state_info", "status": "get_state_info",
	        "info": "get_state_info", "check": "get_state_info",
	        "map": "get_map", "navigation": "get_map", "rooms": "get_map",
	        "inventory": "get_inventory", "inv": "get_inventory",
	        "items": "get_inventory", "carrying": "get_inventory",
	        "explore": "auto_explore_room", "explore_room": "auto_explore_room",
	        "auto_explore": "auto_explore_room", "search": "auto_explore_room",
	    }

	    if tool_name not in VALID_TOOLS:
	        tool_name = tool_aliases.get(tool_name, "play_action")

	    # Only play_action takes arguments.
	    if tool_name != "play_action":
	        return tool_name, {}

	    # The LLM may produce non-dict args or a non-string action (e.g. null or
	    # a number); fall back to "look" instead of raising.
	    raw_action = tool_args.get("action", "look") if isinstance(tool_args, dict) else "look"
	    action = raw_action if isinstance(raw_action, str) else "look"

	    action = action.lower().strip()
	    action = action.replace("*", "").replace("`", "")
	    action = " ".join(action.split())

	    # An empty command is replaced by the default.
	    return tool_name, {"action": action or "look"}

	def _anti_loop_check(self, tool_name: str, tool_args: dict) -> tuple[str, dict]:
	"""Override the LLM's choice if a loop is detected."""
	# Info-tool loop: 2+ non-action tools in a row
	if tool_name != "play_action" and len(self.recent_tools) >= 2:
	if all(t != "play_action" for t in self.recent_tools[-2:]):
	return "play_action", {"action": "look"}

	if tool_name != "play_action":
	return tool_name, tool_args

	action = tool_args.get("action", "look")

	# Block dropping items
	if action.startswith("drop ") or action.startswith("throw "):
	return "get_valid_actions", {}
	if action.startswith("put ") and action.endswith(" down"):
	return "get_valid_actions", {}
	# Block rubbing torch/fire on things
	if " across " in action and ("torch" in action or "fire" in action):
	return "get_valid_actions", {}
	# Block "again" / "g" (repeat last) - can cause hidden loops
	if action in ("again", "g"):
	return "get_valid_actions", {}

	# Exact repeat (except look)
	if self.recent_actions and action == self.recent_actions[-1] and action != "look":
	return "get_valid_actions", {}

	# Back-and-forth: A, B, A, about to do B
	if len(self.recent_actions) >= 3:
	last3 = self.recent_actions[-3:]
	if last3[0] == last3[2] and action == last3[1]:
	return "get_state_info", {}

	# Too many no-effect actions
	if self.no_effect_count >= 3:
	self.no_effect_count = 0
	return "get_valid_actions", {}

	# Too many NPC conversation turns
	npc_keywords = ("ask ", "tell ", "talk ", "say ")
	if action.startswith(npc_keywords):
	npc_count = sum(1 for a in self.recent_actions[-5:] if a.startswith(npc_keywords))
	if npc_count >= 3:
	return "get_map", {}

	return tool_name, tool_args

	def _parse_status(self, text: str) -> None:
	"""Parse status info from tool responses."""
	loc_match = re.search(r'Location:\s*(.+)', text)
	if loc_match:
	new_loc = loc_match.group(1).strip()
	self.current_location = new_loc
	self.visited_rooms.add(new_loc)

	score_match = re.search(r'Score:\s*(\d+)/(\d+)', text)
	if score_match:
	new_score = int(score_match.group(1))
	self.max_score = int(score_match.group(2))
	if new_score > self.score:
	self.steps_since_score_change = 0
	else:
	self.steps_since_score_change += 1
	self.score = new_score

	if "NO_EFFECT" in text:
	self.no_effect_count += 1
	else:
	self.no_effect_count = 0

	def _extract_result(self, result) -> str:
	"""Extract text from MCP tool result."""
	if hasattr(result, 'content') and result.content:
	return result.content[0].text
	if isinstance(result, list) and result:
	return result[0].text if hasattr(result[0], 'text') else str(result[0])
	return str(result)

	def _is_game_over(self, text: str) -> bool:
	"""Check if the game is over."""
	game_over_phrases = [
	"game over", "you have died", "you are dead",
	"* you have died *",
	]
	return any(phrase in text.lower() for phrase in game_over_phrases)


	# =============================================================================
	# Local Testing
	# =============================================================================

	async def test_agent():
	"""Test the agent locally.

	Runs one 50-step "lostpig" session against the MCP server script
	"mcp_server.py" with verbose tracing and prints a short summary.
	"""
	# Imported here so the module can be imported without fastmcp.
	from fastmcp import Client

	agent = StudentAgent()

	# The client is created from the server script path.
	async with Client("mcp_server.py") as client:
	result = await agent.run(
	client=client,
	game="lostpig",
	max_steps=50,
	seed=42,
	verbose=True,
	)

	# Summary: score, number of game commands, number of distinct locations.
	print(f"\n{'=' * 50}")
	print(f"Final Score: {result.final_score}/{result.max_score}")
	print(f"Moves: {result.moves}")
	print(f"Locations: {len(result.locations_visited)}")


	# Script entry point: run the local test session when executed directly.
	if __name__ == "__main__":
	    import asyncio

	    asyncio.run(test_agent())