# Mtanre's picture
# Submit text adventure agent
# 83e130e
"""
ReAct Agent for Text Adventure Games
Uses MCP tools (including Jericho-powered valid actions) to play
text adventure games with reasoning, loop detection, and exploration strategy.
"""
import json
import os
import re
from collections import deque
from dataclasses import dataclass, field
from typing import Optional
from dotenv import load_dotenv
from huggingface_hub import InferenceClient
load_dotenv()
# =============================================================================
# LLM Configuration - DO NOT MODIFY
# =============================================================================
# Identifier of the chat model that call_llm() requests for every completion.
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
# The API token is read from the process environment; load_dotenv() above is
# called first so a local .env file can supply it.
_hf_token = os.getenv("HF_TOKEN")
if not _hf_token:
    # Fail at import time rather than on the first LLM call.
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")
# Shared client used by call_llm(); requests are routed through the "novita" provider.
LLM_CLIENT = InferenceClient(token=_hf_token, provider="novita")
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """Call the LLM with the given prompt.

    Args:
        prompt: Text sent as the single "user" message.
        system_prompt: Text sent as the "system" message ahead of the prompt.
        seed: Sampling seed forwarded to the completion request.
        max_tokens: Upper bound on generated tokens, forwarded to the request.

    Returns:
        The message content of the first choice in the response.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        # Low fixed temperature; combined with the seed this keeps sampling
        # variation small between runs.
        temperature=0.15,
        max_tokens=max_tokens,
        seed=seed,
    )
    # NOTE(review): the content is returned as-is; presumably it can be None
    # for an empty completion - confirm against the client library.
    return response.choices[0].message.content
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class.

    Instances are built at the end of StudentAgent.run().
    """
    # Score held by the agent when the run ended.
    final_score: int
    # Maximum achievable score reported for the game.
    max_score: int
    # Number of game commands issued during the run.
    moves: int
    # Names of the locations seen during the run.
    locations_visited: set[str]
    # Whether the run ended with the game finished.
    game_completed: bool
    # Optional error description; defaults to None.
    error: Optional[str] = None
    # One (thought, tool call, observation) tuple of strings per step.
    history: list[tuple[str, str, str]] = field(default_factory=list)
# =============================================================================
# Constants
# =============================================================================
# Every command string the agent treats as an attempt to move between rooms:
# full direction names, their abbreviations, in/out style verbs, and the
# "go <direction>" phrasings.
MOVEMENT_COMMANDS = (
    {
        "north", "south", "east", "west",
        "northeast", "northwest", "southeast", "southwest",
        "up", "down",
    }
    | {"n", "s", "e", "w", "ne", "nw", "se", "sw", "u", "d"}
    | {"enter", "exit", "in", "out"}
    | {
        "go north", "go south", "go east", "go west",
        "go northeast", "go northwest", "go southeast", "go southwest",
        "go up", "go down",
    }
)
# =============================================================================
# System Prompt
# =============================================================================
# System message sent with every LLM request. The RESPONSE FORMAT section
# (THOUGHT: / TOOL: / ARGS:) is what StudentAgent._parse_response looks for,
# so the labels here and the parser must stay in agreement.
SYSTEM_PROMPT = """You play text adventure games. Your goal: maximize score.
Rooms are auto-explored for you (items taken, containers opened, objects examined).
Your job: solve puzzles the auto-explorer can't.
AVAILABLE TOOLS:
- play_action: Run a game command. Example: ARGS: {"action": "put gem in slot"}
- get_valid_actions: See what commands work here. FREE, no move cost.
- get_state_info: Check score, inventory, history. FREE.
- get_map: See room connections. FREE.
- get_inventory: Check items carried. FREE.
HOW TO SCORE POINTS:
1. Use items on things: "put X in Y", "give X to Y", "insert X in Y", "unlock Y with X"
2. Try Key actions from the list (copy exactly)
3. Read clues, follow instructions found in game text
4. Move to new rooms you haven't visited
RULES:
- NEVER drop items. NEVER "put X down". NEVER throw items away.
- If an action had NO_EFFECT, do NOT repeat it.
- One action per turn.
RESPONSE FORMAT (follow exactly):
THOUGHT: <your reasoning>
TOOL: play_action
ARGS: {"action": "<command>"}"""
# =============================================================================
# Agent Implementation
# =============================================================================
# Tool names the agent is allowed to send to the MCP server; anything else is
# mapped back onto one of these by StudentAgent._validate_tool.
VALID_TOOLS = set(
    (
        "play_action",
        "auto_explore_room",
        "get_valid_actions",
        "get_state_info",
        "get_map",
        "get_inventory",
    )
)
class StudentAgent:
"""ReAct agent with auto-exploration, auto-navigation, and loop detection."""
def __init__(self):
self.history: list[dict] = []
self.recent_actions: list[str] = []
self.recent_tools: list[str] = []
self.score: int = 0
self.max_score: int = 0
self.current_location: str = ""
self.no_effect_count: int = 0
self.steps_since_score_change: int = 0
self.visited_rooms: set[str] = set()
# Navigation tracking
self.tried_exits: dict[str, set[str]] = {} # room -> tried directions
self.room_exits: dict[str, list[str]] = {} # room -> available exits
self.room_graph: dict[str, dict[str, str]] = {} # room -> {dir -> dest}
self.failed_exits: dict[str, set[str]] = {} # room -> directions that don't change room
self.steps_in_room: int = 0
# Inventory tracking for smart re-exploration
self.inventory_version: int = 0 # increments on inventory change
self.room_explored_at_inv: dict[str, int] = {} # room -> inv_version when last explored
self.last_inventory_str: str = ""
# Key action tracking for smarter stagnation handling
self.key_actions_by_room: dict[str, list[str]] = {}
self.tried_actions_by_room: dict[str, set[str]] = {}
self.room_visit_count: dict[str, int] = {}
async def run(
self,
client,
game: str,
max_steps: int,
seed: int,
verbose: bool = False,
) -> RunResult:
"""Run the agent for a game session."""
locations_visited = set()
history = []
moves = 0
# Step 0: Initial look
result = await client.call_tool("play_action", {"action": "look"})
observation = self._extract_result(result)
self._parse_status(observation)
locations_visited.add(self.current_location)
if verbose:
print(f"\n{observation}")
# Step 0.5: Auto-explore starting room
result = await client.call_tool("auto_explore_room", {})
explore_text = self._extract_result(result)
self._parse_status(explore_text)
self._store_exits(explore_text)
self._update_inventory_from_text(explore_text)
self.room_explored_at_inv[self.current_location] = self.inventory_version
observation = f"[Room auto-explored]\n{explore_text}"
last_valid_actions = explore_text
if verbose:
print(f"\n[AUTO-EXPLORE]\n{explore_text}")
for step in range(1, max_steps + 1):
old_location = self.current_location
# Check for untried exits -> auto-navigate (including BFS)
untried = self._find_unexplored_exit()
# Force movement if stuck in room without scoring
if not untried and self.steps_in_room >= 3 and self.steps_since_score_change >= 3:
exits = self.room_exits.get(self.current_location, [])
failed = self.failed_exits.get(self.current_location, set())
valid_exits = [e for e in exits if e not in failed]
if valid_exits:
# Prefer exits to least-visited rooms
best_exit = self._pick_least_visited_exit(valid_exits)
if best_exit:
untried = best_exit
is_auto_nav = bool(untried)
if untried:
tool_name = "play_action"
tool_args = {"action": untried}
thought = f"Auto-navigating: {untried}"
if verbose:
print(f"\n--- Step {step}/{max_steps} [AUTO-NAV] ---")
print(f"[ACTION] {untried}")
elif self.steps_since_score_change > 0 and self.steps_since_score_change % 6 == 0:
# Every 6 stagnant steps, try untried key actions, re-explore, or move
key_acts = self.key_actions_by_room.get(self.current_location, [])
tried = self.tried_actions_by_room.get(self.current_location, set())
untried_keys = [a for a in key_acts if a.lower() not in tried
and a.lower() not in MOVEMENT_COMMANDS
and not a.lower().startswith(("examine ", "look ", "read ", "search "))]
if untried_keys:
tool_name = "play_action"
tool_args = {"action": untried_keys[0]}
thought = f"Stagnation: trying untried key action"
is_auto_nav = True
if verbose:
print(f"\n--- Step {step}/{max_steps} [STAGNATION KEY-ACTION: {untried_keys[0]}] ---")
elif self.steps_in_room >= 4:
# Force move to least-visited adjacent room
exits = self.room_exits.get(self.current_location, [])
failed = self.failed_exits.get(self.current_location, set())
valid_exits = [e for e in exits if e not in failed]
best = self._pick_least_visited_exit(valid_exits) if valid_exits else None
if best:
tool_name = "play_action"
tool_args = {"action": best}
thought = f"Stagnation: moving to least-visited room"
is_auto_nav = True
if verbose:
print(f"\n--- Step {step}/{max_steps} [STAGNATION MOVE: {best}] ---")
else:
tool_name = "auto_explore_room"
tool_args = {}
thought = "Re-exploring room after stagnation"
is_auto_nav = True
if verbose:
print(f"\n--- Step {step}/{max_steps} [STAGNATION RE-EXPLORE] ---")
else:
tool_name = "auto_explore_room"
tool_args = {}
thought = "Re-exploring room after stagnation"
is_auto_nav = True
if verbose:
print(f"\n--- Step {step}/{max_steps} [STAGNATION RE-EXPLORE] ---")
else:
# No unexplored exits - ask LLM for puzzle-solving
prompt = self._build_prompt(observation, step, max_steps, last_valid_actions)
response = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=250)
thought, tool_name, tool_args = self._parse_response(response)
if verbose:
print(f"\n--- Step {step}/{max_steps} ---")
print(f"[THOUGHT] {thought}")
print(f"[TOOL] {tool_name}({tool_args})")
tool_name, tool_args = self._validate_tool(tool_name, tool_args)
tool_name, tool_args = self._anti_loop_check(tool_name, tool_args)
# Track tool calls
self.recent_tools.append(tool_name)
if len(self.recent_tools) > 5:
self.recent_tools = self.recent_tools[-5:]
# Track play_action
if tool_name == "play_action":
action = tool_args.get("action", "look")
self.recent_actions.append(action)
if len(self.recent_actions) > 10:
self.recent_actions = self.recent_actions[-10:]
moves += 1
# Record tried exit
if action.lower() in MOVEMENT_COMMANDS:
self.tried_exits.setdefault(self.current_location, set()).add(action.lower())
# Track tried action for stagnation key-action cycling
self.tried_actions_by_room.setdefault(self.current_location, set()).add(action.lower())
# Execute tool
try:
result = await client.call_tool(tool_name, tool_args)
observation = self._extract_result(result)
except Exception as e:
observation = f"Error: {e}. Try a different action."
if verbose:
print(f"[RESULT] {observation[:300]}")
# Parse status
self._parse_status(observation)
locations_visited.add(self.current_location)
# Don't let auto-nav NO_EFFECTs pollute LLM loop detection
if is_auto_nav:
self.no_effect_count = 0
# Track time spent in current room
if self.current_location != old_location:
self.steps_in_room = 0
self.room_visit_count[self.current_location] = self.room_visit_count.get(self.current_location, 0) + 1
else:
self.steps_in_room += 1
# Update room graph and track failed movements
if tool_name == "play_action":
action = tool_args.get("action", "")
if action.lower() in MOVEMENT_COMMANDS:
if self.current_location != old_location:
self.room_graph.setdefault(old_location, {})[action.lower()] = self.current_location
else:
# Movement didn't change room - mark as failed exit
self.failed_exits.setdefault(old_location, set()).add(action.lower())
# Scan game response for new direction words
self._scan_for_new_exits(observation)
# Auto-explore new rooms
if "NEW_ROOM" in observation:
try:
ae_result = await client.call_tool("auto_explore_room", {})
ae_text = self._extract_result(ae_result)
self._parse_status(ae_text)
self._store_exits(ae_text)
self._update_inventory_from_text(ae_text)
self.room_explored_at_inv[self.current_location] = self.inventory_version
last_valid_actions = ae_text
observation = f"[New room auto-explored]\n{ae_text}"
if verbose:
print(f"[AUTO-EXPLORE]\n{ae_text[:300]}")
except Exception:
pass
elif "REVISITED" in observation and tool_name == "play_action":
# Only re-explore if inventory changed since last exploration of this room
last_inv = self.room_explored_at_inv.get(self.current_location, -1)
if self.inventory_version > last_inv:
try:
ae_result = await client.call_tool("auto_explore_room", {})
ae_text = self._extract_result(ae_result)
self._parse_status(ae_text)
self._store_exits(ae_text)
self._update_inventory_from_text(ae_text)
self.room_explored_at_inv[self.current_location] = self.inventory_version
last_valid_actions = ae_text
observation = f"[Revisited room re-explored]\n{ae_text}"
if verbose:
print(f"[RE-EXPLORE]\n{ae_text[:300]}")
except Exception:
pass
elif tool_name == "get_valid_actions":
last_valid_actions = observation
self._store_exits(observation)
elif tool_name == "auto_explore_room":
last_valid_actions = observation
self._store_exits(observation)
self._update_inventory_from_text(observation)
# Update history
self.history.append({
"step": step,
"thought": thought,
"tool": tool_name,
"args": tool_args,
"result": observation[:200],
})
history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
# Check game over
if "GAME OVER" in observation or self._is_game_over(observation):
if verbose:
print("\n*** GAME OVER ***")
break
return RunResult(
final_score=self.score,
max_score=self.max_score if self.max_score > 0 else 350,
moves=moves,
locations_visited=locations_visited,
game_completed=self._is_game_over(observation),
history=history,
)
def _update_inventory_from_text(self, text: str) -> None:
"""Track inventory changes from explore/action results."""
inv_match = re.search(r'Inventory:\s*(.+)', text)
if inv_match:
inv_str = inv_match.group(1).strip()
if inv_str != self.last_inventory_str:
self.last_inventory_str = inv_str
self.inventory_version += 1
def _store_exits(self, text: str) -> None:
"""Parse and store available exits and key actions from responses."""
exits = []
for line in text.split("\n"):
line_stripped = line.strip()
if line_stripped.startswith("Exits:") or line_stripped.startswith("Movement:"):
exits_str = line_stripped.split(":", 1)[1].strip()
if exits_str and exits_str != "none":
exits = [e.strip().lower() for e in exits_str.split(",") if e.strip()]
elif line_stripped.startswith("Key actions:"):
actions_str = line_stripped.split(":", 1)[1].strip()
if actions_str and self.current_location:
actions = [a.strip() for a in actions_str.split(",") if a.strip()]
if actions:
self.key_actions_by_room[self.current_location] = actions
# Also extract directions from Key actions (e.g. "get in southwest")
for action in actions_str.split(","):
action = action.strip().lower()
for prefix in ("get in ", "go "):
if action.startswith(prefix):
dir_part = action[len(prefix):].strip()
if dir_part in MOVEMENT_COMMANDS:
exits.append(dir_part)
if exits and self.current_location:
self.room_exits[self.current_location] = exits
def _find_unexplored_exit(self) -> str | None:
"""Find an untried exit from current room, or BFS navigate toward one."""
# Direct unexplored exit from current room
available = self.room_exits.get(self.current_location, [])
tried = self.tried_exits.get(self.current_location, set())
failed = self.failed_exits.get(self.current_location, set())
for exit_dir in available:
if exit_dir not in tried and exit_dir not in failed:
return exit_dir
# BFS to find nearest room with unexplored exits
visited_bfs = {self.current_location}
queue = deque()
# Seed with known connections from current room
for direction, dest in self.room_graph.get(self.current_location, {}).items():
if dest not in visited_bfs:
visited_bfs.add(dest)
queue.append((dest, direction)) # (room, first_step_to_get_there)
while queue:
room, first_step = queue.popleft()
# Check if this room has unexplored exits
room_available = self.room_exits.get(room, [])
room_tried = self.tried_exits.get(room, set())
room_failed = self.failed_exits.get(room, set())
for exit_dir in room_available:
if exit_dir not in room_tried and exit_dir not in room_failed:
return first_step # Navigate toward this room
# Expand through known connections
for direction, dest in self.room_graph.get(room, {}).items():
if dest not in visited_bfs:
visited_bfs.add(dest)
queue.append((dest, first_step))
return None
def _pick_least_visited_exit(self, valid_exits: list[str]) -> str | None:
"""Pick exit leading to the least-visited room."""
graph = self.room_graph.get(self.current_location, {})
best_exit = None
min_visits = float('inf')
last_action = self.recent_actions[-1] if self.recent_actions else ""
for e in valid_exits:
if e == last_action:
continue # Don't go back immediately
dest = graph.get(e)
if dest:
visits = self.room_visit_count.get(dest, 0)
if visits < min_visits:
min_visits = visits
best_exit = e
else:
# Unknown destination - prefer this (unexplored)
return e
return best_exit or (valid_exits[0] if valid_exits else None)
def _scan_for_new_exits(self, text: str) -> None:
"""Scan game text for direction words and add new ones as potential exits."""
all_dirs = {
"north", "south", "east", "west",
"northeast", "northwest", "southeast", "southwest",
"up", "down",
}
current_exits = set(self.room_exits.get(self.current_location, []))
failed = self.failed_exits.get(self.current_location, set())
for word in text.lower().split():
clean = word.strip(".,;:!?\"'()[]")
if clean in all_dirs and clean not in current_exits and clean not in failed:
self.room_exits.setdefault(self.current_location, []).append(clean)
current_exits.add(clean)
def _build_prompt(self, observation: str, step: int, max_steps: int, valid_actions: str = "") -> str:
"""Build the prompt for the LLM."""
parts = []
remaining = max_steps - step
if remaining < 15:
parts.append(f"!!! Only {remaining} steps left! Use items to score! !!!")
parts.append(f"Score: {self.score}/{self.max_score} | Step: {step}/{max_steps}")
if self.visited_rooms:
parts.append(f"Rooms visited: {len(self.visited_rooms)}")
# Recent history (compact)
if self.history:
parts.append("\nRecent:")
for entry in self.history[-5:]:
if isinstance(entry["args"], dict) and "action" in entry["args"]:
args_str = entry["args"]["action"]
else:
args_str = entry["tool"]
flags = ""
result_text = entry['result'][:60]
if "SCORE_CHANGE" in result_text:
flags = " [SCORED!]"
elif "NO_EFFECT" in result_text:
flags = " [NO_EFFECT]"
parts.append(f" {args_str} -> {result_text}{flags}")
# Warnings
if len(self.recent_actions) >= 3:
last3 = self.recent_actions[-3:]
if len(set(last3)) == 1:
parts.append(f"\n!!! STOP repeating '{last3[0]}'! Do something DIFFERENT! !!!")
elif len(self.recent_actions) >= 4:
last4 = self.recent_actions[-4:]
if last4[0] == last4[2] and last4[1] == last4[3]:
parts.append(f"\n!!! Back-and-forth loop. Go to a NEW room! !!!")
if self.no_effect_count >= 2:
parts.append(f"\n!!! {self.no_effect_count} actions had NO EFFECT. Try Key actions or move! !!!")
if self.steps_since_score_change > 10:
parts.append(f"\n!!! No score in {self.steps_since_score_change} steps! Move to new rooms or try new items! !!!")
if self.steps_in_room >= 3:
parts.append(f"\n!!! Stuck in this room for {self.steps_in_room} turns. Move to a DIFFERENT room! !!!")
# Current observation
parts.append(f"\n--- Current ---\n{observation}")
# Valid actions (if not in observation)
if valid_actions and "Exits:" not in observation and "Key actions:" in valid_actions:
parts.append(f"\n--- Available ---\n{valid_actions}")
return "\n".join(parts)
def _parse_response(self, response: str) -> tuple[str, str, dict]:
"""Parse LLM response to extract thought, tool, and arguments.
Handles multiple formats for robustness with smaller models:
- Standard: THOUGHT: / TOOL: / ARGS: {"action": "..."}
- Bare action: ARGS: go north (no JSON)
- ACTION: format: ACTION: go north
- Fallback: extract any quoted action from response
"""
thought = "No reasoning"
tool_name = "play_action"
tool_args = {"action": "look"}
found_tool = False
found_args = False
lines = response.strip().split("\n")
for line in lines:
line_clean = line.strip()
line_upper = line_clean.upper()
if line_upper.startswith("THOUGHT:"):
thought = line_clean.split(":", 1)[1].strip()
elif line_upper.startswith("TOOL:"):
raw_tool = line_clean.split(":", 1)[1].strip()
raw_tool = raw_tool.replace("**", "").replace("*", "").replace("`", "").strip()
raw_tool = raw_tool.split("(")[0].strip() # Handle tool(args) format
raw_tool = raw_tool.split()[0] if raw_tool else "play_action"
tool_name = raw_tool.lower()
found_tool = True
elif line_upper.startswith("ARGS:") or line_upper.startswith("ARG:") or line_upper.startswith("ARGUMENTS:"):
args_part = line_clean.split(":", 1)[1].strip()
found_args = True
try:
args_part_json = args_part.replace("'", '"')
tool_args = json.loads(args_part_json)
except json.JSONDecodeError:
match = re.search(r'"action"\s*:\s*"([^"]+)"', args_part)
if match:
tool_args = {"action": match.group(1)}
else:
# Bare action string (e.g. "ARGS: go north")
clean = args_part.strip().strip('"').strip("'").strip("{}")
# Remove JSON-like remnants
clean = re.sub(r'^action\s*:\s*', '', clean, flags=re.IGNORECASE)
clean = clean.strip().strip('"').strip("'")
if clean:
tool_args = {"action": clean}
elif line_upper.startswith("ACTION:") or line_upper.startswith("COMMAND:"):
# Alternative format some smaller models use
action_str = line_clean.split(":", 1)[1].strip()
action_str = action_str.strip('"').strip("'").strip("`")
if action_str:
tool_name = "play_action"
tool_args = {"action": action_str}
found_tool = True
found_args = True
# Fallback: if no structured output found, try to extract an action
if not found_args:
# Try to find a quoted command in the response
quoted = re.findall(r'"([^"]{2,40})"', response)
if quoted:
# Use the last quoted string as the action (usually the command)
candidate = quoted[-1].lower().strip()
if not any(w in candidate for w in ("thought", "tool", "args", "action")):
tool_args = {"action": candidate}
elif not found_tool:
# Last resort: if response is just a bare game command (1-4 words)
stripped = response.strip().split("\n")[-1].strip()
stripped = stripped.strip('"').strip("'").strip("`").strip("*")
words = stripped.split()
if 1 <= len(words) <= 5 and len(stripped) < 50:
tool_args = {"action": stripped.lower()}
return thought, tool_name, tool_args
def _validate_tool(self, tool_name: str, tool_args: dict) -> tuple[str, dict]:
"""Fix common LLM mistakes in tool names."""
tool_aliases = {
"action": "play_action", "do": "play_action", "command": "play_action",
"play": "play_action", "execute": "play_action", "game": "play_action",
"send": "play_action", "act": "play_action",
"valid_actions": "get_valid_actions", "validactions": "get_valid_actions",
"actions": "get_valid_actions", "available": "get_valid_actions",
"state_info": "get_state_info", "stateinfo": "get_state_info",
"state": "get_state_info", "memory": "get_state_info", "status": "get_state_info",
"info": "get_state_info", "check": "get_state_info",
"map": "get_map", "navigation": "get_map", "rooms": "get_map",
"inventory": "get_inventory", "inv": "get_inventory",
"items": "get_inventory", "carrying": "get_inventory",
"explore": "auto_explore_room", "explore_room": "auto_explore_room",
"auto_explore": "auto_explore_room", "search": "auto_explore_room",
}
if tool_name not in VALID_TOOLS:
tool_name = tool_aliases.get(tool_name, "play_action")
if tool_name != "play_action":
tool_args = {}
if tool_name == "play_action":
action = tool_args.get("action", "look")
action = action.lower().strip()
action = action.replace("**", "").replace("*", "").replace("`", "")
action = " ".join(action.split())
tool_args = {"action": action}
return tool_name, tool_args
def _anti_loop_check(self, tool_name: str, tool_args: dict) -> tuple[str, dict]:
"""Override the LLM's choice if a loop is detected."""
# Info-tool loop: 2+ non-action tools in a row
if tool_name != "play_action" and len(self.recent_tools) >= 2:
if all(t != "play_action" for t in self.recent_tools[-2:]):
return "play_action", {"action": "look"}
if tool_name != "play_action":
return tool_name, tool_args
action = tool_args.get("action", "look")
# Block dropping items
if action.startswith("drop ") or action.startswith("throw "):
return "get_valid_actions", {}
if action.startswith("put ") and action.endswith(" down"):
return "get_valid_actions", {}
# Block rubbing torch/fire on things
if " across " in action and ("torch" in action or "fire" in action):
return "get_valid_actions", {}
# Block "again" / "g" (repeat last) - can cause hidden loops
if action in ("again", "g"):
return "get_valid_actions", {}
# Exact repeat (except look)
if self.recent_actions and action == self.recent_actions[-1] and action != "look":
return "get_valid_actions", {}
# Back-and-forth: A, B, A, about to do B
if len(self.recent_actions) >= 3:
last3 = self.recent_actions[-3:]
if last3[0] == last3[2] and action == last3[1]:
return "get_state_info", {}
# Too many no-effect actions
if self.no_effect_count >= 3:
self.no_effect_count = 0
return "get_valid_actions", {}
# Too many NPC conversation turns
npc_keywords = ("ask ", "tell ", "talk ", "say ")
if action.startswith(npc_keywords):
npc_count = sum(1 for a in self.recent_actions[-5:] if a.startswith(npc_keywords))
if npc_count >= 3:
return "get_map", {}
return tool_name, tool_args
def _parse_status(self, text: str) -> None:
"""Parse status info from tool responses."""
loc_match = re.search(r'Location:\s*(.+)', text)
if loc_match:
new_loc = loc_match.group(1).strip()
self.current_location = new_loc
self.visited_rooms.add(new_loc)
score_match = re.search(r'Score:\s*(\d+)/(\d+)', text)
if score_match:
new_score = int(score_match.group(1))
self.max_score = int(score_match.group(2))
if new_score > self.score:
self.steps_since_score_change = 0
else:
self.steps_since_score_change += 1
self.score = new_score
if "NO_EFFECT" in text:
self.no_effect_count += 1
else:
self.no_effect_count = 0
def _extract_result(self, result) -> str:
"""Extract text from MCP tool result."""
if hasattr(result, 'content') and result.content:
return result.content[0].text
if isinstance(result, list) and result:
return result[0].text if hasattr(result[0], 'text') else str(result[0])
return str(result)
def _is_game_over(self, text: str) -> bool:
"""Check if the game is over."""
game_over_phrases = [
"game over", "you have died", "you are dead",
"*** you have died ***",
]
return any(phrase in text.lower() for phrase in game_over_phrases)
# =============================================================================
# Local Testing
# =============================================================================
async def test_agent():
    """Play a 50-step 'lostpig' session against mcp_server.py and print a summary."""
    # Imported lazily so the module can be imported without fastmcp installed.
    from fastmcp import Client

    player = StudentAgent()
    async with Client("mcp_server.py") as mcp_client:
        result = await player.run(client=mcp_client, game="lostpig", max_steps=50, seed=42, verbose=True)
        print(f"\n{'=' * 50}")
        print(f"Final Score: {result.final_score}/{result.max_score}")
        print(f"Moves: {result.moves}")
        print(f"Locations: {len(result.locations_visited)}")
# Script entry point: run the local test session only when executed directly,
# not when the module is imported.
if __name__ == "__main__":
    import asyncio
    asyncio.run(test_agent())