Spaces:

LLM-course
/

Text-game-agent-EILLES

Running

App Files Files Community

Text-game-agent-EILLES / mcp_server.py

stephecw

Upload 6 files

e4885d4 verified 22 days ago

raw

history blame contribute delete

28.3 kB

	"""
	Student MCP Server for Text Adventure Games

	This is your MCP server submission. Implement the tools that your agent
	will use to play text adventure games.

	Required tool:
	play_action(action: str) -> str
	Execute a game command and return the result.

	Recommended tools:
	memory() -> str
	Return current game state, score, and recent history.

	inventory() -> str
	Return the player's current inventory.

	get_map() -> str
	Return a map of explored locations.

	Test your server with:
	fastmcp dev submission_template/mcp_server.py

	Then open the MCP Inspector in your browser to test the tools interactively.
	"""

	import sys
	import os
	import re
	from collections import defaultdict
	import json
	import hashlib
	from copy import deepcopy

	# Add parent directory to path to import games module
	sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

	from fastmcp import FastMCP
	from games.zork_env import TextAdventureEnv


	# =============================================================================
	# Create the MCP Server
	# =============================================================================

	# Single server instance; every @mcp.tool() function below registers itself on it.
	mcp = FastMCP("Student Text Adventure Server")


	# =============================================================================
	# Game State Management
	# =============================================================================

	class GameManager:
	"""
	Manages the text adventure game state.

	TODO: Extend this class to track:
	- Action history (for memory tool)
	- Explored locations (for mapping)
	- Current score and moves
	"""

	def __init__(self):
	self.env: TextAdventureEnv = None
	self.state = None
	self.game_name: str = ""

	# History
	self.max_history = 50 # Max number of recent actions to store
	self.history: list[tuple[str, str]] = []

	# checkpoints
	self.checkpoints = {} # name -> opaque state snapshot
	self.last_reward = 0


	# Map tracking
	self.locations = set() # Set of explored locations
	self.current_location: str \| None = None

	# Transions
	self.transitions = defaultdict(dict) # location -> action -> new_location

	# Action tracking
	self.actions_tried_by_location = defaultdict(list) # location -> list of actions tried
	self._actions_tried_set = defaultdict(set)

	def initialize(self, game: str = "zork1"):
	"""Initialize or reset the game."""
	self.game_name = game
	self.env = TextAdventureEnv(game)
	self.state = self.env.reset()

	# reset tracking
	self.history = []
	self.locations = set()
	self.transitions = defaultdict(dict)
	self.actions_tried_by_location = defaultdict(list)
	self._actions_tried_set = defaultdict(set)

	# set initial location
	obs = (self.state.observation or "")
	self.current_location = self._extract_location(obs)
	if self.current_location:
	self.locations.add(self.current_location)
	return obs


	def step(self, action: str) -> str:
	"""Execute an action and return the result."""
	if self.env is None:
	self.initialize()

	action_clean = (action or "").strip().lower()

	from_location = self.current_location

	# track "action tried" in the room
	if from_location and action_clean not in self._actions_tried_set[from_location]:
	self.actions_tried_by_location[from_location].append(action_clean)
	self._actions_tried_set[from_location].add(action_clean)

	#Execute the requested action
	self.state = self.env.step(action)
	raw_obs = self.state.observation or ""

	# No forced look: avoid consuming extra moves
	result_obs = raw_obs

	# Track history (single action only)
	self.history.append((action, result_obs))

	# Cap history
	while len(self.history) > self.max_history:
	self.history.pop(0)

	# update last reward
	try:
	self.last_reward = getattr(self.state, "reward", 0) or 0
	except Exception:
	self.last_reward = 0

	# Track locations + transitions using the best available observation (result_obs)
	new_location = self._extract_location(result_obs)
	if new_location:
	self.locations.add(new_location)

	# Record transition only if location actually changed
	if from_location and new_location != from_location:
	# store canonical mapping: from -> action -> to (overwrite is OK)
	self.transitions[from_location][action_clean] = new_location

	# update current location
	self.current_location = new_location

	return result_obs

	def _extract_location(self, observation: str) -> str \| None:
	"""Extract the current location name from the observation text."""
	# This is a heuristic that works for Zork and similar games where the location is in ALL CAPS at the start
	if not observation:
	return None

	for line in observation.splitlines():
	s = line.strip()
	if not s:
	continue

	low = s.lower()

	# filter common non-room headers / system lines
	if low.startswith("copyright"):
	continue
	if "trademark" in low:
	continue
	if low.startswith("revision"):
	continue
	if low.startswith("serial number"):
	continue
	if "revision" in low and "serial" in low:
	continue

	# room titles in Zork are typically short and NOT full sentences
	if len(s) > 50:
	continue
	if s.endswith((".", "!", "?", ":", ";")):
	continue

	# also avoid lines that look like status messages
	bad_starts = (
	"you ", "it ", "i ", "there ", "the ", "a ", "an ",
	"what ", "can't ", "i don't", "unknown", "error"
	)
	if low.startswith(bad_starts):
	continue

	return s

	return None

	def get_memory(self, last_k: int = 10) -> str:
	"""Return a short summary of state + recent history."""
	loc = self.current_location or "Unknown"
	score = self.get_score()
	moves = self.get_moves()
	obs = (self.state.observation or "").strip() if self.state else ""

	recent = self.history[-last_k:] if self.history else []
	if recent:
	recent_lines = "\n".join(
	f"- {a} -> {(o.splitlines()[0] if o else '')}"
	for a, o in recent
	)
	else:
	recent_lines = "(none)"

	return (
	f"Game: {self.game_name}\n"
	f"Location: {loc}\n"
	f"Score: {score}\n"
	f"Moves: {moves}\n\n"
	f"Recent actions:\n{recent_lines}\n\n"
	f"Last observation:\n{obs}"
	)

	def get_score(self) -> int:
	"""Get current score."""
	return self.state.score if self.state else 0

	def get_moves(self) -> int:
	"""Get number of moves taken."""
	return self.state.moves if self.state else 0

	def get_map(self) -> str:
	"""Return a simple text map of explored locations with action-labeled transitions."""
	if not self.locations:
	return "No locations explored yet."

	lines = [f"Current location: {self.current_location or 'Unknown'}", ""]

	lines.append("Explored locations:")
	for loc in sorted(self.locations):
	lines.append(f"- {loc}")

	lines.append("")
	lines.append("Transitions (from --action--> to):")

	any_edge = False
	for frm in sorted(self.transitions.keys()):
	for act, to in sorted(self.transitions[frm].items()):
	any_edge = True
	lines.append(f"- {frm} --{act}--> {to}")

	if not any_edge:
	lines.append("- (none yet)")

	return "\n".join(lines)


	def _item_name(self, item) -> str:
	"""Best-effort: extract a human-friendly name from a Jericho item object."""
	for attr in ("name", "label", "noun", "text"):
	v = getattr(item, attr, None)
	if isinstance(v, str) and v.strip():
	return v.strip()

	s = str(item)
	m = re.search(r"Obj\d+:\s*([^\s]+)", s)
	if m:
	return m.group(1)

	return s.strip() if s.strip() else "unknown"


	def get_inventory(self) -> str:
	"""
	Return inventory WITHOUT advancing the game (does not call env.step).
	If state.inventory doesn't exist, returns a fallback message.
	"""
	if not self.state:
	return "Inventory not available (game not initialized)."

	inv = getattr(self.state, "inventory", None)

	# Case 0: inventory exposed as a string
	if isinstance(inv, str):
	return inv.strip() if inv.strip() else "You are not carrying anything."

	# Case 1: inventory exposed as list/tuple of objects
	if isinstance(inv, (list, tuple)):
	if len(inv) == 0:
	return "You are not carrying anything."
	pretty = [self._item_name(x) for x in inv]
	return "You are carrying:\n" + "\n".join(f"- {name}" for name in pretty)

	return "Inventory not available from state (no state.inventory)."


	def get_valid_actions(self, max_actions: int = 30) -> str:
	try:
	# Option A: wrapper exposes it
	if self.env is not None and hasattr(self.env, "get_valid_actions"):
	valid = self.env.get_valid_actions()
	# Option B: underlying Jericho env
	elif self.env is not None and hasattr(self.env, "env") and hasattr(self.env.env, "get_valid_actions"):
	valid = self.env.env.get_valid_actions()
	else:
	valid = None

	if isinstance(valid, (list, tuple)) and valid:
	valid = [str(v) for v in valid][:max_actions]
	return "Valid actions:\n" + "\n".join(f"- {v}" for v in valid)
	except Exception:
	pass

	return (
	"Valid actions (fallback):\n"
	"- look\n- inventory\n- north/south/east/west/up/down/in/out\n"
	"- take <noun>\n- drop <noun>\n- open <noun>\n- examine <noun>\n- read <noun>\n"
	)


	def get_actions_tried(self, limit_per_room: int = 50) -> str:
	"""Return actions tried per location (most recent last)."""
	if not self.actions_tried_by_location:
	return "No actions tracked yet."

	lines = [
	f"Current location: {self.current_location or 'Unknown'}",
	"",
	"Actions tried by location:",
	]

	for loc in sorted(self.actions_tried_by_location.keys()):
	acts = self.actions_tried_by_location[loc]
	if not acts:
	continue
	shown = acts[-limit_per_room:]
	lines.append(f"- {loc}:")
	for a in shown:
	lines.append(f" - {a}")

	return "\n".join(lines)

	def _snapshot(self):
	"""
	Best-effort snapshot. Tries env/state native methods if available, else deepcopies state.
	"""
	if self.env is None:
	return None

	# 1) Native env snapshot if exists
	for obj in (self.env, getattr(self.env, "env", None)):
	if obj is None:
	continue
	if hasattr(obj, "get_state") and callable(obj.get_state):
	try:
	return ("native", obj.get_state())
	except Exception:
	pass

	# 2) Fallback: deepcopy state object (works often, not always)
	try:
	return ("deepcopy", deepcopy(self.state))
	except Exception:
	# 3) Last resort: keep nothing (restore impossible)
	return ("none", None)


	def _restore_snapshot(self, snap):
	"""
	Best-effort restore snapshot created by _snapshot().
	"""
	if self.env is None or snap is None:
	return False

	kind, payload = snap
	if kind == "native":
	for obj in (self.env, getattr(self.env, "env", None)):
	if obj is None:
	continue
	if hasattr(obj, "set_state") and callable(obj.set_state):
	try:
	obj.set_state(payload)
	# re-sync wrapper state if needed
	if hasattr(self.env, "state"):
	try:
	self.state = self.env.state
	except Exception:
	pass
	return True
	except Exception:
	pass
	return False

	if kind == "deepcopy":
	try:
	self.state = payload
	# If wrapper uses internal state, try to set it too
	if hasattr(self.env, "state"):
	try:
	self.env.state = payload
	except Exception:
	pass
	return True
	except Exception:
	return False

	return False

	def _state_hash(self) -> str:
	"""
	Stable-ish hash to detect loops. Prefer env-provided hash; else hash observation+inv+loc+score+moves.
	"""
	# If Jericho exposes something like state.hash or env.get_world_state_hash, use it (best-effort).
	for obj in (self.state, self.env, getattr(self.env, "env", None)):
	if obj is None:
	continue
	for attr in ("hash", "state_hash", "world_hash"):
	if hasattr(obj, attr):
	try:
	v = getattr(obj, attr)
	if callable(v):
	v = v()
	if isinstance(v, (str, int)):
	return str(v)
	except Exception:
	pass

	loc = self.current_location or ""
	obs = (getattr(self.state, "observation", "") or "")
	score = self.get_score()
	moves = self.get_moves()
	inv = getattr(self.state, "inventory", None)

	inv_str = ""
	if isinstance(inv, str):
	inv_str = inv
	elif isinstance(inv, (list, tuple)):
	inv_str = "\|".join(self._item_name(x) for x in inv)

	payload = f"{loc}\n{score}\n{moves}\n{inv_str}\n{obs[:500]}"
	return hashlib.sha1(payload.encode("utf-8", errors="ignore")).hexdigest()


	def _extract_visible_objects_heuristic(self, observation: str) -> list[str]:
	"""
	Heuristic object noun extraction. Not perfect but useful.
	Keeps short nouns; removes stopwords; favors known Zork-ish interactables.
	"""
	if not observation:
	return []

	obs = observation.lower()

	# quick whitelist of common objects
	common = [
	"mailbox","leaflet","door","window","grating","lamp","lantern","sword","knife",
	"trapdoor","chest","box","table","rug","mat","rope","key","keys","bottle","water",
	"egg","nest","tree","stairs","staircase","gate"
	]
	found = [w for w in common if w in obs]

	# de-dup
	out = []
	seen = set()
	for x in found:
	if x not in seen:
	out.append(x)
	seen.add(x)
	return out


	def get_state_struct(self) -> dict:
	obs = (getattr(self.state, "observation", "") or "")
	inv = getattr(self.state, "inventory", None)

	inv_list = []
	if isinstance(inv, str):
	# can't parse reliably => keep as one string
	inv_list = [inv.strip()] if inv.strip() else []
	elif isinstance(inv, (list, tuple)):
	inv_list = [self._item_name(x) for x in inv]

	return {
	"game": self.game_name,
	"location": self.current_location or "Unknown",
	"score": self.get_score(),
	"moves": self.get_moves(),
	"done": bool(getattr(self.state, "done", False)) if self.state else False,
	"last_reward": int(getattr(self, "last_reward", 0) or 0),
	"state_hash": self._state_hash(),
	"inventory": inv_list,
	"visible_objects": self._extract_visible_objects_heuristic(obs),
	"last_observation": obs,
	}


	# Module-wide GameManager shared by every tool call.
	_game = GameManager()


	def get_game() -> GameManager:
	    """Return the shared game manager, starting the game on first use."""
	    if _game.env is None:
	        # The evaluator selects the game through the GAME environment variable.
	        _game.initialize(os.environ.get("GAME", "zork1"))
	    return _game


	# =============================================================================
	# MCP Tools - IMPLEMENT THESE
	# =============================================================================

	@mcp.tool()
	def play_action(action: str) -> str:
	    """
	    Execute a game command and return the result.

	    This is the main tool for interacting with the game.

	    Args:
	    action: The command to execute (e.g., "north", "take lamp", "open mailbox")

	    Returns:
	    The game's response to the action

	    Valid commands include:
	    - Movement: north, south, east, west, up, down, enter, exit
	    - Objects: take <item>, drop <item>, open <thing>, examine <thing>
	    - Other: look, inventory, read <thing>, turn on lamp
	    """
	    game = get_game()

	    # Reject blank input before it reaches the game.
	    command = (action or "").strip()
	    if not command:
	        return "I didn't receive an action. Try: look, north, open mailbox, take lamp."

	    # The observation comes first; optional annotations are appended after it.
	    parts = [game.step(command)]

	    try:
	        current = game.state
	        reward = getattr(current, "reward", 0) or 0
	        score = getattr(current, "score", None)
	        finished = bool(getattr(current, "done", False))

	        if reward and score is not None and reward > 0:
	            parts.append(f"\n\n+{reward} points! (Total: {score})")

	        if finished:
	            parts.append("\n\n* GAME OVER *")
	    except Exception:
	        # Annotations are a bonus: never let them hide the observation.
	        pass

	    return "".join(parts)

	@mcp.tool()
	def memory() -> str:
	    """
	    Return a compact summary of the current game state:
	    location, score, moves, recent history, last observation.
	    """
	    # The summary covers the ten most recent actions.
	    return get_game().get_memory(last_k=10)


	@mcp.tool()
	def get_map() -> str:
	    """
	    Return a simple map of explored locations + known transitions.
	    """
	    # Pure delegation: the manager owns the location/transition bookkeeping.
	    return get_game().get_map()


	@mcp.tool()
	def inventory() -> str:
	    """
	    Return the player's inventory WITHOUT advancing the game.
	    """
	    # Read from the cached state; no command is sent to the game.
	    return get_game().get_inventory()


	@mcp.tool()
	def valid_actions() -> str:
	    """
	    Return a list of likely valid actions (best-effort).
	    """
	    # Capped at 30 entries to keep the tool output short.
	    return get_game().get_valid_actions(max_actions=30)


	@mcp.tool()
	def tried_actions() -> str:
	    """
	    Return actions tried, grouped by location, to avoid loops.
	    """
	    # At most 50 actions are listed per location.
	    return get_game().get_actions_tried(limit_per_room=50)


	@mcp.tool()
	def hint() -> str:
	    """
	    Get non-spoiler hints based on the current observation/inventory/location.
	    """
	    game = get_game()

	    text = (getattr(game.state, "observation", "") or "").lower()
	    place = (game.current_location or "").lower()

	    # Inventory as one lowercase string, read from state so no move is spent.
	    carried = ""
	    raw_inventory = getattr(game.state, "inventory", None)
	    if isinstance(raw_inventory, str):
	        carried = raw_inventory.lower()
	    elif isinstance(raw_inventory, (list, tuple)):
	        labels = []
	        for entry in raw_inventory:
	            try:
	                labels.append(game._item_name(entry).lower())
	            except Exception:
	                labels.append(str(entry).lower())
	        carried = " ".join(labels)

	    def mentions(*phrases: str) -> bool:
	        """True when the observation contains at least one of the phrases."""
	        return any(phrase in text for phrase in phrases)

	    def carrying(*phrases: str) -> bool:
	        """True when the inventory text contains at least one of the phrases."""
	        return any(phrase in carried for phrase in phrases)

	    tips: list[str] = []

	    # Darkness / light
	    if mentions("dark", "pitch black") or "dark" in place:
	        tips.append("It is dangerous to move around in the dark. You need a light source.")
	        if carrying("lamp", "lantern"):
	            tips.append("You seem to have a lamp/lantern. Try turning it on if that action is available.")
	        else:
	            tips.append("If you see a lamp or lantern anywhere, pick it up immediately.")

	    # Window
	    if mentions("window"):
	        if mentions("ajar", "open"):
	            tips.append("An open/ajar window may be an entry point. Try 'enter window' or 'in' if allowed.")
	        else:
	            tips.append("A window often leads somewhere. Try 'open window' or examine it more closely.")

	    # Leaves ("pile of leaves" already contains "leaves")
	    if mentions("leaves"):
	        tips.append("A pile of leaves often hides something. Try moving or taking them.")

	    # Grating
	    if mentions("grating"):
	        tips.append("A grating is usually a passage. Try opening or unlocking it, or inspect nearby objects.")

	    # Containers
	    if mentions("mailbox", "chest", "box", "container", "cabinet", "case", "sack"):
	        tips.append("Try opening containers. They often contain useful items.")

	    # Trees / climbing ("trees" already contains "tree")
	    if mentions("tree"):
	        tips.append("Trees may be climbable. Look for branches or try climbing if possible.")
	    if mentions("climbable", "you can climb"):
	        tips.append("Climbing may lead to new areas. Try climbing up or down if available.")

	    # Keys / weapons
	    if mentions("key") and not carrying("key"):
	        tips.append("Keys are important. Pick it up if you can.")
	    if mentions("sword", "knife") and not carrying("sword", "knife"):
	        tips.append("A weapon may be useful later. Consider taking it.")

	    # Explicit narration cues, checked in a fixed order.
	    narration_cues = (
	        (("possible to climb down", "you can climb down"),
	         "The narration says you can climb down here — try: 'down'."),
	        (("possible to climb up", "you can climb up"),
	         "The narration says you can climb up here — try: 'up'."),
	        (("possible to enter", "you can enter", "way in"),
	         "The narration suggests an entry is possible — try: 'in'."),
	        (("way out", "possible to leave", "you can leave"),
	         "The narration suggests an exit — try: 'out'."),
	    )
	    for phrases, advice in narration_cues:
	        if mentions(*phrases):
	            tips.append(advice)

	    # Generic advice only when nothing specific applied.
	    if not tips:
	        tips.append("If you feel stuck, call valid_actions and try 1–2 new high-value actions (take/open/enter/climb/pull).")
	        tips.append("Avoid repeating actions that produced no new information in the same location.")

	    return "Hints:\n" + "\n".join(f"- {tip}" for tip in tips)

	@mcp.tool()
	def state() -> str:
	    """
	    Structured state as JSON string.
	    """
	    snapshot = get_game().get_state_struct()
	    # ensure_ascii=False keeps any non-ASCII game text readable in the output.
	    return json.dumps(snapshot, ensure_ascii=False, indent=2)


	@mcp.tool()
	def exits() -> str:
	    """
	    Return possible movement actions from valid_actions (best-effort).
	    """
	    directions = frozenset((
	        "north", "south", "east", "west", "up", "down", "in", "out",
	        "northeast", "northwest", "southeast", "southwest",
	    ))

	    game = get_game()
	    listing = game.get_valid_actions(max_actions=80)

	    # Keep only the "- <action>" bullets whose action is a plain direction.
	    entries = (raw.strip() for raw in listing.splitlines())
	    moves = [
	        act
	        for entry in entries
	        if entry.startswith("- ") and (act := entry[2:].strip().lower()) in directions
	    ]

	    result = {"location": game.current_location or "Unknown", "exits": moves}
	    return json.dumps(result, ensure_ascii=False, indent=2)


	@mcp.tool()
	def graph() -> str:
	    """
	    Return explored graph as JSON (nodes + edges).
	    """
	    game = get_game()

	    # One edge per recorded (origin, action) pair, in recording order.
	    edges = [
	        {"from": origin, "action": action, "to": target}
	        for origin, by_action in game.transitions.items()
	        for action, target in by_action.items()
	    ]

	    document = {
	        "current": game.current_location or "Unknown",
	        "nodes": sorted(game.locations),
	        "edges": edges,
	    }
	    return json.dumps(document, ensure_ascii=False, indent=2)


	@mcp.tool()
	def checkpoint_save(name: str = "auto") -> str:
	    """
	    Save an environment snapshot under 'name'.
	    """
	    game = get_game()
	    snapshot = game._snapshot()
	    game.checkpoints[name] = snapshot

	    # A missing snapshot and a ("none", None) snapshot both count as a failure.
	    kind = snapshot[0] if snapshot else "none"
	    outcome = {"ok": kind != "none", "name": name, "kind": kind}
	    return json.dumps(outcome, ensure_ascii=False, indent=2)


	@mcp.tool()
	def checkpoint_restore(name: str = "auto") -> str:
	    """
	    Restore a previously saved snapshot.
	    """
	    game = get_game()
	    restored = game._restore_snapshot(game.checkpoints.get(name))

	    if restored and game.state:
	        # Re-derive the location from the restored observation; keep the
	        # previous one when the observation carries no room title.
	        observation = getattr(game.state, "observation", "") or ""
	        located = game._extract_location(observation)
	        if located:
	            game.current_location = located
	        if game.current_location:
	            game.locations.add(game.current_location)

	    return json.dumps({"ok": bool(restored), "name": name}, ensure_ascii=False, indent=2)


	@mcp.tool()
	def action_probe(action: str) -> str:
	    """
	    Simulate an action: save -> step(action) -> capture -> restore.
	    Returns a JSON report without committing.
	    """
	    # Report fields:
	    #   ok        - False when the probe was refused (empty action, or no
	    #               snapshot could be taken); an "error" field then says why
	    #               and the game is left untouched.
	    #   restored  - False means the roll-back failed and the action HAS been
	    #               committed; tracking is then kept in its post-step form so
	    #               it stays consistent with the game.
	    game = get_game()
	    command = (action or "").strip()
	    before = game.get_state_struct()

	    report = {
	        "action": command,
	        "ok": False,
	        "restored": False,
	        "reward_delta": 0,
	        "score_delta": 0,
	        "moves_delta": 0,
	        "done": bool(before.get("done", False)),
	        "new_location": before.get("location"),
	        "state_hash": before.get("state_hash"),
	        "observation_head": "",
	        "hash_changed": False,
	    }

	    if not command:
	        report["error"] = "empty action"
	        return json.dumps(report, ensure_ascii=False, indent=2)

	    # Without a snapshot the step could not be undone, which would break the
	    # "without committing" contract - so refuse instead of stepping.
	    snap = game._snapshot()
	    if snap is None or snap[0] == "none":
	        report["error"] = "no snapshot available; action not executed"
	        return json.dumps(report, ensure_ascii=False, indent=2)

	    # Everything step() mutates on the manager, copied so it can be put back.
	    state_before = game.state
	    tracking_backup = {
	        "history": list(game.history),
	        "locations": set(game.locations),
	        "current_location": game.current_location,
	        "transitions": deepcopy(game.transitions),
	        "actions_tried_by_location": deepcopy(game.actions_tried_by_location),
	        "_actions_tried_set": deepcopy(game._actions_tried_set),
	        "last_reward": game.last_reward,
	    }

	    restored = False
	    try:
	        obs = game.step(command)
	        after = game.get_state_struct()
	    finally:
	        # Runs even if step() raised, so a failed probe is still undone.
	        restored = game._restore_snapshot(snap)
	        if restored:
	            # Put the pre-probe state object and tracking back. When the
	            # restore failed the game really advanced, so tracking is left
	            # as step() updated it.
	            game.state = state_before
	            for attribute, value in tracking_backup.items():
	                setattr(game, attribute, value)

	    head = (obs or "").strip()
	    report.update({
	        "ok": True,
	        "restored": bool(restored),
	        "reward_delta": int(after.get("last_reward", 0) or 0),
	        "score_delta": int(after.get("score", 0) - before.get("score", 0)),
	        "moves_delta": int(after.get("moves", 0) - before.get("moves", 0)),
	        "done": bool(after.get("done", False)),
	        "new_location": after.get("location"),
	        "state_hash": after.get("state_hash"),
	        "observation_head": head.splitlines()[0] if head else "",
	        "hash_changed": before.get("state_hash") != after.get("state_hash"),
	    })
	    return json.dumps(report, ensure_ascii=False, indent=2)


	# =============================================================================
	# Run the server
	# =============================================================================

	if __name__ == "__main__":
	# Only start serving when run as a script, not when imported.
	# mcp.run() is called without arguments; per the original note this uses the stdio transport (for MCP clients).
	mcp.run()