Spaces:
Sleeping
Sleeping
Implement agent and MCP server
Browse files- agent.py +254 -72
- mcp_server.py +143 -67
agent.py
CHANGED
|
@@ -26,7 +26,9 @@ Tips:
|
|
| 26 |
import json
|
| 27 |
import os
|
| 28 |
import re
|
|
|
|
| 29 |
from dataclasses import dataclass, field
|
|
|
|
| 30 |
from typing import Optional
|
| 31 |
|
| 32 |
from dotenv import load_dotenv
|
|
@@ -159,23 +161,18 @@ ARGS: {"action": "look"}
|
|
| 159 |
# =============================================================================
|
| 160 |
|
| 161 |
class StudentAgent:
|
| 162 |
-
"""
|
| 163 |
-
Your ReAct agent implementation.
|
| 164 |
-
|
| 165 |
-
TODO:
|
| 166 |
-
1. Implement the run() method with the ReAct loop
|
| 167 |
-
2. Parse LLM responses to extract tool calls
|
| 168 |
-
3. Track state and avoid loops
|
| 169 |
-
|
| 170 |
-
Use the provided call_llm() function to interact with the LLM.
|
| 171 |
-
"""
|
| 172 |
|
| 173 |
def __init__(self):
|
| 174 |
-
"""Initialize
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 179 |
|
| 180 |
async def run(
|
| 181 |
self,
|
|
@@ -185,84 +182,269 @@ class StudentAgent:
|
|
| 185 |
seed: int,
|
| 186 |
verbose: bool = False,
|
| 187 |
) -> RunResult:
|
| 188 |
-
"""
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
Returns:
|
| 199 |
-
RunResult with final score and statistics
|
| 200 |
-
"""
|
| 201 |
-
# TODO: Implement your ReAct loop here
|
| 202 |
-
#
|
| 203 |
-
# Basic structure:
|
| 204 |
-
# 1. Get initial observation (call play_action with "look")
|
| 205 |
-
# 2. Loop for max_steps:
|
| 206 |
-
# a. Build prompt with current observation and history
|
| 207 |
-
# b. Call LLM to get thought and action
|
| 208 |
-
# c. Parse the response to extract tool and args
|
| 209 |
-
# d. Call the tool via client.call_tool(tool_name, args)
|
| 210 |
-
# e. Update history and state
|
| 211 |
-
# f. Check for game over
|
| 212 |
-
# 3. Return RunResult with final statistics
|
| 213 |
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
|
|
|
|
|
|
| 217 |
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
# )
|
| 224 |
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
|
| 231 |
-
|
| 232 |
-
|
| 233 |
|
| 234 |
return RunResult(
|
| 235 |
-
final_score=
|
| 236 |
-
max_score=
|
| 237 |
-
moves=moves,
|
| 238 |
-
locations_visited=
|
| 239 |
-
game_completed=
|
| 240 |
-
history=history,
|
| 241 |
)
|
| 242 |
|
| 243 |
def _build_prompt(self, observation: str, history: list) -> str:
|
| 244 |
"""
|
| 245 |
Build the prompt for the LLM.
|
| 246 |
|
| 247 |
-
|
|
|
|
| 248 |
"""
|
| 249 |
-
|
| 250 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 251 |
|
| 252 |
def _parse_response(self, response: str) -> tuple[str, str, dict]:
|
| 253 |
"""
|
| 254 |
Parse LLM response to extract thought, tool name, and arguments.
|
| 255 |
|
| 256 |
-
TODO: Implement robust parsing
|
| 257 |
-
|
| 258 |
Returns:
|
| 259 |
Tuple of (thought, tool_name, args_dict)
|
| 260 |
"""
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 266 |
|
| 267 |
def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
|
| 268 |
"""
|
|
|
|
| 26 |
import json
|
| 27 |
import os
|
| 28 |
import re
|
| 29 |
+
import random
|
| 30 |
from dataclasses import dataclass, field
|
| 31 |
+
from collections import defaultdict
|
| 32 |
from typing import Optional
|
| 33 |
|
| 34 |
from dotenv import load_dotenv
|
|
|
|
| 161 |
# =============================================================================
|
| 162 |
|
| 163 |
class StudentAgent:
|
| 164 |
+
"""A lean ReAct agent with a dash of personal taste."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 165 |
|
| 166 |
def __init__(self):
|
| 167 |
+
"""Initialize run-local state."""
|
| 168 |
+
self.history: list[tuple[str, str, str]] = []
|
| 169 |
+
self.visited_locations: set[str] = set()
|
| 170 |
+
self.actions_tried = defaultdict(lambda: defaultdict(int)) # location -> action -> count
|
| 171 |
+
self.current_score = 0
|
| 172 |
+
self.max_score = 350
|
| 173 |
+
self.moves = 0
|
| 174 |
+
self.game = ""
|
| 175 |
+
self.last_location = "Unknown"
|
| 176 |
|
| 177 |
async def run(
|
| 178 |
self,
|
|
|
|
| 182 |
seed: int,
|
| 183 |
verbose: bool = False,
|
| 184 |
) -> RunResult:
|
| 185 |
+
"""Run the ReAct loop."""
|
| 186 |
+
random.seed(seed)
|
| 187 |
+
self.history = []
|
| 188 |
+
self.visited_locations = set()
|
| 189 |
+
self.actions_tried = defaultdict(lambda: defaultdict(int))
|
| 190 |
+
self.current_score = 0
|
| 191 |
+
self.max_score = 350
|
| 192 |
+
self.moves = 0
|
| 193 |
+
self.game = game
|
| 194 |
+
self.last_location = "Unknown"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 195 |
|
| 196 |
+
observation = await self._safe_tool(client, "play_action", {"action": "look"})
|
| 197 |
+
prev_moves_mark = self.moves
|
| 198 |
+
self._ingest_observation(observation)
|
| 199 |
+
if self.moves == prev_moves_mark:
|
| 200 |
+
self.moves += 1
|
| 201 |
|
| 202 |
+
mem_text = await self._safe_tool(client, "memory", {"limit": 3})
|
| 203 |
+
self.max_score = self._parse_max_score(mem_text) or self.max_score
|
| 204 |
+
self.current_score, self.moves = self._parse_score_moves(
|
| 205 |
+
mem_text, self.current_score, self.moves
|
| 206 |
+
)
|
|
|
|
| 207 |
|
| 208 |
+
for step in range(max_steps):
|
| 209 |
+
prompt = self._build_prompt(observation, self.history)
|
| 210 |
+
llm_response = self._call_llm(prompt, SYSTEM_PROMPT, seed)
|
| 211 |
+
thought, tool, args = self._parse_response(llm_response)
|
| 212 |
+
|
| 213 |
+
allowed_tools = {"play_action", "memory", "inventory", "get_map", "get_valid_actions"}
|
| 214 |
+
if tool not in allowed_tools:
|
| 215 |
+
tool, args = "play_action", {"action": "look"}
|
| 216 |
+
|
| 217 |
+
prev_moves = self.moves
|
| 218 |
+
if tool == "play_action":
|
| 219 |
+
action = (args.get("action") or "").strip()
|
| 220 |
+
if not action:
|
| 221 |
+
action = "look"
|
| 222 |
+
|
| 223 |
+
location = self.last_location
|
| 224 |
+
if self._should_switch(location, action):
|
| 225 |
+
action = self._fallback_action(self.actions_tried[location])
|
| 226 |
+
|
| 227 |
+
self.actions_tried[location][action] += 1
|
| 228 |
+
observation = await self._safe_tool(client, "play_action", {"action": action})
|
| 229 |
+
else:
|
| 230 |
+
observation = await self._safe_tool(client, tool, args)
|
| 231 |
+
|
| 232 |
+
self._ingest_observation(observation)
|
| 233 |
+
if tool == "play_action" and self.moves == prev_moves:
|
| 234 |
+
self.moves += 1
|
| 235 |
+
self.history.append((thought, f"{tool} {json.dumps(args)}", observation))
|
| 236 |
+
|
| 237 |
+
if verbose:
|
| 238 |
+
print(f"\n> {tool} {args}\n{observation}")
|
| 239 |
+
|
| 240 |
+
if self._is_terminal(observation):
|
| 241 |
+
break
|
| 242 |
+
if self.moves >= max_steps:
|
| 243 |
+
break
|
| 244 |
|
| 245 |
+
clean_locations = {loc for loc in self.visited_locations if loc != "Unknown"}
|
| 246 |
+
game_completed = self.current_score >= self.max_score or self._is_win(observation)
|
| 247 |
|
| 248 |
return RunResult(
|
| 249 |
+
final_score=self.current_score,
|
| 250 |
+
max_score=self.max_score,
|
| 251 |
+
moves=self.moves,
|
| 252 |
+
locations_visited=clean_locations,
|
| 253 |
+
game_completed=game_completed,
|
| 254 |
+
history=self.history,
|
| 255 |
)
|
| 256 |
|
| 257 |
def _build_prompt(self, observation: str, history: list) -> str:
|
| 258 |
"""
|
| 259 |
Build the prompt for the LLM.
|
| 260 |
|
| 261 |
+
Mix a little personality with concise context so the model
|
| 262 |
+
keeps commands short and avoids spinning in circles.
|
| 263 |
"""
|
| 264 |
+
recent = history[-5:]
|
| 265 |
+
lines = [
|
| 266 |
+
f"Game: {self.game}",
|
| 267 |
+
"You are me playing a parser game. Be decisive, keep commands under four words.",
|
| 268 |
+
"If something failed twice in this room, try a different verb or direction.",
|
| 269 |
+
"",
|
| 270 |
+
"Current observation:",
|
| 271 |
+
observation.strip(),
|
| 272 |
+
"",
|
| 273 |
+
"Recent steps:",
|
| 274 |
+
]
|
| 275 |
+
if not recent:
|
| 276 |
+
lines.append("- none yet")
|
| 277 |
+
else:
|
| 278 |
+
for thought, action, obs in recent:
|
| 279 |
+
snippet = obs.replace("\n", " ")
|
| 280 |
+
if len(snippet) > 120:
|
| 281 |
+
snippet = snippet[:117] + "..."
|
| 282 |
+
lines.append(f"- {action}: {snippet}")
|
| 283 |
+
lines.append("\nNext command?")
|
| 284 |
+
return "\n".join(lines)
|
| 285 |
|
| 286 |
def _parse_response(self, response: str) -> tuple[str, str, dict]:
|
| 287 |
"""
|
| 288 |
Parse LLM response to extract thought, tool name, and arguments.
|
| 289 |
|
|
|
|
|
|
|
| 290 |
Returns:
|
| 291 |
Tuple of (thought, tool_name, args_dict)
|
| 292 |
"""
|
| 293 |
+
thought = ""
|
| 294 |
+
tool = "play_action"
|
| 295 |
+
args: dict = {"action": "look"}
|
| 296 |
+
|
| 297 |
+
if not response:
|
| 298 |
+
return thought, tool, args
|
| 299 |
+
|
| 300 |
+
cleaned = response.strip().replace("```", "")
|
| 301 |
+
|
| 302 |
+
thought_match = re.search(r"THOUGHT:\s*(.*)", cleaned, re.IGNORECASE)
|
| 303 |
+
if thought_match:
|
| 304 |
+
thought = thought_match.group(1).strip()
|
| 305 |
+
|
| 306 |
+
tool_match = re.search(r"TOOL:\s*([A-Za-z0-9_]+)", cleaned, re.IGNORECASE)
|
| 307 |
+
if tool_match:
|
| 308 |
+
tool = tool_match.group(1).strip()
|
| 309 |
+
|
| 310 |
+
args_match = re.search(r"ARGS:\s*(\{[\s\S]*\})", cleaned, re.IGNORECASE)
|
| 311 |
+
if args_match:
|
| 312 |
+
raw_args = args_match.group(1)
|
| 313 |
+
raw_args = raw_args[: raw_args.rfind("}") + 1] if "}" in raw_args else raw_args
|
| 314 |
+
try:
|
| 315 |
+
args = json.loads(raw_args)
|
| 316 |
+
except Exception:
|
| 317 |
+
try:
|
| 318 |
+
args = json.loads(raw_args.replace("'", "\""))
|
| 319 |
+
except Exception:
|
| 320 |
+
args = {"action": raw_args.strip("{} ").strip()}
|
| 321 |
+
|
| 322 |
+
if tool == "play_action" and "action" not in args:
|
| 323 |
+
args["action"] = "look"
|
| 324 |
+
|
| 325 |
+
return thought, tool, args
|
| 326 |
+
|
| 327 |
+
async def _safe_tool(self, client, tool: str, args: dict) -> str:
|
| 328 |
+
"""Call a tool and always return a string."""
|
| 329 |
+
try:
|
| 330 |
+
result = await client.call_tool(tool, args)
|
| 331 |
+
except Exception as exc:
|
| 332 |
+
return f"[tool-error:{tool}] {exc}"
|
| 333 |
+
return self._extract_text(result)
|
| 334 |
+
|
| 335 |
+
def _extract_text(self, result) -> str:
|
| 336 |
+
"""Normalize FastMCP tool responses into plain text."""
|
| 337 |
+
if result is None:
|
| 338 |
+
return ""
|
| 339 |
+
if isinstance(result, str):
|
| 340 |
+
return result
|
| 341 |
+
if isinstance(result, list):
|
| 342 |
+
texts = [self._extract_text(r) for r in result]
|
| 343 |
+
return "\n".join(t for t in texts if t)
|
| 344 |
+
if hasattr(result, "text"):
|
| 345 |
+
try:
|
| 346 |
+
return result.text
|
| 347 |
+
except Exception:
|
| 348 |
+
pass
|
| 349 |
+
if hasattr(result, "content"):
|
| 350 |
+
content = getattr(result, "content")
|
| 351 |
+
if isinstance(content, list):
|
| 352 |
+
texts = [self._extract_text(c) for c in content]
|
| 353 |
+
return "\n".join(t for t in texts if t)
|
| 354 |
+
if isinstance(content, str):
|
| 355 |
+
return content
|
| 356 |
+
if isinstance(result, dict):
|
| 357 |
+
for key in ("text", "content", "data", "result", "output"):
|
| 358 |
+
if key in result:
|
| 359 |
+
return self._extract_text(result[key])
|
| 360 |
+
return str(result)
|
| 361 |
+
|
| 362 |
+
def _ingest_observation(self, observation: str):
|
| 363 |
+
"""Update cached score, move count, and location tracking."""
|
| 364 |
+
self.current_score, self.moves = self._parse_score_moves(
|
| 365 |
+
observation, self.current_score, self.moves
|
| 366 |
+
)
|
| 367 |
+
location = self._extract_location(observation)
|
| 368 |
+
self.last_location = location
|
| 369 |
+
if location and location != "Unknown":
|
| 370 |
+
self.visited_locations.add(location)
|
| 371 |
+
|
| 372 |
+
def _parse_score_moves(
|
| 373 |
+
self, text: str, current_score: int, current_moves: int
|
| 374 |
+
) -> tuple[int, int]:
|
| 375 |
+
if not text:
|
| 376 |
+
return current_score, current_moves
|
| 377 |
+
score_match = re.search(r"Score:\s*(\d+)", text)
|
| 378 |
+
move_match = re.search(r"Moves?:\s*(\d+)", text)
|
| 379 |
+
if score_match:
|
| 380 |
+
current_score = int(score_match.group(1))
|
| 381 |
+
if move_match:
|
| 382 |
+
current_moves = int(move_match.group(1))
|
| 383 |
+
return current_score, current_moves
|
| 384 |
+
|
| 385 |
+
def _parse_max_score(self, text: str) -> Optional[int]:
|
| 386 |
+
if not text:
|
| 387 |
+
return None
|
| 388 |
+
max_match = re.search(r"Score:\s*\d+\s*/\s*(\d+)", text)
|
| 389 |
+
return int(max_match.group(1)) if max_match else None
|
| 390 |
+
|
| 391 |
+
def _extract_location(self, observation: str) -> str:
|
| 392 |
+
if not observation:
|
| 393 |
+
return "Unknown"
|
| 394 |
+
match = re.search(r"Location:\s*([^\]\n]+)", observation)
|
| 395 |
+
if match:
|
| 396 |
+
return match.group(1).strip()
|
| 397 |
+
first_line = observation.strip().splitlines()[0].strip()
|
| 398 |
+
if len(first_line) <= 80:
|
| 399 |
+
return first_line or "Unknown"
|
| 400 |
+
return "Unknown"
|
| 401 |
+
|
| 402 |
+
def _should_switch(self, location: str, action: str) -> bool:
|
| 403 |
+
tried_here = self.actions_tried[location]
|
| 404 |
+
return tried_here.get(action, 0) >= 2
|
| 405 |
+
|
| 406 |
+
def _fallback_action(self, tried_actions: dict[str, int]) -> str:
|
| 407 |
+
palette = [
|
| 408 |
+
"look",
|
| 409 |
+
"inventory",
|
| 410 |
+
"north",
|
| 411 |
+
"south",
|
| 412 |
+
"east",
|
| 413 |
+
"west",
|
| 414 |
+
"up",
|
| 415 |
+
"down",
|
| 416 |
+
"enter",
|
| 417 |
+
"exit",
|
| 418 |
+
"take all",
|
| 419 |
+
"open door",
|
| 420 |
+
"examine room",
|
| 421 |
+
]
|
| 422 |
+
for candidate in palette:
|
| 423 |
+
if tried_actions.get(candidate, 0) == 0:
|
| 424 |
+
return candidate
|
| 425 |
+
return "look"
|
| 426 |
+
|
| 427 |
+
def _is_terminal(self, observation: str) -> bool:
|
| 428 |
+
if not observation:
|
| 429 |
+
return False
|
| 430 |
+
lower = observation.lower()
|
| 431 |
+
return any(
|
| 432 |
+
phrase in lower
|
| 433 |
+
for phrase in [
|
| 434 |
+
"you have died",
|
| 435 |
+
"you are dead",
|
| 436 |
+
"game over",
|
| 437 |
+
"you have won",
|
| 438 |
+
"congratulations",
|
| 439 |
+
"*** the end",
|
| 440 |
+
]
|
| 441 |
+
)
|
| 442 |
+
|
| 443 |
+
def _is_win(self, observation: str) -> bool:
|
| 444 |
+
if not observation:
|
| 445 |
+
return False
|
| 446 |
+
lower = observation.lower()
|
| 447 |
+
return "you have won" in lower or "congratulations" in lower
|
| 448 |
|
| 449 |
def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
|
| 450 |
"""
|
mcp_server.py
CHANGED
|
@@ -26,9 +26,21 @@ Then open the MCP Inspector in your browser to test the tools interactively.
|
|
| 26 |
|
| 27 |
import sys
|
| 28 |
import os
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
|
| 33 |
from fastmcp import FastMCP
|
| 34 |
from games.zork_env import TextAdventureEnv
|
|
@@ -59,17 +71,29 @@ class GameManager:
|
|
| 59 |
self.env: TextAdventureEnv = None
|
| 60 |
self.state = None
|
| 61 |
self.game_name: str = ""
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
|
| 67 |
def initialize(self, game: str = "zork1"):
|
| 68 |
"""Initialize or reset the game."""
|
| 69 |
self.game_name = game
|
| 70 |
self.env = TextAdventureEnv(game)
|
| 71 |
self.state = self.env.reset()
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
| 73 |
return self.state.observation
|
| 74 |
|
| 75 |
def step(self, action: str) -> str:
|
|
@@ -77,11 +101,16 @@ class GameManager:
|
|
| 77 |
if self.env is None:
|
| 78 |
self.initialize()
|
| 79 |
|
|
|
|
| 80 |
self.state = self.env.step(action)
|
|
|
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
|
|
|
|
|
|
|
|
|
| 85 |
|
| 86 |
return self.state.observation
|
| 87 |
|
|
@@ -92,6 +121,11 @@ class GameManager:
|
|
| 92 |
def get_moves(self) -> int:
|
| 93 |
"""Get number of moves taken."""
|
| 94 |
return self.state.moves if self.state else 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
|
| 97 |
# Global game manager
|
|
@@ -135,69 +169,111 @@ def play_action(action: str) -> str:
|
|
| 135 |
# TODO: You might want to add action validation here
|
| 136 |
# TODO: You might want to include score changes in the response
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
result = game.step(action)
|
| 139 |
|
| 140 |
# Optional: Append score info
|
| 141 |
-
|
|
|
|
|
|
|
|
|
|
| 142 |
|
| 143 |
return result
|
| 144 |
|
| 145 |
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
|
| 203 |
# =============================================================================
|
|
|
|
| 26 |
|
| 27 |
import sys
|
| 28 |
import os
|
| 29 |
+
from collections import defaultdict
|
| 30 |
+
from pathlib import Path
|
| 31 |
+
|
| 32 |
+
# Add a path that actually contains the games package (works for template and sibling-repo layout)
|
| 33 |
+
_here = Path(__file__).resolve().parent
|
| 34 |
+
_candidate_roots = [
|
| 35 |
+
_here.parent, # .../zork/
|
| 36 |
+
_here.parent / "Agentic-zork", # sibling repo with games/
|
| 37 |
+
]
|
| 38 |
+
for _root in _candidate_roots:
|
| 39 |
+
if (_root / "games").exists():
|
| 40 |
+
sys.path.insert(0, str(_root))
|
| 41 |
+
break
|
| 42 |
+
else:
|
| 43 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 44 |
|
| 45 |
from fastmcp import FastMCP
|
| 46 |
from games.zork_env import TextAdventureEnv
|
|
|
|
| 71 |
self.env: TextAdventureEnv = None
|
| 72 |
self.state = None
|
| 73 |
self.game_name: str = ""
|
| 74 |
+
self.history: list[tuple[str, str, int, int]] = [] # action, observation, score, moves
|
| 75 |
+
self.visited_locations: set[str] = set()
|
| 76 |
+
self.transitions: dict[str, dict[str, str]] = defaultdict(dict)
|
| 77 |
+
self.current_location: str = "Unknown"
|
| 78 |
+
|
| 79 |
+
def _loc_name(self, state) -> str:
|
| 80 |
+
"""Best-effort location string from env or the observation header."""
|
| 81 |
+
if state and getattr(state, "location", "Unknown") not in ("Unknown", None, ""):
|
| 82 |
+
return str(state.location).strip()
|
| 83 |
+
if state and getattr(state, "observation", ""):
|
| 84 |
+
first_line = state.observation.splitlines()[0].strip()
|
| 85 |
+
return first_line or "Unknown"
|
| 86 |
+
return "Unknown"
|
| 87 |
|
| 88 |
def initialize(self, game: str = "zork1"):
    """
    Start (or restart) a game and clear all tracking state.

    Args:
        game: Name passed to ``TextAdventureEnv``.

    Returns:
        The observation of the freshly reset environment.
    """
    self.game_name = game
    env = TextAdventureEnv(game)
    self.env = env
    state = env.reset()
    self.state = state

    # Resolve the starting room before wiping the trackers.
    start = self._loc_name(state)
    self.current_location = start
    self.history = []
    self.visited_locations = {start}
    self.transitions = defaultdict(dict)
    return state.observation
|
| 98 |
|
| 99 |
def step(self, action: str) -> str:
|
|
|
|
| 101 |
if self.env is None:
|
| 102 |
self.initialize()
|
| 103 |
|
| 104 |
+
prev_location = self.current_location
|
| 105 |
self.state = self.env.step(action)
|
| 106 |
+
self.current_location = self._loc_name(self.state)
|
| 107 |
|
| 108 |
+
self.visited_locations.add(self.current_location)
|
| 109 |
+
self.history.append(
|
| 110 |
+
(action, self.state.observation, self.state.score, self.state.moves)
|
| 111 |
+
)
|
| 112 |
+
if prev_location and self.current_location and self.current_location != "Unknown":
|
| 113 |
+
self.transitions[prev_location][action] = self.current_location
|
| 114 |
|
| 115 |
return self.state.observation
|
| 116 |
|
|
|
|
| 121 |
def get_moves(self) -> int:
    """Return the move counter of the current state, or 0 when there is no state."""
    if not self.state:
        return 0
    return self.state.moves
|
| 124 |
+
|
| 125 |
+
def get_inventory(self) -> list[str]:
    """
    Return the inventory reported by the current state as a new list.

    Returns:
        A copy of ``state.inventory``, or an empty list when there is no
        state or the state has no inventory (missing or ``None``).
    """
    items = getattr(self.state, "inventory", None) if self.state else None
    return [] if items is None else list(items)
|
| 129 |
|
| 130 |
|
| 131 |
# Global game manager
|
|
|
|
| 169 |
# TODO: You might want to add action validation here
|
| 170 |
# TODO: You might want to include score changes in the response
|
| 171 |
|
| 172 |
+
action = action.strip()
|
| 173 |
+
if not action:
|
| 174 |
+
return "Action cannot be empty."
|
| 175 |
+
|
| 176 |
result = game.step(action)
|
| 177 |
|
| 178 |
# Optional: Append score info
|
| 179 |
+
result += (
|
| 180 |
+
f"\n[Score: {game.get_score()} | Moves: {game.get_moves()} | "
|
| 181 |
+
f"Location: {game.current_location}]"
|
| 182 |
+
)
|
| 183 |
|
| 184 |
return result
|
| 185 |
|
| 186 |
|
| 187 |
+
@mcp.tool()
def memory(limit: int = 6) -> str:
    """
    Get the current game state summary.

    The summary lists the current location, score / max score, move
    count, last reward, inventory, number of visited locations, and the
    most recent steps.

    Args:
        limit: Number of recent steps to include. Zero or a negative
            value lists no steps.

    Returns:
        A multi-line text summary.
    """
    game = get_game()
    if game.state is None:
        game.initialize()
    state = game.state

    lines = [
        f"Location: {game.current_location}",
        f"Score: {state.score}/{state.max_score} | Moves: {state.moves} | Last reward: {state.reward}",
    ]

    inventory = game.get_inventory()
    inv_str = ", ".join(inventory) if inventory else "(empty or unknown)"
    lines.append(f"Inventory: {inv_str}")
    lines.append(f"Visited locations: {len(game.visited_locations)}")

    if not game.history:
        lines.append("Recent: (no actions yet)")
    elif limit > 0:
        # Guard the slice: history[-0:] would be the WHOLE history and a
        # negative limit would slice from the front instead of the back.
        lines.append("Recent:")
        for act, obs, score, mv in game.history[-limit:]:
            snippet = obs.replace("\n", " ")
            if len(snippet) > 120:
                snippet = snippet[:117] + "..."
            lines.append(f"- {mv:03d} [{score}] {act}: {snippet}")

    return "\n".join(lines)
|
| 221 |
+
|
| 222 |
+
|
| 223 |
+
@mcp.tool()
def inventory() -> str:
    """
    Check what the player is carrying without spending a move.

    Returns:
        ``"Inventory: "`` followed by a comma-separated item list, or by
        a placeholder when the game manager reports no items.
    """
    game = get_game()
    if game.state is None:
        game.initialize()

    carried = game.get_inventory()
    listing = ", ".join(carried) if carried else "(empty or not reported by engine)"
    return "Inventory: " + listing
|
| 235 |
+
|
| 236 |
+
|
| 237 |
+
@mcp.tool()
def get_map() -> str:
    """
    Get a map of explored locations.

    Returns:
        A text listing of each recorded location with its
        ``action -> destination`` edges (sorted), followed by visited
        locations that have no recorded outgoing edge; or a short notice
        when no transitions have been recorded.
    """
    game = get_game()
    graph = game.transitions
    if not graph:
        return "Map is empty. Explore a bit more first."

    report = ["Known connections:"]
    for origin in sorted(graph):
        described = [f"{verb} -> {target}" for verb, target in sorted(graph[origin].items())]
        report.append(f"- {origin}: " + "; ".join(described))

    # Locations that were visited but never used as a starting point.
    unmapped = game.visited_locations.difference(graph)
    if unmapped:
        report.append("Visited without exits mapped: " + ", ".join(sorted(unmapped)))
    return "\n".join(report)
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
@mcp.tool()
def get_valid_actions() -> str:
    """
    Get a list of likely valid actions from the current location.

    Returns:
        Up to 20 actions reported by the wrapped engine, comma-separated,
        or a fallback message when the engine is unavailable or the
        query fails.
    """
    game = get_game()
    engine = game.env.env if game.env else None
    if engine:
        try:
            actions = engine.get_valid_actions()
            return "Valid actions (top 20): " + ", ".join(actions[:20])
        except Exception:
            # Best effort: any engine failure falls through to the fallback text.
            pass
    return "Could not determine valid actions"
|
| 277 |
|
| 278 |
|
| 279 |
# =============================================================================
|