ilan commited on
Commit
58ef8a1
·
1 Parent(s): 7a36b3c

quasi final version

Browse files
Files changed (2) hide show
  1. agent.py +518 -83
  2. mcp_server.py +322 -114
agent.py CHANGED
@@ -104,27 +104,55 @@ class RunResult:
104
 
105
  SYSTEM_PROMPT = """You are playing a classic text adventure game.
106
 
107
- GOAL: Explore the world, solve puzzles, and maximize your score.
 
108
 
109
  AVAILABLE TOOLS (use via MCP):
110
- - play_action: Execute a game command (north, take lamp, open mailbox, etc.)
111
- - memory: Get current game state and history (if implemented)
112
- - inventory: Check what you're carrying (if implemented)
 
 
113
 
114
  VALID GAME COMMANDS for play_action:
115
  - Movement: north, south, east, west, up, down, enter, exit
116
- - Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
117
- - Other: look, inventory, read <thing>, turn on lamp
 
118
 
119
- RESPOND IN THIS EXACT FORMAT (no markdown):
120
- THOUGHT: <your reasoning about what to do next>
121
- TOOL: <tool_name>
122
- ARGS: <JSON arguments, e.g., {"action": "look"}>
123
 
124
- Example:
125
- THOUGHT: I should look around to see where I am.
 
 
 
 
 
126
  TOOL: play_action
127
- ARGS: {"action": "look"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  """
129
 
130
 
@@ -135,21 +163,28 @@ ARGS: {"action": "look"}
135
  class StudentAgent:
136
  """
137
  Your ReAct agent implementation.
138
-
139
- TODO:
140
- 1. Implement the run() method with the ReAct loop
141
- 2. Parse LLM responses to extract tool calls
142
- 3. Track state and avoid loops
143
-
144
- Use the provided call_llm() function to interact with the LLM.
145
  """
146
 
147
  def __init__(self):
148
  """Initialize your agent here."""
149
- # TODO: Initialize any state tracking you need
150
- # self.history = []
151
- # self.visited_locations = set()
152
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  async def run(
155
  self,
@@ -162,81 +197,242 @@ class StudentAgent:
162
  """
163
  Run the agent for a game session.
164
 
165
- Args:
166
- client: FastMCP Client connected to your MCP server
167
- game: Name of the game being played (e.g., "zork1")
168
- max_steps: Maximum number of steps to take
169
- seed: Random seed for reproducibility (use for LLM calls)
170
- verbose: Whether to print detailed output
171
-
172
- Returns:
173
- RunResult with final score and statistics
174
  """
175
- # TODO: Implement your ReAct loop here
176
- #
177
- # Basic structure:
178
- # 1. Get initial observation (call play_action with "look")
179
- # 2. Loop for max_steps:
180
- # a. Build prompt with current observation and history
181
- # b. Call LLM to get thought and action
182
- # c. Parse the response to extract tool and args
183
- # d. Call the tool via client.call_tool(tool_name, args)
184
- # e. Update history and state
185
- # f. Check for game over
186
- # 3. Return RunResult with final statistics
187
-
188
- # Example of calling a tool:
189
- # result = await client.call_tool("play_action", {"action": "look"})
190
- # observation = result[0].text if result else "No response"
191
-
192
- # Example of calling the LLM:
193
- # response = call_llm(
194
- # prompt="Current observation: " + observation,
195
- # system_prompt=SYSTEM_PROMPT,
196
- # seed=seed,
197
- # )
198
 
199
- # Placeholder implementation - replace with your code
200
  locations_visited = set()
201
  history = []
202
  final_score = 0
203
  moves = 0
204
 
205
- # TODO: Your implementation here
206
- # ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  return RunResult(
209
- final_score=final_score,
210
  max_score=350, # Zork1 max score, adjust if needed
211
  moves=moves,
212
- locations_visited=locations_visited,
213
  game_completed=False,
214
  history=history,
215
  )
216
 
217
- def _build_prompt(self, observation: str, history: list) -> str:
218
- """
219
- Build the prompt for the LLM.
220
-
221
- TODO: Implement this to create effective prompts
222
- """
223
- # TODO: Combine system prompt, history, and current observation
224
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
- def _parse_response(self, response: str) -> tuple[str, str, dict]:
227
- """
228
- Parse LLM response to extract thought, tool name, and arguments.
229
-
230
- TODO: Implement robust parsing
231
-
232
- Returns:
233
- Tuple of (thought, tool_name, args_dict)
234
- """
235
- # TODO: Parse the response format:
236
- # THOUGHT: ...
237
- # TOOL: ...
238
- # ARGS: {...}
239
- pass
240
 
241
  def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
242
  """
@@ -245,6 +441,245 @@ class StudentAgent:
245
  This is a convenience wrapper - you can also use call_llm() directly.
246
  """
247
  return call_llm(prompt, system_prompt, seed)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
 
249
 
250
  # =============================================================================
 
104
 
105
  SYSTEM_PROMPT = """You are playing a classic text adventure game.
106
 
107
+ GOAL:
108
+ Explore the world, discover new locations, collect useful items, solve puzzles, and maximize your score while avoiding useless or dangerous actions.
109
 
110
  AVAILABLE TOOLS (use via MCP):
111
+ - play_action: Execute ONE game command (movement or interaction)
112
+ - memory: Get a summary of the current game state and recent actions
113
+ - inventory: Check what you are carrying
114
+ - get_map: Get a map of explored locations and known connections
115
+ - get_valid_actions: Get a list of actions that are likely to work in the current location
116
 
117
  VALID GAME COMMANDS for play_action:
118
  - Movement: north, south, east, west, up, down, enter, exit
119
+ - Shortcuts: n, s, e, w, u, d
120
+ - Objects: take <item>, drop <item>, open <object>, close <object>, examine <object>
121
+ - Other: look, inventory, read <object>, turn on lamp, turn off lamp
122
 
123
+ RESPOND IN THIS EXACT FORMAT (DO NOT DEVIATE):
124
+ THOUGHT: <brief reasoning about what to do next>
125
+ TOOL: <tool name>
126
+ ARGS: <JSON arguments>
127
 
128
+ EXAMPLES:
129
+
130
+ THOUGHT: I am in a dark room and see a lamp on the ground.
131
+ TOOL: play_action
132
+ ARGS: {"action": "take lamp"}
133
+
134
+ THOUGHT: I can now see exits and should explore further.
135
  TOOL: play_action
136
+ ARGS: {"action": "north"}
137
+
138
+ STRATEGY:
139
+ 1. Use get_valid_actions frequently to know which actions are possible.
140
+ 2. Prefer actions that:
141
+ - reveal new locations
142
+ - interact with visible objects
143
+ - unlock new paths
144
+ - increase the score
145
+ 3. Avoid repeating actions that had no effect.
146
+ 4. Do not repeat the same action twice in a row.
147
+ 5. Use inventory awareness before dropping or interacting with items.
148
+ 6. Use look, inventory, memory, or get_map when unsure.
149
+ 7. Explore methodically rather than randomly.
150
+
151
+ IMPORTANT RULES:
152
+ - Execute EXACTLY ONE action per turn.
153
+ - Never invent commands or arguments.
154
+ - Avoid risky actions (jump, attack, wait).
155
+ - Progress and exploration matter more than speed.
156
  """
157
 
158
 
 
163
  class StudentAgent:
164
  """
165
  Your ReAct agent implementation.
 
 
 
 
 
 
 
166
  """
167
 
168
  def __init__(self):
169
  """Initialize your agent here."""
170
+
171
+ self.history = []
172
+ self.score:int = 0
173
+ self.map:Optional[str] = None
174
+ self.recent_actions: list[str] = []
175
+ self.failure: dict[str, int] = {}
176
+ self.valid_actions: list[str] = []
177
+ self.important_objects = {
178
+ "lamp", "lantern", "key", "keys", "door",
179
+ "mailbox", "box", "chest", "sword", "treasure"
180
+ }
181
+ self.object_goals: dict[str, dict] = {}
182
+ self.tried_edges: dict[tuple[str, str], str] = {}
183
+ self.edge_candidates: list[str] = []
184
+ self.last_action = None
185
+ self.current_inventory: list[str] = []
186
+ self.visited_locations = set()
187
+
188
 
189
  async def run(
190
  self,
 
197
  """
198
  Run the agent for a game session.
199
 
 
 
 
 
 
 
 
 
 
200
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
 
202
  locations_visited = set()
203
  history = []
204
  final_score = 0
205
  moves = 0
206
 
207
+ self.last_action = None
208
+
209
+ tools = await client.list_tools()
210
+ tool_names = [t.name for t in tools]
211
+
212
+ inventory_result = await client.call_tool("inventory", {})
213
+ inventory_text = self.extract_result(inventory_result)
214
+ self.current_inventory = self.parse_inventory(inventory_text)
215
+
216
+ init_result = await client.call_tool("play_action", {"action": "look"})
217
+ obs = self.extract_result(init_result)
218
+ self.update_score(obs)
219
+
220
+ location = self.extract_location(obs) or "Unknown"
221
+ locations_visited.add(location)
222
+ self.visited_locations.add(location)
223
+
224
+ for obs in obs:
225
+ for obj in self.extract_objects(obs):
226
+ if obj not in self.object_goals:
227
+ self.object_goals[obj] = {
228
+ "seen_at": location,
229
+ "obtained": False
230
+ }
231
+ for obj in self.extract_objects(obs):
232
+ if obj not in self.object_goals:
233
+ self.object_goals[obj] = {"seen_at": location, "obtained": False}
234
 
235
+ for step in range(1, max_steps + 1):
236
+ map_result = await client.call_tool("get_map", {})
237
+ self.map = self.extract_result(map_result)
238
+
239
+
240
+ try:
241
+ result = await client.call_tool("get_valid_actions", {})
242
+ valid_text = self.extract_result(result)
243
+ if "Valid actions:" in valid_text:
244
+ actions_str = valid_text.split("Valid actions:")[1].strip()
245
+ self.valid_actions = [a.strip() for a in actions_str.split(",")]
246
+ except Exception:
247
+ self.valid_actions = []
248
+
249
+ self.edge_candidates = [
250
+ a for a in self.valid_actions
251
+ if (location, a) not in self.tried_edges
252
+ ]
253
+
254
+ print(f"Edge candidates: {self.edge_candidates[:5]}")
255
+
256
+
257
+ prompt = self._build_prompt(obs)
258
+
259
+ response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
260
+ thought, tool_name, tool_args = self._parse_response(response, tool_names)
261
+
262
+
263
+ tool_name, tool_args = self.format_llm_response(
264
+ tool_name, tool_args, tool_names
265
+ )
266
+
267
+
268
+ if tool_name == "play_action":
269
+ action = tool_args.get("action", "")
270
+ inverse_pairs = {
271
+ ("take", "put"),
272
+ ("put", "take"),
273
+ ("open", "close"),
274
+ ("close", "open"),
275
+ }
276
+ if self.last_action:
277
+ prev = self.last_action.split()[0]
278
+ curr = action.split()[0]
279
+ if (prev, curr) in inverse_pairs:
280
+ if verbose:
281
+ print("Reversible action blocked")
282
+ tool_args = {"action": "look"}
283
+ action = "look"
284
+
285
+ self.recent_actions.append(action)
286
+ if len(self.recent_actions) > 5:
287
+ self.recent_actions = self.recent_actions[-5:]
288
+
289
+ moves += 1
290
+
291
+
292
+
293
+ try:
294
+ result = await client.call_tool(tool_name, tool_args)
295
+ observation = self.extract_result(result)
296
+
297
+
298
+ if tool_name == "inventory":
299
+ self.current_inventory = self.parse_inventory(observation)
300
+ inv_text = " ".join(self.current_inventory).lower()
301
+ for obj in self.object_goals:
302
+ if obj in inv_text:
303
+ self.object_goals[obj]["obtained"] = True
304
+
305
+ except Exception as e:
306
+ observation = f"Error: {e}"
307
+ print(f"Error {e}")
308
+
309
+ new_location = self.extract_location(observation, location)
310
+
311
+ if tool_name == "play_action":
312
+ action = tool_args.get("action", "")
313
+ self.tried_edges[(location, action)] = new_location
314
+ self.last_action = action
315
+
316
+ old_score = self.score
317
+ self.update_score(observation)
318
+
319
+ if tool_name == "play_action":
320
+ if new_location == location and self.score == old_score:
321
+ self.failure[action] = self.failure.get(action, 0) + 1
322
+
323
+ if new_location != location:
324
+ location = new_location
325
+ locations_visited.add(location)
326
+ self.visited_locations.add(location)
327
+
328
+ self.history.append({
329
+ "step": step,
330
+ "thought": thought,
331
+ "tool": tool_name,
332
+ "args": tool_args,
333
+ "result": observation[:200],
334
+ "location": location,
335
+ "score": self.score
336
+ })
337
+ if len(self.history) > 10:
338
+ self.history = self.history[-10:]
339
+
340
+ history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
341
+ print(f"Location : {location} | Score: {self.score} | Moves: {moves}")
342
+
343
+ if self.is_game_over(observation):
344
+ print("\n*** GAME OVER ***")
345
+ break
346
+
347
+
348
  return RunResult(
349
+ final_score=self.score,
350
  max_score=350, # Zork1 max score, adjust if needed
351
  moves=moves,
352
+ locations_visited=self.visited_locations,
353
  game_completed=False,
354
  history=history,
355
  )
356
 
357
+ def _build_prompt(self, observation: str) -> str:
358
+
359
+ lines = []
360
+ lines.append("RULES YOU MUST RESPECT:")
361
+ lines.append("- Do not repeat actions that recently failed.")
362
+ lines.append("- Do not loop on the same action.")
363
+ lines.append("- Prefer actions that reveal new information or areas.")
364
+
365
+ lines.append("\nCURRENT FACTS:")
366
+ lines.append(f"- Score: {self.score}")
367
+ lines.append(f"- Known locations: {len(self.visited_locations)}")
368
+
369
+ if self.history:
370
+ last = self.history[-1]
371
+ last_action = last.get("args", {}).get("action") or last.get("tool", "?")
372
+ last_result = last.get("result", "")
373
+ lines.append("\nLAST MOVE:")
374
+ lines.append(f"- Action: {last_action}")
375
+ lines.append(f"- Outcome: {last_result[:120]}")
376
+
377
+ blocked = [
378
+ name for name, count in self.failure.items() if count >= 2
379
+ ]
380
+ if blocked:
381
+ lines.append("\nDO NOT TRY AGAIN:")
382
+ for b in blocked:
383
+ lines.append(f"- {b}")
384
+
385
+ if self.valid_actions:
386
+ lines.append("\nACTIONS CURRENTLY POSSIBLE:")
387
+ for a in self.valid_actions[:10]:
388
+ lines.append(f"- {a}")
389
+
390
+ if self.edge_candidates:
391
+ lines.append("\nUNEXPLORED ACTIONS FROM THIS LOCATION:")
392
+ for a in self.edge_candidates[:5]:
393
+ lines.append(f"- {a}")
394
+
395
+
396
+ lines.append("\nRAW OBSERVATION FROM THE WORLD:")
397
+ lines.append(observation.strip())
398
+
399
+ unobtained = [
400
+ obj for obj, info in self.object_goals.items()
401
+ if not info["obtained"]
402
+ ]
403
+
404
+ if unobtained:
405
+ lines.append("\nOBJECT-DRIVEN GOALS:")
406
+ for obj in unobtained[:3]:
407
+ loc = self.object_goals[obj]["seen_at"]
408
+ lines.append(f"- {obj} (seen at: {loc})")
409
+
410
+ lines.append(
411
+ "\nPRIORITY RULE:\n"
412
+ "Prefer actions that help obtain or interact with these objects "
413
+ "over random exploration."
414
+ )
415
+
416
+
417
+ lines.append(
418
+ "\nDECISION REQUIRED:\n"
419
+ "Choose ONE action that is different from your recent behavior "
420
+ "and maximizes the chance of discovering something new."
421
+ )
422
+
423
+ return "\n".join(lines)
424
 
425
+
426
+
427
+ def _parse_response(self, response: str, valid_tools: list[str]) -> tuple[str, str, dict]:
428
+
429
+ fields = self._index_response(response)
430
+
431
+ thought = self.extract_thought(fields)
432
+ tool = self.extract_tool(fields, valid_tools)
433
+ args = self.extract_args(fields)
434
+
435
+ return thought, tool, args
 
 
 
436
 
437
  def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
438
  """
 
441
  This is a convenience wrapper - you can also use call_llm() directly.
442
  """
443
  return call_llm(prompt, system_prompt, seed)
444
+
445
+
446
+ def _index_response(self, text: str) -> dict:
447
+ indexed = {}
448
+
449
+ for line in text.splitlines():
450
+ if ":" not in line:
451
+ continue
452
+ key, value = line.split(":", 1)
453
+ indexed[key.strip().lower()] = value.strip()
454
+
455
+ return indexed
456
+
457
+ # =============================================================================
458
+ # Extract information from game observations
459
+ # =============================================================================
460
+
461
+ def extract_objects(self, text: str) -> list[str]:
462
+ found = set()
463
+ text = text.lower()
464
+ for obj in self.important_objects:
465
+ if obj in text:
466
+ found.add(obj)
467
+ return list(found)
468
+
469
def extract_thought(self, fields: dict) -> str:
    """Return the reply's "thought" entry, or a placeholder when absent."""
    try:
        return fields["thought"]
    except KeyError:
        return "No reasoning provided"
471
+
472
def extract_tool(self, fields: dict, valid_tools: list[str]) -> str:
    """Return the tool name requested in an indexed LLM reply.

    The value is lower-cased, markdown asterisks and backticks are
    removed, and only the first whitespace-separated token is kept.
    A missing, empty, or decoration-only value (e.g. "** **") yields
    "play_action" instead of raising IndexError.

    Args:
        fields: Indexed reply as produced by _index_response().
        valid_tools: Accepted for interface compatibility; not consulted
            here (format_llm_response() checks the name against it).

    Returns:
        The cleaned tool token, or "play_action" as the default.
    """
    raw = str(fields.get("tool") or "play_action").lower()
    cleaned = raw.translate(str.maketrans("", "", "*`"))

    # split() on a whitespace-only string returns [], so guard the index.
    tokens = cleaned.split()
    return tokens[0] if tokens else "play_action"
478
+
479
def extract_args(self, fields: dict) -> dict:
    """Decode the ARGS entry of an indexed LLM reply into a dict.

    Decoding is attempted in this order:
      1. the payload as-is (valid JSON, possibly containing apostrophes
         such as "take adventurer's bag");
      2. the payload with single quotes swapped for double quotes
         (Python-dict style output);
      3. a regex that pulls out just the "action" value from malformed
         text.
    Only JSON objects are accepted; a bare string, list or number is
    ignored because callers use the result with ``.get``.

    Args:
        fields: Indexed reply as produced by _index_response().

    Returns:
        The decoded argument dict, or {"action": "look"} when nothing
        usable is found.
    """
    payload = fields.get("args")
    if not payload:
        return {"action": "look"}

    requoted = payload.replace("'", '"')

    for candidate in (payload, requoted):
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed

    # Malformed JSON (e.g. missing closing brace): salvage the action.
    for candidate in (payload, requoted):
        match = re.search(r'"action"\s*:\s*"([^"]+)"', candidate)
        if match:
            return {"action": match.group(1)}

    return {"action": "look"}
494
+
495
+
496
+
497
def extract_result(self, result) -> str:
    """Pull the text out of a tool-call result.

    Handles three shapes: an object with a non-empty ``content``
    sequence (first item's ``.text``), a non-empty list (first item's
    ``.text`` if present, else its ``str``), and anything else
    (``str(result)``).
    """
    content = getattr(result, "content", None)
    if content:
        return content[0].text

    if isinstance(result, list) and result:
        head = result[0]
        if hasattr(head, "text"):
            return head.text
        return str(head)

    return str(result)
503
+
504
+ def extract_location(self, observation: str, previous_location: str | None = None) -> str:
505
+ if not observation:
506
+ return previous_location or "Unknown"
507
+
508
+ line = observation.strip().splitlines()[0].strip()
509
+ lower = line.lower()
510
+
511
+ if any(k in lower for k in ("copyright", "infocom", "all rights reserved")):
512
+ return previous_location or "Unknown"
513
+ if lower.startswith(("you ", "opening", "opened", "taken", "done", "with great effort", "the ")):
514
+ return previous_location or "Unknown"
515
+ if line.endswith((".", "!", "?")) or len(line.split()) > 6:
516
+ return previous_location or "Unknown"
517
+
518
+ return line
519
+
520
+ # =============================================================================
521
+ # Function to parse inventory
522
+ # =============================================================================
523
def parse_inventory(self, blob: str) -> list[str]:
    """Turn an inventory listing into a sorted list of unique item names.

    Text mentioning "empty-handed" or "nothing" means no items. Otherwise
    everything after the first colon (or the whole text when there is no
    colon) is split on commas and newlines; pieces containing "you",
    "carrying" or "parent" are discarded as boilerplate.
    """
    if not blob:
        return []

    lowered = blob.lower()
    if "empty-handed" in lowered or "nothing" in lowered:
        return []

    _, colon, tail = blob.partition(":")
    listing = tail if colon else blob

    noise = ("you", "carrying", "parent")
    items = set()
    for piece in listing.replace("\n", ",").split(","):
        name = piece.strip().lower()
        if not name:
            continue
        if any(word in name for word in noise):
            continue
        items.add(name)

    return sorted(items)
542
+
543
+
544
+
545
+ # =============================================================================
546
+ # Function to update score
547
+ # =============================================================================
548
+
549
+ def update_score(self, text: str) -> None:
550
+ if not text:
551
+ return
552
+
553
+ t = text.replace("[", "").replace("]", "").lower()
554
+
555
+ if "score" not in t:
556
+ return
557
+
558
+ parts = t.split("score")
559
+ values = []
560
+
561
+ for p in parts[1:]:
562
+ digits = ""
563
+ for ch in p:
564
+ if ch.isdigit():
565
+ digits += ch
566
+ elif digits:
567
+ break
568
+ if digits:
569
+ values.append(int(digits))
570
+
571
+ if values:
572
+ best = max(values)
573
+ if best > self.score:
574
+ self.score = best
575
+
576
+
577
+ # =============================================================================
578
+ # Function to reformat LLM response
579
+ # =============================================================================
580
+
581
def format_llm_response(
    self,
    tool_name: str,
    tool_args: dict,
    valid_tools: list[str]
) -> tuple[str, dict]:
    """Normalise the tool call parsed from an LLM reply.

    Tool names are lower-cased, mapped through a small alias table and
    replaced by "play_action" when not in *valid_tools*. Non-play tools
    are always called with empty arguments. For "play_action" the action
    is lower-cased, stripped of markdown/quote characters, whitespace is
    collapsed, the leading verb is mapped through an alias table, and
    meta commands (help/quit/save/load) or dangerous verbs
    (jump/dive/leap) are replaced by "look".

    Fixes over the previous version:
      * an action made only of decoration characters (e.g. "**") no
        longer raises IndexError; it becomes "look";
      * a non-dict *tool_args* no longer raises AttributeError;
      * "pick up X" becomes "take X" rather than "take up X".

    Args:
        tool_name: Tool name as extracted from the reply.
        tool_args: Argument mapping as extracted from the reply.
        valid_tools: Tool names exposed by the server.

    Returns:
        Tuple of (tool_name, args) that is safe to pass to the client.
    """
    tool_aliases = {
        "action": "play_action",
        "do": "play_action",
        "command": "play_action",
        "map": "get_map",
        "location": "get_map",
        "mem": "memory",
        "state": "memory",
        "status": "memory",
        "inv": "inventory",
        "items": "inventory",
    }
    verb_aliases = {
        "check": "examine",
        "inspect": "examine",
        "use": "examine",
        "investigate": "examine",
        "grab": "take",
        "pick": "take",
        "pickup": "take",
        "search": "look",
    }
    # Meta commands that would leave or disturb the session.
    forbidden_prefixes = {"help", "quit", "save", "load"}
    # Verbs treated as too risky to attempt.
    dangerous_verbs = {"jump", "dive", "leap"}

    tool_name = tool_name.lower().strip()
    tool_name = tool_aliases.get(tool_name, tool_name)
    if tool_name not in valid_tools:
        tool_name = "play_action"

    if tool_name != "play_action":
        return tool_name, {}

    raw_action = tool_args.get("action", "") if isinstance(tool_args, dict) else ""
    if not isinstance(raw_action, str) or not raw_action.strip():
        return "play_action", {"action": "look"}

    # Drop markdown/quote characters in one pass, then collapse whitespace.
    cleaned = raw_action.lower().translate(str.maketrans("", "", "*`\"'"))
    words = cleaned.split()
    if not words:
        # The action consisted solely of decoration characters.
        return "play_action", {"action": "look"}

    # "pick up lamp" must become "take lamp", not "take up lamp".
    if words[0] == "pick" and len(words) > 1 and words[1] == "up":
        del words[1]
    words[0] = verb_aliases.get(words[0], words[0])

    if words[0] in forbidden_prefixes or words[0] in dangerous_verbs:
        return "play_action", {"action": "look"}

    return "play_action", {"action": " ".join(words)}
663
+
664
+
665
def is_game_over(self, text: str) -> bool:
    """Report whether *text* contains a known death / end-of-game phrase.

    Matching is a case-insensitive substring search; empty text is never
    treated as game over.
    """
    if not text:
        return False

    lowered = text.lower()
    end_markers = (
        "game over",
        "you have died",
        "you are dead",
        "*** you have died ***",
        "you die",
        "you have been killed",
        "your score was",
    )
    for marker in end_markers:
        if marker in lowered:
            return True
    return False
682
+
683
 
684
 
685
  # =============================================================================
mcp_server.py CHANGED
@@ -48,43 +48,270 @@ mcp = FastMCP("Student Text Adventure Server")
48
  class GameManager:
49
  """
50
  Manages the text adventure game state.
51
-
52
- TODO: Extend this class to track:
53
- - Action history (for memory tool)
54
- - Explored locations (for mapping)
55
- - Current score and moves
56
  """
57
 
58
- def __init__(self):
59
- self.env: TextAdventureEnv = None
60
- self.state = None
61
- self.game_name: str = ""
62
- # TODO: Add more state tracking
63
- # self.history: list[tuple[str, str]] = []
64
- # self.explored_locations: dict[str, set[str]] = {}
65
- # self.current_location: str = ""
66
-
67
- def initialize(self, game: str = "zork1"):
68
- """Initialize or reset the game."""
69
- self.game_name = game
70
  self.env = TextAdventureEnv(game)
 
71
  self.state = self.env.reset()
72
- # TODO: Reset your state tracking here
73
- return self.state.observation
 
 
 
 
 
 
74
 
75
- def step(self, action: str) -> str:
76
- """Execute an action and return the result."""
77
- if self.env is None:
78
- self.initialize()
79
-
 
 
 
 
 
 
 
 
 
 
80
  self.state = self.env.step(action)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
- # TODO: Update your state tracking here
83
- # self.history.append((action, self.state.observation))
84
- # Update location tracking, etc.
85
-
86
- return self.state.observation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  def get_score(self) -> int:
89
  """Get current score."""
90
  return self.state.score if self.state else 0
@@ -95,18 +322,16 @@ class GameManager:
95
 
96
 
97
  # Global game manager
98
- _game = GameManager()
99
 
100
 
101
- def get_game() -> GameManager:
102
- """Get or initialize the game manager."""
103
- global _game
104
- if _game.env is None:
105
- # Get game from environment variable (set by evaluator)
106
- game = os.environ.get("GAME", "zork1")
107
- _game.initialize(game)
108
- return _game
109
 
 
 
 
 
 
 
110
 
111
  # =============================================================================
112
  # MCP Tools - IMPLEMENT THESE
@@ -114,90 +339,74 @@ def get_game() -> GameManager:
114
 
115
  @mcp.tool()
116
  def play_action(action: str) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  """
118
- Execute a game command and return the result.
119
-
120
- This is the main tool for interacting with the game.
121
 
122
- Args:
123
- action: The command to execute (e.g., "north", "take lamp", "open mailbox")
124
-
125
  Returns:
126
- The game's response to the action
127
-
128
- Valid commands include:
129
- - Movement: north, south, east, west, up, down, enter, exit
130
- - Objects: take <item>, drop <item>, open <thing>, examine <thing>
131
- - Other: look, inventory, read <thing>, turn on lamp
132
  """
133
  game = get_game()
 
 
 
 
 
 
 
134
 
135
- # TODO: You might want to add action validation here
136
- # TODO: You might want to include score changes in the response
137
-
138
- result = game.step(action)
139
-
140
- # Optional: Append score info
141
- # result += f"\n[Score: {game.get_score()} | Moves: {game.get_moves()}]"
142
-
143
- return result
144
 
145
 
146
- # TODO: Implement additional tools to help your agent
 
 
 
 
 
 
 
 
 
 
147
 
148
- # @mcp.tool()
149
- # def memory() -> str:
150
- # """
151
- # Get the current game state summary.
152
- #
153
- # Returns:
154
- # A summary including current location, score, moves, and recent history
155
- # """
156
- # game = get_game()
157
- # # TODO: Return useful state information
158
- # pass
159
-
160
-
161
- # @mcp.tool()
162
- # def inventory() -> str:
163
- # """
164
- # Check what the player is carrying.
165
- #
166
- # Returns:
167
- # List of items in the player's inventory
168
- # """
169
- # game = get_game()
170
- # result = game.step("inventory")
171
- # return result
172
-
173
-
174
- # @mcp.tool()
175
- # def get_map() -> str:
176
- # """
177
- # Get a map of explored locations.
178
- #
179
- # Returns:
180
- # A text representation of explored locations and connections
181
- # """
182
- # game = get_game()
183
- # # TODO: Return map of explored locations
184
- # pass
185
-
186
-
187
- # @mcp.tool()
188
- # def get_valid_actions() -> str:
189
- # """
190
- # Get a list of likely valid actions from the current location.
191
- #
192
- # Returns:
193
- # List of actions that might work here
194
- # """
195
- # # This is a hint: Jericho provides get_valid_actions()
196
- # game = get_game()
197
- # if game.env and game.env.env:
198
- # valid = game.env.env.get_valid_actions()
199
- # return "Valid actions: " + ", ".join(valid[:20])
200
- # return "Could not determine valid actions"
201
 
202
 
203
  # =============================================================================
@@ -205,5 +414,4 @@ def play_action(action: str) -> str:
205
  # =============================================================================
206
 
207
  if __name__ == "__main__":
208
- # This runs the server with stdio transport (for MCP clients)
209
  mcp.run()
 
48
  class GameManager:
49
  """
50
  Manages the text adventure game state.
 
 
 
 
 
51
  """
52
 
53
def __init__(self, game: str = "zork1"):
    """
    Start a game session and initialise the tracking state.

    Args:
        game: Name of the game passed to TextAdventureEnv.
    """
    self.env = TextAdventureEnv(game)
    self.game_name = game

    # reset() must run before any step(); its return value is replaced
    # straight away by the observation of an explicit "look".
    self.state = self.env.reset()
    self.state = self.env.step("look")

    self.history = []
    self.explored_locations = {}
    self.object_goals: dict[str, dict] = {}

    # extract_location() falls back to self.current_location whenever the
    # first line of the observation does not look like a room title, so the
    # attribute has to exist *before* the first call. Previously it did not,
    # and such an observation raised AttributeError during construction.
    self.current_location = "Unknown"
    self.current_location = self.extract_location(self.state.observation)
63
+
64
+
65
 
66
def update_map(self, prev_location: str, action: str, new_location: str):
    """
    Record that `action` taken in `prev_location` led to `new_location`.

    `prev_location` is always registered as a known node. A transition is
    stored as the string "<action> -> <new_location>" only when the move
    actually changed the location.
    """
    exits = self.explored_locations.setdefault(prev_location, set())

    if new_location == prev_location:
        return

    exits.add(f"{action} -> {new_location}")
74
+
75
+
76
def choose_an_action(self, action: str) -> str:
    """
    Send `action` to the game, update the tracked state and log the step.

    Args:
        action: The command to execute in the environment.

    Returns:
        The observation text produced by the game for this action.
    """
    # Snapshot the values needed for the no-op comparison before stepping.
    text_before = self.state.observation
    place_before = self.current_location
    score_before = self.state.score

    self.state = self.env.step(action)
    text_after = self.state.observation

    # extract_location() may fall back to the previous location, so it has
    # to run before self.current_location is overwritten.
    place_after = self.extract_location(text_after)
    self.current_location = place_after

    something_changed = (
        text_after.strip() != text_before.strip()
        or place_after != place_before
        or self.state.score != score_before
    )

    entry = {
        "action": action,
        "observation": text_after,
        "location": place_after,
        "no_op": not something_changed,
    }
    self.history.append(entry)
    # Keep only the 35 most recent steps.
    self.history = self.history[-35:]

    return text_after
102
+
103
+
104
+
105
def extract_location(self, observation: str) -> str:
    """
    Guess the current room name from the first line of an observation.

    The first non-blank line is treated as a room title unless it looks like
    a banner or an ordinary game message, in which case the previously known
    location is returned instead.

    Args:
        observation: Raw text returned by the game.

    Returns:
        The room title, or the last known location ("Unknown" if there is
        none yet).
    """
    # getattr: this method is also called from __init__ before
    # current_location has been assigned.
    fallback = getattr(self, "current_location", None) or "Unknown"

    if not observation:
        return fallback

    # A whitespace-only observation strips to "" and has no lines at all.
    lines = observation.strip().splitlines()
    if not lines:
        return fallback

    line = lines[0].strip()
    lower = line.lower()

    # Start-up banner text.
    if any(k in lower for k in ("copyright", "infocom", "all rights reserved")):
        return fallback
    # Typical openings of action feedback rather than room titles.
    # NOTE(review): the "the " prefix also rejects titles that begin with
    # "The" — confirm this is intended.
    if lower.startswith(("you ", "opening", "opened", "taken", "done", "with great effort", "the ")):
        return fallback
    # Full sentences and long lines are treated as prose, not titles.
    if line.endswith((".", "!", "?")) or len(line.split()) > 6:
        return fallback

    return line
120
+
121
+
122
def is_valid_location(self, candidate: str) -> bool:
    """
    Decide whether `candidate` plausibly is a room name.

    Args:
        candidate: Text to check; may be empty or None.

    Returns:
        False for empty input, text ending in ".", text longer than five
        words, or text starting with a known game-message prefix; True
        otherwise.
    """
    if not candidate:
        return False

    text = candidate.strip()
    message_openers = (
        "taken",
        "done",
        "opening",
        "you",
        "there is",
        "with great effort",
        "it is",
    )

    ends_like_sentence = text.endswith(".")
    too_many_words = len(text.split()) > 5
    starts_like_message = text.lower().startswith(message_openers)

    return not (ends_like_sentence or too_many_words or starts_like_message)
148
+
149
+
150
def get_memory(self) -> str:
    """
    Build a text snapshot of the session for the agent.

    Returns:
        A multi-section report: location/score/moves/game, the last six
        logged actions, progress signals, a stagnation flag (three or more
        no-op actions among those six) and the last observation verbatim.
    """
    window = [h for h in (self.history or [])[-6:] if isinstance(h, dict)]

    recent_block = "\n".join(
        f"- {h.get('action', '?')} @ {h.get('location', '?')}" for h in window
    ) or "- none"
    stalled_steps = sum(1 for h in window if h.get("no_op", False))

    signals = []
    if len(self.explored_locations) > 1:
        signals.append("new locations visited")
    if self.state.score > 0:
        signals.append(f"score = {self.state.score}")

    collected = [
        name for name, info in self.object_goals.items()
        if info.get("obtained")
    ]
    if collected:
        # Only the first three obtained objects are listed.
        signals.append(f"objects obtained: {', '.join(collected[:3])}")

    if not signals:
        signals.append("no clear progress")

    stagnation_note = (
        "repeated ineffective actions detected" if stalled_steps >= 3 else "none"
    )

    report = [
        "STATE SNAPSHOT",
        f"Location: {self.current_location}",
        f"Score: {self.state.score}",
        f"Moves: {self.state.moves}",
        f"Game: {self.game_name}",
        "",
        "RECENT ACTIONS",
        recent_block,
        "",
        "PROGRESS",
        f"- {'; '.join(signals)}",
        "",
        "STAGNATION",
        f"- {stagnation_note}",
        "",
        "LAST OBSERVATION (verbatim)",
        f"{self.state.observation}",
        "",  # the report ends with a trailing newline
    ]
    return "\n".join(report)
212
 
213
+
214
+
215
def get_map(self) -> str:
    """
    Render the explored map as text for the agent.

    Returns:
        A report with the current location, every recorded transition,
        nodes with few recorded exits, nodes with none, transitions that
        lead back to their own origin, and a navigation hint.
    """
    if not self.explored_locations:
        return "MAP SNAPSHOT\n- No locations visited yet."

    output = [
        "MAP SNAPSHOT",
        f"Current location: {self.current_location}",
    ]

    exploration_frontier = []
    exhausted_nodes = []
    cyclic_nodes = []

    for node, transitions in self.explored_locations.items():
        degree = len(transitions)
        if degree == 0:
            exhausted_nodes.append(node)
        elif degree <= 2:
            exploration_frontier.append(node)

        # Transitions are stored as "<action> -> <destination>". Compare the
        # parsed destination exactly: the previous substring test flagged a
        # node whenever its name merely occurred inside another room's name
        # (e.g. "Forest" vs "east -> Forest Path").
        for transition in transitions:
            _, arrow, destination = transition.rpartition(" -> ")
            if arrow and destination == node:
                cyclic_nodes.append(node)
                break

    output.append("\nKNOWN CONNECTIONS")
    for node, transitions in sorted(self.explored_locations.items()):
        if not transitions:
            output.append(f"- {node}: no confirmed exits")
        else:
            compressed = ", ".join(sorted(transitions))
            output.append(f"- {node} → {compressed}")

    if exploration_frontier:
        output.append("\nEXPLORATION FRONTIER")
        output.extend(
            f"- {node} (few exits explored)" for node in exploration_frontier[:5]
        )

    if exhausted_nodes:
        output.append("\nLOW VALUE AREAS")
        output.extend(
            f"- {node} (dead end or fully explored)" for node in exhausted_nodes[:5]
        )

    if cyclic_nodes:
        output.append("\nPOTENTIAL LOOPS DETECTED")
        output.extend(f"- {node}" for node in cyclic_nodes[:3])

    if exploration_frontier:
        output.append(
            "\nNAVIGATION HINT\n"
            f"- Prefer exploring from: {exploration_frontier[0]}"
        )

    return "\n".join(output)
272
 
273
+
274
def get_inventory(self) -> str:
    """
    Describe the carried items as a single line of text.

    Each entry of `self.state.inventory` is converted to a lowercase name
    (text after the first ":" and before the word "parent", if present) and
    tagged as light / weapon / key item / valuable when a keyword matches.

    Returns:
        "Inventory: empty (hands free)." when nothing is carried, otherwise
        "Inventory (<n> item[s]): <comma-separated names>".
    """
    carried = getattr(self.state, "inventory", None) or []
    if not carried:
        return "Inventory: empty (hands free)."

    # First matching rule wins, in this order.
    tag_rules = (
        (("lamp", "torch", "lantern"), "light"),
        (("sword", "knife", "weapon"), "weapon"),
        (("key",), "key item"),
        (("treasure", "jewel", "egg", "canary"), "valuable"),
    )

    labels = []
    for entry in carried:
        text = str(entry)

        _, colon, after_colon = text.partition(":")
        if colon:
            text = after_colon
        if "parent" in text.lower():
            text = text.lower().split("parent")[0]

        name = text.strip().lower()

        tag = next(
            (label for words, label in tag_rules if any(w in name for w in words)),
            None,
        )
        labels.append(f"{name} ({tag})" if tag else name)

    total = len(labels)
    plural = "s" if total > 1 else ""
    return f"Inventory ({total} item{plural}): " + ", ".join(labels)
313
+
314
+
315
  def get_score(self) -> int:
316
  """Get current score."""
317
  return self.state.score if self.state else 0
 
322
 
323
 
324
  # Global game manager
325
# Shared GameManager instance used by every tool. It is constructed at import
# time, which already resets the environment and issues an initial "look"
# (see GameManager.__init__).
g = GameManager()
326
 
327
 
 
 
 
 
 
 
 
 
328
 
329
def get_game(reseed: bool = False) -> GameManager:
    """
    Return the shared GameManager, creating a new one when required.

    Args:
        reseed: When True, discard the current manager and start a fresh one.

    Returns:
        The module-level GameManager instance.
    """
    global g

    must_create = reseed or g is None
    if must_create:
        g = GameManager()

    return g
335
 
336
  # =============================================================================
337
  # MCP Tools - IMPLEMENT THESE
 
339
 
340
  @mcp.tool()
341
  def play_action(action: str) -> str:
342
+ game = get_game()
343
+
344
+ origin = game.current_location
345
+ before_score = game.state.score
346
+
347
+ result = game.choose_an_action(action)
348
+ destination = game.current_location
349
+
350
+ if origin and destination and origin != destination:
351
+ game.update_map(origin, action, destination)
352
+
353
+ footer = f"\n[Score: {game.state.score}"
354
+ footer+= f"\nMoves: {game.state.moves}]"
355
+
356
+ return result + footer
357
+
358
+
359
# Additional tools that expose game state to the agent
360
+
361
@mcp.tool()
def memory() -> str:
    """
    Get the current game state summary.

    Returns:
        A summary including current location, score, moves, and recent history
    """
    # Thin wrapper: the report itself is built by GameManager.get_memory().
    return get_game().get_memory()
371
+
372
+
373
@mcp.tool()
def inventory() -> str:
    """
    Check what the player is carrying.

    Returns:
        List of items in the player's inventory
    """
    # Thin wrapper: formatting is done by GameManager.get_inventory().
    game = get_game()
    return game.get_inventory()
 
 
 
 
 
382
 
383
 
384
@mcp.tool()
def get_map() -> str:
    """
    Get a map of explored locations.

    Returns:
        A text representation of explored locations and connections
    """
    # Thin wrapper: the map text is rendered by GameManager.get_map().
    return get_game().get_map()
394
+
395
 
396
@mcp.tool()
def get_valid_actions() -> str:
    """
    Get a list of likely valid actions from the current location.

    Returns:
        List of actions that might work here
    """
    game = get_game()

    # The wrapped environment is reached through game.env.env; bail out when
    # either layer is missing.
    inner_env = game.env.env if game.env else None
    if not inner_env:
        return "Could not determine valid actions"

    candidates = inner_env.get_valid_actions()
    # Only the first 20 actions are reported.
    return "Valid actions: " + ", ".join(candidates[:20])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
410
 
411
 
412
  # =============================================================================
 
414
  # =============================================================================
415
 
416
  if __name__ == "__main__":
 
417
  mcp.run()