Hugo PERCOT commited on
Commit
a9ad18c
·
1 Parent(s): 615a63b
Files changed (3) hide show
  1. README.md +16 -4
  2. agent.py +482 -81
  3. mcp_server.py +242 -52
README.md CHANGED
@@ -18,11 +18,23 @@ This is my submission for the Text Adventure Agent assignment. My agent uses the
18
 
19
  ## Approach
20
 
21
- <!-- Describe your approach here -->
 
 
 
22
 
23
- - What strategy does your agent use?
24
- - What tools did you implement in your MCP server?
25
- - Any interesting techniques or optimizations?
 
 
 
 
 
 
 
 
 
26
 
27
  ## Files
28
 
 
18
 
19
  ## Approach
20
 
21
+ - Strategy: score-first ReAct with explicit anti-loop controls and exploration frontier bias.
22
+ - The agent tracks repeated `(location, action)` pairs and loop signals, then overrides low-yield repeated actions with unexplored movement.
23
+ - Prompting includes compact recent history, score/move/loop diagnostics, inventory snapshot, and map frontier hints.
24
+ - Tool usage is constrained so `play_action` dominates turns; planning tools are used mainly when stagnation appears.
25
 
26
+ Implemented MCP tools in `mcp_server.py`:
27
+ - `play_action(action)` executes commands and appends score + loop diagnostics.
28
+ - `memory()` returns state summary, recent history, and notes.
29
+ - `inventory()` returns current inventory without consuming a move.
30
+ - `get_map()` returns explored transitions and untried frontier directions.
31
+ - `get_stats()` returns compact JSON-like state used by the agent for robust tracking.
32
+ - `remember(key, value)` / `recall(key)` provide persistent note memory for clues.
33
+
34
+ Interesting optimizations:
35
+ - Stagnation-aware action override (`no_progress_streak`, repeated actions, same-location streak).
36
+ - Frontier extraction from map snapshots for systematic exploration.
37
+ - Robust parser/validator for LLM tool calls (malformed JSON and tool alias handling).
38
 
39
  ## Files
40
 
agent.py CHANGED
@@ -26,7 +26,9 @@ Tips:
26
  import json
27
  import os
28
  import re
 
29
  from dataclasses import dataclass, field
 
30
  from typing import Optional
31
 
32
  from dotenv import load_dotenv
@@ -35,36 +37,23 @@ from huggingface_hub import InferenceClient
35
  # Load environment variables
36
  load_dotenv()
37
 
38
- # Set USE_LOCAL_MODEL=1 in your .env to use a locally downloaded model
39
- USE_LOCAL_MODEL = os.getenv("USE_LOCAL_MODEL", "0").strip() in ("1", "true", "yes")
40
- LOCAL_MODEL_ID = os.getenv("LOCAL_MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")
41
-
42
  # =============================================================================
43
  # LLM Configuration - DO NOT MODIFY
44
  # =============================================================================
45
 
46
- # Model to use (fixed for fair evaluation)
47
- LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
48
 
49
- # Initialize the LLM client based on mode
50
- _local_pipeline = None
51
 
52
- if USE_LOCAL_MODEL:
53
- import torch
54
- from transformers import pipeline as _hf_pipeline
55
 
56
- _local_pipeline = _hf_pipeline(
57
- "text-generation",
58
- model=LOCAL_MODEL_ID,
59
- torch_dtype=torch.bfloat16,
60
- device_map="auto",
61
- )
62
- LLM_CLIENT = None
63
- else:
64
- _hf_token = os.getenv("HF_TOKEN")
65
- if not _hf_token:
66
- raise ValueError("HF_TOKEN not found. Set it in your .env file.")
67
- LLM_CLIENT = InferenceClient(token=_hf_token)
68
 
69
 
70
  def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
@@ -92,14 +81,30 @@ def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300)
92
  {"role": "user", "content": prompt},
93
  ]
94
 
95
- if USE_LOCAL_MODEL and _local_pipeline is not None:
96
- outputs = _local_pipeline(
97
- messages,
98
- max_new_tokens=max_tokens,
99
- temperature=0.0001, # Near-deterministic (0.0 unsupported by some backends)
100
- do_sample=True,
 
 
 
 
 
 
 
 
 
 
101
  )
102
- return outputs[0]["generated_text"][-1]["content"]
 
 
 
 
 
 
103
 
104
  response = LLM_CLIENT.chat.completions.create(
105
  model=LLM_MODEL,
@@ -134,8 +139,12 @@ GOAL: Explore the world, solve puzzles, and maximize your score.
134
 
135
  AVAILABLE TOOLS (use via MCP):
136
  - play_action: Execute a game command (north, take lamp, open mailbox, etc.)
137
- - memory: Get current game state and history (if implemented)
138
- - inventory: Check what you're carrying (if implemented)
 
 
 
 
139
 
140
  VALID GAME COMMANDS for play_action:
141
  - Movement: north, south, east, west, up, down, enter, exit
@@ -151,6 +160,12 @@ Example:
151
  THOUGHT: I should look around to see where I am.
152
  TOOL: play_action
153
  ARGS: {"action": "look"}
 
 
 
 
 
 
154
  """
155
 
156
 
@@ -172,10 +187,16 @@ class StudentAgent:
172
 
173
  def __init__(self):
174
  """Initialize your agent here."""
175
- # TODO: Initialize any state tracking you need
176
- # self.history = []
177
- # self.visited_locations = set()
178
- pass
 
 
 
 
 
 
179
 
180
  async def run(
181
  self,
@@ -198,71 +219,255 @@ class StudentAgent:
198
  Returns:
199
  RunResult with final score and statistics
200
  """
201
- # TODO: Implement your ReAct loop here
202
- #
203
- # Basic structure:
204
- # 1. Get initial observation (call play_action with "look")
205
- # 2. Loop for max_steps:
206
- # a. Build prompt with current observation and history
207
- # b. Call LLM to get thought and action
208
- # c. Parse the response to extract tool and args
209
- # d. Call the tool via client.call_tool(tool_name, args)
210
- # e. Update history and state
211
- # f. Check for game over
212
- # 3. Return RunResult with final statistics
213
-
214
- # Example of calling a tool:
215
- # result = await client.call_tool("play_action", {"action": "look"})
216
- # observation = result[0].text if result else "No response"
217
-
218
- # Example of calling the LLM:
219
- # response = call_llm(
220
- # prompt="Current observation: " + observation,
221
- # system_prompt=SYSTEM_PROMPT,
222
- # seed=seed,
223
- # )
224
-
225
- # Placeholder implementation - replace with your code
226
  locations_visited = set()
227
- history = []
228
  final_score = 0
229
  moves = 0
 
 
230
 
231
- # TODO: Your implementation here
232
- # ...
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  return RunResult(
235
  final_score=final_score,
236
- max_score=350, # Zork1 max score, adjust if needed
237
  moves=moves,
238
  locations_visited=locations_visited,
239
- game_completed=False,
 
240
  history=history,
241
  )
242
-
243
- def _build_prompt(self, observation: str, history: list) -> str:
 
 
 
 
 
 
 
 
 
244
  """
245
  Build the prompt for the LLM.
246
-
247
- TODO: Implement this to create effective prompts
248
  """
249
- # TODO: Combine system prompt, history, and current observation
250
- pass
251
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  def _parse_response(self, response: str) -> tuple[str, str, dict]:
253
  """
254
  Parse LLM response to extract thought, tool name, and arguments.
255
-
256
- TODO: Implement robust parsing
257
-
258
  Returns:
259
  Tuple of (thought, tool_name, args_dict)
260
  """
261
- # TODO: Parse the response format:
262
- # THOUGHT: ...
263
- # TOOL: ...
264
- # ARGS: {...}
265
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
267
  def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
268
  """
@@ -272,6 +477,202 @@ class StudentAgent:
272
  """
273
  return call_llm(prompt, system_prompt, seed)
274
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
  # =============================================================================
277
  # For local testing
 
26
  import json
27
  import os
28
  import re
29
+ import urllib.request
30
  from dataclasses import dataclass, field
31
+ from collections import deque
32
  from typing import Optional
33
 
34
  from dotenv import load_dotenv
 
37
  # Load environment variables
38
  load_dotenv()
39
 
 
 
 
 
40
  # =============================================================================
41
  # LLM Configuration - DO NOT MODIFY
42
  # =============================================================================
43
 
44
+ # Backend selection
45
+ LLM_BACKEND = os.getenv("LLM_BACKEND", "hf").lower()
46
 
47
+ # HF model (default backend)
48
+ LLM_MODEL = os.getenv("HF_MODEL", "Qwen/Qwen2.5-72B-Instruct")
49
 
50
+ # Ollama model (local backend)
51
+ OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen2.5:3b")
52
+ OLLAMA_URL = os.getenv("OLLAMA_URL", "http://127.0.0.1:11434/api/chat")
53
 
54
+ # Initialize the LLM client (uses HF_TOKEN from environment)
55
+ _hf_token = os.getenv("HF_TOKEN")
56
+ LLM_CLIENT = InferenceClient(token=_hf_token) if _hf_token else None
 
 
 
 
 
 
 
 
 
57
 
58
 
59
  def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
 
81
  {"role": "user", "content": prompt},
82
  ]
83
 
84
+ if LLM_BACKEND == "ollama":
85
+ payload = {
86
+ "model": OLLAMA_MODEL,
87
+ "messages": messages,
88
+ "stream": False,
89
+ "options": {
90
+ "temperature": 0.0,
91
+ "num_predict": max_tokens,
92
+ "seed": seed,
93
+ },
94
+ }
95
+ req = urllib.request.Request(
96
+ OLLAMA_URL,
97
+ data=json.dumps(payload).encode("utf-8"),
98
+ headers={"Content-Type": "application/json"},
99
+ method="POST",
100
  )
101
+ with urllib.request.urlopen(req, timeout=120) as response:
102
+ body = response.read().decode("utf-8")
103
+ parsed = json.loads(body)
104
+ return parsed.get("message", {}).get("content", "")
105
+
106
+ if not LLM_CLIENT:
107
+ raise ValueError("HF_TOKEN not found. Set it in your .env file or use LLM_BACKEND=ollama.")
108
 
109
  response = LLM_CLIENT.chat.completions.create(
110
  model=LLM_MODEL,
 
139
 
140
  AVAILABLE TOOLS (use via MCP):
141
  - play_action: Execute a game command (north, take lamp, open mailbox, etc.)
142
+ - memory: Get state summary + recent history + loop diagnostics
143
+ - inventory: Check what you're carrying
144
+ - get_map: Get explored locations and frontier directions
145
+ - get_stats: Get compact state JSON (score, moves, done, loop signals)
146
+ - remember: Save a short note as key/value
147
+ - recall: Retrieve saved notes
148
 
149
  VALID GAME COMMANDS for play_action:
150
  - Movement: north, south, east, west, up, down, enter, exit
 
160
  THOUGHT: I should look around to see where I am.
161
  TOOL: play_action
162
  ARGS: {"action": "look"}
163
+
164
+ POLICY:
165
+ 1) Prefer play_action on most turns.
166
+ 2) If score/reward is stagnant or location repeats, prioritize unexplored movement/frontier.
167
+ 3) Avoid repeating the same action in the same location unless new evidence appears.
168
+ 4) Use memory/get_map/get_stats only when needed to break uncertainty.
169
  """
170
 
171
 
 
187
 
188
  def __init__(self):
189
  """Initialize your agent here."""
190
+ self.history: list[dict] = []
191
+ self.recent_actions: deque[str] = deque(maxlen=8)
192
+ self.location_action_counts: dict[tuple[str, str], int] = {}
193
+ self.score: int = 0
194
+ self.max_score: int = 350
195
+ self.last_observation: str = ""
196
+ self.non_play_streak: int = 0
197
+ self.cached_map: str = ""
198
+ self.cached_inventory: str = ""
199
+ self.note_counter: int = 0
200
 
201
  async def run(
202
  self,
 
219
  Returns:
220
  RunResult with final score and statistics
221
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  locations_visited = set()
223
+ history: list[tuple[str, str, str]] = []
224
  final_score = 0
225
  moves = 0
226
+ game_completed = False
227
+ error = None
228
 
229
+ print(f"Starting game '{game}' with seed {seed}, using LLM model '{LLM_MODEL}'.")
 
230
 
231
+ try:
232
+ tools = await client.list_tools()
233
+ tool_names = {t.name for t in tools}
234
+
235
+ async def call_tool(tool: str, args: dict) -> str:
236
+ result = await client.call_tool(tool, args)
237
+ return self._extract_result(result)
238
+
239
+ observation = await call_tool("play_action", {"action": "look"})
240
+ self.last_observation = observation
241
+
242
+ stats = await self._get_stats(client, tool_names)
243
+ self._update_state_from_stats(stats)
244
+ location = stats.get("location") or self._extract_location(observation)
245
+ if location:
246
+ locations_visited.add(location)
247
+
248
+ if "get_map" in tool_names:
249
+ self.cached_map = await call_tool("get_map", {})
250
+ if "inventory" in tool_names:
251
+ self.cached_inventory = await call_tool("inventory", {})
252
+
253
+ if verbose:
254
+ print(f"\n{observation}")
255
+
256
+ print(max_steps)
257
+ for step in range(1, max_steps + 1):
258
+ location = stats.get("location") or self._extract_location(observation)
259
+ no_progress = int(stats.get("no_progress_streak", 0) or 0)
260
+
261
+ if "get_map" in tool_names and (step % 6 == 0 or no_progress >= 3):
262
+ self.cached_map = await call_tool("get_map", {})
263
+ if "inventory" in tool_names and step % 12 == 0:
264
+ self.cached_inventory = await call_tool("inventory", {})
265
+
266
+ prompt = self._build_prompt(
267
+ observation=observation,
268
+ location=location,
269
+ step=step,
270
+ max_steps=max_steps,
271
+ stats=stats,
272
+ map_snapshot=self.cached_map,
273
+ inventory_snapshot=self.cached_inventory,
274
+ )
275
+
276
+ response = call_llm(
277
+ prompt=prompt,
278
+ system_prompt=SYSTEM_PROMPT,
279
+ seed=seed + (step * 31),
280
+ )
281
+ thought, tool_name, tool_args = self._parse_response(response)
282
+ tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)
283
+
284
+ if tool_name != "play_action":
285
+ self.non_play_streak += 1
286
+ else:
287
+ self.non_play_streak = 0
288
+
289
+ if self.non_play_streak >= 2:
290
+ tool_name = "play_action"
291
+ tool_args = {
292
+ "action": self._next_exploration_action(
293
+ current_location=location,
294
+ map_snapshot=self.cached_map,
295
+ )
296
+ }
297
+ self.non_play_streak = 0
298
+
299
+ if tool_name == "play_action":
300
+ action = tool_args.get("action", "look")
301
+ action = self._normalize_action(action)
302
+ action = self._anti_loop_override(action, location, stats)
303
+ tool_args = {"action": action}
304
+ self.recent_actions.append(action)
305
+ moves += 1
306
+
307
+ if verbose:
308
+ print(f"\n--- Step {step} ---")
309
+ print(f"[THOUGHT] {thought}")
310
+ print(f"[TOOL] {tool_name}({tool_args})")
311
+
312
+ try:
313
+ observation = await call_tool(tool_name, tool_args)
314
+ except Exception as tool_exc:
315
+ observation = f"Tool error: {tool_exc}"
316
+ tool_name = "play_action"
317
+ fallback_action = self._next_exploration_action(location, self.cached_map)
318
+ tool_args = {"action": fallback_action}
319
+ observation = await call_tool(tool_name, tool_args)
320
+
321
+ self.last_observation = observation
322
+ stats = await self._get_stats(client, tool_names)
323
+ self._update_state_from_stats(stats)
324
+
325
+ location = stats.get("location") or self._extract_location(observation)
326
+ if location:
327
+ locations_visited.add(location)
328
+
329
+ final_score = int(stats.get("score", self.score) or self.score)
330
+ moves = int(stats.get("moves", moves) or moves)
331
+ self.max_score = int(stats.get("max_score", self.max_score) or self.max_score)
332
+ game_completed = bool(stats.get("done", False)) or self._is_game_over(observation)
333
+
334
+ if tool_name == "play_action":
335
+ loc_key = location or "Unknown"
336
+ act_key = tool_args.get("action", "look")
337
+ key = (loc_key, act_key)
338
+ self.location_action_counts[key] = self.location_action_counts.get(key, 0) + 1
339
+ await self._maybe_store_note(client, tool_names, location, observation)
340
+
341
+ self.history.append(
342
+ {
343
+ "step": step,
344
+ "thought": thought,
345
+ "tool": tool_name,
346
+ "args": tool_args,
347
+ "observation": observation[:220],
348
+ "score": final_score,
349
+ }
350
+ )
351
+ if len(self.history) > 18:
352
+ self.history = self.history[-18:]
353
+
354
+ history.append((thought, f"{tool_name}({tool_args})", observation[:120]))
355
+
356
+ if verbose:
357
+ print(f"[RESULT] {observation[:220]}...")
358
+ print(
359
+ f"[STATE] score={final_score}/{self.max_score} "
360
+ f"moves={moves} loc={location}"
361
+ )
362
+
363
+ if game_completed:
364
+ print(f"Game completed at step {step} with score {final_score}.")
365
+ break
366
+
367
+ except Exception as exc:
368
+ print(f"Error during agent run: {exc}")
369
+ error = str(exc)
370
+
371
+ if final_score == 0:
372
+ print("Agent failed to score any points. Consider improving your action selection and exploration strategy.")
373
+ final_score = self.score
374
+ print("end")
375
  return RunResult(
376
  final_score=final_score,
377
+ max_score=self.max_score,
378
  moves=moves,
379
  locations_visited=locations_visited,
380
+ game_completed=game_completed,
381
+ error=error,
382
  history=history,
383
  )
384
+
385
+ def _build_prompt(
386
+ self,
387
+ observation: str,
388
+ location: str,
389
+ step: int,
390
+ max_steps: int,
391
+ stats: dict,
392
+ map_snapshot: str,
393
+ inventory_snapshot: str,
394
+ ) -> str:
395
  """
396
  Build the prompt for the LLM.
 
 
397
  """
398
+ recent_lines = []
399
+ for item in self.history[-5:]:
400
+ recent_lines.append(
401
+ f"- {item['tool']} {item['args']} => score {item['score']} => {item['observation']}"
402
+ )
403
+ if not recent_lines:
404
+ recent_lines = ["- (none)"]
405
+
406
+ frontier_hint = self._extract_frontier_from_map(map_snapshot)
407
+ no_progress = int(stats.get("no_progress_streak", 0) or 0)
408
+
409
+ prompt = (
410
+ f"Game: current run\n"
411
+ f"Step: {step}/{max_steps}\n"
412
+ f"Location: {location}\n"
413
+ f"Score: {stats.get('score', self.score)}/{stats.get('max_score', self.max_score)}\n"
414
+ f"Loop signals: no_progress={stats.get('no_progress_streak', 0)}, "
415
+ f"same_location={stats.get('same_location_streak', 0)}, "
416
+ f"repeat_action={stats.get('repeated_action_streak', 0)}\n\n"
417
+ f"Recent decisions:\n" + "\n".join(recent_lines) + "\n\n"
418
+ f"Inventory snapshot:\n{inventory_snapshot[:280] if inventory_snapshot else '(unknown)'}\n\n"
419
+ f"Map/frontier snapshot:\n{map_snapshot[:520] if map_snapshot else '(unknown)'}\n\n"
420
+ f"Current observation:\n{observation}\n\n"
421
+ f"Guidance:\n"
422
+ f"- Prefer play_action now unless a planning query is necessary.\n"
423
+ f"- If no_progress >= 3, prioritize an unexplored movement from frontier ({', '.join(frontier_hint)}).\n"
424
+ f"- Avoid repeating recent actions: {', '.join(list(self.recent_actions)[-4:])}.\n"
425
+ f"- If you mention a clue in THOUGHT, keep it concise.\n"
426
+ )
427
+ if no_progress >= 3:
428
+ prompt += "\nYou appear stuck: choose a different movement or interaction than recent attempts.\n"
429
+ return prompt
430
+
431
  def _parse_response(self, response: str) -> tuple[str, str, dict]:
432
  """
433
  Parse LLM response to extract thought, tool name, and arguments.
434
+
 
 
435
  Returns:
436
  Tuple of (thought, tool_name, args_dict)
437
  """
438
+ thought = "No thought"
439
+ tool_name = "play_action"
440
+ args = {"action": "look"}
441
+
442
+ thought_match = re.search(r"THOUGHT\s*:\s*(.+)", response, flags=re.IGNORECASE)
443
+ if thought_match:
444
+ thought = thought_match.group(1).strip()
445
+
446
+ tool_match = re.search(r"TOOL\s*:\s*([^\n]+)", response, flags=re.IGNORECASE)
447
+ if tool_match:
448
+ tool_name = tool_match.group(1).strip().lower()
449
+ tool_name = re.sub(r"[^a-zA-Z0-9_]+", "", tool_name)
450
+
451
+ args_match = re.search(r"ARGS\s*:\s*(\{.*\})", response, flags=re.IGNORECASE | re.DOTALL)
452
+ if args_match:
453
+ raw_args = args_match.group(1).strip()
454
+ try:
455
+ args = json.loads(raw_args)
456
+ except json.JSONDecodeError:
457
+ raw_args = raw_args.replace("'", '"')
458
+ try:
459
+ args = json.loads(raw_args)
460
+ except json.JSONDecodeError:
461
+ action_match = re.search(r'"action"\s*:\s*"([^"]+)"', raw_args)
462
+ if action_match:
463
+ args = {"action": action_match.group(1)}
464
+ else:
465
+ args = {"action": "look"}
466
+
467
+ if not isinstance(args, dict):
468
+ args = {"action": "look"}
469
+
470
+ return thought, tool_name, args
471
 
472
  def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
473
  """
 
477
  """
478
  return call_llm(prompt, system_prompt, seed)
479
 
480
+ async def _get_stats(self, client, tool_names: set[str]) -> dict:
481
+ if "get_stats" not in tool_names:
482
+ return {
483
+ "score": self.score,
484
+ "max_score": self.max_score,
485
+ "moves": 0,
486
+ "done": False,
487
+ }
488
+ try:
489
+ result = await client.call_tool("get_stats", {})
490
+ text = self._extract_result(result)
491
+ return self._parse_stats(text)
492
+ except Exception:
493
+ return {
494
+ "score": self.score,
495
+ "max_score": self.max_score,
496
+ "moves": 0,
497
+ "done": False,
498
+ }
499
+
500
+ def _parse_stats(self, text: str) -> dict:
501
+ text = text.strip()
502
+ try:
503
+ return json.loads(text)
504
+ except Exception:
505
+ data: dict[str, object] = {}
506
+ for key in [
507
+ "score", "max_score", "moves", "reward", "no_progress_streak",
508
+ "same_location_streak", "repeated_action_streak", "unique_locations",
509
+ "unique_recent_observations",
510
+ ]:
511
+ match = re.search(rf'"{key}"\s*:\s*(-?\d+)', text)
512
+ if match:
513
+ data[key] = int(match.group(1))
514
+ for key in ["game", "location"]:
515
+ match = re.search(rf'"{key}"\s*:\s*"([^"]*)"', text)
516
+ if match:
517
+ data[key] = match.group(1)
518
+ done_match = re.search(r'"done"\s*:\s*(true|false)', text, flags=re.IGNORECASE)
519
+ if done_match:
520
+ data["done"] = done_match.group(1).lower() == "true"
521
+ return data
522
+
523
+ def _update_state_from_stats(self, stats: dict) -> None:
524
+ if not stats:
525
+ return
526
+ self.score = int(stats.get("score", self.score) or self.score)
527
+ self.max_score = int(stats.get("max_score", self.max_score) or self.max_score)
528
+
529
+ def _validate_tool_call(self, tool_name: str, tool_args: dict, valid_tools: set[str]) -> tuple[str, dict]:
530
+ aliases = {
531
+ "action": "play_action",
532
+ "act": "play_action",
533
+ "play": "play_action",
534
+ "map": "get_map",
535
+ "stats": "get_stats",
536
+ "state": "memory",
537
+ "inv": "inventory",
538
+ "store": "remember",
539
+ "notes": "recall",
540
+ }
541
+ tool_name = aliases.get(tool_name, tool_name)
542
+ if tool_name not in valid_tools:
543
+ tool_name = "play_action"
544
+
545
+ if not isinstance(tool_args, dict):
546
+ tool_args = {}
547
+
548
+ if tool_name == "play_action":
549
+ action = tool_args.get("action", "look")
550
+ tool_args = {"action": self._normalize_action(action)}
551
+ elif tool_name == "remember":
552
+ key = str(tool_args.get("key", "note")).strip() or "note"
553
+ value = str(tool_args.get("value", "")).strip() or "unknown"
554
+ tool_args = {"key": key[:64], "value": value[:220]}
555
+ elif tool_name == "recall":
556
+ key = str(tool_args.get("key", "")).strip()
557
+ tool_args = {"key": key}
558
+ else:
559
+ tool_args = {}
560
+
561
+ return tool_name, tool_args
562
+
563
+ def _normalize_action(self, action: str) -> str:
564
+ action = str(action).lower().strip()
565
+ action = action.replace("**", "").replace("`", "")
566
+ action = " ".join(action.split())
567
+ invalid_verb_map = {
568
+ "check": "examine",
569
+ "inspect": "examine",
570
+ "search": "look",
571
+ "grab": "take",
572
+ "pick": "take",
573
+ "investigate": "examine",
574
+ }
575
+ words = action.split()
576
+ if words and words[0] in invalid_verb_map:
577
+ words[0] = invalid_verb_map[words[0]]
578
+ action = " ".join(words)
579
+ return action or "look"
580
+
581
+ def _anti_loop_override(self, action: str, location: str, stats: dict) -> str:
582
+ loc = location or "Unknown"
583
+ key = (loc, action)
584
+ no_progress = int(stats.get("no_progress_streak", 0) or 0)
585
+ repeated_action_streak = int(stats.get("repeated_action_streak", 0) or 0)
586
+
587
+ if self.location_action_counts.get(key, 0) >= 2 and no_progress >= 2:
588
+ return self._next_exploration_action(loc, self.cached_map)
589
+
590
+ if repeated_action_streak >= 2 and len(self.recent_actions) >= 2:
591
+ if action == self.recent_actions[-1]:
592
+ return self._next_exploration_action(loc, self.cached_map)
593
+
594
+ if no_progress >= 4 and action in {"look", "inventory", "wait"}:
595
+ return self._next_exploration_action(loc, self.cached_map)
596
+
597
+ return action
598
+
599
+ def _next_exploration_action(self, current_location: str, map_snapshot: str) -> str:
600
+ frontier = self._extract_frontier_from_map(map_snapshot)
601
+ recent = set(list(self.recent_actions)[-4:])
602
+ for direction in frontier:
603
+ if direction not in recent:
604
+ return direction
605
+
606
+ fallback = [
607
+ "north", "south", "east", "west", "up", "down",
608
+ "enter", "exit", "examine room", "look",
609
+ ]
610
+ loc = current_location or "Unknown"
611
+ for action in fallback:
612
+ if self.location_action_counts.get((loc, action), 0) < 2:
613
+ return action
614
+ return "look"
615
+
616
+ def _extract_frontier_from_map(self, map_snapshot: str) -> list[str]:
617
+ if not map_snapshot:
618
+ return ["north", "south", "east", "west"]
619
+ match = re.search(
620
+ r"Frontier directions not yet tried here:\s*(.+)",
621
+ map_snapshot,
622
+ flags=re.IGNORECASE,
623
+ )
624
+ if not match:
625
+ return ["north", "south", "east", "west"]
626
+ dirs = [d.strip().lower() for d in match.group(1).split(",") if d.strip()]
627
+ return dirs or ["north", "south", "east", "west"]
628
+
629
+ def _extract_result(self, result) -> str:
630
+ if hasattr(result, "content") and result.content:
631
+ item = result.content[0]
632
+ if hasattr(item, "text"):
633
+ return item.text
634
+ return str(item)
635
+ if isinstance(result, list) and result:
636
+ first = result[0]
637
+ if hasattr(first, "text"):
638
+ return first.text
639
+ return str(first)
640
+ return str(result)
641
+
642
+ def _extract_location(self, observation: str) -> str:
643
+ lines = (observation or "").strip().split("\n")
644
+ if not lines:
645
+ return "Unknown"
646
+ first = lines[0].strip()
647
+ return first[:120] if first else "Unknown"
648
+
649
+ def _is_game_over(self, text: str) -> bool:
650
+ lower = (text or "").lower()
651
+ endings = [
652
+ "game over",
653
+ "you have died",
654
+ "you are dead",
655
+ "*** you have died ***",
656
+ "[game_over]",
657
+ ]
658
+ return any(token in lower for token in endings)
659
+
660
+ async def _maybe_store_note(self, client, tool_names: set[str], location: str, observation: str) -> None:
661
+ if "remember" not in tool_names:
662
+ return
663
+ text = (observation or "").lower()
664
+ keywords = ["locked", "key", "door", "treasure", "cannot", "need", "dark"]
665
+ if not any(k in text for k in keywords):
666
+ return
667
+
668
+ note_text = " ".join((observation or "").strip().split())[:170]
669
+ key = f"clue_{self.note_counter}_{(location or 'unknown')[:20]}"
670
+ self.note_counter += 1
671
+ try:
672
+ await client.call_tool("remember", {"key": key, "value": note_text})
673
+ except Exception:
674
+ return
675
+
676
 
677
  # =============================================================================
678
  # For local testing
mcp_server.py CHANGED
@@ -26,6 +26,8 @@ Then open the MCP Inspector in your browser to test the tools interactively.
26
 
27
  import sys
28
  import os
 
 
29
 
30
  # Add parent directory to path to import games module
31
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -59,29 +61,123 @@ class GameManager:
59
  self.env: TextAdventureEnv = None
60
  self.state = None
61
  self.game_name: str = ""
62
- # TODO: Add more state tracking
63
- # self.history: list[tuple[str, str]] = []
64
- # self.explored_locations: dict[str, set[str]] = {}
65
- # self.current_location: str = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  def initialize(self, game: str = "zork1"):
68
  """Initialize or reset the game."""
69
  self.game_name = game
70
  self.env = TextAdventureEnv(game)
71
  self.state = self.env.reset()
72
- # TODO: Reset your state tracking here
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  return self.state.observation
74
 
75
  def step(self, action: str) -> str:
76
  """Execute an action and return the result."""
77
  if self.env is None:
78
  self.initialize()
79
-
 
80
  self.state = self.env.step(action)
81
-
82
- # TODO: Update your state tracking here
83
- # self.history.append((action, self.state.observation))
84
- # Update location tracking, etc.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  return self.state.observation
87
 
@@ -131,57 +227,151 @@ def play_action(action: str) -> str:
131
  - Other: look, inventory, read <thing>, turn on lamp
132
  """
133
  game = get_game()
134
-
135
- # TODO: You might want to add action validation here
136
- # TODO: You might want to include score changes in the response
137
-
138
  result = game.step(action)
139
-
140
- # Optional: Append score info
141
- # result += f"\n[Score: {game.get_score()} | Moves: {game.get_moves()}]"
142
-
 
 
 
 
 
 
 
 
 
 
 
143
  return result
144
 
145
 
146
- # TODO: Implement additional tools to help your agent
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
- # @mcp.tool()
149
- # def memory() -> str:
150
- # """
151
- # Get the current game state summary.
152
- #
153
- # Returns:
154
- # A summary including current location, score, moves, and recent history
155
- # """
156
- # game = get_game()
157
- # # TODO: Return useful state information
158
- # pass
159
 
 
 
 
 
 
 
 
 
 
160
 
161
- # @mcp.tool()
162
- # def inventory() -> str:
163
- # """
164
- # Check what the player is carrying.
165
- #
166
- # Returns:
167
- # List of items in the player's inventory
168
- # """
169
- # game = get_game()
170
- # result = game.step("inventory")
171
- # return result
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
- # @mcp.tool()
175
- # def get_map() -> str:
176
- # """
177
- # Get a map of explored locations.
178
- #
179
- # Returns:
180
- # A text representation of explored locations and connections
181
- # """
182
- # game = get_game()
183
- # # TODO: Return map of explored locations
184
- # pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
 
187
  # @mcp.tool()
 
26
 
27
  import sys
28
  import os
29
+ import hashlib
30
+ from collections import deque
31
 
32
  # Add parent directory to path to import games module
33
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
61
  self.env: TextAdventureEnv = None
62
  self.state = None
63
  self.game_name: str = ""
64
+ self.history: deque[dict] = deque(maxlen=60)
65
+ self.current_location: str = "Unknown"
66
+ self.location_visits: dict[str, int] = {}
67
+ self.map_edges: dict[str, dict[str, str]] = {}
68
+ self.recent_transitions: deque[tuple[str, str]] = deque(maxlen=16)
69
+ self.notes: dict[str, str] = {}
70
+ self.note_order: deque[str] = deque(maxlen=40)
71
+ self.no_progress_streak: int = 0
72
+ self.same_location_streak: int = 0
73
+ self.repeated_action_streak: int = 0
74
+ self.obs_hash_history: deque[str] = deque(maxlen=8)
75
+ self.last_action: str = ""
76
+ self.action_counts: dict[tuple[str, str], int] = {}
77
+
78
+ @staticmethod
79
+ def _clean_text(text: str) -> str:
80
+ return " ".join((text or "").strip().split())
81
+
82
+ def _extract_location(self, observation: str) -> str:
83
+ candidate = (self.state.location if self.state else "") or ""
84
+ candidate = candidate.strip()
85
+ if candidate and candidate.lower() != "unknown":
86
+ return candidate
87
+ lines = (observation or "").strip().split("\n")
88
+ if not lines:
89
+ return "Unknown"
90
+ first = lines[0].strip()
91
+ if first:
92
+ return first[:120]
93
+ return "Unknown"
94
+
95
+ def _is_movement_action(self, action: str) -> bool:
96
+ movement = {
97
+ "north", "south", "east", "west", "up", "down", "enter", "exit",
98
+ "n", "s", "e", "w", "u", "d", "in", "out",
99
+ "northeast", "northwest", "southeast", "southwest",
100
+ "ne", "nw", "se", "sw",
101
+ }
102
+ return action.strip().lower() in movement
103
+
104
+ def _observation_hash(self, observation: str) -> str:
105
+ clean = self._clean_text(observation).lower()[:400]
106
+ return hashlib.sha1(clean.encode("utf-8")).hexdigest()
107
 
108
  def initialize(self, game: str = "zork1"):
109
  """Initialize or reset the game."""
110
  self.game_name = game
111
  self.env = TextAdventureEnv(game)
112
  self.state = self.env.reset()
113
+ self.history.clear()
114
+ self.map_edges.clear()
115
+ self.location_visits.clear()
116
+ self.recent_transitions.clear()
117
+ self.notes.clear()
118
+ self.note_order.clear()
119
+ self.no_progress_streak = 0
120
+ self.same_location_streak = 0
121
+ self.repeated_action_streak = 0
122
+ self.obs_hash_history.clear()
123
+ self.last_action = ""
124
+ self.action_counts.clear()
125
+
126
+ self.current_location = self._extract_location(self.state.observation)
127
+ self.location_visits[self.current_location] = 1
128
+ self.obs_hash_history.append(self._observation_hash(self.state.observation))
129
  return self.state.observation
130
 
131
  def step(self, action: str) -> str:
132
  """Execute an action and return the result."""
133
  if self.env is None:
134
  self.initialize()
135
+ action = self._clean_text(action).lower() or "look"
136
+ prev_loc = self.current_location
137
  self.state = self.env.step(action)
138
+
139
+ new_loc = self._extract_location(self.state.observation)
140
+ self.current_location = new_loc
141
+ self.location_visits[new_loc] = self.location_visits.get(new_loc, 0) + 1
142
+
143
+ if self._is_movement_action(action):
144
+ if prev_loc not in self.map_edges:
145
+ self.map_edges[prev_loc] = {}
146
+ self.map_edges[prev_loc][action] = new_loc
147
+
148
+ if self.state.reward > 0:
149
+ self.no_progress_streak = 0
150
+ else:
151
+ self.no_progress_streak += 1
152
+
153
+ if new_loc == prev_loc:
154
+ self.same_location_streak += 1
155
+ else:
156
+ self.same_location_streak = 0
157
+
158
+ if action == self.last_action:
159
+ self.repeated_action_streak += 1
160
+ else:
161
+ self.repeated_action_streak = 1
162
+ self.last_action = action
163
+
164
+ self.recent_transitions.append((prev_loc, new_loc))
165
+ self.action_counts[(new_loc, action)] = self.action_counts.get((new_loc, action), 0) + 1
166
+
167
+ obs_hash = self._observation_hash(self.state.observation)
168
+ self.obs_hash_history.append(obs_hash)
169
+
170
+ self.history.append(
171
+ {
172
+ "action": action,
173
+ "location_before": prev_loc,
174
+ "location_after": new_loc,
175
+ "score": self.state.score,
176
+ "reward": self.state.reward,
177
+ "moves": self.state.moves,
178
+ "observation": self.state.observation[:280],
179
+ }
180
+ )
181
 
182
  return self.state.observation
183
 
 
227
  - Other: look, inventory, read <thing>, turn on lamp
228
  """
229
  game = get_game()
230
+
 
 
 
231
  result = game.step(action)
232
+
233
+ score_info = (
234
+ f"\n\n[Score: {game.state.score}/{game.state.max_score} | "
235
+ f"Reward: {game.state.reward:+d} | Moves: {game.state.moves}]"
236
+ )
237
+ loop_info = (
238
+ f"\n[LoopSignals: no_progress={game.no_progress_streak}, "
239
+ f"same_location={game.same_location_streak}, "
240
+ f"repeat_action={game.repeated_action_streak}]"
241
+ )
242
+ if game.state.done:
243
+ score_info += "\n[GAME_OVER]"
244
+
245
+ result = result + score_info + loop_info
246
+
247
  return result
248
 
249
 
250
@mcp.tool()
def memory() -> str:
    """Return current state with compact recent history and loop diagnostics."""
    game = get_game()

    # Last six turns, one line each; placeholder when nothing played yet.
    recent_lines = [
        f"- {item['action']} @ {item['location_before']} -> {item['location_after']} "
        f"(reward {item['reward']:+d}, score {item['score']})"
        for item in list(game.history)[-6:]
    ] or ["- (no actions yet)"]

    # Up to eight most recent stored notes, values truncated for brevity.
    notes = [
        f"- {key}: {game.notes.get(key, '')[:120]}"
        for key in list(game.note_order)[-8:]
    ] or ["- (none)"]

    header = (
        f"Game: {game.game_name}\n"
        f"Location: {game.current_location}\n"
        f"Score: {game.state.score}/{game.state.max_score}\n"
        f"Moves: {game.state.moves}\n"
        f"Done: {game.state.done}\n"
        f"LoopSignals: no_progress={game.no_progress_streak}, "
        f"same_location={game.same_location_streak}, repeat_action={game.repeated_action_streak}\n\n"
    )
    return (
        header
        + "Recent history:\n" + "\n".join(recent_lines) + "\n\n"
        + "Notes:\n" + "\n".join(notes) + "\n\n"
        + f"Observation:\n{game.state.observation}"
    )
283
 
 
 
 
 
 
 
 
 
 
 
 
284
 
285
@mcp.tool()
def inventory() -> str:
    """Return current inventory without spending a move."""
    game = get_game()
    # Read straight from cached state so no game turn is consumed.
    items = (game.state.inventory or []) if game.state else []
    if not items:
        return "Inventory: empty"
    return "Inventory: " + ", ".join(str(entry) for entry in items)
+ return "Inventory: " + ", ".join(item_list)
294
 
 
 
 
 
 
 
 
 
 
 
 
295
 
296
@mcp.tool()
def get_map() -> str:
    """Return explored location graph and candidate frontier directions."""
    game = get_game()
    if not game.location_visits:
        return "Map: empty"

    lines = ["Explored map:"]
    # Locations alphabetically (case-insensitive), each with its known exits.
    for loc in sorted(game.location_visits, key=str.lower):
        lines.append(f"- {loc} (visits: {game.location_visits[loc]})")
        for direction, dst in sorted(game.map_edges.get(loc, {}).items()):
            lines.append(f"  {direction} -> {dst}")

    # Directions not yet attempted from the current room = exploration frontier.
    all_dirs = {
        "north", "south", "east", "west", "up", "down", "enter", "exit",
        "n", "s", "e", "w", "u", "d", "in", "out", "ne", "nw", "se", "sw"
    }
    tried = set(game.map_edges.get(game.current_location, {}))
    frontier = sorted(all_dirs - tried)
    lines.append(f"\nCurrent: {game.current_location}")
    lines.append("Frontier directions not yet tried here: " + ", ".join(frontier[:12]))
    return "\n".join(lines)
320
 
321
+
322
@mcp.tool()
def get_stats() -> str:
    """Return compact machine-readable (JSON-shaped) state for planning.

    Includes score/move counters, loop-detection streaks, and exploration
    coverage metrics derived from the tracked state.
    """
    game = get_game()
    unique_obs = len(set(game.obs_hash_history))
    # Sanitize outside the f-string: a backslash escape inside an f-string
    # expression (replace("\"", "")) is a SyntaxError before Python 3.12.
    location = game.current_location.replace('"', "")
    return (
        "{"
        f'"game":"{game.game_name}",'
        f'"location":"{location}",'
        f'"score":{game.state.score},'
        f'"max_score":{game.state.max_score},'
        f'"moves":{game.state.moves},'
        f'"reward":{game.state.reward},'
        f'"done":{str(game.state.done).lower()},'
        f'"no_progress_streak":{game.no_progress_streak},'
        f'"same_location_streak":{game.same_location_streak},'
        f'"repeated_action_streak":{game.repeated_action_streak},'
        f'"unique_locations":{len(game.location_visits)},'
        f'"unique_recent_observations":{unique_obs}'
        "}"
    )
343
+
344
+
345
@mcp.tool()
def remember(key: str, value: str) -> str:
    """Store a short note for the agent (e.g., puzzle clue, locked door).

    Keys are truncated to 64 chars and values to 320. The note index
    (``note_order``) is a bounded deque; when appending would evict the
    oldest key, that key's entry is also removed from ``notes`` so the
    dict cannot grow without bound (previously evicted notes leaked).
    """
    game = get_game()
    clean_key = GameManager._clean_text(key)[:64]
    clean_value = GameManager._clean_text(value)[:320]
    if not clean_key:
        return "ERROR: key cannot be empty"

    game.notes[clean_key] = clean_value
    if clean_key in game.note_order:
        # Re-remembering a key just refreshes its recency.
        game.note_order.remove(clean_key)
    elif len(game.note_order) == game.note_order.maxlen:
        # The append below will push out the oldest key; drop its note too.
        evicted = game.note_order[0]
        game.notes.pop(evicted, None)
    game.note_order.append(clean_key)
    return f"Stored note '{clean_key}'"
358
+
359
+
360
@mcp.tool()
def recall(key: str = "") -> str:
    """Recall one note by key, or list all recent notes if key is empty."""
    game = get_game()
    wanted = GameManager._clean_text(key)
    if wanted:
        note = game.notes.get(wanted)
        if note is not None:
            return f"{wanted}: {note}"
        return f"No note for key '{wanted}'"
    if not game.note_order:
        return "No notes stored"
    # Show the 12 most recently stored/refreshed notes.
    listing = ["Notes:"] + [
        f"- {k}: {game.notes.get(k, '')}" for k in list(game.note_order)[-12:]
    ]
    return "\n".join(listing)
374
+ return "\n".join(lines)
375
 
376
 
377
  # @mcp.tool()