tlemagny committed on
Commit
351074a
·
1 Parent(s): 7a36b3c
Files changed (2) hide show
  1. agent.py +312 -27
  2. mcp_server.py +147 -60
agent.py CHANGED
@@ -82,7 +82,6 @@ def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300)
82
  max_tokens=max_tokens,
83
  seed=seed,
84
  )
85
-
86
  return response.choices[0].message.content
87
 
88
 
@@ -102,32 +101,55 @@ class RunResult:
102
  # System Prompt - Customize this for your agent
103
  # =============================================================================
104
 
105
- SYSTEM_PROMPT = """You are playing a classic text adventure game.
106
-
107
- GOAL: Explore the world, solve puzzles, and maximize your score.
108
 
109
- AVAILABLE TOOLS (use via MCP):
110
- - play_action: Execute a game command (north, take lamp, open mailbox, etc.)
111
- - memory: Get current game state and history (if implemented)
112
- - inventory: Check what you're carrying (if implemented)
 
113
 
114
  VALID GAME COMMANDS for play_action:
115
- - Movement: north, south, east, west, up, down, enter, exit
116
- - Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
117
- - Other: look, inventory, read <thing>, turn on lamp
 
 
 
 
118
 
119
  RESPOND IN THIS EXACT FORMAT (no markdown):
120
- THOUGHT: <your reasoning about what to do next>
121
  TOOL: <tool_name>
122
- ARGS: <JSON arguments, e.g., {"action": "look"}>
123
 
124
- Example:
125
- THOUGHT: I should look around to see where I am.
126
  TOOL: play_action
127
- ARGS: {"action": "look"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
  """
129
 
130
 
 
131
  # =============================================================================
132
  # Student Agent - IMPLEMENT THIS CLASS
133
  # =============================================================================
@@ -147,9 +169,13 @@ class StudentAgent:
147
  def __init__(self):
148
  """Initialize your agent here."""
149
  # TODO: Initialize any state tracking you need
150
- # self.history = []
151
- # self.visited_locations = set()
152
- pass
 
 
 
 
153
 
154
  async def run(
155
  self,
@@ -204,16 +230,164 @@ class StudentAgent:
204
 
205
  # TODO: Your implementation here
206
  # ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
  return RunResult(
209
- final_score=final_score,
210
- max_score=350, # Zork1 max score, adjust if needed
211
  moves=moves,
212
  locations_visited=locations_visited,
213
- game_completed=False,
214
  history=history,
215
  )
216
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
  def _build_prompt(self, observation: str, history: list) -> str:
218
  """
219
  Build the prompt for the LLM.
@@ -221,7 +395,44 @@ class StudentAgent:
221
  TODO: Implement this to create effective prompts
222
  """
223
  # TODO: Combine system prompt, history, and current observation
224
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
 
226
  def _parse_response(self, response: str) -> tuple[str, str, dict]:
227
  """
@@ -233,10 +444,38 @@ class StudentAgent:
233
  Tuple of (thought, tool_name, args_dict)
234
  """
235
  # TODO: Parse the response format:
236
- # THOUGHT: ...
237
- # TOOL: ...
238
- # ARGS: {...}
239
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
240
 
241
  def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
242
  """
@@ -245,6 +484,52 @@ class StudentAgent:
245
  This is a convenience wrapper - you can also use call_llm() directly.
246
  """
247
  return call_llm(prompt, system_prompt, seed)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
248
 
249
 
250
  # =============================================================================
 
82
  max_tokens=max_tokens,
83
  seed=seed,
84
  )
 
85
  return response.choices[0].message.content
86
 
87
 
 
101
  # System Prompt - Customize this for your agent
102
  # =============================================================================
103
 
104
+ SYSTEM_PROMPT = """You are an expert text adventure game player. Your goal is to explore, collect treasures, and maximize your score.
 
 
105
 
106
+ AVAILABLE TOOLS (use these via MCP):
107
+ 1. play_action - Execute game commands (north, take lamp, open mailbox, etc.)
108
+ 2. memory - Get current game state, score, and recent history
109
+ 3. get_map - See explored locations and connections
110
+ 4. inventory - Check what you're carrying
111
 
112
  VALID GAME COMMANDS for play_action:
113
+ - Movement: north, south, east, west, up, down, enter, exit, northeast, northwest, southeast, southwest
114
+ - Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>, put <item> in <container>, pull <object>, push <object>
115
+ - Light: turn on lamp, turn off lamp
116
+ - Combat: attack/hit <enemy> with <weapon> (swords, axes, etc.)
117
+ - Other: inventory, look, read <thing>, wait, ask <character> about <topic>, give <item> to <character>
118
+
119
+ FORBIDDEN (will NOT work): check, inspect, search, grab, use, help
120
 
121
  RESPOND IN THIS EXACT FORMAT (no markdown):
122
+ THOUGHT: <brief reasoning about what to do next>
123
  TOOL: <tool_name>
124
+ ARGS: <JSON arguments>
125
 
126
+ Examples:
127
+ THOUGHT: Old stone fountain with big bowl part. It might contain something useful. I should check it out.
128
  TOOL: play_action
129
+ ARGS: {"action": "examine bowl"}
130
+
131
+ THOUGHT: It seems to be a slot where I can put things.
132
+ TOOL: play_action
133
+ ARGS: {"action": "put coin in slot"}
134
+
135
+ THOUGHT: In the bowl, there is a coin. I should take it.
136
+ TOOL: play_action
137
+ ARGS: {"action": "take coin"}
138
+
139
+ STRATEGY:
140
+ 1. Start by looking around and checking memory
141
+ 2. Explore systematically - try all directions
142
+ 3. Examine everything you find for clues and items. When examining an item there might be other items hidden inside or new actions available.
143
+ 4. Pick up all useful items (lamp, sword, pole, etc.) with "take".
144
+ 5. Interact with objects in the environment and in your inventory (pull, put, push, etc.)
145
+ 6. Use get_map to avoid getting lost
146
+ 7. Turn on lamp before dark areas!
147
+
148
+ DO NOT repeat the same action multiple times in a row. If you find yourself stuck, try a different action or explore a new area.
149
  """
150
 
151
 
152
+
153
  # =============================================================================
154
  # Student Agent - IMPLEMENT THIS CLASS
155
  # =============================================================================
 
169
  def __init__(self):
170
  """Initialize your agent here."""
171
  # TODO: Initialize any state tracking you need
172
+ self.history = []
173
+ self.visited_locations = set()
174
+ self.recent_actions = []
175
+ self.score = 0
176
+ self.location_actions = {}
177
+ self.score_actions = []
178
+ self.stuck_counter = 0
179
 
180
  async def run(
181
  self,
 
230
 
231
  # TODO: Your implementation here
232
  # ...
233
+ # Get list of available tools
234
+ tools = await client.list_tools()
235
+ tool_names = [t.name for t in tools]
236
+ # Get initial observation
237
+ result = await client.call_tool("play_action", {"action": "look"})
238
+ observation = self._extract_result(result)
239
+
240
+ # Track initial location
241
+ location = observation.split("\n")[0] if observation else "Unknown"
242
+ locations_visited.add(location)
243
+
244
+ if verbose:
245
+ print(f"\n{observation}")
246
+
247
+ # Main ReAct loop
248
+ for step in range(1, max_steps + 1):
249
+ # Build prompt with context
250
+ prompt = self._build_prompt(observation, self.history)
251
+
252
+ # Call LLM for reasoning (use step-based seed for variety)
253
+ response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
254
+
255
+ # Parse the response
256
+ thought, tool_name, tool_args = self._parse_response(response)
257
+
258
+ if verbose:
259
+ print(f"\n--- Step {step} ---")
260
+ print(f"[THOUGHT] {thought}")
261
+ print(f"[TOOL] {tool_name}({tool_args})")
262
+
263
+ # Validate and fix common issues
264
+ tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)
265
+
266
+ # Loop detection
267
+ if tool_name == "play_action":
268
+ action = tool_args.get("action", "look")
269
+ self.recent_actions.append(action)
270
+ if len(self.recent_actions) > 5:
271
+ self.recent_actions = self.recent_actions[-5:]
272
+
273
+ # Detect loops - if same action 3 times, force "look"
274
+ if len(self.recent_actions) >= 2 and len(set(self.recent_actions[-2:])) == 1:
275
+ if verbose:
276
+ print(f"[WARNING] Loop detected - forcing 'look'")
277
+ tool_args = {"action": "look"}
278
+ self.recent_actions.append("look")
279
+
280
+ moves += 1
281
+
282
+ # Execute the tool
283
+ try:
284
+ result = await client.call_tool(tool_name, tool_args)
285
+ observation = self._extract_result(result)
286
+
287
+ if verbose:
288
+ print(f"[RESULT] {observation[:200]}...")
289
+ except Exception as e:
290
+ observation = f"Error: {e}"
291
+ if verbose:
292
+ print(f"[ERROR] {e}")
293
+
294
+ # Track location
295
+ location = self._get_location(observation)
296
+ locations_visited.add(location)
297
+
298
+ if location not in self.location_actions:
299
+ self.location_actions[location] = set()
300
+ if tool_name == "play_action":
301
+ self.location_actions[location].add(tool_args.get("action", "look"))
302
+
303
+ observations_lines = observation.splitlines()
304
+ # Update history
305
+ self.history.append({
306
+ "step": step,
307
+ "location": location,
308
+ "thought": thought,
309
+ "tool": tool_name,
310
+ "args": tool_args,
311
+ "result": '\n'.join(observations_lines[1:])[:300]
312
+ })
313
+ if len(self.history) > 10:
314
+ self.history = self.history[-10:]
315
+
316
+ current_score = self.score
317
+ # Track score from observation
318
+ self._update_score(observation)
319
+
320
+ if self.score > current_score:
321
+ self.stuck_counter = 0
322
+ if verbose:
323
+ print(f"[SCORE UPDATE] Score increased to {self.score}!")
324
+ self.score_actions.append((location, tool_args.get("action", "look"), '\n'.join(observations_lines[1:])[:300]))
325
+ self.score_actions = self.score_actions[-5:] # Keep last 5 score-increasing actions
326
+ else:
327
+ self.stuck_counter += 1
328
+ if self.stuck_counter >= 10:
329
+ if verbose:
330
+ print(f"[WARNING] No score increase for {self.stuck_counter} steps. Consider changing strategy.")
331
+
332
+ # Record in result history
333
+ history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
334
+
335
+ # Check for game over
336
+ if self._is_game_over(observation):
337
+ if verbose:
338
+ print("\n*** GAME OVER ***")
339
+ break
340
 
341
  return RunResult(
342
+ final_score=self.score,
343
+ max_score=350,
344
  moves=moves,
345
  locations_visited=locations_visited,
346
+ game_completed=self._is_game_over(observation),
347
  history=history,
348
  )
349
 
350
+ def _get_location(self, observation):
351
+ lines = observation.strip().split('\n')
352
+ if lines:
353
+ match = re.match(r'Current Location\s*:\s*(.*)', lines[0])
354
+ if match:
355
+ return match.group(1)
356
+ return lines[0]
357
+ return "Unknown"
358
+
359
+ def _update_score(self, text: str) -> None:
360
+ """Update score from game text."""
361
+ patterns = [
362
+ r'Score:\s*(\d+)',
363
+ r'score[:\s]+(\d+)',
364
+ r'\[Score:\s*(\d+)',
365
+ ]
366
+
367
+ for pattern in patterns:
368
+ match = re.search(pattern, text, re.IGNORECASE)
369
+ if match:
370
+ self.score = max(self.score, int(match.group(1)))
371
+
372
+ def _is_game_over(self, text: str) -> bool:
373
+ """Check if the game is over."""
374
+ game_over_phrases = [
375
+ "game over",
376
+ "you have died",
377
+ "you are dead",
378
+ "*** you have died ***",
379
+ ]
380
+ text_lower = text.lower()
381
+ return any(phrase in text_lower for phrase in game_over_phrases)
382
+
383
+ def _extract_result(self, result) -> str:
384
+ """Extract text from MCP tool result."""
385
+ if hasattr(result, 'content') and result.content:
386
+ return result.content[0].text
387
+ if isinstance(result, list) and result:
388
+ return result[0].text if hasattr(result[0], 'text') else str(result[0])
389
+ return str(result)
390
+
391
  def _build_prompt(self, observation: str, history: list) -> str:
392
  """
393
  Build the prompt for the LLM.
 
395
  TODO: Implement this to create effective prompts
396
  """
397
  # TODO: Combine system prompt, history, and current observation
398
+ parts = []
399
+
400
+ parts.append(f"Current Score: {self.score}")
401
+ parts.append(f"Locations Visited: {len(self.visited_locations)}")
402
+ parts.append(f"Current Location: {self._get_location(observation)}")
403
+
404
+ # Recent history
405
+ if self.history:
406
+ parts.append("\nRecent actions:")
407
+ for entry in self.history[-3:]:
408
+ action = entry.get("args", {}).get("action", entry["tool"])
409
+ result_short = entry["result"][:100] + "..." if len(entry["result"]) > 100 else entry["result"]
410
+ parts.append(f" > {action} -> {result_short}")
411
+
412
+ if self.location_actions.get(self._get_location(observation)):
413
+ parts.append(f"\nLast actions taken at this location: {', '.join(self.location_actions[self._get_location(observation)])}")
414
+ if action in self.location_actions[self._get_location(observation)]:
415
+ parts.append(f"\n[WARNING: You've already tried '{action}' here. Consider a different action.]")
416
+
417
+ if self.score_actions:
418
+ parts.append(f"\nRecent score-increasing actions:")
419
+ for loc, action, result in self.score_actions:
420
+ result_short = result[:100] + "..." if len(result) > 100 else result
421
+ parts.append(f" > At {loc}, action '{action}' led to: {result_short}")
422
+
423
+ # Warn about repeated actions
424
+ if self.recent_actions and len(set(self.recent_actions[-3:])) == 1:
425
+ parts.append(f"\n[WARNING: You've been doing '{self.recent_actions[-1]}' repeatedly. TRY SOMETHING DIFFERENT!]")
426
+ observations = observation.splitlines()
427
+ parts.append(observations[0]) # Location line
428
+ parts.append(f"\nCurrent situation:\n{'\n'.join(observations[1:])}")
429
+ if self.stuck_counter >= 10:
430
+ parts.append(f"\n[WARNING: No score increase for {self.stuck_counter} steps. Consider changing strategy. Interact with different objects, explore new areas.]")
431
+ self.stuck_counter = 0 # Reset counter after warning
432
+
433
+ parts.append("\nWhat do you do next?")
434
+
435
+ return "\n".join(parts)
436
 
437
  def _parse_response(self, response: str) -> tuple[str, str, dict]:
438
  """
 
444
  Tuple of (thought, tool_name, args_dict)
445
  """
446
  # TODO: Parse the response format:
447
+ thought = "No reasoning provided"
448
+ tool_name = "play_action"
449
+ tool_args = {"action": "look"}
450
+
451
+ lines = response.strip().split("\n")
452
+
453
+ for line in lines:
454
+ line_clean = line.strip()
455
+ line_upper = line_clean.upper()
456
+
457
+ if line_upper.startswith("THOUGHT:"):
458
+ thought = line_clean.split(":", 1)[1].strip()
459
+
460
+ elif line_upper.startswith("TOOL:"):
461
+ raw_tool = line_clean.split(":", 1)[1].strip().lower()
462
+ raw_tool = raw_tool.replace("**", "").replace("*", "").replace("`", "")
463
+ raw_tool = raw_tool.split()[0] if raw_tool else "play_action"
464
+ tool_name = raw_tool
465
+
466
+ elif line_upper.startswith("ARGS:"):
467
+ args_part = line_clean.split(":", 1)[1].strip()
468
+ try:
469
+ args_part = args_part.replace("'", '"')
470
+ tool_args = json.loads(args_part)
471
+ except json.JSONDecodeError:
472
+ match = re.search(r'"action"\s*:\s*"([^"]+)"', args_part)
473
+ if match:
474
+ tool_args = {"action": match.group(1)}
475
+ else:
476
+ tool_args = {"action": "look"}
477
+
478
+ return thought, tool_name, tool_args
479
 
def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
    """
    Forward a prompt to the module-level ``call_llm`` helper.

    This is a convenience wrapper - you can also use call_llm() directly.
    """
    reply = call_llm(prompt, system_prompt, seed)
    return reply
487
+
488
+ def _validate_tool_call(self, tool_name: str, tool_args: dict, valid_tools: list[str]) -> tuple[str, dict]:
489
+ """Validate and fix common tool call issues."""
490
+ # Fix tool name
491
+ if tool_name not in valid_tools:
492
+ if tool_name in ["action", "do", "command"]:
493
+ tool_name = "play_action"
494
+ elif tool_name in ["map", "location"]:
495
+ tool_name = "get_map"
496
+ elif tool_name in ["mem", "state", "status"]:
497
+ tool_name = "memory"
498
+ elif tool_name in ["inv", "items"]:
499
+ tool_name = "inventory"
500
+ else:
501
+ tool_name = "play_action" # Default to play_action if unrecognized
502
+
503
+ # Fix action verbs
504
+ if tool_name == "play_action":
505
+ action = tool_args.get("action", "look")
506
+
507
+ invalid_verb_map = {
508
+ "check": "examine",
509
+ "inspect": "examine",
510
+ "search": "look",
511
+ "grab": "take",
512
+ "pick": "take",
513
+ "use": "examine",
514
+ "investigate": "examine",
515
+ }
516
+
517
+ words = action.lower().split()
518
+ if words and words[0] in invalid_verb_map:
519
+ words[0] = invalid_verb_map[words[0]]
520
+ action = " ".join(words)
521
+
522
+ if words and words[0] in ["go", "move","enter"] and len(words) > 1:
523
+ action = words[1]
524
+
525
+
526
+ action = action.lower().strip()
527
+ action = action.replace("**", "").replace("*", "").replace("`", "")
528
+ action = " ".join(action.split())
529
+
530
+ tool_args["action"] = action
531
+
532
+ return tool_name, tool_args
533
 
534
 
535
  # =============================================================================
mcp_server.py CHANGED
@@ -60,9 +60,11 @@ class GameManager:
60
  self.state = None
61
  self.game_name: str = ""
62
  # TODO: Add more state tracking
63
- # self.history: list[tuple[str, str]] = []
64
- # self.explored_locations: dict[str, set[str]] = {}
65
- # self.current_location: str = ""
 
 
66
 
67
  def initialize(self, game: str = "zork1"):
68
  """Initialize or reset the game."""
@@ -70,8 +72,22 @@ class GameManager:
70
  self.env = TextAdventureEnv(game)
71
  self.state = self.env.reset()
72
  # TODO: Reset your state tracking here
 
 
 
 
 
73
  return self.state.observation
74
 
 
 
 
 
 
 
 
 
 
75
  def step(self, action: str) -> str:
76
  """Execute an action and return the result."""
77
  if self.env is None:
@@ -80,8 +96,45 @@ class GameManager:
80
  self.state = self.env.step(action)
81
 
82
  # TODO: Update your state tracking here
83
- # self.history.append((action, self.state.observation))
84
- # Update location tracking, etc.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  return self.state.observation
87
 
@@ -133,71 +186,105 @@ def play_action(action: str) -> str:
133
  game = get_game()
134
 
135
  # TODO: You might want to add action validation here
 
 
 
 
 
 
 
 
 
 
136
  # TODO: You might want to include score changes in the response
137
 
138
- result = game.step(action)
 
 
 
 
 
 
 
 
 
 
 
139
 
140
  # Optional: Append score info
141
- # result += f"\n[Score: {game.get_score()} | Moves: {game.get_moves()}]"
142
 
143
  return result
144
 
145
 
146
  # TODO: Implement additional tools to help your agent
147
 
148
- # @mcp.tool()
149
- # def memory() -> str:
150
- # """
151
- # Get the current game state summary.
152
- #
153
- # Returns:
154
- # A summary including current location, score, moves, and recent history
155
- # """
156
- # game = get_game()
157
- # # TODO: Return useful state information
158
- # pass
159
-
160
-
161
- # @mcp.tool()
162
- # def inventory() -> str:
163
- # """
164
- # Check what the player is carrying.
165
- #
166
- # Returns:
167
- # List of items in the player's inventory
168
- # """
169
- # game = get_game()
170
- # result = game.step("inventory")
171
- # return result
172
-
173
-
174
- # @mcp.tool()
175
- # def get_map() -> str:
176
- # """
177
- # Get a map of explored locations.
178
- #
179
- # Returns:
180
- # A text representation of explored locations and connections
181
- # """
182
- # game = get_game()
183
- # # TODO: Return map of explored locations
184
- # pass
185
-
186
-
187
- # @mcp.tool()
188
- # def get_valid_actions() -> str:
189
- # """
190
- # Get a list of likely valid actions from the current location.
191
- #
192
- # Returns:
193
- # List of actions that might work here
194
- # """
195
- # # This is a hint: Jericho provides get_valid_actions()
196
- # game = get_game()
197
- # if game.env and game.env.env:
198
- # valid = game.env.env.get_valid_actions()
199
- # return "Valid actions: " + ", ".join(valid[:20])
200
- # return "Could not determine valid actions"
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
 
203
  # =============================================================================
 
60
  self.state = None
61
  self.game_name: str = ""
62
  # TODO: Add more state tracking
63
+ self.history: list[tuple[str, str]] = []
64
+ self.explored_locations: dict[str, set[str]] = {}
65
+ self.current_location: str = ""
66
+ self.map: dict[str, dict[str, str]] = {}
67
+ self.inventory: list[str] = []
68
 
69
  def initialize(self, game: str = "zork1"):
70
  """Initialize or reset the game."""
 
72
  self.env = TextAdventureEnv(game)
73
  self.state = self.env.reset()
74
  # TODO: Reset your state tracking here
75
+ self.history = []
76
+ self.explored_locations = {}
77
+ self.map = {}
78
+ self.current_location = self.get_current_location()
79
+ self.inventory = self.get_inventory()
80
  return self.state.observation
81
 
def get_current_location(self) -> str:
    """Return the ``name`` of the location object reported by the wrapped environment."""
    player_location = self.env.env.get_player_location()
    return player_location.name
85
+
def get_inventory(self) -> list[str]:
    """
    Get the names of the objects currently carried.

    Returns:
        A list with the ``name`` of every object the wrapped environment
        reports in the inventory (empty when nothing is carried).
    """
    return [obj.name for obj in self.env.env.get_inventory()]
90
+
91
  def step(self, action: str) -> str:
92
  """Execute an action and return the result."""
93
  if self.env is None:
 
96
  self.state = self.env.step(action)
97
 
98
  # TODO: Update your state tracking here
99
+ self.history.append((action, self.state.observation))
100
+
101
+ if len(self.history) > 50:
102
+ self.history = self.history[-50:]
103
+
104
+ action_inverse = {
105
+ "north": "south",
106
+ "south": "north",
107
+ "east": "west",
108
+ "west": "east",
109
+ "up": "down",
110
+ "down": "up",
111
+ "enter": "exit",
112
+ "exit": "enter",
113
+ "northeast": "southwest",
114
+ "northwest": "southeast",
115
+ "southeast": "northwest",
116
+ "southwest": "northeast",
117
+ }
118
+
119
+ if action in ["north", "south", "east", "west", "up", "down",
120
+ "enter", "exit"]:
121
+ if self.current_location not in self.explored_locations:
122
+ self.explored_locations[self.current_location] = set()
123
+ new_location = self.get_current_location()
124
+ if new_location != self.current_location:
125
+ self.explored_locations[self.current_location].add(f"{action} -> {new_location}")
126
+ if new_location not in self.explored_locations:
127
+ self.explored_locations[new_location] = set()
128
+ self.explored_locations[new_location].add(f"{action_inverse.get(action,'Unknown')} -> {self.current_location}")
129
+ else :
130
+ self.explored_locations[self.current_location].add(f"{action} -> 'No movement'")
131
+
132
+
133
+ self.current_location = self.get_current_location()
134
+
135
+ if "take" in action or "drop" in action:
136
+ self.inventory = self.get_inventory()
137
+
138
 
139
  return self.state.observation
140
 
 
186
  game = get_game()
187
 
188
  # TODO: You might want to add action validation here
189
+ ''' valid_action = False
190
+ if action in ["north", "south", "east", "west", "up", "down",
191
+ "enter", "exit", "n", "s", "e", "w", "u", "d", "look", "inventory", "memory", "get_map"]:
192
+ valid_action = True
193
+ elif action.startswith(("take ", "drop ", "open ", "examine ", "read ", "turn on ", "turn off ")):
194
+ valid_action = True
195
+
196
+ if not valid_action:
197
+ return f"Action '{action}' may not be valid here. Changed actions may lead to better results."'''
198
+
199
  # TODO: You might want to include score changes in the response
200
 
201
+ result = f"Current Location: {game.current_location}\n"
202
+ #result += f"Walkthrough of action: {game.env.env.get_walkthrough()}\n\n"
203
+ result += game.step(action)
204
+ # Add score info
205
+ score_info = f"\n\n[Score: {game.state.score} | Moves: {game.state.moves}]"
206
+
207
+ if game.state.reward > 0:
208
+ score_info = f"\n\n+{game.state.reward} points! (Total: {game.state.score})"
209
+
210
+ done_info = ""
211
+ if game.state.done:
212
+ done_info = "\n\nGAME OVER"
213
 
214
  # Optional: Append score info
215
+ result += score_info + done_info
216
 
217
  return result
218
 
219
 
220
  # TODO: Implement additional tools to help your agent
221
 
@mcp.tool()
def memory() -> str:
    """
    Get the current game state summary.

    Returns:
        A summary including current location, score, moves, and recent history
    """
    # NOTE: the docstring above is presumably surfaced to clients as the
    # tool description, so its wording is kept unchanged.
    game = get_game()

    summary = [
        f"Location: {game.current_location}",
        f"Score: {game.get_score()}",
        f"Moves: {game.get_moves()}",
        "10 Last Actions:",
    ]
    # One "> action -> <observation>" line per recorded step, newest last.
    summary.append("\n".join(f"> {a} -> <{r}>" for a, r in game.history[-10:]))
    return "\n".join(summary)
240
+
241
+
@mcp.tool()
def inventory() -> str:
    """
    Check what the player is carrying.

    Returns:
        List of items in the player's inventory
    """
    game = get_game()

    # The cached list is only refreshed by step() for actions containing
    # "take" or "drop", so it goes stale when items change hands any other
    # way. Re-read it from the environment whenever one is loaded.
    if game.env is not None:
        game.inventory = game.get_inventory()

    if not game.inventory:
        return "Inventory is empty."
    return "Inventory: " + ", ".join(game.inventory)
253
+
254
+
@mcp.tool()
def get_map() -> str:
    """
    Get a map of explored locations.

    Returns:
        A text representation of explored locations and connections
    """
    game = get_game()
    if not game.explored_locations:
        return "No locations explored yet."

    lines = ["Explored Locations:"]
    for loc, exits in game.explored_locations.items():
        lines.append(f"-{loc}:")
        # Exits are stored in a set; sort them so the map text is stable
        # from one call to the next.
        lines.extend(f" - {exit_info}" for exit_info in sorted(exits))
    # Every line, including the last, is newline-terminated.
    return "\n".join(lines) + "\n"
272
+
273
+
@mcp.tool()
def get_valid_actions() -> str:
    """
    Get a list of likely valid actions from the current location.

    Returns:
        List of actions that might work here
    """
    game = get_game()
    if game.env and game.env.env:
        # NOTE(review): no get_valid_actions() is defined on the game
        # manager in the code visible here, while the guard above checks
        # the wrapped environment. Use a manager-level helper if one
        # exists, otherwise ask the wrapped environment directly.
        lookup = getattr(game, "get_valid_actions", None)
        if lookup is None:
            lookup = game.env.env.get_valid_actions
        valid = lookup()
        # Cap the list so the reply stays short.
        return "Valid actions: " + ", ".join(valid[:20])
    return "Could not determine valid actions"
288
 
289
 
290
  # =============================================================================