AbderrahimB committed on
Commit
a5460f1
·
1 Parent(s): 3ad9eca

Update MCP server + exploration agent

Browse files
Files changed (1) hide show
  1. mcp_server.py +257 -133
mcp_server.py CHANGED
@@ -40,170 +40,294 @@ from games.zork_env import TextAdventureEnv
40
 
41
  mcp = FastMCP("Student Text Adventure Server")
42
 
 
 
 
 
 
 
 
 
 
 
 
43
 
44
- # =============================================================================
45
- # Game State Management
46
- # =============================================================================
47
 
48
  class GameManager:
49
- """
50
- Manages the text adventure game state.
51
-
52
- TODO: Extend this class to track:
53
- - Action history (for memory tool)
54
- - Explored locations (for mapping)
55
- - Current score and moves
56
- """
57
-
58
  def __init__(self):
59
- self.env: TextAdventureEnv = None
60
- self.state = None
61
  self.game_name: str = ""
62
- # TODO: Add more state tracking
63
- # self.history: list[tuple[str, str]] = []
64
- # self.explored_locations: dict[str, set[str]] = {}
65
- # self.current_location: str = ""
66
-
67
- def initialize(self, game: str = "zork1"):
68
- """Initialize or reset the game."""
69
  self.game_name = game
70
  self.env = TextAdventureEnv(game)
71
  self.state = self.env.reset()
72
- # TODO: Reset your state tracking here
 
 
 
 
73
  return self.state.observation
74
-
75
- def step(self, action: str) -> str:
76
- """Execute an action and return the result."""
77
  if self.env is None:
78
- self.initialize()
79
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  self.state = self.env.step(action)
81
-
82
- # TODO: Update your state tracking here
83
- # self.history.append((action, self.state.observation))
84
- # Update location tracking, etc.
85
-
86
- return self.state.observation
87
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  def get_score(self) -> int:
89
- """Get current score."""
90
- return self.state.score if self.state else 0
91
-
92
  def get_moves(self) -> int:
93
- """Get number of moves taken."""
94
- return self.state.moves if self.state else 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
96
 
97
- # Global game manager
98
  _game = GameManager()
99
 
100
 
101
  def get_game() -> GameManager:
102
- """Get or initialize the game manager."""
103
  global _game
104
- if _game.env is None:
105
- # Get game from environment variable (set by evaluator)
106
  game = os.environ.get("GAME", "zork1")
107
  _game.initialize(game)
108
  return _game
109
 
110
 
111
- # =============================================================================
112
- # MCP Tools - IMPLEMENT THESE
113
- # =============================================================================
114
-
115
  @mcp.tool()
116
  def play_action(action: str) -> str:
117
  """
118
- Execute a game command and return the result.
119
-
120
- This is the main tool for interacting with the game.
121
-
122
- Args:
123
- action: The command to execute (e.g., "north", "take lamp", "open mailbox")
124
-
125
- Returns:
126
- The game's response to the action
127
-
128
- Valid commands include:
129
- - Movement: north, south, east, west, up, down, enter, exit
130
- - Objects: take <item>, drop <item>, open <thing>, examine <thing>
131
- - Other: look, inventory, read <thing>, turn on lamp
132
  """
133
  game = get_game()
134
-
135
- # TODO: You might want to add action validation here
136
- # TODO: You might want to include score changes in the response
137
-
138
- result = game.step(action)
139
-
140
- # Optional: Append score info
141
- # result += f"\n[Score: {game.get_score()} | Moves: {game.get_moves()}]"
142
-
143
- return result
144
-
145
-
146
- # TODO: Implement additional tools to help your agent
147
-
148
- # @mcp.tool()
149
- # def memory() -> str:
150
- # """
151
- # Get the current game state summary.
152
- #
153
- # Returns:
154
- # A summary including current location, score, moves, and recent history
155
- # """
156
- # game = get_game()
157
- # # TODO: Return useful state information
158
- # pass
159
-
160
-
161
- # @mcp.tool()
162
- # def inventory() -> str:
163
- # """
164
- # Check what the player is carrying.
165
- #
166
- # Returns:
167
- # List of items in the player's inventory
168
- # """
169
- # game = get_game()
170
- # result = game.step("inventory")
171
- # return result
172
-
173
-
174
- # @mcp.tool()
175
- # def get_map() -> str:
176
- # """
177
- # Get a map of explored locations.
178
- #
179
- # Returns:
180
- # A text representation of explored locations and connections
181
- # """
182
- # game = get_game()
183
- # # TODO: Return map of explored locations
184
- # pass
185
-
186
-
187
- # @mcp.tool()
188
- # def get_valid_actions() -> str:
189
- # """
190
- # Get a list of likely valid actions from the current location.
191
- #
192
- # Returns:
193
- # List of actions that might work here
194
- # """
195
- # # This is a hint: Jericho provides get_valid_actions()
196
- # game = get_game()
197
- # if game.env and game.env.env:
198
- # valid = game.env.env.get_valid_actions()
199
- # return "Valid actions: " + ", ".join(valid[:20])
200
- # return "Could not determine valid actions"
201
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
 
203
- # =============================================================================
204
- # Run the server
205
- # =============================================================================
206
 
207
  if __name__ == "__main__":
208
- # This runs the server with stdio transport (for MCP clients)
209
- mcp.run()
 
40
 
41
# Server instance; the tool functions below register on it via @mcp.tool().
mcp = FastMCP("Student Text Adventure Server")
42
 
43
# Commands treated as movement when recording the explored-location graph.
# Besides the cardinal/vertical directions this includes the diagonal compass
# directions and "in"/"out", which text-adventure parsers such as Zork's also
# accept; without them those moves would never be added to the map.
MOVE_ACTIONS = {
    "north", "south", "east", "west", "up", "down", "enter", "exit",
    "n", "s", "e", "w", "u", "d",
    "northeast", "northwest", "southeast", "southwest",
    "ne", "nw", "se", "sw",
    "in", "out",
}
47
+
48
+
49
+ def _first_line(obs: str) -> str:
50
+ obs = (obs or "").strip().replace("\r", "")
51
+ if not obs:
52
+ return "Unknown"
53
+ return obs.splitlines()[0].strip() or "Unknown"
54
 
 
 
 
55
 
56
class GameManager:
    """Hold the text-adventure environment and the state tracked around it.

    Tracked state:
      * ``history`` - bounded list of per-step records (see ``step``).
      * ``explored`` - graph of observed moves:
        location key -> {move action -> location key}.
      * ``current_location_key`` - key of the location after the latest
        reset or step.
    """

    # step() keeps at most this many history records.
    HISTORY_LIMIT = 80

    def __init__(self):
        self.env: Optional[TextAdventureEnv] = None
        self.state: Any = None
        self.game_name: str = ""

        # Each record: {action, obs, reward, score, delta}
        self.history: list[dict[str, Any]] = []
        # loc_key -> {move_action -> loc_key}
        self.explored: dict[str, dict[str, str]] = {}
        self.current_location_key: str = "Unknown"

    def initialize(self, game: str = "zork1") -> str:
        """Create a fresh environment for *game* and reset all tracking.

        Returns:
            The observation produced by the environment's reset.
        """
        self.game_name = game
        self.env = TextAdventureEnv(game)
        self.state = self.env.reset()

        self.history = []
        self.explored = {}
        self.current_location_key = self._get_location_key(self.state.observation)

        return self.state.observation

    def _jericho_env(self) -> Optional[Any]:
        """Return the wrapped backend stored on ``self.env.env``, if any."""
        # TextAdventureEnv often stores the jericho env as .env
        if self.env is None:
            return None
        return getattr(self.env, "env", None)

    def _get_location_key(self, observation: str) -> str:
        """Return a key identifying the player's current location.

        Prefers the backend's player-location id and falls back to the
        first line of *observation* (truncated to 80 characters).
        """
        inner = self._jericho_env()
        if inner is not None and hasattr(inner, "get_player_location"):
            try:
                loc = inner.get_player_location()
            except Exception:
                # Best effort: any backend failure falls through to the
                # observation header below.
                loc = None
            if loc is not None:
                # The backend may return a plain id or an object carrying
                # the id in ``.num``. Formatting the whole object would
                # embed fields that change while the player stays put.
                return f"id:{getattr(loc, 'num', loc)}"

        # A world-state hash is deliberately NOT used as a key: it identifies
        # the whole world state rather than the room, so it would produce a
        # new "location" whenever anything in the world changes.
        return _first_line(observation)[:80]

    def step(self, action: str) -> str:
        """Send *action* to the game, update tracking, return the observation.

        Lazily initializes the game (name taken from the ``GAME``
        environment variable, default ``"zork1"``) when nothing is running.
        """
        if self.env is None or self.state is None:
            self.initialize(os.environ.get("GAME", "zork1"))

        prev_loc = self.current_location_key
        # ``or 0`` guards against a state whose field is present but None.
        prev_score = int(getattr(self.state, "score", 0) or 0)

        self.state = self.env.step(action)
        obs = self.state.observation

        new_loc = self._get_location_key(obs)
        self.current_location_key = new_loc

        reward = int(getattr(self.state, "reward", 0) or 0)
        score = int(getattr(self.state, "score", 0) or 0)

        # Map updates on movement
        a = (action or "").strip().lower()
        if a in MOVE_ACTIONS:
            exits = self.explored.setdefault(prev_loc, {})
            # Only record an edge if the move actually changed location.
            if new_loc != prev_loc:
                exits[a] = new_loc

        # History (kept bounded)
        self.history.append(
            {
                "action": a,
                "obs": self._summarize(obs),
                "reward": reward,
                "score": score,
                "delta": score - prev_score,
            }
        )
        if len(self.history) > self.HISTORY_LIMIT:
            self.history = self.history[-self.HISTORY_LIMIT:]

        return obs

    def _summarize(self, obs: str) -> str:
        """Collapse the first two non-blank lines of *obs* to <= 140 chars."""
        txt = (obs or "").strip().replace("\r", "")
        lines = [ln.strip() for ln in txt.splitlines() if ln.strip()]
        s = re.sub(r"\s+", " ", " ".join(lines[:2]))
        return s[:140]

    def get_score(self) -> int:
        """Return the current score, or 0 when no state is available."""
        return int(getattr(self.state, "score", 0) or 0) if self.state else 0

    def get_moves(self) -> int:
        """Return the current move count, or 0 when no state is available."""
        return int(getattr(self.state, "moves", 0) or 0) if self.state else 0

    def is_done(self) -> bool:
        """Return whether the current state is flagged as done."""
        return bool(getattr(self.state, "done", False)) if self.state else False

    def get_inventory_text(self) -> str:
        """Describe the inventory without issuing a game command.

        Reads ``state.inventory`` first, then the backend's
        ``get_inventory()``; never steps the game, so no move is spent.
        """
        if self.state is None:
            return "Inventory: (unknown)"

        # If state has a (non-empty) inventory field
        inv = getattr(self.state, "inventory", None)
        if inv:
            return "Inventory: " + self._format_inventory(inv)

        inner = self._jericho_env()
        if inner is not None and hasattr(inner, "get_inventory"):
            try:
                return "Inventory: " + self._format_inventory(inner.get_inventory())
            except Exception:
                # Best effort: report "unavailable" rather than crash a tool.
                pass

        # Do not step("inventory") here because it may cost moves.
        return "Inventory: (unavailable)"

    def _format_inventory(self, inv: Any) -> str:
        """Render *inv* as a comma-separated item list (at most 30 items).

        *inv* may be ``None``, a single string, or an iterable of strings
        or objects. Objects exposing a string ``name`` are shown by that
        name; anything else is stringified and a leading ``"label:"``
        prefix is removed.
        """
        if inv is None:
            return "(empty)"
        if isinstance(inv, str):
            # Iterating a string would split it into single characters.
            return re.sub(r"\s+", " ", inv).strip() or "(empty)"
        try:
            items = list(inv)
        except Exception:
            return str(inv)

        cleaned = []
        for it in items:
            name = getattr(it, "name", None)
            if isinstance(name, str) and name.strip():
                s = name
            else:
                s = str(it)
                # Heuristic cleanup of "label: text" renderings
                if ":" in s:
                    s = s.split(":", 1)[-1]
            s = re.sub(r"\s+", " ", s).strip()
            if s:
                cleaned.append(s)

        if not cleaned:
            return "(empty)"
        return ", ".join(cleaned[:30])

    def get_valid_actions_list(self) -> list[str]:
        """Return the backend's valid actions as strings, or [] if unavailable."""
        inner = self._jericho_env()
        if inner is not None and hasattr(inner, "get_valid_actions"):
            try:
                return [str(a) for a in inner.get_valid_actions()]
            except Exception:
                # Best effort: an empty list signals "unknown" to callers.
                return []
        return []
219
 
220
 
 
221
# Process-wide manager shared by every tool in this module.
_game = GameManager()


def get_game() -> GameManager:
    """Return the shared manager, starting the game first if none is running.

    The game name is read from the GAME environment variable and defaults
    to "zork1".
    """
    not_started = _game.env is None or _game.state is None
    if not_started:
        _game.initialize(os.environ.get("GAME", "zork1"))
    return _game
230
 
231
 
 
 
 
 
232
@mcp.tool()
def play_action(action: str) -> str:
    """
    Execute a game command and return the result, with appended score/moves info.
    """
    # Output layout: observation, then (only when the step earned points) a
    # "+N points!" line, then the "[Score | Moves]" status line, then
    # "GAME OVER" once the game has ended. The status line is always
    # present so callers can rely on it regardless of reward.
    game = get_game()

    # Blank or missing input falls back to a harmless "look".
    action_clean = (action or "").strip() or "look"
    obs = game.step(action_clean)

    score = game.get_score()
    moves = game.get_moves()
    # ``or 0`` guards against a state whose reward is present but None.
    reward = int(getattr(game.state, "reward", 0) or 0)

    parts = []
    if reward > 0:
        parts.append(f"+{reward} points! (Total: {score})")
    parts.append(f"[Score: {score} | Moves: {moves}]")
    if game.is_done():
        parts.append("GAME OVER")

    return obs + "\n\n" + "\n\n".join(parts)
257
+
258
+
259
@mcp.tool()
def memory() -> str:
    """
    Return current game summary: location, score, moves, inventory, recent history, observation.
    """
    game = get_game()
    where = game.current_location_key
    points = game.get_score()
    turns = game.get_moves()
    carried = game.get_inventory_text()

    # One line per entry for the six most recent history records.
    tail = game.history[-6:]
    if not tail:
        hist_lines = " (none)"
    else:
        hist_lines = "\n".join(
            f" > {entry['action']} -> {entry['obs']} (Δ{entry['delta']})"
            for entry in tail
        )

    sections = [
        "Current State:",
        f"LocationID: {where}",
        f"Score: {points}",
        f"Moves: {turns}",
        f"{carried}",
        "",
        "Recent Actions:",
        f"{hist_lines}",
        "",
        "Current Observation:",
        f"{game.state.observation if game.state else ''}",
    ]
    return "\n".join(sections)
289
+
290
+
291
@mcp.tool()
def inventory() -> str:
    """
    Return inventory text (non-consuming when possible).
    """
    # Thin wrapper: the shared manager produces the inventory description.
    return get_game().get_inventory_text()
298
+
299
+
300
@mcp.tool()
def get_map() -> str:
    """
    Return explored location graph (from movement actions).
    """
    game = get_game()
    graph = game.explored
    if not graph:
        return "Map: (empty) Try moving around."

    # Locations and their exits are both listed in sorted key order.
    out = ["Explored Map:"]
    for place in sorted(graph):
        out.append(f"\n* {place}")
        exits = graph[place]
        out.extend(f" {move} -> {exits[move]}" for move in sorted(exits))

    out.append(f"\n[Current] {game.current_location_key}")
    return "\n".join(out)
317
+
318
+
319
@mcp.tool()
def get_valid_actions() -> str:
    """
    Return Jericho get_valid_actions() list (trimmed).
    Output is JSON for easier parsing.
    """
    candidates = get_game().get_valid_actions_list()
    # Cap at 120 entries to keep messages small; a falsy result becomes [].
    trimmed = (candidates or [])[:120]
    return json.dumps(trimmed)
330
 
 
 
 
331
 
332
if __name__ == "__main__":
    # Start the MCP server only when this file is executed as a script,
    # not when it is imported.
    mcp.run()