NA commited on
Commit
fd9722e
·
1 Parent(s): 615a63b

Implement my agent

Browse files
Files changed (3) hide show
  1. README.md +17 -4
  2. agent.py +307 -181
  3. mcp_server.py +189 -155
README.md CHANGED
@@ -16,13 +16,26 @@ license: mit
16
 
17
  This is my submission for the Text Adventure Agent assignment. My agent uses the ReAct pattern to play text adventure games via MCP.
18
 
 
 
19
  ## Approach
20
 
21
- <!-- Describe your approach here -->
 
 
 
 
 
 
 
 
22
 
23
- - What strategy does your agent use?
24
- - What tools did you implement in your MCP server?
25
- - Any interesting techniques or optimizations?
 
 
 
26
 
27
  ## Files
28
 
 
16
 
17
  This is my submission for the Text Adventure Agent assignment. My agent uses the ReAct pattern to play text adventure games via MCP.
18
 
19
+ Based on the example provided, I improved some features. I added a long-term memory to keep long-term objectives in mind, a memory of actions taken in the current room to avoid repeating wrong directions, and a memory of the last 10 rooms visited and the path taken, so the agent does not get stuck in the forest and can return to interesting rooms. I also changed how the current room is updated.
20
+
21
  ## Approach
22
 
23
+ This agent uses the following pattern:
24
+ 1. **Thought**: Reason about the current situation
25
+ 2. **Long Term Memory**: Consult long-term memory to follow long-term objectives, such as finding a lamp or going back to a room
26
+ 3. **Tool**: Choose and call an MCP tool
27
+ 4. **Observation**: Process the result
28
+
29
+ I kept the tools from the baseline and then added get_current_map() for the action history of the current room and get_last_10_rooms() for the last 10 rooms explored. Initially I left the choice of using them to the LLM, but given their importance relative to the token cost, I chose to call them automatically at each step.
30
+
31
+ The prompt is then composed of:
32
 
33
+ - current score
34
+ - long-term memory
35
+ - recent actions
36
+ - last 10 rooms explored
37
+ - current location and past actions already explored in this location
38
+ - observation
39
 
40
  ## Files
41
 
agent.py CHANGED
@@ -1,26 +1,8 @@
1
  """
2
- Student Agent for Text Adventure Games
3
 
4
- This is your submission file. Implement the StudentAgent class to play
5
- text adventure games using the MCP server you also implement.
6
-
7
- Your agent should:
8
- 1. Connect to the MCP server via the provided client
9
- 2. Use the ReAct pattern (Thought -> Action -> Observation)
10
- 3. Call MCP tools to interact with the game
11
- 4. Maximize the game score within the step limit
12
-
13
- Required method:
14
- async def run(self, client, game, max_steps, seed, verbose) -> RunResult
15
-
16
- The 'client' is a FastMCP Client already connected to your MCP server.
17
- Use it to call tools like: await client.call_tool("play_action", {"action": "look"})
18
-
19
- Tips:
20
- - Start by looking around and understanding your environment
21
- - Keep track of visited locations to avoid loops
22
- - Pick up useful items (lamp, sword, etc.)
23
- - The seed parameter should be used to set your LLM's seed for reproducibility
24
  """
25
 
26
  import json
@@ -32,83 +14,36 @@ from typing import Optional
32
  from dotenv import load_dotenv
33
  from huggingface_hub import InferenceClient
34
 
35
- # Load environment variables
36
  load_dotenv()
37
 
38
- # Set USE_LOCAL_MODEL=1 in your .env to use a locally downloaded model
39
- USE_LOCAL_MODEL = os.getenv("USE_LOCAL_MODEL", "0").strip() in ("1", "true", "yes")
40
- LOCAL_MODEL_ID = os.getenv("LOCAL_MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")
41
-
42
  # =============================================================================
43
  # LLM Configuration - DO NOT MODIFY
44
  # =============================================================================
45
 
46
- # Model to use (fixed for fair evaluation)
47
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
48
 
49
- # Initialize the LLM client based on mode
50
- _local_pipeline = None
51
-
52
- if USE_LOCAL_MODEL:
53
- import torch
54
- from transformers import pipeline as _hf_pipeline
55
 
56
- _local_pipeline = _hf_pipeline(
57
- "text-generation",
58
- model=LOCAL_MODEL_ID,
59
- torch_dtype=torch.bfloat16,
60
- device_map="auto",
61
- )
62
- LLM_CLIENT = None
63
- else:
64
- _hf_token = os.getenv("HF_TOKEN")
65
- if not _hf_token:
66
- raise ValueError("HF_TOKEN not found. Set it in your .env file.")
67
- LLM_CLIENT = InferenceClient(token=_hf_token)
68
 
69
 
70
  def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
71
- """
72
- Call the LLM with the given prompt. Use this function in your agent.
73
-
74
- Args:
75
- prompt: The user prompt (current game state, history, etc.)
76
- system_prompt: The system prompt (instructions for the agent)
77
- seed: Random seed for reproducibility
78
- max_tokens: Maximum tokens in response (default: 300)
79
-
80
- Returns:
81
- The LLM's response text
82
-
83
- Example:
84
- response = call_llm(
85
- prompt="You are in a forest. What do you do?",
86
- system_prompt=SYSTEM_PROMPT,
87
- seed=42,
88
- )
89
- """
90
  messages = [
91
  {"role": "system", "content": system_prompt},
92
  {"role": "user", "content": prompt},
93
  ]
94
-
95
- if USE_LOCAL_MODEL and _local_pipeline is not None:
96
- outputs = _local_pipeline(
97
- messages,
98
- max_new_tokens=max_tokens,
99
- temperature=0.0001, # Near-deterministic (0.0 unsupported by some backends)
100
- do_sample=True,
101
- )
102
- return outputs[0]["generated_text"][-1]["content"]
103
-
104
  response = LLM_CLIENT.chat.completions.create(
105
  model=LLM_MODEL,
106
  messages=messages,
107
- temperature=0.0, # Deterministic for reproducibility
108
  max_tokens=max_tokens,
109
  seed=seed,
110
  )
111
-
112
  return response.choices[0].message.content
113
 
114
 
@@ -125,179 +60,370 @@ class RunResult:
125
 
126
 
127
  # =============================================================================
128
- # System Prompt - Customize this for your agent
129
  # =============================================================================
130
 
131
- SYSTEM_PROMPT = """You are playing a classic text adventure game.
132
 
133
- GOAL: Explore the world, solve puzzles, and maximize your score.
134
-
135
- AVAILABLE TOOLS (use via MCP):
136
- - play_action: Execute a game command (north, take lamp, open mailbox, etc.)
137
- - memory: Get current game state and history (if implemented)
138
- - inventory: Check what you're carrying (if implemented)
139
 
140
  VALID GAME COMMANDS for play_action:
141
  - Movement: north, south, east, west, up, down, enter, exit
142
  - Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
143
- - Other: look, inventory, read <thing>, turn on lamp
 
 
 
 
144
 
145
  RESPOND IN THIS EXACT FORMAT (no markdown):
146
- THOUGHT: <your reasoning about what to do next>
 
147
  TOOL: <tool_name>
148
- ARGS: <JSON arguments, e.g., {"action": "look"}>
149
 
150
- Example:
151
- THOUGHT: I should look around to see where I am.
 
152
  TOOL: play_action
153
  ARGS: {"action": "look"}
154
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
155
 
156
 
157
  # =============================================================================
158
- # Student Agent - IMPLEMENT THIS CLASS
159
  # =============================================================================
160
 
161
  class StudentAgent:
162
  """
163
- Your ReAct agent implementation.
164
 
165
- TODO:
166
- 1. Implement the run() method with the ReAct loop
167
- 2. Parse LLM responses to extract tool calls
168
- 3. Track state and avoid loops
169
-
170
- Use the provided call_llm() function to interact with the LLM.
171
  """
172
 
173
  def __init__(self):
174
- """Initialize your agent here."""
175
- # TODO: Initialize any state tracking you need
176
- # self.history = []
177
- # self.visited_locations = set()
178
- pass
179
 
180
  async def run(
181
  self,
182
- client, # FastMCP Client connected to your MCP server
183
  game: str,
184
  max_steps: int,
185
  seed: int,
186
  verbose: bool = False,
187
  ) -> RunResult:
188
- """
189
- Run the agent for a game session.
190
-
191
- Args:
192
- client: FastMCP Client connected to your MCP server
193
- game: Name of the game being played (e.g., "zork1")
194
- max_steps: Maximum number of steps to take
195
- seed: Random seed for reproducibility (use for LLM calls)
196
- verbose: Whether to print detailed output
197
-
198
- Returns:
199
- RunResult with final score and statistics
200
- """
201
- # TODO: Implement your ReAct loop here
202
- #
203
- # Basic structure:
204
- # 1. Get initial observation (call play_action with "look")
205
- # 2. Loop for max_steps:
206
- # a. Build prompt with current observation and history
207
- # b. Call LLM to get thought and action
208
- # c. Parse the response to extract tool and args
209
- # d. Call the tool via client.call_tool(tool_name, args)
210
- # e. Update history and state
211
- # f. Check for game over
212
- # 3. Return RunResult with final statistics
213
-
214
- # Example of calling a tool:
215
- # result = await client.call_tool("play_action", {"action": "look"})
216
- # observation = result[0].text if result else "No response"
217
-
218
- # Example of calling the LLM:
219
- # response = call_llm(
220
- # prompt="Current observation: " + observation,
221
- # system_prompt=SYSTEM_PROMPT,
222
- # seed=seed,
223
- # )
224
-
225
- # Placeholder implementation - replace with your code
226
  locations_visited = set()
227
  history = []
228
- final_score = 0
229
  moves = 0
230
 
231
- # TODO: Your implementation here
232
- # ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
 
234
  return RunResult(
235
- final_score=final_score,
236
- max_score=350, # Zork1 max score, adjust if needed
237
  moves=moves,
238
  locations_visited=locations_visited,
239
- game_completed=False,
240
  history=history,
241
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
242
 
243
- def _build_prompt(self, observation: str, history: list) -> str:
244
- """
245
- Build the prompt for the LLM.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
 
247
- TODO: Implement this to create effective prompts
248
- """
249
- # TODO: Combine system prompt, history, and current observation
250
- pass
251
 
252
- def _parse_response(self, response: str) -> tuple[str, str, dict]:
253
- """
254
- Parse LLM response to extract thought, tool name, and arguments.
 
 
 
 
 
 
 
 
 
 
 
255
 
256
- TODO: Implement robust parsing
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
 
258
- Returns:
259
- Tuple of (thought, tool_name, args_dict)
260
- """
261
- # TODO: Parse the response format:
262
- # THOUGHT: ...
263
- # TOOL: ...
264
- # ARGS: {...}
265
- pass
266
 
267
- def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
268
- """
269
- Call the LLM with the given prompt.
 
 
 
 
 
 
 
 
 
 
 
 
270
 
271
- This is a convenience wrapper - you can also use call_llm() directly.
272
- """
273
- return call_llm(prompt, system_prompt, seed)
 
 
 
 
 
 
 
 
 
 
 
 
274
 
275
 
276
  # =============================================================================
277
- # For local testing
278
  # =============================================================================
279
 
280
  async def test_agent():
281
  """Test the agent locally."""
282
  from fastmcp import Client
283
 
284
- # Path to your MCP server
285
- server_path = "mcp_server.py"
286
-
287
  agent = StudentAgent()
288
 
289
- async with Client(server_path) as client:
290
  result = await agent.run(
291
  client=client,
292
  game="zork1",
293
- max_steps=10,
294
  seed=42,
295
  verbose=True,
296
  )
297
 
298
- print(f"\nFinal Score: {result.final_score}")
 
299
  print(f"Moves: {result.moves}")
300
- print(f"Locations: {result.locations_visited}")
301
 
302
 
303
  if __name__ == "__main__":
 
1
  """
2
+ Example: MCP ReAct Agent
3
 
4
+ A complete ReAct agent that uses MCP tools to play text adventure games.
5
+ This is a working example students can learn from.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  """
7
 
8
  import json
 
14
  from dotenv import load_dotenv
15
  from huggingface_hub import InferenceClient
16
 
 
17
  load_dotenv()
18
 
 
 
 
 
19
  # =============================================================================
20
  # LLM Configuration - DO NOT MODIFY
21
  # =============================================================================
22
 
 
23
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
24
 
25
+ _hf_token = os.getenv("HF_TOKEN")
26
+ if not _hf_token:
27
+ raise ValueError("HF_TOKEN not found. Set it in your .env file.")
 
 
 
28
 
29
+ LLM_CLIENT = InferenceClient(token=_hf_token)
 
 
 
 
 
 
 
 
 
 
 
30
 
31
 
32
  def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
33
+ """Call the LLM with the given prompt."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  messages = [
35
  {"role": "system", "content": system_prompt},
36
  {"role": "user", "content": prompt},
37
  ]
38
+
 
 
 
 
 
 
 
 
 
39
  response = LLM_CLIENT.chat.completions.create(
40
  model=LLM_MODEL,
41
  messages=messages,
42
+ temperature=0.0,
43
  max_tokens=max_tokens,
44
  seed=seed,
45
  )
46
+
47
  return response.choices[0].message.content
48
 
49
 
 
60
 
61
 
62
  # =============================================================================
63
+ # System Prompt
64
  # =============================================================================
65
 
66
+ SYSTEM_PROMPT = """You are an expert text adventure game player. Your goal is to explore, collect treasures, and maximize your score.
67
 
68
+ AVAILABLE TOOLS (use these via MCP):
69
+ 1. play_action - Execute game commands (north, take lamp, open mailbox, etc.)
70
+ 2. memory - Get current game state, score, and recent history
71
+ 3. get_map - See explored locations and connections
72
+ 4. inventory - Check what you're carrying
 
73
 
74
  VALID GAME COMMANDS for play_action:
75
  - Movement: north, south, east, west, up, down, enter, exit
76
  - Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
77
+ - Light: turn on lamp, turn off lamp
78
+ - Combat: attack <enemy> with <weapon>
79
+ - Other: inventory, look, read <thing>, wait
80
+
81
+ FORBIDDEN (will NOT work): check, inspect, search, grab, use, help
82
 
83
  RESPOND IN THIS EXACT FORMAT (no markdown):
84
+ THOUGHT: <brief reasoning about what to do next>
85
+ MEMORY: <brief information you want to keep in memory, for example room you need to go back or object you need to find>
86
  TOOL: <tool_name>
87
+ ARGS: <JSON arguments>
88
 
89
+ Examples:
90
+ THOUGHT: I need to see what's around me.
91
+ MEMORY: I need to find a lamp
92
  TOOL: play_action
93
  ARGS: {"action": "look"}
94
+
95
+ THOUGHT: Let me check my current state and score.
96
+ MEMORY: I need to go back to the dark room
97
+ TOOL: memory
98
+ ARGS: {}
99
+
100
+ THOUGHT: The mailbox might contain something useful.
101
+ MEMORY: I need to go back to the dark room
102
+ TOOL: play_action
103
+ ARGS: {"action": "open mailbox"}
104
+
105
+ STRATEGY:
106
+ 1. Start by looking around and checking memory
107
+ 2. Explore systematically - try all directions
108
+ 3. Pick up useful items (lamp, sword, etc.)
109
+ 4. Open containers (mailbox, window, etc.)
110
+ 5. Use get_map to avoid getting lost
111
+ 6. Turn on lamp before dark areas!
112
+
113
+ DO NOT repeat the same action multiple times in a row."""
114
 
115
 
116
  # =============================================================================
117
+ # Student Agent Implementation
118
  # =============================================================================
119
 
120
  class StudentAgent:
121
  """
122
+ MCP ReAct Agent - A complete working example.
123
 
124
+ This agent demonstrates:
125
+ - ReAct loop (Thought -> Tool -> Observation)
126
+ - Loop detection
127
+ - Action validation
128
+ - Score tracking via memory tool
 
129
  """
130
 
131
  def __init__(self):
132
+ """Initialize the agent state."""
133
+ self.history: list[dict] = []
134
+ self.recent_actions: list[str] = []
135
+ self.score: int = 0
 
136
 
137
  async def run(
138
  self,
139
+ client,
140
  game: str,
141
  max_steps: int,
142
  seed: int,
143
  verbose: bool = False,
144
  ) -> RunResult:
145
+ """Run the agent for a game session."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  locations_visited = set()
147
  history = []
 
148
  moves = 0
149
 
150
+ # Get list of available tools
151
+ tools = await client.list_tools()
152
+ tool_names = [t.name for t in tools]
153
+
154
+ # Get initial observation
155
+ result = await client.call_tool("play_action", {"action": "look"})
156
+ observation = self._extract_result(result)
157
+
158
+ # Track initial location
159
+ location = observation.split("\n")[0] if observation else "Unknown"
160
+ locations_visited.add(location)
161
+ memory=""
162
+ if verbose:
163
+ print(f"\n{observation}")
164
+ #last_get_map=False
165
+ # Main ReAct loop
166
+ for step in range(1, max_steps + 1):
167
+ # Build prompt with context
168
+ current_loc=await client.call_tool("get_current_map", {})
169
+ last_10_loc=await client.call_tool("get_last_10_rooms", {})
170
+ prompt = self._build_prompt(current_loc.content[0].text,last_10_loc.content[0].text,observation,memory)
171
+
172
+ print("==========")
173
+ print(current_loc.content[0].text)
174
+ print('PROMPT',prompt)
175
+ # Call LLM for reasoning (use step-based seed for variety)
176
+ response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
177
+
178
+
179
+
180
+ # Parse the response
181
+ memory, thought, tool_name, tool_args = self._parse_response(response, tool_names)
182
+
183
+ if verbose:
184
+ print(f"\n--- Step {step} ---")
185
+ print(f"[THOUGHT] {thought}")
186
+ print(f"[LONG TERM MEMORY] {memory}")
187
+ print(f"[TOOL] {tool_name}({tool_args})")
188
+
189
+ # Validate and fix common issues
190
+ tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)
191
+ """
192
+ if "you can't go that way" in observation.lower() and not last_get_map:
193
+ if verbose:
194
+ print(f"[WARNING] Wrong way - forcing 'get_map'")
195
+ tool_args = {}
196
+ tool_name="get_map"
197
+ last_get_map=True
198
+ else :
199
+ last_get_map=False
200
+ """
201
+ # Loop detection
202
+ if tool_name == "play_action":
203
+ action = tool_args.get("action", "look")
204
+ self.recent_actions.append(action)
205
+ if len(self.recent_actions) > 5:
206
+ self.recent_actions = self.recent_actions[-5:]
207
+
208
+ # Detect loops - if same action 3 times, force "look"
209
+ if len(self.recent_actions) >= 3 and len(set(self.recent_actions[-3:])) == 1:
210
+ if verbose:
211
+ print(f"[WARNING] Loop detected - forcing 'look'")
212
+ tool_args = {"action": "look"}
213
+ self.recent_actions.append("look")
214
+
215
+ moves += 1
216
+
217
+ # Execute the tool
218
+ try:
219
+ result = await client.call_tool(tool_name, tool_args)
220
+ observation = self._extract_result(result)
221
+
222
+ if verbose:
223
+ print(f"[RESULT] {observation}...")
224
+ except Exception as e:
225
+ observation = f"Error: {e}"
226
+ if verbose:
227
+ print(f"[ERROR] {e}")
228
+
229
+ # Track location
230
+ location = observation.split("\n")[0] if observation else "Unknown"
231
+ locations_visited.add(location)
232
+
233
+ # Update history
234
+ self.history.append({
235
+ "step": step,
236
+ "thought": thought,
237
+ "tool": tool_name,
238
+ "args": tool_args,
239
+ "result": observation[:200]
240
+ })
241
+ if len(self.history) > 10:
242
+ self.history = self.history[-10:]
243
+
244
+ # Track score from observation
245
+ self._update_score(observation)
246
+
247
+ # Record in result history
248
+ history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
249
+
250
+ # Check for game over
251
+ if self._is_game_over(observation):
252
+ if verbose:
253
+ print("\n*** GAME OVER ***")
254
+ break
255
 
256
  return RunResult(
257
+ final_score=self.score,
258
+ max_score=350,
259
  moves=moves,
260
  locations_visited=locations_visited,
261
+ game_completed=self._is_game_over(observation),
262
  history=history,
263
  )
264
+ def _build_prompt(self,current_loc, last_10_loc, observation, memory) -> str:
265
+ """Build the prompt for the LLM with context."""
266
+ parts = []
267
+
268
+ parts.append(f"Current Score: {self.score}")
269
+
270
+ parts.append(f"\nYour long-term memory: {memory}")
271
+ # Recent history
272
+ if self.history:
273
+ parts.append("\nRecent actions:")
274
+ for entry in self.history[-3:]:
275
+ action = entry.get("args", {}).get("action", entry["tool"])
276
+ result_short = entry["result"][:80] + "..." if len(entry["result"]) > 80 else entry["result"]
277
+ parts.append(f" > {action} -> {result_short}")
278
+
279
+ # Warn about repeated actions
280
+ if self.recent_actions and len(set(self.recent_actions[-3:])) == 1:
281
+ parts.append(f"\n[WARNING: You've been doing '{self.recent_actions[-1]}' repeatedly. TRY SOMETHING DIFFERENT!]")
282
+
283
+ parts.append(last_10_loc)
284
+ parts.append(f"\nCurrent location and past actions you already explored in this location, you can and you should test other actions:\n{current_loc}")
285
+ parts.append(f"\nCurrent situation:\n{observation}")
286
+ parts.append("\nWhat do you do next?")
287
+
288
+ return "\n".join(parts)
289
 
290
+ def _parse_response(self, response: str, valid_tools: list[str]) -> tuple[str, str, dict]:
291
+ """Parse the LLM response to extract thought, tool, and arguments."""
292
+ thought = "No reasoning provided"
293
+ memory="No memory provided"
294
+ tool_name = "play_action"
295
+ tool_args = {"action": "look"}
296
+
297
+ lines = response.strip().split("\n")
298
+
299
+ for line in lines:
300
+ line_clean = line.strip()
301
+ line_upper = line_clean.upper()
302
+
303
+ if line_upper.startswith("THOUGHT:"):
304
+ thought = line_clean.split(":", 1)[1].strip()
305
+
306
+ elif line_upper.startswith("MEMORY:"):
307
+ memory = line_clean.split(":", 1)[1].strip()
308
+
309
+ elif line_upper.startswith("TOOL:"):
310
+ raw_tool = line_clean.split(":", 1)[1].strip().lower()
311
+ raw_tool = raw_tool.replace("**", "").replace("*", "").replace("`", "")
312
+ raw_tool = raw_tool.split()[0] if raw_tool else "play_action"
313
+ tool_name = raw_tool
314
+
315
+ elif line_upper.startswith("ARGS:"):
316
+ args_part = line_clean.split(":", 1)[1].strip()
317
+ try:
318
+ args_part = args_part.replace("'", '"')
319
+ tool_args = json.loads(args_part)
320
+ except json.JSONDecodeError:
321
+ match = re.search(r'"action"\s*:\s*"([^"]+)"', args_part)
322
+ if match:
323
+ tool_args = {"action": match.group(1)}
324
+ else:
325
+ tool_args = {"action": "look"}
326
 
327
+ return memory,thought, tool_name, tool_args
 
 
 
328
 
329
+ def _validate_tool_call(self, tool_name: str, tool_args: dict, valid_tools: list[str]) -> tuple[str, dict]:
330
+ """Validate and fix common tool call issues."""
331
+ # Fix tool name
332
+ if tool_name not in valid_tools:
333
+ if tool_name in ["action", "do", "command"]:
334
+ tool_name = "play_action"
335
+ elif tool_name in ["map", "location"]:
336
+ tool_name = "get_map"
337
+ elif tool_name in ["mem", "state", "status"]:
338
+ tool_name = "memory"
339
+ elif tool_name in ["inv", "items"]:
340
+ tool_name = "inventory"
341
+ else:
342
+ tool_name = "play_action"
343
 
344
+ # Fix action verbs
345
+ if tool_name == "play_action":
346
+ action = tool_args.get("action", "look")
347
+
348
+ invalid_verb_map = {
349
+ "check": "examine",
350
+ "inspect": "examine",
351
+ "search": "look",
352
+ "grab": "take",
353
+ "pick": "take",
354
+ "use": "examine",
355
+ "investigate": "examine",
356
+ }
357
+
358
+ words = action.lower().split()
359
+ if words and words[0] in invalid_verb_map:
360
+ words[0] = invalid_verb_map[words[0]]
361
+ action = " ".join(words)
362
+
363
+ action = action.lower().strip()
364
+ action = action.replace("**", "").replace("*", "").replace("`", "")
365
+ action = " ".join(action.split())
366
+
367
+ tool_args["action"] = action
368
 
369
+ return tool_name, tool_args
 
 
 
 
 
 
 
370
 
371
+ def _extract_result(self, result) -> str:
372
+ """Extract text from MCP tool result."""
373
+ if hasattr(result, 'content') and result.content:
374
+ return result.content[0].text
375
+ if isinstance(result, list) and result:
376
+ return result[0].text if hasattr(result[0], 'text') else str(result[0])
377
+ return str(result)
378
+
379
+ def _update_score(self, text: str) -> None:
380
+ """Update score from game text."""
381
+ patterns = [
382
+ r'Score:\s*(\d+)',
383
+ r'score[:\s]+(\d+)',
384
+ r'\[Score:\s*(\d+)',
385
+ ]
386
 
387
+ for pattern in patterns:
388
+ match = re.search(pattern, text, re.IGNORECASE)
389
+ if match:
390
+ self.score = max(self.score, int(match.group(1)))
391
+
392
+ def _is_game_over(self, text: str) -> bool:
393
+ """Check if the game is over."""
394
+ game_over_phrases = [
395
+ "game over",
396
+ "you have died",
397
+ "you are dead",
398
+ "*** you have died ***",
399
+ ]
400
+ text_lower = text.lower()
401
+ return any(phrase in text_lower for phrase in game_over_phrases)
402
 
403
 
404
  # =============================================================================
405
+ # Local Testing
406
  # =============================================================================
407
 
408
  async def test_agent():
409
  """Test the agent locally."""
410
  from fastmcp import Client
411
 
 
 
 
412
  agent = StudentAgent()
413
 
414
+ async with Client("mcp_server.py") as client:
415
  result = await agent.run(
416
  client=client,
417
  game="zork1",
418
+ max_steps=100,
419
  seed=42,
420
  verbose=True,
421
  )
422
 
423
+ print(f"\n{'=' * 50}")
424
+ print(f"Final Score: {result.final_score}")
425
  print(f"Moves: {result.moves}")
426
+ print(f"Locations: {len(result.locations_visited)}")
427
 
428
 
429
  if __name__ == "__main__":
mcp_server.py CHANGED
@@ -1,27 +1,8 @@
1
  """
2
- Student MCP Server for Text Adventure Games
3
 
4
- This is your MCP server submission. Implement the tools that your agent
5
- will use to play text adventure games.
6
-
7
- Required tool:
8
- play_action(action: str) -> str
9
- Execute a game command and return the result.
10
-
11
- Recommended tools:
12
- memory() -> str
13
- Return current game state, score, and recent history.
14
-
15
- inventory() -> str
16
- Return the player's current inventory.
17
-
18
- get_map() -> str
19
- Return a map of explored locations.
20
-
21
- Test your server with:
22
- fastmcp dev submission_template/mcp_server.py
23
-
24
- Then open the MCP Inspector in your browser to test the tools interactively.
25
  """
26
 
27
  import sys
@@ -31,179 +12,232 @@ import os
31
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
32
 
33
  from fastmcp import FastMCP
34
- from games.zork_env import TextAdventureEnv
35
 
36
 
37
- # =============================================================================
38
- # Create the MCP Server
39
- # =============================================================================
40
 
41
- mcp = FastMCP("Student Text Adventure Server")
 
42
 
43
 
44
- # =============================================================================
45
- # Game State Management
46
- # =============================================================================
47
-
48
- class GameManager:
49
- """
50
- Manages the text adventure game state.
51
-
52
- TODO: Extend this class to track:
53
- - Action history (for memory tool)
54
- - Explored locations (for mapping)
55
- - Current score and moves
56
- """
57
-
58
- def __init__(self):
59
- self.env: TextAdventureEnv = None
60
- self.state = None
61
- self.game_name: str = ""
62
- # TODO: Add more state tracking
63
- # self.history: list[tuple[str, str]] = []
64
- # self.explored_locations: dict[str, set[str]] = {}
65
- # self.current_location: str = ""
66
 
67
- def initialize(self, game: str = "zork1"):
68
- """Initialize or reset the game."""
69
  self.game_name = game
70
  self.env = TextAdventureEnv(game)
71
  self.state = self.env.reset()
72
- # TODO: Reset your state tracking here
73
- return self.state.observation
74
-
75
- def step(self, action: str) -> str:
76
- """Execute an action and return the result."""
77
- if self.env is None:
78
- self.initialize()
 
 
79
 
 
 
 
 
 
80
  self.state = self.env.step(action)
 
81
 
82
- # TODO: Update your state tracking here
83
- # self.history.append((action, self.state.observation))
84
- # Update location tracking, etc.
 
85
 
86
- return self.state.observation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
- def get_score(self) -> int:
89
- """Get current score."""
90
- return self.state.score if self.state else 0
 
 
 
 
 
 
 
 
 
 
 
 
 
91
 
92
- def get_moves(self) -> int:
93
- """Get number of moves taken."""
94
- return self.state.moves if self.state else 0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95
 
 
 
 
96
 
97
- # Global game manager
98
- _game = GameManager()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
 
101
- def get_game() -> GameManager:
102
- """Get or initialize the game manager."""
103
- global _game
104
- if _game.env is None:
105
- # Get game from environment variable (set by evaluator)
106
- game = os.environ.get("GAME", "zork1")
107
- _game.initialize(game)
108
- return _game
109
 
110
 
111
  # =============================================================================
112
- # MCP Tools - IMPLEMENT THESE
113
  # =============================================================================
114
 
115
  @mcp.tool()
116
  def play_action(action: str) -> str:
117
  """
118
- Execute a game command and return the result.
119
-
120
- This is the main tool for interacting with the game.
121
 
122
  Args:
123
- action: The command to execute (e.g., "north", "take lamp", "open mailbox")
124
-
125
  Returns:
126
- The game's response to the action
127
-
128
- Valid commands include:
129
- - Movement: north, south, east, west, up, down, enter, exit
130
- - Objects: take <item>, drop <item>, open <thing>, examine <thing>
131
- - Other: look, inventory, read <thing>, turn on lamp
132
  """
133
  game = get_game()
 
 
 
 
134
 
135
- # TODO: You might want to add action validation here
136
- # TODO: You might want to include score changes in the response
137
 
138
- result = game.step(action)
 
 
 
 
 
 
 
 
 
 
139
 
140
- # Optional: Append score info
141
- # result += f"\n[Score: {game.get_score()} | Moves: {game.get_moves()}]"
 
 
 
 
 
 
 
142
 
143
- return result
144
-
145
-
146
- # TODO: Implement additional tools to help your agent
147
-
148
- # @mcp.tool()
149
- # def memory() -> str:
150
- # """
151
- # Get the current game state summary.
152
- #
153
- # Returns:
154
- # A summary including current location, score, moves, and recent history
155
- # """
156
- # game = get_game()
157
- # # TODO: Return useful state information
158
- # pass
159
-
160
-
161
- # @mcp.tool()
162
- # def inventory() -> str:
163
- # """
164
- # Check what the player is carrying.
165
- #
166
- # Returns:
167
- # List of items in the player's inventory
168
- # """
169
- # game = get_game()
170
- # result = game.step("inventory")
171
- # return result
172
-
173
-
174
- # @mcp.tool()
175
- # def get_map() -> str:
176
- # """
177
- # Get a map of explored locations.
178
- #
179
- # Returns:
180
- # A text representation of explored locations and connections
181
- # """
182
- # game = get_game()
183
- # # TODO: Return map of explored locations
184
- # pass
185
-
186
-
187
- # @mcp.tool()
188
- # def get_valid_actions() -> str:
189
- # """
190
- # Get a list of likely valid actions from the current location.
191
- #
192
- # Returns:
193
- # List of actions that might work here
194
- # """
195
- # # This is a hint: Jericho provides get_valid_actions()
196
- # game = get_game()
197
- # if game.env and game.env.env:
198
- # valid = game.env.env.get_valid_actions()
199
- # return "Valid actions: " + ", ".join(valid[:20])
200
- # return "Could not determine valid actions"
201
 
202
 
203
  # =============================================================================
204
- # Run the server
205
  # =============================================================================
206
 
207
  if __name__ == "__main__":
208
- # This runs the server with stdio transport (for MCP clients)
209
  mcp.run()
 
1
  """
2
+ Example: MCP Server for Text Adventures
3
 
4
+ A complete MCP server that exposes text adventure games via tools.
5
+ This demonstrates a full-featured server with memory, mapping, and inventory.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  """
7
 
8
  import sys
 
12
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
13
 
14
  from fastmcp import FastMCP
15
+ from games.zork_env import TextAdventureEnv, list_available_games
16
 
17
 
18
# Game to load, taken from the evaluator's environment variable (default: zork1).
INITIAL_GAME = os.environ.get("GAME", "zork1")

# Create the MCP server instance that all tools below register against.
mcp = FastMCP("Text Adventure Server")
23
 
24
 
25
class GameState:
    """Manages the text adventure game state and exploration data.

    Wraps the running TextAdventureEnv and tracks agent-facing memory:
    a bounded action history, a map of explored rooms and their exits,
    and the last (up to ten) room transitions so the agent can detect
    loops and backtrack.
    """

    # Words that mark an action as a movement command.  Matched as whole
    # words (see _is_movement) so e.g. "take gold" is not mistaken for
    # a move just because it contains the substring "go".
    _DIRECTION_WORDS = {
        "north", "south", "east", "west",
        "northwest", "southwest", "northeast", "southeast",
        "up", "down", "enter", "exit", "go",
        "n", "s", "e", "w", "u", "d",
    }

    def __init__(self, game: str = "zork1"):
        self.game_name = game
        self.env = TextAdventureEnv(game)
        self.state = self.env.reset()
        # (action, observation) pairs, capped at the 50 most recent.
        self.history: list[tuple[str, str]] = []
        # room name -> set of "action -> destination" transition strings.
        # Zork always starts at "West of House"; seed it so the first
        # movement has a source room to attach its exit to.
        self.explored_locations: dict[str, set[str]] = {"West of House": set()}
        self.current_location: str = "West of House"
        # Previous room and the action that led out of it.
        self.last_loc: str = "West of House"
        self.last_dir: str = ""
        # Parallel lists: last 10 rooms visited and actions taken from them.
        self.last_10_loc: list[str] = []
        self.last_10_dir: list[str] = []

    def _extract_location(self, observation: str) -> str:
        """Extract the location name from an observation (its first line)."""
        lines = observation.strip().split('\n')
        return lines[0] if lines else "Unknown"

    def _is_movement(self, action: str) -> bool:
        """Return True when *action* contains a movement word.

        Whole-word matching fixes the original substring check, which
        recorded false movement edges (e.g. "go" inside "take gold").
        """
        return bool(self._DIRECTION_WORDS & set(action.lower().split()))

    def take_action(self, action: str) -> str:
        """Execute a game action, update exploration tracking, return text."""
        self.state = self.env.step(action)
        result = self.state.observation

        # Bounded rolling history keeps prompt sizes under control.
        self.history.append((action, result))
        if len(self.history) > 50:
            self.history = self.history[-50:]

        # Map update: only movement commands create transition edges.
        new_location = self._extract_location(result)
        if self._is_movement(action):
            # setdefault guards against rooms that were never registered.
            self.explored_locations.setdefault(self.current_location, set())
            self.explored_locations[self.current_location].add(
                f"{action} -> {new_location}"
            )
            # Heuristic: a real room name is short (<= 4 words) or a
            # "dark" message; long first lines are usually failure text.
            # NOTE: parenthesized so the != guard applies to both cases —
            # the original `a or b and c` bound the `and` too tightly,
            # so short failure messages were recorded as room changes.
            looks_like_room = (
                len(new_location.split()) <= 4 or "dark" in new_location
            )
            if looks_like_room and new_location != self.current_location:
                if new_location not in self.explored_locations:
                    self.explored_locations[new_location] = set()
                self.last_loc = self.current_location
                self.last_10_loc.append(self.current_location)
                self.last_10_dir.append(action)
                if len(self.last_10_loc) > 10:
                    self.last_10_loc = self.last_10_loc[1:]
                    self.last_10_dir = self.last_10_dir[1:]
                self.last_dir = action
                self.current_location = new_location

        return result

    def get_memory(self) -> str:
        """Get a summary of current game state for the agent prompt."""
        recent = self.history[-5:] if self.history else []
        recent_str = "\n".join([f" > {a} -> {r[:60]}..." for a, r in recent]) if recent else " (none yet)"

        return f"""Current State:
- Location: {self.current_location}
- Score: {self.state.score} points
- Moves: {self.state.moves}
- Game: {self.game_name}

Recent Actions:
{recent_str}

Current Observation:
{self.state.observation}"""

    def get_map(self) -> str:
        """Get a map of all explored locations and their recorded exits."""
        if not self.explored_locations:
            return "Map: No locations explored yet. Try moving around!"

        lines = ["Explored Locations and explored Exits:"]
        for loc, exits in sorted(self.explored_locations.items()):
            lines.append(f"\n* {loc}")
            for exit_info in sorted(exits):
                lines.append(f" -> {exit_info}")

        lines.append(f"\n[Current] {self.current_location}")
        return "\n".join(lines)

    def get_last_10_rooms(self) -> str:
        """Get the last (up to ten) room transitions as a breadcrumb trail."""
        res = "\nLast rooms explored : "
        # 'direction' instead of the original 'dir', which shadowed a builtin.
        for loc, direction in zip(self.last_10_loc, self.last_10_dir):
            res += f" {loc} -> {direction} -> "
        return res

    def get_current_map(self) -> str:
        """Summarize the current room: previous room and known exits."""
        if not self.current_location:
            return "Map: No locations explored yet. Try moving around!"

        exits = self.explored_locations.get(self.current_location, set())

        lines = [f"Current location : {self.current_location}"]
        lines.append("rooms before :")
        lines.append(self.last_loc + " -> " + self.last_dir)

        if exits:
            lines.append("explored exits:")
            for e in sorted(exits):
                lines.append(f" -> {e}")
        else:
            lines.append("No recorded exits yet.")
        return "\n".join(lines)

    def get_inventory(self) -> str:
        """Get current inventory, cleaned up from raw object strings."""
        items = self.state.inventory if hasattr(self.state, 'inventory') and self.state.inventory else []

        if not items:
            return "Inventory: You are empty-handed."

        item_names = []
        for item in items:
            item_str = str(item)
            item_lower = item_str.lower()
            # Raw item reprs look like "Obj12: brass lantern Parent...";
            # drop everything from "Parent" on, then any leading "id:" tag.
            if "parent" in item_lower:
                idx = item_lower.index("parent")
                name = item_str[:idx].strip()
                if ":" in name:
                    name = name.split(":", 1)[1].strip()
                item_names.append(name)
            elif ":" in item_str:
                name = item_str.split(":")[1].strip()
                item_names.append(name)
            else:
                item_names.append(item_str)

        return f"Inventory: {', '.join(item_names)}"
156
+
157
+
158
# Global game state (lazily created singleton).
_game_state: GameState | None = None


def get_game() -> GameState:
    """Return the shared GameState, creating it on first access."""
    global _game_state
    if _game_state is not None:
        return _game_state
    _game_state = GameState(INITIAL_GAME)
    return _game_state
 
 
168
 
169
 
170
  # =============================================================================
171
+ # MCP Tools
172
  # =============================================================================
173
 
174
@mcp.tool()
def play_action(action: str) -> str:
    """
    Execute a game action in the text adventure.

    Args:
        action: The command to execute (e.g., 'north', 'take lamp', 'open mailbox')

    Returns:
        The game's response to your action
    """
    game = get_game()
    result = game.take_action(action)

    # Score footer: highlight the delta when points were just earned,
    # otherwise show the running score and move count.
    if game.state.reward > 0:
        score_info = f"\n\n+{game.state.reward} points! (Total: {game.state.score})"
    else:
        score_info = f"\n\n[Score: {game.state.score} | Moves: {game.state.moves}]"

    done_info = "\n\nGAME OVER" if game.state.done else ""

    return result + score_info + done_info
199
+
200
+
201
@mcp.tool()
def memory() -> str:
    """
    Get a summary of the current game state.

    Returns location, score, moves, recent actions, and current observation.
    """
    game = get_game()
    return game.get_memory()
209
+
210
+
211
@mcp.tool()
def get_map() -> str:
    """
    Get a map showing explored locations and connections.

    Useful for navigation and avoiding getting lost.
    """
    game = get_game()
    return game.get_map()
219
+
220
@mcp.tool()
def get_current_map() -> str:
    """Get the current location, the room you came from, and known exits."""
    game = get_game()
    return game.get_current_map()
224
+
225
@mcp.tool()
def get_last_10_rooms() -> str:
    """Get the last 10 locations visited and the directions taken."""
    game = get_game()
    return game.get_last_10_rooms()
229
+
230
@mcp.tool()
def inventory() -> str:
    """
    Check what items you are currently carrying.
    """
    game = get_game()
    return game.get_inventory()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
 
238
  # =============================================================================
239
+ # Main
240
  # =============================================================================
241
 
242
if __name__ == "__main__":
    # Serve over stdio so MCP clients (agent / evaluator) can connect.
    mcp.run()