Tome1 committed on
Commit
2f39f9c
·
1 Parent(s): 615a63b

Implement my agent

Browse files
Files changed (2) hide show
  1. agent.py +230 -245
  2. mcp_server.py +244 -99
agent.py CHANGED
@@ -1,28 +1,9 @@
1
  """
2
  Student Agent for Text Adventure Games
3
-
4
- This is your submission file. Implement the StudentAgent class to play
5
- text adventure games using the MCP server you also implement.
6
-
7
- Your agent should:
8
- 1. Connect to the MCP server via the provided client
9
- 2. Use the ReAct pattern (Thought -> Action -> Observation)
10
- 3. Call MCP tools to interact with the game
11
- 4. Maximize the game score within the step limit
12
-
13
- Required method:
14
- async def run(self, client, game, max_steps, seed, verbose) -> RunResult
15
-
16
- The 'client' is a FastMCP Client already connected to your MCP server.
17
- Use it to call tools like: await client.call_tool("play_action", {"action": "look"})
18
-
19
- Tips:
20
- - Start by looking around and understanding your environment
21
- - Keep track of visited locations to avoid loops
22
- - Pick up useful items (lamp, sword, etc.)
23
- - The seed parameter should be used to set your LLM's seed for reproducibility
24
  """
25
 
 
26
  import json
27
  import os
28
  import re
@@ -32,89 +13,34 @@ from typing import Optional
32
  from dotenv import load_dotenv
33
  from huggingface_hub import InferenceClient
34
 
35
- # Load environment variables
36
  load_dotenv()
37
 
38
- # Set USE_LOCAL_MODEL=1 in your .env to use a locally downloaded model
39
- USE_LOCAL_MODEL = os.getenv("USE_LOCAL_MODEL", "0").strip() in ("1", "true", "yes")
40
- LOCAL_MODEL_ID = os.getenv("LOCAL_MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")
41
-
42
  # =============================================================================
43
- # LLM Configuration - DO NOT MODIFY
44
  # =============================================================================
45
 
46
- # Model to use (fixed for fair evaluation)
47
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
 
 
 
48
 
49
- # Initialize the LLM client based on mode
50
- _local_pipeline = None
51
-
52
- if USE_LOCAL_MODEL:
53
- import torch
54
- from transformers import pipeline as _hf_pipeline
55
-
56
- _local_pipeline = _hf_pipeline(
57
- "text-generation",
58
- model=LOCAL_MODEL_ID,
59
- torch_dtype=torch.bfloat16,
60
- device_map="auto",
61
- )
62
- LLM_CLIENT = None
63
- else:
64
- _hf_token = os.getenv("HF_TOKEN")
65
- if not _hf_token:
66
- raise ValueError("HF_TOKEN not found. Set it in your .env file.")
67
- LLM_CLIENT = InferenceClient(token=_hf_token)
68
-
69
 
70
  def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
71
- """
72
- Call the LLM with the given prompt. Use this function in your agent.
73
-
74
- Args:
75
- prompt: The user prompt (current game state, history, etc.)
76
- system_prompt: The system prompt (instructions for the agent)
77
- seed: Random seed for reproducibility
78
- max_tokens: Maximum tokens in response (default: 300)
79
-
80
- Returns:
81
- The LLM's response text
82
-
83
- Example:
84
- response = call_llm(
85
- prompt="You are in a forest. What do you do?",
86
- system_prompt=SYSTEM_PROMPT,
87
- seed=42,
88
- )
89
- """
90
- messages = [
91
- {"role": "system", "content": system_prompt},
92
- {"role": "user", "content": prompt},
93
- ]
94
-
95
- if USE_LOCAL_MODEL and _local_pipeline is not None:
96
- outputs = _local_pipeline(
97
- messages,
98
- max_new_tokens=max_tokens,
99
- temperature=0.0001, # Near-deterministic (0.0 unsupported by some backends)
100
- do_sample=True,
101
- )
102
- return outputs[0]["generated_text"][-1]["content"]
103
-
104
  response = LLM_CLIENT.chat.completions.create(
105
  model=LLM_MODEL,
106
- messages=messages,
107
- temperature=0.0, # Deterministic for reproducibility
 
 
 
108
  max_tokens=max_tokens,
109
  seed=seed,
110
  )
111
-
112
  return response.choices[0].message.content
113
 
114
-
115
  @dataclass
116
  class RunResult:
117
- """Result of running the agent. Do not modify this class."""
118
  final_score: int
119
  max_score: int
120
  moves: int
@@ -123,183 +49,242 @@ class RunResult:
123
  error: Optional[str] = None
124
  history: list[tuple[str, str, str]] = field(default_factory=list)
125
 
126
-
127
  # =============================================================================
128
- # System Prompt - Customize this for your agent
129
  # =============================================================================
130
 
131
- SYSTEM_PROMPT = """You are playing a classic text adventure game.
132
-
133
- GOAL: Explore the world, solve puzzles, and maximize your score.
134
-
135
- AVAILABLE TOOLS (use via MCP):
136
- - play_action: Execute a game command (north, take lamp, open mailbox, etc.)
137
- - memory: Get current game state and history (if implemented)
138
- - inventory: Check what you're carrying (if implemented)
139
-
140
- VALID GAME COMMANDS for play_action:
141
- - Movement: north, south, east, west, up, down, enter, exit
142
- - Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
143
- - Other: look, inventory, read <thing>, turn on lamp
144
-
145
- RESPOND IN THIS EXACT FORMAT (no markdown):
146
- THOUGHT: <your reasoning about what to do next>
147
- TOOL: <tool_name>
148
- ARGS: <JSON arguments, e.g., {"action": "look"}>
149
-
150
- Example:
151
- THOUGHT: I should look around to see where I am.
152
- TOOL: play_action
153
- ARGS: {"action": "look"}
154
- """
155
-
156
 
157
  # =============================================================================
158
- # Student Agent - IMPLEMENT THIS CLASS
159
  # =============================================================================
160
 
161
  class StudentAgent:
162
- """
163
- Your ReAct agent implementation.
164
-
165
- TODO:
166
- 1. Implement the run() method with the ReAct loop
167
- 2. Parse LLM responses to extract tool calls
168
- 3. Track state and avoid loops
169
-
170
- Use the provided call_llm() function to interact with the LLM.
171
- """
172
-
173
  def __init__(self):
174
- """Initialize your agent here."""
175
- # TODO: Initialize any state tracking you need
176
- # self.history = []
177
- # self.visited_locations = set()
178
- pass
179
-
180
- async def run(
181
- self,
182
- client, # FastMCP Client connected to your MCP server
183
- game: str,
184
- max_steps: int,
185
- seed: int,
186
- verbose: bool = False,
187
- ) -> RunResult:
188
- """
189
- Run the agent for a game session.
190
 
191
- Args:
192
- client: FastMCP Client connected to your MCP server
193
- game: Name of the game being played (e.g., "zork1")
194
- max_steps: Maximum number of steps to take
195
- seed: Random seed for reproducibility (use for LLM calls)
196
- verbose: Whether to print detailed output
197
-
198
- Returns:
199
- RunResult with final score and statistics
200
- """
201
- # TODO: Implement your ReAct loop here
202
- #
203
- # Basic structure:
204
- # 1. Get initial observation (call play_action with "look")
205
- # 2. Loop for max_steps:
206
- # a. Build prompt with current observation and history
207
- # b. Call LLM to get thought and action
208
- # c. Parse the response to extract tool and args
209
- # d. Call the tool via client.call_tool(tool_name, args)
210
- # e. Update history and state
211
- # f. Check for game over
212
- # 3. Return RunResult with final statistics
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
213
 
214
- # Example of calling a tool:
215
- # result = await client.call_tool("play_action", {"action": "look"})
216
- # observation = result[0].text if result else "No response"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
217
 
218
- # Example of calling the LLM:
219
- # response = call_llm(
220
- # prompt="Current observation: " + observation,
221
- # system_prompt=SYSTEM_PROMPT,
222
- # seed=seed,
223
- # )
224
 
225
- # Placeholder implementation - replace with your code
226
- locations_visited = set()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  history = []
228
- final_score = 0
229
- moves = 0
230
-
231
- # TODO: Your implementation here
232
- # ...
233
-
234
- return RunResult(
235
- final_score=final_score,
236
- max_score=350, # Zork1 max score, adjust if needed
237
- moves=moves,
238
- locations_visited=locations_visited,
239
- game_completed=False,
240
- history=history,
241
- )
242
-
243
- def _build_prompt(self, observation: str, history: list) -> str:
244
- """
245
- Build the prompt for the LLM.
246
-
247
- TODO: Implement this to create effective prompts
248
- """
249
- # TODO: Combine system prompt, history, and current observation
250
- pass
251
-
252
- def _parse_response(self, response: str) -> tuple[str, str, dict]:
253
- """
254
- Parse LLM response to extract thought, tool name, and arguments.
255
-
256
- TODO: Implement robust parsing
257
-
258
- Returns:
259
- Tuple of (thought, tool_name, args_dict)
260
- """
261
- # TODO: Parse the response format:
262
- # THOUGHT: ...
263
- # TOOL: ...
264
- # ARGS: {...}
265
- pass
266
-
267
- def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
268
- """
269
- Call the LLM with the given prompt.
270
-
271
- This is a convenience wrapper - you can also use call_llm() directly.
272
- """
273
- return call_llm(prompt, system_prompt, seed)
274
 
 
 
 
275
 
276
- # =============================================================================
277
- # For local testing
278
- # =============================================================================
279
 
280
- async def test_agent():
281
- """Test the agent locally."""
282
- from fastmcp import Client
283
-
284
- # Path to your MCP server
285
- server_path = "mcp_server.py"
286
-
287
- agent = StudentAgent()
288
-
289
- async with Client(server_path) as client:
290
- result = await agent.run(
291
- client=client,
292
- game="zork1",
293
- max_steps=10,
294
- seed=42,
295
- verbose=True,
296
- )
297
-
298
- print(f"\nFinal Score: {result.final_score}")
299
- print(f"Moves: {result.moves}")
300
- print(f"Locations: {result.locations_visited}")
301
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
302
 
 
303
  if __name__ == "__main__":
304
  import asyncio
305
- asyncio.run(test_agent())
 
 
 
 
 
 
1
  """
2
  Student Agent for Text Adventure Games
3
+ Features: Continuous Intelligence, Global Mapping, and Robust Parsing.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
  """
5
 
6
+ import ast
7
  import json
8
  import os
9
  import re
 
13
  from dotenv import load_dotenv
14
  from huggingface_hub import InferenceClient
15
 
 
16
  load_dotenv()
17
 
 
 
 
 
18
  # =============================================================================
19
+ # LLM Setup
20
  # =============================================================================
21
 
 
22
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
23
+ _hf_token = os.getenv("HF_TOKEN")
24
+ if not _hf_token:
25
+ raise ValueError("HF_TOKEN not found. Set it in your .env file.")
26
 
27
+ LLM_CLIENT = InferenceClient(token=_hf_token)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """Query the shared chat model and return its reply text.

    Temperature 0.0 plus a fixed seed keeps responses deterministic
    for reproducible evaluation runs.

    Args:
        prompt: User-role message (game state, history, question).
        system_prompt: System-role instructions for the model.
        seed: Sampling seed forwarded to the backend.
        max_tokens: Upper bound on the reply length.

    Returns:
        The assistant message content as a string.
    """
    chat_messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    completion = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=chat_messages,
        temperature=0.0,
        max_tokens=max_tokens,
        seed=seed,
    )
    return completion.choices[0].message.content
41
 
 
42
  @dataclass
43
  class RunResult:
 
44
  final_score: int
45
  max_score: int
46
  moves: int
 
49
  error: Optional[str] = None
50
  history: list[tuple[str, str, str]] = field(default_factory=list)
51
 
 
52
  # =============================================================================
53
+ # Direction Normalization (Fuzzy Support)
54
  # =============================================================================
55
 
56
# Canonical direction tokens keyed by every accepted spelling.
_DIR_MAP = {
    'n': 'n', 'north': 'n',
    's': 's', 'south': 's',
    'e': 'e', 'east': 'e',
    'w': 'w', 'west': 'w',
    'u': 'u', 'up': 'u',
    'd': 'd', 'down': 'd',
    'ne': 'ne', 'northeast': 'ne', 'north-east': 'ne',
    'nw': 'nw', 'northwest': 'nw', 'north-west': 'nw',
    'se': 'se', 'southeast': 'se', 'south-east': 'se',
    'sw': 'sw', 'southwest': 'sw', 'south-west': 'sw',
    'in': 'in', 'out': 'out', 'enter': 'enter', 'exit': 'exit'
}


def _normalize_direction(action_str: str) -> str:
    """Map free-form movement text to a canonical direction token.

    Handles verb prefixes ("go north", "walk ne") and hyphen/space
    compounds ("north-east", "north east"). Returns "" when the text
    is not a recognized direction.

    Bug fix: the previous version looked up only the first word after
    hyphen-splitting, so "north-east" collapsed to "n" and the map's
    own 'north-east' keys were unreachable. We now try the words fused
    back together first, so "north east" -> "northeast" -> "ne".
    """
    parts = action_str.lower().replace('-', ' ').strip().split()
    if not parts:
        return ""
    # Drop a leading movement verb: "go north", "walk ne", "move up".
    if len(parts) >= 2 and parts[0] in ("go", "walk", "move"):
        parts = parts[1:]
    # Fused lookup first so compounds win over their first component.
    return _DIR_MAP.get("".join(parts), "") or _DIR_MAP.get(parts[0], "")
 
 
 
 
 
76
 
77
  # =============================================================================
78
+ # StudentAgent Class
79
  # =============================================================================
80
 
81
  class StudentAgent:
 
 
 
 
 
 
 
 
 
 
 
82
  def __init__(self):
83
+ # Global map: { "Room": { "exits": { "ne": {"dest": "A Hole", "status": "Visited/Hypothesized"} } } }
84
+ self.global_map = {}
85
+ # Local memory: { "Room": { "desc": "", "leads": [], "log": [], "steps": 0 } }
86
+ self.rooms = {}
 
 
 
 
 
 
 
 
 
 
 
 
87
 
88
+ self.current_room = None
89
+ self.inventory = []
90
+ self.score = 0
91
+ self.moves = 0
92
+
93
+ # -------------------------------------------------------------------------
94
+ # 1. Continuous Intelligence (LLM Helpers)
95
+ # -------------------------------------------------------------------------
96
+
97
+ def _summarise(self, action: str, obs: str, seed: int) -> str:
98
+ prompt = f"Action: {action}\nResult: {obs}\nSummarize outcome in 8 words."
99
+ return call_llm(prompt, "Summarizer.", seed).strip()
100
+
101
+ def _extract_best_leads(self, desc: str, actions: list, seed: int) -> list[str]:
102
+ # Filter movement and meta out of leads to keep LLM focused on objects
103
+ filtered = [a for a in actions if _normalize_direction(a) == "" and len(a.split()) < 4]
104
+ prompt = f"DESC: {desc[:400]}\nVALID: {filtered[:30]}\nPick 5 best non-movement leads. Return Python list of strings."
105
+ res = call_llm(prompt, "Strategist.", seed)
106
+ try:
107
+ return ast.literal_eval(res[res.find('['):res.rfind(']')+1])
108
+ except: return filtered[:5]
109
+
110
+ def _hypothesize_exits(self, desc: str, seed: int) -> list[dict]:
111
+ """LLM cartographer: called EVERY turn to ensure no exits are missed."""
112
+ prompt = f"OBSERVATION: {desc[:600]}\nList all exits mentioned in text (n, s, e, w, ne, nw, se, sw, up, down, in, out). Format: [{{'dir': 'ne', 'leads_to': 'a hole'}}, ...] Do not hallucinate exits, only extract from text."
113
+ res = call_llm(prompt, "Cartographer.", seed)
114
+ try:
115
+ return ast.literal_eval(res[res.find('['):res.rfind(']')+1])
116
+ except: return []
117
+
118
+ # -------------------------------------------------------------------------
119
+ # 2. State & Dynamic Map Reconciliation
120
+ # -------------------------------------------------------------------------
121
+
122
+ async def _update_intelligence(self, client, last_obs=None, last_act=None, seed=42):
123
+ """Refreshes game state and map hypotheses based on the LATEST observation."""
124
+ state_res = await client.call_tool("game_state", {})
125
+ data = json.loads(self._text(state_res))
126
 
127
+ new_room = data.get("location", "Unknown")
128
+ self.inventory, self.score, self.moves = data.get("inventory", []), data.get("score", 0), data.get("moves", 0)
129
+
130
+ # 1. If we moved, confirm the path in the global map
131
+ if last_act and self.current_room and self.current_room != new_room:
132
+ norm_dir = _normalize_direction(last_act)
133
+ if norm_dir:
134
+ if self.current_room not in self.global_map: self.global_map[self.current_room] = {"exits": {}}
135
+ self.global_map[self.current_room]["exits"][norm_dir] = {"dest": new_room, "status": "Visited"}
136
+ print(f"[Map] Confirmed: {self.current_room} -> {new_room}")
137
+
138
+ self.current_room = new_room
139
+
140
+ # 2. Discovery: Initialize new room memory if needed
141
+ if new_room not in self.rooms:
142
+ print(f"[Discovery] Arriving at: {new_room}")
143
+ # If we don't have a fresh observation from a play_action, get one via 'look'
144
+ obs = last_obs if last_obs else self._text(await client.call_tool("play_action", {"action": "look"}))
145
+ valid = json.loads(self._text(await client.call_tool("get_valid_actions", {}))).get("valid_actions", [])
146
+
147
+ self.rooms[new_room] = {
148
+ "desc": obs,
149
+ "leads": self._extract_best_leads(obs, valid, seed),
150
+ "log": [],
151
+ "steps": 0
152
+ }
153
 
154
+ # 3. CONTINUOUS MAPPING: Scan the latest observation for exits (every turn!)
155
+ current_obs = last_obs if last_obs else self.rooms[new_room]["desc"]
156
+ if new_room not in self.global_map: self.global_map[new_room] = {"exits": {}}
 
 
 
157
 
158
+ for h in self._hypothesize_exits(current_obs, seed):
159
+ d = _normalize_direction(h['dir'])
160
+ # Only add if it's new OR if the current status is just a guess
161
+ if d and (d not in self.global_map[new_room]["exits"] or self.global_map[new_room]["exits"][d]["status"] == "Hypothesized"):
162
+ self.global_map[new_room]["exits"][d] = {"dest": h['leads_to'], "status": "Hypothesized"}
163
+
164
+ def _get_map_str(self) -> str:
165
+ if not self.global_map: return "Empty"
166
+ lines = []
167
+ for room, info in self.global_map.items():
168
+ ex = info.get("exits", {})
169
+ vis = [f"{d}->{v['dest']}" for d, v in ex.items() if v["status"] == "Visited"]
170
+ hyp = [f"{d}({v['dest']}?)" for d, v in ex.items() if v["status"] == "Hypothesized"]
171
+ if vis or hyp:
172
+ lines.append(f" {room}: {' '.join(vis + hyp)}")
173
+ return "\n".join(lines)
174
+
175
+ # -------------------------------------------------------------------------
176
+ # 3. Main Loop
177
+ # -------------------------------------------------------------------------
178
+
179
+ async def run(self, client, game, max_steps, seed, verbose=False) -> RunResult:
180
  history = []
181
+ await self._update_intelligence(client, seed=seed)
182
+
183
+ system_prompt = """You are an expert Text Adventure player. Maximize score by exploring, solving puzzles, and collecting treasures/items.
184
+ AVAILABLE TOOLS:
185
+ - play_action: {"action": "command"} (Use this for ALL game commands like 'ne', 'take pants', 'light torch')
186
+ - get_valid_actions: {} (Use this if you are stuck)
187
+
188
+ RULES:
189
+ 1. Prioritize 'Hypothesized' exits marked with (?) on the map to find new rooms.
190
+ 2. If the map shows a path leads to a room you want to go to, use that direction.
191
+ 3. Finish 'Promising Leads' (taking items, etc) before leaving.
192
+ 4. Use THOUGHT, TOOL, ARGS format exactly."""
193
+
194
+ for step in range(max_steps):
195
+ room_data = self.rooms[self.current_room]
196
+ room_data["steps"] += 1
197
+
198
+ print(f"\n[DEBUG] STEP {step+1} | MOVES:{self.moves} | LOC:{self.current_room}")
199
+
200
+ bias = ""
201
+ if room_data["steps"] > 6:
202
+ bias = "\n[SYSTEM ADVICE]: You have lingered here. Pick an unexplored (?) path and move!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
203
 
204
+ prompt = f"""
205
+ === STATUS ===
206
+ Moves: {self.moves} | Score: {self.score} | Inventory: {self.inventory}
207
 
 
 
 
208
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
 
210
+ === CURRENT LOCATION: {self.current_room} ===
211
+ {room_data['desc']}
212
+
213
+ === GLOBAL MAP ===
214
+ {self._get_map_str()}
215
+
216
+ === ACTION LOG (In this room) ===
217
+ {chr(10).join(f"- {l}" for l in room_data['log'][-5:])}
218
+
219
+ === STRATEGY Leads ===
220
+ {room_data['leads']}
221
+ {bias}
222
+
223
+ What is your next move?"""
224
+
225
+ llm_res = call_llm(prompt, system_prompt, seed)
226
+ if verbose: print(f"--- Thought ---\n{llm_res}")
227
+
228
+ thought, tool, args = self._parse(llm_res)
229
+
230
+ try:
231
+ res_obj = await client.call_tool(tool, args)
232
+ obs = self._text(res_obj)
233
+ act_str = args.get('action', "look")
234
+
235
+ if tool == "play_action":
236
+ summary = self._summarise(act_str, obs, seed)
237
+ room_data["log"].append(f"{act_str} -> {summary}")
238
+ # If action was successful, update the room's stored description
239
+ if "not" not in summary.lower() and "fail" not in summary.lower():
240
+ room_data["desc"] = obs
241
+
242
+ # Update intelligence with the NEW observation
243
+ await self._update_intelligence(client, last_obs=obs, last_act=act_str, seed=seed)
244
+ history.append((act_str, obs))
245
+
246
+ except Exception as e:
247
+ print(f"Loop Error: {e}")
248
+
249
+ return RunResult(self.score, 350, self.moves, set(self.rooms.keys()), False, history=history)
250
+
251
+ # -------------------------------------------------------------------------
252
+ # 4. Utilities & Robust Parser
253
+ # -------------------------------------------------------------------------
254
+
255
+ def _text(self, res) -> str:
256
+ if hasattr(res, 'content') and isinstance(res.content, list) and res.content:
257
+ return res.content[0].text
258
+ return str(res.data) if hasattr(res, 'data') else str(res)
259
+
260
+ def _parse(self, res: str) -> tuple[str, str, dict]:
261
+ thought = tool = ""
262
+ args = {}
263
+ t_m = re.search(r"THOUGHT:\s*(.*?)(?=TOOL:|$)", res, re.DOTALL)
264
+ if t_m: thought = t_m.group(1).strip()
265
+ m_m = re.search(r"TOOL:\s*(.*?)(?=ARGS:|$)", res, re.DOTALL)
266
+ if m_m: tool = m_m.group(1).strip()
267
+ a_m = re.search(r"ARGS:\s*(\{.*?\})", res, re.DOTALL)
268
+ if a_m:
269
+ try: args = json.loads(a_m.group(1).strip())
270
+ except: pass
271
+
272
+ # Correction logic
273
+ if tool.lower() in ["go", "move", "take", "walk"]:
274
+ action_val = str(args) if not isinstance(args, dict) else args.get('action', "look")
275
+ tool, args = "play_action", {"action": action_val}
276
+
277
+ if not tool or tool not in ["play_action", "get_valid_actions", "inspect_surroundings"]:
278
+ tool, args = "play_action", {"action": "look"}
279
+
280
+ return thought, tool, args
281
 
282
# Optional local smoke test: runs the agent against the local MCP server.
if __name__ == "__main__":
    import asyncio

    async def _local_smoke_test():
        from fastmcp import Client
        async with Client("mcp_server.py") as mcp_client:
            await StudentAgent().run(mcp_client, "zork1", 20, 42, verbose=True)

    asyncio.run(_local_smoke_test())
mcp_server.py CHANGED
@@ -32,7 +32,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
32
 
33
  from fastmcp import FastMCP
34
  from games.zork_env import TextAdventureEnv
35
-
36
 
37
  # =============================================================================
38
  # Create the MCP Server
@@ -46,52 +46,99 @@ mcp = FastMCP("Student Text Adventure Server")
46
  # =============================================================================
47
 
48
  class GameManager:
49
- """
50
- Manages the text adventure game state.
51
-
52
- TODO: Extend this class to track:
53
- - Action history (for memory tool)
54
- - Explored locations (for mapping)
55
- - Current score and moves
56
- """
57
 
58
  def __init__(self):
59
  self.env: TextAdventureEnv = None
60
  self.state = None
61
  self.game_name: str = ""
62
- # TODO: Add more state tracking
63
- # self.history: list[tuple[str, str]] = []
64
- # self.explored_locations: dict[str, set[str]] = {}
65
- # self.current_location: str = ""
66
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
  def initialize(self, game: str = "zork1"):
68
- """Initialize or reset the game."""
69
  self.game_name = game
70
  self.env = TextAdventureEnv(game)
71
  self.state = self.env.reset()
72
- # TODO: Reset your state tracking here
 
 
 
 
 
73
  return self.state.observation
74
 
75
  def step(self, action: str) -> str:
76
- """Execute an action and return the result."""
77
  if self.env is None:
78
  self.initialize()
 
 
 
79
 
80
  self.state = self.env.step(action)
81
 
82
- # TODO: Update your state tracking here
83
- # self.history.append((action, self.state.observation))
84
- # Update location tracking, etc.
 
 
85
 
86
  return self.state.observation
87
 
88
- def get_score(self) -> int:
89
- """Get current score."""
90
- return self.state.score if self.state else 0
91
-
92
- def get_moves(self) -> int:
93
- """Get number of moves taken."""
94
- return self.state.moves if self.state else 0
95
 
96
 
97
  # Global game manager
@@ -114,90 +161,188 @@ def get_game() -> GameManager:
114
 
115
  @mcp.tool()
116
  def play_action(action: str) -> str:
117
- """
118
- Execute a game command and return the result.
119
 
120
- This is the main tool for interacting with the game.
 
121
 
122
- Args:
123
- action: The command to execute (e.g., "north", "take lamp", "open mailbox")
 
 
 
 
 
 
 
 
124
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
125
  Returns:
126
- The game's response to the action
127
-
128
- Valid commands include:
129
- - Movement: north, south, east, west, up, down, enter, exit
130
- - Objects: take <item>, drop <item>, open <thing>, examine <thing>
131
- - Other: look, inventory, read <thing>, turn on lamp
132
  """
133
  game = get_game()
 
134
 
135
- # TODO: You might want to add action validation here
136
- # TODO: You might want to include score changes in the response
 
137
 
138
- result = game.step(action)
 
 
 
 
 
 
 
 
 
 
139
 
140
- # Optional: Append score info
141
- # result += f"\n[Score: {game.get_score()} | Moves: {game.get_moves()}]"
142
 
143
- return result
 
 
 
 
 
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
- # TODO: Implement additional tools to help your agent
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
- # @mcp.tool()
149
- # def memory() -> str:
150
- # """
151
- # Get the current game state summary.
152
- #
153
- # Returns:
154
- # A summary including current location, score, moves, and recent history
155
- # """
156
- # game = get_game()
157
- # # TODO: Return useful state information
158
- # pass
159
-
160
-
161
- # @mcp.tool()
162
- # def inventory() -> str:
163
- # """
164
- # Check what the player is carrying.
165
- #
166
- # Returns:
167
- # List of items in the player's inventory
168
- # """
169
- # game = get_game()
170
- # result = game.step("inventory")
171
- # return result
172
-
173
-
174
- # @mcp.tool()
175
- # def get_map() -> str:
176
- # """
177
- # Get a map of explored locations.
178
- #
179
- # Returns:
180
- # A text representation of explored locations and connections
181
- # """
182
- # game = get_game()
183
- # # TODO: Return map of explored locations
184
- # pass
185
-
186
-
187
- # @mcp.tool()
188
- # def get_valid_actions() -> str:
189
- # """
190
- # Get a list of likely valid actions from the current location.
191
- #
192
- # Returns:
193
- # List of actions that might work here
194
- # """
195
- # # This is a hint: Jericho provides get_valid_actions()
196
- # game = get_game()
197
- # if game.env and game.env.env:
198
- # valid = game.env.env.get_valid_actions()
199
- # return "Valid actions: " + ", ".join(valid[:20])
200
- # return "Could not determine valid actions"
201
 
202
 
203
  # =============================================================================
@@ -206,4 +351,4 @@ def play_action(action: str) -> str:
206
 
207
  if __name__ == "__main__":
208
  # This runs the server with stdio transport (for MCP clients)
209
- mcp.run()
 
32
 
33
import json
from collections import deque

from fastmcp import FastMCP

from games.zork_env import TextAdventureEnv
36
 
37
  # =============================================================================
38
  # Create the MCP Server
 
46
  # =============================================================================
47
 
48
class GameManager:
    """Manages the text adventure game state and builds a dynamic map.

    Beyond stepping the underlying Jericho environment, this class records
    every visited room, the directed connections between rooms, and the
    movement directions not yet tried from each room, so tools such as
    ``get_map`` can plan routes toward unexplored territory.
    """

    def __init__(self):
        # Lazily created game environment (None until initialize() runs).
        self.env: "TextAdventureEnv | None" = None
        self.state = None
        self.game_name: str = ""

        # --- Map tracking ---
        self.visited = set()       # room names we have entered
        self.connections = {}      # {"Room A": {"north": "Room B"}}
        self.unexplored = {}       # {"Room A": {"east", "west"}}

        # Standard Z-machine movement verbs (long and short forms).
        self.directions = {
            'north', 'south', 'east', 'west',
            'ne', 'nw', 'se', 'sw',
            'up', 'down', 'in', 'out', 'enter', 'exit',
            'n', 's', 'e', 'w', 'u', 'd',
        }

        # Single-letter shortcuts mapped to canonical direction names.
        self.dir_map = {
            'n': 'north', 's': 'south', 'e': 'east', 'w': 'west',
            'u': 'up', 'd': 'down',
        }

    def _reset_map(self):
        """Clear all exploration data (used whenever a new game starts)."""
        self.visited = set()
        self.connections = {}
        self.unexplored = {}

    def _normalize_dir(self, direction: str) -> str:
        """Return the canonical form of a direction (e.g. 'n' -> 'north')."""
        d = direction.lower().strip()
        return self.dir_map.get(d, d)

    def _update_map(self, previous_loc, action: str, current_loc):
        """Record a traversal edge and refresh the unexplored-exit sets.

        Args:
            previous_loc: Room name before the action (None at game start).
            action: The command that was executed.
            current_loc: Room name after the action (falsy if unknown).
        """
        if not current_loc:
            # Location unknown: still mark the attempted direction as tried
            # so a dead-end/blocked exit is not suggested again.
            if previous_loc and action:
                norm_action = self._normalize_dir(action)
                if norm_action in self.directions and previous_loc in self.unexplored:
                    self.unexplored[previous_loc].discard(norm_action)
            return

        # 1. Register the room on first visit.
        if current_loc not in self.connections:
            self.connections[current_loc] = {}
            self.visited.add(current_loc)

            # Ask Jericho which movement verbs are valid from this new room.
            # Only on first visit, so later discards are not overwritten.
            if self.env and self.env.env:
                valid_actions = self.env.env.get_valid_actions(use_parallel=False)
                self.unexplored[current_loc] = {
                    self._normalize_dir(a)
                    for a in valid_actions
                    if a.lower() in self.directions
                }

        # 2. Record the edge we just traversed.
        norm_action = self._normalize_dir(action)
        if previous_loc and previous_loc != current_loc and norm_action in self.directions:
            self.connections[previous_loc][norm_action] = current_loc
            # That exit is no longer unexplored.
            if previous_loc in self.unexplored:
                self.unexplored[previous_loc].discard(norm_action)

    def initialize(self, game: str = "zork1"):
        """Start (or restart) a game and map the starting room.

        Returns:
            The opening observation text.
        """
        self.game_name = game
        self.env = TextAdventureEnv(game)
        self.state = self.env.reset()

        # Restarting invalidates any previously built map.
        self._reset_map()

        # Seed the map with the starting room.
        start_loc = self.env.env.get_player_location()
        if start_loc:
            self._update_map(None, "", start_loc.name)

        return self.state.observation

    def step(self, action: str) -> str:
        """Execute one game command, update the map, and return the observation."""
        if self.env is None:
            self.initialize()

        loc_before_obj = self.env.env.get_player_location()
        loc_before = loc_before_obj.name if loc_before_obj else None

        self.state = self.env.step(action)

        loc_after_obj = self.env.env.get_player_location()
        loc_after = loc_after_obj.name if loc_after_obj else None

        # Update our map graph with the traversal we just made.
        self._update_map(loc_before, action, loc_after)

        return self.state.observation

    def get_score(self) -> int:
        """Current game score (0 before the game starts)."""
        return self.state.score if self.state else 0

    def get_moves(self) -> int:
        """Number of moves taken (0 before the game starts)."""
        return self.state.moves if self.state else 0
 
 
 
 
 
142
 
143
 
144
  # Global game manager
 
161
 
162
@mcp.tool()
def play_action(action: str) -> str:
    """Execute a game command and return the resulting observation text.

    Args:
        action: A text-adventure command such as "look", "north",
            or "take lamp".

    Returns:
        The game's textual response to the command.
    """
    game = get_game()
    # GameManager.step() lazily initializes the game on first use and
    # updates the exploration map as a side effect, so no before/after
    # location bookkeeping is needed here.
    return game.step(action)
183
+ # TODO: Implement additional tools to help your agent
184
@mcp.tool()
def game_state() -> str:
    """Return the current game state: score, moves, location, and inventory.

    Returns:
        A JSON string with keys "location", "score", "moves", "inventory".
    """
    game = get_game()
    # Guard both levels: game.env is None before the first initialize().
    if not game.env or not game.env.env:
        return "Game not initialized."

    inventory = game.env.env.get_inventory()
    inv_names = [obj.name for obj in inventory] if inventory else ["Empty"]

    location = game.env.env.get_player_location()
    loc_name = location.name if location else "Unknown"

    state = {
        "location": loc_name,
        "score": game.get_score(),
        "moves": game.get_moves(),
        "inventory": inv_names,
    }
    return json.dumps(state)
206
+
207
+
208
@mcp.tool()
def inventory() -> str:
    """Check what the player is carrying.

    Returns:
        A comma-separated list of the items in the player's inventory.
    """
    game = get_game()
    # Guard both levels: game.env is None before the first initialize().
    if not game.env or not game.env.env:
        return "Game not initialized."

    inventory_objects = game.env.env.get_inventory()
    if not inventory_objects:
        return "Your inventory is empty."

    items = [obj.name for obj in inventory_objects]
    return f"Inventory: {', '.join(items)}"
225
+
226
@mcp.tool()
def memory() -> str:
    """Get the current game state summary: Location, Score, and Moves.

    Use this to orient yourself.

    Returns:
        A JSON string with keys "location", "score", "moves", "max_score".
    """
    game = get_game()
    # Guard both levels: game.env is None before the first initialize().
    if not game.env or not game.env.env:
        return "Game not initialized."

    location = game.env.env.get_player_location()
    loc_name = location.name if location else "Unknown Location"

    return json.dumps({
        "location": loc_name,
        "score": game.get_score(),
        "moves": game.get_moves(),
        "max_score": game.env.env.get_max_score(),
    })
244
 
245
@mcp.tool()
def get_map() -> str:
    """Get a map of explored locations AND the paths to reach unexplored exits.

    Runs a breadth-first search over the known room graph from the current
    location, reporting the shortest known walk to every room that still has
    untried exits. Use this to figure out where to go next.
    """
    game = get_game()
    if not game.env or not game.env.env:
        return "Game not initialized."

    current_loc_obj = game.env.env.get_player_location()
    if not current_loc_obj:
        return "Cannot determine your current location."

    current_loc = current_loc_obj.name

    # 1. List visited locations.
    visited_str = ", ".join(sorted(game.visited))

    # Rooms that still have at least one untried exit.
    rooms_with_unexplored = {r: exits for r, exits in game.unexplored.items() if exits}

    # 2. BFS for shortest paths to rooms with unexplored exits.
    # deque gives O(1) popleft; list.pop(0) would make the search O(n^2).
    queue = deque([(current_loc, [])])  # (room name, [actions to reach it])
    visited_bfs = {current_loc}
    paths_to_unexplored = []

    while queue:
        curr, path = queue.popleft()

        # If this room has unexplored exits, log the path to get here.
        if curr in rooms_with_unexplored:
            unexp_str = ", ".join(rooms_with_unexplored[curr])
            if not path:
                paths_to_unexplored.append(
                    f"Right here in '{curr}', you haven't tried: {unexp_str}"
                )
            else:
                path_str = " -> ".join(path)
                paths_to_unexplored.append(
                    f"To explore '{curr}' ({unexp_str}), walk: {path_str}"
                )

        # Traverse known connections.
        for direction, neighbor in game.connections.get(curr, {}).items():
            if neighbor not in visited_bfs:
                visited_bfs.add(neighbor)
                queue.append((neighbor, path + [direction]))

    # 3. Format the final output.
    output = f"CURRENT LOCATION: {current_loc}\n\n"
    output += f"VISITED LOCATIONS ({len(game.visited)} total):\n{visited_str}\n\n"
    output += "UNEXPLORED PATHS & HOW TO GET THERE:\n"

    if paths_to_unexplored:
        output += "\n".join(f"- {p}" for p in paths_to_unexplored)
    else:
        output += "- No known unexplored paths! You have fully explored everything."

    return output
302
 
303
@mcp.tool()
def get_valid_actions() -> str:
    """Get a list of guaranteed valid actions for the current game state.

    Use this when you are stuck or don't know what verbs the game understands.

    Returns:
        A JSON string with key "valid_actions" (capped at 30 entries).
    """
    game = get_game()
    # Guard both levels: game.env is None before the first initialize().
    if not game.env or not game.env.env:
        return "Game not initialized."

    # Jericho extracts valid actions from the Z-machine object tree.
    valid_actions = game.env.env.get_valid_actions(
        use_object_tree=True, use_parallel=False
    )

    # Limit to top 30 to save the agent's context window.
    return json.dumps({"valid_actions": valid_actions[:30]})
320
 
321
@mcp.tool()
def inspect_surroundings() -> str:
    """Scan the room and return a list of interactive objects physically present."""
    game = get_game()
    # Guard both levels: game.env is None before the first initialize().
    if not game.env or not game.env.env:
        return "Game not initialized."

    player_loc = game.env.env.get_player_location()
    if not player_loc:
        return "Cannot determine location."

    # Hoist the player's object number out of the loop instead of calling
    # get_player_object() once per sibling.
    player_num = game.env.env.get_player_object().num

    # Walk the object tree: first child of the room, then iterate siblings.
    objects_in_room = []
    child_num = player_loc.child

    while child_num != 0:
        obj = game.env.env.get_object(child_num)
        if obj and obj.num != player_num:  # don't list the player itself
            objects_in_room.append(obj.name)
        child_num = obj.sibling if obj else 0

    if not objects_in_room:
        return "No notable interactive objects found here."

    return f"Interactive objects in this room: {', '.join(objects_in_room)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
346
 
347
 
348
  # =============================================================================
 
351
 
352
if __name__ == "__main__":
    # Launch the MCP server over stdio transport (for MCP clients).
    mcp.run()