mpboyer committed on
Commit
bcfda95
·
1 Parent(s): b9b81fb

local: ptet ça marche en fait

Browse files
Files changed (2) hide show
  1. shell.nix +1 -0
  2. submission/agent.py +264 -103
shell.nix CHANGED
@@ -14,6 +14,7 @@ mkShell {
14
  huggingface-hub
15
  langchain
16
  numpy
 
17
  python-dotenv
18
  requests
19
  spacy
 
14
  huggingface-hub
15
  langchain
16
  numpy
17
+ pathlib2
18
  python-dotenv
19
  requests
20
  spacy
submission/agent.py CHANGED
@@ -120,176 +120,337 @@ class RunResult:
120
 
121
 
122
  # =============================================================================
123
- # System Prompt - Customize this for your agent
124
  # =============================================================================
125
 
126
- SYSTEM_PROMPT = """You are playing a classic text adventure game.
127
- GOAL: Explore the world, solve puzzles, and maximize your score.
128
- AVAILABLE TOOLS (use via MCP):
129
- - play_action: Execute a game command (north, take lamp, open mailbox, etc.)
130
- - memory: Get current game state and history (if implemented)
131
- - inventory: Check what you're carrying (if implemented)
132
  VALID GAME COMMANDS for play_action:
133
  - Movement: north, south, east, west, up, down, enter, exit
134
  - Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
135
- - Other: look, inventory, read <thing>, turn on lamp
 
 
 
136
  RESPOND IN THIS EXACT FORMAT (no markdown):
137
- THOUGHT: <your reasoning about what to do next>
138
  TOOL: <tool_name>
139
- ARGS: <JSON arguments, e.g., {"action": "look"}>
140
- Example:
141
- THOUGHT: I should look around to see where I am.
142
  TOOL: play_action
143
  ARGS: {"action": "look"}
144
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
 
147
  # =============================================================================
148
- # Student Agent - IMPLEMENT THIS CLASS
149
  # =============================================================================
150
 
151
  class StudentAgent:
152
  """
153
- Your ReAct agent implementation.
154
-
155
- TODO:
156
- 1. Implement the run() method with the ReAct loop
157
- 2. Parse LLM responses to extract tool calls
158
- 3. Track state and avoid loops
159
 
160
- Use the provided call_llm() function to interact with the LLM.
 
 
 
 
161
  """
162
 
163
  def __init__(self):
164
- """Initialize your agent here."""
165
- # TODO: Initialize any state tracking you need
166
- # self.history = []
167
- # self.visited_locations = set()
168
- pass
169
 
170
  async def run(
171
  self,
172
- client, # FastMCP Client connected to your MCP server
173
  game: str,
174
  max_steps: int,
175
  seed: int,
176
  verbose: bool = False,
177
  ) -> RunResult:
178
- """
179
- Run the agent for a game session.
 
 
180
 
181
- Args:
182
- client: FastMCP Client connected to your MCP server
183
- game: Name of the game being played (e.g., "zork1")
184
- max_steps: Maximum number of steps to take
185
- seed: Random seed for reproducibility (use for LLM calls)
186
- verbose: Whether to print detailed output
187
-
188
- Returns:
189
- RunResult with final score and statistics
190
- """
191
- # TODO: Implement your ReAct loop here
192
- #
193
- # Basic structure:
194
- # 1. Get initial observation (call play_action with "look")
195
- # 2. Loop for max_steps:
196
- # a. Build prompt with current observation and history
197
- # b. Call LLM to get thought and action
198
- # c. Parse the response to extract tool and args
199
- # d. Call the tool via client.call_tool(tool_name, args)
200
- # e. Update history and state
201
- # f. Check for game over
202
- # 3. Return RunResult with final statistics
203
 
204
- # Example of calling a tool:
205
- # result = await client.call_tool("play_action", {"action": "look"})
206
- # observation = result[0].text if result else "No response"
207
 
208
- # Example of calling the LLM:
209
- # response = call_llm(
210
- # prompt="Current observation: " + observation,
211
- # system_prompt=SYSTEM_PROMPT,
212
- # seed=seed,
213
- # )
214
 
215
- # Placeholder implementation - replace with your code
216
- locations_visited = set()
217
- history = []
218
- final_score = 0
219
- moves = 0
220
 
221
- # TODO: Your implementation here
222
- # ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
  return RunResult(
225
- final_score=final_score,
226
- max_score=350, # Zork1 max score, adjust if needed
227
  moves=moves,
228
  locations_visited=locations_visited,
229
- game_completed=False,
230
  history=history,
231
  )
232
 
233
- def _build_prompt(self, observation: str, history: list) -> str:
234
- """
235
- Build the prompt for the LLM.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
236
 
237
- TODO: Implement this to create effective prompts
238
- """
239
- # TODO: Combine system prompt, history, and current observation
240
- pass
241
 
242
- def _parse_response(self, response: str) -> tuple[str, str, dict]:
243
- """
244
- Parse LLM response to extract thought, tool name, and arguments.
 
 
 
 
 
 
 
 
 
 
 
245
 
246
- TODO: Implement robust parsing
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
 
248
- Returns:
249
- Tuple of (thought, tool_name, args_dict)
250
- """
251
- # TODO: Parse the response format:
252
- # THOUGHT: ...
253
- # TOOL: ...
254
- # ARGS: {...}
255
- pass
 
256
 
257
- def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
258
- """
259
- Call the LLM with the given prompt.
 
 
 
 
260
 
261
- This is a convenience wrapper - you can also use call_llm() directly.
262
- """
263
- return call_llm(prompt, system_prompt, seed)
 
 
 
 
 
 
 
 
 
 
 
 
264
 
265
 
266
  # =============================================================================
267
- # For local testing
268
  # =============================================================================
269
 
270
  async def test_agent():
271
  """Test the agent locally."""
272
  from fastmcp import Client
273
 
274
- # Path to your MCP server
275
- server_path = "mcp_server.py"
276
-
277
  agent = StudentAgent()
278
 
279
- async with Client(server_path) as client:
280
  result = await agent.run(
281
  client=client,
282
  game="zork1",
283
- max_steps=10,
284
  seed=42,
285
  verbose=True,
286
  )
287
 
288
- print(f"\nFinal Score: {result.final_score}")
 
289
  print(f"Moves: {result.moves}")
290
- print(f"Locations: {result.locations_visited}")
291
 
292
 
293
  if __name__ == "__main__":
294
  import asyncio
295
  asyncio.run(test_agent())
 
 
120
 
121
 
122
  # =============================================================================
123
+ # System Prompt
124
  # =============================================================================
125
 
126
+ SYSTEM_PROMPT = """You are an expert text adventure game player. Your goal is to explore, collect treasures, and maximize your score.
127
+ AVAILABLE TOOLS (use these via MCP):
128
+ 1. play_action - Execute game commands (north, take lamp, open mailbox, etc.)
129
+ 2. memory - Get current game state, score, and recent history
130
+ 3. get_map - See explored locations and connections
131
+ 4. inventory - Check what you're carrying
132
  VALID GAME COMMANDS for play_action:
133
  - Movement: north, south, east, west, up, down, enter, exit
134
  - Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
135
+ - Light: turn on lamp, turn off lamp
136
+ - Combat: attack <enemy> with <weapon>
137
+ - Other: inventory, look, read <thing>, wait
138
+ FORBIDDEN (will NOT work): check, inspect, search, grab, use, help
139
  RESPOND IN THIS EXACT FORMAT (no markdown):
140
+ THOUGHT: <brief reasoning about what to do next>
141
  TOOL: <tool_name>
142
+ ARGS: <JSON arguments>
143
+ Examples:
144
+ THOUGHT: I need to see what's around me.
145
  TOOL: play_action
146
  ARGS: {"action": "look"}
147
+ THOUGHT: Let me check my current state and score.
148
+ TOOL: memory
149
+ ARGS: {}
150
+ THOUGHT: The mailbox might contain something useful.
151
+ TOOL: play_action
152
+ ARGS: {"action": "open mailbox"}
153
+ STRATEGY:
154
+ 1. Start by looking around and checking memory
155
+ 2. Explore systematically - try all directions
156
+ 3. Pick up useful items (lamp, sword, etc.)
157
+ 4. Open containers (mailbox, window, etc.)
158
+ 5. Use get_map to avoid getting lost
159
+ 6. Turn on lamp before dark areas!
160
+ DO NOT repeat the same action multiple times in a row."""
161
 
162
 
163
  # =============================================================================
164
+ # Student Agent Implementation
165
  # =============================================================================
166
 
167
  class StudentAgent:
168
  """
169
+ MCP ReAct Agent - A complete working example.
 
 
 
 
 
170
 
171
+ This agent demonstrates:
172
+ - ReAct loop (Thought -> Tool -> Observation)
173
+ - Loop detection
174
+ - Action validation
175
+ - Score tracking via memory tool
176
  """
177
 
178
  def __init__(self):
179
+ """Initialize the agent state."""
180
+ self.history: list[dict] = []
181
+ self.recent_actions: list[str] = []
182
+ self.score: int = 0
 
183
 
184
  async def run(
185
  self,
186
+ client,
187
  game: str,
188
  max_steps: int,
189
  seed: int,
190
  verbose: bool = False,
191
  ) -> RunResult:
192
+ """Run the agent for a game session."""
193
+ locations_visited = set()
194
+ history = []
195
+ moves = 0
196
 
197
+ # Get list of available tools
198
+ tools = await client.list_tools()
199
+ tool_names = [t.name for t in tools]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
 
201
+ # Get initial observation
202
+ result = await client.call_tool("play_action", {"action": "look"})
203
+ observation = self._extract_result(result)
204
 
205
+ # Track initial location
206
+ location = observation.split("\n")[0] if observation else "Unknown"
207
+ locations_visited.add(location)
 
 
 
208
 
209
+ if verbose:
210
+ print(f"\n{observation}")
 
 
 
211
 
212
+ # Main ReAct loop
213
+ for step in range(1, max_steps + 1):
214
+ # Build prompt with context
215
+ prompt = self._build_prompt(observation)
216
+
217
+ # Call LLM for reasoning (use step-based seed for variety)
218
+ response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
219
+
220
+ # Parse the response
221
+ thought, tool_name, tool_args = self._parse_response(response, tool_names)
222
+
223
+ if verbose:
224
+ print(f"\n--- Step {step} ---")
225
+ print(f"[THOUGHT] {thought}")
226
+ print(f"[TOOL] {tool_name}({tool_args})")
227
+
228
+ # Validate and fix common issues
229
+ tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)
230
+
231
+ # Loop detection
232
+ if tool_name == "play_action":
233
+ action = tool_args.get("action", "look")
234
+ self.recent_actions.append(action)
235
+ if len(self.recent_actions) > 5:
236
+ self.recent_actions = self.recent_actions[-5:]
237
+
238
+ # Detect loops - if same action 3 times, force "look"
239
+ if len(self.recent_actions) >= 3 and len(set(self.recent_actions[-3:])) == 1:
240
+ if verbose:
241
+ print(f"[WARNING] Loop detected - forcing 'look'")
242
+ tool_args = {"action": "look"}
243
+ self.recent_actions.append("look")
244
+
245
+ moves += 1
246
+
247
+ # Execute the tool
248
+ try:
249
+ result = await client.call_tool(tool_name, tool_args)
250
+ observation = self._extract_result(result)
251
+
252
+ if verbose:
253
+ print(f"[RESULT] {observation[:200]}...")
254
+ except Exception as e:
255
+ observation = f"Error: {e}"
256
+ if verbose:
257
+ print(f"[ERROR] {e}")
258
+
259
+ # Track location
260
+ location = observation.split("\n")[0] if observation else "Unknown"
261
+ locations_visited.add(location)
262
+
263
+ # Update history
264
+ self.history.append({
265
+ "step": step,
266
+ "thought": thought,
267
+ "tool": tool_name,
268
+ "args": tool_args,
269
+ "result": observation[:200]
270
+ })
271
+ if len(self.history) > 10:
272
+ self.history = self.history[-10:]
273
+
274
+ # Track score from observation
275
+ self._update_score(observation)
276
+
277
+ # Record in result history
278
+ history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
279
+
280
+ # Check for game over
281
+ if self._is_game_over(observation):
282
+ if verbose:
283
+ print("\n*** GAME OVER ***")
284
+ break
285
 
286
  return RunResult(
287
+ final_score=self.score,
288
+ max_score=350,
289
  moves=moves,
290
  locations_visited=locations_visited,
291
+ game_completed=self._is_game_over(observation),
292
  history=history,
293
  )
294
 
295
+ def _build_prompt(self, observation: str) -> str:
296
+ """Build the prompt for the LLM with context."""
297
+ parts = []
298
+
299
+ parts.append(f"Current Score: {self.score}")
300
+
301
+ # Recent history
302
+ if self.history:
303
+ parts.append("\nRecent actions:")
304
+ for entry in self.history[-3:]:
305
+ action = entry.get("args", {}).get("action", entry["tool"])
306
+ result_short = entry["result"][:80] + "..." if len(entry["result"]) > 80 else entry["result"]
307
+ parts.append(f" > {action} -> {result_short}")
308
+
309
+ # Warn about repeated actions
310
+ if self.recent_actions and len(set(self.recent_actions[-3:])) == 1:
311
+ parts.append(f"\n[WARNING: You've been doing '{self.recent_actions[-1]}' repeatedly. TRY SOMETHING DIFFERENT!]")
312
+
313
+ parts.append(f"\nCurrent situation:\n{observation}")
314
+ parts.append("\nWhat do you do next?")
315
+
316
+ return "\n".join(parts)
317
+
318
+ def _parse_response(self, response: str, valid_tools: list[str]) -> tuple[str, str, dict]:
319
+ """Parse the LLM response to extract thought, tool, and arguments."""
320
+ thought = "No reasoning provided"
321
+ tool_name = "play_action"
322
+ tool_args = {"action": "look"}
323
+
324
+ lines = response.strip().split("\n")
325
+
326
+ for line in lines:
327
+ line_clean = line.strip()
328
+ line_upper = line_clean.upper()
329
+
330
+ if line_upper.startswith("THOUGHT:"):
331
+ thought = line_clean.split(":", 1)[1].strip()
332
+
333
+ elif line_upper.startswith("TOOL:"):
334
+ raw_tool = line_clean.split(":", 1)[1].strip().lower()
335
+ raw_tool = raw_tool.replace("**", "").replace("*", "").replace("`", "")
336
+ raw_tool = raw_tool.split()[0] if raw_tool else "play_action"
337
+ tool_name = raw_tool
338
+
339
+ elif line_upper.startswith("ARGS:"):
340
+ args_part = line_clean.split(":", 1)[1].strip()
341
+ try:
342
+ args_part = args_part.replace("'", '"')
343
+ tool_args = json.loads(args_part)
344
+ except json.JSONDecodeError:
345
+ match = re.search(r'"action"\s*:\s*"([^"]+)"', args_part)
346
+ if match:
347
+ tool_args = {"action": match.group(1)}
348
+ else:
349
+ tool_args = {"action": "look"}
350
 
351
+ return thought, tool_name, tool_args
 
 
 
352
 
353
+ def _validate_tool_call(self, tool_name: str, tool_args: dict, valid_tools: list[str]) -> tuple[str, dict]:
354
+ """Validate and fix common tool call issues."""
355
+ # Fix tool name
356
+ if tool_name not in valid_tools:
357
+ if tool_name in ["action", "do", "command"]:
358
+ tool_name = "play_action"
359
+ elif tool_name in ["map", "location"]:
360
+ tool_name = "get_map"
361
+ elif tool_name in ["mem", "state", "status"]:
362
+ tool_name = "memory"
363
+ elif tool_name in ["inv", "items"]:
364
+ tool_name = "inventory"
365
+ else:
366
+ tool_name = "play_action"
367
 
368
+ # Fix action verbs
369
+ if tool_name == "play_action":
370
+ action = tool_args.get("action", "look")
371
+
372
+ invalid_verb_map = {
373
+ "check": "examine",
374
+ "inspect": "examine",
375
+ "search": "look",
376
+ "grab": "take",
377
+ "pick": "take",
378
+ "use": "examine",
379
+ "investigate": "examine",
380
+ }
381
+
382
+ words = action.lower().split()
383
+ if words and words[0] in invalid_verb_map:
384
+ words[0] = invalid_verb_map[words[0]]
385
+ action = " ".join(words)
386
+
387
+ action = action.lower().strip()
388
+ action = action.replace("**", "").replace("*", "").replace("`", "")
389
+ action = " ".join(action.split())
390
+
391
+ tool_args["action"] = action
392
 
393
+ return tool_name, tool_args
394
+
395
+ def _extract_result(self, result) -> str:
396
+ """Extract text from MCP tool result."""
397
+ if hasattr(result, 'content') and result.content:
398
+ return result.content[0].text
399
+ if isinstance(result, list) and result:
400
+ return result[0].text if hasattr(result[0], 'text') else str(result[0])
401
+ return str(result)
402
 
403
+ def _update_score(self, text: str) -> None:
404
+ """Update score from game text."""
405
+ patterns = [
406
+ r'Score:\s*(\d+)',
407
+ r'score[:\s]+(\d+)',
408
+ r'\[Score:\s*(\d+)',
409
+ ]
410
 
411
+ for pattern in patterns:
412
+ match = re.search(pattern, text, re.IGNORECASE)
413
+ if match:
414
+ self.score = max(self.score, int(match.group(1)))
415
+
416
+ def _is_game_over(self, text: str) -> bool:
417
+ """Check if the game is over."""
418
+ game_over_phrases = [
419
+ "game over",
420
+ "you have died",
421
+ "you are dead",
422
+ "*** you have died ***",
423
+ ]
424
+ text_lower = text.lower()
425
+ return any(phrase in text_lower for phrase in game_over_phrases)
426
 
427
 
428
  # =============================================================================
429
+ # Local Testing
430
  # =============================================================================
431
 
432
  async def test_agent():
433
  """Test the agent locally."""
434
  from fastmcp import Client
435
 
 
 
 
436
  agent = StudentAgent()
437
 
438
+ async with Client("mcp_server.py") as client:
439
  result = await agent.run(
440
  client=client,
441
  game="zork1",
442
+ max_steps=20,
443
  seed=42,
444
  verbose=True,
445
  )
446
 
447
+ print(f"\n{'=' * 50}")
448
+ print(f"Final Score: {result.final_score}")
449
  print(f"Moves: {result.moves}")
450
+ print(f"Locations: {len(result.locations_visited)}")
451
 
452
 
453
  if __name__ == "__main__":
454
  import asyncio
455
  asyncio.run(test_agent())
456
+