LEAHPARAPHAEL committed on
Commit
3302cd7
·
1 Parent(s): 7a36b3c

multiple prompting

Browse files
Files changed (2) hide show
  1. agent.py +120 -1
  2. mcp_server.py +24 -1
agent.py CHANGED
@@ -127,6 +127,32 @@ TOOL: play_action
127
  ARGS: {"action": "look"}
128
  """
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
  # =============================================================================
132
  # Student Agent - IMPLEMENT THIS CLASS
@@ -148,7 +174,11 @@ class StudentAgent:
148
  """Initialize your agent here."""
149
  # TODO: Initialize any state tracking you need
150
  # self.history = []
151
- # self.visited_locations = set()
 
 
 
 
152
  pass
153
 
154
  async def run(
@@ -204,6 +234,95 @@ class StudentAgent:
204
 
205
  # TODO: Your implementation here
206
  # ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
 
208
  return RunResult(
209
  final_score=final_score,
 
127
  ARGS: {"action": "look"}
128
  """
129
 
130
+ SUMMARIZING_SYSTEM_PROMPT = """
131
+ You are the Chronicler of a text-adventure game. Your goal is to summarize the
132
+ current game state.
133
+
134
+ YOUR INPUT :
135
+ SUMMARY : <a short summary of the story so far>.
136
+ OBSERVATIONS : <the last 5 actions and resulting observations, with the format [action1 ->
137
+ observation1, action2 -> observation2...]>
138
+
139
+ YOUR TASK :
140
+ Update the summary of the game with the last 5 observations. Focus on the
141
+ important details : unsolved puzzles, immediate goal of the agent, important pieces of the
142
+ inventory, failed attempts... Try to orient the agent towards the next logical step in
143
+ the story.
144
+
145
+ YOUR OUTPUT :
146
+ The updated summary only (no comments). The new summary should be less than 300 tokens !
147
+ """
148
+
149
+
150
+ INTERFACE_SYSTEM_PROMPT = """
151
+ You are helping an AI agent interact with a text-adventure game. You receive the raw
152
+ observation resulting from the agent playing an action, and your task is to extract all
153
+ the useful metadata about this observation.
154
+ """
155
+
156
 
157
  # =============================================================================
158
  # Student Agent - IMPLEMENT THIS CLASS
 
174
  """Initialize your agent here."""
175
  # TODO: Initialize any state tracking you need
176
  # self.history = []
177
+ self.visited_locations = set()
178
+ self.history: list[dict] = []
179
+ self.recent_actions: list[str] = []
180
+ self.score: int = 0
181
+
182
  pass
183
 
184
  async def run(
 
234
 
235
  # TODO: Your implementation here
236
  # ...
237
+
238
+ # Get list of available tools
239
+ tools = await client.list_tools()
240
+ tool_names = [t.name for t in tools]
241
+
242
+ # Get initial observation
243
+ result = await client.call_tool("play_action", {"action": "look"})
244
+ observation = self._extract_result(result)
245
+
246
+ # Track initial location
247
+ location = observation.split("\n")[0] if observation else "Unknown"
248
+ locations_visited.add(location)
249
+
250
+ if verbose:
251
+ print(f"\n{observation}")
252
+
253
+ # Main ReAct loop
254
+ for step in range(1, max_steps + 1):
255
+ # Build prompt with context
256
+ prompt = self._build_prompt(observation)
257
+
258
+ # Call LLM for reasoning (use step-based seed for variety)
259
+ response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
260
+
261
+ # Parse the response
262
+ thought, tool_name, tool_args = self._parse_response(response, tool_names)
263
+
264
+ if verbose:
265
+ print(f"\n--- Step {step} ---")
266
+ print(f"[THOUGHT] {thought}")
267
+ print(f"[TOOL] {tool_name}({tool_args})")
268
+
269
+ # Validate and fix common issues
270
+ tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)
271
+
272
+ # Loop detection
273
+ if tool_name == "play_action":
274
+ action = tool_args.get("action", "look")
275
+ self.recent_actions.append(action)
276
+ if len(self.recent_actions) > 5:
277
+ self.recent_actions = self.recent_actions[-5:]
278
+
279
+ # Detect loops - if same action 3 times, force "look"
280
+ if len(self.recent_actions) >= 3 and len(set(self.recent_actions[-3:])) == 1:
281
+ if verbose:
282
+ print(f"[WARNING] Loop detected - forcing 'look'")
283
+ tool_args = {"action": "look"}
284
+ self.recent_actions.append("look")
285
+
286
+ moves += 1
287
+
288
+ # Execute the tool
289
+ try:
290
+ result = await client.call_tool(tool_name, tool_args)
291
+ observation = self._extract_result(result)
292
+
293
+ if verbose:
294
+ print(f"[RESULT] {observation[:200]}...")
295
+ except Exception as e:
296
+ observation = f"Error: {e}"
297
+ if verbose:
298
+ print(f"[ERROR] {e}")
299
+
300
+ # Track location
301
+ location = observation.split("\n")[0] if observation else "Unknown"
302
+ locations_visited.add(location)
303
+
304
+ # Update history
305
+ self.history.append({
306
+ "step": step,
307
+ "thought": thought,
308
+ "tool": tool_name,
309
+ "args": tool_args,
310
+ "result": observation[:200]
311
+ })
312
+ if len(self.history) > 10:
313
+ self.history = self.history[-10:]
314
+
315
+ # Track score from observation
316
+ self._update_score(observation)
317
+
318
+ # Record in result history
319
+ history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
320
+
321
+ # Check for game over
322
+ if self._is_game_over(observation):
323
+ if verbose:
324
+ print("\n*** GAME OVER ***")
325
+ break
326
 
327
  return RunResult(
328
  final_score=final_score,
mcp_server.py CHANGED
@@ -63,6 +63,10 @@ class GameManager:
63
  # self.history: list[tuple[str, str]] = []
64
  # self.explored_locations: dict[str, set[str]] = {}
65
  # self.current_location: str = ""
 
 
 
 
66
 
67
  def initialize(self, game: str = "zork1"):
68
  """Initialize or reset the game."""
@@ -82,8 +86,27 @@ class GameManager:
82
  # TODO: Update your state tracking here
83
  # self.history.append((action, self.state.observation))
84
  # Update location tracking, etc.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
- return self.state.observation
 
 
 
87
 
88
  def get_score(self) -> int:
89
  """Get current score."""
 
63
  # self.history: list[tuple[str, str]] = []
64
  # self.explored_locations: dict[str, set[str]] = {}
65
  # self.current_location: str = ""
66
+
67
+ self.history: list[tuple[str, str]] = []
68
+ self.explored_locations: dict[str, set[str]] = {}
69
+ self.current_location: str = ""
70
 
71
  def initialize(self, game: str = "zork1"):
72
  """Initialize or reset the game."""
 
86
  # TODO: Update your state tracking here
87
  # self.history.append((action, self.state.observation))
88
  # Update location tracking, etc.
89
+
90
+ result = self.state.observation
91
+
92
+ self.history.append((action, result))
93
+ if len(self.history) > 50:
94
+ self.history = self.history[-50:]
95
+
96
+ # Update map
97
+ new_location = self._extract_location(result)
98
+ if action in ["north", "south", "east", "west", "up", "down",
99
+ "enter", "exit", "n", "s", "e", "w", "u", "d"]:
100
+ if self.current_location not in self.explored_locations:
101
+ self.explored_locations[self.current_location] = set()
102
+ if new_location != self.current_location:
103
+ self.explored_locations[self.current_location].add(f"{action} -> {new_location}")
104
+ self.current_location = new_location
105
 
106
+ return result
107
+
108
+ def get_memory(self) -> str:
109
+ """Get a summary of current game state."""
110
 
111
  def get_score(self) -> int:
112
  """Get current score."""