Spaces:
Sleeping
Sleeping
Commit ·
045d72f
1
Parent(s): f45fc29
Added history resume
Browse files- agent.py +142 -41
- mcp_server.py +32 -25
- requirements.txt +2 -0
agent.py
CHANGED
|
@@ -32,6 +32,7 @@ from typing import Optional
|
|
| 32 |
from dotenv import load_dotenv
|
| 33 |
from huggingface_hub import InferenceClient
|
| 34 |
from termcolor import colored as col
|
|
|
|
| 35 |
|
| 36 |
# Load environment variables
|
| 37 |
load_dotenv()
|
|
@@ -42,6 +43,7 @@ load_dotenv()
|
|
| 42 |
|
| 43 |
# Model to use (fixed for fair evaluation)
|
| 44 |
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
|
|
|
|
| 45 |
|
| 46 |
# Initialize the LLM client (uses HF_TOKEN from environment)
|
| 47 |
_hf_token = os.getenv("HF_TOKEN")
|
|
@@ -113,36 +115,44 @@ AVAILABLE TOOLS (use via MCP):
|
|
| 113 |
- inventory: Check what you're carrying
|
| 114 |
- get_map: See explored locations and conections
|
| 115 |
- current_location: Get your current location name
|
| 116 |
-
- get_valid_actions: Get a list of valid actions in the current context
|
| 117 |
- add_knowledge: Add information to your knowledge base (args: {"info": "text to remember"})
|
| 118 |
|
| 119 |
VALID GAME COMMANDS for play_action:
|
| 120 |
- Movement: north, south, east, west, up, down, enter, exit, wait
|
| 121 |
- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>,
|
| 122 |
-
put <item> on <thing>, put <item> in <thing>, push <thing>, pull <thing>, turn <thing>, feel <thing>
|
| 123 |
- Other: look, inventory, read <thing>, turn on lamp
|
| 124 |
- Interactions: talk to <npc>, give <item> to <npc>, ask <npc> about <topic>, tell <npc> about <message>, show <item> to <npc>
|
| 125 |
- Game: undo, hint
|
| 126 |
|
| 127 |
RESPOND IN THIS EXACT FORMAT (no markdown):
|
| 128 |
THOUGHT: <your reasoning about what to do next>
|
|
|
|
| 129 |
TOOL: <tool_name>
|
| 130 |
ARGS: <JSON arguments, e.g., {"action": "look"}>
|
| 131 |
|
| 132 |
Example:
|
| 133 |
THOUGHT: I should look around to see where I am.
|
|
|
|
| 134 |
TOOL: play_action
|
| 135 |
ARGS: {"action": "look"}
|
| 136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
STRATEGY:
|
| 138 |
1. Start by looking around
|
| 139 |
-
2.
|
| 140 |
3. Examine everything you find, or try to interact
|
| 141 |
-
4. Pick up useful items (lamp, sword, etc.) or everything that you can take
|
| 142 |
5. Open containers (mailbox, window, etc.)
|
| 143 |
6. Use get_map if you don't know where to go
|
| 144 |
-
7. If you find NPCs, talk to them and see if they have useful information or items
|
| 145 |
-
8. If you are lost use the
|
| 146 |
9. Any odd fact is worth remembering with add_knowledge, it might be useful later
|
| 147 |
10. Use your senses: listen, smell, touch
|
| 148 |
|
|
@@ -171,8 +181,11 @@ class StudentAgent:
|
|
| 171 |
self.history: list[dict] = []
|
| 172 |
self.score: int = 0
|
| 173 |
self.location: str = "Unknown"
|
|
|
|
| 174 |
self.knowledge_base: list[str] = []
|
| 175 |
self.answers=set()
|
|
|
|
|
|
|
| 176 |
|
| 177 |
async def run(
|
| 178 |
self,
|
|
@@ -214,18 +227,26 @@ class StudentAgent:
|
|
| 214 |
if verbose:
|
| 215 |
print(f"\n Observation:{observation}")
|
| 216 |
|
|
|
|
| 217 |
for step in range(1, max_steps + 1):
|
| 218 |
-
prompt = self._build_prompt(observation, self.history, step)
|
|
|
|
|
|
|
|
|
|
| 219 |
response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
|
| 220 |
|
| 221 |
-
thought, tool_name, tool_args = self._parse_response(response)
|
|
|
|
|
|
|
|
|
|
| 222 |
|
| 223 |
-
|
| 224 |
location = await client.call_tool("current_location", {})
|
| 225 |
location = location.structured_content['result']
|
| 226 |
location = re.search(r':\s*(.*?)\s*Parent', location).group(1)
|
| 227 |
prev_location = self.location
|
| 228 |
self.location = location if location else "Unknown"
|
|
|
|
|
|
|
| 229 |
|
| 230 |
if verbose:
|
| 231 |
print(f"\n__________________________________________________ Step {step} __________________________________________________")
|
|
@@ -234,6 +255,8 @@ class StudentAgent:
|
|
| 234 |
print(col(f"[TOOL] ", "blue")+col(f"{tool_name}: {tool_args}", "yellow"))
|
| 235 |
print(col(f"[LOCATION] {location}", "blue"))
|
| 236 |
print(col(f"[KNOWLEDGE] {self.knowledge_base}", "cyan"))
|
|
|
|
|
|
|
| 237 |
|
| 238 |
|
| 239 |
|
|
@@ -255,6 +278,9 @@ class StudentAgent:
|
|
| 255 |
if verbose:
|
| 256 |
print(col(f"[LOCAL ACTION] Knowledge updated", "green"))
|
| 257 |
else:
|
|
|
|
|
|
|
|
|
|
| 258 |
try:
|
| 259 |
result = await client.call_tool(tool_name, tool_args)
|
| 260 |
self._update_score(self._extract_result(result))
|
|
@@ -270,23 +296,25 @@ class StudentAgent:
|
|
| 270 |
locations_visited.add(self.location)
|
| 271 |
|
| 272 |
ignore_repeated.discard(prev_action)
|
|
|
|
|
|
|
| 273 |
|
| 274 |
if prev_location != self.location and step>1:
|
| 275 |
self.history.append({
|
| 276 |
"step": step,
|
| 277 |
"tool": "Moved",
|
| 278 |
"from": prev_location,
|
| 279 |
-
"to": self.location
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 280 |
})
|
| 281 |
-
|
| 282 |
-
self.history.append({
|
| 283 |
-
"step": step,
|
| 284 |
-
"thought": thought,
|
| 285 |
-
"tool": tool_name,
|
| 286 |
-
"args": tool_args,
|
| 287 |
-
"result": observation[:200],
|
| 288 |
-
"location": self.location
|
| 289 |
-
})
|
| 290 |
|
| 291 |
|
| 292 |
if len(self.history) > 100:
|
|
@@ -294,14 +322,13 @@ class StudentAgent:
|
|
| 294 |
|
| 295 |
|
| 296 |
history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
|
| 297 |
-
prev_action = (tool_name, str(tool_args), self.location)
|
| 298 |
-
|
| 299 |
if self._is_game_over(observation):
|
| 300 |
if verbose:
|
| 301 |
print(col("\n*** GAME OVER ***", "red"))
|
| 302 |
break
|
| 303 |
|
| 304 |
print (col(f"\nFinal Score: {self.score}", "magenta"))
|
|
|
|
| 305 |
return RunResult(
|
| 306 |
final_score=self.score,
|
| 307 |
max_score=350, # Zork1 max score, adjust if needed
|
|
@@ -319,18 +346,39 @@ class StudentAgent:
|
|
| 319 |
return step - past["step"]
|
| 320 |
return -1
|
| 321 |
|
| 322 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 323 |
"""
|
| 324 |
Build the prompt for the LLM.
|
| 325 |
|
| 326 |
TODO: Implement this to create effective prompts
|
| 327 |
"""
|
| 328 |
prmt = []
|
| 329 |
-
prmt.append(f"
|
| 330 |
|
| 331 |
kb="\n".join(self.knowledge_base)
|
| 332 |
if kb:
|
| 333 |
-
prmt.append(f"\
|
| 334 |
|
| 335 |
|
| 336 |
|
|
@@ -338,34 +386,78 @@ class StudentAgent:
|
|
| 338 |
chars_to_include = [100,50, 30 , 20]
|
| 339 |
|
| 340 |
if self.history:
|
| 341 |
-
prmt.append("\nRecent actions:")
|
| 342 |
if SHORT_TERM_MEM > len(chars_to_include)-1:
|
| 343 |
chars_to_include= [None]*(SHORT_TERM_MEM-len(chars_to_include)+1) + chars_to_include
|
| 344 |
|
| 345 |
-
for
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 353 |
|
| 354 |
-
|
|
|
|
|
|
|
| 355 |
if observation not in self.answers and observation.strip()!="":
|
| 356 |
-
hint = f"If you found something worth remembering, add it to the knowledge base with the add_knowledge tool, so you can use it later, don't add descriptions. If you found and object that you cant take, take it, dont add the previous location of the object to the memory"
|
|
|
|
| 357 |
self.answers.add(observation)
|
| 358 |
|
| 359 |
-
|
| 360 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 361 |
|
| 362 |
prmt.append("\nWhat do you do next?")
|
| 363 |
|
| 364 |
dbg=col(f"\n[DEBUG] Prompt for step {step}:\n{"\n".join(prmt)}", "red")
|
| 365 |
dbg= '_'*80 + dbg + '\n' + '_'*80
|
| 366 |
#print(dbg)
|
| 367 |
-
|
| 368 |
-
return
|
| 369 |
|
| 370 |
def _parse_response(self, response: str) -> tuple[str, str, dict]:
|
| 371 |
"""
|
|
@@ -379,12 +471,16 @@ class StudentAgent:
|
|
| 379 |
thought = "No reasoning provided"
|
| 380 |
tool_name = "play_action"
|
| 381 |
tool_args = {"action": "look"}
|
|
|
|
| 382 |
|
| 383 |
lines = response.strip().split("\n")
|
| 384 |
|
| 385 |
for line in lines:
|
| 386 |
line_clean = line.strip()
|
| 387 |
line_upper = line_clean.upper()
|
|
|
|
|
|
|
|
|
|
| 388 |
|
| 389 |
if line_upper.startswith("THOUGHT:"):
|
| 390 |
thought = line_clean.split(":", 1)[1].strip()
|
|
@@ -407,7 +503,7 @@ class StudentAgent:
|
|
| 407 |
else:
|
| 408 |
tool_args = {"action": "look"}
|
| 409 |
|
| 410 |
-
return thought, tool_name, tool_args
|
| 411 |
|
| 412 |
def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
|
| 413 |
"""
|
|
@@ -494,8 +590,13 @@ class StudentAgent:
|
|
| 494 |
"you are dead",
|
| 495 |
"*** you have died ***",
|
| 496 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 497 |
text_lower = text.lower()
|
| 498 |
-
return any(phrase in text_lower for phrase in game_over_phrases)
|
| 499 |
|
| 500 |
# =============================================================================
|
| 501 |
# For local testing
|
|
|
|
| 32 |
from dotenv import load_dotenv
|
| 33 |
from huggingface_hub import InferenceClient
|
| 34 |
from termcolor import colored as col
|
| 35 |
+
from transformers import AutoTokenizer
|
| 36 |
|
| 37 |
# Load environment variables
|
| 38 |
load_dotenv()
|
|
|
|
| 43 |
|
| 44 |
# Model to use (fixed for fair evaluation)
|
| 45 |
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
|
| 46 |
+
#LLM_MODEL = "Qwen/Qwen2.5-7B-Instruct"
|
| 47 |
|
| 48 |
# Initialize the LLM client (uses HF_TOKEN from environment)
|
| 49 |
_hf_token = os.getenv("HF_TOKEN")
|
|
|
|
| 115 |
- inventory: Check what you're carrying
|
| 116 |
- get_map: See explored locations and conections
|
| 117 |
- current_location: Get your current location name
|
|
|
|
| 118 |
- add_knowledge: Add information to your knowledge base (args: {"info": "text to remember"})
|
| 119 |
|
| 120 |
VALID GAME COMMANDS for play_action:
|
| 121 |
- Movement: north, south, east, west, up, down, enter, exit, wait
|
| 122 |
- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>,
|
| 123 |
+
put <item> on <thing>, put <item> in <thing>, push <thing>, pull <thing>, turn <thing>, feel <thing>, look in <thing>, look under <thing>
|
| 124 |
- Other: look, inventory, read <thing>, turn on lamp
|
| 125 |
- Interactions: talk to <npc>, give <item> to <npc>, ask <npc> about <topic>, tell <npc> about <message>, show <item> to <npc>
|
| 126 |
- Game: undo, hint
|
| 127 |
|
| 128 |
RESPOND IN THIS EXACT FORMAT (no markdown):
|
| 129 |
THOUGHT: <your reasoning about what to do next>
|
| 130 |
+
GOAL: <what is your current main objective?> (This line is optional, only add it to change your goal)
|
| 131 |
TOOL: <tool_name>
|
| 132 |
ARGS: <JSON arguments, e.g., {"action": "look"}>
|
| 133 |
|
| 134 |
Example:
|
| 135 |
THOUGHT: I should look around to see where I am.
|
| 136 |
+
GOAL: Go inside the house
|
| 137 |
TOOL: play_action
|
| 138 |
ARGS: {"action": "look"}
|
| 139 |
|
| 140 |
+
Actions that don't work:
|
| 141 |
+
examine <thing> closely
|
| 142 |
+
look for objects
|
| 143 |
+
look for <thing>
|
| 144 |
+
examine <thing> in detail
|
| 145 |
+
|
| 146 |
+
|
| 147 |
STRATEGY:
|
| 148 |
1. Start by looking around
|
| 149 |
+
2. EXPLORE systematically, Look in/under, objects may be hidden
|
| 150 |
3. Examine everything you find, or try to interact
|
| 151 |
+
4. Pick up useful items (lamp, sword, etc.) or everything that you can take, examine BEFORE taking
|
| 152 |
5. Open containers (mailbox, window, etc.)
|
| 153 |
6. Use get_map if you don't know where to go
|
| 154 |
+
7. If you find NPCs, talk to them and see if they have useful information or items, exhaust dialogue
|
| 155 |
+
8. If you are lost use the MAP or memory tools
|
| 156 |
9. Any odd fact is worth remembering with add_knowledge, it might be useful later
|
| 157 |
10. Use your senses: listen, smell, touch
|
| 158 |
|
|
|
|
| 181 |
self.history: list[dict] = []
|
| 182 |
self.score: int = 0
|
| 183 |
self.location: str = "Unknown"
|
| 184 |
+
self.goal: str= "Not found general goal at the moment"
|
| 185 |
self.knowledge_base: list[str] = []
|
| 186 |
self.answers=set()
|
| 187 |
+
self.tokenizer= AutoTokenizer.from_pretrained(LLM_MODEL)
|
| 188 |
+
self.actions_resume={}
|
| 189 |
|
| 190 |
async def run(
|
| 191 |
self,
|
|
|
|
| 227 |
if verbose:
|
| 228 |
print(f"\n Observation:{observation}")
|
| 229 |
|
| 230 |
+
prompt_tokens=0
|
| 231 |
for step in range(1, max_steps + 1):
|
| 232 |
+
prompt = self._build_prompt(observation, self.history, step, self.goal)
|
| 233 |
+
prompt_size=self.measure_prompt_size(prompt)
|
| 234 |
+
print(f"[PROMPT TOKENS] {prompt_size}")
|
| 235 |
+
prompt_tokens+=prompt_size
|
| 236 |
response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
|
| 237 |
|
| 238 |
+
thought, tool_name, tool_args, goal = self._parse_response(response)
|
| 239 |
+
|
| 240 |
+
if goal:
|
| 241 |
+
self.goal=goal
|
| 242 |
|
|
|
|
| 243 |
location = await client.call_tool("current_location", {})
|
| 244 |
location = location.structured_content['result']
|
| 245 |
location = re.search(r':\s*(.*?)\s*Parent', location).group(1)
|
| 246 |
prev_location = self.location
|
| 247 |
self.location = location if location else "Unknown"
|
| 248 |
+
if self.actions_resume.get(location,None)==None:
|
| 249 |
+
self.actions_resume[location]=""
|
| 250 |
|
| 251 |
if verbose:
|
| 252 |
print(f"\n__________________________________________________ Step {step} __________________________________________________")
|
|
|
|
| 255 |
print(col(f"[TOOL] ", "blue")+col(f"{tool_name}: {tool_args}", "yellow"))
|
| 256 |
print(col(f"[LOCATION] {location}", "blue"))
|
| 257 |
print(col(f"[KNOWLEDGE] {self.knowledge_base}", "cyan"))
|
| 258 |
+
if goal:
|
| 259 |
+
print(col(f"[NEW GOAL] {goal}", "cyan"))
|
| 260 |
|
| 261 |
|
| 262 |
|
|
|
|
| 278 |
if verbose:
|
| 279 |
print(col(f"[LOCAL ACTION] Knowledge updated", "green"))
|
| 280 |
else:
|
| 281 |
+
if tool_name=='play_action':
|
| 282 |
+
moves+=1
|
| 283 |
+
|
| 284 |
try:
|
| 285 |
result = await client.call_tool(tool_name, tool_args)
|
| 286 |
self._update_score(self._extract_result(result))
|
|
|
|
| 296 |
locations_visited.add(self.location)
|
| 297 |
|
| 298 |
ignore_repeated.discard(prev_action)
|
| 299 |
+
prev_action = (tool_name, str(tool_args), self.location)
|
| 300 |
+
|
| 301 |
|
| 302 |
if prev_location != self.location and step>1:
|
| 303 |
self.history.append({
|
| 304 |
"step": step,
|
| 305 |
"tool": "Moved",
|
| 306 |
"from": prev_location,
|
| 307 |
+
"to": self.location,
|
| 308 |
+
})
|
| 309 |
+
else:
|
| 310 |
+
self.history.append({
|
| 311 |
+
"step": step,
|
| 312 |
+
"thought": thought,
|
| 313 |
+
"tool": tool_name,
|
| 314 |
+
"args": tool_args,
|
| 315 |
+
"result": observation,
|
| 316 |
+
"location": self.location
|
| 317 |
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 318 |
|
| 319 |
|
| 320 |
if len(self.history) > 100:
|
|
|
|
| 322 |
|
| 323 |
|
| 324 |
history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
|
|
|
|
|
|
|
| 325 |
if self._is_game_over(observation):
|
| 326 |
if verbose:
|
| 327 |
print(col("\n*** GAME OVER ***", "red"))
|
| 328 |
break
|
| 329 |
|
| 330 |
print (col(f"\nFinal Score: {self.score}", "magenta"))
|
| 331 |
+
print(f"Average promtp size {prompt_tokens/max_steps}")
|
| 332 |
return RunResult(
|
| 333 |
final_score=self.score,
|
| 334 |
max_score=350, # Zork1 max score, adjust if needed
|
|
|
|
| 346 |
return step - past["step"]
|
| 347 |
return -1
|
| 348 |
|
| 349 |
+
|
| 350 |
+
def measure_prompt_size(self, prompt: str) -> int:
    """Return how many tokens ``prompt`` occupies for the agent's tokenizer.

    The prompt is encoded with ``self.tokenizer`` (special tokens included)
    and the number of resulting token ids is reported.

    Args:
        prompt: The text that is about to be sent to the LLM.

    Returns:
        The count of token ids produced by the tokenizer.
    """
    return len(self.tokenizer.encode(prompt, add_special_tokens=True))
|
| 353 |
+
|
| 354 |
+
def resume_actions(self, history, context):
    """Fold recent actions into the running summary ("chronicle") of a location.

    Asks the LLM, through the module-level ``call_llm`` helper, to merge the
    existing summary with a batch of new events and returns the reply.

    Args:
        history: The new events to integrate (the caller passes the latest
            history entries recorded at the current location).
        context: The summary produced so far for that location; may be an
            empty string when nothing has been summarized yet.

    Returns:
        The value returned by ``call_llm`` - the updated plain-text summary.
    """
    system_prompt = '''You are the Game Chronicler for a Text Adventure. Your role is to synthesize player actions into a concise, meaningful narrative summary.
You will receive two inputs:
THE CHRONICLE: A summary of previous actions.
NEW EVENTS: Recent actions and outcomes to be integrated.
Guidelines for your output:
Prioritize Conciseness: Condense minor movements or repetitive attempts into single sentences. The summary must be brief and easy to read.
Track Failures: Explicitly state which actions did not work or were ineffective so the player knows what to avoid.
Highlight Discoveries: Emphasize interesting results, such as items acquired, lore revealed, or significant environmental changes.
Plain Text Only: Provide the summary in raw text. Do not use Markdown, bolding, or headers.
No Meta-Commentary: Do not include "Here is the summary" or any introductory filler. Start the summary immediately.
'''

    # The chronicle slot must carry the previous summary (`context`); sending
    # `history` in both slots would make every call forget the earlier summary.
    user_prompt = f"CHRONICLE: {context} NEW EVENTS: {history}"

    # Third argument is the seed; it is fixed so summaries are reproducible.
    return call_llm(user_prompt, system_prompt, 42)
|
| 369 |
+
|
| 370 |
+
def _build_prompt(self, observation: str, history: list, step: int, goal: str) -> str:
|
| 371 |
"""
|
| 372 |
Build the prompt for the LLM.
|
| 373 |
|
| 374 |
TODO: Implement this to create effective prompts
|
| 375 |
"""
|
| 376 |
prmt = []
|
| 377 |
+
prmt.append(f"[CURRENT LOCATION]: {self.location}")
|
| 378 |
|
| 379 |
kb="\n".join(self.knowledge_base)
|
| 380 |
if kb:
|
| 381 |
+
prmt.append(f"\n[KNOWLEDGE BASE]:\n{kb}")
|
| 382 |
|
| 383 |
|
| 384 |
|
|
|
|
| 386 |
chars_to_include = [100,50, 30 , 20]
|
| 387 |
|
| 388 |
if self.history:
|
|
|
|
| 389 |
if SHORT_TERM_MEM > len(chars_to_include)-1:
|
| 390 |
chars_to_include= [None]*(SHORT_TERM_MEM-len(chars_to_include)+1) + chars_to_include
|
| 391 |
|
| 392 |
+
past_history=[element for element in self.history if (element["tool"] == "Moved" or element["location"]!=self.location)]
|
| 393 |
+
if len(past_history):
|
| 394 |
+
prmt.append("\n[RECENT ACTIONS]:")
|
| 395 |
+
for i, past in enumerate(past_history[-SHORT_TERM_MEM-1:]):
|
| 396 |
+
rem = 6-step if step<6 else 0
|
| 397 |
+
if past["tool"] == "Moved":
|
| 398 |
+
prmt.append(f"- Moved from {past['from']} to {past['to']}")
|
| 399 |
+
continue
|
| 400 |
+
action=past["args"].get("action", "")
|
| 401 |
+
if not action:
|
| 402 |
+
action=past["tool"]
|
| 403 |
+
|
| 404 |
+
res_preview = past["result"][:chars_to_include[SHORT_TERM_MEM-i-rem]].replace('\n', ' ')
|
| 405 |
+
prmt.append(f"- {action} (Result: {res_preview}...)")
|
| 406 |
+
|
| 407 |
+
local_history = [
|
| 408 |
+
h for h in self.history
|
| 409 |
+
if h.get("location", None) is not None and h["location"] == self.location and h["tool"] == "play_action"
|
| 410 |
+
]
|
| 411 |
+
|
| 412 |
+
if local_history:
|
| 413 |
+
if len(local_history)%5==0:
|
| 414 |
+
self.actions_resume[self.location]=self.resume_actions(local_history[-5:],self.actions_resume[self.location])
|
| 415 |
+
|
| 416 |
+
prmt.append("\n[RESUME OF ALL PREVIOUS ACTIONS DONE AT THIS LOCATION]:")
|
| 417 |
+
prmt.append(self.actions_resume[self.location])
|
| 418 |
+
print(col(self.actions_resume[self.location],'red'))
|
| 419 |
+
|
| 420 |
+
else:
|
| 421 |
+
local_history = local_history[-5:]
|
| 422 |
+
if self.actions_resume[self.location]:
|
| 423 |
+
prmt.append(f"[RESUME OF ALL PREVIOUS ACTIONS DONE AT THIS LOCATION]: {self.actions_resume[self.location]}")
|
| 424 |
+
|
| 425 |
+
prmt.append("[LAST ACTIONS DONE AT THIS LOCATION]: ")
|
| 426 |
+
tried_actions = set()
|
| 427 |
+
for h in local_history:
|
| 428 |
+
action = h["args"].get("action", "")
|
| 429 |
+
if action not in tried_actions:
|
| 430 |
+
res_preview = h["result"].replace('\n', ' ')
|
| 431 |
+
prmt.append(f" - {action} (Result: {res_preview}...)")
|
| 432 |
+
tried_actions.add(action)
|
| 433 |
+
prmt.append("DO NOT repeat the above actions unless the environment has changed.")
|
| 434 |
+
|
| 435 |
|
| 436 |
+
prmt.append(f"\n[GOAL]: {goal} (If you want to change your general goal add [GOAL] to your answer)")
|
| 437 |
+
|
| 438 |
+
hints=[]
|
| 439 |
if observation not in self.answers and observation.strip()!="":
|
| 440 |
+
hint = f"If you found something worth remembering, add it to the knowledge base with the add_knowledge tool, so you can use it later, don't add descriptions, don't hesitate to use it. If you found and object that you cant take, take it, dont add the previous location of the object to the memory"
|
| 441 |
+
hints.append(hint)
|
| 442 |
self.answers.add(observation)
|
| 443 |
|
| 444 |
+
maxs=0
|
| 445 |
+
for element in self.history:
|
| 446 |
+
if element['tool']!='Moved' and element['location']!=self.location:
|
| 447 |
+
maxs=max(maxs,element['step'])
|
| 448 |
+
if step-maxs>=20:
|
| 449 |
+
hints.append("You have been in the same location a while, if you feel stagnated move around or use the map")
|
| 450 |
+
|
| 451 |
+
if hints:
|
| 452 |
+
prmt.append(f"\n[HINTS]: \n{"\n".join(hints)}")
|
| 453 |
|
| 454 |
prmt.append("\nWhat do you do next?")
|
| 455 |
|
| 456 |
dbg=col(f"\n[DEBUG] Prompt for step {step}:\n{"\n".join(prmt)}", "red")
|
| 457 |
dbg= '_'*80 + dbg + '\n' + '_'*80
|
| 458 |
#print(dbg)
|
| 459 |
+
prmt="\n".join(prmt)
|
| 460 |
+
return prmt
|
| 461 |
|
| 462 |
def _parse_response(self, response: str) -> tuple[str, str, dict]:
|
| 463 |
"""
|
|
|
|
| 471 |
thought = "No reasoning provided"
|
| 472 |
tool_name = "play_action"
|
| 473 |
tool_args = {"action": "look"}
|
| 474 |
+
goal=None
|
| 475 |
|
| 476 |
lines = response.strip().split("\n")
|
| 477 |
|
| 478 |
for line in lines:
|
| 479 |
line_clean = line.strip()
|
| 480 |
line_upper = line_clean.upper()
|
| 481 |
+
|
| 482 |
+
if line_upper.startswith("GOAL:"):
|
| 483 |
+
goal = line_clean.split(":", 1)[1].strip()
|
| 484 |
|
| 485 |
if line_upper.startswith("THOUGHT:"):
|
| 486 |
thought = line_clean.split(":", 1)[1].strip()
|
|
|
|
| 503 |
else:
|
| 504 |
tool_args = {"action": "look"}
|
| 505 |
|
| 506 |
+
return thought, tool_name, tool_args, goal
|
| 507 |
|
| 508 |
def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
|
| 509 |
"""
|
|
|
|
| 590 |
"you are dead",
|
| 591 |
"*** you have died ***",
|
| 592 |
]
|
| 593 |
+
chance_phrases = [
|
| 594 |
+
"another chance",
|
| 595 |
+
"second chance",
|
| 596 |
+
"another attempt"
|
| 597 |
+
]
|
| 598 |
text_lower = text.lower()
|
| 599 |
+
return any(phrase in text_lower for phrase in game_over_phrases) and not any(phrase in text_lower for phrase in chance_phrases)
|
| 600 |
|
| 601 |
# =============================================================================
|
| 602 |
# For local testing
|
mcp_server.py
CHANGED
|
@@ -26,6 +26,7 @@ Then open the MCP Inspector in your browser to test the tools interactively.
|
|
| 26 |
|
| 27 |
import sys
|
| 28 |
import os
|
|
|
|
| 29 |
# Add parent directory to path to import games module
|
| 30 |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 31 |
|
|
@@ -75,32 +76,38 @@ class GameManager:
|
|
| 75 |
return self.state.observation
|
| 76 |
|
| 77 |
def step(self, action: str) -> str:
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
prev_location = self.current_location
|
| 83 |
-
|
| 84 |
-
self.state = self.env.step(action)
|
| 85 |
-
new_location = self.state.location
|
| 86 |
-
|
| 87 |
-
self.history.append((action, self.state.observation))
|
| 88 |
-
|
| 89 |
-
if prev_location not in self.explored_locations:
|
| 90 |
-
self.explored_locations[prev_location] = set()
|
| 91 |
-
if new_location not in self.explored_locations:
|
| 92 |
-
self.explored_locations[new_location] = set()
|
| 93 |
-
|
| 94 |
-
inverse_action="inverse of "+action
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
if prev_location != new_location and prev_location != "Unknown":
|
| 98 |
-
self.explored_locations[prev_location].add(f"{action} -> {new_location}")
|
| 99 |
-
self.explored_locations[new_location].add(f"{inverse_action} -> {prev_location}")
|
| 100 |
-
|
| 101 |
-
self.current_location = new_location
|
| 102 |
-
return self.state.observation
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
def get_score(self) -> int:
|
| 105 |
"""Get current score."""
|
| 106 |
return self.state.score if self.state else 0
|
|
|
|
| 26 |
|
| 27 |
import sys
|
| 28 |
import os
|
| 29 |
+
from utils import graph_to_ascii
|
| 30 |
# Add parent directory to path to import games module
|
| 31 |
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 32 |
|
|
|
|
| 76 |
return self.state.observation
|
| 77 |
|
| 78 |
def step(self, action: str) -> str:
    """Send one command to the game and update the exploration map.

    The environment is initialized on demand. After the command runs, the
    (action, observation) pair is appended to ``self.history`` and both the
    previous and the new location get an entry in ``self.explored_locations``.
    When the command changed the location (and the previous location is
    known), a forward edge and a reverse edge are recorded between the two.

    Args:
        action: The game command to execute.

    Returns:
        The observation text produced by the game for this command.
    """
    if self.env is None:
        self.initialize()

    # Reverse direction for each movement command; anything not listed falls
    # back to a generic "return via <action>" label.
    reverse_of = {
        "north": "south", "south": "north", "east": "west", "west": "east",
        "up": "down", "down": "up", "in": "out", "out": "in",
        "enter": "exit", "exit": "enter", "ne": "sw", "sw": "ne", "nw": "se", "se": "nw"
    }

    origin = self.current_location
    self.state = self.env.step(action)
    destination = self.state.location

    self.history.append((action, self.state.observation))

    # Make sure both endpoints exist on the map, even if no edge is added.
    for place in (origin, destination):
        if place not in self.explored_locations:
            self.explored_locations[place] = set()

    moved = origin != destination
    if moved and origin != "Unknown":
        self.explored_locations[origin].add(f"{action} -> {destination}")
        way_back = reverse_of.get(action.lower(), f"return via {action}")
        self.explored_locations[destination].add(f"{way_back} -> {origin}")

    self.current_location = destination
    return self.state.observation
|
| 110 |
+
|
| 111 |
def get_score(self) -> int:
|
| 112 |
"""Get current score."""
|
| 113 |
return self.state.score if self.state else 0
|
requirements.txt
CHANGED
|
@@ -7,3 +7,5 @@
|
|
| 7 |
# Add any additional packages your agent needs below:
|
| 8 |
# numpy
|
| 9 |
# requests
|
|
|
|
|
|
|
|
|
| 7 |
# Add any additional packages your agent needs below:
|
| 8 |
# numpy
|
| 9 |
# requests
|
| 10 |
+
termcolor
|
| 11 |
+
transformers
|