Leonardo16AM commited on
Commit
045d72f
·
1 Parent(s): f45fc29

Added history resume

Browse files
Files changed (3) hide show
  1. agent.py +142 -41
  2. mcp_server.py +32 -25
  3. requirements.txt +2 -0
agent.py CHANGED
@@ -32,6 +32,7 @@ from typing import Optional
32
  from dotenv import load_dotenv
33
  from huggingface_hub import InferenceClient
34
  from termcolor import colored as col
 
35
 
36
  # Load environment variables
37
  load_dotenv()
@@ -42,6 +43,7 @@ load_dotenv()
42
 
43
  # Model to use (fixed for fair evaluation)
44
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
 
45
 
46
  # Initialize the LLM client (uses HF_TOKEN from environment)
47
  _hf_token = os.getenv("HF_TOKEN")
@@ -113,36 +115,44 @@ AVAILABLE TOOLS (use via MCP):
113
  - inventory: Check what you're carrying
114
  - get_map: See explored locations and connections
115
  - current_location: Get your current location name
116
- - get_valid_actions: Get a list of valid actions in the current context
117
  - add_knowledge: Add information to your knowledge base (args: {"info": "text to remember"})
118
 
119
  VALID GAME COMMANDS for play_action:
120
  - Movement: north, south, east, west, up, down, enter, exit, wait
121
  - Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>,
122
- put <item> on <thing>, put <item> in <thing>, push <thing>, pull <thing>, turn <thing>, feel <thing>
123
  - Other: look, inventory, read <thing>, turn on lamp
124
  - Interactions: talk to <npc>, give <item> to <npc>, ask <npc> about <topic>, tell <npc> about <message>, show <item> to <npc>
125
  - Game: undo, hint
126
 
127
  RESPOND IN THIS EXACT FORMAT (no markdown):
128
  THOUGHT: <your reasoning about what to do next>
 
129
  TOOL: <tool_name>
130
  ARGS: <JSON arguments, e.g., {"action": "look"}>
131
 
132
  Example:
133
  THOUGHT: I should look around to see where I am.
 
134
  TOOL: play_action
135
  ARGS: {"action": "look"}
136
 
 
 
 
 
 
 
 
137
  STRATEGY:
138
  1. Start by looking around
139
- 2. Explore systematically
140
  3. Examine everything you find, or try to interact
141
- 4. Pick up useful items (lamp, sword, etc.) or everything that you can take
142
  5. Open containers (mailbox, window, etc.)
143
  6. Use get_map if you don't know where to go
144
- 7. If you find NPCs, talk to them and see if they have useful information or items
145
- 8. If you are lost use the map or memory tools
146
  9. Any odd fact is worth remembering with add_knowledge, it might be useful later
147
  10. Use your senses: listen, smell, touch
148
 
@@ -171,8 +181,11 @@ class StudentAgent:
171
  self.history: list[dict] = []
172
  self.score: int = 0
173
  self.location: str = "Unknown"
 
174
  self.knowledge_base: list[str] = []
175
  self.answers=set()
 
 
176
 
177
  async def run(
178
  self,
@@ -214,18 +227,26 @@ class StudentAgent:
214
  if verbose:
215
  print(f"\n Observation:{observation}")
216
 
 
217
  for step in range(1, max_steps + 1):
218
- prompt = self._build_prompt(observation, self.history, step)
 
 
 
219
  response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
220
 
221
- thought, tool_name, tool_args = self._parse_response(response)
 
 
 
222
 
223
-
224
  location = await client.call_tool("current_location", {})
225
  location = location.structured_content['result']
226
  location = re.search(r':\s*(.*?)\s*Parent', location).group(1)
227
  prev_location = self.location
228
  self.location = location if location else "Unknown"
 
 
229
 
230
  if verbose:
231
  print(f"\n__________________________________________________ Step {step} __________________________________________________")
@@ -234,6 +255,8 @@ class StudentAgent:
234
  print(col(f"[TOOL] ", "blue")+col(f"{tool_name}: {tool_args}", "yellow"))
235
  print(col(f"[LOCATION] {location}", "blue"))
236
  print(col(f"[KNOWLEDGE] {self.knowledge_base}", "cyan"))
 
 
237
 
238
 
239
 
@@ -255,6 +278,9 @@ class StudentAgent:
255
  if verbose:
256
  print(col(f"[LOCAL ACTION] Knowledge updated", "green"))
257
  else:
 
 
 
258
  try:
259
  result = await client.call_tool(tool_name, tool_args)
260
  self._update_score(self._extract_result(result))
@@ -270,23 +296,25 @@ class StudentAgent:
270
  locations_visited.add(self.location)
271
 
272
  ignore_repeated.discard(prev_action)
 
 
273
 
274
  if prev_location != self.location and step>1:
275
  self.history.append({
276
  "step": step,
277
  "tool": "Moved",
278
  "from": prev_location,
279
- "to": self.location
 
 
 
 
 
 
 
 
 
280
  })
281
-
282
- self.history.append({
283
- "step": step,
284
- "thought": thought,
285
- "tool": tool_name,
286
- "args": tool_args,
287
- "result": observation[:200],
288
- "location": self.location
289
- })
290
 
291
 
292
  if len(self.history) > 100:
@@ -294,14 +322,13 @@ class StudentAgent:
294
 
295
 
296
  history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
297
- prev_action = (tool_name, str(tool_args), self.location)
298
-
299
  if self._is_game_over(observation):
300
  if verbose:
301
  print(col("\n*** GAME OVER ***", "red"))
302
  break
303
 
304
  print (col(f"\nFinal Score: {self.score}", "magenta"))
 
305
  return RunResult(
306
  final_score=self.score,
307
  max_score=350, # Zork1 max score, adjust if needed
@@ -319,18 +346,39 @@ class StudentAgent:
319
  return step - past["step"]
320
  return -1
321
 
322
- def _build_prompt(self, observation: str, history: list, step: int) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  """
324
  Build the prompt for the LLM.
325
 
326
  TODO: Implement this to create effective prompts
327
  """
328
  prmt = []
329
- prmt.append(f"You are in location: {self.location}")
330
 
331
  kb="\n".join(self.knowledge_base)
332
  if kb:
333
- prmt.append(f"\nKnowledge Base:\n{kb}")
334
 
335
 
336
 
@@ -338,34 +386,78 @@ class StudentAgent:
338
  chars_to_include = [100,50, 30 , 20]
339
 
340
  if self.history:
341
- prmt.append("\nRecent actions:")
342
  if SHORT_TERM_MEM > len(chars_to_include)-1:
343
  chars_to_include= [None]*(SHORT_TERM_MEM-len(chars_to_include)+1) + chars_to_include
344
 
345
- for i, past in enumerate(self.history[-SHORT_TERM_MEM-1:]):
346
- rem = 6-step if step<6 else 0
347
- if past["tool"] == "Moved":
348
- prmt.append(f"\nStep {past['step']}: Moved from {past['from']} to {past['to']}")
349
- continue
350
- tool = f"{past['tool']}({past['args']})"[:chars_to_include[SHORT_TERM_MEM-i-rem]]+'...' if chars_to_include[SHORT_TERM_MEM-i-rem] else f"{past['tool']}({past['args']})"
351
- result = past["result"][:chars_to_include[SHORT_TERM_MEM-i-rem]]+'...' if chars_to_include[SHORT_TERM_MEM-i-rem] else past["result"]
352
- prmt.append(f"\nStep {past['step']}: Tool: {tool} Result: {result}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
 
354
- hint=""
 
 
355
  if observation not in self.answers and observation.strip()!="":
356
- hint = f"If you found something worth remembering, add it to the knowledge base with the add_knowledge tool, so you can use it later, don't add descriptions. If you found an object that you can't take, take it, don't add the previous location of the object to the memory"
 
357
  self.answers.add(observation)
358
 
359
- if hint:
360
- prmt.append(f"\nHint :\n{hint}")
 
 
 
 
 
 
 
361
 
362
  prmt.append("\nWhat do you do next?")
363
 
364
  dbg=col(f"\n[DEBUG] Prompt for step {step}:\n{"\n".join(prmt)}", "red")
365
  dbg= '_'*80 + dbg + '\n' + '_'*80
366
  #print(dbg)
367
-
368
- return "\n".join(prmt)
369
 
370
  def _parse_response(self, response: str) -> tuple[str, str, dict]:
371
  """
@@ -379,12 +471,16 @@ class StudentAgent:
379
  thought = "No reasoning provided"
380
  tool_name = "play_action"
381
  tool_args = {"action": "look"}
 
382
 
383
  lines = response.strip().split("\n")
384
 
385
  for line in lines:
386
  line_clean = line.strip()
387
  line_upper = line_clean.upper()
 
 
 
388
 
389
  if line_upper.startswith("THOUGHT:"):
390
  thought = line_clean.split(":", 1)[1].strip()
@@ -407,7 +503,7 @@ class StudentAgent:
407
  else:
408
  tool_args = {"action": "look"}
409
 
410
- return thought, tool_name, tool_args
411
 
412
  def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
413
  """
@@ -494,8 +590,13 @@ class StudentAgent:
494
  "you are dead",
495
  "*** you have died ***",
496
  ]
 
 
 
 
 
497
  text_lower = text.lower()
498
- return any(phrase in text_lower for phrase in game_over_phrases)
499
 
500
  # =============================================================================
501
  # For local testing
 
32
  from dotenv import load_dotenv
33
  from huggingface_hub import InferenceClient
34
  from termcolor import colored as col
35
+ from transformers import AutoTokenizer
36
 
37
  # Load environment variables
38
  load_dotenv()
 
43
 
44
  # Model to use (fixed for fair evaluation)
45
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
46
+ #LLM_MODEL = "Qwen/Qwen2.5-7B-Instruct"
47
 
48
  # Initialize the LLM client (uses HF_TOKEN from environment)
49
  _hf_token = os.getenv("HF_TOKEN")
 
115
  - inventory: Check what you're carrying
116
  - get_map: See explored locations and connections
117
  - current_location: Get your current location name
 
118
  - add_knowledge: Add information to your knowledge base (args: {"info": "text to remember"})
119
 
120
  VALID GAME COMMANDS for play_action:
121
  - Movement: north, south, east, west, up, down, enter, exit, wait
122
  - Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>,
123
+ put <item> on <thing>, put <item> in <thing>, push <thing>, pull <thing>, turn <thing>, feel <thing>, look in <thing>, look under <thing>
124
  - Other: look, inventory, read <thing>, turn on lamp
125
  - Interactions: talk to <npc>, give <item> to <npc>, ask <npc> about <topic>, tell <npc> about <message>, show <item> to <npc>
126
  - Game: undo, hint
127
 
128
  RESPOND IN THIS EXACT FORMAT (no markdown):
129
  THOUGHT: <your reasoning about what to do next>
130
+ GOAL: <what is your current main objective?> (This line is optional, only add it to change your goal)
131
  TOOL: <tool_name>
132
  ARGS: <JSON arguments, e.g., {"action": "look"}>
133
 
134
  Example:
135
  THOUGHT: I should look around to see where I am.
136
+ GOAL: Go inside the house
137
  TOOL: play_action
138
  ARGS: {"action": "look"}
139
 
140
+ Actions that don't work:
141
+ examine <thing> closely
142
+ look for objects
143
+ look for <thing>
144
+ examine <thing> in detail
145
+
146
+
147
  STRATEGY:
148
  1. Start by looking around
149
+ 2. EXPLORE systematically, Look in/under, objects may be hidden
150
  3. Examine everything you find, or try to interact
151
+ 4. Pick up useful items (lamp, sword, etc.) or everything that you can take, examine BEFORE taking
152
  5. Open containers (mailbox, window, etc.)
153
  6. Use get_map if you don't know where to go
154
+ 7. If you find NPCs, talk to them and see if they have useful information or items, exhaust dialogue
155
+ 8. If you are lost use the MAP or memory tools
156
  9. Any odd fact is worth remembering with add_knowledge, it might be useful later
157
  10. Use your senses: listen, smell, touch
158
 
 
181
  self.history: list[dict] = []
182
  self.score: int = 0
183
  self.location: str = "Unknown"
184
+ self.goal: str= "Not found general goal at the moment"
185
  self.knowledge_base: list[str] = []
186
  self.answers=set()
187
+ self.tokenizer= AutoTokenizer.from_pretrained(LLM_MODEL)
188
+ self.actions_resume={}
189
 
190
  async def run(
191
  self,
 
227
  if verbose:
228
  print(f"\n Observation:{observation}")
229
 
230
+ prompt_tokens=0
231
  for step in range(1, max_steps + 1):
232
+ prompt = self._build_prompt(observation, self.history, step, self.goal)
233
+ prompt_size=self.measure_prompt_size(prompt)
234
+ print(f"[PROMPT TOKENS] {prompt_size}")
235
+ prompt_tokens+=prompt_size
236
  response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
237
 
238
+ thought, tool_name, tool_args, goal = self._parse_response(response)
239
+
240
+ if goal:
241
+ self.goal=goal
242
 
 
243
  location = await client.call_tool("current_location", {})
244
  location = location.structured_content['result']
245
  location = re.search(r':\s*(.*?)\s*Parent', location).group(1)
246
  prev_location = self.location
247
  self.location = location if location else "Unknown"
248
+ if self.actions_resume.get(location,None)==None:
249
+ self.actions_resume[location]=""
250
 
251
  if verbose:
252
  print(f"\n__________________________________________________ Step {step} __________________________________________________")
 
255
  print(col(f"[TOOL] ", "blue")+col(f"{tool_name}: {tool_args}", "yellow"))
256
  print(col(f"[LOCATION] {location}", "blue"))
257
  print(col(f"[KNOWLEDGE] {self.knowledge_base}", "cyan"))
258
+ if goal:
259
+ print(col(f"[NEW GOAL] {goal}", "cyan"))
260
 
261
 
262
 
 
278
  if verbose:
279
  print(col(f"[LOCAL ACTION] Knowledge updated", "green"))
280
  else:
281
+ if tool_name=='play_action':
282
+ moves+=1
283
+
284
  try:
285
  result = await client.call_tool(tool_name, tool_args)
286
  self._update_score(self._extract_result(result))
 
296
  locations_visited.add(self.location)
297
 
298
  ignore_repeated.discard(prev_action)
299
+ prev_action = (tool_name, str(tool_args), self.location)
300
+
301
 
302
  if prev_location != self.location and step>1:
303
  self.history.append({
304
  "step": step,
305
  "tool": "Moved",
306
  "from": prev_location,
307
+ "to": self.location,
308
+ })
309
+ else:
310
+ self.history.append({
311
+ "step": step,
312
+ "thought": thought,
313
+ "tool": tool_name,
314
+ "args": tool_args,
315
+ "result": observation,
316
+ "location": self.location
317
  })
 
 
 
 
 
 
 
 
 
318
 
319
 
320
  if len(self.history) > 100:
 
322
 
323
 
324
  history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
 
 
325
  if self._is_game_over(observation):
326
  if verbose:
327
  print(col("\n*** GAME OVER ***", "red"))
328
  break
329
 
330
  print (col(f"\nFinal Score: {self.score}", "magenta"))
331
+ print(f"Average prompt size {prompt_tokens/max_steps}")
332
  return RunResult(
333
  final_score=self.score,
334
  max_score=350, # Zork1 max score, adjust if needed
 
346
  return step - past["step"]
347
  return -1
348
 
349
+
350
+ def measure_prompt_size(self, prompt: str) -> int:
351
+ token_ids = self.tokenizer.encode(prompt, add_special_tokens=True)
352
+ return len(token_ids)
353
+
354
+ def resume_actions(self,history,context):
355
+ SYS_PROMPT='''You are the Game Chronicler for a Text Adventure. Your role is to synthesize player actions into a concise, meaningful narrative summary.
356
+ You will receive two inputs:
357
+ THE CHRONICLE: A summary of previous actions.
358
+ NEW EVENTS: Recent actions and outcomes to be integrated.
359
+ Guidelines for your output:
360
+ Prioritize Conciseness: Condense minor movements or repetitive attempts into single sentences. The summary must be brief and easy to read.
361
+ Track Failures: Explicitly state which actions did not work or were ineffective so the player knows what to avoid.
362
+ Highlight Discoveries: Emphasize interesting results, such as items acquired, lore revealed, or significant environmental changes.
363
+ Plain Text Only: Provide the summary in raw text. Do not use Markdown, bolding, or headers.
364
+ No Meta-Commentary: Do not include "Here is the summary" or any introductory filler. Start the summary immediately.
365
+ '''
366
+
367
+ response = call_llm(f"CHRONICLE: {history} NEW EVENTS: {str(history)}", SYS_PROMPT, 42)
368
+ return response
369
+
370
+ def _build_prompt(self, observation: str, history: list, step: int, goal: str) -> str:
371
  """
372
  Build the prompt for the LLM.
373
 
374
  TODO: Implement this to create effective prompts
375
  """
376
  prmt = []
377
+ prmt.append(f"[CURRENT LOCATION]: {self.location}")
378
 
379
  kb="\n".join(self.knowledge_base)
380
  if kb:
381
+ prmt.append(f"\n[KNOWLEDGE BASE]:\n{kb}")
382
 
383
 
384
 
 
386
  chars_to_include = [100,50, 30 , 20]
387
 
388
  if self.history:
 
389
  if SHORT_TERM_MEM > len(chars_to_include)-1:
390
  chars_to_include= [None]*(SHORT_TERM_MEM-len(chars_to_include)+1) + chars_to_include
391
 
392
+ past_history=[element for element in self.history if (element["tool"] == "Moved" or element["location"]!=self.location)]
393
+ if len(past_history):
394
+ prmt.append("\n[RECENT ACTIONS]:")
395
+ for i, past in enumerate(past_history[-SHORT_TERM_MEM-1:]):
396
+ rem = 6-step if step<6 else 0
397
+ if past["tool"] == "Moved":
398
+ prmt.append(f"- Moved from {past['from']} to {past['to']}")
399
+ continue
400
+ action=past["args"].get("action", "")
401
+ if not action:
402
+ action=past["tool"]
403
+
404
+ res_preview = past["result"][:chars_to_include[SHORT_TERM_MEM-i-rem]].replace('\n', ' ')
405
+ prmt.append(f"- {action} (Result: {res_preview}...)")
406
+
407
+ local_history = [
408
+ h for h in self.history
409
+ if h.get("location", None) is not None and h["location"] == self.location and h["tool"] == "play_action"
410
+ ]
411
+
412
+ if local_history:
413
+ if len(local_history)%5==0:
414
+ self.actions_resume[self.location]=self.resume_actions(local_history[-5:],self.actions_resume[self.location])
415
+
416
+ prmt.append("\n[RESUME OF ALL PREVIOUS ACTIONS DONE AT THIS LOCATION]:")
417
+ prmt.append(self.actions_resume[self.location])
418
+ print(col(self.actions_resume[self.location],'red'))
419
+
420
+ else:
421
+ local_history = local_history[-5:]
422
+ if self.actions_resume[self.location]:
423
+ prmt.append(f"[RESUME OF ALL PREVIOUS ACTIONS DONE AT THIS LOCATION]: {self.actions_resume[self.location]}")
424
+
425
+ prmt.append("[LAST ACTIONS DONE AT THIS LOCATION]: ")
426
+ tried_actions = set()
427
+ for h in local_history:
428
+ action = h["args"].get("action", "")
429
+ if action not in tried_actions:
430
+ res_preview = h["result"].replace('\n', ' ')
431
+ prmt.append(f" - {action} (Result: {res_preview}...)")
432
+ tried_actions.add(action)
433
+ prmt.append("DO NOT repeat the above actions unless the environment has changed.")
434
+
435
 
436
+ prmt.append(f"\n[GOAL]: {goal} (If you want to change your general goal add [GOAL] to your answer)")
437
+
438
+ hints=[]
439
  if observation not in self.answers and observation.strip()!="":
440
+ hint = f"If you found something worth remembering, add it to the knowledge base with the add_knowledge tool, so you can use it later, don't add descriptions, don't hesitate to use it. If you found an object that you can't take, take it, don't add the previous location of the object to the memory"
441
+ hints.append(hint)
442
  self.answers.add(observation)
443
 
444
+ maxs=0
445
+ for element in self.history:
446
+ if element['tool']!='Moved' and element['location']!=self.location:
447
+ maxs=max(maxs,element['step'])
448
+ if step-maxs>=20:
449
+ hints.append("You have been in the same location a while, if you feel stagnated move around or use the map")
450
+
451
+ if hints:
452
+ prmt.append(f"\n[HINTS]: \n{"\n".join(hints)}")
453
 
454
  prmt.append("\nWhat do you do next?")
455
 
456
  dbg=col(f"\n[DEBUG] Prompt for step {step}:\n{"\n".join(prmt)}", "red")
457
  dbg= '_'*80 + dbg + '\n' + '_'*80
458
  #print(dbg)
459
+ prmt="\n".join(prmt)
460
+ return prmt
461
 
462
  def _parse_response(self, response: str) -> tuple[str, str, dict]:
463
  """
 
471
  thought = "No reasoning provided"
472
  tool_name = "play_action"
473
  tool_args = {"action": "look"}
474
+ goal=None
475
 
476
  lines = response.strip().split("\n")
477
 
478
  for line in lines:
479
  line_clean = line.strip()
480
  line_upper = line_clean.upper()
481
+
482
+ if line_upper.startswith("GOAL:"):
483
+ goal = line_clean.split(":", 1)[1].strip()
484
 
485
  if line_upper.startswith("THOUGHT:"):
486
  thought = line_clean.split(":", 1)[1].strip()
 
503
  else:
504
  tool_args = {"action": "look"}
505
 
506
+ return thought, tool_name, tool_args, goal
507
 
508
  def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
509
  """
 
590
  "you are dead",
591
  "*** you have died ***",
592
  ]
593
+ chance_phrases = [
594
+ "another chance",
595
+ "second chance",
596
+ "another attempt"
597
+ ]
598
  text_lower = text.lower()
599
+ return any(phrase in text_lower for phrase in game_over_phrases) and not any(phrase in text_lower for phrase in chance_phrases)
600
 
601
  # =============================================================================
602
  # For local testing
mcp_server.py CHANGED
@@ -26,6 +26,7 @@ Then open the MCP Inspector in your browser to test the tools interactively.
26
 
27
  import sys
28
  import os
 
29
  # Add parent directory to path to import games module
30
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
31
 
@@ -75,32 +76,38 @@ class GameManager:
75
  return self.state.observation
76
 
77
  def step(self, action: str) -> str:
78
- """Execute an action and return the result."""
79
- if self.env is None:
80
- self.initialize()
81
-
82
- prev_location = self.current_location
83
-
84
- self.state = self.env.step(action)
85
- new_location = self.state.location
86
-
87
- self.history.append((action, self.state.observation))
88
-
89
- if prev_location not in self.explored_locations:
90
- self.explored_locations[prev_location] = set()
91
- if new_location not in self.explored_locations:
92
- self.explored_locations[new_location] = set()
93
-
94
- inverse_action="inverse of "+action
95
-
96
-
97
- if prev_location != new_location and prev_location != "Unknown":
98
- self.explored_locations[prev_location].add(f"{action} -> {new_location}")
99
- self.explored_locations[new_location].add(f"{inverse_action} -> {prev_location}")
100
-
101
- self.current_location = new_location
102
- return self.state.observation
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  def get_score(self) -> int:
105
  """Get current score."""
106
  return self.state.score if self.state else 0
 
26
 
27
  import sys
28
  import os
29
+ from utils import graph_to_ascii
30
  # Add parent directory to path to import games module
31
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
32
 
 
76
  return self.state.observation
77
 
78
  def step(self, action: str) -> str:
79
+ """Execute an action and return the result."""
80
+ if self.env is None:
81
+ self.initialize()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
+ prev_location = self.current_location
84
+
85
+ OPPOSITES = {
86
+ "north": "south", "south": "north", "east": "west", "west": "east",
87
+ "up": "down", "down": "up", "in": "out", "out": "in",
88
+ "enter": "exit", "exit": "enter", "ne": "sw", "sw": "ne", "nw": "se", "se": "nw"
89
+ }
90
+
91
+ self.state = self.env.step(action)
92
+ new_location = self.state.location
93
+
94
+
95
+ self.history.append((action, self.state.observation))
96
+
97
+ if prev_location not in self.explored_locations:
98
+ self.explored_locations[prev_location] = set()
99
+ if new_location not in self.explored_locations:
100
+ self.explored_locations[new_location] = set()
101
+
102
+ if prev_location != new_location and prev_location != "Unknown":
103
+ self.explored_locations[prev_location].add(f"{action} -> {new_location}")
104
+ inverse_action = OPPOSITES.get(action.lower(), f"return via {action}")
105
+ self.explored_locations[new_location].add(f"{inverse_action} -> {prev_location}")
106
+
107
+
108
+ self.current_location = new_location
109
+ return self.state.observation
110
+
111
  def get_score(self) -> int:
112
  """Get current score."""
113
  return self.state.score if self.state else 0
requirements.txt CHANGED
@@ -7,3 +7,5 @@
7
  # Add any additional packages your agent needs below:
8
  # numpy
9
  # requests
 
 
 
7
  # Add any additional packages your agent needs below:
8
  # numpy
9
  # requests
10
+ termcolor
11
+ transformers