Leonardo16AM commited on
Commit
045d72f
·
1 Parent(s): f45fc29

Added history resume

Browse files
Files changed (3) hide show
  1. agent.py +142 -41
  2. mcp_server.py +32 -25
  3. requirements.txt +2 -0
agent.py CHANGED
@@ -32,6 +32,7 @@ from typing import Optional
32
  from dotenv import load_dotenv
33
  from huggingface_hub import InferenceClient
34
  from termcolor import colored as col
 
35
 
36
  # Load environment variables
37
  load_dotenv()
@@ -42,6 +43,7 @@ load_dotenv()
42
 
43
  # Model to use (fixed for fair evaluation)
44
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
 
45
 
46
  # Initialize the LLM client (uses HF_TOKEN from environment)
47
  _hf_token = os.getenv("HF_TOKEN")
@@ -113,36 +115,44 @@ AVAILABLE TOOLS (use via MCP):
113
  - inventory: Check what you're carrying
114
  - get_map: See explored locations and connections
115
  - current_location: Get your current location name
116
- - get_valid_actions: Get a list of valid actions in the current context
117
  - add_knowledge: Add information to your knowledge base (args: {"info": "text to remember"})
118
 
119
  VALID GAME COMMANDS for play_action:
120
  - Movement: north, south, east, west, up, down, enter, exit, wait
121
  - Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>,
122
- put <item> on <thing>, put <item> in <thing>, push <thing>, pull <thing>, turn <thing>, feel <thing>
123
  - Other: look, inventory, read <thing>, turn on lamp
124
  - Interactions: talk to <npc>, give <item> to <npc>, ask <npc> about <topic>, tell <npc> about <message>, show <item> to <npc>
125
  - Game: undo, hint
126
 
127
  RESPOND IN THIS EXACT FORMAT (no markdown):
128
  THOUGHT: <your reasoning about what to do next>
 
129
  TOOL: <tool_name>
130
  ARGS: <JSON arguments, e.g., {"action": "look"}>
131
 
132
  Example:
133
  THOUGHT: I should look around to see where I am.
 
134
  TOOL: play_action
135
  ARGS: {"action": "look"}
136
 
 
 
 
 
 
 
 
137
  STRATEGY:
138
  1. Start by looking around
139
- 2. Explore systematically
140
  3. Examine everything you find, or try to interact
141
- 4. Pick up useful items (lamp, sword, etc.) or everything that you can take
142
  5. Open containers (mailbox, window, etc.)
143
  6. Use get_map if you don't know where to go
144
- 7. If you find NPCs, talk to them and see if they have useful information or items
145
- 8. If you are lost use the map or memory tools
146
  9. Any odd fact is worth remembering with add_knowledge, it might be useful later
147
  10. Use your senses: listen, smell, touch
148
 
@@ -171,8 +181,11 @@ class StudentAgent:
171
  self.history: list[dict] = []
172
  self.score: int = 0
173
  self.location: str = "Unknown"
 
174
  self.knowledge_base: list[str] = []
175
  self.answers=set()
 
 
176
 
177
  async def run(
178
  self,
@@ -214,18 +227,26 @@ class StudentAgent:
214
  if verbose:
215
  print(f"\n Observation:{observation}")
216
 
 
217
  for step in range(1, max_steps + 1):
218
- prompt = self._build_prompt(observation, self.history, step)
 
 
 
219
  response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
220
 
221
- thought, tool_name, tool_args = self._parse_response(response)
 
 
 
222
 
223
-
224
  location = await client.call_tool("current_location", {})
225
  location = location.structured_content['result']
226
  location = re.search(r':\s*(.*?)\s*Parent', location).group(1)
227
  prev_location = self.location
228
  self.location = location if location else "Unknown"
 
 
229
 
230
  if verbose:
231
  print(f"\n__________________________________________________ Step {step} __________________________________________________")
@@ -234,6 +255,8 @@ class StudentAgent:
234
  print(col(f"[TOOL] ", "blue")+col(f"{tool_name}: {tool_args}", "yellow"))
235
  print(col(f"[LOCATION] {location}", "blue"))
236
  print(col(f"[KNOWLEDGE] {self.knowledge_base}", "cyan"))
 
 
237
 
238
 
239
 
@@ -255,6 +278,9 @@ class StudentAgent:
255
  if verbose:
256
  print(col(f"[LOCAL ACTION] Knowledge updated", "green"))
257
  else:
 
 
 
258
  try:
259
  result = await client.call_tool(tool_name, tool_args)
260
  self._update_score(self._extract_result(result))
@@ -270,23 +296,25 @@ class StudentAgent:
270
  locations_visited.add(self.location)
271
 
272
  ignore_repeated.discard(prev_action)
 
 
273
 
274
  if prev_location != self.location and step>1:
275
  self.history.append({
276
  "step": step,
277
  "tool": "Moved",
278
  "from": prev_location,
279
- "to": self.location
 
 
 
 
 
 
 
 
 
280
  })
281
-
282
- self.history.append({
283
- "step": step,
284
- "thought": thought,
285
- "tool": tool_name,
286
- "args": tool_args,
287
- "result": observation[:200],
288
- "location": self.location
289
- })
290
 
291
 
292
  if len(self.history) > 100:
@@ -294,14 +322,13 @@ class StudentAgent:
294
 
295
 
296
  history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
297
- prev_action = (tool_name, str(tool_args), self.location)
298
-
299
  if self._is_game_over(observation):
300
  if verbose:
301
  print(col("\n*** GAME OVER ***", "red"))
302
  break
303
 
304
  print (col(f"\nFinal Score: {self.score}", "magenta"))
 
305
  return RunResult(
306
  final_score=self.score,
307
  max_score=350, # Zork1 max score, adjust if needed
@@ -319,18 +346,39 @@ class StudentAgent:
319
  return step - past["step"]
320
  return -1
321
 
322
- def _build_prompt(self, observation: str, history: list, step: int) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  """
324
  Build the prompt for the LLM.
325
 
326
  TODO: Implement this to create effective prompts
327
  """
328
  prmt = []
329
- prmt.append(f"You are in location: {self.location}")
330
 
331
  kb="\n".join(self.knowledge_base)
332
  if kb:
333
- prmt.append(f"\nKnowledge Base:\n{kb}")
334
 
335
 
336
 
@@ -338,34 +386,78 @@ class StudentAgent:
338
  chars_to_include = [100,50, 30 , 20]
339
 
340
  if self.history:
341
- prmt.append("\nRecent actions:")
342
  if SHORT_TERM_MEM > len(chars_to_include)-1:
343
  chars_to_include= [None]*(SHORT_TERM_MEM-len(chars_to_include)+1) + chars_to_include
344
 
345
- for i, past in enumerate(self.history[-SHORT_TERM_MEM-1:]):
346
- rem = 6-step if step<6 else 0
347
- if past["tool"] == "Moved":
348
- prmt.append(f"\nStep {past['step']}: Moved from {past['from']} to {past['to']}")
349
- continue
350
- tool = f"{past['tool']}({past['args']})"[:chars_to_include[SHORT_TERM_MEM-i-rem]]+'...' if chars_to_include[SHORT_TERM_MEM-i-rem] else f"{past['tool']}({past['args']})"
351
- result = past["result"][:chars_to_include[SHORT_TERM_MEM-i-rem]]+'...' if chars_to_include[SHORT_TERM_MEM-i-rem] else past["result"]
352
- prmt.append(f"\nStep {past['step']}: Tool: {tool} Result: {result}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
 
354
- hint=""
 
 
355
  if observation not in self.answers and observation.strip()!="":
356
- hint = f"If you found something worth remembering, add it to the knowledge base with the add_knowledge tool, so you can use it later, don't add descriptions. If you found an object that you can't take, take it, don't add the previous location of the object to the memory"
 
357
  self.answers.add(observation)
358
 
359
- if hint:
360
- prmt.append(f"\nHint :\n{hint}")
 
 
 
 
 
 
 
361
 
362
  prmt.append("\nWhat do you do next?")
363
 
364
  dbg=col(f"\n[DEBUG] Prompt for step {step}:\n{"\n".join(prmt)}", "red")
365
  dbg= '_'*80 + dbg + '\n' + '_'*80
366
  #print(dbg)
367
-
368
- return "\n".join(prmt)
369
 
370
  def _parse_response(self, response: str) -> tuple[str, str, dict]:
371
  """
@@ -379,12 +471,16 @@ class StudentAgent:
379
  thought = "No reasoning provided"
380
  tool_name = "play_action"
381
  tool_args = {"action": "look"}
 
382
 
383
  lines = response.strip().split("\n")
384
 
385
  for line in lines:
386
  line_clean = line.strip()
387
  line_upper = line_clean.upper()
 
 
 
388
 
389
  if line_upper.startswith("THOUGHT:"):
390
  thought = line_clean.split(":", 1)[1].strip()
@@ -407,7 +503,7 @@ class StudentAgent:
407
  else:
408
  tool_args = {"action": "look"}
409
 
410
- return thought, tool_name, tool_args
411
 
412
  def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
413
  """
@@ -494,8 +590,13 @@ class StudentAgent:
494
  "you are dead",
495
  "*** you have died ***",
496
  ]
 
 
 
 
 
497
  text_lower = text.lower()
498
- return any(phrase in text_lower for phrase in game_over_phrases)
499
 
500
  # =============================================================================
501
  # For local testing
 
32
  from dotenv import load_dotenv
33
  from huggingface_hub import InferenceClient
34
  from termcolor import colored as col
35
+ from transformers import AutoTokenizer
36
 
37
  # Load environment variables
38
  load_dotenv()
 
43
 
44
  # Model to use (fixed for fair evaluation)
45
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
46
+ #LLM_MODEL = "Qwen/Qwen2.5-7B-Instruct"
47
 
48
  # Initialize the LLM client (uses HF_TOKEN from environment)
49
  _hf_token = os.getenv("HF_TOKEN")
 
115
  - inventory: Check what you're carrying
116
  - get_map: See explored locations and connections
117
  - current_location: Get your current location name
 
118
  - add_knowledge: Add information to your knowledge base (args: {"info": "text to remember"})
119
 
120
  VALID GAME COMMANDS for play_action:
121
  - Movement: north, south, east, west, up, down, enter, exit, wait
122
  - Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>,
123
+ put <item> on <thing>, put <item> in <thing>, push <thing>, pull <thing>, turn <thing>, feel <thing>, look in <thing>, look under <thing>
124
  - Other: look, inventory, read <thing>, turn on lamp
125
  - Interactions: talk to <npc>, give <item> to <npc>, ask <npc> about <topic>, tell <npc> about <message>, show <item> to <npc>
126
  - Game: undo, hint
127
 
128
  RESPOND IN THIS EXACT FORMAT (no markdown):
129
  THOUGHT: <your reasoning about what to do next>
130
+ GOAL: <what is your current main objective?> (This line is optional, only add it to change your goal)
131
  TOOL: <tool_name>
132
  ARGS: <JSON arguments, e.g., {"action": "look"}>
133
 
134
  Example:
135
  THOUGHT: I should look around to see where I am.
136
+ GOAL: Go inside the house
137
  TOOL: play_action
138
  ARGS: {"action": "look"}
139
 
140
+ Actions that don't work:
141
+ examine <thing> closely
142
+ look for objects
143
+ look for <thing>
144
+ examine <thing> in detail
145
+
146
+
147
  STRATEGY:
148
  1. Start by looking around
149
+ 2. EXPLORE systematically, Look in/under, objects may be hidden
150
  3. Examine everything you find, or try to interact
151
+ 4. Pick up useful items (lamp, sword, etc.) or everything that you can take, examine BEFORE taking
152
  5. Open containers (mailbox, window, etc.)
153
  6. Use get_map if you don't know where to go
154
+ 7. If you find NPCs, talk to them and see if they have useful information or items, exhaust dialogue
155
+ 8. If you are lost use the MAP or memory tools
156
  9. Any odd fact is worth remembering with add_knowledge, it might be useful later
157
  10. Use your senses: listen, smell, touch
158
 
 
181
  self.history: list[dict] = []
182
  self.score: int = 0
183
  self.location: str = "Unknown"
184
+ self.goal: str= "Not found general goal at the moment"
185
  self.knowledge_base: list[str] = []
186
  self.answers=set()
187
+ self.tokenizer= AutoTokenizer.from_pretrained(LLM_MODEL)
188
+ self.actions_resume={}
189
 
190
  async def run(
191
  self,
 
227
  if verbose:
228
  print(f"\n Observation:{observation}")
229
 
230
+ prompt_tokens=0
231
  for step in range(1, max_steps + 1):
232
+ prompt = self._build_prompt(observation, self.history, step, self.goal)
233
+ prompt_size=self.measure_prompt_size(prompt)
234
+ print(f"[PROMPT TOKENS] {prompt_size}")
235
+ prompt_tokens+=prompt_size
236
  response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
237
 
238
+ thought, tool_name, tool_args, goal = self._parse_response(response)
239
+
240
+ if goal:
241
+ self.goal=goal
242
 
 
243
  location = await client.call_tool("current_location", {})
244
  location = location.structured_content['result']
245
  location = re.search(r':\s*(.*?)\s*Parent', location).group(1)
246
  prev_location = self.location
247
  self.location = location if location else "Unknown"
248
+ if self.actions_resume.get(location,None)==None:
249
+ self.actions_resume[location]=""
250
 
251
  if verbose:
252
  print(f"\n__________________________________________________ Step {step} __________________________________________________")
 
255
  print(col(f"[TOOL] ", "blue")+col(f"{tool_name}: {tool_args}", "yellow"))
256
  print(col(f"[LOCATION] {location}", "blue"))
257
  print(col(f"[KNOWLEDGE] {self.knowledge_base}", "cyan"))
258
+ if goal:
259
+ print(col(f"[NEW GOAL] {goal}", "cyan"))
260
 
261
 
262
 
 
278
  if verbose:
279
  print(col(f"[LOCAL ACTION] Knowledge updated", "green"))
280
  else:
281
+ if tool_name=='play_action':
282
+ moves+=1
283
+
284
  try:
285
  result = await client.call_tool(tool_name, tool_args)
286
  self._update_score(self._extract_result(result))
 
296
  locations_visited.add(self.location)
297
 
298
  ignore_repeated.discard(prev_action)
299
+ prev_action = (tool_name, str(tool_args), self.location)
300
+
301
 
302
  if prev_location != self.location and step>1:
303
  self.history.append({
304
  "step": step,
305
  "tool": "Moved",
306
  "from": prev_location,
307
+ "to": self.location,
308
+ })
309
+ else:
310
+ self.history.append({
311
+ "step": step,
312
+ "thought": thought,
313
+ "tool": tool_name,
314
+ "args": tool_args,
315
+ "result": observation,
316
+ "location": self.location
317
  })
 
 
 
 
 
 
 
 
 
318
 
319
 
320
  if len(self.history) > 100:
 
322
 
323
 
324
  history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
 
 
325
  if self._is_game_over(observation):
326
  if verbose:
327
  print(col("\n*** GAME OVER ***", "red"))
328
  break
329
 
330
  print (col(f"\nFinal Score: {self.score}", "magenta"))
331
+ print(f"Average prompt size {prompt_tokens/max_steps}")
332
  return RunResult(
333
  final_score=self.score,
334
  max_score=350, # Zork1 max score, adjust if needed
 
346
  return step - past["step"]
347
  return -1
348
 
349
+
350
+ def measure_prompt_size(self, prompt: str) -> int:
351
+ token_ids = self.tokenizer.encode(prompt, add_special_tokens=True)
352
+ return len(token_ids)
353
+
354
+ def resume_actions(self,history,context):
355
+ SYS_PROMPT='''You are the Game Chronicler for a Text Adventure. Your role is to synthesize player actions into a concise, meaningful narrative summary.
356
+ You will receive two inputs:
357
+ THE CHRONICLE: A summary of previous actions.
358
+ NEW EVENTS: Recent actions and outcomes to be integrated.
359
+ Guidelines for your output:
360
+ Prioritize Conciseness: Condense minor movements or repetitive attempts into single sentences. The summary must be brief and easy to read.
361
+ Track Failures: Explicitly state which actions did not work or were ineffective so the player knows what to avoid.
362
+ Highlight Discoveries: Emphasize interesting results, such as items acquired, lore revealed, or significant environmental changes.
363
+ Plain Text Only: Provide the summary in raw text. Do not use Markdown, bolding, or headers.
364
+ No Meta-Commentary: Do not include "Here is the summary" or any introductory filler. Start the summary immediately.
365
+ '''
366
+
367
+ response = call_llm(f"CHRONICLE: {history} NEW EVENTS: {str(history)}", SYS_PROMPT, 42)
368
+ return response
369
+
370
+ def _build_prompt(self, observation: str, history: list, step: int, goal: str) -> str:
371
  """
372
  Build the prompt for the LLM.
373
 
374
  TODO: Implement this to create effective prompts
375
  """
376
  prmt = []
377
+ prmt.append(f"[CURRENT LOCATION]: {self.location}")
378
 
379
  kb="\n".join(self.knowledge_base)
380
  if kb:
381
+ prmt.append(f"\n[KNOWLEDGE BASE]:\n{kb}")
382
 
383
 
384
 
 
386
  chars_to_include = [100,50, 30 , 20]
387
 
388
  if self.history:
 
389
  if SHORT_TERM_MEM > len(chars_to_include)-1:
390
  chars_to_include= [None]*(SHORT_TERM_MEM-len(chars_to_include)+1) + chars_to_include
391
 
392
+ past_history=[element for element in self.history if (element["tool"] == "Moved" or element["location"]!=self.location)]
393
+ if len(past_history):
394
+ prmt.append("\n[RECENT ACTIONS]:")
395
+ for i, past in enumerate(past_history[-SHORT_TERM_MEM-1:]):
396
+ rem = 6-step if step<6 else 0
397
+ if past["tool"] == "Moved":
398
+ prmt.append(f"- Moved from {past['from']} to {past['to']}")
399
+ continue
400
+ action=past["args"].get("action", "")
401
+ if not action:
402
+ action=past["tool"]
403
+
404
+ res_preview = past["result"][:chars_to_include[SHORT_TERM_MEM-i-rem]].replace('\n', ' ')
405
+ prmt.append(f"- {action} (Result: {res_preview}...)")
406
+
407
+ local_history = [
408
+ h for h in self.history
409
+ if h.get("location", None) is not None and h["location"] == self.location and h["tool"] == "play_action"
410
+ ]
411
+
412
+ if local_history:
413
+ if len(local_history)%5==0:
414
+ self.actions_resume[self.location]=self.resume_actions(local_history[-5:],self.actions_resume[self.location])
415
+
416
+ prmt.append("\n[RESUME OF ALL PREVIOUS ACTIONS DONE AT THIS LOCATION]:")
417
+ prmt.append(self.actions_resume[self.location])
418
+ print(col(self.actions_resume[self.location],'red'))
419
+
420
+ else:
421
+ local_history = local_history[-5:]
422
+ if self.actions_resume[self.location]:
423
+ prmt.append(f"[RESUME OF ALL PREVIOUS ACTIONS DONE AT THIS LOCATION]: {self.actions_resume[self.location]}")
424
+
425
+ prmt.append("[LAST ACTIONS DONE AT THIS LOCATION]: ")
426
+ tried_actions = set()
427
+ for h in local_history:
428
+ action = h["args"].get("action", "")
429
+ if action not in tried_actions:
430
+ res_preview = h["result"].replace('\n', ' ')
431
+ prmt.append(f" - {action} (Result: {res_preview}...)")
432
+ tried_actions.add(action)
433
+ prmt.append("DO NOT repeat the above actions unless the environment has changed.")
434
+
435
 
436
+ prmt.append(f"\n[GOAL]: {goal} (If you want to change your general goal add [GOAL] to your answer)")
437
+
438
+ hints=[]
439
  if observation not in self.answers and observation.strip()!="":
440
+ hint = f"If you found something worth remembering, add it to the knowledge base with the add_knowledge tool, so you can use it later, don't add descriptions, don't hesitate to use it. If you found an object that you can't take, take it, don't add the previous location of the object to the memory"
441
+ hints.append(hint)
442
  self.answers.add(observation)
443
 
444
+ maxs=0
445
+ for element in self.history:
446
+ if element['tool']!='Moved' and element['location']!=self.location:
447
+ maxs=max(maxs,element['step'])
448
+ if step-maxs>=20:
449
+ hints.append("You have been in the same location a while, if you feel stagnated move around or use the map")
450
+
451
+ if hints:
452
+ prmt.append(f"\n[HINTS]: \n{"\n".join(hints)}")
453
 
454
  prmt.append("\nWhat do you do next?")
455
 
456
  dbg=col(f"\n[DEBUG] Prompt for step {step}:\n{"\n".join(prmt)}", "red")
457
  dbg= '_'*80 + dbg + '\n' + '_'*80
458
  #print(dbg)
459
+ prmt="\n".join(prmt)
460
+ return prmt
461
 
462
  def _parse_response(self, response: str) -> tuple[str, str, dict]:
463
  """
 
471
  thought = "No reasoning provided"
472
  tool_name = "play_action"
473
  tool_args = {"action": "look"}
474
+ goal=None
475
 
476
  lines = response.strip().split("\n")
477
 
478
  for line in lines:
479
  line_clean = line.strip()
480
  line_upper = line_clean.upper()
481
+
482
+ if line_upper.startswith("GOAL:"):
483
+ goal = line_clean.split(":", 1)[1].strip()
484
 
485
  if line_upper.startswith("THOUGHT:"):
486
  thought = line_clean.split(":", 1)[1].strip()
 
503
  else:
504
  tool_args = {"action": "look"}
505
 
506
+ return thought, tool_name, tool_args, goal
507
 
508
  def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
509
  """
 
590
  "you are dead",
591
  "*** you have died ***",
592
  ]
593
+ chance_phrases = [
594
+ "another chance",
595
+ "second chance",
596
+ "another attempt"
597
+ ]
598
  text_lower = text.lower()
599
+ return any(phrase in text_lower for phrase in game_over_phrases) and not any(phrase in text_lower for phrase in chance_phrases)
600
 
601
  # =============================================================================
602
  # For local testing
mcp_server.py CHANGED
@@ -26,6 +26,7 @@ Then open the MCP Inspector in your browser to test the tools interactively.
26
 
27
  import sys
28
  import os
 
29
  # Add parent directory to path to import games module
30
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
31
 
@@ -75,32 +76,38 @@ class GameManager:
75
  return self.state.observation
76
 
77
  def step(self, action: str) -> str:
78
- """Execute an action and return the result."""
79
- if self.env is None:
80
- self.initialize()
81
-
82
- prev_location = self.current_location
83
-
84
- self.state = self.env.step(action)
85
- new_location = self.state.location
86
-
87
- self.history.append((action, self.state.observation))
88
-
89
- if prev_location not in self.explored_locations:
90
- self.explored_locations[prev_location] = set()
91
- if new_location not in self.explored_locations:
92
- self.explored_locations[new_location] = set()
93
-
94
- inverse_action="inverse of "+action
95
-
96
-
97
- if prev_location != new_location and prev_location != "Unknown":
98
- self.explored_locations[prev_location].add(f"{action} -> {new_location}")
99
- self.explored_locations[new_location].add(f"{inverse_action} -> {prev_location}")
100
-
101
- self.current_location = new_location
102
- return self.state.observation
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  def get_score(self) -> int:
105
  """Get current score."""
106
  return self.state.score if self.state else 0
 
26
 
27
  import sys
28
  import os
29
+ from utils import graph_to_ascii
30
  # Add parent directory to path to import games module
31
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
32
 
 
76
  return self.state.observation
77
 
78
  def step(self, action: str) -> str:
79
+ """Execute an action and return the result."""
80
+ if self.env is None:
81
+ self.initialize()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
83
+ prev_location = self.current_location
84
+
85
+ OPPOSITES = {
86
+ "north": "south", "south": "north", "east": "west", "west": "east",
87
+ "up": "down", "down": "up", "in": "out", "out": "in",
88
+ "enter": "exit", "exit": "enter", "ne": "sw", "sw": "ne", "nw": "se", "se": "nw"
89
+ }
90
+
91
+ self.state = self.env.step(action)
92
+ new_location = self.state.location
93
+
94
+
95
+ self.history.append((action, self.state.observation))
96
+
97
+ if prev_location not in self.explored_locations:
98
+ self.explored_locations[prev_location] = set()
99
+ if new_location not in self.explored_locations:
100
+ self.explored_locations[new_location] = set()
101
+
102
+ if prev_location != new_location and prev_location != "Unknown":
103
+ self.explored_locations[prev_location].add(f"{action} -> {new_location}")
104
+ inverse_action = OPPOSITES.get(action.lower(), f"return via {action}")
105
+ self.explored_locations[new_location].add(f"{inverse_action} -> {prev_location}")
106
+
107
+
108
+ self.current_location = new_location
109
+ return self.state.observation
110
+
111
  def get_score(self) -> int:
112
  """Get current score."""
113
  return self.state.score if self.state else 0
requirements.txt CHANGED
@@ -7,3 +7,5 @@
7
  # Add any additional packages your agent needs below:
8
  # numpy
9
  # requests
 
 
 
7
  # Add any additional packages your agent needs below:
8
  # numpy
9
  # requests
10
+ termcolor
11
+ transformers