text-adventure-agent

Runtime error

App Files Files Community

tlemagny committed on Feb 11

Commit

351074a

1 Parent(s): 7a36b3c

update

Browse files

Files changed (2) hide show

agent.py +312 -27
mcp_server.py +147 -60

agent.py CHANGED Viewed

@@ -82,7 +82,6 @@ def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300)
         max_tokens=max_tokens,
         seed=seed,
     )
     return response.choices[0].message.content
@@ -102,32 +101,55 @@ class RunResult:
 # System Prompt - Customize this for your agent
 # =============================================================================
-SYSTEM_PROMPT = """You are playing a classic text adventure game.
-GOAL: Explore the world, solve puzzles, and maximize your score.
-AVAILABLE TOOLS (use via MCP):
-- play_action: Execute a game command (north, take lamp, open mailbox, etc.)
-- memory: Get current game state and history (if implemented)
-- inventory: Check what you're carrying (if implemented)
 VALID GAME COMMANDS for play_action:
-- Movement: north, south, east, west, up, down, enter, exit
-- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
-- Other: look, inventory, read <thing>, turn on lamp
 RESPOND IN THIS EXACT FORMAT (no markdown):
-THOUGHT: <your reasoning about what to do next>
 TOOL: <tool_name>
-ARGS: <JSON arguments, e.g., {"action": "look"}>
-Example:
-THOUGHT: I should look around to see where I am.
 TOOL: play_action
-ARGS: {"action": "look"}
 """
 # =============================================================================
 # Student Agent - IMPLEMENT THIS CLASS
 # =============================================================================
@@ -147,9 +169,13 @@ class StudentAgent:
     def __init__(self):
         """Initialize your agent here."""
         # TODO: Initialize any state tracking you need
-        # self.history = []
-        # self.visited_locations = set()
-        pass
     async def run(
         self,
@@ -204,16 +230,164 @@ class StudentAgent:
         # TODO: Your implementation here
         # ...
         return RunResult(
-            final_score=final_score,
-            max_score=350,  # Zork1 max score, adjust if needed
             moves=moves,
             locations_visited=locations_visited,
-            game_completed=False,
             history=history,
         )
     def _build_prompt(self, observation: str, history: list) -> str:
         """
         Build the prompt for the LLM.
@@ -221,7 +395,44 @@ class StudentAgent:
         TODO: Implement this to create effective prompts
         """
         # TODO: Combine system prompt, history, and current observation
-        pass
     def _parse_response(self, response: str) -> tuple[str, str, dict]:
         """
@@ -233,10 +444,38 @@ class StudentAgent:
             Tuple of (thought, tool_name, args_dict)
         """
         # TODO: Parse the response format:
-        # THOUGHT: ...
-        # TOOL: ...
-        # ARGS: {...}
-        pass
     def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
         """
@@ -245,6 +484,52 @@ class StudentAgent:
         This is a convenience wrapper - you can also use call_llm() directly.
         """
         return call_llm(prompt, system_prompt, seed)
 # =============================================================================

         max_tokens=max_tokens,
         seed=seed,
     )
     return response.choices[0].message.content
 # System Prompt - Customize this for your agent
 # =============================================================================
+# NOTE: the THOUGHT / TOOL / ARGS layout requested below is the format that
+# StudentAgent._parse_response scans for, one line at a time. The verbs listed
+# as FORBIDDEN are also rewritten by StudentAgent._validate_tool_call.
+SYSTEM_PROMPT = """You are an expert text adventure game player. Your goal is to explore, collect treasures, and maximize your score.
+AVAILABLE TOOLS (use these via MCP):
+1. play_action - Execute game commands (north, take lamp, open mailbox, etc.)
+2. memory - Get current game state, score, and recent history
+3. get_map - See explored locations and connections
+4. inventory - Check what you're carrying
 VALID GAME COMMANDS for play_action:
+- Movement: north, south, east, west, up, down, enter, exit, northeast, northwest, southeast, southwest
+- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>, put <item> in <container>, pull <object>, push <object>
+- Light: turn on lamp, turn off lamp
+- Combat: attack/hit <enemy> with <weapon> (swords, axes, etc.)
+- Other: inventory, look, read <thing>, wait, ask <character> about <topic>, give <item> to <character>
+FORBIDDEN (will NOT work): check, inspect, search, grab, use, help
 RESPOND IN THIS EXACT FORMAT (no markdown):
+THOUGHT: <brief reasoning about what to do next>
 TOOL: <tool_name>
+ARGS: <JSON arguments>
+Examples:
+THOUGHT: Old stone fountain with big bowl part. It might contain something useful. I should check it out.
 TOOL: play_action
+ARGS: {"action": "examine bowl"}
+THOUGHT: It seems to be a slot where I can put things.
+TOOL: play_action
+ARGS: {"action": "put coin in slot"}
+THOUGHT: In the bowl, there is a coin. I should take it.
+TOOL: play_action
+ARGS: {"action": "take coin"}
+STRATEGY:
+1. Start by looking around and checking memory
+2. Explore systematically - try all directions
+3. Examine everything you find for clues and items. When examining an item there might be other items hidden inside or new actions available.
+4. Pick up all useful items (lamp, sword, pole, etc.) with "take".
+5. Interact with objects in the environment and in your inventory (pull, put, push, etc.)
+6. Use get_map to avoid getting lost
+7. Turn on lamp before dark areas!
+DO NOT repeat the same action multiple times in a row. If you find yourself stuck, try a different action or explore a new area.
 """
 # =============================================================================
 # Student Agent - IMPLEMENT THIS CLASS
 # =============================================================================
     def __init__(self):
         """Initialize your agent here."""
         # TODO: Initialize any state tracking you need
+        # Score bookkeeping: best score seen, steps since it last rose, and
+        # the (location, action, result) triples that raised it.
+        self.score = 0
+        self.stuck_counter = 0
+        self.score_actions = []
+        # Rolling records of what was done: full step entries and bare actions.
+        self.history = []
+        self.recent_actions = []
+        # Per-location records: names seen, and actions tried at each name.
+        self.visited_locations = set()
+        self.location_actions = {}
     async def run(
         self,
         # TODO: Your implementation here
         # ...
+        # Get list of available tools
+        tools = await client.list_tools()
+        tool_names = [t.name for t in tools]
+        # Get initial observation
+        result = await client.call_tool("play_action", {"action": "look"})
+        observation = self._extract_result(result)
+        # Track initial location
+        location = observation.split("\n")[0] if observation else "Unknown"
+        locations_visited.add(location)
+        if verbose:
+            print(f"\n{observation}")
+        # Main ReAct loop
+        for step in range(1, max_steps + 1):
+            # Build prompt with context
+            prompt = self._build_prompt(observation, self.history)
+            # Call LLM for reasoning (use step-based seed for variety)
+            response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
+            # Parse the response
+            thought, tool_name, tool_args = self._parse_response(response)
+            if verbose:
+                print(f"\n--- Step {step} ---")
+                print(f"[THOUGHT] {thought}")
+                print(f"[TOOL] {tool_name}({tool_args})")
+            # Validate and fix common issues
+            tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)
+            # Loop detection
+            if tool_name == "play_action":
+                action = tool_args.get("action", "look")
+                self.recent_actions.append(action)
+                if len(self.recent_actions) > 5:
+                    self.recent_actions = self.recent_actions[-5:]
+                # Detect loops - if same action 3 times, force "look"
+                if len(self.recent_actions) >= 2 and len(set(self.recent_actions[-2:])) == 1:
+                    if verbose:
+                        print(f"[WARNING] Loop detected - forcing 'look'")
+                    tool_args = {"action": "look"}
+                    self.recent_actions.append("look")
+                moves += 1
+            # Execute the tool
+            try:
+                result = await client.call_tool(tool_name, tool_args)
+                observation = self._extract_result(result)
+                if verbose:
+                    print(f"[RESULT] {observation[:200]}...")
+            except Exception as e:
+                observation = f"Error: {e}"
+                if verbose:
+                    print(f"[ERROR] {e}")
+            # Track location
+            location = self._get_location(observation)
+            locations_visited.add(location)
+            if location not in self.location_actions:
+                self.location_actions[location] = set()
+            if tool_name == "play_action":
+                self.location_actions[location].add(tool_args.get("action", "look"))
+            observations_lines = observation.splitlines()
+            # Update history
+            self.history.append({
+                "step": step,
+                "location": location,
+                "thought": thought,
+                "tool": tool_name,
+                "args": tool_args,
+                "result": '\n'.join(observations_lines[1:])[:300]
+            })
+            if len(self.history) > 10:
+                self.history = self.history[-10:]
+            current_score = self.score
+            # Track score from observation
+            self._update_score(observation)
+            if self.score > current_score:
+                self.stuck_counter = 0
+                if verbose:
+                    print(f"[SCORE UPDATE] Score increased to {self.score}!")
+                self.score_actions.append((location, tool_args.get("action", "look"), '\n'.join(observations_lines[1:])[:300]))
+                self.score_actions = self.score_actions[-5:]  # Keep last 5 score-increasing actions
+            else:
+                self.stuck_counter += 1
+                if self.stuck_counter >= 10:
+                    if verbose:
+                        print(f"[WARNING] No score increase for {self.stuck_counter} steps. Consider changing strategy.")
+            # Record in result history
+            history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
+            # Check for game over
+            if self._is_game_over(observation):
+                if verbose:
+                    print("\n*** GAME OVER ***")
+                break
         return RunResult(
+            final_score=self.score,
+            max_score=350,
             moves=moves,
             locations_visited=locations_visited,
+            game_completed=self._is_game_over(observation),
             history=history,
         )
+    def _get_location(self, observation):
+        """Return the location name found on the first line of an observation.
+
+        Args:
+            observation: Text returned by a tool call. When the first line has
+                the form "Current Location: <name>" only <name> is returned;
+                otherwise the whole first line is used.
+
+        Returns:
+            The location name, or "Unknown" when the observation is empty,
+            whitespace-only, or carries an empty "Current Location:" header.
+        """
+        text = (observation or "").strip()
+        if not text:
+            # str.split never returns an empty list, so emptiness has to be
+            # checked on the text itself for the fallback to be reachable.
+            return "Unknown"
+        first_line = text.split("\n", 1)[0].strip()
+        match = re.match(r'Current Location\s*:\s*(.*)', first_line)
+        if match:
+            return match.group(1).strip() or "Unknown"
+        return first_line
+    def _update_score(self, text: str) -> None:
+        """Raise self.score to the highest score value mentioned in *text*.
+
+        Two spellings are recognised, case-insensitively:
+          * "Score: N" / "[Score: N | ...]" / "score N"
+          * "(Total: N)", which play_action emits instead of the "[Score: ...]"
+            suffix on the very step where points are awarded.
+
+        The score is only ever raised, never lowered, so an older value quoted
+        in the text cannot overwrite a newer, higher one.
+
+        Args:
+            text: Observation text returned by a tool call.
+        """
+        patterns = (
+            r'score[:\s]+(\d+)',
+            r'\(Total:\s*(\d+)\)',
+        )
+        for pattern in patterns:
+            # Consider every occurrence: the text may quote several scores.
+            for value in re.findall(pattern, text, re.IGNORECASE):
+                self.score = max(self.score, int(value))
+    def _is_game_over(self, text: str) -> bool:
+        """Report whether *text* contains a known end-of-game phrase.
+
+        The comparison is case-insensitive and matches anywhere in the text.
+        """
+        lowered = text.lower()
+        for marker in (
+            "game over",
+            "you have died",
+            "you are dead",
+            "*** you have died ***",
+        ):
+            if marker in lowered:
+                return True
+        return False
+    def _extract_result(self, result) -> str:
+        """Pull the text payload out of an MCP tool result.
+
+        Tried in order: the first item of a non-empty ``result.content``; the
+        first item of a non-empty list (its ``text`` attribute when present,
+        else its string form); finally the string form of *result* itself.
+        """
+        payload = getattr(result, 'content', None)
+        if payload:
+            return payload[0].text
+        if isinstance(result, list) and result:
+            head = result[0]
+            if hasattr(head, 'text'):
+                return head.text
+            return str(head)
+        return str(result)
     def _build_prompt(self, observation: str, history: list) -> str:
         """
         Build the prompt for the LLM.
         TODO: Implement this to create effective prompts
         """
         # TODO: Combine system prompt, history, and current observation
+        # Layout of the prompt, top to bottom: score / location header, the
+        # last three history entries, actions already tried at this location,
+        # recent score-increasing actions, repetition and "stuck" warnings,
+        # then the raw observation. The *history* argument is not consulted;
+        # the agent's own self.history is used.
+        # Side effect: self.stuck_counter is reset to 0 once the "stuck"
+        # warning has been emitted.
+        observation = observation or ""
+        location = self._get_location(observation)
+        # self.visited_locations is not updated anywhere in the visible code,
+        # whereas the keys of location_actions are filled in on every step,
+        # so count both to report a meaningful number.
+        seen_locations = set(self.visited_locations) | set(self.location_actions)
+        parts = [
+            f"Current Score: {self.score}",
+            f"Locations Visited: {len(seen_locations)}",
+            f"Current Location: {location}",
+        ]
+        # Recent history
+        last_action = None  # stays None when there is no history yet
+        if self.history:
+            parts.append("\nRecent actions:")
+            for entry in self.history[-3:]:
+                last_action = entry.get("args", {}).get("action", entry["tool"])
+                outcome = entry["result"]
+                outcome_short = outcome[:100] + "..." if len(outcome) > 100 else outcome
+                parts.append(f"  > {last_action} -> {outcome_short}")
+        tried_here = self.location_actions.get(location)
+        if tried_here:
+            # sorted() keeps the prompt stable from run to run; a bare set has
+            # no guaranteed iteration order.
+            parts.append(f"\nLast actions taken at this location: {', '.join(sorted(tried_here))}")
+            if last_action in tried_here:
+                parts.append(f"\n[WARNING: You've already tried '{last_action}' here. Consider a different action.]")
+        if self.score_actions:
+            parts.append("\nRecent score-increasing actions:")
+            for loc, scored_action, outcome in self.score_actions:
+                outcome_short = outcome[:100] + "..." if len(outcome) > 100 else outcome
+                parts.append(f"  > At {loc}, action '{scored_action}' led to: {outcome_short}")
+        # Warn about repeated actions, independently of whether any score has
+        # been earned, and only once three actions in a row are identical.
+        last_three = self.recent_actions[-3:]
+        if len(last_three) == 3 and len(set(last_three)) == 1:
+            parts.append(f"\n[WARNING: You've been doing '{last_three[-1]}' repeatedly. TRY SOMETHING DIFFERENT!]")
+        observation_lines = observation.splitlines()
+        if observation_lines:
+            parts.append(observation_lines[0])  # Location line
+        # Joined outside the f-string: a backslash inside an f-string
+        # expression is a SyntaxError before Python 3.12.
+        situation = "\n".join(observation_lines[1:])
+        parts.append(f"\nCurrent situation:\n{situation}")
+        if self.stuck_counter >= 10:
+            parts.append(f"\n[WARNING: No score increase for {self.stuck_counter} steps. Consider changing strategy. Interact with different objects, explore new areas.]")
+            self.stuck_counter = 0  # Reset counter after warning
+        parts.append("\nWhat do you do next?")
+        return "\n".join(parts)
     def _parse_response(self, response: str) -> tuple[str, str, dict]:
         """
             Tuple of (thought, tool_name, args_dict)
         """
         # TODO: Parse the response format:
+        # Expected lines (prefix matched case-insensitively, any order):
+        #   THOUGHT: <free text>
+        #   TOOL: <tool name, markdown decoration tolerated>
+        #   ARGS: <JSON object; single-quoted pseudo-JSON tolerated>
+        # Anything missing or unparsable falls back to a plain "look" action.
+        thought = "No reasoning provided"
+        tool_name = "play_action"
+        tool_args = {"action": "look"}
+        action_pattern = re.compile(r'"action"\s*:\s*"([^"]+)"')
+        for raw_line in (response or "").strip().split("\n"):
+            line = raw_line.strip()
+            label, sep, value = line.partition(":")
+            if not sep:
+                continue
+            label = label.upper()
+            value = value.strip()
+            if label == "THOUGHT":
+                thought = value
+            elif label == "TOOL":
+                cleaned = value.lower().replace("**", "").replace("*", "").replace("`", "")
+                # split() may come back empty once the markdown is removed.
+                tokens = cleaned.split()
+                tool_name = tokens[0] if tokens else "play_action"
+            elif label == "ARGS":
+                # Try the text as written first: a blanket quote swap would
+                # corrupt valid JSON that contains an apostrophe, for example
+                # {"action": "take thief's bag"}.
+                swapped = value.replace("'", '"')
+                parsed = None
+                for candidate in (value, swapped):
+                    try:
+                        parsed = json.loads(candidate)
+                    except json.JSONDecodeError:
+                        continue
+                    break
+                if isinstance(parsed, dict):
+                    tool_args = parsed
+                else:
+                    # Not a JSON object (or not JSON at all): salvage the action.
+                    match = action_pattern.search(value) or action_pattern.search(swapped)
+                    tool_args = {"action": match.group(1) if match else "look"}
+        return thought, tool_name, tool_args
     def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
         """
         This is a convenience wrapper - you can also use call_llm() directly.
         """
         return call_llm(prompt, system_prompt, seed)
+    def _validate_tool_call(self, tool_name: str, tool_args: dict, valid_tools: list[str]) -> tuple[str, dict]:
+        """Validate and fix common tool call issues.
+
+        Args:
+            tool_name: Tool name as parsed from the model's reply.
+            tool_args: Parsed arguments; anything that is not a dict is
+                replaced by an empty dict when the tool is play_action.
+            valid_tools: Names of the tools the server actually offers.
+
+        Returns:
+            (tool_name, tool_args). Unknown tool names are mapped through a
+            small alias table and otherwise default to "play_action". For
+            play_action, tool_args["action"] is rewritten in place: markdown
+            removed, lower-cased, whitespace collapsed, unsupported leading
+            verbs replaced, and "go/move/enter <direction>" reduced to the
+            bare direction.
+        """
+        # Fix tool name
+        tool_aliases = {
+            "action": "play_action",
+            "do": "play_action",
+            "command": "play_action",
+            "map": "get_map",
+            "location": "get_map",
+            "mem": "memory",
+            "state": "memory",
+            "status": "memory",
+            "inv": "inventory",
+            "items": "inventory",
+        }
+        if tool_name not in valid_tools:
+            tool_name = tool_aliases.get(tool_name, "play_action")
+        if tool_name != "play_action":
+            return tool_name, tool_args
+        # Fix action verbs
+        if not isinstance(tool_args, dict):
+            tool_args = {}
+        raw_action = str(tool_args.get("action") or "look")
+        # Strip markdown before looking at the verb, so "**grab** lamp" is
+        # recognised as starting with "grab".
+        cleaned = raw_action.replace("**", "").replace("*", "").replace("`", "")
+        words = cleaned.lower().split() or ["look"]
+        invalid_verb_map = {
+            "check": "examine",
+            "inspect": "examine",
+            "search": "look",
+            "grab": "take",
+            "pick": "take",
+            "use": "examine",
+            "investigate": "examine",
+        }
+        if words[:2] == ["pick", "up"] and len(words) > 2:
+            # "pick up lamp" -> "take lamp" rather than "take up lamp".
+            words = ["take"] + words[2:]
+        elif words[0] in invalid_verb_map:
+            words[0] = invalid_verb_map[words[0]]
+        directions = {
+            "north", "south", "east", "west", "up", "down",
+            "northeast", "northwest", "southeast", "southwest",
+            "n", "s", "e", "w", "u", "d", "ne", "nw", "se", "sw",
+            "in", "out",
+        }
+        if words[0] in ("go", "move", "enter") and len(words) > 1:
+            # Collapse only when a direction is actually named; "enter house"
+            # or "go to kitchen" are left for the game parser to judge instead
+            # of being cut down to a meaningless single word.
+            heading = next((word for word in words[1:] if word in directions), None)
+            if heading is not None:
+                words = [heading]
+        tool_args["action"] = " ".join(words)
+        return tool_name, tool_args
 # =============================================================================

mcp_server.py CHANGED Viewed

@@ -60,9 +60,11 @@ class GameManager:
         self.state = None
         self.game_name: str = ""
         # TODO: Add more state tracking
-        # self.history: list[tuple[str, str]] = []
-        # self.explored_locations: dict[str, set[str]] = {}
-        # self.current_location: str = ""
     def initialize(self, game: str = "zork1"):
         """Initialize or reset the game."""
@@ -70,8 +72,22 @@ class GameManager:
         self.env = TextAdventureEnv(game)
         self.state = self.env.reset()
         # TODO: Reset your state tracking here
         return self.state.observation
     def step(self, action: str) -> str:
         """Execute an action and return the result."""
         if self.env is None:
@@ -80,8 +96,45 @@ class GameManager:
         self.state = self.env.step(action)
         # TODO: Update your state tracking here
-        # self.history.append((action, self.state.observation))
-        # Update location tracking, etc.
         return self.state.observation
@@ -133,71 +186,105 @@ def play_action(action: str) -> str:
     game = get_game()
     # TODO: You might want to add action validation here
     # TODO: You might want to include score changes in the response
-    result = game.step(action)
     # Optional: Append score info
-    # result += f"\n[Score: {game.get_score()} | Moves: {game.get_moves()}]"
     return result
 # TODO: Implement additional tools to help your agent
-# @mcp.tool()
-# def memory() -> str:
-#     """
-#     Get the current game state summary.
-#
-#     Returns:
-#         A summary including current location, score, moves, and recent history
-#     """
-#     game = get_game()
-#     # TODO: Return useful state information
-#     pass
-# @mcp.tool()
-# def inventory() -> str:
-#     """
-#     Check what the player is carrying.
-#
-#     Returns:
-#         List of items in the player's inventory
-#     """
-#     game = get_game()
-#     result = game.step("inventory")
-#     return result
-# @mcp.tool()
-# def get_map() -> str:
-#     """
-#     Get a map of explored locations.
-#
-#     Returns:
-#         A text representation of explored locations and connections
-#     """
-#     game = get_game()
-#     # TODO: Return map of explored locations
-#     pass
-# @mcp.tool()
-# def get_valid_actions() -> str:
-#     """
-#     Get a list of likely valid actions from the current location.
-#
-#     Returns:
-#         List of actions that might work here
-#     """
-#     # This is a hint: Jericho provides get_valid_actions()
-#     game = get_game()
-#     if game.env and game.env.env:
-#         valid = game.env.env.get_valid_actions()
-#         return "Valid actions: " + ", ".join(valid[:20])
-#     return "Could not determine valid actions"
 # =============================================================================

         self.state = None
         self.game_name: str = ""
         # TODO: Add more state tracking
+        self.history: list[tuple[str, str]] = []
+        self.explored_locations: dict[str, set[str]] = {}
+        self.current_location: str = ""
+        self.map: dict[str, dict[str, str]] = {}
+        self.inventory: list[str] = []
     def initialize(self, game: str = "zork1"):
         """Initialize or reset the game."""
         self.env = TextAdventureEnv(game)
         self.state = self.env.reset()
         # TODO: Reset your state tracking here
+        self.history = []
+        self.explored_locations = {}
+        self.map = {}
+        self.current_location = self.get_current_location()
+        self.inventory = self.get_inventory()
         return self.state.observation
+    def get_current_location(self) -> str:
+        """Return the name of the location the player is currently in.
+
+        Reads the ``name`` of whatever ``self.env.env.get_player_location()``
+        returns; ``self.env`` must already be set.
+        """
+        backend = self.env.env
+        place = backend.get_player_location()
+        return place.name
+    def get_inventory(self) -> list[str]:
+        """Return the names of the objects the player is carrying.
+
+        Returns:
+            One name per object reported by ``self.env.env.get_inventory()``;
+            an empty list when nothing is carried. (The result is a list, not
+            a string, which is what ``self.inventory`` is declared to hold.)
+        """
+        carried = self.env.env.get_inventory()
+        return [obj.name for obj in carried]
     def step(self, action: str) -> str:
         """Execute an action and return the result."""
         if self.env is None:
         self.state = self.env.step(action)
         # TODO: Update your state tracking here
+        self.history.append((action, self.state.observation))
+        if len(self.history) > 50:
+            self.history = self.history[-50:]
+        action_inverse = {
+            "north": "south",
+            "south": "north",
+            "east": "west",
+            "west": "east",
+            "up": "down",
+            "down": "up",
+            "enter": "exit",
+            "exit": "enter",
+            "northeast": "southwest",
+            "northwest": "southeast",
+            "southeast": "northwest",
+            "southwest": "northeast",
+        }
+        if action in ["north", "south", "east", "west", "up", "down",
+                      "enter", "exit"]:
+            if self.current_location not in self.explored_locations:
+                self.explored_locations[self.current_location] = set()
+            new_location = self.get_current_location()
+            if new_location != self.current_location:
+                self.explored_locations[self.current_location].add(f"{action} -> {new_location}")
+                if new_location not in self.explored_locations:
+                    self.explored_locations[new_location] = set()
+                self.explored_locations[new_location].add(f"{action_inverse.get(action,'Unknown')} -> {self.current_location}")
+            else :
+                self.explored_locations[self.current_location].add(f"{action} -> 'No movement'")
+        self.current_location = self.get_current_location()
+        if "take" in action or "drop" in action:
+            self.inventory = self.get_inventory()
         return self.state.observation
     game = get_game()
     # TODO: You might want to add action validation here
+    ''' valid_action = False
+    if action in ["north", "south", "east", "west", "up", "down",
+                    "enter", "exit", "n", "s", "e", "w", "u", "d", "look", "inventory", "memory", "get_map"]:
+            valid_action = True
+    elif action.startswith(("take ", "drop ", "open ", "examine ", "read ", "turn on ", "turn off ")):
+            valid_action = True
+    if not valid_action:
+        return f"Action '{action}' may not be valid here. Changed actions may lead to better results."'''
     # TODO: You might want to include score changes in the response
+    result = f"Current Location: {game.current_location}\n"
+    #result += f"Walkthrough of action: {game.env.env.get_walkthrough()}\n\n"
+    result += game.step(action)
+     # Add score info
+    score_info = f"\n\n[Score: {game.state.score} | Moves: {game.state.moves}]"
+    if game.state.reward > 0:
+        score_info = f"\n\n+{game.state.reward} points! (Total: {game.state.score})"
+    done_info = ""
+    if game.state.done:
+        done_info = "\n\nGAME OVER"
     # Optional: Append score info
+    result += score_info + done_info
     return result
 # TODO: Implement additional tools to help your agent
+@mcp.tool()
+def memory() -> str:
+    """
+    Get the current game state summary.
+    Returns:
+        A summary including current location, score, moves, and recent history
+    """
+    game = get_game()
+    # Header values are read first, then the last ten (action, observation)
+    # pairs are rendered one per entry.
+    location = game.current_location
+    score = game.get_score()
+    moves = game.get_moves()
+    history_lines = []
+    for action, response in game.history[-10:]:
+        history_lines.append(f"> {action} -> <{response}>")
+    summary = [
+        f"Location: {location}",
+        f"Score: {score}",
+        f"Moves: {moves}",
+        "10 Last Actions:",
+        "\n".join(history_lines),
+    ]
+    return "\n".join(summary)
+@mcp.tool()
+def inventory() -> str:
+    """
+    Check what the player is carrying.
+    Returns:
+        List of items in the player's inventory
+    """
+    game = get_game()
+    # The cached game.inventory is refreshed by step() only for actions that
+    # contain "take" or "drop", so it goes stale after commands such as
+    # "put coin in slot" or "give lamp to troll". Re-read it from the game
+    # whenever an environment is available, and keep the cache in sync.
+    if game.env is not None:
+        game.inventory = game.get_inventory()
+    if not game.inventory:
+        return "Inventory is empty."
+    return "Inventory: " + ", ".join(game.inventory)
+@mcp.tool()
+def get_map() -> str:
+    """
+    Get a map of explored locations.
+    Returns:
+        A text representation of explored locations and connections
+    """
+    game = get_game()
+    if not game.explored_locations:
+        return "No locations explored yet."
+    # One "-<location>:" line per location, followed by one indented line per
+    # recorded exit. Exits are kept in a set, so they are sorted to make the
+    # output identical from call to call.
+    lines = ["Explored Locations:"]
+    for loc, exits in game.explored_locations.items():
+        lines.append(f"-{loc}:")
+        lines.extend(f"  - {exit_info}" for exit_info in sorted(exits))
+    # Trailing newline kept so the text ends the way it always has.
+    return "\n".join(lines) + "\n"
+@mcp.tool()
+def get_valid_actions() -> str:
+    """
+    Get a list of likely valid actions from the current location.
+    Returns:
+        List of actions that might work here
+    """
+    # This is a hint: Jericho provides get_valid_actions()
+    game = get_game()
+    if not (game.env and game.env.env):
+        return "Could not determine valid actions"
+    # NOTE(review): no get_valid_actions method is visible on the game manager
+    # in this change, so calling game.get_valid_actions() unconditionally would
+    # raise AttributeError. Use it when it exists, otherwise ask the wrapped
+    # environment directly, as the other helpers do via env.env.
+    getter = getattr(game, "get_valid_actions", None)
+    if getter is None:
+        getter = game.env.env.get_valid_actions
+    valid = list(getter())
+    # Only the first 20 are reported to keep the tool output short.
+    return "Valid actions: " + ", ".join(valid[:20])
 # =============================================================================