text-adventure-agent

Sleeping

App Files Files Community

3v324v23 committed on Feb 23

Commit

52754aa

1 Parent(s): 615a63b

Implement agent and MCP server

Browse files

Files changed (2) hide show

agent.py +254 -72
mcp_server.py +143 -67

agent.py CHANGED Viewed

@@ -26,7 +26,9 @@ Tips:
 import json
 import os
 import re
 from dataclasses import dataclass, field
 from typing import Optional
 from dotenv import load_dotenv
@@ -159,23 +161,18 @@ ARGS: {"action": "look"}
 # =============================================================================
 class StudentAgent:
-    """
-    Your ReAct agent implementation.
-    TODO:
-    1. Implement the run() method with the ReAct loop
-    2. Parse LLM responses to extract tool calls
-    3. Track state and avoid loops
-    Use the provided call_llm() function to interact with the LLM.
-    """
     def __init__(self):
-        """Initialize your agent here."""
-        # TODO: Initialize any state tracking you need
-        # self.history = []
-        # self.visited_locations = set()
-        pass
     async def run(
         self,
@@ -185,84 +182,269 @@ class StudentAgent:
         seed: int,
         verbose: bool = False,
     ) -> RunResult:
-        """
-        Run the agent for a game session.
-        Args:
-            client: FastMCP Client connected to your MCP server
-            game: Name of the game being played (e.g., "zork1")
-            max_steps: Maximum number of steps to take
-            seed: Random seed for reproducibility (use for LLM calls)
-            verbose: Whether to print detailed output
-        Returns:
-            RunResult with final score and statistics
-        """
-        # TODO: Implement your ReAct loop here
-        #
-        # Basic structure:
-        # 1. Get initial observation (call play_action with "look")
-        # 2. Loop for max_steps:
-        #    a. Build prompt with current observation and history
-        #    b. Call LLM to get thought and action
-        #    c. Parse the response to extract tool and args
-        #    d. Call the tool via client.call_tool(tool_name, args)
-        #    e. Update history and state
-        #    f. Check for game over
-        # 3. Return RunResult with final statistics
-        # Example of calling a tool:
-        # result = await client.call_tool("play_action", {"action": "look"})
-        # observation = result[0].text if result else "No response"
-        # Example of calling the LLM:
-        # response = call_llm(
-        #     prompt="Current observation: " + observation,
-        #     system_prompt=SYSTEM_PROMPT,
-        #     seed=seed,
-        # )
-        # Placeholder implementation - replace with your code
-        locations_visited = set()
-        history = []
-        final_score = 0
-        moves = 0
-        # TODO: Your implementation here
-        # ...
         return RunResult(
-            final_score=final_score,
-            max_score=350,  # Zork1 max score, adjust if needed
-            moves=moves,
-            locations_visited=locations_visited,
-            game_completed=False,
-            history=history,
         )
     def _build_prompt(self, observation: str, history: list) -> str:
         """
         Build the prompt for the LLM.
-        TODO: Implement this to create effective prompts
         """
-        # TODO: Combine system prompt, history, and current observation
-        pass
     def _parse_response(self, response: str) -> tuple[str, str, dict]:
         """
         Parse LLM response to extract thought, tool name, and arguments.
-        TODO: Implement robust parsing
         Returns:
             Tuple of (thought, tool_name, args_dict)
         """
-        # TODO: Parse the response format:
-        # THOUGHT: ...
-        # TOOL: ...
-        # ARGS: {...}
-        pass
     def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
         """

 import json
 import os
 import re
+import random
 from dataclasses import dataclass, field
+from collections import defaultdict
 from typing import Optional
 from dotenv import load_dotenv
 # =============================================================================
 class StudentAgent:
+    """A lean ReAct agent with a dash of personal taste."""
     def __init__(self):
+        """Initialize run-local state."""
+        self.history: list[tuple[str, str, str]] = []
+        self.visited_locations: set[str] = set()
+        self.actions_tried = defaultdict(lambda: defaultdict(int))  # location -> action -> count
+        self.current_score = 0
+        self.max_score = 350
+        self.moves = 0
+        self.game = ""
+        self.last_location = "Unknown"
     async def run(
         self,
         seed: int,
         verbose: bool = False,
     ) -> RunResult:
+        """Run the ReAct loop."""
+        random.seed(seed)
+        self.history = []
+        self.visited_locations = set()
+        self.actions_tried = defaultdict(lambda: defaultdict(int))
+        self.current_score = 0
+        self.max_score = 350
+        self.moves = 0
+        self.game = game
+        self.last_location = "Unknown"
+        observation = await self._safe_tool(client, "play_action", {"action": "look"})
+        prev_moves_mark = self.moves
+        self._ingest_observation(observation)
+        if self.moves == prev_moves_mark:
+            self.moves += 1
+        mem_text = await self._safe_tool(client, "memory", {"limit": 3})
+        self.max_score = self._parse_max_score(mem_text) or self.max_score
+        self.current_score, self.moves = self._parse_score_moves(
+            mem_text, self.current_score, self.moves
+        )
+        for step in range(max_steps):
+            prompt = self._build_prompt(observation, self.history)
+            llm_response = self._call_llm(prompt, SYSTEM_PROMPT, seed)
+            thought, tool, args = self._parse_response(llm_response)
+            allowed_tools = {"play_action", "memory", "inventory", "get_map", "get_valid_actions"}
+            if tool not in allowed_tools:
+                tool, args = "play_action", {"action": "look"}
+            prev_moves = self.moves
+            if tool == "play_action":
+                action = (args.get("action") or "").strip()
+                if not action:
+                    action = "look"
+                location = self.last_location
+                if self._should_switch(location, action):
+                    action = self._fallback_action(self.actions_tried[location])
+                self.actions_tried[location][action] += 1
+                observation = await self._safe_tool(client, "play_action", {"action": action})
+            else:
+                observation = await self._safe_tool(client, tool, args)
+            self._ingest_observation(observation)
+            if tool == "play_action" and self.moves == prev_moves:
+                self.moves += 1
+            self.history.append((thought, f"{tool} {json.dumps(args)}", observation))
+            if verbose:
+                print(f"\n> {tool} {args}\n{observation}")
+            if self._is_terminal(observation):
+                break
+            if self.moves >= max_steps:
+                break
+        clean_locations = {loc for loc in self.visited_locations if loc != "Unknown"}
+        game_completed = self.current_score >= self.max_score or self._is_win(observation)
         return RunResult(
+            final_score=self.current_score,
+            max_score=self.max_score,
+            moves=self.moves,
+            locations_visited=clean_locations,
+            game_completed=game_completed,
+            history=self.history,
         )
     def _build_prompt(self, observation: str, history: list) -> str:
         """
         Build the prompt for the LLM.
+        Mix a little personality with concise context so the model
+        keeps commands short and avoids spinning in circles.
         """
+        recent = history[-5:]
+        lines = [
+            f"Game: {self.game}",
+            "You are me playing a parser game. Be decisive, keep commands under four words.",
+            "If something failed twice in this room, try a different verb or direction.",
+            "",
+            "Current observation:",
+            observation.strip(),
+            "",
+            "Recent steps:",
+        ]
+        if not recent:
+            lines.append("- none yet")
+        else:
+            for thought, action, obs in recent:
+                snippet = obs.replace("\n", " ")
+                if len(snippet) > 120:
+                    snippet = snippet[:117] + "..."
+                lines.append(f"- {action}: {snippet}")
+        lines.append("\nNext command?")
+        return "\n".join(lines)
     def _parse_response(self, response: str) -> tuple[str, str, dict]:
         """
         Parse LLM response to extract thought, tool name, and arguments.
         Returns:
             Tuple of (thought, tool_name, args_dict)
         """
+        thought = ""
+        tool = "play_action"
+        args: dict = {"action": "look"}
+        if not response:
+            return thought, tool, args
+        cleaned = response.strip().replace("```", "")
+        thought_match = re.search(r"THOUGHT:\s*(.*)", cleaned, re.IGNORECASE)
+        if thought_match:
+            thought = thought_match.group(1).strip()
+        tool_match = re.search(r"TOOL:\s*([A-Za-z0-9_]+)", cleaned, re.IGNORECASE)
+        if tool_match:
+            tool = tool_match.group(1).strip()
+        args_match = re.search(r"ARGS:\s*(\{[\s\S]*\})", cleaned, re.IGNORECASE)
+        if args_match:
+            raw_args = args_match.group(1)
+            raw_args = raw_args[: raw_args.rfind("}") + 1] if "}" in raw_args else raw_args
+            try:
+                args = json.loads(raw_args)
+            except Exception:
+                try:
+                    args = json.loads(raw_args.replace("'", "\""))
+                except Exception:
+                    args = {"action": raw_args.strip("{} ").strip()}
+        if tool == "play_action" and "action" not in args:
+            args["action"] = "look"
+        return thought, tool, args
+    async def _safe_tool(self, client, tool: str, args: dict) -> str:
+        """Call a tool and always return a string."""
+        try:
+            result = await client.call_tool(tool, args)
+        except Exception as exc:
+            return f"[tool-error:{tool}] {exc}"
+        return self._extract_text(result)
+    def _extract_text(self, result) -> str:
+        """Normalize FastMCP tool responses into plain text."""
+        if result is None:
+            return ""
+        if isinstance(result, str):
+            return result
+        if isinstance(result, list):
+            texts = [self._extract_text(r) for r in result]
+            return "\n".join(t for t in texts if t)
+        if hasattr(result, "text"):
+            try:
+                return result.text
+            except Exception:
+                pass
+        if hasattr(result, "content"):
+            content = getattr(result, "content")
+            if isinstance(content, list):
+                texts = [self._extract_text(c) for c in content]
+                return "\n".join(t for t in texts if t)
+            if isinstance(content, str):
+                return content
+        if isinstance(result, dict):
+            for key in ("text", "content", "data", "result", "output"):
+                if key in result:
+                    return self._extract_text(result[key])
+        return str(result)
+    def _ingest_observation(self, observation: str):
+        """Update cached score, move count, and location tracking."""
+        self.current_score, self.moves = self._parse_score_moves(
+            observation, self.current_score, self.moves
+        )
+        location = self._extract_location(observation)
+        self.last_location = location
+        if location and location != "Unknown":
+            self.visited_locations.add(location)
+    def _parse_score_moves(
+        self, text: str, current_score: int, current_moves: int
+    ) -> tuple[int, int]:
+        if not text:
+            return current_score, current_moves
+        score_match = re.search(r"Score:\s*(\d+)", text)
+        move_match = re.search(r"Moves?:\s*(\d+)", text)
+        if score_match:
+            current_score = int(score_match.group(1))
+        if move_match:
+            current_moves = int(move_match.group(1))
+        return current_score, current_moves
+    def _parse_max_score(self, text: str) -> Optional[int]:
+        if not text:
+            return None
+        max_match = re.search(r"Score:\s*\d+\s*/\s*(\d+)", text)
+        return int(max_match.group(1)) if max_match else None
+    def _extract_location(self, observation: str) -> str:
+        if not observation:
+            return "Unknown"
+        match = re.search(r"Location:\s*([^\]\n]+)", observation)
+        if match:
+            return match.group(1).strip()
+        first_line = observation.strip().splitlines()[0].strip()
+        if len(first_line) <= 80:
+            return first_line or "Unknown"
+        return "Unknown"
+    def _should_switch(self, location: str, action: str) -> bool:
+        tried_here = self.actions_tried[location]
+        return tried_here.get(action, 0) >= 2
+    def _fallback_action(self, tried_actions: dict[str, int]) -> str:
+        palette = [
+            "look",
+            "inventory",
+            "north",
+            "south",
+            "east",
+            "west",
+            "up",
+            "down",
+            "enter",
+            "exit",
+            "take all",
+            "open door",
+            "examine room",
+        ]
+        for candidate in palette:
+            if tried_actions.get(candidate, 0) == 0:
+                return candidate
+        return "look"
+    def _is_terminal(self, observation: str) -> bool:
+        if not observation:
+            return False
+        lower = observation.lower()
+        return any(
+            phrase in lower
+            for phrase in [
+                "you have died",
+                "you are dead",
+                "game over",
+                "you have won",
+                "congratulations",
+                "*** the end",
+            ]
+        )
+    def _is_win(self, observation: str) -> bool:
+        if not observation:
+            return False
+        lower = observation.lower()
+        return "you have won" in lower or "congratulations" in lower
     def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
         """

mcp_server.py CHANGED Viewed

@@ -26,9 +26,21 @@ Then open the MCP Inspector in your browser to test the tools interactively.
 import sys
 import os
-# Add parent directory to path to import games module
-sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from fastmcp import FastMCP
 from games.zork_env import TextAdventureEnv
@@ -59,17 +71,29 @@ class GameManager:
         self.env: TextAdventureEnv = None
         self.state = None
         self.game_name: str = ""
-        # TODO: Add more state tracking
-        # self.history: list[tuple[str, str]] = []
-        # self.explored_locations: dict[str, set[str]] = {}
-        # self.current_location: str = ""
     def initialize(self, game: str = "zork1"):
         """Initialize or reset the game."""
         self.game_name = game
         self.env = TextAdventureEnv(game)
         self.state = self.env.reset()
-        # TODO: Reset your state tracking here
         return self.state.observation
     def step(self, action: str) -> str:
@@ -77,11 +101,16 @@ class GameManager:
         if self.env is None:
             self.initialize()
         self.state = self.env.step(action)
-        # TODO: Update your state tracking here
-        # self.history.append((action, self.state.observation))
-        # Update location tracking, etc.
         return self.state.observation
@@ -92,6 +121,11 @@ class GameManager:
     def get_moves(self) -> int:
         """Get number of moves taken."""
         return self.state.moves if self.state else 0
 # Global game manager
@@ -135,69 +169,111 @@ def play_action(action: str) -> str:
     # TODO: You might want to add action validation here
     # TODO: You might want to include score changes in the response
     result = game.step(action)
     # Optional: Append score info
-    # result += f"\n[Score: {game.get_score()} | Moves: {game.get_moves()}]"
     return result
-# TODO: Implement additional tools to help your agent
-# @mcp.tool()
-# def memory() -> str:
-#     """
-#     Get the current game state summary.
-#
-#     Returns:
-#         A summary including current location, score, moves, and recent history
-#     """
-#     game = get_game()
-#     # TODO: Return useful state information
-#     pass
-# @mcp.tool()
-# def inventory() -> str:
-#     """
-#     Check what the player is carrying.
-#
-#     Returns:
-#         List of items in the player's inventory
-#     """
-#     game = get_game()
-#     result = game.step("inventory")
-#     return result
-# @mcp.tool()
-# def get_map() -> str:
-#     """
-#     Get a map of explored locations.
-#
-#     Returns:
-#         A text representation of explored locations and connections
-#     """
-#     game = get_game()
-#     # TODO: Return map of explored locations
-#     pass
-# @mcp.tool()
-# def get_valid_actions() -> str:
-#     """
-#     Get a list of likely valid actions from the current location.
-#
-#     Returns:
-#         List of actions that might work here
-#     """
-#     # This is a hint: Jericho provides get_valid_actions()
-#     game = get_game()
-#     if game.env and game.env.env:
-#         valid = game.env.env.get_valid_actions()
-#         return "Valid actions: " + ", ".join(valid[:20])
-#     return "Could not determine valid actions"
 # =============================================================================

 import sys
 import os
+from collections import defaultdict
+from pathlib import Path
+# Add a path that actually contains the games package (works for template and sibling-repo layout)
+_here = Path(__file__).resolve().parent
+_candidate_roots = [
+    _here.parent,                  # .../zork/
+    _here.parent / "Agentic-zork", # sibling repo with games/
+]
+for _root in _candidate_roots:
+    if (_root / "games").exists():
+        sys.path.insert(0, str(_root))
+        break
+else:
+    sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from fastmcp import FastMCP
 from games.zork_env import TextAdventureEnv
         self.env: TextAdventureEnv = None
         self.state = None
         self.game_name: str = ""
+        self.history: list[tuple[str, str, int, int]] = []  # action, observation, score, moves
+        self.visited_locations: set[str] = set()
+        self.transitions: dict[str, dict[str, str]] = defaultdict(dict)
+        self.current_location: str = "Unknown"
+    def _loc_name(self, state) -> str:
+        """Best-effort location string from env or the observation header."""
+        if state and getattr(state, "location", "Unknown") not in ("Unknown", None, ""):
+            return str(state.location).strip()
+        if state and getattr(state, "observation", ""):
+            first_line = state.observation.splitlines()[0].strip()
+            return first_line or "Unknown"
+        return "Unknown"
     def initialize(self, game: str = "zork1"):
         """Start (or restart) ``game``, clear all tracking, and return the opening observation."""
         self.game_name = game
         self.env = TextAdventureEnv(game)
         self.state = self.env.reset()
         # Fresh bookkeeping for the new session.
         self.history = []
         self.transitions = defaultdict(dict)
         start_location = self._loc_name(self.state)
         self.current_location = start_location
         self.visited_locations = {start_location}
         return self.state.observation
     def step(self, action: str) -> str:
         if self.env is None:
             self.initialize()
+        prev_location = self.current_location
         self.state = self.env.step(action)
+        self.current_location = self._loc_name(self.state)
+        self.visited_locations.add(self.current_location)
+        self.history.append(
+            (action, self.state.observation, self.state.score, self.state.moves)
+        )
+        if prev_location and self.current_location and self.current_location != "Unknown":
+            self.transitions[prev_location][action] = self.current_location
         return self.state.observation
     def get_moves(self) -> int:
         """Return the move count from the current state, or 0 when there is no state yet."""
         if not self.state:
             return 0
         return self.state.moves
+    def get_inventory(self) -> list[str]:
+        if self.state and getattr(self.state, "inventory", None) is not None:
+            return list(self.state.inventory)
+        return []
 # Global game manager
     # TODO: You might want to add action validation here
     # TODO: You might want to include score changes in the response
+    action = action.strip()
+    if not action:
+        return "Action cannot be empty."
     result = game.step(action)
     # Optional: Append score info
+    result += (
+        f"\n[Score: {game.get_score()} | Moves: {game.get_moves()} | "
+        f"Location: {game.current_location}]"
+    )
     return result
+@mcp.tool()
+def memory(limit: int = 6) -> str:
+    """
+    Get the current game state summary.
+    Args:
+        limit: Number of recent steps to include.
+    """
+    game = get_game()
+    if game.state is None:
+        game.initialize()
+    state = game.state
+    lines = [
+        f"Location: {game.current_location}",
+        f"Score: {state.score}/{state.max_score} | Moves: {state.moves} | Last reward: {state.reward}",
+    ]
+    inventory = game.get_inventory()
+    inv_str = ", ".join(inventory) if inventory else "(empty or unknown)"
+    lines.append(f"Inventory: {inv_str}")
+    lines.append(f"Visited locations: {len(game.visited_locations)}")
+    if game.history:
+        lines.append("Recent:")
+        for act, obs, score, mv in game.history[-limit:]:
+            snippet = obs.replace("\n", " ")
+            if len(snippet) > 120:
+                snippet = snippet[:117] + "..."
+            lines.append(f"- {mv:03d} [{score}] {act}: {snippet}")
+    else:
+        lines.append("Recent: (no actions yet)")
+    return "\n".join(lines)
+@mcp.tool()
+def inventory() -> str:
+    """
+    Check what the player is carrying without spending a move.
+    """
+    game = get_game()
+    if game.state is None:
+        game.initialize()
+    items = game.get_inventory()
+    if not items:
+        return "Inventory: (empty or not reported by engine)"
+    return "Inventory: " + ", ".join(items)
+@mcp.tool()
+def get_map() -> str:
+    """
+    Get a map of explored locations.
+    Returns:
+        A text representation of explored locations and connections
+    """
+    game = get_game()
+    if not game.transitions:
+        return "Map is empty. Explore a bit more first."
+    lines = ["Known connections:"]
+    for loc, edges in sorted(game.transitions.items()):
+        edge_str = "; ".join(f"{act} -> {dest}" for act, dest in sorted(edges.items()))
+        lines.append(f"- {loc}: {edge_str}")
+    unseen = game.visited_locations - set(game.transitions.keys())
+    if unseen:
+        lines.append("Visited without exits mapped: " + ", ".join(sorted(unseen)))
+    return "\n".join(lines)
+@mcp.tool()
+def get_valid_actions() -> str:
+    """
+    Get a list of likely valid actions from the current location.
+    Returns:
+        List of actions that might work here
+    """
+    game = get_game()
+    if game.env and game.env.env:
+        try:
+            valid = game.env.env.get_valid_actions()
+            trimmed = ", ".join(valid[:20])
+            return "Valid actions (top 20): " + trimmed
+        except Exception:
+            pass
+    return "Could not determine valid actions"
 # =============================================================================