text-adventure-template

Sleeping

App Files Files Community

clemdrl committed on Feb 22

Commit

c1eb31c

1 Parent(s): 615a63b

Working MCP tools + ReAct loop

Browse files

Files changed (2) hide show

agent.py +135 -8
mcp_server.py +94 -44

agent.py CHANGED Viewed

@@ -172,10 +172,9 @@ class StudentAgent:
     def __init__(self):
         """Initialize your agent here."""
-        # TODO: Initialize any state tracking you need
-        # self.history = []
-        # self.visited_locations = set()
-        pass
     async def run(
         self,
@@ -224,20 +223,97 @@ class StudentAgent:
         # Placeholder implementation - replace with your code
         locations_visited = set()
-        history = []
         final_score = 0
         moves = 0
         # TODO: Your implementation here
         # ...
         return RunResult(
             final_score=final_score,
             max_score=350,  # Zork1 max score, adjust if needed
             moves=moves,
             locations_visited=locations_visited,
             game_completed=False,
-            history=history,
         )
     def _build_prompt(self, observation: str, history: list) -> str:
@@ -247,7 +323,25 @@ class StudentAgent:
         TODO: Implement this to create effective prompts
         """
         # TODO: Combine system prompt, history, and current observation
-        pass
     def _parse_response(self, response: str) -> tuple[str, str, dict]:
         """
@@ -262,7 +356,40 @@ class StudentAgent:
         # THOUGHT: ...
         # TOOL: ...
         # ARGS: {...}
-        pass
     def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
         """

     def __init__(self):
         """Initialize the agent's state: step history, last observation, and anti-loop keys."""
+        self.history: list[tuple[str, str, str]] = []  # (thought, action, observation)
+        self.last_observation: str = ""
+        self.visited_obs_hashes: list[str] = []  # small, basic anti-loop guard
     async def run(
         self,
         # Placeholder implementation - replace with your code
         locations_visited = set()
         final_score = 0
         moves = 0
+        r = await client.call_tool("play_action", {"action": "look"})
+        # print("DEBUG type(r):", type(r))
+        # print("DEBUG dir(r) sample:", [a for a in dir(r) if a in ("content","result","data","message","output","text","value")])
+        # print("DEBUG repr(r):", repr(r))
+        # # si r a un attribut content, affiche-le
+        # if hasattr(r, "content"):
+        #     print("DEBUG type(r.content):", type(r.content))
+        #     print("DEBUG repr(r.content):", repr(r.content))
+        #     if r.content:
+        #         print("DEBUG type(r.content[0]):", type(r.content[0]))
+        #         print("DEBUG repr(r.content[0]):", repr(r.content[0]))
+        #         print("DEBUG dir(r.content[0]) sample:", [a for a in dir(r.content[0]) if a in ("text","value","data","content")])
+        observation = r.data if r else "No response"
+        self.last_observation = observation
         # TODO: Your implementation here
         # ...
+        for step in range(max_steps):
+            prompt = self._build_prompt(observation, self.history)
+            # 2) Appel LLM
+            llm_text = self._call_llm(prompt=prompt, system_prompt=SYSTEM_PROMPT, seed=seed + step)
+            # 3) Parse -> tool + args
+            thought, tool_name, args = self._parse_response(llm_text)
+            # fallback si le modèle sort n’importe quoi
+            if tool_name not in ("play_action", "inventory", "memory"):
+                tool_name = "play_action"
+                args = {"action": "look"}
+            if tool_name == "play_action" and ("action" not in args or not isinstance(args["action"], str)):
+                args = {"action": "look"}
+            # 4) Anti-loop très simple: si observation identique trop souvent, force look/inventory
+            obs_key = (observation.strip()[:200]).lower()
+            self.visited_obs_hashes.append(obs_key)
+            if len(self.visited_obs_hashes) > 6:
+                self.visited_obs_hashes.pop(0)
+            if self.visited_obs_hashes.count(obs_key) >= 3:
+                tool_name = "play_action"
+                args = {"action": "inventory" if step % 2 == 0 else "look"}
+                thought = thought + " (anti-loop fallback)"
+            # 5) Appel tool MCP
+            res = await client.call_tool(tool_name, args)
+            new_observation = res.data if res else "No response"
+            # 6) Update state
+            action_str = args.get("action", tool_name)
+            self.history.append((thought, action_str, new_observation))
+            if len(self.history) > 20:
+                self.history = self.history[-20:]
+            if verbose:
+                print(f"\nSTEP {step+1}/{max_steps}")
+                print(f"THOUGHT: {thought}")
+                print(f"TOOL: {tool_name}")
+                print(f"ARGS: {args}")
+                print(f"OBS:\n{new_observation}\n")
+            observation = new_observation
+            self.last_observation = observation
+        status = await client.call_tool("get_status", {})
+        status_txt = status.data if status else ""
+        m = re.search(r"SCORE:\s*(\d+)", status_txt)
+        if m: final_score = int(m.group(1))
+        m = re.search(r"MOVES:\s*(\d+)", status_txt)
+        if m: moves = int(m.group(1))
+        # location -> set
+        m = re.search(r"LOCATION:\s*(.*)", status_txt)
+        if m and m.group(1).strip():
+            locations_visited.add(m.group(1).strip())
         return RunResult(
             final_score=final_score,
             max_score=350,  # Zork1 max score, adjust if needed
             moves=moves,
             locations_visited=locations_visited,
             game_completed=False,
+            history=self.history,
         )
     def _build_prompt(self, observation: str, history: list) -> str:
         TODO: Implement this to create effective prompts
         """
         # TODO: Combine system prompt, history, and current observation
+        recent = history[-6:]
+        hist_txt = ""
+        for i, (t, a, o) in enumerate(recent, 1):
+            o_short = o.strip().replace("\n", " ")
+            if len(o_short) > 300:
+                o_short = o_short[:300] + "..."
+            hist_txt += f"{i}. THOUGHT: {t}\n   ACTION: {a}\n   OBS: {o_short}\n"
+        obs_short = observation.strip()
+        if len(obs_short) > 1200:
+            obs_short = obs_short[:1200] + "..."
+        return (
+            f"GAME: {observation}\n\n"
+            f"RECENT HISTORY:\n{hist_txt if hist_txt else '(none)'}\n"
+            f"CURRENT OBSERVATION:\n{obs_short}\n\n"
+            f"Choose ONE next tool call."
+        )
     def _parse_response(self, response: str) -> tuple[str, str, dict]:
         """
         # THOUGHT: ...
         # TOOL: ...
         # ARGS: {...}
+        thought = ""
+        tool = ""
+        args: dict = {}
+        # tolérant aux espaces/variantes
+        thought_m = re.search(r"THOUGHT:\s*(.*)", response)
+        tool_m = re.search(r"TOOL:\s*(.*)", response)
+        args_m = re.search(r"ARGS:\s*(\{.*\})", response, flags=re.DOTALL)
+        if thought_m:
+            thought = thought_m.group(1).strip()
+        if tool_m:
+            tool = tool_m.group(1).strip()
+        if args_m:
+            raw = args_m.group(1).strip()
+            try:
+                args = json.loads(raw)
+            except Exception:
+                args = {}
+        # fallback si pas trouvé
+        if not tool:
+            tool = "play_action"
+        if tool == "play_action" and "action" not in args:
+            # essaie de deviner une action simple depuis la réponse
+            # ex: le modèle écrit "ACTION: look"
+            act_m = re.search(r"ACTION:\s*(.*)", response)
+            args = {"action": act_m.group(1).strip()} if act_m else {"action": "look"}
+        if not thought:
+            thought = "No thought"
+        return thought, tool, args
     def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
         """

mcp_server.py CHANGED Viewed

@@ -33,14 +33,12 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 from fastmcp import FastMCP
 from games.zork_env import TextAdventureEnv
 # =============================================================================
 # Create the MCP Server
 # =============================================================================
 mcp = FastMCP("Student Text Adventure Server")
 # =============================================================================
 # Game State Management
 # =============================================================================
@@ -60,9 +58,11 @@ class GameManager:
         self.state = None
         self.game_name: str = ""
         # TODO: Add more state tracking
-        # self.history: list[tuple[str, str]] = []
-        # self.explored_locations: dict[str, set[str]] = {}
-        # self.current_location: str = ""
     def initialize(self, game: str = "zork1"):
         """Initialize or reset the game."""
@@ -70,6 +70,12 @@ class GameManager:
         self.env = TextAdventureEnv(game)
         self.state = self.env.reset()
         # TODO: Reset your state tracking here
         return self.state.observation
     def step(self, action: str) -> str:
@@ -77,13 +83,25 @@ class GameManager:
         if self.env is None:
             self.initialize()
         self.state = self.env.step(action)
-        # TODO: Update your state tracking here
-        # self.history.append((action, self.state.observation))
-        # Update location tracking, etc.
-        return self.state.observation
     def get_score(self) -> int:
         """Get current score."""
@@ -93,6 +111,15 @@ class GameManager:
         """Get number of moves taken."""
         return self.state.moves if self.state else 0
 # Global game manager
 _game = GameManager()
@@ -145,44 +172,67 @@ def play_action(action: str) -> str:
 # TODO: Implement additional tools to help your agent
-# @mcp.tool()
-# def memory() -> str:
-#     """
-#     Get the current game state summary.
-#
-#     Returns:
-#         A summary including current location, score, moves, and recent history
-#     """
-#     game = get_game()
-#     # TODO: Return useful state information
-#     pass
-# @mcp.tool()
-# def inventory() -> str:
-#     """
-#     Check what the player is carrying.
-#
-#     Returns:
-#         List of items in the player's inventory
-#     """
-#     game = get_game()
-#     result = game.step("inventory")
-#     return result
-# @mcp.tool()
-# def get_map() -> str:
-#     """
-#     Get a map of explored locations.
-#
-#     Returns:
-#         A text representation of explored locations and connections
-#     """
-#     game = get_game()
-#     # TODO: Return map of explored locations
-#     pass
 # @mcp.tool()
 # def get_valid_actions() -> str:

 from fastmcp import FastMCP
 from games.zork_env import TextAdventureEnv
 # =============================================================================
 # Create the MCP Server
 # =============================================================================
 mcp = FastMCP("Student Text Adventure Server")
 # =============================================================================
 # Game State Management
 # =============================================================================
         self.state = None
         self.game_name: str = ""
         # TODO: Add more state tracking
+        self.history: list[tuple[str, str]] = []
+        self.explored_locations: dict[str, set[str]] = {}
+        self.current_location: str = ""
+        self.map_edges: dict[str, dict[str, str]] = {}
+        self.last_location: str = ""
     def initialize(self, game: str = "zork1"):
         """Initialize or reset the game."""
         self.env = TextAdventureEnv(game)
         self.state = self.env.reset()
         # TODO: Reset your state tracking here
+        self.history = []
+        self.map_edges = {}
+        self.last_location = self._extract_location(self.state.observation)
+        obs0 = self.state.observation or ""
+        self.history.append(("[RESET]", obs0.strip().replace("\n", " ")[:240]))
         return self.state.observation
     def step(self, action: str) -> str:
         if self.env is None:
             self.initialize()
+        prev_loc = self.last_location
         self.state = self.env.step(action)
+        obs = self.state.observation or ""
+        # update location + map
+        new_loc = self._extract_location(obs)
+        if prev_loc and new_loc and action:
+            self.map_edges.setdefault(prev_loc, {})[action.strip().lower()] = new_loc
+        self.last_location = new_loc
+        # update history (court)
+        obs_short = obs.strip().replace("\n", " ")
+        if len(obs_short) > 240:
+            obs_short = obs_short[:240] + "..."
+        self.history.append((action, obs_short))
+        if len(self.history) > 30:
+            self.history = self.history[-30:]
+        return obs
     def get_score(self) -> int:
         """Get current score."""
         """Get number of moves taken."""
         return self.state.moves if self.state else 0
+    def _extract_location(self, observation: str) -> str:
+        """Heuristic: the first non-empty line is often the room name (returned capped at 80 chars)."""
+        for line in (observation or "").splitlines():
+            line = line.strip()
+            if line:
+                # cap the length so an overly long line (a description) is truncated
+                return line[:80]
+        return ""
 # Global game manager
 _game = GameManager()
 # TODO: Implement additional tools to help your agent
+@mcp.tool()
+def memory() -> str:
+    """
+    Get the current game state summary.
+    Returns:
+        A newline-joined summary: game name, location (when known), score, moves, and the last 8 history entries
+    """
+    game = get_game()
+    # Build the summary line by line; the lines are joined with newlines at the end
+    lines = []
+    lines.append(f"GAME: {game.game_name}")
+    if game.last_location:
+        lines.append(f"LOCATION: {game.last_location}")
+    lines.append(f"SCORE: {game.get_score()}  MOVES: {game.get_moves()}")
+    lines.append("RECENT:")
+    for a, o in game.history[-8:]:
+        lines.append(f"- action: {a}")
+        lines.append(f"  obs: {o}")
+    return "\n".join(lines)
+@mcp.tool()
+def inventory() -> str:
+    """
+    Check what the player is carrying by sending "inventory" to the game as a step.
+    Returns:
+        Whatever game.step("inventory") returns (expected to list the carried items)
+    """
+    game = get_game()
+    result = game.step("inventory")
+    return result
+@mcp.tool()
+def get_map() -> str:
+    """
+    Get a map of explored locations.
+    Returns:
+        "MAP: (empty)" when no edge is recorded, else "MAP:" followed by one "- src --action--> dst" line per edge
+    """
+    game = get_game()
+    if not game.map_edges:
+        return "MAP: (empty)"
+    out = ["MAP:"]
+    for src, edges in game.map_edges.items():
+        for act, dst in edges.items():
+            out.append(f"- {src} --{act}--> {dst}")
+    return "\n".join(out)
+@mcp.tool()
+def get_status() -> str:
+    """
+    Simple, stable tool: report location, score and moves without sending a command to the game.
+    """
+    game = get_game()
+    return f"LOCATION: {game.last_location}\nSCORE: {game.get_score()}\nMOVES: {game.get_moves()}"
 # @mcp.tool()
 # def get_valid_actions() -> str: