text-adventure-template

Sleeping

App Files Files Community

malekfeki14 committed on Feb 22

Commit

2ede02f

verified ·

1 Parent(s): 0d5fd5e

Update agent.py

Browse files

Files changed (1) hide show

agent.py +129 -47

agent.py CHANGED Viewed

@@ -10,6 +10,11 @@ Key upgrades:
 - Oscillation detection to avoid A<->B loops
 - More robust fail/stall detection
 - Fully tool-safe (never trusts tool field)
 """
 import os
@@ -49,17 +54,37 @@ def call_llm(prompt: str, system: str, seed: int) -> str:
 # ==========================================================
-# RESULT STRUCTURE
 # ==========================================================
 @dataclass
 class RunResult:
     final_score: int
     max_score: int
-    moves: int
     locations_visited: set[str]
     game_completed: bool
     error: Optional[str] = None
-    history: list = field(default_factory=list)
 # ==========================================================
@@ -141,8 +166,8 @@ class StudentAgent:
     def __init__(self):
         self.tried: Dict[str, Set[str]] = {}
         self.failed: Dict[str, Set[str]] = {}
-        self.score = 0
         self.global_failed: Set[str] = set()
         self.last_locations: List[str] = []
         self.last_actions: List[str] = []
@@ -151,13 +176,21 @@ class StudentAgent:
     # MAIN LOOP
     # ------------------------------------------------------
     async def run(self, client, game, max_steps, seed, verbose=False):
-        history: List[Tuple[str, str]] = []
         locations_visited: Set[str] = set()
-        moves = 0
         obs = self._tool_to_text(await client.call_tool("play_action", {"action": "look"}))
         for step in range(max_steps):
             # Stable location from MCP if available
             loc = await self._safe_tool(client, "location", {}, fallback=None)
             if not loc:
@@ -181,26 +214,26 @@ class StudentAgent:
                 action = forced
             else:
                 prompt = f"""Observation:
-                {obs}
-                Location: {loc}
-                Inventory: {inv_txt}
-                Objects here: {objs_here_txt}
-                Explored locations (map):
-                {map_txt}
-                Memory:
-                {mem}
-                Valid actions:
-                {valid_txt}
-                Already tried here: {sorted(self.tried.get(loc, set()))}
-                Failed here: {sorted(self.failed.get(loc, set()))}
-                Recent locations: {self.last_locations}
-                Recent actions: {self.last_actions[-10:]}
-            """
                 llm_out = call_llm(prompt, SYSTEM_PROMPT, seed + step)
                 action = self._extract_action_only(llm_out)
@@ -216,47 +249,84 @@ class StudentAgent:
             # Record attempt
             self.tried.setdefault(loc, set()).add(action)
             # Execute
             new_obs = self._tool_to_text(await client.call_tool("play_action", {"action": action}))
-            new_score = self._extract_score(new_obs)
             failed = self._is_failure(new_obs)
-            stalled = self._is_stalled(obs, new_obs)
-            if failed or (stalled and new_score <= self.score):
                 self.failed.setdefault(loc, set()).add(action)
                 if self._should_global_fail(action):
                     self.global_failed.add(action)
-            if new_score > self.score:
-                self.score = new_score
-                # if it just paid off, don't blacklist it globally by accident
                 if action in self.global_failed:
                     self.global_failed.discard(action)
-            obs = new_obs
-            moves += 1
             self.last_actions.append(action)
             self.last_actions = self.last_actions[-20:]
-            history.append((action, obs[:240]))
             if verbose:
                 print("\n" + "=" * 70)
                 print(f"STEP {step + 1}/{max_steps}")
                 print(f"Location: {loc}")
                 print(f"Action: {action}")
-                print(f"Score: {self.score}")
                 print("-" * 70)
-                print(obs.strip())
                 print("=" * 70)
             if "GAME OVER" in obs:
                 break
         return RunResult(
-            final_score=self.score,
             max_score=350,
-            moves=moves,
             locations_visited=locations_visited,
             game_completed=("GAME OVER" in obs),
             history=history,
@@ -322,7 +392,7 @@ class StudentAgent:
     def _parse_valid_actions(self, valid_txt: str) -> List[str]:
         acts: List[str] = []
-        for line in valid_txt.splitlines():
             line = line.strip()
             if not line:
                 continue
@@ -375,7 +445,7 @@ class StudentAgent:
         low = (obs or "").lower()
         if "pitch black" in low or "grue" in low or "too dark" in low:
             # try to light if possible
-            for cand in ("light lamp", "turn on lamp", "light lantern", "turn on lantern"):
                 mapped = self._map_to_valid(cand, valid_actions)
                 if mapped and mapped in valid_actions:
                     return mapped
@@ -452,7 +522,8 @@ class StudentAgent:
     # ------------------------------------------------------
     def _normalize_obs(self, text: str) -> str:
         t = (text or "").lower().strip()
-        t = re.sub(r"\[score:\s*\d+\s*\|\s*moves:\s*\d+\]\s*$", "", t).strip()
         t = re.sub(r"\s+", " ", t).strip()
         return t
@@ -482,13 +553,24 @@ class StudentAgent:
         return i
     # ------------------------------------------------------
-    # SCORE / LOCATION HELPERS
     # ------------------------------------------------------
-    def _extract_score(self, text: str) -> int:
-        m = re.search(r"Score:\s*(\d+)", text)
         if m:
-            return int(m.group(1))
-        return self.score
     def _best_effort_location(self, text: str) -> str:
         lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]

 - Oscillation detection to avoid A<->B loops
 - More robust fail/stall detection
 - Fully tool-safe (never trusts tool field)
+FIXES / EXTENSIONS:
+- Stores MUCH richer history (StepRecord) including before/after obs + before/after score and moves values
+- Prints the FULL result (new_obs) of each step when verbose=True
+- RunResult.moves now reflects TRUE game moves from server footer when available
 """
 import os
 # ==========================================================
+# STEP + RUN RESULT STRUCTURES (RICH HISTORY)
 # ==========================================================
+@dataclass
+class StepRecord:
+    step: int
+    location: str
+    action: str
+    observation_before: str
+    observation_after: str
+    score_before: int
+    score_after: int
+    moves_before: int
+    moves_after: int
+    failed: bool
+    stalled: bool
 @dataclass
 class RunResult:
     final_score: int
     max_score: int
+    moves: int  # true in-game moves when available
+    agent_steps: int  # number of loop iterations (max_steps cap)
     locations_visited: set[str]
     game_completed: bool
     error: Optional[str] = None
+    history: list[StepRecord] = field(default_factory=list)
 # ==========================================================
     def __init__(self):
         self.tried: Dict[str, Set[str]] = {}
         self.failed: Dict[str, Set[str]] = {}
+        self.score = 0
         self.global_failed: Set[str] = set()
         self.last_locations: List[str] = []
         self.last_actions: List[str] = []
     # MAIN LOOP
     # ------------------------------------------------------
     async def run(self, client, game, max_steps, seed, verbose=False):
+        history: List[StepRecord] = []
         locations_visited: Set[str] = set()
+        # Agent loop iterations (can differ from in-game moves)
+        agent_steps = 0
+        # Prime observation
         obs = self._tool_to_text(await client.call_tool("play_action", {"action": "look"}))
+        # Seed initial score from footer (if present)
+        s0, m0 = self._extract_score_moves(obs)
+        self.score = s0
         for step in range(max_steps):
+            agent_steps += 1
             # Stable location from MCP if available
             loc = await self._safe_tool(client, "location", {}, fallback=None)
             if not loc:
                 action = forced
             else:
                 prompt = f"""Observation:
+{obs}
+Location: {loc}
+Inventory: {inv_txt}
+Objects here: {objs_here_txt}
+Explored locations (map):
+{map_txt}
+Memory:
+{mem}
+Valid actions:
+{valid_txt}
+Already tried here: {sorted(self.tried.get(loc, set()))}
+Failed here: {sorted(self.failed.get(loc, set()))}
+Recent locations: {self.last_locations}
+Recent actions: {self.last_actions[-10:]}
+"""
                 llm_out = call_llm(prompt, SYSTEM_PROMPT, seed + step)
                 action = self._extract_action_only(llm_out)
             # Record attempt
             self.tried.setdefault(loc, set()).add(action)
+            # BEFORE snapshot (for richer history)
+            obs_before = obs
+            score_before, moves_before = self._extract_score_moves(obs_before)
             # Execute
             new_obs = self._tool_to_text(await client.call_tool("play_action", {"action": action}))
+            # AFTER snapshot
+            score_after, moves_after = self._extract_score_moves(new_obs)
             failed = self._is_failure(new_obs)
+            stalled = self._is_stalled(obs_before, new_obs)
+            # Fail/stall bookkeeping
+            if failed or (stalled and score_after <= self.score):
                 self.failed.setdefault(loc, set()).add(action)
                 if self._should_global_fail(action):
                     self.global_failed.add(action)
+            # Update score tracker
+            if score_after > self.score:
+                self.score = score_after
                 if action in self.global_failed:
                     self.global_failed.discard(action)
+            # Save step record (FULL before/after result)
+            history.append(
+                StepRecord(
+                    step=step + 1,
+                    location=loc,
+                    action=action,
+                    observation_before=obs_before,
+                    observation_after=new_obs,
+                    score_before=score_before,
+                    score_after=score_after,
+                    moves_before=moves_before,
+                    moves_after=moves_after,
+                    failed=failed,
+                    stalled=stalled,
+                )
+            )
+            # Update rolling recent lists
             self.last_actions.append(action)
             self.last_actions = self.last_actions[-20:]
+            # IMPORTANT: set obs for next loop
+            obs = new_obs
+            # Print the full result of this step
             if verbose:
                 print("\n" + "=" * 70)
                 print(f"STEP {step + 1}/{max_steps}")
                 print(f"Location: {loc}")
                 print(f"Action: {action}")
+                print(f"Score: {score_after} (was {score_before})")
+                print(f"Moves: {moves_after} (was {moves_before})")
+                print(f"failed={failed} stalled={stalled}")
                 print("-" * 70)
+                print(new_obs.strip())
                 print("=" * 70)
             if "GAME OVER" in obs:
                 break
+        # Final true score/moves from last obs footer if possible
+        final_score, final_moves = self._extract_score_moves(obs)
+        if final_score < 0:
+            final_score = self.score
+        if final_moves < 0:
+            # Fallback: if footer missing, estimate from last record
+            final_moves = history[-1].moves_after if history and history[-1].moves_after >= 0 else agent_steps
         return RunResult(
+            final_score=final_score,
             max_score=350,
+            moves=final_moves,
+            agent_steps=agent_steps,
             locations_visited=locations_visited,
             game_completed=("GAME OVER" in obs),
             history=history,
     def _parse_valid_actions(self, valid_txt: str) -> List[str]:
         acts: List[str] = []
+        for line in (valid_txt or "").splitlines():
             line = line.strip()
             if not line:
                 continue
         low = (obs or "").lower()
         if "pitch black" in low or "grue" in low or "too dark" in low:
             # try to light if possible
+            for cand in ("light lamp", "turn on lamp", "light lantern", "turn on lantern", "light torch"):
                 mapped = self._map_to_valid(cand, valid_actions)
                 if mapped and mapped in valid_actions:
                     return mapped
     # ------------------------------------------------------
     def _normalize_obs(self, text: str) -> str:
         t = (text or "").lower().strip()
+        # remove server footer if present
+        t = re.sub(r"\[score:\s*\d+\s*\|\s*moves:\s*\d+\]\s*$", "", t, flags=re.I).strip()
         t = re.sub(r"\s+", " ", t).strip()
         return t
         return i
     # ------------------------------------------------------
+    # SCORE / MOVES / LOCATION HELPERS
     # ------------------------------------------------------
+    def _extract_score_moves(self, text: str) -> tuple[int, int]:
+        """
+        Prefer the MCP server footer: [Score: X | Moves: Y]
+        Returns (score, moves). Moves=-1 if unavailable.
+        """
+        if not text:
+            return (self.score, -1)
+        m = re.search(r"\[Score:\s*(\d+)\s*\|\s*Moves:\s*(\d+)\]", text)
         if m:
+            return (int(m.group(1)), int(m.group(2)))
+        # fallback
+        m2 = re.search(r"Score:\s*(\d+)", text)
+        score = int(m2.group(1)) if m2 else self.score
+        return (score, -1)
     def _best_effort_location(self, text: str) -> str:
         lines = [ln.strip() for ln in (text or "").splitlines() if ln.strip()]