test1

Sleeping

App Files Files Community

bouhss committed on Feb 22

Commit

42e80f7

verified ·

1 Parent(s): 92b59a5

Update agent.py

Browse files

Files changed (1) hide show

agent.py +407 -321

agent.py CHANGED Viewed

@@ -1,49 +1,49 @@
 """
-Student Agent (Best practical submission)
-- Works even if HF_TOKEN is missing (no crash).
-- Uses peek_action + get_valid_actions + server meta tags to explore and gain score.
-- Uses LLM only as fallback when HF_TOKEN is available.
-- Always returns non-zero moves (internal counter).
 """
 import json
 import os
 import re
-import time
 from dataclasses import dataclass, field
-from typing import Optional, Any
-from collections import defaultdict, deque
 from dotenv import load_dotenv
 from huggingface_hub import InferenceClient
 load_dotenv()
-LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
 _hf_token = os.getenv("HF_TOKEN")
 LLM_CLIENT = InferenceClient(token=_hf_token) if _hf_token else None
-def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 180) -> str:
     if LLM_CLIENT is None:
-        raise RuntimeError("LLM unavailable (HF_TOKEN missing).")
-    messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}]
-    for attempt in range(3):
-        try:
-            resp = LLM_CLIENT.chat.completions.create(
-                model=LLM_MODEL,
-                messages=messages,
-                temperature=0.0,
-                max_tokens=max_tokens,
-                seed=seed,
-            )
-            return resp.choices[0].message.content
-        except Exception:
-            if attempt < 2:
-                time.sleep(2 ** attempt)
-                continue
-            raise
 @dataclass
@@ -51,337 +51,423 @@ class RunResult:
     final_score: int
     max_score: int
     moves: int
-    locations_visited: set[str]
     game_completed: bool
     error: Optional[str] = None
     history: list[tuple[str, str, str]] = field(default_factory=list)
-SYSTEM_PROMPT = """You are an expert text-adventure agent.
-Output EXACTLY:
-THOUGHT: ...
-TOOL: play_action
-ARGS: {"action": "<one candidate action>"}
-Rules:
-- Choose exactly one action from the candidate list.
-- Do not invent actions outside the list.
-- No extra text, no markdown.
-"""
-MOVE_ALIASES = {"n":"north","s":"south","e":"east","w":"west","u":"up","d":"down","ne":"northeast","nw":"northwest","se":"southeast","sw":"southwest"}
-BAD_PREFIXES = ("save", "restore", "quit", "restart", "help", "verbose", "script", "unscript", "version")
-BAD_EXACT = {"wait", "z"}
 class StudentAgent:
-    def __init__(self):
-        self.score = 0
-        self.max_score = 0
-        self.moves = 0
-        self._internal_moves = 0
-        self.locations_visited: set[str] = set()
-        self.last_location = "Unknown"
-        self.tried = defaultdict(int)
-        self.recent_actions = deque(maxlen=10)
-        self.recent_obs = deque(maxlen=6)
-        self.valid_cache = {}
-    async def run(self, client, game: str, max_steps: int, seed: int, verbose: bool = False) -> RunResult:
-        history: list[tuple[str, str, str]] = []
-        try:
-            tools = await client.list_tools()
-            tool_names = {t.name for t in tools}
-            def has(name: str) -> bool:
-                return name in tool_names
-            # initial look
-            obs = await self._call_tool_text(client, "play_action", {"action": "look"})
-            self._internal_moves += 1
-            self._update_from_text(obs)
-            self.last_location = self._extract_location(obs)
-            self.locations_visited.add(self.last_location)
-            for step in range(1, max_steps + 1):
-                loc = self._extract_location(obs)
-                self.last_location = loc
-                self.locations_visited.add(loc)
-                stuck = self._is_stuck(obs)
-                valid_actions = self.valid_cache.get(loc, [])
-                if has("get_valid_actions") and (stuck or not valid_actions or step % 6 == 0):
-                    va_txt = await self._call_tool_text(client, "get_valid_actions", {"limit": 60})
-                    valid_actions = self._parse_valid_actions(va_txt)
-                    if valid_actions:
-                        self.valid_cache[loc] = valid_actions
-                inv_txt = ""
-                if has("inventory") and (step == 1 or stuck or step % 8 == 0):
-                    inv_txt = await self._call_tool_text(client, "inventory", {})
-                candidates = self._make_candidates(obs, inv_txt, valid_actions, loc)
-                action, thought = None, ""
-                if has("peek_action") and candidates:
-                    action, thought = await self._choose_by_lookahead(client, loc, obs, candidates)
-                if not action:
-                    action, thought = await self._choose_fallback(obs, inv_txt, candidates, seed, step)
-                action = self._norm_action(action or "look")
-                obs2 = await self._call_tool_text(client, "play_action", {"action": action})
-                self._internal_moves += 1
-                self.tried[(loc, action.lower())] += 1
-                self.recent_actions.append(action.lower())
-                self.recent_obs.append((obs2 or "")[:220])
-                self._update_from_text(obs2)
-                new_loc = self._extract_location(obs2)
-                self.locations_visited.add(new_loc)
-                history.append((thought, f"play_action({action})", (obs2 or "")[:260]))
-                if verbose:
-                    print(f"\n--- step {step} ---\nTHOUGHT: {thought}\nACTION: {action}\n{obs2}")
-                obs = obs2
-                if self._is_game_over(obs):
-                    break
-            return RunResult(
-                final_score=self.score,
-                max_score=self.max_score,
-                moves=max(self.moves, self._internal_moves),
-                locations_visited=set(self.locations_visited),
-                game_completed=self._is_game_over(obs),
-                history=history,
-            )
-        except Exception as e:
-            return RunResult(
-                final_score=self.score,
-                max_score=self.max_score,
-                moves=max(self.moves, self._internal_moves),
-                locations_visited=set(self.locations_visited),
-                game_completed=False,
-                error=f"{type(e).__name__}: {e}",
-                history=history,
-            )
-    async def _call_tool_text(self, client, tool: str, args: dict) -> str:
-        r = await client.call_tool(tool, args)
-        return self._extract_text(r)
-    def _extract_text(self, result: Any) -> str:
-        if result is None:
-            return ""
-        if isinstance(result, list) and result:
-            part = result[0]
-            if hasattr(part, "text"):
-                return part.text or ""
-            if isinstance(part, dict) and "text" in part:
-                return part["text"] or ""
-            return str(part)
-        return str(result)
-    def _norm_action(self, a: str) -> str:
-        a = (a or "").strip()
-        low = a.lower()
-        return MOVE_ALIASES.get(low, a)
-    def _update_from_text(self, text: str) -> None:
-        m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\|\s*Location:\s*(.+?)\]", text or "")
-        if m:
-            self.score = int(m.group(1))
-            self.max_score = int(m.group(2))
-            self.moves = int(m.group(3))
-            self.last_location = m.group(4).strip()
-    def _extract_location(self, text: str) -> str:
-        m = re.search(r"\|\s*Location:\s*(.+?)\]", text or "")
-        if m and m.group(1).strip():
-            return m.group(1).strip()
-        for line in (text or "").splitlines():
-            line = line.strip()
-            if line and not line.startswith("[Score:"):
-                return line
-        return "Unknown"
-    def _extract_untried(self, text: str) -> list[str]:
-        m = re.search(r"\[Untried exits:\s*(.+?)\]", text or "")
-        if not m:
-            return []
-        return [self._norm_action(x.strip()).lower() for x in m.group(1).split(",") if x.strip()]
-    def _extract_interactions(self, text: str) -> list[str]:
-        m = re.search(r"\[Interactions:\s*(.+?)\]", text or "")
-        if not m:
-            return []
-        return [x.strip() for x in m.group(1).split(",") if x.strip()]
-    def _is_game_over(self, text: str) -> bool:
-        t = (text or "").lower()
-        return ("game over" in t) or ("you have died" in t) or ("you are dead" in t)
-    def _is_stuck(self, text: str) -> bool:
-        t = (text or "").lower()
-        bad = ["i don't understand", "you can't", "that's not", "not a verb", "nothing happens", "beg your pardon"]
-        rep = len(self.recent_obs) >= 3 and all(self.recent_obs[-1] == x for x in list(self.recent_obs)[-3:])
-        return any(b in t for b in bad) or rep
-    def _parse_valid_actions(self, txt: str) -> list[str]:
-        out = []
-        for line in (txt or "").splitlines():
-            line = line.strip()
-            if line.startswith("- "):
-                a = self._norm_action(line[2:].strip())
-                low = a.lower()
-                if not a:
                     continue
-                if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
                     continue
-                out.append(a)
-        # dedup
-        seen = set()
-        uniq = []
-        for a in out:
-            low = a.lower()
-            if low not in seen:
-                seen.add(low)
-                uniq.append(a)
-        return uniq
-    def _make_candidates(self, obs: str, inv_txt: str, valid_actions: list[str], loc: str) -> list[str]:
-        candidates, seen = [], set()
-        def add(a: str):
-            a = self._norm_action(a)
-            low = a.lower().strip()
-            if not a:
-                return
-            if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
-                return
-            if low not in seen:
-                seen.add(low)
-                candidates.append(a)
-        # from tags
-        for d in self._extract_untried(obs):
-            add(d)
-        for a in self._extract_interactions(obs):
-            add(a)
-        # from valid actions
-        for a in valid_actions[:25]:
-            add(a)
-        # basics
-        add("look")
-        add("inventory")
-        add("take all")
-        # avoid too repeated
-        cleaned = []
         for a in candidates:
-            if list(self.recent_actions).count(a.lower()) >= 3:
                 continue
-            cleaned.append(a)
-        return cleaned[:20]
-    async def _choose_by_lookahead(self, client, loc: str, obs: str, candidates: list[str]) -> tuple[Optional[str], str]:
-        base_score = self.score
-        untried = set(self._extract_untried(obs))
-        # shortlist
-        pr = []
-        for a in candidates:
-            low = a.lower().strip()
-            pr.append((0 if low in untried else 1, self.tried[(loc, low)], low, a))
-        pr.sort()
-        shortlist = [x[-1] for x in pr][:10]
-        best_a, best_u, best_th = None, -10**18, ""
-        for a in shortlist:
-            low = a.lower().strip()
-            if self.tried[(loc, low)] >= 4:
                 continue
-            peek = await self._call_tool_text(client, "peek_action", {"action": a})
-            if self._is_game_over(peek):
-                u = -1_000_000_000
-            else:
-                s_after = base_score
-                m = re.search(r"\[Score:\s*(\d+)\s*/", peek or "")
-                if m:
-                    s_after = int(m.group(1))
-                delta = max(0, s_after - base_score)
-                loc_after = self._extract_location(peek)
-                new_loc_bonus = 280 if (loc_after and loc_after not in self.locations_visited and loc_after != self._extract_location(obs)) else 0
-                untried_bonus = 120 if low in untried else 0
-                loop_pen = 90 * list(self.recent_actions).count(low)
-                stuck_pen = 180 if self._is_stuck(peek) else 0
-                u = delta * 900 + new_loc_bonus + untried_bonus - loop_pen - stuck_pen
-            if u > best_u:
-                best_u, best_a = u, a
-                best_th = f"Look-ahead chose '{a}' (utility={u})."
-        if best_a is None or best_u < -10000:
-            return None, "Look-ahead no good action; fallback."
-        return best_a, best_th
-    async def _choose_fallback(self, obs: str, inv_txt: str, candidates: list[str], seed: int, step: int) -> tuple[str, str]:
-        untried = self._extract_untried(obs)
-        if untried:
-            return untried[0], "Heuristic: try untried exit."
-        if not candidates:
-            return "look", "No candidates; fallback."
-        # LLM only if available
         try:
-            prompt = self._llm_prompt(obs, inv_txt, candidates[:10])
-            resp = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=160)
-            thought, args = self._parse_llm(resp)
-            act = self._norm_action(str(args.get("action", "")).strip())
-            canon = {x.lower(): x for x in candidates[:10]}
-            if act.lower() in canon:
-                return canon[act.lower()], thought or "LLM chose candidate."
         except Exception:
             pass
-        return candidates[0], "Fallback: first candidate."
-    def _llm_prompt(self, obs: str, inv_txt: str, candidates: list[str]) -> str:
-        parts = [
-            f"Score: {self.score}/{self.max_score} | Moves: {max(self.moves, self._internal_moves)}",
-            f"Location: {self.last_location}",
-            "\nCurrent observation:\n" + (obs or "")[:1100],
-            "\nCandidate actions (choose exactly one):",
-        ]
-        for a in candidates:
-            parts.append(f"- {a}")
-        return "\n".join(parts)
-    def _parse_llm(self, resp: str) -> tuple[str, dict]:
-        thought = ""
-        args = {"action": "look"}
-        m = re.search(r"(?im)^THOUGHT:\s*(.+)$", resp or "")
-        if m:
-            thought = m.group(1).strip()
-        m = re.search(r"(?is)^ARGS:\s*(\{.*\})\s*$", resp or "")
-        if m:
-            raw = m.group(1)
-            try:
-                args = json.loads(raw)
-            except Exception:
-                pass
-        return thought, args

 """
+Exploration-first hybrid agent (score + locations) for text adventures.
+Key points:
+- Deterministic policy driven by server status() JSON.
+- Priority:
+  A) Valid untried exits (Jericho-validated) + obs-boosted directions
+  B) Bounded suggested_interactions (game-validated)
+  C) BFS backtrack to nearest frontier (room with untried exits)
+  D) Stuck recovery (look/inventory/examine noun)
+  E) Optional single LLM fallback if HF_TOKEN is present (never required)
+- Uses peek_action (if available) to score a small candidate set quickly.
+- All verbose/debug output goes to stderr only.
 """
 import json
 import os
 import re
+import sys
+from collections import deque
 from dataclasses import dataclass, field
+from typing import Optional
 from dotenv import load_dotenv
 from huggingface_hub import InferenceClient
 load_dotenv()
+LLM_MODEL = os.getenv("HF_MODEL", "Qwen/Qwen2.5-72B-Instruct")
 _hf_token = os.getenv("HF_TOKEN")
 LLM_CLIENT = InferenceClient(token=_hf_token) if _hf_token else None
+def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 120) -> str:
     if LLM_CLIENT is None:
+        raise RuntimeError("HF_TOKEN missing => LLM unavailable")
+    r = LLM_CLIENT.chat.completions.create(
+        model=LLM_MODEL,
+        messages=[{"role": "system", "content": system_prompt},
+                  {"role": "user", "content": prompt}],
+        temperature=0.0,
+        max_tokens=max_tokens,
+        seed=seed,
+    )
+    return r.choices[0].message.content or ""
 @dataclass
     final_score: int
     max_score: int
     moves: int
+    locations_visited: set
     game_completed: bool
     error: Optional[str] = None
     history: list[tuple[str, str, str]] = field(default_factory=list)
+# Tunables
+MAX_INTERACTIONS = 4
+STUCK_THRESHOLD = 10
+MEMORY_LEN = 20
+PEEK_K = 6  # lower if too slow; higher can improve decisions but costs time
+UNSAFE_STARTS = (
+    "burn ", "set fire", "ignite ",
+    "attack ", "kill ", "hit ", "stab ", "shoot ", "punch ", "fight ",
+    "destroy ", "break ", "smash ",
+    "eat ",
+)
+DIR_WORD_RE = re.compile(
+    r"\b(north(?:east|west)?|south(?:east|west)?|east|west|"
+    r"northeast|northwest|southeast|southwest|up|down|in|out)\b",
+    re.IGNORECASE,
+)
+DISAMBIG_RE = re.compile(r"which do you mean|do you mean|be more specific|what do you want", re.IGNORECASE)
+OPTION_RE = re.compile(r"\bthe\s+([a-z]+(?:\s+[a-z]+)?)", re.IGNORECASE)
+LLM_SYSTEM = (
+    "You play a text adventure game. Propose ONE action (<= 5 words) that helps "
+    "explore a new location or gain points. Reply with exactly one line:\n"
+    "ACTION: <command>"
+)
 class StudentAgent:
+    def __init__(self) -> None:
+        self.visited: set[int] = set()
+        self.graph: dict[int, dict[str, int]] = {}
+        self.loc_untried: dict[int, list[str]] = {}
+        self.interactions_done: dict[int, int] = {}
+        self.recent_memory = deque(maxlen=MEMORY_LEN)  # (action, loc_id, score, obs_snip)
+        self.no_progress_steps = 0
+        self.llm_calls = 0
+        self.last_action = ""
+    async def run(self, client, game: str, max_steps: int, seed: int, verbose: bool = False) -> RunResult:
+        history = []
+        moves_taken = 0
+        final_score = 0
+        max_score = 0
+        game_completed = False
+        last_status = {}
+        tools = await client.list_tools()
+        tool_names = {t.name for t in tools}
+        has_peek = "peek_action" in tool_names
+        # prime game
+        _ = await client.call_tool("play_action", {"action": "look"})
+        moves_taken += 1
+        self.last_action = "look"
+        prev_score = 0
+        prev_loc = -1
+        while moves_taken < max_steps:
+            # status (no move cost)
+            try:
+                raw = await client.call_tool("status", {})
+                status = json.loads(self._text(raw))
+                last_status = status
+            except Exception:
+                status = last_status
+            if not status:
+                # emergency
+                res = await client.call_tool("play_action", {"action": "look"})
+                moves_taken += 1
+                history.append(("No status; look", "look", self._text(res)[:140]))
+                continue
+            loc_id = int(status["loc_id"])
+            score = int(status.get("score", 0))
+            final_score = score
+            max_score = int(status.get("max_score", max_score) or max_score)
+            done = bool(status.get("done", False))
+            obs = status.get("last_observation", "") or ""
+            self.visited.add(loc_id)
+            self._merge_edges(loc_id, status.get("edges_here", {}) or {})
+            self.loc_untried[loc_id] = list(status.get("untried_directions", []) or [])
+            if score == prev_score and loc_id == prev_loc:
+                self.no_progress_steps += 1
+            else:
+                self.no_progress_steps = 0
+            prev_score, prev_loc = score, loc_id
+            if done:
+                game_completed = True
+                break
+            thought, action = self._decide(status, seed)
+            if has_peek:
+                action = await self._peek_pick(client, status, action)
+            action = self._sanitize_action(action)
+            res = await client.call_tool("play_action", {"action": action})
+            moves_taken += 1
+            obs2 = self._text(res)
+            self.recent_memory.append((action.lower().strip(), loc_id, score, obs2[:60]))
+            self.last_action = action
+            if verbose:
+                print(
+                    f"[step] loc={loc_id} score={score} stuck={self.no_progress_steps} -> {action!r}",
+                    file=sys.stderr,
+                )
+            history.append((thought, action, obs2[:160]))
+            if self._is_game_over(obs2):
+                game_completed = True
+                break
+        # final status (best effort)
+        try:
+            raw = await client.call_tool("status", {})
+            st2 = json.loads(self._text(raw))
+            final_score = max(final_score, int(st2.get("score", 0)))
+            max_score = max_score or int(st2.get("max_score", 0))
+            self.visited.add(int(st2["loc_id"]))
+        except Exception:
+            pass
+        return RunResult(
+            final_score=final_score,
+            max_score=max_score,
+            moves=moves_taken,
+            locations_visited=self.visited,
+            game_completed=game_completed,
+            history=history,
+        )
+    # -----------------
+    # decision logic
+    # -----------------
+    def _decide(self, status: dict, seed: int) -> tuple[str, str]:
+        loc_id = int(status["loc_id"])
+        obs = status.get("last_observation", "") or ""
+        outcomes = status.get("outcomes_here", {}) or {}
+        banned = {str(x).lower().strip() for x in (status.get("banned_actions_here", []) or [])}
+        untried = status.get("untried_directions", []) or []
+        valid_exits = status.get("valid_exits", []) or []
+        suggested = status.get("suggested_interactions", []) or []
+        # 0) disambiguation
+        if DISAMBIG_RE.search(obs):
+            opt = self._extract_option(obs)
+            if opt and not self._repeat_noop(opt, loc_id):
+                return "Disambiguate", opt
+        # A1) Jericho-validated untried exits
+        untried_set = set(untried)
+        obs_dirs = self._mentioned_dirs(obs)
+        for d in valid_exits:
+            dl = d.lower().strip()
+            if d in untried_set and dl not in banned and not self._repeat_noop(d, loc_id):
+                return f"Valid exit {d}", d
+        # A2) obs-boosted untried dirs
+        for d in obs_dirs:
+            if d in untried_set and d.lower() not in banned and not self._repeat_noop(d, loc_id):
+                return f"Obs-boost {d}", d
+        # A3) any untried
+        for d in untried:
+            if d.lower() not in banned and not self._repeat_noop(d, loc_id):
+                return f"Untried {d}", d
+        # B) bounded interactions (game-validated)
+        n = self.interactions_done.get(loc_id, 0)
+        if n < MAX_INTERACTIONS:
+            for a in suggested:
+                al = a.lower().strip()
+                if al in banned:
                     continue
+                if any(al.startswith(x) for x in UNSAFE_STARTS):
                     continue
+                if a in outcomes:
+                    continue
+                if self._repeat_noop(a, loc_id):
+                    continue
+                self.interactions_done[loc_id] = n + 1
+                return f"Interaction {n+1}", a
+        # C) BFS backtrack to frontier
+        avoid = self._oscillation_avoid()
+        step_dir = self._bfs_step(loc_id, avoid)
+        if step_dir:
+            return "BFS backtrack", step_dir
+        # D) stuck recovery
+        if self.no_progress_steps >= STUCK_THRESHOLD:
+            for a in ("look", "inventory"):
+                if not self._repeat_noop(a, loc_id):
+                    return "Stuck recovery", a
+            noun = self._extract_noun(obs)
+            if noun and not self._repeat_noop(f"examine {noun}", loc_id):
+                return "Stuck examine", f"examine {noun}"
+        # E) optional LLM fallback
+        if LLM_CLIENT is not None:
+            try:
+                self.llm_calls += 1
+                prompt = self._llm_prompt(status)
+                resp = call_llm(prompt, LLM_SYSTEM, seed + self.llm_calls)
+                act = self._parse_llm(resp)
+                if act and act.lower().strip() not in banned and not self._repeat_noop(act, loc_id):
+                    return "LLM fallback", act
+            except Exception:
+                pass
+        return "Fallback", "look"
+    async def _peek_pick(self, client, status: dict, current_action: str) -> str:
+        """Use peek_action to score a small candidate set and pick best."""
+        loc_id = int(status["loc_id"])
+        score = int(status.get("score", 0))
+        candidates = []
+        if current_action:
+            candidates.append(current_action)
+        # add a few candidates
+        for d in (status.get("untried_directions", []) or [])[:4]:
+            if d not in candidates:
+                candidates.append(d)
+        for a in (status.get("suggested_interactions", []) or [])[:4]:
+            if a not in candidates:
+                candidates.append(a)
+        candidates = candidates[:PEEK_K]
+        best = current_action
+        best_u = -10**18
         for a in candidates:
+            try:
+                raw = await client.call_tool("peek_action", {"action": a})
+                st = json.loads(self._text(raw))
+                new_score = int(st.get("score", score))
+                new_loc = int(st.get("loc_id", loc_id))
+                delta = max(0, new_score - score)
+                if new_loc != loc_id:
+                    moved_bonus = 600 if (new_loc not in self.visited) else 80
+                else:
+                    moved_bonus = 0
+                repeat_pen = 120 if self._repeat_noop(a, loc_id) else 0
+                u = delta * 900 + moved_bonus - repeat_pen
+                if u > best_u:
+                    best_u = u
+                    best = a
+            except Exception:
                 continue
+        return best
+    # -----------------
+    # graph / BFS
+    # -----------------
+    def _merge_edges(self, loc_id: int, edges_here: dict) -> None:
+        if not edges_here:
+            return
+        node = self.graph.setdefault(loc_id, {})
+        for d, nid in edges_here.items():
+            try:
+                node[str(d)] = int(nid)
+            except Exception:
+                pass
+    def _oscillation_avoid(self) -> Optional[int]:
+        locs = [x[1] for x in self.recent_memory]
+        if len(locs) >= 4 and locs[-1] == locs[-3] and locs[-2] == locs[-4]:
+            return locs[-2]
+        return None
+    def _bfs_step(self, from_loc: int, avoid_loc: Optional[int]) -> Optional[str]:
+        frontier = {lid for lid, u in self.loc_untried.items() if u and lid != from_loc}
+        if not frontier:
+            return None
+        q = deque()
+        seen = {from_loc}
+        for d, nid in self.graph.get(from_loc, {}).items():
+            if nid not in seen and nid != avoid_loc:
+                q.append((nid, d))
+                seen.add(nid)
+        while q:
+            cur, first_dir = q.popleft()
+            if cur in frontier:
+                return first_dir
+            for d, nid in self.graph.get(cur, {}).items():
+                if nid not in seen:
+                    seen.add(nid)
+                    q.append((nid, first_dir))
+        return None
+    # -----------------
+    # loop / parsing helpers
+    # -----------------
+    def _repeat_noop(self, action: str, loc_id: int) -> bool:
+        a = (action or "").lower().strip()
+        return any(prev_a == a and prev_loc == loc_id for (prev_a, prev_loc, _sc, _o) in self.recent_memory)
+    def _mentioned_dirs(self, obs: str) -> list[str]:
+        out = []
+        for m in DIR_WORD_RE.finditer(obs or ""):
+            d = m.group(1).lower()
+            if d not in out:
+                out.append(d)
+        return out
+    def _extract_option(self, obs: str) -> Optional[str]:
+        m = OPTION_RE.search(obs or "")
+        if m:
+            return m.group(1).strip().lower()
+        return None
+    def _extract_noun(self, obs: str) -> Optional[str]:
+        m = re.search(r"\bthe\s+([a-z]{3,})\b", (obs or "").lower())
+        if m:
+            noun = m.group(1)
+            if noun not in CANONICAL_DIR_SET:
+                return noun
+        return None
+    def _sanitize_action(self, a: str) -> str:
+        a = (a or "").strip()
+        a = re.sub(r"[`\"']", "", a)
+        a = re.sub(r"\s+", " ", a).strip()
+        words = a.split()[:6]
+        return " ".join(words) if words else "look"
+    def _llm_prompt(self, status: dict) -> str:
+        inv = ", ".join(status.get("inventory", [])) or "empty"
+        tried = ", ".join(list((status.get("outcomes_here") or {}).keys())[:20]) or "none"
+        banned = ", ".join(status.get("banned_actions_here", [])) or "none"
+        return (
+            f"Location: {status.get('loc_name')} (id={status.get('loc_id')})\n"
+            f"Score: {status.get('score')}/{status.get('max_score')}  Moves: {status.get('moves')}\n"
+            f"Inventory: {inv}\n"
+            f"Untried dirs: {', '.join((status.get('untried_directions') or [])[:12])}\n"
+            f"Tried here: {tried}\n"
+            f"BANNED: {banned}\n\n"
+            f"Observation:\n{(status.get('last_observation') or '')[:500]}\n"
+        )
+    def _parse_llm(self, resp: str) -> str:
+        for line in (resp or "").splitlines():
+            line = line.strip()
+            if not line:
                 continue
+            if line.upper().startswith("ACTION:"):
+                line = line.split(":", 1)[1].strip()
+            line = line.lower()
+            m = re.match(
+                r"^(?:go\s+)?(north(?:east|west)?|south(?:east|west)?|east|west|up|down|in|out)\b",
+                line,
+            )
+            if m:
+                return m.group(1)
+            return " ".join(line.split()[:5])
+        return "look"
+    def _is_game_over(self, text: str) -> bool:
+        t = (text or "").lower()
+        return any(x in t for x in ("game over", "you have died", "you are dead", "you have won"))
+    def _text(self, result) -> str:
         try:
+            if hasattr(result, "content") and result.content:
+                return result.content[0].text
+            if isinstance(result, list) and result:
+                return result[0].text
         except Exception:
             pass
+        return str(result)
+# Optional smoke-test
+async def _test() -> None:
+    from fastmcp import Client
+    from fastmcp.client.transports import StdioTransport
+    import sys as _sys
+    import os as _os
+    transport = StdioTransport(
+        command=_sys.executable,
+        args=[_os.path.join(_os.path.dirname(__file__), "mcp_server.py")],
+        env={**_os.environ, "GAME": "lostpig"},
+    )
+    agent = StudentAgent()
+    async with Client(transport) as client:
+        res = await agent.run(client, game="lostpig", max_steps=30, seed=42, verbose=True)
+    print(f"Score: {res.final_score}/{res.max_score} | Moves: {res.moves} | Locations: {len(res.locations_visited)}", file=sys.stderr)
+if __name__ == "__main__":
+    import asyncio
+    asyncio.run(_test())