bouhss commited on
Commit
a8bba7b
·
verified ·
1 Parent(s): 0fa0cde

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +260 -254
agent.py CHANGED
@@ -1,20 +1,22 @@
1
  """
2
- Student Agent for Text Adventure Games (Strong submission)
3
-
4
- Key ideas:
5
- - Deterministic & robust
6
- - Uses MCP tools if available:
7
- - get_valid_actions: reduce invalid commands
8
- - peek_action: simulate actions without committing (safe look-ahead)
9
- - inventory / memory / get_map: optional extra context
10
- - Exploration + score oriented:
11
- utility = score_gain * big_weight + new_location_bonus - loop_penalty - stuck_penalty - death_penalty
12
- - LLM is used only as fallback, to choose among a candidate list.
 
13
  """
14
 
15
  import json
16
  import os
17
  import re
 
18
  from dataclasses import dataclass, field
19
  from typing import Optional, Any
20
  from collections import defaultdict, deque
@@ -22,7 +24,6 @@ from collections import defaultdict, deque
22
  from dotenv import load_dotenv
23
  from huggingface_hub import InferenceClient
24
 
25
- # Load environment variables
26
  load_dotenv()
27
 
28
  # =============================================================================
@@ -31,24 +32,36 @@ load_dotenv()
31
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
32
 
33
  _hf_token = os.getenv("HF_TOKEN")
34
- if not _hf_token:
35
- raise ValueError("HF_TOKEN not found. Set it in your .env file.")
36
- LLM_CLIENT = InferenceClient(token=_hf_token)
37
 
38
 
39
- def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 220) -> str:
 
 
 
 
 
 
 
40
  messages = [
41
  {"role": "system", "content": system_prompt},
42
  {"role": "user", "content": prompt},
43
  ]
44
- response = LLM_CLIENT.chat.completions.create(
45
- model=LLM_MODEL,
46
- messages=messages,
47
- temperature=0.0,
48
- max_tokens=max_tokens,
49
- seed=seed,
50
- )
51
- return response.choices[0].message.content
 
 
 
 
 
 
 
52
 
53
 
54
  @dataclass
@@ -63,57 +76,50 @@ class RunResult:
63
  history: list[tuple[str, str, str]] = field(default_factory=list)
64
 
65
 
66
- # =============================================================================
67
- # LLM Prompt (fallback only)
68
- # =============================================================================
69
-
70
  SYSTEM_PROMPT = """You are an expert text-adventure agent.
71
 
72
- Goal: maximize score and explore new locations while avoiding loops.
73
-
74
- You MUST output EXACTLY:
75
  THOUGHT: ...
76
  TOOL: play_action
77
  ARGS: {"action": "<one candidate action>"}
78
 
79
  Rules:
80
- - Choose one action EXACTLY from the candidate list provided by the user.
81
- - Avoid repeating the same action if it failed before.
82
- - If darkness is mentioned, prioritize lamp actions if present in candidates.
83
- - No markdown, no extra text.
84
  """
85
 
86
 
87
- MOVE_ACTIONS = ["north", "south", "east", "west", "up", "down", "enter", "exit"]
88
- MOVE_ALIASES = {"n": "north", "s": "south", "e": "east", "w": "west", "u": "up", "d": "down"}
 
 
89
 
90
- # avoid wasting steps on meta commands
91
  BAD_PREFIXES = ("save", "restore", "quit", "restart", "help", "verbose", "script", "unscript", "version")
92
  BAD_EXACT = {"wait", "z"}
93
 
94
 
95
  class StudentAgent:
96
  def __init__(self):
97
- # parsed from banner
98
  self.score = 0
99
  self.max_score = 0
100
  self.moves = 0
101
 
102
- # exploration tracking
 
 
 
103
  self.locations_visited: set[str] = set()
104
  self.last_location = "Unknown"
105
- self.edges = defaultdict(dict) # edges[loc][move] = new_loc
106
-
107
- # loop avoidance
108
- self.tried = defaultdict(int) # tried[(loc, action)] += 1
109
  self.recent_actions = deque(maxlen=10)
110
  self.recent_obs = deque(maxlen=6)
111
 
112
- # cached valid actions by location
113
- self.valid_actions_cache = {} # loc -> list[str]
114
 
115
- # ---------------------------------------------------------------------
116
- # Main run loop
117
  # ---------------------------------------------------------------------
118
  async def run(self, client, game: str, max_steps: int, seed: int, verbose: bool = False) -> RunResult:
119
  history: list[tuple[str, str, str]] = []
@@ -122,11 +128,12 @@ class StudentAgent:
122
  tools = await client.list_tools()
123
  tool_names = {t.name for t in tools}
124
 
125
- def has(tname: str) -> bool:
126
- return tname in tool_names
127
 
128
- # initial observation
129
  obs = await self._call_tool_text(client, "play_action", {"action": "look"})
 
130
  self._update_from_text(obs)
131
  self.last_location = self._extract_location(obs)
132
  self.locations_visited.add(self.last_location)
@@ -141,63 +148,51 @@ class StudentAgent:
141
 
142
  stuck = self._is_stuck(obs)
143
 
144
- # refresh valid actions periodically or when stuck/new location
145
- valid_actions = self.valid_actions_cache.get(loc, [])
146
  if has("get_valid_actions") and (stuck or not valid_actions or step % 6 == 0):
147
  va_txt = await self._call_tool_text(client, "get_valid_actions", {"limit": 60})
148
  valid_actions = self._parse_valid_actions(va_txt)
149
  if valid_actions:
150
- self.valid_actions_cache[loc] = valid_actions
151
 
152
- # optional inventory
153
  inv_txt = ""
154
- if has("inventory") and (stuck or step % 8 == 0 or step == 1):
155
  inv_txt = await self._call_tool_text(client, "inventory", {})
156
 
157
- # build candidates
158
  candidates = self._make_candidates(obs, inv_txt, valid_actions, loc)
159
 
160
- # decide action
161
  action = None
162
  thought = ""
163
 
 
164
  if has("peek_action") and candidates:
165
  action, thought = await self._choose_by_lookahead(
166
- client=client,
167
- loc=loc,
168
- obs=obs,
169
- candidates=candidates,
170
- seed=seed,
171
- step=step,
172
- verbose=verbose,
173
  )
174
 
 
175
  if not action:
176
  action, thought = await self._choose_without_peek(
177
- obs=obs,
178
- inv_txt=inv_txt,
179
- candidates=candidates,
180
- seed=seed,
181
- step=step,
182
  )
183
 
184
  action = self._normalize_action(action or "look")
185
 
186
- # commit the action
187
  obs2 = await self._call_tool_text(client, "play_action", {"action": action})
 
188
 
189
- # update map edges if movement changed location
190
- new_loc = self._extract_location(obs2)
191
- if action.lower() in MOVE_ACTIONS and new_loc and new_loc != loc:
192
- self.edges[loc][action.lower()] = new_loc
193
-
194
- # bookkeeping
195
  self.tried[(loc, action.lower())] += 1
196
  self.recent_actions.append(action.lower())
197
  self.recent_obs.append((obs2 or "")[:220])
 
198
  self._update_from_text(obs2)
 
 
199
 
200
- history.append((thought, f"play_action({action})", (obs2 or "")[:250]))
201
 
202
  if verbose:
203
  print(f"\n--- step {step} ---")
@@ -213,7 +208,7 @@ class StudentAgent:
213
  return RunResult(
214
  final_score=self.score,
215
  max_score=self.max_score,
216
- moves=self.moves,
217
  locations_visited=set(self.locations_visited),
218
  game_completed=self._is_game_over(obs),
219
  history=history,
@@ -223,15 +218,13 @@ class StudentAgent:
223
  return RunResult(
224
  final_score=self.score,
225
  max_score=self.max_score,
226
- moves=self.moves,
227
  locations_visited=set(self.locations_visited),
228
  game_completed=False,
229
  error=f"{type(e).__name__}: {e}",
230
  history=history,
231
  )
232
 
233
- # ---------------------------------------------------------------------
234
- # Tool / text helpers
235
  # ---------------------------------------------------------------------
236
  async def _call_tool_text(self, client, tool: str, args: dict) -> str:
237
  result = await client.call_tool(tool, args)
@@ -249,7 +242,38 @@ class StudentAgent:
249
  return str(part)
250
  return str(result)
251
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  def _extract_location(self, text: str) -> str:
 
 
 
 
 
 
 
253
  if not text:
254
  return "Unknown"
255
  for line in text.splitlines():
@@ -261,68 +285,51 @@ class StudentAgent:
261
  return line
262
  return "Unknown"
263
 
264
- def _update_from_text(self, text: str) -> None:
265
- # parse banner: [Score: x/y | Moves: n]
266
- if not text:
267
- return
268
- m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\]", text)
269
- if m:
270
- self.score = int(m.group(1))
271
- self.max_score = int(m.group(2))
272
- self.moves = int(m.group(3))
 
 
273
 
274
- def _parse_valid_actions(self, txt: str) -> list[str]:
275
- if not txt:
 
276
  return []
277
- actions = []
278
- for line in txt.splitlines():
279
- line = line.strip()
280
- if line.startswith("- "):
281
- a = line[2:].strip()
282
- a = self._normalize_action(a)
283
- low = a.lower()
284
- if not a:
285
- continue
286
- if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
287
- continue
288
- actions.append(a)
289
- # dedup keep order
290
- seen = set()
291
  out = []
292
- for a in actions:
293
- if a.lower() not in seen:
294
- seen.add(a.lower())
295
  out.append(a)
296
  return out
297
 
298
- def _normalize_action(self, action: str) -> str:
299
- a = (action or "").strip()
300
- low = a.lower()
301
- if low in MOVE_ALIASES:
302
- return MOVE_ALIASES[low]
303
- return a
304
-
305
  def _is_game_over(self, text: str) -> bool:
306
  t = (text or "").lower()
307
- return ("game over" in t) or ("you have died" in t) or ("you are dead" in t)
308
 
309
  def _is_stuck(self, text: str) -> bool:
310
  t = (text or "").lower()
311
  bad = [
312
- "i don't understand",
313
- "you can't go that way",
314
- "that's not a verb",
315
- "not a word i know",
316
- "nothing happens",
317
- "you can't",
318
- "can't do that",
319
  ]
320
  rep = len(self.recent_obs) >= 3 and all(self.recent_obs[-1] == x for x in list(self.recent_obs)[-3:])
321
  return any(b in t for b in bad) or rep
322
 
 
 
 
 
 
 
 
323
  # ---------------------------------------------------------------------
324
- # Candidate generation
325
- # ---------------------------------------------------------------------
326
  def _make_candidates(self, obs: str, inv_txt: str, valid_actions: list[str], loc: str) -> list[str]:
327
  obs_l = (obs or "").lower()
328
  inv_l = (inv_txt or "").lower()
@@ -334,61 +341,60 @@ class StudentAgent:
334
  a = self._normalize_action(a)
335
  if not a:
336
  return
337
- low = a.lower()
338
  if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
339
  return
340
  if low not in seen:
341
  seen.add(low)
342
  candidates.append(a)
343
 
344
- # always safe
345
- add("look")
 
346
 
347
- # darkness handling
348
- if "dark" in obs_l:
349
- if "lamp" in obs_l or "lamp" in inv_l:
350
- add("take lamp")
351
- add("turn on lamp")
352
 
353
- # split valid actions into move vs object
354
- move_list = []
355
- obj_list = []
 
 
 
 
 
356
  for a in valid_actions or []:
357
- low = a.lower()
358
- if low in MOVE_ACTIONS:
359
- move_list.append(a)
 
360
  else:
361
- obj_list.append(a)
362
 
363
- # prioritize untried moves from this location
364
- def move_key(m: str):
365
- return (self.tried[(loc, m.lower())], 0 if m.lower() not in self.edges.get(loc, {}) else 1)
366
 
367
- for m in sorted(set(move_list), key=move_key):
368
  add(m)
369
 
370
- # if no valid moves known, still try generic moves
371
- if not move_list:
372
- for m in MOVE_ACTIONS:
373
- add(m)
374
-
375
- # prioritize object actions that often give score
376
- scorey_prefixes = ("take ", "get ", "open ", "read ", "examine ", "look at ", "turn on ", "unlock ", "insert ")
377
- for a in obj_list:
378
- if a.lower().startswith(scorey_prefixes):
379
  add(a)
380
 
381
- # then the rest (limited)
382
- for a in obj_list:
383
  add(a)
384
- if len(candidates) >= 22:
385
  break
386
 
387
- # small generic probes (often good across games)
388
- add("take all")
389
  add("inventory")
 
390
 
391
- # remove actions repeated too much recently
392
  cleaned = []
393
  for a in candidates:
394
  if list(self.recent_actions).count(a.lower()) >= 3:
@@ -398,30 +404,28 @@ class StudentAgent:
398
  return cleaned[:20]
399
 
400
  # ---------------------------------------------------------------------
401
- # Decision: look-ahead
402
- # ---------------------------------------------------------------------
403
- async def _choose_by_lookahead(self, client, loc: str, obs: str, candidates: list[str], seed: int, step: int, verbose: bool):
404
  base_score = self.score
405
- base_loc = loc
 
406
 
407
- # prioritize a shortlist for speed
408
  priority = []
409
  for a in candidates:
410
- low = a.lower()
411
- is_move = low in MOVE_ACTIONS
412
- is_obj = low.startswith(("take ", "get ", "open ", "read ", "examine ", "turn on ", "unlock "))
413
  tried = self.tried[(loc, low)]
414
- priority.append((tried, 0 if is_obj else 1, 0 if is_move else 1, low, a))
415
  priority.sort()
416
-
417
- shortlist = [x[-1] for x in priority][:10] # evaluate at most 10
418
 
419
  best_a = None
420
  best_u = -10**18
421
  best_th = ""
422
 
423
  for a in shortlist:
424
- low = a.lower()
425
  if self.tried[(loc, low)] >= 4:
426
  continue
427
 
@@ -431,22 +435,26 @@ class StudentAgent:
431
  if self._is_game_over(peek) or "you have died" in peek_l:
432
  u = -1_000_000_000
433
  else:
434
- s_after, mx_after, mv_after = self._parse_banner(peek, fallback_score=base_score)
435
  delta = max(0, s_after - base_score)
436
 
437
- new_loc = self._extract_location(peek)
438
- changed = (new_loc and new_loc != base_loc)
439
- new_loc_bonus = 250 if (changed and new_loc not in self.locations_visited) else 0
440
- changed_bonus = 40 if changed else 0
 
 
441
 
442
- loop_pen = 80 * list(self.recent_actions).count(low)
443
- stuck_pen = 160 if self._is_stuck(peek) else 0
444
 
445
- # MAIN utility
446
- u = delta * 900 + new_loc_bonus + changed_bonus - loop_pen - stuck_pen
447
 
448
- # small preference: if darkness, lamp actions
449
- if "dark" in (obs or "").lower() and ("lamp" in low):
 
 
450
  u += 120
451
 
452
  if u > best_u:
@@ -458,34 +466,33 @@ class StudentAgent:
458
  return None, "Look-ahead found no good action; fallback."
459
  return best_a, best_th
460
 
461
- def _parse_banner(self, text: str, fallback_score: int):
462
  score = fallback_score
463
- mx = self.max_score
464
- mv = self.moves
465
- if not text:
466
- return score, mx, mv
467
- m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\]", text)
468
  if m:
469
- return int(m.group(1)), int(m.group(2)), int(m.group(3))
470
- return score, mx, mv
 
 
 
 
471
 
472
  # ---------------------------------------------------------------------
473
- # Decision: no peek => heuristic then LLM fallback among candidates
474
- # ---------------------------------------------------------------------
475
- async def _choose_without_peek(self, obs: str, inv_txt: str, candidates: list[str], seed: int, step: int):
 
 
 
 
 
476
  loc = self._extract_location(obs)
477
-
478
- # heuristic: try an untried move
479
- for m in MOVE_ACTIONS:
480
- if m in [c.lower() for c in candidates] and self.tried[(loc, m)] == 0:
481
- return m, "Heuristic: try an untried move to explore."
482
-
483
- # heuristic: try untried "take/get/open/read/examine"
484
  for a in candidates:
485
- low = a.lower()
486
- if low.startswith(("take ", "get ", "open ", "read ", "examine ", "turn on ")):
487
  if self.tried[(loc, low)] == 0:
488
- return a, "Heuristic: try a promising object interaction."
489
 
490
  # LLM fallback: choose from candidate list exactly
491
  if not candidates:
@@ -493,53 +500,51 @@ class StudentAgent:
493
 
494
  cand = candidates[:10]
495
  prompt = self._build_llm_prompt(obs, inv_txt, cand)
496
- resp = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=180)
497
-
498
- thought, tool, args = self._parse_response(resp)
499
- a = self._normalize_action(str(args.get("action", "")).strip())
500
 
501
- # force action to be in candidate list
502
- canon = {x.lower(): x for x in cand}
503
- if a.lower() in canon:
504
- return canon[a.lower()], thought or "LLM chose a candidate."
505
- return cand[0], "LLM invalid; fallback to first candidate."
 
 
 
 
 
 
506
 
507
  def _build_llm_prompt(self, obs: str, inv_txt: str, candidates: list[str]) -> str:
508
  obs = (obs or "").strip()[:1100]
509
  inv_txt = (inv_txt or "").strip()[:350]
510
 
511
- lines = [
512
- f"Score: {self.score}/{self.max_score} | Moves: {self.moves}",
513
- f"Location guess: {self.last_location}",
514
  ]
515
  if inv_txt:
516
- lines.append(f"Inventory:\n{inv_txt}")
517
  if self.recent_actions:
518
- lines.append("Recent actions: " + ", ".join(list(self.recent_actions)[-6:]))
519
 
520
- lines.append("\nCurrent observation:\n" + obs)
521
- lines.append("\nCandidate actions (choose exactly one):")
522
  for a in candidates:
523
- lines.append(f"- {a}")
524
- lines.append("\nOutput TOOL=play_action and ARGS with one candidate action.")
525
- return "\n".join(lines)
526
 
527
- def _parse_response(self, response: str):
528
  thought = ""
529
  tool = "play_action"
530
  args = {"action": "look"}
531
-
532
  if not response:
533
  return thought, tool, args
534
 
535
  m = re.search(r"(?im)^\s*THOUGHT\s*:\s*(.+)$", response)
536
  if m:
537
  thought = m.group(1).strip()
538
-
539
  m = re.search(r"(?im)^\s*TOOL\s*:\s*([a-zA-Z0-9_]+)\s*$", response)
540
  if m:
541
  tool = m.group(1).strip()
542
-
543
  m = re.search(r"(?is)^\s*ARGS\s*:\s*(\{.*\})\s*$", response)
544
  if m:
545
  raw = m.group(1).strip()
@@ -556,31 +561,32 @@ class StudentAgent:
556
  if not isinstance(args, dict):
557
  args = {"action": "look"}
558
 
 
 
559
  return thought, tool, args
560
 
561
-
562
- # =============================================================================
563
- # Local testing
564
- # =============================================================================
565
- async def test_agent():
566
- from fastmcp import Client
567
-
568
- server_path = "mcp_server.py"
569
- agent = StudentAgent()
570
-
571
- async with Client(server_path) as client:
572
- result = await agent.run(
573
- client=client,
574
- game="lostpig",
575
- max_steps=20,
576
- seed=42,
577
- verbose=True,
578
- )
579
- print(f"\nFinal Score: {result.final_score}/{result.max_score}")
580
- print(f"Moves: {result.moves}")
581
- print(f"Locations visited: {len(result.locations_visited)}")
582
-
583
-
584
- if __name__ == "__main__":
585
- import asyncio
586
- asyncio.run(test_agent())
 
1
  """
2
+ Student Agent for Text Adventure Games (Best-performance submission)
3
+
4
+ Design:
5
+ - Primary driver: heuristics + server tools, not pure LLM.
6
+ - Uses MCP tools:
7
+ - play_action (commit)
8
+ - peek_action (simulate without committing) => BIG performance boost
9
+ - get_valid_actions (reduce hallucinations)
10
+ - inventory (optional context)
11
+ - memory/get_map (rare; not required)
12
+ - LLM only as fallback: choose among a candidate list deterministically (temp=0).
13
+ - Robust stats: internal move counter so moves never stay 0 even if banner parsing fails.
14
  """
15
 
16
  import json
17
  import os
18
  import re
19
+ import time
20
  from dataclasses import dataclass, field
21
  from typing import Optional, Any
22
  from collections import defaultdict, deque
 
24
  from dotenv import load_dotenv
25
  from huggingface_hub import InferenceClient
26
 
 
27
  load_dotenv()
28
 
29
  # =============================================================================
 
32
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
33
 
34
  _hf_token = os.getenv("HF_TOKEN")
35
+ LLM_CLIENT = InferenceClient(token=_hf_token) if _hf_token else None
 
 
36
 
37
 
38
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 180) -> str:
    """Query the chat model deterministically (temperature=0, fixed seed).

    Transient failures are retried up to three times with exponential
    backoff.  Raises RuntimeError when no HF token was configured, and
    re-raises the last error if every attempt fails.
    """
    if LLM_CLIENT is None:
        raise RuntimeError("HF_TOKEN missing => LLM unavailable")

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    last_attempt = 2
    for attempt in range(last_attempt + 1):
        try:
            resp = LLM_CLIENT.chat.completions.create(
                model=LLM_MODEL,
                messages=messages,
                temperature=0.0,
                max_tokens=max_tokens,
                seed=seed,
            )
        except Exception:
            if attempt == last_attempt:
                raise
            # exponential backoff: 1s, 2s
            time.sleep(2 ** attempt)
        else:
            return resp.choices[0].message.content
65
 
66
 
67
  @dataclass
 
76
  history: list[tuple[str, str, str]] = field(default_factory=list)
77
 
78
 
 
 
 
 
79
  SYSTEM_PROMPT = """You are an expert text-adventure agent.
80
 
81
+ You must output EXACTLY:
 
 
82
  THOUGHT: ...
83
  TOOL: play_action
84
  ARGS: {"action": "<one candidate action>"}
85
 
86
  Rules:
87
+ - Choose ONE action EXACTLY from the candidate list provided by the user.
88
+ - Do not invent actions outside that list.
89
+ - Avoid repeating actions that recently failed.
90
+ - No markdown and no extra text.
91
  """
92
 
93
 
94
+ MOVE_ACTIONS = ["north", "south", "east", "west", "up", "down", "enter", "exit",
95
+ "northeast", "northwest", "southeast", "southwest"]
96
+ MOVE_ALIASES = {"n": "north", "s": "south", "e": "east", "w": "west", "u": "up", "d": "down",
97
+ "ne": "northeast", "nw": "northwest", "se": "southeast", "sw": "southwest"}
98
 
 
99
  BAD_PREFIXES = ("save", "restore", "quit", "restart", "help", "verbose", "script", "unscript", "version")
100
  BAD_EXACT = {"wait", "z"}
101
 
102
 
103
  class StudentAgent:
104
  def __init__(self):
105
+ # parsed from server banner if available
106
  self.score = 0
107
  self.max_score = 0
108
  self.moves = 0
109
 
110
+ # internal moves (robust)
111
+ self._internal_moves = 0
112
+
113
+ # exploration / loop avoidance
114
  self.locations_visited: set[str] = set()
115
  self.last_location = "Unknown"
116
+ self.tried = defaultdict(int) # tried[(loc, action)] += 1
 
 
 
117
  self.recent_actions = deque(maxlen=10)
118
  self.recent_obs = deque(maxlen=6)
119
 
120
+ # valid actions cache
121
+ self.valid_cache = {} # loc -> list[str]
122
 
 
 
123
  # ---------------------------------------------------------------------
124
  async def run(self, client, game: str, max_steps: int, seed: int, verbose: bool = False) -> RunResult:
125
  history: list[tuple[str, str, str]] = []
 
128
  tools = await client.list_tools()
129
  tool_names = {t.name for t in tools}
130
 
131
+ def has(name: str) -> bool:
132
+ return name in tool_names
133
 
134
+ # initial look
135
  obs = await self._call_tool_text(client, "play_action", {"action": "look"})
136
+ self._internal_moves += 1
137
  self._update_from_text(obs)
138
  self.last_location = self._extract_location(obs)
139
  self.locations_visited.add(self.last_location)
 
148
 
149
  stuck = self._is_stuck(obs)
150
 
151
+ # refresh valid actions (sparsely)
152
+ valid_actions = self.valid_cache.get(loc, [])
153
  if has("get_valid_actions") and (stuck or not valid_actions or step % 6 == 0):
154
  va_txt = await self._call_tool_text(client, "get_valid_actions", {"limit": 60})
155
  valid_actions = self._parse_valid_actions(va_txt)
156
  if valid_actions:
157
+ self.valid_cache[loc] = valid_actions
158
 
 
159
  inv_txt = ""
160
+ if has("inventory") and (step == 1 or stuck or step % 8 == 0):
161
  inv_txt = await self._call_tool_text(client, "inventory", {})
162
 
163
+ # candidates from server meta tags + valid actions
164
  candidates = self._make_candidates(obs, inv_txt, valid_actions, loc)
165
 
 
166
  action = None
167
  thought = ""
168
 
169
+ # look-ahead (best)
170
  if has("peek_action") and candidates:
171
  action, thought = await self._choose_by_lookahead(
172
+ client=client, loc=loc, obs=obs, candidates=candidates
 
 
 
 
 
 
173
  )
174
 
175
+ # fallback heuristic + optional LLM
176
  if not action:
177
  action, thought = await self._choose_without_peek(
178
+ obs=obs, inv_txt=inv_txt, candidates=candidates, seed=seed, step=step
 
 
 
 
179
  )
180
 
181
  action = self._normalize_action(action or "look")
182
 
183
+ # commit
184
  obs2 = await self._call_tool_text(client, "play_action", {"action": action})
185
+ self._internal_moves += 1
186
 
 
 
 
 
 
 
187
  self.tried[(loc, action.lower())] += 1
188
  self.recent_actions.append(action.lower())
189
  self.recent_obs.append((obs2 or "")[:220])
190
+
191
  self._update_from_text(obs2)
192
+ new_loc = self._extract_location(obs2)
193
+ self.locations_visited.add(new_loc)
194
 
195
+ history.append((thought, f"play_action({action})", (obs2 or "")[:260]))
196
 
197
  if verbose:
198
  print(f"\n--- step {step} ---")
 
208
  return RunResult(
209
  final_score=self.score,
210
  max_score=self.max_score,
211
+ moves=max(self.moves, self._internal_moves),
212
  locations_visited=set(self.locations_visited),
213
  game_completed=self._is_game_over(obs),
214
  history=history,
 
218
  return RunResult(
219
  final_score=self.score,
220
  max_score=self.max_score,
221
+ moves=max(self.moves, self._internal_moves),
222
  locations_visited=set(self.locations_visited),
223
  game_completed=False,
224
  error=f"{type(e).__name__}: {e}",
225
  history=history,
226
  )
227
 
 
 
228
  # ---------------------------------------------------------------------
229
  async def _call_tool_text(self, client, tool: str, args: dict) -> str:
230
  result = await client.call_tool(tool, args)
 
242
  return str(part)
243
  return str(result)
244
 
245
+ # ---------------------------------------------------------------------
246
+ # Parsing
247
+ def _update_from_text(self, text: str) -> None:
248
+ """
249
+ Parse server banner:
250
+ [Score: s/max | Moves: m | Location: L]
251
+ Also accept +k points tag.
252
+ """
253
+ if not text:
254
+ return
255
+
256
+ m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\|\s*Location:\s*(.+?)\]", text)
257
+ if m:
258
+ self.score = int(m.group(1))
259
+ self.max_score = int(m.group(2))
260
+ self.moves = int(m.group(3))
261
+ self.last_location = m.group(4).strip()
262
+
263
+ # fallback: +k points!
264
+ mp = re.search(r"\[\+(\d+)\s+points", text, flags=re.IGNORECASE)
265
+ if mp and self.score >= 0:
266
+ # score already parsed above in most cases; keep safe
267
+ self.score = max(self.score, self.score + int(mp.group(1)))
268
+
269
  def _extract_location(self, text: str) -> str:
270
+ # Prefer banner location
271
+ m = re.search(r"\|\s*Location:\s*(.+?)\]", text or "")
272
+ if m:
273
+ loc = m.group(1).strip()
274
+ if loc:
275
+ return loc
276
+ # else fallback: first non-empty line
277
  if not text:
278
  return "Unknown"
279
  for line in text.splitlines():
 
285
  return line
286
  return "Unknown"
287
 
288
+ def _extract_untried_exits(self, text: str) -> list[str]:
289
+ m = re.search(r"\[Untried exits:\s*(.+?)\]", text or "")
290
+ if not m:
291
+ return []
292
+ dirs = [d.strip() for d in m.group(1).split(",")]
293
+ out = []
294
+ for d in dirs:
295
+ d = self._normalize_action(d).lower()
296
+ if d and d not in out:
297
+ out.append(d)
298
+ return out
299
 
300
+ def _extract_interactions(self, text: str) -> list[str]:
301
+ m = re.search(r"\[Interactions:\s*(.+?)\]", text or "")
302
+ if not m:
303
  return []
304
+ acts = [a.strip() for a in m.group(1).split(",")]
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  out = []
306
+ for a in acts:
307
+ if a and a.lower() not in out:
 
308
  out.append(a)
309
  return out
310
 
 
 
 
 
 
 
 
311
  def _is_game_over(self, text: str) -> bool:
312
  t = (text or "").lower()
313
+ return ("game over" in t) or ("you have died" in t) or ("you are dead" in t) or ("[game over]" in t)
314
 
315
  def _is_stuck(self, text: str) -> bool:
316
  t = (text or "").lower()
317
  bad = [
318
+ "i don't understand", "you can't", "that's not", "not a verb",
319
+ "nothing happens", "you don't see", "you see nothing", "beg your pardon"
 
 
 
 
 
320
  ]
321
  rep = len(self.recent_obs) >= 3 and all(self.recent_obs[-1] == x for x in list(self.recent_obs)[-3:])
322
  return any(b in t for b in bad) or rep
323
 
324
+ def _normalize_action(self, action: str) -> str:
325
+ a = (action or "").strip()
326
+ low = a.lower()
327
+ if low in MOVE_ALIASES:
328
+ return MOVE_ALIASES[low]
329
+ return a
330
+
331
  # ---------------------------------------------------------------------
332
+ # Candidates
 
333
  def _make_candidates(self, obs: str, inv_txt: str, valid_actions: list[str], loc: str) -> list[str]:
334
  obs_l = (obs or "").lower()
335
  inv_l = (inv_txt or "").lower()
 
341
  a = self._normalize_action(a)
342
  if not a:
343
  return
344
+ low = a.lower().strip()
345
  if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
346
  return
347
  if low not in seen:
348
  seen.add(low)
349
  candidates.append(a)
350
 
351
+ # from server tags
352
+ for d in self._extract_untried_exits(obs):
353
+ add(d)
354
 
355
+ for a in self._extract_interactions(obs):
356
+ add(a)
 
 
 
357
 
358
+ # darkness
359
+ if "dark" in obs_l and ("lamp" in obs_l or "lamp" in inv_l):
360
+ add("take lamp")
361
+ add("turn on lamp")
362
+
363
+ # add valid actions (movement first then interactions)
364
+ moves = []
365
+ inter = []
366
  for a in valid_actions or []:
367
+ al = a.lower().strip()
368
+ first = al.split()[0] if al else ""
369
+ if first in MOVE_ACTIONS:
370
+ moves.append(a)
371
  else:
372
+ inter.append(a)
373
 
374
+ # prioritize movement not tried too often
375
+ def move_key(a: str):
376
+ return self.tried[(loc, a.lower().strip())]
377
 
378
+ for m in sorted(set(moves), key=move_key):
379
  add(m)
380
 
381
+ # common score-ish interactions
382
+ scorey = ("take ", "get ", "open ", "read ", "examine ", "look at ", "turn on ", "unlock ", "insert ", "put ")
383
+ for a in inter:
384
+ if a.lower().startswith(scorey):
 
 
 
 
 
385
  add(a)
386
 
387
+ for a in inter:
 
388
  add(a)
389
+ if len(candidates) >= 24:
390
  break
391
 
392
+ # safe basics
393
+ add("look")
394
  add("inventory")
395
+ add("take all")
396
 
397
+ # remove too-repeated
398
  cleaned = []
399
  for a in candidates:
400
  if list(self.recent_actions).count(a.lower()) >= 3:
 
404
  return cleaned[:20]
405
 
406
  # ---------------------------------------------------------------------
407
+ # Look-ahead selection
408
+ async def _choose_by_lookahead(self, client, loc: str, obs: str, candidates: list[str]) -> tuple[Optional[str], str]:
 
409
  base_score = self.score
410
+ base_loc = self._extract_location(obs)
411
+ untried = set(self._extract_untried_exits(obs))
412
 
413
+ # shortlist for speed
414
  priority = []
415
  for a in candidates:
416
+ low = a.lower().strip()
417
+ is_untried = 0 if low in untried else 1
 
418
  tried = self.tried[(loc, low)]
419
+ priority.append((is_untried, tried, low, a))
420
  priority.sort()
421
+ shortlist = [x[-1] for x in priority][:10]
 
422
 
423
  best_a = None
424
  best_u = -10**18
425
  best_th = ""
426
 
427
  for a in shortlist:
428
+ low = a.lower().strip()
429
  if self.tried[(loc, low)] >= 4:
430
  continue
431
 
 
435
  if self._is_game_over(peek) or "you have died" in peek_l:
436
  u = -1_000_000_000
437
  else:
438
+ s_after, loc_after = self._parse_peek_score_loc(peek, fallback_score=base_score)
439
  delta = max(0, s_after - base_score)
440
 
441
+ new_loc_bonus = 0
442
+ changed_bonus = 0
443
+ if loc_after and loc_after != base_loc:
444
+ changed_bonus = 60
445
+ if loc_after not in self.locations_visited:
446
+ new_loc_bonus = 280
447
 
448
+ loop_pen = 90 * list(self.recent_actions).count(low)
449
+ stuck_pen = 180 if self._is_stuck(peek) else 0
450
 
451
+ # prefer untried exits
452
+ untried_bonus = 120 if low in untried else 0
453
 
454
+ u = delta * 900 + new_loc_bonus + changed_bonus + untried_bonus - loop_pen - stuck_pen
455
+
456
+ # lamp preference in darkness
457
+ if "dark" in (obs or "").lower() and "lamp" in low:
458
  u += 120
459
 
460
  if u > best_u:
 
466
  return None, "Look-ahead found no good action; fallback."
467
  return best_a, best_th
468
 
469
+ def _parse_peek_score_loc(self, text: str, fallback_score: int) -> tuple[int, str]:
470
  score = fallback_score
471
+ loc = self._extract_location(text)
472
+ m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\|\s*Location:\s*(.+?)\]", text or "")
 
 
 
473
  if m:
474
+ score = int(m.group(1))
475
+ loc = m.group(4).strip()
476
+ mp = re.search(r"\[\+(\d+)\s+points", text or "", flags=re.IGNORECASE)
477
+ if mp and score == fallback_score:
478
+ score = fallback_score + int(mp.group(1))
479
+ return score, loc
480
 
481
  # ---------------------------------------------------------------------
482
+ # No-peek fallback
483
+ async def _choose_without_peek(self, obs: str, inv_txt: str, candidates: list[str], seed: int, step: int) -> tuple[str, str]:
484
+ # heuristic: take untried exit first
485
+ untried = self._extract_untried_exits(obs)
486
+ if untried:
487
+ return untried[0], "Heuristic: try an untried exit."
488
+
489
+ # heuristic: try a promising interaction not tried yet
490
  loc = self._extract_location(obs)
 
 
 
 
 
 
 
491
  for a in candidates:
492
+ low = a.lower().strip()
493
+ if low.startswith(("take ", "get ", "open ", "read ", "examine ", "turn on ", "unlock ")):
494
  if self.tried[(loc, low)] == 0:
495
+ return a, "Heuristic: try a high-value interaction."
496
 
497
  # LLM fallback: choose from candidate list exactly
498
  if not candidates:
 
500
 
501
  cand = candidates[:10]
502
  prompt = self._build_llm_prompt(obs, inv_txt, cand)
 
 
 
 
503
 
504
+ try:
505
+ resp = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=160)
506
+ thought, tool, args = self._parse_llm_response(resp)
507
+ a = self._normalize_action(str(args.get("action", "")).strip())
508
+ canon = {x.lower(): x for x in cand}
509
+ if a.lower() in canon:
510
+ return canon[a.lower()], thought or "LLM chose a candidate."
511
+ return cand[0], "LLM invalid; fallback to first candidate."
512
+ except Exception:
513
+ # no LLM available / error => deterministic fallback
514
+ return cand[0], "LLM unavailable/error; fallback to first candidate."
515
 
516
  def _build_llm_prompt(self, obs: str, inv_txt: str, candidates: list[str]) -> str:
517
  obs = (obs or "").strip()[:1100]
518
  inv_txt = (inv_txt or "").strip()[:350]
519
 
520
+ parts = [
521
+ f"Score: {self.score}/{self.max_score} | Moves: {max(self.moves, self._internal_moves)}",
522
+ f"Location: {self.last_location}",
523
  ]
524
  if inv_txt:
525
+ parts.append(f"Inventory info:\n{inv_txt}")
526
  if self.recent_actions:
527
+ parts.append("Recent actions: " + ", ".join(list(self.recent_actions)[-6:]))
528
 
529
+ parts.append("\nCurrent observation:\n" + obs)
530
+ parts.append("\nCandidate actions (choose exactly ONE):")
531
  for a in candidates:
532
+ parts.append(f"- {a}")
533
+ return "\n".join(parts)
 
534
 
535
+ def _parse_llm_response(self, response: str) -> tuple[str, str, dict]:
536
  thought = ""
537
  tool = "play_action"
538
  args = {"action": "look"}
 
539
  if not response:
540
  return thought, tool, args
541
 
542
  m = re.search(r"(?im)^\s*THOUGHT\s*:\s*(.+)$", response)
543
  if m:
544
  thought = m.group(1).strip()
 
545
  m = re.search(r"(?im)^\s*TOOL\s*:\s*([a-zA-Z0-9_]+)\s*$", response)
546
  if m:
547
  tool = m.group(1).strip()
 
548
  m = re.search(r"(?is)^\s*ARGS\s*:\s*(\{.*\})\s*$", response)
549
  if m:
550
  raw = m.group(1).strip()
 
561
  if not isinstance(args, dict):
562
  args = {"action": "look"}
563
 
564
+ # enforce tool
565
+ tool = "play_action"
566
  return thought, tool, args
567
 
568
+ # ---------------------------------------------------------------------
569
+ def _parse_valid_actions(self, txt: str) -> list[str]:
570
+ if not txt:
571
+ return []
572
+ out = []
573
+ for line in txt.splitlines():
574
+ line = line.strip()
575
+ if line.startswith("- "):
576
+ a = line[2:].strip()
577
+ a = self._normalize_action(a)
578
+ low = a.lower()
579
+ if not a:
580
+ continue
581
+ if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
582
+ continue
583
+ out.append(a)
584
+ # dedup keep order
585
+ seen = set()
586
+ uniq = []
587
+ for a in out:
588
+ low = a.lower()
589
+ if low not in seen:
590
+ seen.add(low)
591
+ uniq.append(a)
592
+ return uniq