test1

Sleeping

App Files Files Community

bouhss committed on Feb 22

Commit

07df9e7

verified ·

1 Parent(s): a8bba7b

Update agent.py

Browse files

Files changed (1) hide show

agent.py +109 -314

agent.py CHANGED Viewed

@@ -1,16 +1,10 @@
 """
-Student Agent for Text Adventure Games (Best-performance submission)
-Design:
-- Primary driver: heuristics + server tools, not pure LLM.
-- Uses MCP tools:
-  - play_action (commit)
-  - peek_action (simulate without committing) => BIG performance boost
-  - get_valid_actions (reduce hallucinations)
-  - inventory (optional context)
-  - memory/get_map (rare; not required)
-- LLM only as fallback: choose among a candidate list deterministically (temp=0).
-- Robust stats: internal move counter so moves never stay 0 even if banner parsing fails.
 """
 import json
@@ -26,27 +20,15 @@ from huggingface_hub import InferenceClient
 load_dotenv()
-# =============================================================================
-# LLM Configuration - DO NOT MODIFY
-# =============================================================================
 LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
 _hf_token = os.getenv("HF_TOKEN")
 LLM_CLIENT = InferenceClient(token=_hf_token) if _hf_token else None
 def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 180) -> str:
-    """
-    Deterministic LLM call (temperature=0). Retries a few times for transient errors.
-    If HF_TOKEN missing, raises.
-    """
     if LLM_CLIENT is None:
-        raise RuntimeError("HF_TOKEN missing => LLM unavailable")
-    messages = [
-        {"role": "system", "content": system_prompt},
-        {"role": "user", "content": prompt},
-    ]
     for attempt in range(3):
         try:
             resp = LLM_CLIENT.chat.completions.create(
@@ -66,7 +48,6 @@ def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 180)
 @dataclass
 class RunResult:
-    """Result of running the agent. Do not modify this class."""
     final_score: int
     max_score: int
     moves: int
@@ -78,49 +59,39 @@ class RunResult:
 SYSTEM_PROMPT = """You are an expert text-adventure agent.
-You must output EXACTLY:
 THOUGHT: ...
 TOOL: play_action
 ARGS: {"action": "<one candidate action>"}
 Rules:
-- Choose ONE action EXACTLY from the candidate list provided by the user.
-- Do not invent actions outside that list.
-- Avoid repeating actions that recently failed.
-- No markdown and no extra text.
 """
-MOVE_ACTIONS = ["north", "south", "east", "west", "up", "down", "enter", "exit",
-                "northeast", "northwest", "southeast", "southwest"]
-MOVE_ALIASES = {"n": "north", "s": "south", "e": "east", "w": "west", "u": "up", "d": "down",
-                "ne": "northeast", "nw": "northwest", "se": "southeast", "sw": "southwest"}
 BAD_PREFIXES = ("save", "restore", "quit", "restart", "help", "verbose", "script", "unscript", "version")
 BAD_EXACT = {"wait", "z"}
 class StudentAgent:
     def __init__(self):
-        # parsed from server banner if available
         self.score = 0
         self.max_score = 0
         self.moves = 0
-        # internal moves (robust)
         self._internal_moves = 0
-        # exploration / loop avoidance
         self.locations_visited: set[str] = set()
         self.last_location = "Unknown"
-        self.tried = defaultdict(int)         # tried[(loc, action)] += 1
         self.recent_actions = deque(maxlen=10)
         self.recent_obs = deque(maxlen=6)
-        # valid actions cache
-        self.valid_cache = {}  # loc -> list[str]
-    # ---------------------------------------------------------------------
     async def run(self, client, game: str, max_steps: int, seed: int, verbose: bool = False) -> RunResult:
         history: list[tuple[str, str, str]] = []
@@ -138,9 +109,6 @@ class StudentAgent:
             self.last_location = self._extract_location(obs)
             self.locations_visited.add(self.last_location)
-            if verbose:
-                print(obs)
             for step in range(1, max_steps + 1):
                 loc = self._extract_location(obs)
                 self.last_location = loc
@@ -148,7 +116,6 @@ class StudentAgent:
                 stuck = self._is_stuck(obs)
-                # refresh valid actions (sparsely)
                 valid_actions = self.valid_cache.get(loc, [])
                 if has("get_valid_actions") and (stuck or not valid_actions or step % 6 == 0):
                     va_txt = await self._call_tool_text(client, "get_valid_actions", {"limit": 60})
@@ -160,27 +127,17 @@ class StudentAgent:
                 if has("inventory") and (step == 1 or stuck or step % 8 == 0):
                     inv_txt = await self._call_tool_text(client, "inventory", {})
-                # candidates from server meta tags + valid actions
                 candidates = self._make_candidates(obs, inv_txt, valid_actions, loc)
-                action = None
-                thought = ""
-                # look-ahead (best)
                 if has("peek_action") and candidates:
-                    action, thought = await self._choose_by_lookahead(
-                        client=client, loc=loc, obs=obs, candidates=candidates
-                    )
-                # fallback heuristic + optional LLM
                 if not action:
-                    action, thought = await self._choose_without_peek(
-                        obs=obs, inv_txt=inv_txt, candidates=candidates, seed=seed, step=step
-                    )
-                action = self._normalize_action(action or "look")
-                # commit
                 obs2 = await self._call_tool_text(client, "play_action", {"action": action})
                 self._internal_moves += 1
@@ -193,15 +150,10 @@ class StudentAgent:
                 self.locations_visited.add(new_loc)
                 history.append((thought, f"play_action({action})", (obs2 or "")[:260]))
                 if verbose:
-                    print(f"\n--- step {step} ---")
-                    print(f"THOUGHT: {thought}")
-                    print(f"ACTION: {action}")
-                    print(obs2)
                 obs = obs2
                 if self._is_game_over(obs):
                     break
@@ -225,10 +177,9 @@ class StudentAgent:
                 history=history,
             )
-    # ---------------------------------------------------------------------
     async def _call_tool_text(self, client, tool: str, args: dict) -> str:
-        result = await client.call_tool(tool, args)
-        return self._extract_text(result)
     def _extract_text(self, result: Any) -> str:
         if result is None:
@@ -242,351 +193,195 @@ class StudentAgent:
             return str(part)
         return str(result)
-    # ---------------------------------------------------------------------
-    # Parsing
     def _update_from_text(self, text: str) -> None:
-        """
-        Parse server banner:
-        [Score: s/max | Moves: m | Location: L]
-        Also accept +k points tag.
-        """
-        if not text:
-            return
-        m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\|\s*Location:\s*(.+?)\]", text)
         if m:
             self.score = int(m.group(1))
             self.max_score = int(m.group(2))
             self.moves = int(m.group(3))
             self.last_location = m.group(4).strip()
-        # fallback: +k points!
-        mp = re.search(r"\[\+(\d+)\s+points", text, flags=re.IGNORECASE)
-        if mp and self.score >= 0:
-            # score already parsed above in most cases; keep safe
-            self.score = max(self.score, self.score + int(mp.group(1)))
     def _extract_location(self, text: str) -> str:
-        # Prefer banner location
         m = re.search(r"\|\s*Location:\s*(.+?)\]", text or "")
-        if m:
-            loc = m.group(1).strip()
-            if loc:
-                return loc
-        # else fallback: first non-empty line
-        if not text:
-            return "Unknown"
-        for line in text.splitlines():
             line = line.strip()
-            if not line:
-                continue
-            if line.startswith("[Score:"):
-                continue
-            return line
         return "Unknown"
-    def _extract_untried_exits(self, text: str) -> list[str]:
         m = re.search(r"\[Untried exits:\s*(.+?)\]", text or "")
         if not m:
             return []
-        dirs = [d.strip() for d in m.group(1).split(",")]
-        out = []
-        for d in dirs:
-            d = self._normalize_action(d).lower()
-            if d and d not in out:
-                out.append(d)
-        return out
     def _extract_interactions(self, text: str) -> list[str]:
         m = re.search(r"\[Interactions:\s*(.+?)\]", text or "")
         if not m:
             return []
-        acts = [a.strip() for a in m.group(1).split(",")]
-        out = []
-        for a in acts:
-            if a and a.lower() not in out:
-                out.append(a)
-        return out
     def _is_game_over(self, text: str) -> bool:
         t = (text or "").lower()
-        return ("game over" in t) or ("you have died" in t) or ("you are dead" in t) or ("[game over]" in t)
     def _is_stuck(self, text: str) -> bool:
         t = (text or "").lower()
-        bad = [
-            "i don't understand", "you can't", "that's not", "not a verb",
-            "nothing happens", "you don't see", "you see nothing", "beg your pardon"
-        ]
         rep = len(self.recent_obs) >= 3 and all(self.recent_obs[-1] == x for x in list(self.recent_obs)[-3:])
         return any(b in t for b in bad) or rep
-    def _normalize_action(self, action: str) -> str:
-        a = (action or "").strip()
-        low = a.lower()
-        if low in MOVE_ALIASES:
-            return MOVE_ALIASES[low]
-        return a
-    # ---------------------------------------------------------------------
-    # Candidates
     def _make_candidates(self, obs: str, inv_txt: str, valid_actions: list[str], loc: str) -> list[str]:
-        obs_l = (obs or "").lower()
-        inv_l = (inv_txt or "").lower()
-        candidates = []
-        seen = set()
         def add(a: str):
-            a = self._normalize_action(a)
             if not a:
                 return
-            low = a.lower().strip()
             if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
                 return
             if low not in seen:
                 seen.add(low)
                 candidates.append(a)
-        # from server tags
-        for d in self._extract_untried_exits(obs):
             add(d)
         for a in self._extract_interactions(obs):
             add(a)
-        # darkness
-        if "dark" in obs_l and ("lamp" in obs_l or "lamp" in inv_l):
-            add("take lamp")
-            add("turn on lamp")
-        # add valid actions (movement first then interactions)
-        moves = []
-        inter = []
-        for a in valid_actions or []:
-            al = a.lower().strip()
-            first = al.split()[0] if al else ""
-            if first in MOVE_ACTIONS:
-                moves.append(a)
-            else:
-                inter.append(a)
-        # prioritize movement not tried too often
-        def move_key(a: str):
-            return self.tried[(loc, a.lower().strip())]
-        for m in sorted(set(moves), key=move_key):
-            add(m)
-        # common score-ish interactions
-        scorey = ("take ", "get ", "open ", "read ", "examine ", "look at ", "turn on ", "unlock ", "insert ", "put ")
-        for a in inter:
-            if a.lower().startswith(scorey):
-                add(a)
-        for a in inter:
             add(a)
-            if len(candidates) >= 24:
-                break
-        # safe basics
         add("look")
         add("inventory")
         add("take all")
-        # remove too-repeated
         cleaned = []
         for a in candidates:
             if list(self.recent_actions).count(a.lower()) >= 3:
                 continue
             cleaned.append(a)
         return cleaned[:20]
-    # ---------------------------------------------------------------------
-    # Look-ahead selection
     async def _choose_by_lookahead(self, client, loc: str, obs: str, candidates: list[str]) -> tuple[Optional[str], str]:
         base_score = self.score
-        base_loc = self._extract_location(obs)
-        untried = set(self._extract_untried_exits(obs))
-        # shortlist for speed
-        priority = []
         for a in candidates:
             low = a.lower().strip()
-            is_untried = 0 if low in untried else 1
-            tried = self.tried[(loc, low)]
-            priority.append((is_untried, tried, low, a))
-        priority.sort()
-        shortlist = [x[-1] for x in priority][:10]
-        best_a = None
-        best_u = -10**18
-        best_th = ""
         for a in shortlist:
             low = a.lower().strip()
             if self.tried[(loc, low)] >= 4:
                 continue
             peek = await self._call_tool_text(client, "peek_action", {"action": a})
-            peek_l = (peek or "").lower()
-            if self._is_game_over(peek) or "you have died" in peek_l:
                 u = -1_000_000_000
             else:
-                s_after, loc_after = self._parse_peek_score_loc(peek, fallback_score=base_score)
                 delta = max(0, s_after - base_score)
-                new_loc_bonus = 0
-                changed_bonus = 0
-                if loc_after and loc_after != base_loc:
-                    changed_bonus = 60
-                    if loc_after not in self.locations_visited:
-                        new_loc_bonus = 280
                 loop_pen = 90 * list(self.recent_actions).count(low)
                 stuck_pen = 180 if self._is_stuck(peek) else 0
-                # prefer untried exits
-                untried_bonus = 120 if low in untried else 0
-                u = delta * 900 + new_loc_bonus + changed_bonus + untried_bonus - loop_pen - stuck_pen
-                # lamp preference in darkness
-                if "dark" in (obs or "").lower() and "lamp" in low:
-                    u += 120
             if u > best_u:
-                best_u = u
-                best_a = a
                 best_th = f"Look-ahead chose '{a}' (utility={u})."
         if best_a is None or best_u < -10000:
-            return None, "Look-ahead found no good action; fallback."
         return best_a, best_th
-    def _parse_peek_score_loc(self, text: str, fallback_score: int) -> tuple[int, str]:
-        score = fallback_score
-        loc = self._extract_location(text)
-        m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\|\s*Location:\s*(.+?)\]", text or "")
-        if m:
-            score = int(m.group(1))
-            loc = m.group(4).strip()
-        mp = re.search(r"\[\+(\d+)\s+points", text or "", flags=re.IGNORECASE)
-        if mp and score == fallback_score:
-            score = fallback_score + int(mp.group(1))
-        return score, loc
-    # ---------------------------------------------------------------------
-    # No-peek fallback
-    async def _choose_without_peek(self, obs: str, inv_txt: str, candidates: list[str], seed: int, step: int) -> tuple[str, str]:
-        # heuristic: take untried exit first
-        untried = self._extract_untried_exits(obs)
         if untried:
-            return untried[0], "Heuristic: try an untried exit."
-        # heuristic: try a promising interaction not tried yet
-        loc = self._extract_location(obs)
-        for a in candidates:
-            low = a.lower().strip()
-            if low.startswith(("take ", "get ", "open ", "read ", "examine ", "turn on ", "unlock ")):
-                if self.tried[(loc, low)] == 0:
-                    return a, "Heuristic: try a high-value interaction."
-        # LLM fallback: choose from candidate list exactly
         if not candidates:
-            return "look", "No candidates; fallback to look."
-        cand = candidates[:10]
-        prompt = self._build_llm_prompt(obs, inv_txt, cand)
         try:
             resp = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=160)
-            thought, tool, args = self._parse_llm_response(resp)
-            a = self._normalize_action(str(args.get("action", "")).strip())
-            canon = {x.lower(): x for x in cand}
-            if a.lower() in canon:
-                return canon[a.lower()], thought or "LLM chose a candidate."
-            return cand[0], "LLM invalid; fallback to first candidate."
         except Exception:
-            # no LLM available / error => deterministic fallback
-            return cand[0], "LLM unavailable/error; fallback to first candidate."
-    def _build_llm_prompt(self, obs: str, inv_txt: str, candidates: list[str]) -> str:
-        obs = (obs or "").strip()[:1100]
-        inv_txt = (inv_txt or "").strip()[:350]
         parts = [
             f"Score: {self.score}/{self.max_score} | Moves: {max(self.moves, self._internal_moves)}",
             f"Location: {self.last_location}",
         ]
-        if inv_txt:
-            parts.append(f"Inventory info:\n{inv_txt}")
-        if self.recent_actions:
-            parts.append("Recent actions: " + ", ".join(list(self.recent_actions)[-6:]))
-        parts.append("\nCurrent observation:\n" + obs)
-        parts.append("\nCandidate actions (choose exactly ONE):")
         for a in candidates:
             parts.append(f"- {a}")
         return "\n".join(parts)
-    def _parse_llm_response(self, response: str) -> tuple[str, str, dict]:
         thought = ""
-        tool = "play_action"
         args = {"action": "look"}
-        if not response:
-            return thought, tool, args
-        m = re.search(r"(?im)^\s*THOUGHT\s*:\s*(.+)$", response)
         if m:
             thought = m.group(1).strip()
-        m = re.search(r"(?im)^\s*TOOL\s*:\s*([a-zA-Z0-9_]+)\s*$", response)
         if m:
-            tool = m.group(1).strip()
-        m = re.search(r"(?is)^\s*ARGS\s*:\s*(\{.*\})\s*$", response)
-        if m:
-            raw = m.group(1).strip()
             try:
                 args = json.loads(raw)
             except Exception:
-                raw2 = raw.replace("'", '"')
-                raw2 = re.sub(r",\s*}", "}", raw2)
-                try:
-                    args = json.loads(raw2)
-                except Exception:
-                    args = {"action": "look"}
-        if not isinstance(args, dict):
-            args = {"action": "look"}
-        # enforce tool
-        tool = "play_action"
-        return thought, tool, args
-    # ---------------------------------------------------------------------
-    def _parse_valid_actions(self, txt: str) -> list[str]:
-        if not txt:
-            return []
-        out = []
-        for line in txt.splitlines():
-            line = line.strip()
-            if line.startswith("- "):
-                a = line[2:].strip()
-                a = self._normalize_action(a)
-                low = a.lower()
-                if not a:
-                    continue
-                if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
-                    continue
-                out.append(a)
-        # dedup keep order
-        seen = set()
-        uniq = []
-        for a in out:
-            low = a.lower()
-            if low not in seen:
-                seen.add(low)
-                uniq.append(a)
-        return uniq

 """
+Student Agent (Best practical submission)
+- Works even if HF_TOKEN is missing (no crash).
+- Uses peek_action + get_valid_actions + server meta tags to explore and gain score.
+- Uses LLM only as fallback when HF_TOKEN is available.
+- Always returns non-zero moves (internal counter).
 """
 import json
 load_dotenv()
 LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
 _hf_token = os.getenv("HF_TOKEN")
 LLM_CLIENT = InferenceClient(token=_hf_token) if _hf_token else None
 def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 180) -> str:
     if LLM_CLIENT is None:
+        raise RuntimeError("LLM unavailable (HF_TOKEN missing).")
+    messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}]
     for attempt in range(3):
         try:
             resp = LLM_CLIENT.chat.completions.create(
 @dataclass
 class RunResult:
     final_score: int
     max_score: int
     moves: int
 SYSTEM_PROMPT = """You are an expert text-adventure agent.
+Output EXACTLY:
 THOUGHT: ...
 TOOL: play_action
 ARGS: {"action": "<one candidate action>"}
 Rules:
+- Choose exactly one action from the candidate list.
+- Do not invent actions outside the list.
+- No extra text, no markdown.
 """
+MOVE_ALIASES = {"n":"north","s":"south","e":"east","w":"west","u":"up","d":"down","ne":"northeast","nw":"northwest","se":"southeast","sw":"southwest"}
 BAD_PREFIXES = ("save", "restore", "quit", "restart", "help", "verbose", "script", "unscript", "version")
 BAD_EXACT = {"wait", "z"}
 class StudentAgent:
     def __init__(self):
         self.score = 0
         self.max_score = 0
         self.moves = 0
         self._internal_moves = 0
         self.locations_visited: set[str] = set()
         self.last_location = "Unknown"
+        self.tried = defaultdict(int)
         self.recent_actions = deque(maxlen=10)
         self.recent_obs = deque(maxlen=6)
+        self.valid_cache = {}
     async def run(self, client, game: str, max_steps: int, seed: int, verbose: bool = False) -> RunResult:
         history: list[tuple[str, str, str]] = []
             self.last_location = self._extract_location(obs)
             self.locations_visited.add(self.last_location)
             for step in range(1, max_steps + 1):
                 loc = self._extract_location(obs)
                 self.last_location = loc
                 stuck = self._is_stuck(obs)
                 valid_actions = self.valid_cache.get(loc, [])
                 if has("get_valid_actions") and (stuck or not valid_actions or step % 6 == 0):
                     va_txt = await self._call_tool_text(client, "get_valid_actions", {"limit": 60})
                 if has("inventory") and (step == 1 or stuck or step % 8 == 0):
                     inv_txt = await self._call_tool_text(client, "inventory", {})
                 candidates = self._make_candidates(obs, inv_txt, valid_actions, loc)
+                action, thought = None, ""
                 if has("peek_action") and candidates:
+                    action, thought = await self._choose_by_lookahead(client, loc, obs, candidates)
                 if not action:
+                    action, thought = await self._choose_fallback(obs, inv_txt, candidates, seed, step)
+                action = self._norm_action(action or "look")
                 obs2 = await self._call_tool_text(client, "play_action", {"action": action})
                 self._internal_moves += 1
                 self.locations_visited.add(new_loc)
                 history.append((thought, f"play_action({action})", (obs2 or "")[:260]))
                 if verbose:
+                    print(f"\n--- step {step} ---\nTHOUGHT: {thought}\nACTION: {action}\n{obs2}")
                 obs = obs2
                 if self._is_game_over(obs):
                     break
                 history=history,
             )
     async def _call_tool_text(self, client, tool: str, args: dict) -> str:
+        r = await client.call_tool(tool, args)
+        return self._extract_text(r)
     def _extract_text(self, result: Any) -> str:
         if result is None:
             return str(part)
         return str(result)
+    def _norm_action(self, a: str) -> str:
+        a = (a or "").strip()
+        low = a.lower()
+        return MOVE_ALIASES.get(low, a)
     def _update_from_text(self, text: str) -> None:
+        m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\|\s*Location:\s*(.+?)\]", text or "")
         if m:
             self.score = int(m.group(1))
             self.max_score = int(m.group(2))
             self.moves = int(m.group(3))
             self.last_location = m.group(4).strip()
     def _extract_location(self, text: str) -> str:
         m = re.search(r"\|\s*Location:\s*(.+?)\]", text or "")
+        if m and m.group(1).strip():
+            return m.group(1).strip()
+        for line in (text or "").splitlines():
             line = line.strip()
+            if line and not line.startswith("[Score:"):
+                return line
         return "Unknown"
+    def _extract_untried(self, text: str) -> list[str]:
         m = re.search(r"\[Untried exits:\s*(.+?)\]", text or "")
         if not m:
             return []
+        return [self._norm_action(x.strip()).lower() for x in m.group(1).split(",") if x.strip()]
     def _extract_interactions(self, text: str) -> list[str]:
         m = re.search(r"\[Interactions:\s*(.+?)\]", text or "")
         if not m:
             return []
+        return [x.strip() for x in m.group(1).split(",") if x.strip()]
     def _is_game_over(self, text: str) -> bool:
         t = (text or "").lower()
+        return ("game over" in t) or ("you have died" in t) or ("you are dead" in t)
     def _is_stuck(self, text: str) -> bool:
         t = (text or "").lower()
+        bad = ["i don't understand", "you can't", "that's not", "not a verb", "nothing happens", "beg your pardon"]
         rep = len(self.recent_obs) >= 3 and all(self.recent_obs[-1] == x for x in list(self.recent_obs)[-3:])
         return any(b in t for b in bad) or rep
+    def _parse_valid_actions(self, txt: str) -> list[str]:
+        out = []
+        for line in (txt or "").splitlines():
+            line = line.strip()
+            if line.startswith("- "):
+                a = self._norm_action(line[2:].strip())
+                low = a.lower()
+                if not a:
+                    continue
+                if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
+                    continue
+                out.append(a)
+        # dedup
+        seen = set()
+        uniq = []
+        for a in out:
+            low = a.lower()
+            if low not in seen:
+                seen.add(low)
+                uniq.append(a)
+        return uniq
     def _make_candidates(self, obs: str, inv_txt: str, valid_actions: list[str], loc: str) -> list[str]:
+        candidates, seen = [], set()
         def add(a: str):
+            a = self._norm_action(a)
+            low = a.lower().strip()
             if not a:
                 return
             if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
                 return
             if low not in seen:
                 seen.add(low)
                 candidates.append(a)
+        # from tags
+        for d in self._extract_untried(obs):
             add(d)
         for a in self._extract_interactions(obs):
             add(a)
+        # from valid actions
+        for a in valid_actions[:25]:
             add(a)
+        # basics
         add("look")
         add("inventory")
         add("take all")
+        # avoid too repeated
         cleaned = []
         for a in candidates:
             if list(self.recent_actions).count(a.lower()) >= 3:
                 continue
             cleaned.append(a)
         return cleaned[:20]
     async def _choose_by_lookahead(self, client, loc: str, obs: str, candidates: list[str]) -> tuple[Optional[str], str]:
         base_score = self.score
+        untried = set(self._extract_untried(obs))
+        # shortlist
+        pr = []
         for a in candidates:
             low = a.lower().strip()
+            pr.append((0 if low in untried else 1, self.tried[(loc, low)], low, a))
+        pr.sort()
+        shortlist = [x[-1] for x in pr][:10]
+        best_a, best_u, best_th = None, -10**18, ""
         for a in shortlist:
             low = a.lower().strip()
             if self.tried[(loc, low)] >= 4:
                 continue
             peek = await self._call_tool_text(client, "peek_action", {"action": a})
+            if self._is_game_over(peek):
                 u = -1_000_000_000
             else:
+                s_after = base_score
+                m = re.search(r"\[Score:\s*(\d+)\s*/", peek or "")
+                if m:
+                    s_after = int(m.group(1))
                 delta = max(0, s_after - base_score)
+                loc_after = self._extract_location(peek)
+                new_loc_bonus = 280 if (loc_after and loc_after not in self.locations_visited and loc_after != self._extract_location(obs)) else 0
+                untried_bonus = 120 if low in untried else 0
                 loop_pen = 90 * list(self.recent_actions).count(low)
                 stuck_pen = 180 if self._is_stuck(peek) else 0
+                u = delta * 900 + new_loc_bonus + untried_bonus - loop_pen - stuck_pen
             if u > best_u:
+                best_u, best_a = u, a
                 best_th = f"Look-ahead chose '{a}' (utility={u})."
         if best_a is None or best_u < -10000:
+            return None, "Look-ahead no good action; fallback."
         return best_a, best_th
+    async def _choose_fallback(self, obs: str, inv_txt: str, candidates: list[str], seed: int, step: int) -> tuple[str, str]:
+        untried = self._extract_untried(obs)
         if untried:
+            return untried[0], "Heuristic: try untried exit."
         if not candidates:
+            return "look", "No candidates; fallback."
+        # LLM only if available
         try:
+            prompt = self._llm_prompt(obs, inv_txt, candidates[:10])
             resp = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=160)
+            thought, args = self._parse_llm(resp)
+            act = self._norm_action(str(args.get("action", "")).strip())
+            canon = {x.lower(): x for x in candidates[:10]}
+            if act.lower() in canon:
+                return canon[act.lower()], thought or "LLM chose candidate."
         except Exception:
+            pass
+        return candidates[0], "Fallback: first candidate."
+    def _llm_prompt(self, obs: str, inv_txt: str, candidates: list[str]) -> str:
         parts = [
             f"Score: {self.score}/{self.max_score} | Moves: {max(self.moves, self._internal_moves)}",
             f"Location: {self.last_location}",
+            "\nCurrent observation:\n" + (obs or "")[:1100],
+            "\nCandidate actions (choose exactly one):",
         ]
         for a in candidates:
             parts.append(f"- {a}")
         return "\n".join(parts)
+    def _parse_llm(self, resp: str) -> tuple[str, dict]:
         thought = ""
         args = {"action": "look"}
+        m = re.search(r"(?im)^THOUGHT:\s*(.+)$", resp or "")
         if m:
             thought = m.group(1).strip()
+        m = re.search(r"(?is)^ARGS:\s*(\{.*\})\s*$", resp or "")
         if m:
+            raw = m.group(1)
             try:
                 args = json.loads(raw)
             except Exception:
+                pass
+        return thought, args