bouhss committed on
Commit
07df9e7
·
verified ·
1 Parent(s): a8bba7b

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +109 -314
agent.py CHANGED
@@ -1,16 +1,10 @@
1
  """
2
- Student Agent for Text Adventure Games (Best-performance submission)
3
-
4
- Design:
5
- - Primary driver: heuristics + server tools, not pure LLM.
6
- - Uses MCP tools:
7
- - play_action (commit)
8
- - peek_action (simulate without committing) => BIG performance boost
9
- - get_valid_actions (reduce hallucinations)
10
- - inventory (optional context)
11
- - memory/get_map (rare; not required)
12
- - LLM only as fallback: choose among a candidate list deterministically (temp=0).
13
- - Robust stats: internal move counter so moves never stay 0 even if banner parsing fails.
14
  """
15
 
16
  import json
@@ -26,27 +20,15 @@ from huggingface_hub import InferenceClient
26
 
27
  load_dotenv()
28
 
29
- # =============================================================================
30
- # LLM Configuration - DO NOT MODIFY
31
- # =============================================================================
32
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
33
-
34
  _hf_token = os.getenv("HF_TOKEN")
35
  LLM_CLIENT = InferenceClient(token=_hf_token) if _hf_token else None
36
 
37
 
38
  def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 180) -> str:
39
- """
40
- Deterministic LLM call (temperature=0). Retries a few times for transient errors.
41
- If HF_TOKEN missing, raises.
42
- """
43
  if LLM_CLIENT is None:
44
- raise RuntimeError("HF_TOKEN missing => LLM unavailable")
45
-
46
- messages = [
47
- {"role": "system", "content": system_prompt},
48
- {"role": "user", "content": prompt},
49
- ]
50
  for attempt in range(3):
51
  try:
52
  resp = LLM_CLIENT.chat.completions.create(
@@ -66,7 +48,6 @@ def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 180)
66
 
67
  @dataclass
68
  class RunResult:
69
- """Result of running the agent. Do not modify this class."""
70
  final_score: int
71
  max_score: int
72
  moves: int
@@ -78,49 +59,39 @@ class RunResult:
78
 
79
  SYSTEM_PROMPT = """You are an expert text-adventure agent.
80
 
81
- You must output EXACTLY:
82
  THOUGHT: ...
83
  TOOL: play_action
84
  ARGS: {"action": "<one candidate action>"}
85
 
86
  Rules:
87
- - Choose ONE action EXACTLY from the candidate list provided by the user.
88
- - Do not invent actions outside that list.
89
- - Avoid repeating actions that recently failed.
90
- - No markdown and no extra text.
91
  """
92
 
93
 
94
- MOVE_ACTIONS = ["north", "south", "east", "west", "up", "down", "enter", "exit",
95
- "northeast", "northwest", "southeast", "southwest"]
96
- MOVE_ALIASES = {"n": "north", "s": "south", "e": "east", "w": "west", "u": "up", "d": "down",
97
- "ne": "northeast", "nw": "northwest", "se": "southeast", "sw": "southwest"}
98
-
99
  BAD_PREFIXES = ("save", "restore", "quit", "restart", "help", "verbose", "script", "unscript", "version")
100
  BAD_EXACT = {"wait", "z"}
101
 
102
 
103
  class StudentAgent:
104
  def __init__(self):
105
- # parsed from server banner if available
106
  self.score = 0
107
  self.max_score = 0
108
  self.moves = 0
109
-
110
- # internal moves (robust)
111
  self._internal_moves = 0
112
 
113
- # exploration / loop avoidance
114
  self.locations_visited: set[str] = set()
115
  self.last_location = "Unknown"
116
- self.tried = defaultdict(int) # tried[(loc, action)] += 1
 
117
  self.recent_actions = deque(maxlen=10)
118
  self.recent_obs = deque(maxlen=6)
119
 
120
- # valid actions cache
121
- self.valid_cache = {} # loc -> list[str]
122
 
123
- # ---------------------------------------------------------------------
124
  async def run(self, client, game: str, max_steps: int, seed: int, verbose: bool = False) -> RunResult:
125
  history: list[tuple[str, str, str]] = []
126
 
@@ -138,9 +109,6 @@ class StudentAgent:
138
  self.last_location = self._extract_location(obs)
139
  self.locations_visited.add(self.last_location)
140
 
141
- if verbose:
142
- print(obs)
143
-
144
  for step in range(1, max_steps + 1):
145
  loc = self._extract_location(obs)
146
  self.last_location = loc
@@ -148,7 +116,6 @@ class StudentAgent:
148
 
149
  stuck = self._is_stuck(obs)
150
 
151
- # refresh valid actions (sparsely)
152
  valid_actions = self.valid_cache.get(loc, [])
153
  if has("get_valid_actions") and (stuck or not valid_actions or step % 6 == 0):
154
  va_txt = await self._call_tool_text(client, "get_valid_actions", {"limit": 60})
@@ -160,27 +127,17 @@ class StudentAgent:
160
  if has("inventory") and (step == 1 or stuck or step % 8 == 0):
161
  inv_txt = await self._call_tool_text(client, "inventory", {})
162
 
163
- # candidates from server meta tags + valid actions
164
  candidates = self._make_candidates(obs, inv_txt, valid_actions, loc)
165
 
166
- action = None
167
- thought = ""
168
-
169
- # look-ahead (best)
170
  if has("peek_action") and candidates:
171
- action, thought = await self._choose_by_lookahead(
172
- client=client, loc=loc, obs=obs, candidates=candidates
173
- )
174
 
175
- # fallback heuristic + optional LLM
176
  if not action:
177
- action, thought = await self._choose_without_peek(
178
- obs=obs, inv_txt=inv_txt, candidates=candidates, seed=seed, step=step
179
- )
180
 
181
- action = self._normalize_action(action or "look")
182
 
183
- # commit
184
  obs2 = await self._call_tool_text(client, "play_action", {"action": action})
185
  self._internal_moves += 1
186
 
@@ -193,15 +150,10 @@ class StudentAgent:
193
  self.locations_visited.add(new_loc)
194
 
195
  history.append((thought, f"play_action({action})", (obs2 or "")[:260]))
196
-
197
  if verbose:
198
- print(f"\n--- step {step} ---")
199
- print(f"THOUGHT: {thought}")
200
- print(f"ACTION: {action}")
201
- print(obs2)
202
 
203
  obs = obs2
204
-
205
  if self._is_game_over(obs):
206
  break
207
 
@@ -225,10 +177,9 @@ class StudentAgent:
225
  history=history,
226
  )
227
 
228
- # ---------------------------------------------------------------------
229
  async def _call_tool_text(self, client, tool: str, args: dict) -> str:
230
- result = await client.call_tool(tool, args)
231
- return self._extract_text(result)
232
 
233
  def _extract_text(self, result: Any) -> str:
234
  if result is None:
@@ -242,351 +193,195 @@ class StudentAgent:
242
  return str(part)
243
  return str(result)
244
 
245
- # ---------------------------------------------------------------------
246
- # Parsing
 
 
 
247
  def _update_from_text(self, text: str) -> None:
248
- """
249
- Parse server banner:
250
- [Score: s/max | Moves: m | Location: L]
251
- Also accept +k points tag.
252
- """
253
- if not text:
254
- return
255
-
256
- m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\|\s*Location:\s*(.+?)\]", text)
257
  if m:
258
  self.score = int(m.group(1))
259
  self.max_score = int(m.group(2))
260
  self.moves = int(m.group(3))
261
  self.last_location = m.group(4).strip()
262
 
263
- # fallback: +k points!
264
- mp = re.search(r"\[\+(\d+)\s+points", text, flags=re.IGNORECASE)
265
- if mp and self.score >= 0:
266
- # score already parsed above in most cases; keep safe
267
- self.score = max(self.score, self.score + int(mp.group(1)))
268
-
269
  def _extract_location(self, text: str) -> str:
270
- # Prefer banner location
271
  m = re.search(r"\|\s*Location:\s*(.+?)\]", text or "")
272
- if m:
273
- loc = m.group(1).strip()
274
- if loc:
275
- return loc
276
- # else fallback: first non-empty line
277
- if not text:
278
- return "Unknown"
279
- for line in text.splitlines():
280
  line = line.strip()
281
- if not line:
282
- continue
283
- if line.startswith("[Score:"):
284
- continue
285
- return line
286
  return "Unknown"
287
 
288
- def _extract_untried_exits(self, text: str) -> list[str]:
289
  m = re.search(r"\[Untried exits:\s*(.+?)\]", text or "")
290
  if not m:
291
  return []
292
- dirs = [d.strip() for d in m.group(1).split(",")]
293
- out = []
294
- for d in dirs:
295
- d = self._normalize_action(d).lower()
296
- if d and d not in out:
297
- out.append(d)
298
- return out
299
 
300
  def _extract_interactions(self, text: str) -> list[str]:
301
  m = re.search(r"\[Interactions:\s*(.+?)\]", text or "")
302
  if not m:
303
  return []
304
- acts = [a.strip() for a in m.group(1).split(",")]
305
- out = []
306
- for a in acts:
307
- if a and a.lower() not in out:
308
- out.append(a)
309
- return out
310
 
311
  def _is_game_over(self, text: str) -> bool:
312
  t = (text or "").lower()
313
- return ("game over" in t) or ("you have died" in t) or ("you are dead" in t) or ("[game over]" in t)
314
 
315
  def _is_stuck(self, text: str) -> bool:
316
  t = (text or "").lower()
317
- bad = [
318
- "i don't understand", "you can't", "that's not", "not a verb",
319
- "nothing happens", "you don't see", "you see nothing", "beg your pardon"
320
- ]
321
  rep = len(self.recent_obs) >= 3 and all(self.recent_obs[-1] == x for x in list(self.recent_obs)[-3:])
322
  return any(b in t for b in bad) or rep
323
 
324
- def _normalize_action(self, action: str) -> str:
325
- a = (action or "").strip()
326
- low = a.lower()
327
- if low in MOVE_ALIASES:
328
- return MOVE_ALIASES[low]
329
- return a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
 
331
- # ---------------------------------------------------------------------
332
- # Candidates
333
  def _make_candidates(self, obs: str, inv_txt: str, valid_actions: list[str], loc: str) -> list[str]:
334
- obs_l = (obs or "").lower()
335
- inv_l = (inv_txt or "").lower()
336
-
337
- candidates = []
338
- seen = set()
339
 
340
  def add(a: str):
341
- a = self._normalize_action(a)
 
342
  if not a:
343
  return
344
- low = a.lower().strip()
345
  if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
346
  return
347
  if low not in seen:
348
  seen.add(low)
349
  candidates.append(a)
350
 
351
- # from server tags
352
- for d in self._extract_untried_exits(obs):
353
  add(d)
354
-
355
  for a in self._extract_interactions(obs):
356
  add(a)
357
 
358
- # darkness
359
- if "dark" in obs_l and ("lamp" in obs_l or "lamp" in inv_l):
360
- add("take lamp")
361
- add("turn on lamp")
362
-
363
- # add valid actions (movement first then interactions)
364
- moves = []
365
- inter = []
366
- for a in valid_actions or []:
367
- al = a.lower().strip()
368
- first = al.split()[0] if al else ""
369
- if first in MOVE_ACTIONS:
370
- moves.append(a)
371
- else:
372
- inter.append(a)
373
-
374
- # prioritize movement not tried too often
375
- def move_key(a: str):
376
- return self.tried[(loc, a.lower().strip())]
377
-
378
- for m in sorted(set(moves), key=move_key):
379
- add(m)
380
-
381
- # common score-ish interactions
382
- scorey = ("take ", "get ", "open ", "read ", "examine ", "look at ", "turn on ", "unlock ", "insert ", "put ")
383
- for a in inter:
384
- if a.lower().startswith(scorey):
385
- add(a)
386
-
387
- for a in inter:
388
  add(a)
389
- if len(candidates) >= 24:
390
- break
391
 
392
- # safe basics
393
  add("look")
394
  add("inventory")
395
  add("take all")
396
 
397
- # remove too-repeated
398
  cleaned = []
399
  for a in candidates:
400
  if list(self.recent_actions).count(a.lower()) >= 3:
401
  continue
402
  cleaned.append(a)
403
-
404
  return cleaned[:20]
405
 
406
- # ---------------------------------------------------------------------
407
- # Look-ahead selection
408
  async def _choose_by_lookahead(self, client, loc: str, obs: str, candidates: list[str]) -> tuple[Optional[str], str]:
409
  base_score = self.score
410
- base_loc = self._extract_location(obs)
411
- untried = set(self._extract_untried_exits(obs))
412
 
413
- # shortlist for speed
414
- priority = []
415
  for a in candidates:
416
  low = a.lower().strip()
417
- is_untried = 0 if low in untried else 1
418
- tried = self.tried[(loc, low)]
419
- priority.append((is_untried, tried, low, a))
420
- priority.sort()
421
- shortlist = [x[-1] for x in priority][:10]
422
-
423
- best_a = None
424
- best_u = -10**18
425
- best_th = ""
426
 
 
427
  for a in shortlist:
428
  low = a.lower().strip()
429
  if self.tried[(loc, low)] >= 4:
430
  continue
431
-
432
  peek = await self._call_tool_text(client, "peek_action", {"action": a})
433
- peek_l = (peek or "").lower()
434
-
435
- if self._is_game_over(peek) or "you have died" in peek_l:
436
  u = -1_000_000_000
437
  else:
438
- s_after, loc_after = self._parse_peek_score_loc(peek, fallback_score=base_score)
 
 
 
439
  delta = max(0, s_after - base_score)
440
-
441
- new_loc_bonus = 0
442
- changed_bonus = 0
443
- if loc_after and loc_after != base_loc:
444
- changed_bonus = 60
445
- if loc_after not in self.locations_visited:
446
- new_loc_bonus = 280
447
-
448
  loop_pen = 90 * list(self.recent_actions).count(low)
449
  stuck_pen = 180 if self._is_stuck(peek) else 0
450
-
451
- # prefer untried exits
452
- untried_bonus = 120 if low in untried else 0
453
-
454
- u = delta * 900 + new_loc_bonus + changed_bonus + untried_bonus - loop_pen - stuck_pen
455
-
456
- # lamp preference in darkness
457
- if "dark" in (obs or "").lower() and "lamp" in low:
458
- u += 120
459
 
460
  if u > best_u:
461
- best_u = u
462
- best_a = a
463
  best_th = f"Look-ahead chose '{a}' (utility={u})."
464
 
465
  if best_a is None or best_u < -10000:
466
- return None, "Look-ahead found no good action; fallback."
467
  return best_a, best_th
468
 
469
- def _parse_peek_score_loc(self, text: str, fallback_score: int) -> tuple[int, str]:
470
- score = fallback_score
471
- loc = self._extract_location(text)
472
- m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\|\s*Location:\s*(.+?)\]", text or "")
473
- if m:
474
- score = int(m.group(1))
475
- loc = m.group(4).strip()
476
- mp = re.search(r"\[\+(\d+)\s+points", text or "", flags=re.IGNORECASE)
477
- if mp and score == fallback_score:
478
- score = fallback_score + int(mp.group(1))
479
- return score, loc
480
-
481
- # ---------------------------------------------------------------------
482
- # No-peek fallback
483
- async def _choose_without_peek(self, obs: str, inv_txt: str, candidates: list[str], seed: int, step: int) -> tuple[str, str]:
484
- # heuristic: take untried exit first
485
- untried = self._extract_untried_exits(obs)
486
  if untried:
487
- return untried[0], "Heuristic: try an untried exit."
488
 
489
- # heuristic: try a promising interaction not tried yet
490
- loc = self._extract_location(obs)
491
- for a in candidates:
492
- low = a.lower().strip()
493
- if low.startswith(("take ", "get ", "open ", "read ", "examine ", "turn on ", "unlock ")):
494
- if self.tried[(loc, low)] == 0:
495
- return a, "Heuristic: try a high-value interaction."
496
-
497
- # LLM fallback: choose from candidate list exactly
498
  if not candidates:
499
- return "look", "No candidates; fallback to look."
500
-
501
- cand = candidates[:10]
502
- prompt = self._build_llm_prompt(obs, inv_txt, cand)
503
 
 
504
  try:
 
505
  resp = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=160)
506
- thought, tool, args = self._parse_llm_response(resp)
507
- a = self._normalize_action(str(args.get("action", "")).strip())
508
- canon = {x.lower(): x for x in cand}
509
- if a.lower() in canon:
510
- return canon[a.lower()], thought or "LLM chose a candidate."
511
- return cand[0], "LLM invalid; fallback to first candidate."
512
  except Exception:
513
- # no LLM available / error => deterministic fallback
514
- return cand[0], "LLM unavailable/error; fallback to first candidate."
515
 
516
- def _build_llm_prompt(self, obs: str, inv_txt: str, candidates: list[str]) -> str:
517
- obs = (obs or "").strip()[:1100]
518
- inv_txt = (inv_txt or "").strip()[:350]
519
 
 
520
  parts = [
521
  f"Score: {self.score}/{self.max_score} | Moves: {max(self.moves, self._internal_moves)}",
522
  f"Location: {self.last_location}",
 
 
523
  ]
524
- if inv_txt:
525
- parts.append(f"Inventory info:\n{inv_txt}")
526
- if self.recent_actions:
527
- parts.append("Recent actions: " + ", ".join(list(self.recent_actions)[-6:]))
528
-
529
- parts.append("\nCurrent observation:\n" + obs)
530
- parts.append("\nCandidate actions (choose exactly ONE):")
531
  for a in candidates:
532
  parts.append(f"- {a}")
533
  return "\n".join(parts)
534
 
535
- def _parse_llm_response(self, response: str) -> tuple[str, str, dict]:
536
  thought = ""
537
- tool = "play_action"
538
  args = {"action": "look"}
539
- if not response:
540
- return thought, tool, args
541
-
542
- m = re.search(r"(?im)^\s*THOUGHT\s*:\s*(.+)$", response)
543
  if m:
544
  thought = m.group(1).strip()
545
- m = re.search(r"(?im)^\s*TOOL\s*:\s*([a-zA-Z0-9_]+)\s*$", response)
546
  if m:
547
- tool = m.group(1).strip()
548
- m = re.search(r"(?is)^\s*ARGS\s*:\s*(\{.*\})\s*$", response)
549
- if m:
550
- raw = m.group(1).strip()
551
  try:
552
  args = json.loads(raw)
553
  except Exception:
554
- raw2 = raw.replace("'", '"')
555
- raw2 = re.sub(r",\s*}", "}", raw2)
556
- try:
557
- args = json.loads(raw2)
558
- except Exception:
559
- args = {"action": "look"}
560
-
561
- if not isinstance(args, dict):
562
- args = {"action": "look"}
563
-
564
- # enforce tool
565
- tool = "play_action"
566
- return thought, tool, args
567
-
568
- # ---------------------------------------------------------------------
569
- def _parse_valid_actions(self, txt: str) -> list[str]:
570
- if not txt:
571
- return []
572
- out = []
573
- for line in txt.splitlines():
574
- line = line.strip()
575
- if line.startswith("- "):
576
- a = line[2:].strip()
577
- a = self._normalize_action(a)
578
- low = a.lower()
579
- if not a:
580
- continue
581
- if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
582
- continue
583
- out.append(a)
584
- # dedup keep order
585
- seen = set()
586
- uniq = []
587
- for a in out:
588
- low = a.lower()
589
- if low not in seen:
590
- seen.add(low)
591
- uniq.append(a)
592
- return uniq
 
1
  """
2
+ Student Agent (Best practical submission)
3
+
4
+ - Works even if HF_TOKEN is missing (no crash).
5
+ - Uses peek_action + get_valid_actions + server meta tags to explore and gain score.
6
+ - Uses LLM only as fallback when HF_TOKEN is available.
7
+ - Always returns non-zero moves (internal counter).
 
 
 
 
 
 
8
  """
9
 
10
  import json
 
20
 
21
  load_dotenv()
22
 
 
 
 
23
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
 
24
  _hf_token = os.getenv("HF_TOKEN")
25
  LLM_CLIENT = InferenceClient(token=_hf_token) if _hf_token else None
26
 
27
 
28
  def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 180) -> str:
 
 
 
 
29
  if LLM_CLIENT is None:
30
+ raise RuntimeError("LLM unavailable (HF_TOKEN missing).")
31
+ messages = [{"role": "system", "content": system_prompt}, {"role": "user", "content": prompt}]
 
 
 
 
32
  for attempt in range(3):
33
  try:
34
  resp = LLM_CLIENT.chat.completions.create(
 
48
 
49
  @dataclass
50
  class RunResult:
 
51
  final_score: int
52
  max_score: int
53
  moves: int
 
59
 
60
  SYSTEM_PROMPT = """You are an expert text-adventure agent.
61
 
62
+ Output EXACTLY:
63
  THOUGHT: ...
64
  TOOL: play_action
65
  ARGS: {"action": "<one candidate action>"}
66
 
67
  Rules:
68
+ - Choose exactly one action from the candidate list.
69
+ - Do not invent actions outside the list.
70
+ - No extra text, no markdown.
 
71
  """
72
 
73
 
74
+ MOVE_ALIASES = {"n":"north","s":"south","e":"east","w":"west","u":"up","d":"down","ne":"northeast","nw":"northwest","se":"southeast","sw":"southwest"}
 
 
 
 
75
  BAD_PREFIXES = ("save", "restore", "quit", "restart", "help", "verbose", "script", "unscript", "version")
76
  BAD_EXACT = {"wait", "z"}
77
 
78
 
79
  class StudentAgent:
80
  def __init__(self):
 
81
  self.score = 0
82
  self.max_score = 0
83
  self.moves = 0
 
 
84
  self._internal_moves = 0
85
 
 
86
  self.locations_visited: set[str] = set()
87
  self.last_location = "Unknown"
88
+
89
+ self.tried = defaultdict(int)
90
  self.recent_actions = deque(maxlen=10)
91
  self.recent_obs = deque(maxlen=6)
92
 
93
+ self.valid_cache = {}
 
94
 
 
95
  async def run(self, client, game: str, max_steps: int, seed: int, verbose: bool = False) -> RunResult:
96
  history: list[tuple[str, str, str]] = []
97
 
 
109
  self.last_location = self._extract_location(obs)
110
  self.locations_visited.add(self.last_location)
111
 
 
 
 
112
  for step in range(1, max_steps + 1):
113
  loc = self._extract_location(obs)
114
  self.last_location = loc
 
116
 
117
  stuck = self._is_stuck(obs)
118
 
 
119
  valid_actions = self.valid_cache.get(loc, [])
120
  if has("get_valid_actions") and (stuck or not valid_actions or step % 6 == 0):
121
  va_txt = await self._call_tool_text(client, "get_valid_actions", {"limit": 60})
 
127
  if has("inventory") and (step == 1 or stuck or step % 8 == 0):
128
  inv_txt = await self._call_tool_text(client, "inventory", {})
129
 
 
130
  candidates = self._make_candidates(obs, inv_txt, valid_actions, loc)
131
 
132
+ action, thought = None, ""
 
 
 
133
  if has("peek_action") and candidates:
134
+ action, thought = await self._choose_by_lookahead(client, loc, obs, candidates)
 
 
135
 
 
136
  if not action:
137
+ action, thought = await self._choose_fallback(obs, inv_txt, candidates, seed, step)
 
 
138
 
139
+ action = self._norm_action(action or "look")
140
 
 
141
  obs2 = await self._call_tool_text(client, "play_action", {"action": action})
142
  self._internal_moves += 1
143
 
 
150
  self.locations_visited.add(new_loc)
151
 
152
  history.append((thought, f"play_action({action})", (obs2 or "")[:260]))
 
153
  if verbose:
154
+ print(f"\n--- step {step} ---\nTHOUGHT: {thought}\nACTION: {action}\n{obs2}")
 
 
 
155
 
156
  obs = obs2
 
157
  if self._is_game_over(obs):
158
  break
159
 
 
177
  history=history,
178
  )
179
 
 
180
  async def _call_tool_text(self, client, tool: str, args: dict) -> str:
181
+ r = await client.call_tool(tool, args)
182
+ return self._extract_text(r)
183
 
184
  def _extract_text(self, result: Any) -> str:
185
  if result is None:
 
193
  return str(part)
194
  return str(result)
195
 
196
+ def _norm_action(self, a: str) -> str:
197
+ a = (a or "").strip()
198
+ low = a.lower()
199
+ return MOVE_ALIASES.get(low, a)
200
+
201
  def _update_from_text(self, text: str) -> None:
202
+ m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\|\s*Location:\s*(.+?)\]", text or "")
 
 
 
 
 
 
 
 
203
  if m:
204
  self.score = int(m.group(1))
205
  self.max_score = int(m.group(2))
206
  self.moves = int(m.group(3))
207
  self.last_location = m.group(4).strip()
208
 
 
 
 
 
 
 
209
  def _extract_location(self, text: str) -> str:
 
210
  m = re.search(r"\|\s*Location:\s*(.+?)\]", text or "")
211
+ if m and m.group(1).strip():
212
+ return m.group(1).strip()
213
+ for line in (text or "").splitlines():
 
 
 
 
 
214
  line = line.strip()
215
+ if line and not line.startswith("[Score:"):
216
+ return line
 
 
 
217
  return "Unknown"
218
 
219
+ def _extract_untried(self, text: str) -> list[str]:
220
  m = re.search(r"\[Untried exits:\s*(.+?)\]", text or "")
221
  if not m:
222
  return []
223
+ return [self._norm_action(x.strip()).lower() for x in m.group(1).split(",") if x.strip()]
 
 
 
 
 
 
224
 
225
  def _extract_interactions(self, text: str) -> list[str]:
226
  m = re.search(r"\[Interactions:\s*(.+?)\]", text or "")
227
  if not m:
228
  return []
229
+ return [x.strip() for x in m.group(1).split(",") if x.strip()]
 
 
 
 
 
230
 
231
  def _is_game_over(self, text: str) -> bool:
232
  t = (text or "").lower()
233
+ return ("game over" in t) or ("you have died" in t) or ("you are dead" in t)
234
 
235
  def _is_stuck(self, text: str) -> bool:
236
  t = (text or "").lower()
237
+ bad = ["i don't understand", "you can't", "that's not", "not a verb", "nothing happens", "beg your pardon"]
 
 
 
238
  rep = len(self.recent_obs) >= 3 and all(self.recent_obs[-1] == x for x in list(self.recent_obs)[-3:])
239
  return any(b in t for b in bad) or rep
240
 
241
+ def _parse_valid_actions(self, txt: str) -> list[str]:
242
+ out = []
243
+ for line in (txt or "").splitlines():
244
+ line = line.strip()
245
+ if line.startswith("- "):
246
+ a = self._norm_action(line[2:].strip())
247
+ low = a.lower()
248
+ if not a:
249
+ continue
250
+ if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
251
+ continue
252
+ out.append(a)
253
+ # dedup
254
+ seen = set()
255
+ uniq = []
256
+ for a in out:
257
+ low = a.lower()
258
+ if low not in seen:
259
+ seen.add(low)
260
+ uniq.append(a)
261
+ return uniq
262
 
 
 
263
  def _make_candidates(self, obs: str, inv_txt: str, valid_actions: list[str], loc: str) -> list[str]:
264
+ candidates, seen = [], set()
 
 
 
 
265
 
266
  def add(a: str):
267
+ a = self._norm_action(a)
268
+ low = a.lower().strip()
269
  if not a:
270
  return
 
271
  if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
272
  return
273
  if low not in seen:
274
  seen.add(low)
275
  candidates.append(a)
276
 
277
+ # from tags
278
+ for d in self._extract_untried(obs):
279
  add(d)
 
280
  for a in self._extract_interactions(obs):
281
  add(a)
282
 
283
+ # from valid actions
284
+ for a in valid_actions[:25]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
285
  add(a)
 
 
286
 
287
+ # basics
288
  add("look")
289
  add("inventory")
290
  add("take all")
291
 
292
+ # avoid too repeated
293
  cleaned = []
294
  for a in candidates:
295
  if list(self.recent_actions).count(a.lower()) >= 3:
296
  continue
297
  cleaned.append(a)
 
298
  return cleaned[:20]
299
 
 
 
300
  async def _choose_by_lookahead(self, client, loc: str, obs: str, candidates: list[str]) -> tuple[Optional[str], str]:
301
  base_score = self.score
302
+ untried = set(self._extract_untried(obs))
 
303
 
304
+ # shortlist
305
+ pr = []
306
  for a in candidates:
307
  low = a.lower().strip()
308
+ pr.append((0 if low in untried else 1, self.tried[(loc, low)], low, a))
309
+ pr.sort()
310
+ shortlist = [x[-1] for x in pr][:10]
 
 
 
 
 
 
311
 
312
+ best_a, best_u, best_th = None, -10**18, ""
313
  for a in shortlist:
314
  low = a.lower().strip()
315
  if self.tried[(loc, low)] >= 4:
316
  continue
 
317
  peek = await self._call_tool_text(client, "peek_action", {"action": a})
318
+ if self._is_game_over(peek):
 
 
319
  u = -1_000_000_000
320
  else:
321
+ s_after = base_score
322
+ m = re.search(r"\[Score:\s*(\d+)\s*/", peek or "")
323
+ if m:
324
+ s_after = int(m.group(1))
325
  delta = max(0, s_after - base_score)
326
+ loc_after = self._extract_location(peek)
327
+ new_loc_bonus = 280 if (loc_after and loc_after not in self.locations_visited and loc_after != self._extract_location(obs)) else 0
328
+ untried_bonus = 120 if low in untried else 0
 
 
 
 
 
329
  loop_pen = 90 * list(self.recent_actions).count(low)
330
  stuck_pen = 180 if self._is_stuck(peek) else 0
331
+ u = delta * 900 + new_loc_bonus + untried_bonus - loop_pen - stuck_pen
 
 
 
 
 
 
 
 
332
 
333
  if u > best_u:
334
+ best_u, best_a = u, a
 
335
  best_th = f"Look-ahead chose '{a}' (utility={u})."
336
 
337
  if best_a is None or best_u < -10000:
338
+ return None, "Look-ahead no good action; fallback."
339
  return best_a, best_th
340
 
341
+ async def _choose_fallback(self, obs: str, inv_txt: str, candidates: list[str], seed: int, step: int) -> tuple[str, str]:
342
+ untried = self._extract_untried(obs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
343
  if untried:
344
+ return untried[0], "Heuristic: try untried exit."
345
 
 
 
 
 
 
 
 
 
 
346
  if not candidates:
347
+ return "look", "No candidates; fallback."
 
 
 
348
 
349
+ # LLM only if available
350
  try:
351
+ prompt = self._llm_prompt(obs, inv_txt, candidates[:10])
352
  resp = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=160)
353
+ thought, args = self._parse_llm(resp)
354
+ act = self._norm_action(str(args.get("action", "")).strip())
355
+ canon = {x.lower(): x for x in candidates[:10]}
356
+ if act.lower() in canon:
357
+ return canon[act.lower()], thought or "LLM chose candidate."
 
358
  except Exception:
359
+ pass
 
360
 
361
+ return candidates[0], "Fallback: first candidate."
 
 
362
 
363
+ def _llm_prompt(self, obs: str, inv_txt: str, candidates: list[str]) -> str:
364
  parts = [
365
  f"Score: {self.score}/{self.max_score} | Moves: {max(self.moves, self._internal_moves)}",
366
  f"Location: {self.last_location}",
367
+ "\nCurrent observation:\n" + (obs or "")[:1100],
368
+ "\nCandidate actions (choose exactly one):",
369
  ]
 
 
 
 
 
 
 
370
  for a in candidates:
371
  parts.append(f"- {a}")
372
  return "\n".join(parts)
373
 
374
+ def _parse_llm(self, resp: str) -> tuple[str, dict]:
375
  thought = ""
 
376
  args = {"action": "look"}
377
+ m = re.search(r"(?im)^THOUGHT:\s*(.+)$", resp or "")
 
 
 
378
  if m:
379
  thought = m.group(1).strip()
380
+ m = re.search(r"(?is)^ARGS:\s*(\{.*\})\s*$", resp or "")
381
  if m:
382
+ raw = m.group(1)
 
 
 
383
  try:
384
  args = json.loads(raw)
385
  except Exception:
386
+ pass
387
+ return thought, args