bouhss commited on
Commit
a8bba7b
·
verified ·
1 Parent(s): 0fa0cde

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +260 -254
agent.py CHANGED
@@ -1,20 +1,22 @@
1
  """
2
- Student Agent for Text Adventure Games (Strong submission)
3
-
4
- Key ideas:
5
- - Deterministic & robust
6
- - Uses MCP tools if available:
7
- - get_valid_actions: reduce invalid commands
8
- - peek_action: simulate actions without committing (safe look-ahead)
9
- - inventory / memory / get_map: optional extra context
10
- - Exploration + score oriented:
11
- utility = score_gain * big_weight + new_location_bonus - loop_penalty - stuck_penalty - death_penalty
12
- - LLM is used only as fallback, to choose among a candidate list.
 
13
  """
14
 
15
  import json
16
  import os
17
  import re
 
18
  from dataclasses import dataclass, field
19
  from typing import Optional, Any
20
  from collections import defaultdict, deque
@@ -22,7 +24,6 @@ from collections import defaultdict, deque
22
  from dotenv import load_dotenv
23
  from huggingface_hub import InferenceClient
24
 
25
- # Load environment variables
26
  load_dotenv()
27
 
28
  # =============================================================================
@@ -31,24 +32,36 @@ load_dotenv()
31
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
32
 
33
  _hf_token = os.getenv("HF_TOKEN")
34
- if not _hf_token:
35
- raise ValueError("HF_TOKEN not found. Set it in your .env file.")
36
- LLM_CLIENT = InferenceClient(token=_hf_token)
37
 
38
 
39
- def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 220) -> str:
 
 
 
 
 
 
 
40
  messages = [
41
  {"role": "system", "content": system_prompt},
42
  {"role": "user", "content": prompt},
43
  ]
44
- response = LLM_CLIENT.chat.completions.create(
45
- model=LLM_MODEL,
46
- messages=messages,
47
- temperature=0.0,
48
- max_tokens=max_tokens,
49
- seed=seed,
50
- )
51
- return response.choices[0].message.content
 
 
 
 
 
 
 
52
 
53
 
54
  @dataclass
@@ -63,57 +76,50 @@ class RunResult:
63
  history: list[tuple[str, str, str]] = field(default_factory=list)
64
 
65
 
66
- # =============================================================================
67
- # LLM Prompt (fallback only)
68
- # =============================================================================
69
-
70
  SYSTEM_PROMPT = """You are an expert text-adventure agent.
71
 
72
- Goal: maximize score and explore new locations while avoiding loops.
73
-
74
- You MUST output EXACTLY:
75
  THOUGHT: ...
76
  TOOL: play_action
77
  ARGS: {"action": "<one candidate action>"}
78
 
79
  Rules:
80
- - Choose one action EXACTLY from the candidate list provided by the user.
81
- - Avoid repeating the same action if it failed before.
82
- - If darkness is mentioned, prioritize lamp actions if present in candidates.
83
- - No markdown, no extra text.
84
  """
85
 
86
 
87
- MOVE_ACTIONS = ["north", "south", "east", "west", "up", "down", "enter", "exit"]
88
- MOVE_ALIASES = {"n": "north", "s": "south", "e": "east", "w": "west", "u": "up", "d": "down"}
 
 
89
 
90
- # avoid wasting steps on meta commands
91
  BAD_PREFIXES = ("save", "restore", "quit", "restart", "help", "verbose", "script", "unscript", "version")
92
  BAD_EXACT = {"wait", "z"}
93
 
94
 
95
  class StudentAgent:
96
  def __init__(self):
97
- # parsed from banner
98
  self.score = 0
99
  self.max_score = 0
100
  self.moves = 0
101
 
102
- # exploration tracking
 
 
 
103
  self.locations_visited: set[str] = set()
104
  self.last_location = "Unknown"
105
- self.edges = defaultdict(dict) # edges[loc][move] = new_loc
106
-
107
- # loop avoidance
108
- self.tried = defaultdict(int) # tried[(loc, action)] += 1
109
  self.recent_actions = deque(maxlen=10)
110
  self.recent_obs = deque(maxlen=6)
111
 
112
- # cached valid actions by location
113
- self.valid_actions_cache = {} # loc -> list[str]
114
 
115
- # ---------------------------------------------------------------------
116
- # Main run loop
117
  # ---------------------------------------------------------------------
118
  async def run(self, client, game: str, max_steps: int, seed: int, verbose: bool = False) -> RunResult:
119
  history: list[tuple[str, str, str]] = []
@@ -122,11 +128,12 @@ class StudentAgent:
122
  tools = await client.list_tools()
123
  tool_names = {t.name for t in tools}
124
 
125
- def has(tname: str) -> bool:
126
- return tname in tool_names
127
 
128
- # initial observation
129
  obs = await self._call_tool_text(client, "play_action", {"action": "look"})
 
130
  self._update_from_text(obs)
131
  self.last_location = self._extract_location(obs)
132
  self.locations_visited.add(self.last_location)
@@ -141,63 +148,51 @@ class StudentAgent:
141
 
142
  stuck = self._is_stuck(obs)
143
 
144
- # refresh valid actions periodically or when stuck/new location
145
- valid_actions = self.valid_actions_cache.get(loc, [])
146
  if has("get_valid_actions") and (stuck or not valid_actions or step % 6 == 0):
147
  va_txt = await self._call_tool_text(client, "get_valid_actions", {"limit": 60})
148
  valid_actions = self._parse_valid_actions(va_txt)
149
  if valid_actions:
150
- self.valid_actions_cache[loc] = valid_actions
151
 
152
- # optional inventory
153
  inv_txt = ""
154
- if has("inventory") and (stuck or step % 8 == 0 or step == 1):
155
  inv_txt = await self._call_tool_text(client, "inventory", {})
156
 
157
- # build candidates
158
  candidates = self._make_candidates(obs, inv_txt, valid_actions, loc)
159
 
160
- # decide action
161
  action = None
162
  thought = ""
163
 
 
164
  if has("peek_action") and candidates:
165
  action, thought = await self._choose_by_lookahead(
166
- client=client,
167
- loc=loc,
168
- obs=obs,
169
- candidates=candidates,
170
- seed=seed,
171
- step=step,
172
- verbose=verbose,
173
  )
174
 
 
175
  if not action:
176
  action, thought = await self._choose_without_peek(
177
- obs=obs,
178
- inv_txt=inv_txt,
179
- candidates=candidates,
180
- seed=seed,
181
- step=step,
182
  )
183
 
184
  action = self._normalize_action(action or "look")
185
 
186
- # commit the action
187
  obs2 = await self._call_tool_text(client, "play_action", {"action": action})
 
188
 
189
- # update map edges if movement changed location
190
- new_loc = self._extract_location(obs2)
191
- if action.lower() in MOVE_ACTIONS and new_loc and new_loc != loc:
192
- self.edges[loc][action.lower()] = new_loc
193
-
194
- # bookkeeping
195
  self.tried[(loc, action.lower())] += 1
196
  self.recent_actions.append(action.lower())
197
  self.recent_obs.append((obs2 or "")[:220])
 
198
  self._update_from_text(obs2)
 
 
199
 
200
- history.append((thought, f"play_action({action})", (obs2 or "")[:250]))
201
 
202
  if verbose:
203
  print(f"\n--- step {step} ---")
@@ -213,7 +208,7 @@ class StudentAgent:
213
  return RunResult(
214
  final_score=self.score,
215
  max_score=self.max_score,
216
- moves=self.moves,
217
  locations_visited=set(self.locations_visited),
218
  game_completed=self._is_game_over(obs),
219
  history=history,
@@ -223,15 +218,13 @@ class StudentAgent:
223
  return RunResult(
224
  final_score=self.score,
225
  max_score=self.max_score,
226
- moves=self.moves,
227
  locations_visited=set(self.locations_visited),
228
  game_completed=False,
229
  error=f"{type(e).__name__}: {e}",
230
  history=history,
231
  )
232
 
233
- # ---------------------------------------------------------------------
234
- # Tool / text helpers
235
  # ---------------------------------------------------------------------
236
  async def _call_tool_text(self, client, tool: str, args: dict) -> str:
237
  result = await client.call_tool(tool, args)
@@ -249,7 +242,38 @@ class StudentAgent:
249
  return str(part)
250
  return str(result)
251
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  def _extract_location(self, text: str) -> str:
 
 
 
 
 
 
 
253
  if not text:
254
  return "Unknown"
255
  for line in text.splitlines():
@@ -261,68 +285,51 @@ class StudentAgent:
261
  return line
262
  return "Unknown"
263
 
264
- def _update_from_text(self, text: str) -> None:
265
- # parse banner: [Score: x/y | Moves: n]
266
- if not text:
267
- return
268
- m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\]", text)
269
- if m:
270
- self.score = int(m.group(1))
271
- self.max_score = int(m.group(2))
272
- self.moves = int(m.group(3))
 
 
273
 
274
- def _parse_valid_actions(self, txt: str) -> list[str]:
275
- if not txt:
 
276
  return []
277
- actions = []
278
- for line in txt.splitlines():
279
- line = line.strip()
280
- if line.startswith("- "):
281
- a = line[2:].strip()
282
- a = self._normalize_action(a)
283
- low = a.lower()
284
- if not a:
285
- continue
286
- if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
287
- continue
288
- actions.append(a)
289
- # dedup keep order
290
- seen = set()
291
  out = []
292
- for a in actions:
293
- if a.lower() not in seen:
294
- seen.add(a.lower())
295
  out.append(a)
296
  return out
297
 
298
- def _normalize_action(self, action: str) -> str:
299
- a = (action or "").strip()
300
- low = a.lower()
301
- if low in MOVE_ALIASES:
302
- return MOVE_ALIASES[low]
303
- return a
304
-
305
  def _is_game_over(self, text: str) -> bool:
306
  t = (text or "").lower()
307
- return ("game over" in t) or ("you have died" in t) or ("you are dead" in t)
308
 
309
  def _is_stuck(self, text: str) -> bool:
310
  t = (text or "").lower()
311
  bad = [
312
- "i don't understand",
313
- "you can't go that way",
314
- "that's not a verb",
315
- "not a word i know",
316
- "nothing happens",
317
- "you can't",
318
- "can't do that",
319
  ]
320
  rep = len(self.recent_obs) >= 3 and all(self.recent_obs[-1] == x for x in list(self.recent_obs)[-3:])
321
  return any(b in t for b in bad) or rep
322
 
 
 
 
 
 
 
 
323
  # ---------------------------------------------------------------------
324
- # Candidate generation
325
- # ---------------------------------------------------------------------
326
  def _make_candidates(self, obs: str, inv_txt: str, valid_actions: list[str], loc: str) -> list[str]:
327
  obs_l = (obs or "").lower()
328
  inv_l = (inv_txt or "").lower()
@@ -334,61 +341,60 @@ class StudentAgent:
334
  a = self._normalize_action(a)
335
  if not a:
336
  return
337
- low = a.lower()
338
  if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
339
  return
340
  if low not in seen:
341
  seen.add(low)
342
  candidates.append(a)
343
 
344
- # always safe
345
- add("look")
 
346
 
347
- # darkness handling
348
- if "dark" in obs_l:
349
- if "lamp" in obs_l or "lamp" in inv_l:
350
- add("take lamp")
351
- add("turn on lamp")
352
 
353
- # split valid actions into move vs object
354
- move_list = []
355
- obj_list = []
 
 
 
 
 
356
  for a in valid_actions or []:
357
- low = a.lower()
358
- if low in MOVE_ACTIONS:
359
- move_list.append(a)
 
360
  else:
361
- obj_list.append(a)
362
 
363
- # prioritize untried moves from this location
364
- def move_key(m: str):
365
- return (self.tried[(loc, m.lower())], 0 if m.lower() not in self.edges.get(loc, {}) else 1)
366
 
367
- for m in sorted(set(move_list), key=move_key):
368
  add(m)
369
 
370
- # if no valid moves known, still try generic moves
371
- if not move_list:
372
- for m in MOVE_ACTIONS:
373
- add(m)
374
-
375
- # prioritize object actions that often give score
376
- scorey_prefixes = ("take ", "get ", "open ", "read ", "examine ", "look at ", "turn on ", "unlock ", "insert ")
377
- for a in obj_list:
378
- if a.lower().startswith(scorey_prefixes):
379
  add(a)
380
 
381
- # then the rest (limited)
382
- for a in obj_list:
383
  add(a)
384
- if len(candidates) >= 22:
385
  break
386
 
387
- # small generic probes (often good across games)
388
- add("take all")
389
  add("inventory")
 
390
 
391
- # remove actions repeated too much recently
392
  cleaned = []
393
  for a in candidates:
394
  if list(self.recent_actions).count(a.lower()) >= 3:
@@ -398,30 +404,28 @@ class StudentAgent:
398
  return cleaned[:20]
399
 
400
  # ---------------------------------------------------------------------
401
- # Decision: look-ahead
402
- # ---------------------------------------------------------------------
403
- async def _choose_by_lookahead(self, client, loc: str, obs: str, candidates: list[str], seed: int, step: int, verbose: bool):
404
  base_score = self.score
405
- base_loc = loc
 
406
 
407
- # prioritize a shortlist for speed
408
  priority = []
409
  for a in candidates:
410
- low = a.lower()
411
- is_move = low in MOVE_ACTIONS
412
- is_obj = low.startswith(("take ", "get ", "open ", "read ", "examine ", "turn on ", "unlock "))
413
  tried = self.tried[(loc, low)]
414
- priority.append((tried, 0 if is_obj else 1, 0 if is_move else 1, low, a))
415
  priority.sort()
416
-
417
- shortlist = [x[-1] for x in priority][:10] # evaluate at most 10
418
 
419
  best_a = None
420
  best_u = -10**18
421
  best_th = ""
422
 
423
  for a in shortlist:
424
- low = a.lower()
425
  if self.tried[(loc, low)] >= 4:
426
  continue
427
 
@@ -431,22 +435,26 @@ class StudentAgent:
431
  if self._is_game_over(peek) or "you have died" in peek_l:
432
  u = -1_000_000_000
433
  else:
434
- s_after, mx_after, mv_after = self._parse_banner(peek, fallback_score=base_score)
435
  delta = max(0, s_after - base_score)
436
 
437
- new_loc = self._extract_location(peek)
438
- changed = (new_loc and new_loc != base_loc)
439
- new_loc_bonus = 250 if (changed and new_loc not in self.locations_visited) else 0
440
- changed_bonus = 40 if changed else 0
 
 
441
 
442
- loop_pen = 80 * list(self.recent_actions).count(low)
443
- stuck_pen = 160 if self._is_stuck(peek) else 0
444
 
445
- # MAIN utility
446
- u = delta * 900 + new_loc_bonus + changed_bonus - loop_pen - stuck_pen
447
 
448
- # small preference: if darkness, lamp actions
449
- if "dark" in (obs or "").lower() and ("lamp" in low):
 
 
450
  u += 120
451
 
452
  if u > best_u:
@@ -458,34 +466,33 @@ class StudentAgent:
458
  return None, "Look-ahead found no good action; fallback."
459
  return best_a, best_th
460
 
461
- def _parse_banner(self, text: str, fallback_score: int):
462
  score = fallback_score
463
- mx = self.max_score
464
- mv = self.moves
465
- if not text:
466
- return score, mx, mv
467
- m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\]", text)
468
  if m:
469
- return int(m.group(1)), int(m.group(2)), int(m.group(3))
470
- return score, mx, mv
 
 
 
 
471
 
472
  # ---------------------------------------------------------------------
473
- # Decision: no peek => heuristic then LLM fallback among candidates
474
- # ---------------------------------------------------------------------
475
- async def _choose_without_peek(self, obs: str, inv_txt: str, candidates: list[str], seed: int, step: int):
 
 
 
 
 
476
  loc = self._extract_location(obs)
477
-
478
- # heuristic: try an untried move
479
- for m in MOVE_ACTIONS:
480
- if m in [c.lower() for c in candidates] and self.tried[(loc, m)] == 0:
481
- return m, "Heuristic: try an untried move to explore."
482
-
483
- # heuristic: try untried "take/get/open/read/examine"
484
  for a in candidates:
485
- low = a.lower()
486
- if low.startswith(("take ", "get ", "open ", "read ", "examine ", "turn on ")):
487
  if self.tried[(loc, low)] == 0:
488
- return a, "Heuristic: try a promising object interaction."
489
 
490
  # LLM fallback: choose from candidate list exactly
491
  if not candidates:
@@ -493,53 +500,51 @@ class StudentAgent:
493
 
494
  cand = candidates[:10]
495
  prompt = self._build_llm_prompt(obs, inv_txt, cand)
496
- resp = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=180)
497
-
498
- thought, tool, args = self._parse_response(resp)
499
- a = self._normalize_action(str(args.get("action", "")).strip())
500
 
501
- # force action to be in candidate list
502
- canon = {x.lower(): x for x in cand}
503
- if a.lower() in canon:
504
- return canon[a.lower()], thought or "LLM chose a candidate."
505
- return cand[0], "LLM invalid; fallback to first candidate."
 
 
 
 
 
 
506
 
507
  def _build_llm_prompt(self, obs: str, inv_txt: str, candidates: list[str]) -> str:
508
  obs = (obs or "").strip()[:1100]
509
  inv_txt = (inv_txt or "").strip()[:350]
510
 
511
- lines = [
512
- f"Score: {self.score}/{self.max_score} | Moves: {self.moves}",
513
- f"Location guess: {self.last_location}",
514
  ]
515
  if inv_txt:
516
- lines.append(f"Inventory:\n{inv_txt}")
517
  if self.recent_actions:
518
- lines.append("Recent actions: " + ", ".join(list(self.recent_actions)[-6:]))
519
 
520
- lines.append("\nCurrent observation:\n" + obs)
521
- lines.append("\nCandidate actions (choose exactly one):")
522
  for a in candidates:
523
- lines.append(f"- {a}")
524
- lines.append("\nOutput TOOL=play_action and ARGS with one candidate action.")
525
- return "\n".join(lines)
526
 
527
- def _parse_response(self, response: str):
528
  thought = ""
529
  tool = "play_action"
530
  args = {"action": "look"}
531
-
532
  if not response:
533
  return thought, tool, args
534
 
535
  m = re.search(r"(?im)^\s*THOUGHT\s*:\s*(.+)$", response)
536
  if m:
537
  thought = m.group(1).strip()
538
-
539
  m = re.search(r"(?im)^\s*TOOL\s*:\s*([a-zA-Z0-9_]+)\s*$", response)
540
  if m:
541
  tool = m.group(1).strip()
542
-
543
  m = re.search(r"(?is)^\s*ARGS\s*:\s*(\{.*\})\s*$", response)
544
  if m:
545
  raw = m.group(1).strip()
@@ -556,31 +561,32 @@ class StudentAgent:
556
  if not isinstance(args, dict):
557
  args = {"action": "look"}
558
 
 
 
559
  return thought, tool, args
560
 
561
-
562
- # =============================================================================
563
- # Local testing
564
- # =============================================================================
565
- async def test_agent():
566
- from fastmcp import Client
567
-
568
- server_path = "mcp_server.py"
569
- agent = StudentAgent()
570
-
571
- async with Client(server_path) as client:
572
- result = await agent.run(
573
- client=client,
574
- game="lostpig",
575
- max_steps=20,
576
- seed=42,
577
- verbose=True,
578
- )
579
- print(f"\nFinal Score: {result.final_score}/{result.max_score}")
580
- print(f"Moves: {result.moves}")
581
- print(f"Locations visited: {len(result.locations_visited)}")
582
-
583
-
584
- if __name__ == "__main__":
585
- import asyncio
586
- asyncio.run(test_agent())
 
1
  """
2
+ Student Agent for Text Adventure Games (Best-performance submission)
3
+
4
+ Design:
5
+ - Primary driver: heuristics + server tools, not pure LLM.
6
+ - Uses MCP tools:
7
+ - play_action (commit)
8
+ - peek_action (simulate without committing) => BIG performance boost
9
+ - get_valid_actions (reduce hallucinations)
10
+ - inventory (optional context)
11
+ - memory/get_map (rare; not required)
12
+ - LLM only as fallback: choose among a candidate list deterministically (temp=0).
13
+ - Robust stats: internal move counter so moves never stay 0 even if banner parsing fails.
14
  """
15
 
16
  import json
17
  import os
18
  import re
19
+ import time
20
  from dataclasses import dataclass, field
21
  from typing import Optional, Any
22
  from collections import defaultdict, deque
 
24
  from dotenv import load_dotenv
25
  from huggingface_hub import InferenceClient
26
 
 
27
  load_dotenv()
28
 
29
  # =============================================================================
 
32
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
33
 
34
  _hf_token = os.getenv("HF_TOKEN")
35
+ LLM_CLIENT = InferenceClient(token=_hf_token) if _hf_token else None
 
 
36
 
37
 
38
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 180) -> str:
    """Query the chat model deterministically (temperature=0, fixed seed).

    Transient failures are retried up to three times with exponential
    backoff.  Raises RuntimeError when no HF token was configured, and
    re-raises the last error if every attempt fails.
    """
    if LLM_CLIENT is None:
        raise RuntimeError("HF_TOKEN missing => LLM unavailable")

    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    last_attempt = 2
    for attempt in range(last_attempt + 1):
        try:
            resp = LLM_CLIENT.chat.completions.create(
                model=LLM_MODEL,
                messages=messages,
                temperature=0.0,
                max_tokens=max_tokens,
                seed=seed,
            )
        except Exception:
            if attempt == last_attempt:
                raise
            # exponential backoff: 1s, 2s
            time.sleep(2 ** attempt)
        else:
            return resp.choices[0].message.content
65
 
66
 
67
  @dataclass
 
76
  history: list[tuple[str, str, str]] = field(default_factory=list)
77
 
78
 
 
 
 
 
79
  SYSTEM_PROMPT = """You are an expert text-adventure agent.
80
 
81
+ You must output EXACTLY:
 
 
82
  THOUGHT: ...
83
  TOOL: play_action
84
  ARGS: {"action": "<one candidate action>"}
85
 
86
  Rules:
87
+ - Choose ONE action EXACTLY from the candidate list provided by the user.
88
+ - Do not invent actions outside that list.
89
+ - Avoid repeating actions that recently failed.
90
+ - No markdown and no extra text.
91
  """
92
 
93
 
94
+ MOVE_ACTIONS = ["north", "south", "east", "west", "up", "down", "enter", "exit",
95
+ "northeast", "northwest", "southeast", "southwest"]
96
+ MOVE_ALIASES = {"n": "north", "s": "south", "e": "east", "w": "west", "u": "up", "d": "down",
97
+ "ne": "northeast", "nw": "northwest", "se": "southeast", "sw": "southwest"}
98
 
 
99
  BAD_PREFIXES = ("save", "restore", "quit", "restart", "help", "verbose", "script", "unscript", "version")
100
  BAD_EXACT = {"wait", "z"}
101
 
102
 
103
  class StudentAgent:
104
  def __init__(self):
105
+ # parsed from server banner if available
106
  self.score = 0
107
  self.max_score = 0
108
  self.moves = 0
109
 
110
+ # internal moves (robust)
111
+ self._internal_moves = 0
112
+
113
+ # exploration / loop avoidance
114
  self.locations_visited: set[str] = set()
115
  self.last_location = "Unknown"
116
+ self.tried = defaultdict(int) # tried[(loc, action)] += 1
 
 
 
117
  self.recent_actions = deque(maxlen=10)
118
  self.recent_obs = deque(maxlen=6)
119
 
120
+ # valid actions cache
121
+ self.valid_cache = {} # loc -> list[str]
122
 
 
 
123
  # ---------------------------------------------------------------------
124
  async def run(self, client, game: str, max_steps: int, seed: int, verbose: bool = False) -> RunResult:
125
  history: list[tuple[str, str, str]] = []
 
128
  tools = await client.list_tools()
129
  tool_names = {t.name for t in tools}
130
 
131
+ def has(name: str) -> bool:
132
+ return name in tool_names
133
 
134
+ # initial look
135
  obs = await self._call_tool_text(client, "play_action", {"action": "look"})
136
+ self._internal_moves += 1
137
  self._update_from_text(obs)
138
  self.last_location = self._extract_location(obs)
139
  self.locations_visited.add(self.last_location)
 
148
 
149
  stuck = self._is_stuck(obs)
150
 
151
+ # refresh valid actions (sparsely)
152
+ valid_actions = self.valid_cache.get(loc, [])
153
  if has("get_valid_actions") and (stuck or not valid_actions or step % 6 == 0):
154
  va_txt = await self._call_tool_text(client, "get_valid_actions", {"limit": 60})
155
  valid_actions = self._parse_valid_actions(va_txt)
156
  if valid_actions:
157
+ self.valid_cache[loc] = valid_actions
158
 
 
159
  inv_txt = ""
160
+ if has("inventory") and (step == 1 or stuck or step % 8 == 0):
161
  inv_txt = await self._call_tool_text(client, "inventory", {})
162
 
163
+ # candidates from server meta tags + valid actions
164
  candidates = self._make_candidates(obs, inv_txt, valid_actions, loc)
165
 
 
166
  action = None
167
  thought = ""
168
 
169
+ # look-ahead (best)
170
  if has("peek_action") and candidates:
171
  action, thought = await self._choose_by_lookahead(
172
+ client=client, loc=loc, obs=obs, candidates=candidates
 
 
 
 
 
 
173
  )
174
 
175
+ # fallback heuristic + optional LLM
176
  if not action:
177
  action, thought = await self._choose_without_peek(
178
+ obs=obs, inv_txt=inv_txt, candidates=candidates, seed=seed, step=step
 
 
 
 
179
  )
180
 
181
  action = self._normalize_action(action or "look")
182
 
183
+ # commit
184
  obs2 = await self._call_tool_text(client, "play_action", {"action": action})
185
+ self._internal_moves += 1
186
 
 
 
 
 
 
 
187
  self.tried[(loc, action.lower())] += 1
188
  self.recent_actions.append(action.lower())
189
  self.recent_obs.append((obs2 or "")[:220])
190
+
191
  self._update_from_text(obs2)
192
+ new_loc = self._extract_location(obs2)
193
+ self.locations_visited.add(new_loc)
194
 
195
+ history.append((thought, f"play_action({action})", (obs2 or "")[:260]))
196
 
197
  if verbose:
198
  print(f"\n--- step {step} ---")
 
208
  return RunResult(
209
  final_score=self.score,
210
  max_score=self.max_score,
211
+ moves=max(self.moves, self._internal_moves),
212
  locations_visited=set(self.locations_visited),
213
  game_completed=self._is_game_over(obs),
214
  history=history,
 
218
  return RunResult(
219
  final_score=self.score,
220
  max_score=self.max_score,
221
+ moves=max(self.moves, self._internal_moves),
222
  locations_visited=set(self.locations_visited),
223
  game_completed=False,
224
  error=f"{type(e).__name__}: {e}",
225
  history=history,
226
  )
227
 
 
 
228
  # ---------------------------------------------------------------------
229
  async def _call_tool_text(self, client, tool: str, args: dict) -> str:
230
  result = await client.call_tool(tool, args)
 
242
  return str(part)
243
  return str(result)
244
 
245
+ # ---------------------------------------------------------------------
246
+ # Parsing
247
+ def _update_from_text(self, text: str) -> None:
248
+ """
249
+ Parse server banner:
250
+ [Score: s/max | Moves: m | Location: L]
251
+ Also accept +k points tag.
252
+ """
253
+ if not text:
254
+ return
255
+
256
+ m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\|\s*Location:\s*(.+?)\]", text)
257
+ if m:
258
+ self.score = int(m.group(1))
259
+ self.max_score = int(m.group(2))
260
+ self.moves = int(m.group(3))
261
+ self.last_location = m.group(4).strip()
262
+
263
+ # fallback: +k points!
264
+ mp = re.search(r"\[\+(\d+)\s+points", text, flags=re.IGNORECASE)
265
+ if mp and self.score >= 0:
266
+ # score already parsed above in most cases; keep safe
267
+ self.score = max(self.score, self.score + int(mp.group(1)))
268
+
269
  def _extract_location(self, text: str) -> str:
270
+ # Prefer banner location
271
+ m = re.search(r"\|\s*Location:\s*(.+?)\]", text or "")
272
+ if m:
273
+ loc = m.group(1).strip()
274
+ if loc:
275
+ return loc
276
+ # else fallback: first non-empty line
277
  if not text:
278
  return "Unknown"
279
  for line in text.splitlines():
 
285
  return line
286
  return "Unknown"
287
 
288
+ def _extract_untried_exits(self, text: str) -> list[str]:
289
+ m = re.search(r"\[Untried exits:\s*(.+?)\]", text or "")
290
+ if not m:
291
+ return []
292
+ dirs = [d.strip() for d in m.group(1).split(",")]
293
+ out = []
294
+ for d in dirs:
295
+ d = self._normalize_action(d).lower()
296
+ if d and d not in out:
297
+ out.append(d)
298
+ return out
299
 
300
+ def _extract_interactions(self, text: str) -> list[str]:
301
+ m = re.search(r"\[Interactions:\s*(.+?)\]", text or "")
302
+ if not m:
303
  return []
304
+ acts = [a.strip() for a in m.group(1).split(",")]
 
 
 
 
 
 
 
 
 
 
 
 
 
305
  out = []
306
+ for a in acts:
307
+ if a and a.lower() not in out:
 
308
  out.append(a)
309
  return out
310
 
 
 
 
 
 
 
 
311
  def _is_game_over(self, text: str) -> bool:
312
  t = (text or "").lower()
313
+ return ("game over" in t) or ("you have died" in t) or ("you are dead" in t) or ("[game over]" in t)
314
 
315
  def _is_stuck(self, text: str) -> bool:
316
  t = (text or "").lower()
317
  bad = [
318
+ "i don't understand", "you can't", "that's not", "not a verb",
319
+ "nothing happens", "you don't see", "you see nothing", "beg your pardon"
 
 
 
 
 
320
  ]
321
  rep = len(self.recent_obs) >= 3 and all(self.recent_obs[-1] == x for x in list(self.recent_obs)[-3:])
322
  return any(b in t for b in bad) or rep
323
 
324
+ def _normalize_action(self, action: str) -> str:
325
+ a = (action or "").strip()
326
+ low = a.lower()
327
+ if low in MOVE_ALIASES:
328
+ return MOVE_ALIASES[low]
329
+ return a
330
+
331
  # ---------------------------------------------------------------------
332
+ # Candidates
 
333
  def _make_candidates(self, obs: str, inv_txt: str, valid_actions: list[str], loc: str) -> list[str]:
334
  obs_l = (obs or "").lower()
335
  inv_l = (inv_txt or "").lower()
 
341
  a = self._normalize_action(a)
342
  if not a:
343
  return
344
+ low = a.lower().strip()
345
  if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
346
  return
347
  if low not in seen:
348
  seen.add(low)
349
  candidates.append(a)
350
 
351
+ # from server tags
352
+ for d in self._extract_untried_exits(obs):
353
+ add(d)
354
 
355
+ for a in self._extract_interactions(obs):
356
+ add(a)
 
 
 
357
 
358
+ # darkness
359
+ if "dark" in obs_l and ("lamp" in obs_l or "lamp" in inv_l):
360
+ add("take lamp")
361
+ add("turn on lamp")
362
+
363
+ # add valid actions (movement first then interactions)
364
+ moves = []
365
+ inter = []
366
  for a in valid_actions or []:
367
+ al = a.lower().strip()
368
+ first = al.split()[0] if al else ""
369
+ if first in MOVE_ACTIONS:
370
+ moves.append(a)
371
  else:
372
+ inter.append(a)
373
 
374
+ # prioritize movement not tried too often
375
+ def move_key(a: str):
376
+ return self.tried[(loc, a.lower().strip())]
377
 
378
+ for m in sorted(set(moves), key=move_key):
379
  add(m)
380
 
381
+ # common score-ish interactions
382
+ scorey = ("take ", "get ", "open ", "read ", "examine ", "look at ", "turn on ", "unlock ", "insert ", "put ")
383
+ for a in inter:
384
+ if a.lower().startswith(scorey):
 
 
 
 
 
385
  add(a)
386
 
387
+ for a in inter:
 
388
  add(a)
389
+ if len(candidates) >= 24:
390
  break
391
 
392
+ # safe basics
393
+ add("look")
394
  add("inventory")
395
+ add("take all")
396
 
397
+ # remove too-repeated
398
  cleaned = []
399
  for a in candidates:
400
  if list(self.recent_actions).count(a.lower()) >= 3:
 
404
  return cleaned[:20]
405
 
406
  # ---------------------------------------------------------------------
407
+ # Look-ahead selection
408
+ async def _choose_by_lookahead(self, client, loc: str, obs: str, candidates: list[str]) -> tuple[Optional[str], str]:
 
409
  base_score = self.score
410
+ base_loc = self._extract_location(obs)
411
+ untried = set(self._extract_untried_exits(obs))
412
 
413
+ # shortlist for speed
414
  priority = []
415
  for a in candidates:
416
+ low = a.lower().strip()
417
+ is_untried = 0 if low in untried else 1
 
418
  tried = self.tried[(loc, low)]
419
+ priority.append((is_untried, tried, low, a))
420
  priority.sort()
421
+ shortlist = [x[-1] for x in priority][:10]
 
422
 
423
  best_a = None
424
  best_u = -10**18
425
  best_th = ""
426
 
427
  for a in shortlist:
428
+ low = a.lower().strip()
429
  if self.tried[(loc, low)] >= 4:
430
  continue
431
 
 
435
  if self._is_game_over(peek) or "you have died" in peek_l:
436
  u = -1_000_000_000
437
  else:
438
+ s_after, loc_after = self._parse_peek_score_loc(peek, fallback_score=base_score)
439
  delta = max(0, s_after - base_score)
440
 
441
+ new_loc_bonus = 0
442
+ changed_bonus = 0
443
+ if loc_after and loc_after != base_loc:
444
+ changed_bonus = 60
445
+ if loc_after not in self.locations_visited:
446
+ new_loc_bonus = 280
447
 
448
+ loop_pen = 90 * list(self.recent_actions).count(low)
449
+ stuck_pen = 180 if self._is_stuck(peek) else 0
450
 
451
+ # prefer untried exits
452
+ untried_bonus = 120 if low in untried else 0
453
 
454
+ u = delta * 900 + new_loc_bonus + changed_bonus + untried_bonus - loop_pen - stuck_pen
455
+
456
+ # lamp preference in darkness
457
+ if "dark" in (obs or "").lower() and "lamp" in low:
458
  u += 120
459
 
460
  if u > best_u:
 
466
  return None, "Look-ahead found no good action; fallback."
467
  return best_a, best_th
468
 
469
+ def _parse_peek_score_loc(self, text: str, fallback_score: int) -> tuple[int, str]:
470
  score = fallback_score
471
+ loc = self._extract_location(text)
472
+ m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\|\s*Location:\s*(.+?)\]", text or "")
 
 
 
473
  if m:
474
+ score = int(m.group(1))
475
+ loc = m.group(4).strip()
476
+ mp = re.search(r"\[\+(\d+)\s+points", text or "", flags=re.IGNORECASE)
477
+ if mp and score == fallback_score:
478
+ score = fallback_score + int(mp.group(1))
479
+ return score, loc
480
 
481
  # ---------------------------------------------------------------------
482
+ # No-peek fallback
483
+ async def _choose_without_peek(self, obs: str, inv_txt: str, candidates: list[str], seed: int, step: int) -> tuple[str, str]:
484
+ # heuristic: take untried exit first
485
+ untried = self._extract_untried_exits(obs)
486
+ if untried:
487
+ return untried[0], "Heuristic: try an untried exit."
488
+
489
+ # heuristic: try a promising interaction not tried yet
490
  loc = self._extract_location(obs)
 
 
 
 
 
 
 
491
  for a in candidates:
492
+ low = a.lower().strip()
493
+ if low.startswith(("take ", "get ", "open ", "read ", "examine ", "turn on ", "unlock ")):
494
  if self.tried[(loc, low)] == 0:
495
+ return a, "Heuristic: try a high-value interaction."
496
 
497
  # LLM fallback: choose from candidate list exactly
498
  if not candidates:
 
500
 
501
  cand = candidates[:10]
502
  prompt = self._build_llm_prompt(obs, inv_txt, cand)
 
 
 
 
503
 
504
+ try:
505
+ resp = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=160)
506
+ thought, tool, args = self._parse_llm_response(resp)
507
+ a = self._normalize_action(str(args.get("action", "")).strip())
508
+ canon = {x.lower(): x for x in cand}
509
+ if a.lower() in canon:
510
+ return canon[a.lower()], thought or "LLM chose a candidate."
511
+ return cand[0], "LLM invalid; fallback to first candidate."
512
+ except Exception:
513
+ # no LLM available / error => deterministic fallback
514
+ return cand[0], "LLM unavailable/error; fallback to first candidate."
515
 
516
  def _build_llm_prompt(self, obs: str, inv_txt: str, candidates: list[str]) -> str:
517
  obs = (obs or "").strip()[:1100]
518
  inv_txt = (inv_txt or "").strip()[:350]
519
 
520
+ parts = [
521
+ f"Score: {self.score}/{self.max_score} | Moves: {max(self.moves, self._internal_moves)}",
522
+ f"Location: {self.last_location}",
523
  ]
524
  if inv_txt:
525
+ parts.append(f"Inventory info:\n{inv_txt}")
526
  if self.recent_actions:
527
+ parts.append("Recent actions: " + ", ".join(list(self.recent_actions)[-6:]))
528
 
529
+ parts.append("\nCurrent observation:\n" + obs)
530
+ parts.append("\nCandidate actions (choose exactly ONE):")
531
  for a in candidates:
532
+ parts.append(f"- {a}")
533
+ return "\n".join(parts)
 
534
 
535
+ def _parse_llm_response(self, response: str) -> tuple[str, str, dict]:
536
  thought = ""
537
  tool = "play_action"
538
  args = {"action": "look"}
 
539
  if not response:
540
  return thought, tool, args
541
 
542
  m = re.search(r"(?im)^\s*THOUGHT\s*:\s*(.+)$", response)
543
  if m:
544
  thought = m.group(1).strip()
 
545
  m = re.search(r"(?im)^\s*TOOL\s*:\s*([a-zA-Z0-9_]+)\s*$", response)
546
  if m:
547
  tool = m.group(1).strip()
 
548
  m = re.search(r"(?is)^\s*ARGS\s*:\s*(\{.*\})\s*$", response)
549
  if m:
550
  raw = m.group(1).strip()
 
561
  if not isinstance(args, dict):
562
  args = {"action": "look"}
563
 
564
+ # enforce tool
565
+ tool = "play_action"
566
  return thought, tool, args
567
 
568
+ # ---------------------------------------------------------------------
569
+ def _parse_valid_actions(self, txt: str) -> list[str]:
570
+ if not txt:
571
+ return []
572
+ out = []
573
+ for line in txt.splitlines():
574
+ line = line.strip()
575
+ if line.startswith("- "):
576
+ a = line[2:].strip()
577
+ a = self._normalize_action(a)
578
+ low = a.lower()
579
+ if not a:
580
+ continue
581
+ if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
582
+ continue
583
+ out.append(a)
584
+ # dedup keep order
585
+ seen = set()
586
+ uniq = []
587
+ for a in out:
588
+ low = a.lower()
589
+ if low not in seen:
590
+ seen.add(low)
591
+ uniq.append(a)
592
+ return uniq