janisaiad committed on
Commit
a9fa50a
·
0 Parent(s):
Files changed (7) hide show
  1. .gitignore +22 -0
  2. README.md +59 -0
  3. agent.py +667 -0
  4. app.py +71 -0
  5. mcp_server.py +277 -0
  6. requirements.txt +9 -0
  7. run.sh +7 -0
.gitignore ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+
11
+ # Environment
12
+ .env
13
+ .venv/
14
+ venv/
15
+
16
+ # IDE
17
+ .vscode/
18
+ .idea/
19
+
20
+ # OS
21
+ .DS_Store
22
+ Thumbs.db
README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Text Adventure Agent Submission
3
+ emoji: "\U0001F5FA"
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: "5.12.0"
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ # Text Adventure Agent Submission
14
+
15
+ ## Overview
16
+
17
+ This is my submission for the Text Adventure Agent assignment. My agent uses the ReAct pattern to play text adventure games via MCP.
18
+
19
+ ## Approach
20
+
21
+ <!-- Describe your approach here -->
22
+
23
+ - What strategy does your agent use?
24
+ - What tools did you implement in your MCP server?
25
+ - Any interesting techniques or optimizations?
26
+
27
+ ## Files
28
+
29
+ | File | Description |
30
+ |------|-------------|
31
+ | `agent.py` | ReAct agent with `StudentAgent` class |
32
+ | `mcp_server.py` | MCP server with game interaction tools |
33
+ | `app.py` | Gradio interface for HF Space |
34
+ | `requirements.txt` | Additional dependencies |
35
+
36
+ ## How to Submit
37
+
38
+ 1. Fork the template Space: `https://huggingface.co/spaces/LLM-course/text-adventure-template`
39
+ 2. Clone your fork locally
40
+ 3. Implement your agent in `agent.py` and `mcp_server.py`
41
+ 4. Test locally (see below)
42
+ 5. Push your changes to your Space
43
+ 6. Submit your Space URL on the course platform
44
+
45
+ ## Local Testing
46
+
47
+ ```bash
48
+ # Install dependencies
49
+ pip install -r requirements.txt
50
+
51
+ # Test the MCP server interactively
52
+ fastmcp dev mcp_server.py
53
+
54
+ # Run your agent on a game
55
+ python run_agent.py --agent . --game lostpig -v -n 20
56
+
57
+ # Run evaluation
58
+ python -m evaluation.evaluate -s . -g lostpig -t 3
59
+ ```
agent.py ADDED
@@ -0,0 +1,667 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ZorkGPT-Lite: Full orchestrator with Agent, Critic, Extractor, StrategyGen.
3
+ Uses Z-machine data (memory, inventory, get_valid_actions) + LLM for reasoning.
4
+ """
5
+
6
+ import asyncio
7
+ import json
8
+ import os
9
+ import re
10
+ from dataclasses import dataclass, field
11
+ from typing import Optional
12
+
13
+ from dotenv import load_dotenv
14
+
15
+ load_dotenv()
16
+
17
+ try:
18
+ from transformers import AutoModelForCausalLM, AutoTokenizer
19
+ import torch
20
+ _LOCAL_INFERENCE_AVAILABLE = True
21
+ except ImportError:
22
+ _LOCAL_INFERENCE_AVAILABLE = False
23
+
24
+ from huggingface_hub import InferenceClient
25
+
26
+ # =============================================================================
27
+ # LLM Configuration
28
+ # =============================================================================
29
+
30
# Model served through the Hugging Face Inference API (remote path of call_llm).
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
# Opt-in switch for local inference; accepts "true", "1" or "yes" (case-insensitive).
_USE_LOCAL = os.getenv("USE_LOCAL_MODEL", "false").lower() in ("true", "1", "yes")
# Checkpoint loaded by _ensure_local_model() when local inference is active.
_HF_MODEL_LOCAL = os.getenv("HF_MODEL", "Qwen/Qwen2.5-7B-Instruct")
_hf_token = os.getenv("HF_TOKEN")

# The remote client is needed whenever local inference is not both requested
# AND importable; in that case a missing token is a hard error at import time.
if not _USE_LOCAL or not _LOCAL_INFERENCE_AVAILABLE:
    if not _hf_token:
        raise ValueError("HF_TOKEN not found. Set it in your .env file (or use USE_LOCAL_MODEL=true with transformers).")
    LLM_CLIENT: Optional[InferenceClient] = InferenceClient(token=_hf_token)
else:
    LLM_CLIENT = None

# Lazily populated by _ensure_local_model(); stay None until the first local call.
_local_tokenizer = None
_local_model = None
44
+
45
+
46
def _ensure_local_model() -> None:
    """Load the local tokenizer and model once, on first use.

    Does nothing when a model is already loaded, or when local inference is
    either not requested (``_USE_LOCAL``) or not importable
    (``_LOCAL_INFERENCE_AVAILABLE``). The loaded objects are stored in the
    module globals ``_local_tokenizer`` and ``_local_model``.
    """
    global _local_tokenizer, _local_model

    already_loaded = _local_model is not None
    if already_loaded or not _LOCAL_INFERENCE_AVAILABLE or not _USE_LOCAL:
        return

    on_gpu = torch.cuda.is_available()
    device = "cuda" if on_gpu else "cpu"

    # Only forward a token when one is configured; public models load without it.
    if _hf_token:
        token_kw = {"token": _hf_token}
    else:
        token_kw = {}
        print("[INFO] No HF_TOKEN; gated models may fail. Set HF_TOKEN in .env for e.g. Gemma.")

    _local_tokenizer = AutoTokenizer.from_pretrained(_HF_MODEL_LOCAL, **token_kw)
    # Half precision with automatic device placement on GPU; full precision on CPU.
    _local_model = AutoModelForCausalLM.from_pretrained(
        _HF_MODEL_LOCAL,
        torch_dtype=torch.float16 if on_gpu else torch.float32,
        device_map="auto" if on_gpu else None,
        **token_kw,
    )
    if not on_gpu:
        _local_model = _local_model.to(device)
    print(f"[INFO] Local model loaded: {_HF_MODEL_LOCAL} on {device}")
66
+
67
+
68
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 400) -> str:
    """Send one system+user exchange to the LLM and return its text reply.

    Uses the local transformers model when ``_USE_LOCAL`` is set and the
    libraries are importable; otherwise calls the Hugging Face Inference API.

    Args:
        prompt: User message.
        system_prompt: System message.
        seed: Sampling seed forwarded to the remote API. The local path decodes
            greedily (``do_sample=False``) and does not use it.
        max_tokens: Upper bound on generated tokens.

    Returns:
        The model's reply. Always a ``str``: an API response with no content
        yields ``""`` rather than ``None``.

    Raises:
        RuntimeError: If local inference is selected but the model did not load.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    if _USE_LOCAL and _LOCAL_INFERENCE_AVAILABLE:
        _ensure_local_model()
        if _local_tokenizer is None or _local_model is None:
            raise RuntimeError("Local model failed to load.")
        if hasattr(_local_tokenizer, "apply_chat_template"):
            formatted = _local_tokenizer.apply_chat_template(
                messages, tokenize=False, add_generation_prompt=True
            )
        else:
            formatted = f"{system_prompt}\n\nUser: {prompt}\n\nAssistant:"
        # Prime small models (e.g. Gemma) so they continue in the expected
        # THOUGHT/TOOL/ARGS format.
        formatted = formatted.rstrip() + "\nTHOUGHT:"
        inputs = _local_tokenizer(formatted, return_tensors="pt")
        model_device = next(_local_model.parameters()).device
        inputs = {k: (v.to(model_device) if isinstance(v, torch.Tensor) else v) for k, v in inputs.items()}
        with torch.no_grad():
            gen_out = _local_model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                pad_token_id=_local_tokenizer.eos_token_id,
                do_sample=False,
            )
        # Keep only the newly generated tokens (drop the echoed prompt).
        out_slice = gen_out[0][inputs["input_ids"].shape[1]:]
        if out_slice.is_cuda:
            out_slice = out_slice.cpu()
        raw = _local_tokenizer.decode(out_slice, skip_special_tokens=True).strip()
        # The priming "THOUGHT:" belongs to the prompt, so re-attach it to the reply.
        if formatted.rstrip().endswith("THOUGHT:") and raw and not raw.upper().startswith("THOUGHT:"):
            raw = "THOUGHT: " + raw
        return raw
    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.0,
        max_tokens=max_tokens,
        seed=seed,
    )
    # message.content is optional in chat-completion responses; callers call
    # .strip() on the result, so never hand back None.
    return response.choices[0].message.content or ""
111
+
112
+
113
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class."""
    # Field notes below describe how StudentAgent.run fills this record.
    final_score: int  # highest score parsed from game output
    max_score: int  # the agent's max_score attribute (initialised to 350)
    moves: int  # number of tool calls that completed without raising
    locations_visited: set[str]  # first line of each observation, used as a location label
    game_completed: bool  # True when the last observation contains a game-over phrase
    error: Optional[str] = None  # not set by StudentAgent.run
    history: list[tuple[str, str, str]] = field(default_factory=list)  # (thought, "tool(args)", observation[:100]) per step
123
+
124
+
125
+ # =============================================================================
126
+ # Prompts: Agent, Critic, StrategyGen
127
+ # =============================================================================
128
+
129
# System prompt for the acting LLM; the THOUGHT/TOOL/ARGS reply format it
# demands is what StudentAgent._parse_response parses.
AGENT_PROMPT = """You are an expert text adventure player. MAXIMIZE YOUR SCORE.

AVAILABLE MCP TOOLS:
- play_action: Execute game commands (north, take lamp, open mailbox, get up, etc.)
- memory: Get current state from Z-machine
- inventory: Get items from Z-machine
- get_map: Explored locations

CRITICAL: You MUST respond in this exact format (no markdown, no extra text):
THOUGHT: <one sentence about what to do next>
TOOL: play_action
ARGS: {"action": "<command>"}

Universal rules (apply to any text adventure):
- If game says "get out of bed first" or "have to get up": try get up, stand
- If "too dark" or "can't see": light lamp, take lamp
- If "can't go that way": try different direction
- If "don't understand": try simpler verb (look, examine, take X)
- Explore directions (north, south, east, west). Take items. Do NOT repeat same action in a loop."""

# System prompt for the critic LLM; its "SCORE: <float>" reply is read by
# StudentAgent._parse_critic_score.
# NOTE(review): the scoring line below contains a stray non-English token
# (it appears to mean "repeated") -- confirm whether that is intentional.
CRITIC_PROMPT = """You evaluate whether a proposed game action is good.

Given: current observation, valid actions from Z-machine, proposed action.

Score 0-1: 0=bad (invalid,重复, no progress), 1=good (valid, progresses).

Respond in one line: SCORE: <0.0 to 1.0> REASON: <brief reason>

If action is in valid_actions or is a common command (look, north, take X), score >= 0.6."""

# System prompt for the periodic strategy pass that appends insights to the
# agent's knowledge_base.
STRATEGY_PROMPT = """Analyze this gameplay history and extract 3-5 strategic insights.

Format each as a short rule. Example: "In dark games, get lamp before exploring."
Output only the insights, one per line."""
163
+
164
+
165
+ # =============================================================================
166
+ # StudentAgent: Full ZorkGPT-Lite Orchestrator
167
+ # =============================================================================
168
+
169
+ class StudentAgent:
170
+ """
171
+ Full orchestrator: Extractor (Z-machine) -> Agent -> Critic (Z-machine + LLM) -> Execute.
172
+ StrategyGen updates knowledge_base every 25 turns.
173
+ """
174
+
175
+ CRITIC_THRESHOLD = 0.5
176
+ MAX_CRITIC_RETRIES = 3
177
+ STRATEGY_UPDATE_INTERVAL = 12
178
+ VALID_ACTIONS_TIMEOUT = 0.8
179
+
180
+ def __init__(self):
181
+ self.history: list[dict] = []
182
+ self.recent_actions: list[str] = []
183
+ self.failed_actions: set[str] = set() # we avoid repeating actions that failed
184
+ self.score: int = 0
185
+ self.max_score: int = 350
186
+ self.steps_without_score: int = 0
187
+ self.knowledge_base: str = "General: Explore, take items, use lamp before dark. Try get up if stuck. Try east/north when south fails."
188
+ self.seen_state_hashes: set[str] = set()
189
+
190
+ async def run(
191
+ self,
192
+ client,
193
+ game: str,
194
+ max_steps: int,
195
+ seed: int,
196
+ verbose: bool = False,
197
+ ) -> RunResult:
198
+ """Run the full orchestrator loop."""
199
+ locations_visited = set()
200
+ history = []
201
+ moves = 0
202
+ tool_names = [t.name for t in await client.list_tools()]
203
+ self.failed_actions = set()
204
+ self.steps_without_score = 0
205
+
206
+ # we get initial observation
207
+ result = await client.call_tool("play_action", {"action": "look"})
208
+ observation = self._extract_result(result)
209
+ loc = observation.split("\n")[0] if observation else "Unknown"
210
+ locations_visited.add(loc)
211
+ if verbose:
212
+ print(f"\n{observation}")
213
+
214
+ context = {}
215
+ for step in range(1, max_steps + 1):
216
+ # we extract context from Z-machine (no LLM)
217
+ context = await self._extract_context(client)
218
+ # we build agent prompt
219
+ prompt = self._build_agent_prompt(observation, context)
220
+ thought, tool_name, tool_args = "No reasoning", "play_action", {"action": "look"}
221
+ action = "look"
222
+
223
+ # we get action from Agent LM (max_tokens 250 for small models)
224
+ response = call_llm(prompt, AGENT_PROMPT, seed + step, max_tokens=250)
225
+ if not response.strip():
226
+ response = self._heuristic_action(observation)
227
+ if verbose:
228
+ print(f"[DEBUG] LLM empty, heuristic: {response[:80]}")
229
+ thought, tool_name, tool_args = self._parse_response(response, tool_names)
230
+ tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)
231
+ if tool_name == "play_action":
232
+ action = tool_args.get("action", "look")
233
+
234
+ # we run Critic: fast check first; LLM only when uncertain (saves ~1 LLM call/step)
235
+ accepted = False
236
+ fast_ok = self._critic_fast_check(action, context.get("valid_actions", ""))
237
+ if fast_ok:
238
+ accepted = True
239
+ for attempt in range(self.MAX_CRITIC_RETRIES):
240
+ if accepted:
241
+ break
242
+ critic_prompt = f"""Observation: {observation[:300]}
243
+ Valid actions: {context.get('valid_actions', 'unknown')}
244
+ Proposed: {action}
245
+
246
+ Score and reason?"""
247
+ critic_resp = call_llm(critic_prompt, CRITIC_PROMPT, seed + step + attempt, max_tokens=80)
248
+ score = self._parse_critic_score(critic_resp)
249
+ if score >= self.CRITIC_THRESHOLD:
250
+ accepted = True
251
+ break
252
+ if attempt < self.MAX_CRITIC_RETRIES - 1:
253
+ feedback = f"Action '{action}' rejected (score {score:.1f}). Try different."
254
+ prompt = self._build_agent_prompt(observation, context, feedback)
255
+ response = call_llm(prompt, AGENT_PROMPT, seed + step + attempt)
256
+ thought, tool_name, tool_args = self._parse_response(response, tool_names)
257
+ tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)
258
+ if tool_name == "play_action":
259
+ action = tool_args.get("action", "look")
260
+ else:
261
+ accepted = True
262
+
263
+ # we loop detection: try result-based heuristic first, then generic verb cycle
264
+ if len(self.recent_actions) >= 3 and len(set(self.recent_actions[-3:])) == 1:
265
+ res = self._result_based_heuristic(observation)
266
+ if res is not None:
267
+ action = res
268
+ else:
269
+ action = self._generic_verb_cycle()
270
+ tool_args = {"action": action}
271
+ if verbose:
272
+ print(f"[WARNING] Loop detected - trying '{action}' instead")
273
+ # we skip actions that recently failed
274
+ if action.lower() in self.failed_actions:
275
+ action = self._generic_verb_cycle()
276
+ tool_args = {"action": action}
277
+ # we prefer valid_actions when stuck (no score for many steps)
278
+ if self.steps_without_score >= 5 and context.get("valid_actions"):
279
+ va = context["valid_actions"].lower()
280
+ for cand in ["take all", "take lamp", "take keys", "open", "examine", "north", "east"]:
281
+ if cand in va and cand not in self.failed_actions:
282
+ if cand not in [a.lower() for a in self.recent_actions[-3:]]:
283
+ action = cand
284
+ tool_args = {"action": action}
285
+ break
286
+ self.recent_actions.append(action)
287
+ if len(self.recent_actions) > 10:
288
+ self.recent_actions = self.recent_actions[-10:]
289
+ # we track failed actions (rejection, no movement, no score)
290
+ if self._is_failure_result(observation, action):
291
+ self.failed_actions.add(action.lower())
292
+ else:
293
+ self.failed_actions.discard(action.lower())
294
+ # we track score progress and reinforce what worked
295
+ old_score = self.score
296
+ self._update_score(observation)
297
+ if self.score > old_score:
298
+ self.steps_without_score = 0
299
+ if len(self.knowledge_base) < 800:
300
+ self.knowledge_base = self.knowledge_base + f"\nScore: {action} worked."
301
+ else:
302
+ self.steps_without_score += 1
303
+
304
+ if verbose:
305
+ print(f"\n--- Step {step} ---")
306
+ print(f"[THOUGHT] {thought}")
307
+ print(f"[TOOL] {tool_name}({tool_args})")
308
+
309
+ # we execute
310
+ try:
311
+ result = await client.call_tool(tool_name, tool_args)
312
+ observation = self._extract_result(result)
313
+ moves += 1
314
+ except Exception as e:
315
+ observation = f"Error: {e}"
316
+ if verbose:
317
+ print(f"[ERROR] {e}")
318
+
319
+ loc = observation.split("\n")[0] if observation else "Unknown"
320
+ locations_visited.add(loc)
321
+ self._update_score(observation)
322
+ history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
323
+
324
+ self.history.append({"step": step, "thought": thought, "action": action, "result": observation[:200]})
325
+ if len(self.history) > 20:
326
+ self.history = self.history[-20:]
327
+
328
+ if verbose:
329
+ print(f"[RESULT] {observation[:200]}...")
330
+
331
+ # we update knowledge_base every N turns (StrategyGen)
332
+ if step % self.STRATEGY_UPDATE_INTERVAL == 0 and self.history:
333
+ strategy_hist = "\n".join([f"Step {h['step']}: {h['action']} -> {h['result'][:80]}" for h in self.history[-15:]])
334
+ strat_prompt = f"History:\n{strategy_hist}\n\nCurrent score: {self.score}\nExtract insights:"
335
+ try:
336
+ insights = call_llm(strat_prompt, STRATEGY_PROMPT, seed + step, max_tokens=150)
337
+ if insights.strip():
338
+ self.knowledge_base = self.knowledge_base + "\n" + insights.strip()[:300]
339
+ except Exception:
340
+ pass
341
+
342
+ if self._is_game_over(observation):
343
+ if verbose:
344
+ print("\n*** GAME OVER ***")
345
+ break
346
+
347
+ return RunResult(
348
+ final_score=self.score,
349
+ max_score=self.max_score,
350
+ moves=moves,
351
+ locations_visited=locations_visited,
352
+ game_completed=self._is_game_over(observation),
353
+ history=history,
354
+ )
355
+
356
+ async def _extract_context(self, client) -> dict:
357
+ """Extractor: Z-machine data via MCP tools (no LLM)."""
358
+ ctx = {}
359
+ tools_to_try = [
360
+ ("memory", "memory"),
361
+ ("inventory", "inventory"),
362
+ ("get_map", "map"),
363
+ ]
364
+ for tool_name, key in tools_to_try:
365
+ try:
366
+ r = await client.call_tool(tool_name, {})
367
+ ctx[key] = self._extract_result(r)
368
+ except Exception:
369
+ ctx[key] = ""
370
+ # we skip get_valid_actions by default (can block on spacy); set USE_VALID_ACTIONS=true to enable
371
+ if os.getenv("USE_VALID_ACTIONS", "false").lower() in ("true", "1", "yes"):
372
+ try:
373
+ r = await asyncio.wait_for(
374
+ client.call_tool("get_valid_actions", {}),
375
+ timeout=self.VALID_ACTIONS_TIMEOUT,
376
+ )
377
+ ctx["valid_actions"] = self._extract_result(r)
378
+ except (asyncio.TimeoutError, Exception):
379
+ ctx["valid_actions"] = ""
380
+ else:
381
+ ctx["valid_actions"] = ""
382
+ return ctx
383
+
384
+ def _build_agent_prompt(self, observation: str, context: dict, feedback: str = "") -> str:
385
+ """Build agent prompt with context."""
386
+ parts = [f"Knowledge base:\n{self.knowledge_base[:500]}\n"]
387
+ parts.append(f"Current score: {self.score}")
388
+ if context.get("valid_actions"):
389
+ parts.append(f"\nValid actions (prefer these): {context['valid_actions'][:200]}")
390
+ if context.get("memory"):
391
+ parts.append(f"\nZ-machine state:\n{context['memory'][:350]}")
392
+ if context.get("map"):
393
+ parts.append(f"\nMap:\n{context['map'][:250]}")
394
+ if context.get("inventory"):
395
+ parts.append(f"\n{context['inventory']}")
396
+ if self.failed_actions:
397
+ parts.append(f"\nAvoid (recently failed): {', '.join(list(self.failed_actions)[:8])}")
398
+ if self.history:
399
+ parts.append("\nRecent:")
400
+ for h in self.history[-4:]:
401
+ parts.append(f" > {h.get('action','?')} -> {h.get('result','')[:55]}...")
402
+ if feedback:
403
+ parts.append(f"\n[FEEDBACK] {feedback}")
404
+ parts.append(f"\nCurrent observation:\n{observation}")
405
+ parts.append("\nWhat do you do next?")
406
+ return "\n".join(parts)
407
+
408
+ def _critic_fast_check(self, action: str, valid_actions_str: str) -> bool:
409
+ """Fast validation: is action likely valid?"""
410
+ action_lower = action.lower().strip()
411
+ if valid_actions_str and "valid actions:" in valid_actions_str.lower():
412
+ va = valid_actions_str.lower()
413
+ if action_lower in va or any(action_lower.startswith(a.strip()) for a in va.split(",")[:20] if a.strip()):
414
+ return True
415
+ verb = action_lower.split()[0] if action_lower.split() else ""
416
+ if verb in ["look", "inventory", "north", "south", "east", "west", "take", "open", "examine"]:
417
+ return True
418
+ common = ["look", "inventory", "north", "south", "east", "west", "up", "down", "take", "drop", "open", "examine", "read", "get"]
419
+ if any(action_lower.startswith(c) for c in common):
420
+ return True
421
+ return True
422
+
423
+ def _parse_critic_score(self, resp: str) -> float:
424
+ """Parse critic score from response."""
425
+ m = re.search(r"SCORE:\s*([\d.]+)", resp, re.IGNORECASE)
426
+ if m:
427
+ try:
428
+ return float(m.group(1))
429
+ except ValueError:
430
+ pass
431
+ return 0.5
432
+
433
+ # =========================================================================
434
+ # Universal verb vocabulary (game-agnostic) per common_structure.md
435
+ # =========================================================================
436
+ # we cycle through these when no result-based pattern matches
437
+ UNIVERSAL_VERB_CYCLE = [
438
+ "look", "examine", "inventory",
439
+ "north", "south", "east", "west", "up", "down", "in", "out",
440
+ "take all", "take lamp", "take keys", "take wallet", "take phone", "take sword",
441
+ "open mailbox", "open door", "open", "open chest",
442
+ "get up", "stand", "rise", "wake",
443
+ "light lamp", "turn on lamp", "wear", "use", "read",
444
+ ]
445
+
446
+ def _result_based_heuristic(self, result_text: str) -> str | None:
447
+ """Game-agnostic heuristic from result text per common_structure.md."""
448
+ r = result_text.lower()
449
+ # we prioritize taking visible objects when room lists them (905, etc)
450
+ if "telephone" in r or ("phone" in r and "take phone" not in [a.lower() for a in self.recent_actions[-3:]]):
451
+ if "take phone" not in self.failed_actions:
452
+ return "take phone"
453
+ if "wallet" in r and "take wallet" not in self.failed_actions and "take wallet" not in [a.lower() for a in self.recent_actions[-3:]]:
454
+ return "take wallet"
455
+ if "keys" in r and "take keys" not in self.failed_actions and "take keys" not in [a.lower() for a in self.recent_actions[-3:]]:
456
+ return "take keys"
457
+ # prerequisite: get out of bed, have to get up
458
+ if "get out of bed" in r or "out of bed" in r or "have to get up" in r:
459
+ return "get up"
460
+ if "get up" in r and "have to" in r:
461
+ return "get up"
462
+ if "stand" in r and ("have to" in r or "must" in r):
463
+ return "stand"
464
+ # light: too dark, can't see
465
+ if "too dark" in r or "can't see" in r or "too dark to" in r:
466
+ for cmd in ["light lamp", "turn on lamp", "take lamp"]:
467
+ if cmd not in [a.lower() for a in self.recent_actions[-3:]]:
468
+ return cmd
469
+ return "light lamp"
470
+ # movement block: wall, can't go that way
471
+ if "can't go" in r or "wall" in r or "can't go that way" in r or "too narrow" in r:
472
+ return None # we let generic cycle pick next direction
473
+ # parser rejection: don't understand, can't
474
+ if "don't understand" in r or "i don't understand" in r:
475
+ return "look"
476
+ if "you can't" in r or "can't do that" in r:
477
+ return "examine"
478
+ # object: take X when objects mentioned (keys, wallet, lamp, etc)
479
+ common_objects = ["telephone", "phone", "keys", "wallet", "lamp", "sword", "treasure", "book", "rope", "knife", "chest", "dresser"]
480
+ for word in common_objects:
481
+ if word in r:
482
+ action_try = f"take {word}"
483
+ if word == "telephone":
484
+ action_try = "take phone"
485
+ if action_try in self.failed_actions:
486
+ continue
487
+ recent_lower = [a.lower() for a in self.recent_actions[-5:]]
488
+ if action_try not in recent_lower:
489
+ return action_try
490
+ if "dresser" in r:
491
+ if "open dresser" not in [a.lower() for a in self.recent_actions[-3:]]:
492
+ return "open dresser"
493
+ for obj in self._extract_objects_from_room(result_text):
494
+ action_try = f"take {obj}"
495
+ if action_try in self.failed_actions:
496
+ continue
497
+ recent_lower = [a.lower() for a in self.recent_actions[-5:]]
498
+ if action_try not in recent_lower:
499
+ return action_try
500
+ if "mailbox" in r:
501
+ recent_lower = [a.lower() for a in self.recent_actions[-3:]]
502
+ if "open mailbox" not in recent_lower:
503
+ return "open mailbox"
504
+ if "open" in r and "closed" in r:
505
+ for word in ["door", "mailbox", "chest", "box"]:
506
+ if word in r:
507
+ return f"open {word}"
508
+ if "open" in r and "door" in r:
509
+ return "open door"
510
+ # no such thing, I don't see
511
+ if "don't see" in r or "no such" in r or "can't see any" in r:
512
+ return "look"
513
+ # only go X (extract direction)
514
+ for d in ["north", "south", "east", "west"]:
515
+ if f"only go {d}" in r or f"only {d}" in r or f"can only go {d}" in r:
516
+ return d
517
+ # lostpig / general: south fails with "trouble" -> try east (forest)
518
+ if "get in big trouble" in r or "big trouble" in r:
519
+ south_count = sum(1 for a in self.recent_actions[-5:] if a.lower() == "south")
520
+ if south_count >= 2:
521
+ return "east"
522
+ return "north"
523
+ # forest dark / pig somewhere: try forest first, then try west/south when stuck
524
+ if "forest" in r and "dark" in r:
525
+ east_count = sum(1 for a in self.recent_actions[-6:] if a.lower() == "east")
526
+ north_count = sum(1 for a in self.recent_actions[-6:] if a.lower() == "north")
527
+ if east_count + north_count >= 4:
528
+ return "west"
529
+ if east_count < 2:
530
+ return "east"
531
+ return "north"
532
+ return None
533
+
534
+ def _extract_objects_from_room(self, text: str) -> list[str]:
535
+ """Extract object names from room description for take/examine."""
536
+ r = text.lower()
537
+ objects = []
538
+ # patterns: "there is a X", "you see X", "X and Y", "on the X are Y", "X, Y and Z"
539
+ for m in re.finditer(r"\b(there is|you see|are|on the \w+ are)\s+[a ]+(\w+)", r):
540
+ objects.append(m.group(2))
541
+ for m in re.finditer(r"\b(telephone|phone|wallet|keys|lamp|sword|book|rope|knife|chest|mailbox)\b", r):
542
+ objects.append(m.group(1))
543
+ return list(dict.fromkeys(objects))[:5]
544
+
545
+ def _generic_verb_cycle(self) -> str:
546
+ """Return next action from universal cycle, skipping failed actions."""
547
+ cycle = self.UNIVERSAL_VERB_CYCLE
548
+ start = 0
549
+ if self.recent_actions:
550
+ last = self.recent_actions[-1].lower()
551
+ idx = next((i for i, a in enumerate(cycle) if a == last), -1)
552
+ start = (idx + 1) % len(cycle)
553
+ for i in range(len(cycle)):
554
+ cand = cycle[(start + i) % len(cycle)]
555
+ if cand not in self.failed_actions:
556
+ return cand
557
+ return "look"
558
+
559
+ def _heuristic_action(self, observation: str) -> str:
560
+ """Heuristic when LLM empty: result-based first, then generic verb cycle."""
561
+ action = self._result_based_heuristic(observation)
562
+ if action is None:
563
+ action = self._generic_verb_cycle()
564
+ return f"THOUGHT: Try {action}.\nTOOL: play_action\nARGS: {{\"action\": \"{action}\"}}"
565
+
566
+ def _parse_response(self, response: str, valid_tools: list[str]) -> tuple[str, str, dict]:
567
+ """Parse LLM response; fallback to extracting action from raw text."""
568
+ thought = "No reasoning provided"
569
+ tool_name = "play_action"
570
+ tool_args = {"action": "look"}
571
+ for line in response.strip().split("\n"):
572
+ lc = line.strip()
573
+ lu = lc.upper()
574
+ if lu.startswith("THOUGHT:"):
575
+ thought = lc.split(":", 1)[1].strip() or thought
576
+ elif lu.startswith("TOOL:"):
577
+ raw = lc.split(":", 1)[1].strip().lower().replace("**", "").replace("*", "")
578
+ raw = raw.split()[0] if raw else "play_action"
579
+ tool_name = raw
580
+ elif lu.startswith("ARGS:"):
581
+ s = lc.split(":", 1)[1].strip().replace("'", '"')
582
+ try:
583
+ tool_args = json.loads(s)
584
+ except json.JSONDecodeError:
585
+ m = re.search(r'"action"\s*:\s*"([^"]+)"', s)
586
+ if m:
587
+ tool_args = {"action": m.group(1)}
588
+ # we fallback: if still "look", try to extract action from raw response
589
+ if tool_args.get("action", "look") == "look" and response.strip():
590
+ r = response.lower()
591
+ for cmd in ["east", "north", "south", "west", "inventory", "take all", "take lamp"]:
592
+ if cmd in r:
593
+ tool_args = {"action": cmd}
594
+ break
595
+ return thought, tool_name, tool_args
596
+
597
+ def _validate_tool_call(self, tool_name: str, tool_args: dict, valid_tools: list[str]) -> tuple[str, dict]:
598
+ """Validate and fix tool call."""
599
+ if tool_name not in valid_tools:
600
+ tool_name = "play_action"
601
+ if tool_name == "play_action":
602
+ action = tool_args.get("action", "look")
603
+ invalid = {"check": "examine", "inspect": "examine", "search": "look", "grab": "take", "pick": "take"}
604
+ words = action.lower().split()
605
+ if words and words[0] in invalid:
606
+ words[0] = invalid[words[0]]
607
+ action = " ".join(words)
608
+ action = action.lower().strip().replace("**", "").replace("*", "")
609
+ action = " ".join(action.split())
610
+ tool_args["action"] = action
611
+ return tool_name, tool_args
612
+
613
+ def _extract_result(self, result) -> str:
614
+ """Extract text from MCP result."""
615
+ if hasattr(result, "content") and result.content:
616
+ return result.content[0].text
617
+ if isinstance(result, list) and result:
618
+ return result[0].text if hasattr(result[0], "text") else str(result[0])
619
+ return str(result)
620
+
621
+ def _update_score(self, text: str) -> None:
622
+ """Update score from text."""
623
+ for pat in [r"Score:\s*(\d+)", r"\[Score:\s*(\d+)", r"Total:\s*(\d+)"]:
624
+ m = re.search(pat, text, re.IGNORECASE)
625
+ if m:
626
+ self.score = max(self.score, int(m.group(1)))
627
+ break
628
+
629
+ def _is_game_over(self, text: str) -> bool:
630
+ """Check game over."""
631
+ t = text.lower()
632
+ return any(p in t for p in ["game over", "you have died", "you are dead", "*** you have died ***"])
633
+
634
+ def _is_failure_result(self, result: str, action: str) -> bool:
635
+ """Check if result indicates action failed (rejection, no progress)."""
636
+ r = result.lower()
637
+ failure_phrases = [
638
+ "don't understand", "you can't", "can't do that", "can't go that way",
639
+ "there is no", "no such", "you'll have to", "have to get", "get out of bed first",
640
+ "verb error", "not recognized", "i don't see", "can't see any",
641
+ ]
642
+ if any(p in r for p in failure_phrases):
643
+ return True
644
+ if "get in big trouble" in r or "grunk get in big trouble" in r:
645
+ return True
646
+ return False
647
+
648
+
649
async def test_agent():
    """Smoke-test the agent: ten steps of lostpig against the local MCP server."""
    import sys
    from pathlib import Path

    from fastmcp import Client
    from fastmcp.client.transports import StdioTransport

    # The server script sits next to this file and is launched as a child
    # process with the game selected through the GAME environment variable.
    server_script = Path(__file__).parent / "mcp_server.py"
    child_env = dict(os.environ, GAME="lostpig")
    stdio = StdioTransport(command=sys.executable, args=[str(server_script)], env=child_env)

    player = StudentAgent()
    async with Client(stdio) as client:
        result = await player.run(client=client, game="lostpig", max_steps=10, seed=42, verbose=True)
        print(f"\nFinal: score={result.final_score}, moves={result.moves}")
663
+
664
+
665
# Script entry point: run the local smoke test when this file is executed directly.
if __name__ == "__main__":
    import asyncio
    asyncio.run(test_agent())
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hugging Face Space - Text Adventure Agent Submission
3
+
4
+ This is a code-only Space for submitting your agent implementation.
5
+ The evaluation is run separately.
6
+
7
+ Files in this submission:
8
+ - agent.py: Your ReAct agent implementation
9
+ - mcp_server.py: Your MCP server implementation
10
+ - requirements.txt: Additional dependencies
11
+
12
+ To test locally:
13
+ fastmcp dev mcp_server.py
14
+ python agent.py
15
+ """
16
+
17
+ import gradio as gr
18
+ from pathlib import Path
19
+
20
+
21
def read_readme() -> str:
    """Return the text of the README.md located next to this file.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's locale encoding.

    Returns:
        The README contents, or a short placeholder Markdown document when
        README.md does not exist.
    """
    readme_path = Path(__file__).parent / "README.md"
    try:
        # EAFP: read directly instead of exists()+read, which can race.
        return readme_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return "# Submission\n\nNo README.md found."
27
+
28
+
29
def read_file_content(filename: str) -> str:
    """Return the text of *filename*, resolved relative to this file's directory.

    The file is decoded as UTF-8 explicitly so source files containing
    non-ASCII characters read the same on every platform.

    Args:
        filename: Name (or relative path) of the file to read.

    Returns:
        The file contents, or ``"# File not found: <filename>"`` when the
        file does not exist.
    """
    file_path = Path(__file__).parent / filename
    try:
        # EAFP: read directly instead of exists()+read, which can race.
        return file_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        return f"# File not found: {filename}"
35
+
36
+
37
# Build the Gradio interface: a read-only viewer for the submitted files.
# NOTE(review): nesting is reconstructed from the statement order; confirm the
# closing note belongs inside the Blocks context, after the tabs.
_INTRO_TEXT = (
    "This Space contains a student submission for the Text Adventure Agent assignment. "
    "Use the tabs below to view the submitted code."
)
_FOOTER_TEXT = (
    "---\n"
    "**Note:** This is a code submission Space. "
    "Evaluation is performed using the evaluation script."
)
# (tab title, source file shown in that tab)
_CODE_TABS = (
    ("Agent Code", "agent.py"),
    ("MCP Server Code", "mcp_server.py"),
)

with gr.Blocks(title="Text Adventure Agent Submission") as demo:
    gr.Markdown("# Text Adventure Agent Submission")
    gr.Markdown(_INTRO_TEXT)

    with gr.Tabs():
        with gr.Tab("README"):
            gr.Markdown(read_readme())

        for _tab_title, _source_name in _CODE_TABS:
            with gr.Tab(_tab_title):
                gr.Code(
                    value=read_file_content(_source_name),
                    language="python",
                    label=_source_name,
                )

    gr.Markdown(_FOOTER_TEXT)


if __name__ == "__main__":
    demo.launch()
mcp_server.py ADDED
@@ -0,0 +1,277 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Student MCP Server for Text Adventure Games
3
+
4
+ Full Z-machine integration via Jericho: inventory, location, score, moves,
5
+ valid_actions, and state hash come directly from the Z-machine (no LLM parsing).
6
+ """
7
+
8
+ import sys
9
+ import os
10
+
11
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
12
+
13
+ from fastmcp import FastMCP
14
+ from games.zork_env import TextAdventureEnv
15
+
16
+
17
+ # =============================================================================
18
+ # Create the MCP Server
19
+ # =============================================================================
20
+
21
+ mcp = FastMCP("Student Text Adventure Server")
22
+
23
+
24
+ # =============================================================================
25
+ # Game State Management (Z-machine direct access via Jericho)
26
+ # =============================================================================
27
+
28
+ class GameManager:
29
+ """
30
+ Manages game state with direct Z-machine access through Jericho FrotzEnv.
31
+ All structured data (inventory, location, score, valid_actions) comes from
32
+ the Z-machine, not from text parsing.
33
+ """
34
+
35
+ def __init__(self):
36
+ self.env: TextAdventureEnv | None = None
37
+ self.state = None
38
+ self.game_name: str = ""
39
+ self.history: list[tuple[str, str]] = []
40
+ self.explored_locations: dict[str, set[str]] = {}
41
+
42
+ def initialize(self, game: str = "zork1"):
43
+ """Initialize or reset the game."""
44
+ self.game_name = game
45
+ self.env = TextAdventureEnv(game)
46
+ self.state = self.env.reset()
47
+ self.history = []
48
+ self.explored_locations = {}
49
+ return self.state.observation
50
+
51
+ def step(self, action: str) -> str:
52
+ """Execute an action and return the result."""
53
+ if self.env is None:
54
+ self.initialize(os.environ.get("GAME", "zork1"))
55
+ self.state = self.env.step(action)
56
+ self.history.append((action, self.state.observation))
57
+ if len(self.history) > 50:
58
+ self.history = self.history[-50:]
59
+ # we update map from Z-machine location (state.location is from get_player_location)
60
+ self._update_map(action)
61
+ return self.state.observation
62
+
63
+ def _update_map(self, action: str):
64
+ """Update explored locations from Z-machine state."""
65
+ if action.lower() in ("north", "south", "east", "west", "up", "down", "enter", "exit",
66
+ "n", "s", "e", "w", "u", "d"):
67
+ prev_loc = self._get_location()
68
+ new_loc = self.state.location if hasattr(self.state, "location") else self._extract_location(self.state.observation)
69
+ if prev_loc not in self.explored_locations:
70
+ self.explored_locations[prev_loc] = set()
71
+ if new_loc != prev_loc:
72
+ self.explored_locations[prev_loc].add(f"{action} -> {new_loc}")
73
+
74
+ def _extract_location(self, observation: str) -> str:
75
+ """Fallback: extract location from first line of observation."""
76
+ lines = observation.strip().split("\n")
77
+ return lines[0] if lines else "Unknown"
78
+
79
+ def _get_location(self) -> str:
80
+ """Get current location from Z-machine (state.location) or fallback."""
81
+ if self.state and hasattr(self.state, "location") and self.state.location:
82
+ return self.state.location
83
+ if self.history:
84
+ return self._extract_location(self.history[-1][1])
85
+ return "Unknown"
86
+
87
+ def get_score(self) -> int:
88
+ """Get current score from Z-machine."""
89
+ return self.state.score if self.state else 0
90
+
91
+ def get_moves(self) -> int:
92
+ """Get number of moves from Z-machine."""
93
+ return self.state.moves if self.state else 0
94
+
95
+ def get_max_score(self) -> int:
96
+ """Get max possible score from Z-machine."""
97
+ if self.state and hasattr(self.state, "max_score"):
98
+ return self.state.max_score
99
+ try:
100
+ return self.env.env.get_max_score() if self.env else 0
101
+ except Exception:
102
+ return 0
103
+
104
+ def get_inventory_zmachine(self) -> list:
105
+ """Get inventory directly from Z-machine (list of objects)."""
106
+ try:
107
+ return [str(obj) for obj in self.env.env.get_inventory()]
108
+ except Exception:
109
+ return self.state.inventory if (self.state and hasattr(self.state, "inventory")) else []
110
+
111
+ def get_valid_actions_zmachine(self) -> list[str]:
112
+ """Get valid actions directly from Z-machine (object tree)."""
113
+ try:
114
+ return self.env.get_valid_actions()
115
+ except Exception:
116
+ return ["north", "south", "east", "west", "up", "down", "look", "inventory", "take all"]
117
+
118
+ def get_state_hash(self) -> str:
119
+ """Get world state hash from Z-machine for loop detection."""
120
+ try:
121
+ return str(self.env.env.get_state())
122
+ except Exception:
123
+ return ""
124
+
125
+ def get_player_location_zmachine(self) -> str:
126
+ """Get player location directly from Z-machine."""
127
+ try:
128
+ loc = self.env.env.get_player_location()
129
+ return str(loc) if loc else self._get_location()
130
+ except Exception:
131
+ return self._get_location()
132
+
133
+ def format_inventory(self, items: list) -> str:
134
+ """Format inventory items (clean Z-machine object names)."""
135
+ if not items:
136
+ return "Inventory: You are empty-handed."
137
+ names = []
138
+ for item in items:
139
+ s = str(item).lower()
140
+ if "parent" in s:
141
+ idx = s.index("parent")
142
+ name = str(item)[:idx].strip()
143
+ if ":" in name:
144
+ name = name.split(":", 1)[1].strip()
145
+ names.append(name)
146
+ elif ":" in str(item):
147
+ names.append(str(item).split(":", 1)[1].strip())
148
+ else:
149
+ names.append(str(item))
150
+ return f"Inventory: {', '.join(names)}"
151
+
152
+ def get_memory(self) -> str:
153
+ """Get game state summary (location/score/moves from Z-machine)."""
154
+ recent = self.history[-5:] if self.history else []
155
+ recent_str = "\n".join([f" > {a} -> {r[:60]}..." for a, r in recent]) if recent else " (none yet)"
156
+ loc = self.get_player_location_zmachine()
157
+ return f"""Current State (Z-machine):
158
+ - Location: {loc}
159
+ - Score: {self.get_score()} / {self.get_max_score()} points
160
+ - Moves: {self.get_moves()}
161
+ - Game: {self.game_name}
162
+
163
+ Recent Actions:
164
+ {recent_str}
165
+
166
+ Current Observation:
167
+ {self.state.observation if self.state else 'N/A'}"""
168
+
169
+ def get_map(self) -> str:
170
+ """Get map of explored locations."""
171
+ if not self.explored_locations:
172
+ return "Map: No locations explored yet. Try moving around!"
173
+ lines = ["Explored Locations and Exits:"]
174
+ for loc, exits in sorted(self.explored_locations.items()):
175
+ lines.append(f"\n* {loc}")
176
+ for exit_info in sorted(exits):
177
+ lines.append(f" -> {exit_info}")
178
+ lines.append(f"\n[Current] {self.get_player_location_zmachine()}")
179
+ return "\n".join(lines)
180
+
181
+
182
# Process-wide GameManager singleton, created on first use by get_game().
_game: GameManager | None = None


def get_game() -> GameManager:
    """Return the shared GameManager, creating and starting it if necessary.

    The game to load is taken from the GAME environment variable and
    defaults to "zork1".
    """
    global _game
    manager = _game
    if manager is None:
        manager = _game = GameManager()
    if manager.env is None:
        # No environment yet: start the game selected via the environment.
        manager.initialize(os.environ.get("GAME", "zork1"))
    return manager
194
+
195
+
196
+ # =============================================================================
197
+ # MCP Tools (all use Z-machine data where available)
198
+ # =============================================================================
199
+
200
@mcp.tool()
def play_action(action: str) -> str:
    """
    Execute a game command and return the result.

    Args:
        action: The command to execute (e.g., "north", "take lamp", "open mailbox")

    Returns:
        The game's response to the action
    """
    game = get_game()
    observation = game.step(action)
    state = game.state

    # A positive reward replaces the regular score/moves footer with a
    # points announcement that carries the running total.
    if state and state.reward > 0:
        footer = f"\n\n+{state.reward} points! (Total: {game.get_score()})"
    else:
        footer = f"\n\n[Score: {game.get_score()} | Moves: {game.get_moves()}]"

    ending = "\n\nGAME OVER" if (state and state.done) else ""
    return observation + footer + ending
218
+
219
+
220
@mcp.tool()
def memory() -> str:
    """
    Get current game state summary (location, score, moves, recent history).
    Location and score come from Z-machine directly.
    """
    # Thin tool wrapper: the summary text is produced by GameManager.get_memory().
    game = get_game()
    return game.get_memory()
227
+
228
+
229
@mcp.tool()
def inventory() -> str:
    """
    Check what the player is carrying.
    Data comes directly from Z-machine get_inventory().
    """
    # Fetch the raw item list, then let the manager turn it into display text.
    game = get_game()
    return game.format_inventory(game.get_inventory_zmachine())
238
+
239
+
240
@mcp.tool()
def get_map() -> str:
    """
    Get a map of explored locations and connections.
    """
    # Thin tool wrapper: the listing is produced by GameManager.get_map().
    game = get_game()
    return game.get_map()
246
+
247
+
248
@mcp.tool()
def get_valid_actions() -> str:
    """
    Get a list of valid actions from the Z-machine object tree.
    Used by Critic for fast validation before LLM evaluation.
    """
    game = get_game()
    try:
        actions = game.get_valid_actions_zmachine()
        # Only the first 30 actions are listed.
        listing = ", ".join(actions[:30])
    except Exception:
        return "Could not get valid actions (spacy may be required)."
    return "Valid actions: " + listing
260
+
261
+
262
@mcp.tool()
def get_state_hash() -> str:
    """
    Get a hash of the current Z-machine world state for loop detection.
    """
    digest = get_game().get_state_hash()
    # Anything longer than 80 characters is shortened and marked with "...".
    if len(digest) > 80:
        return f"State hash: {digest[:80]}..."
    return f"State hash: {digest}"


# =============================================================================
# Run the server
# =============================================================================

if __name__ == "__main__":
    mcp.run()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # HF Spaces already has gradio and huggingface_hub pre-installed
2
+ # Do not add them here or you may get version conflicts
3
+
4
+ # Agent dependencies (these are provided by the evaluation infrastructure)
5
+ # Do not add jericho, fastmcp here - they are installed during evaluation
6
+
7
+ # Add any additional packages your agent needs below:
8
+ # numpy
9
+ # requests
run.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Run from text-adventure-template (the parent dir) so run_agent.py is found.
# Abort if the directory change fails rather than running from the wrong place.
cd "$(dirname "$0")/.." || exit 1
# Export the default so the Python child process actually sees it; a plain
# assignment only sets a shell variable that is not passed to run_agent.py.
export USE_LOCAL_MODEL="${USE_LOCAL_MODEL:-true}"
# usage: ./run.sh [game] [steps]
# e.g. ./run.sh lostpig 15 or ./run.sh zork1 20
exec uv run python run_agent.py --agent submission_template --game "${1:-lostpig}" -v -n "${2:-15}"