dspy-zmachine

Runtime error

App Files Files Community

janisaiad committed on Feb 12

Commit

a9fa50a

0 Parent(s):

Update

Browse files

Files changed (7) hide show

.gitignore +22 -0
README.md +59 -0
agent.py +667 -0
app.py +71 -0
mcp_server.py +277 -0
requirements.txt +9 -0
run.sh +7 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,22 @@

+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+*.egg-info/
+dist/
+build/
+# Environment
+.env
+.venv/
+venv/
+# IDE
+.vscode/
+.idea/
+# OS
+.DS_Store
+Thumbs.db

README.md ADDED Viewed

	@@ -0,0 +1,59 @@

+---
+title: Text Adventure Agent Submission
+emoji: "\U0001F5FA"
+colorFrom: green
+colorTo: blue
+sdk: gradio
+sdk_version: "5.12.0"
+app_file: app.py
+pinned: false
+license: mit
+---
+# Text Adventure Agent Submission
+## Overview
+This is my submission for the Text Adventure Agent assignment. My agent uses the ReAct pattern to play text adventure games via MCP.
+## Approach
+<!-- Describe your approach here -->
+- What strategy does your agent use?
+- What tools did you implement in your MCP server?
+- Any interesting techniques or optimizations?
+## Files
+| File | Description |
+|------|-------------|
+| `agent.py` | ReAct agent with `StudentAgent` class |
+| `mcp_server.py` | MCP server with game interaction tools |
+| `app.py` | Gradio interface for HF Space |
+| `requirements.txt` | Additional dependencies |
+## How to Submit
+1. Fork the template Space: `https://huggingface.co/spaces/LLM-course/text-adventure-template`
+2. Clone your fork locally
+3. Implement your agent in `agent.py` and `mcp_server.py`
+4. Test locally (see below)
+5. Push your changes to your Space
+6. Submit your Space URL on the course platform
+## Local Testing
+```bash
+# Install dependencies
+pip install -r requirements.txt
+# Test the MCP server interactively
+fastmcp dev mcp_server.py
+# Run your agent on a game
+python run_agent.py --agent . --game lostpig -v -n 20
+# Run evaluation
+python -m evaluation.evaluate -s . -g lostpig -t 3
+```

agent.py ADDED Viewed

	@@ -0,0 +1,667 @@

+"""
+ZorkGPT-Lite: Full orchestrator with Agent, Critic, Extractor, StrategyGen.
+Uses Z-machine data (memory, inventory, get_valid_actions) + LLM for reasoning.
+"""
+import asyncio
+import json
+import os
+import re
+from dataclasses import dataclass, field
+from typing import Optional
+from dotenv import load_dotenv
+load_dotenv()
+try:
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+    import torch
+    _LOCAL_INFERENCE_AVAILABLE = True
+except ImportError:
+    _LOCAL_INFERENCE_AVAILABLE = False
+from huggingface_hub import InferenceClient
+# =============================================================================
+# LLM Configuration
+# =============================================================================
+# Model requested from the Hugging Face Inference API by the remote path of call_llm().
+LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
+# Local inference is opt-in through USE_LOCAL_MODEL ("true", "1" or "yes", case-insensitive).
+_USE_LOCAL = os.getenv("USE_LOCAL_MODEL", "false").lower() in ("true", "1", "yes")
+# Checkpoint loaded by _ensure_local_model(); can be overridden with HF_MODEL.
+_HF_MODEL_LOCAL = os.getenv("HF_MODEL", "Qwen/Qwen2.5-7B-Instruct")
+_hf_token = os.getenv("HF_TOKEN")
+# The remote client is built whenever local inference is disabled OR transformers/torch
+# could not be imported; that path cannot work without a token, so we fail at import time.
+if not _USE_LOCAL or not _LOCAL_INFERENCE_AVAILABLE:
+    if not _hf_token:
+        raise ValueError("HF_TOKEN not found. Set it in your .env file (or use USE_LOCAL_MODEL=true with transformers).")
+    LLM_CLIENT: Optional[InferenceClient] = InferenceClient(token=_hf_token)
+else:
+    LLM_CLIENT = None
+# Lazily populated by _ensure_local_model() on the first local call.
+_local_tokenizer = None
+_local_model = None
+def _ensure_local_model() -> None:
+    """Load the local tokenizer and model on first use; later calls are no-ops.
+    Does nothing when local inference is disabled or transformers/torch are missing.
+    """
+    global _local_tokenizer, _local_model
+    already_loaded = _local_model is not None
+    local_enabled = _LOCAL_INFERENCE_AVAILABLE and _USE_LOCAL
+    if already_loaded or not local_enabled:
+        return
+    on_gpu = torch.cuda.is_available()
+    device = "cuda" if on_gpu else "cpu"
+    if _hf_token:
+        token_kw = {"token": _hf_token}
+    else:
+        token_kw = {}
+        print("[INFO] No HF_TOKEN; gated models may fail. Set HF_TOKEN in .env for e.g. Gemma.")
+    _local_tokenizer = AutoTokenizer.from_pretrained(_HF_MODEL_LOCAL, **token_kw)
+    # we use half precision and automatic placement on GPU, full precision on CPU
+    dtype = torch.float16 if on_gpu else torch.float32
+    placement = "auto" if on_gpu else None
+    model = AutoModelForCausalLM.from_pretrained(
+        _HF_MODEL_LOCAL,
+        torch_dtype=dtype,
+        device_map=placement,
+        **token_kw,
+    )
+    if not on_gpu:
+        model = model.to(device)
+    _local_model = model
+    print(f"[INFO] Local model loaded: {_HF_MODEL_LOCAL} on {device}")
+def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 400) -> str:
+    """Call the LLM (API or local) and return its text reply.
+    Args:
+        prompt: User message.
+        system_prompt: System message.
+        seed: Sampling seed; only forwarded to the remote API (the local path decodes greedily).
+        max_tokens: Maximum number of tokens to generate.
+    Returns:
+        The reply text. An empty string is returned when the API reply carries no content,
+        so callers can safely call ``.strip()`` on the result.
+    Raises:
+        RuntimeError: If local inference is selected but the model could not be loaded.
+    """
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": prompt},
+    ]
+    if _USE_LOCAL and _LOCAL_INFERENCE_AVAILABLE:
+        _ensure_local_model()
+        if _local_tokenizer is None or _local_model is None:
+            raise RuntimeError("Local model failed to load.")
+        if hasattr(_local_tokenizer, "apply_chat_template"):
+            formatted = _local_tokenizer.apply_chat_template(
+                messages, tokenize=False, add_generation_prompt=True
+            )
+        else:
+            formatted = f"{system_prompt}\n\nUser: {prompt}\n\nAssistant:"
+        # we add "THOUGHT:" to prime small models (Gemma) to output the expected format
+        formatted = formatted.rstrip() + "\nTHOUGHT:"
+        inputs = _local_tokenizer(formatted, return_tensors="pt")
+        model_device = next(_local_model.parameters()).device
+        inputs = {k: (v.to(model_device) if isinstance(v, torch.Tensor) else v) for k, v in inputs.items()}
+        with torch.no_grad():
+            gen_out = _local_model.generate(
+                **inputs,
+                max_new_tokens=max_tokens,
+                pad_token_id=_local_tokenizer.eos_token_id,
+                do_sample=False,
+            )
+        # we keep only the newly generated tokens (everything after the prompt)
+        out_slice = gen_out[0][inputs["input_ids"].shape[1]:]
+        if out_slice.is_cuda:
+            out_slice = out_slice.cpu()
+        raw = _local_tokenizer.decode(out_slice, skip_special_tokens=True).strip()
+        # the prompt always ends with the "THOUGHT:" primer, so we restore it in the reply
+        if raw and not raw.upper().startswith("THOUGHT:"):
+            raw = "THOUGHT: " + raw
+        return raw
+    response = LLM_CLIENT.chat.completions.create(
+        model=LLM_MODEL,
+        messages=messages,
+        temperature=0.0,
+        max_tokens=max_tokens,
+        seed=seed,
+    )
+    # message.content can be None; we normalise to "" to honour the declared str return type
+    return response.choices[0].message.content or ""
+@dataclass
+class RunResult:
+    """Result of running the agent. Do not modify this class."""
+    # Score reported at the end of the run.
+    final_score: int
+    # Maximum achievable score as assumed by the agent.
+    max_score: int
+    # Number of tool calls that completed without raising.
+    moves: int
+    # First line of every observation seen, used as the location name.
+    locations_visited: set[str]
+    # True when the last observation contained a game-over phrase.
+    game_completed: bool
+    # Optional error description; None when the run finished normally.
+    error: Optional[str] = None
+    # One (thought, tool call, truncated observation) tuple per step.
+    history: list[tuple[str, str, str]] = field(default_factory=list)
+# =============================================================================
+# Prompts: Agent, Critic, StrategyGen
+# =============================================================================
+# System prompt for the acting LLM; the THOUGHT/TOOL/ARGS format is what _parse_response reads.
+AGENT_PROMPT = """You are an expert text adventure player. MAXIMIZE YOUR SCORE.
+AVAILABLE MCP TOOLS:
+- play_action: Execute game commands (north, take lamp, open mailbox, get up, etc.)
+- memory: Get current state from Z-machine
+- inventory: Get items from Z-machine
+- get_map: Explored locations
+CRITICAL: You MUST respond in this exact format (no markdown, no extra text):
+THOUGHT: <one sentence about what to do next>
+TOOL: play_action
+ARGS: {"action": "<command>"}
+Universal rules (apply to any text adventure):
+- If game says "get out of bed first" or "have to get up": try get up, stand
+- If "too dark" or "can't see": light lamp, take lamp
+- If "can't go that way": try different direction
+- If "don't understand": try simpler verb (look, examine, take X)
+- Explore directions (north, south, east, west). Take items. Do NOT repeat same action in a loop."""
+# System prompt for the critic LLM; the "SCORE: <float>" part is what _parse_critic_score reads.
+CRITIC_PROMPT = """You evaluate whether a proposed game action is good.
+Given: current observation, valid actions from Z-machine, proposed action.
+Score 0-1: 0=bad (invalid, repeated, no progress), 1=good (valid, progresses).
+Respond in one line: SCORE: <0.0 to 1.0> REASON: <brief reason>
+If action is in valid_actions or is a common command (look, north, take X), score >= 0.6."""
+# System prompt for the periodic strategy summariser.
+STRATEGY_PROMPT = """Analyze this gameplay history and extract 3-5 strategic insights.
+Format each as a short rule. Example: "In dark games, get lamp before exploring."
+Output only the insights, one per line."""
+# =============================================================================
+# StudentAgent: Full ZorkGPT-Lite Orchestrator
+# =============================================================================
+class StudentAgent:
+    """
+    Full orchestrator: Extractor (Z-machine) -> Agent -> Critic (Z-machine + LLM) -> Execute.
+    StrategyGen updates knowledge_base every STRATEGY_UPDATE_INTERVAL turns.
+    """
+    # Minimum critic score for a proposed action to be accepted.
+    CRITIC_THRESHOLD = 0.5
+    # Maximum number of critic rounds per step before the action is accepted anyway.
+    MAX_CRITIC_RETRIES = 3
+    # Number of steps between two StrategyGen updates of knowledge_base.
+    STRATEGY_UPDATE_INTERVAL = 12
+    # Timeout, in seconds, for the optional get_valid_actions tool call.
+    VALID_ACTIONS_TIMEOUT = 0.8
+    def __init__(self):
+        """Create an agent with empty play state and the seed knowledge base."""
+        # we start with scoring state
+        self.score: int = 0
+        self.max_score: int = 350
+        self.steps_without_score: int = 0
+        # we keep action bookkeeping to detect loops and avoid known-bad commands
+        self.history: list[dict] = []
+        self.recent_actions: list[str] = []
+        self.failed_actions: set[str] = set()
+        self.seen_state_hashes: set[str] = set()
+        # we seed the strategy notes that StrategyGen extends later
+        self.knowledge_base: str = "General: Explore, take items, use lamp before dark. Try get up if stuck. Try east/north when south fails."
+    async def run(
+        self,
+        client,
+        game: str,
+        max_steps: int,
+        seed: int,
+        verbose: bool = False,
+    ) -> RunResult:
+        """Run the full orchestrator loop.
+        Each step gathers Z-machine context, asks the agent LLM for an action, vets it
+        with the critic when the fast check is unsure, applies the loop/failure overrides,
+        executes the tool call and only then judges the executed action from its own result.
+        Args:
+            client: Connected MCP client exposing ``list_tools`` and ``call_tool``.
+            game: Name of the game; not used inside this method.
+            max_steps: Maximum number of loop iterations.
+            seed: Base seed; ``seed + step`` (plus the retry index) is passed to the LLM.
+            verbose: Print observations, thoughts and tool calls when true.
+        Returns:
+            RunResult with the best score seen, the number of tool calls that completed,
+            the visited location names and the per-step history.
+        """
+        locations_visited = set()
+        history = []
+        moves = 0
+        tool_names = [t.name for t in await client.list_tools()]
+        # we reset every per-run field so a reused agent does not report a stale score
+        self.failed_actions = set()
+        self.recent_actions = []
+        self.history = []
+        self.steps_without_score = 0
+        self.score = 0
+        # we get initial observation
+        result = await client.call_tool("play_action", {"action": "look"})
+        observation = self._extract_result(result)
+        loc = observation.split("\n")[0] if observation else "Unknown"
+        locations_visited.add(loc)
+        self._update_score(observation)
+        if verbose:
+            print(f"\n{observation}")
+        for step in range(1, max_steps + 1):
+            # we extract context from Z-machine (no LLM)
+            context = await self._extract_context(client)
+            # we build agent prompt and get action from Agent LM (max_tokens 250 for small models)
+            prompt = self._build_agent_prompt(observation, context)
+            thought, tool_name, tool_args, action = self._propose_action(
+                prompt, observation, seed + step, tool_names, 250, verbose
+            )
+            # we run Critic: fast check first; LLM only when uncertain (saves ~1 LLM call/step)
+            accepted = self._critic_fast_check(action, context.get("valid_actions", ""))
+            for attempt in range(self.MAX_CRITIC_RETRIES):
+                if accepted:
+                    break
+                critic_prompt = f"""Observation: {observation[:300]}
+Valid actions: {context.get('valid_actions', 'unknown')}
+Proposed: {action}
+Score and reason?"""
+                try:
+                    critic_resp = call_llm(critic_prompt, CRITIC_PROMPT, seed + step + attempt, max_tokens=80)
+                    score = self._parse_critic_score(critic_resp or "")
+                except Exception as e:
+                    # a broken critic must not stop play: we accept the proposal as-is
+                    if verbose:
+                        print(f"[ERROR] Critic call failed: {e}")
+                    score = self.CRITIC_THRESHOLD
+                if score >= self.CRITIC_THRESHOLD:
+                    accepted = True
+                    break
+                if attempt < self.MAX_CRITIC_RETRIES - 1:
+                    feedback = f"Action '{action}' rejected (score {score:.1f}). Try different."
+                    prompt = self._build_agent_prompt(observation, context, feedback)
+                    thought, tool_name, tool_args, action = self._propose_action(
+                        prompt, observation, seed + step + attempt, tool_names, 400, verbose
+                    )
+                else:
+                    accepted = True
+            # we loop detection: try result-based heuristic first, then generic verb cycle
+            if len(self.recent_actions) >= 3 and len(set(self.recent_actions[-3:])) == 1:
+                res = self._result_based_heuristic(observation)
+                action = res if res is not None else self._generic_verb_cycle()
+                # an override is always a game command, so it must go to play_action
+                tool_name, tool_args = "play_action", {"action": action}
+                if verbose:
+                    print(f"[WARNING] Loop detected - trying '{action}' instead")
+            # we skip actions that recently failed
+            if action.lower() in self.failed_actions:
+                action = self._generic_verb_cycle()
+                tool_name, tool_args = "play_action", {"action": action}
+            # we prefer valid_actions when stuck (no score for many steps)
+            if self.steps_without_score >= 5 and context.get("valid_actions"):
+                va = context["valid_actions"].lower()
+                recent_lower = [a.lower() for a in self.recent_actions[-3:]]
+                for cand in ["take all", "take lamp", "take keys", "open", "examine", "north", "east"]:
+                    if cand in va and cand not in self.failed_actions and cand not in recent_lower:
+                        action = cand
+                        tool_name, tool_args = "play_action", {"action": action}
+                        break
+            self.recent_actions.append(action)
+            if len(self.recent_actions) > 10:
+                self.recent_actions = self.recent_actions[-10:]
+            if verbose:
+                print(f"\n--- Step {step} ---")
+                print(f"[THOUGHT] {thought}")
+                print(f"[TOOL] {tool_name}({tool_args})")
+            # we execute
+            old_score = self.score
+            try:
+                result = await client.call_tool(tool_name, tool_args)
+                observation = self._extract_result(result)
+                moves += 1
+            except Exception as e:
+                observation = f"Error: {e}"
+                if verbose:
+                    print(f"[ERROR] {e}")
+            loc = observation.split("\n")[0] if observation else "Unknown"
+            locations_visited.add(loc)
+            # we judge the action from ITS OWN result (rejection, no movement, no score)
+            if tool_name == "play_action":
+                if self._is_failure_result(observation, action):
+                    self.failed_actions.add(action.lower())
+                else:
+                    self.failed_actions.discard(action.lower())
+            # we track score progress and reinforce what worked
+            self._update_score(observation)
+            if self.score > old_score:
+                self.steps_without_score = 0
+                if len(self.knowledge_base) < 800:
+                    self.knowledge_base = self.knowledge_base + f"\nScore: {action} worked."
+            else:
+                self.steps_without_score += 1
+            history.append((thought, f"{tool_name}({tool_args})", observation[:100]))
+            self.history.append({"step": step, "thought": thought, "action": action, "result": observation[:200]})
+            if len(self.history) > 20:
+                self.history = self.history[-20:]
+            if verbose:
+                print(f"[RESULT] {observation[:200]}...")
+            # we update knowledge_base every N turns (StrategyGen)
+            if step % self.STRATEGY_UPDATE_INTERVAL == 0 and self.history:
+                strategy_hist = "\n".join([f"Step {h['step']}: {h['action']} -> {h['result'][:80]}" for h in self.history[-15:]])
+                strat_prompt = f"History:\n{strategy_hist}\n\nCurrent score: {self.score}\nExtract insights:"
+                try:
+                    insights = call_llm(strat_prompt, STRATEGY_PROMPT, seed + step, max_tokens=150)
+                    if insights and insights.strip():
+                        self.knowledge_base = self.knowledge_base + "\n" + insights.strip()[:300]
+                except Exception as e:
+                    # strategy notes are best-effort; play continues without them
+                    if verbose:
+                        print(f"[ERROR] Strategy update failed: {e}")
+            if self._is_game_over(observation):
+                if verbose:
+                    print("\n*** GAME OVER ***")
+                break
+        return RunResult(
+            final_score=self.score,
+            max_score=self.max_score,
+            moves=moves,
+            locations_visited=locations_visited,
+            game_completed=self._is_game_over(observation),
+            history=history,
+        )
+    def _propose_action(
+        self,
+        prompt: str,
+        observation: str,
+        llm_seed: int,
+        tool_names: list[str],
+        max_tokens: int,
+        verbose: bool = False,
+    ) -> tuple[str, str, dict, str]:
+        """Ask the agent LLM for one tool call, falling back to the heuristic on error or empty reply.
+        Returns:
+            (thought, tool_name, tool_args, action); ``action`` is "look" when the chosen
+            tool is not ``play_action``.
+        """
+        try:
+            response = call_llm(prompt, AGENT_PROMPT, llm_seed, max_tokens=max_tokens)
+        except Exception as e:
+            # an LLM outage must not abort the whole run; the heuristic takes over
+            if verbose:
+                print(f"[ERROR] LLM call failed: {e}")
+            response = ""
+        if not response or not response.strip():
+            response = self._heuristic_action(observation)
+            if verbose:
+                print(f"[DEBUG] LLM empty, heuristic: {response[:80]}")
+        thought, tool_name, tool_args = self._parse_response(response, tool_names)
+        tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)
+        action = tool_args.get("action", "look") if tool_name == "play_action" else "look"
+        return thought, tool_name, tool_args, action
+    async def _extract_context(self, client) -> dict:
+        """Extractor: collect Z-machine data through MCP tools (no LLM involved).
+        Returns a dict with the keys "memory", "inventory", "map" and "valid_actions";
+        a value is "" when its tool call fails or, for valid actions, is disabled.
+        """
+        ctx = {}
+        for tool_name, key in (("memory", "memory"), ("inventory", "inventory"), ("get_map", "map")):
+            try:
+                reply = await client.call_tool(tool_name, {})
+                ctx[key] = self._extract_result(reply)
+            except Exception:
+                ctx[key] = ""
+        # we skip get_valid_actions by default (can block on spacy); set USE_VALID_ACTIONS=true to enable
+        valid_actions = ""
+        enabled = os.getenv("USE_VALID_ACTIONS", "false").lower() in ("true", "1", "yes")
+        if enabled:
+            try:
+                reply = await asyncio.wait_for(
+                    client.call_tool("get_valid_actions", {}),
+                    timeout=self.VALID_ACTIONS_TIMEOUT,
+                )
+                valid_actions = self._extract_result(reply)
+            except Exception:
+                # we treat a timeout like any other failure
+                valid_actions = ""
+        ctx["valid_actions"] = valid_actions
+        return ctx
+    def _build_agent_prompt(self, observation: str, context: dict, feedback: str = "") -> str:
+        """Build the agent prompt from the knowledge base, Z-machine context and history.
+        Args:
+            observation: Latest game text.
+            context: Dict as returned by _extract_context; missing or empty keys are skipped.
+            feedback: Optional critic feedback appended as a [FEEDBACK] line.
+        Returns:
+            The prompt as newline-joined sections. The output is deterministic for a given
+            agent state, so seeded LLM calls are reproducible.
+        """
+        parts = [f"Knowledge base:\n{self.knowledge_base[:500]}\n"]
+        parts.append(f"Current score: {self.score}")
+        if context.get("valid_actions"):
+            parts.append(f"\nValid actions (prefer these): {context['valid_actions'][:200]}")
+        if context.get("memory"):
+            parts.append(f"\nZ-machine state:\n{context['memory'][:350]}")
+        if context.get("map"):
+            parts.append(f"\nMap:\n{context['map'][:250]}")
+        if context.get("inventory"):
+            parts.append(f"\n{context['inventory']}")
+        if self.failed_actions:
+            # set iteration order varies between interpreter runs; we sort to keep the prompt stable
+            parts.append(f"\nAvoid (recently failed): {', '.join(sorted(self.failed_actions)[:8])}")
+        if self.history:
+            parts.append("\nRecent:")
+            for h in self.history[-4:]:
+                parts.append(f"  > {h.get('action','?')} -> {h.get('result','')[:55]}...")
+        if feedback:
+            parts.append(f"\n[FEEDBACK] {feedback}")
+        parts.append(f"\nCurrent observation:\n{observation}")
+        parts.append("\nWhat do you do next?")
+        return "\n".join(parts)
+    def _critic_fast_check(self, action: str, valid_actions_str: str) -> bool:
+        """Fast validation: is the action likely valid without asking the critic LLM?
+        Args:
+            action: Proposed game command.
+            valid_actions_str: Text returned by get_valid_actions, or "" when unavailable.
+        Returns:
+            True when the action appears in the valid-actions text or starts with a common
+            verb; False otherwise, which sends the action to the LLM critic.
+        """
+        action_lower = action.lower().strip()
+        if not action_lower:
+            return False
+        if valid_actions_str and "valid actions:" in valid_actions_str.lower():
+            va = valid_actions_str.lower()
+            if action_lower in va or any(action_lower.startswith(a.strip()) for a in va.split(",")[:20] if a.strip()):
+                return True
+        common = {"look", "inventory", "north", "south", "east", "west", "up", "down", "take", "drop", "open", "examine", "read", "get"}
+        # we compare the whole first word so that e.g. "upset" is not mistaken for "up"
+        if action_lower.split()[0] in common:
+            return True
+        # anything else is uncertain: returning True here would make the critic unreachable
+        return False
+    def _parse_critic_score(self, resp: str) -> float:
+        """Parse the critic score from a "SCORE: <number>" reply.
+        Args:
+            resp: Raw critic reply; may be empty.
+        Returns:
+            The score clamped to [0.0, 1.0], or 0.5 when no score can be found.
+        """
+        # we match one well-formed number so that a trailing period ("0.7.") still parses
+        m = re.search(r"SCORE:\s*(\d+(?:\.\d+)?|\.\d+)", resp or "", re.IGNORECASE)
+        if m is None:
+            return 0.5
+        # the prompt asks for 0-1; we clamp replies such as "SCORE: 8"
+        return min(1.0, max(0.0, float(m.group(1))))
+    # =========================================================================
+    # Universal verb vocabulary (game-agnostic) per common_structure.md
+    # =========================================================================
+    # we cycle through these when no result-based pattern matches;
+    # _generic_verb_cycle continues from the entry after the most recent action
+    UNIVERSAL_VERB_CYCLE = [
+        "look", "examine", "inventory",
+        "north", "south", "east", "west", "up", "down", "in", "out",
+        "take all", "take lamp", "take keys", "take wallet", "take phone", "take sword",
+        "open mailbox", "open door", "open", "open chest",
+        "get up", "stand", "rise", "wake",
+        "light lamp", "turn on lamp", "wear", "use", "read",
+    ]
+    def _result_based_heuristic(self, result_text: str) -> str | None:
+        """Game-agnostic heuristic from result text per common_structure.md.
+        The rules are tried in order and the first match wins.
+        Args:
+            result_text: Latest game output.
+        Returns:
+            A suggested command, or None when no rule applies (the caller then falls
+            back to the generic verb cycle).
+        """
+        r = result_text.lower()
+        recent3 = [a.lower() for a in self.recent_actions[-3:]]
+        recent5 = [a.lower() for a in self.recent_actions[-5:]]
+        failed = self.failed_actions
+        # we prioritize taking visible objects when room lists them (905, etc);
+        # "phone" also matches "telephone", and a recent attempt is never repeated
+        if "phone" in r and "take phone" not in recent3 and "take phone" not in failed:
+            return "take phone"
+        if "wallet" in r and "take wallet" not in failed and "take wallet" not in recent3:
+            return "take wallet"
+        if "keys" in r and "take keys" not in failed and "take keys" not in recent3:
+            return "take keys"
+        # prerequisite: get out of bed, have to get up
+        if "out of bed" in r or "have to get up" in r:
+            return "get up"
+        if "get up" in r and "have to" in r:
+            return "get up"
+        if "stand" in r and ("have to" in r or "must" in r):
+            return "stand"
+        # light: too dark, can't see
+        if "too dark" in r or "can't see" in r:
+            for cmd in ["light lamp", "turn on lamp", "take lamp"]:
+                if cmd not in recent3:
+                    return cmd
+            return "light lamp"
+        # movement block: wall, can't go that way; whole-word match so "wallet" is not a wall
+        if "can't go" in r or "too narrow" in r or re.search(r"\bwalls?\b", r):
+            return None  # we let generic cycle pick next direction
+        # parser rejection: don't understand, can't
+        if "don't understand" in r:
+            return "look"
+        if "you can't" in r or "can't do that" in r:
+            return "examine"
+        # object: take X when objects mentioned (keys, wallet, lamp, etc)
+        common_objects = ["telephone", "phone", "keys", "wallet", "lamp", "sword", "treasure", "book", "rope", "knife", "chest", "dresser"]
+        for word in common_objects:
+            if word not in r:
+                continue
+            action_try = "take phone" if word == "telephone" else f"take {word}"
+            if action_try in failed or action_try in recent5:
+                continue
+            return action_try
+        if "dresser" in r and "open dresser" not in recent3:
+            return "open dresser"
+        for obj in self._extract_objects_from_room(result_text):
+            action_try = f"take {obj}"
+            if action_try in failed or action_try in recent5:
+                continue
+            return action_try
+        if "mailbox" in r and "open mailbox" not in recent3:
+            return "open mailbox"
+        if "open" in r and "closed" in r:
+            for word in ["door", "mailbox", "chest", "box"]:
+                if word in r:
+                    return f"open {word}"
+        if "open" in r and "door" in r:
+            return "open door"
+        # no such thing, I don't see
+        if "don't see" in r or "no such" in r:
+            return "look"
+        # only go X (extract direction)
+        for d in ["north", "south", "east", "west"]:
+            if f"only go {d}" in r or f"only {d}" in r:
+                return d
+        # lostpig / general: south fails with "trouble" -> try east (forest)
+        if "big trouble" in r:
+            south_count = sum(1 for a in recent5 if a == "south")
+            if south_count >= 2:
+                return "east"
+            return "north"
+        # forest dark / pig somewhere: try forest first, then try west/south when stuck
+        if "forest" in r and "dark" in r:
+            recent6 = [a.lower() for a in self.recent_actions[-6:]]
+            east_count = recent6.count("east")
+            north_count = recent6.count("north")
+            if east_count + north_count >= 4:
+                return "west"
+            if east_count < 2:
+                return "east"
+            return "north"
+        return None
+    def _extract_objects_from_room(self, text: str) -> list[str]:
+        """Extract object names from a room description for take/examine.
+        Args:
+            text: Room description or game output.
+        Returns:
+            Up to five lower-cased, de-duplicated names in order of first appearance.
+        """
+        r = text.lower()
+        objects = []
+        # patterns: "there is a X", "you see an X", "are the X", "on the Y are some X";
+        # the article is matched as a whole word so it cannot swallow the noun's first letters
+        for m in re.finditer(r"\b(?:there is|there are|you see|on the \w+ are|are)\s+(?:a|an|the|some)\s+(\w+)", r):
+            objects.append(m.group(1))
+        for m in re.finditer(r"\b(telephone|phone|wallet|keys|lamp|sword|book|rope|knife|chest|mailbox)\b", r):
+            objects.append(m.group(1))
+        # dict.fromkeys removes duplicates while keeping first-seen order
+        return list(dict.fromkeys(objects))[:5]
+    def _generic_verb_cycle(self) -> str:
+        """Pick the next command from the universal cycle that has not failed.
+        The search starts right after the most recent action when that action is part
+        of the cycle, otherwise at the beginning; "look" is the last resort.
+        """
+        verbs = self.UNIVERSAL_VERB_CYCLE
+        size = len(verbs)
+        first = 0
+        if self.recent_actions:
+            previous = self.recent_actions[-1].lower()
+            try:
+                first = (verbs.index(previous) + 1) % size
+            except ValueError:
+                # we restart from the top when the last action is not in the cycle
+                first = 0
+        for offset in range(size):
+            candidate = verbs[(first + offset) % size]
+            if candidate in self.failed_actions:
+                continue
+            return candidate
+        return "look"
+    def _heuristic_action(self, observation: str) -> str:
+        """Build a THOUGHT/TOOL/ARGS reply without the LLM.
+        The result-based heuristic is tried first; the generic verb cycle is the fallback.
+        """
+        suggestion = self._result_based_heuristic(observation)
+        action = self._generic_verb_cycle() if suggestion is None else suggestion
+        lines = (
+            f"THOUGHT: Try {action}.",
+            "TOOL: play_action",
+            f"ARGS: {{\"action\": \"{action}\"}}",
+        )
+        return "\n".join(lines)
+    def _parse_response(self, response: str, valid_tools: list[str]) -> tuple[str, str, dict]:
+        """Parse an LLM reply in THOUGHT/TOOL/ARGS format.
+        Args:
+            response: Raw LLM reply.
+            valid_tools: Known tool names; not consulted here (see _validate_tool_call).
+        Returns:
+            (thought, tool_name, tool_args). ``tool_args`` is always a dict. When the reply
+            has no usable ARGS line, a command is guessed from the raw text, defaulting
+            to {"action": "look"}.
+        """
+        thought = "No reasoning provided"
+        tool_name = "play_action"
+        tool_args = {"action": "look"}
+        args_parsed = False
+        for line in response.strip().split("\n"):
+            lc = line.strip()
+            lu = lc.upper()
+            if lu.startswith("THOUGHT:"):
+                thought = lc.split(":", 1)[1].strip() or thought
+            elif lu.startswith("TOOL:"):
+                raw = lc.split(":", 1)[1].strip().lower().replace("**", "").replace("*", "")
+                tool_name = raw.split()[0] if raw else "play_action"
+            elif lu.startswith("ARGS:"):
+                s = lc.split(":", 1)[1].strip()
+                swapped = s.replace("'", '"')
+                parsed = None
+                # we try the text as written first: swapping quotes breaks apostrophes in valid JSON
+                for candidate in (s, swapped):
+                    try:
+                        parsed = json.loads(candidate)
+                        break
+                    except json.JSONDecodeError:
+                        continue
+                if isinstance(parsed, dict):
+                    tool_args = parsed
+                    args_parsed = True
+                elif isinstance(parsed, str) and parsed.strip():
+                    # a bare JSON string such as ARGS: "north" is taken as the command
+                    tool_args = {"action": parsed.strip()}
+                    args_parsed = True
+                else:
+                    m = re.search(r'"action"\s*:\s*"([^"]+)"', swapped)
+                    if m:
+                        tool_args = {"action": m.group(1)}
+                        args_parsed = True
+        # we fallback only when no ARGS were parsed, so an explicit "look" is respected;
+        # whole-word matching avoids e.g. "east" inside "at least"
+        if not args_parsed and response.strip():
+            r = response.lower()
+            for cmd in ["east", "north", "south", "west", "inventory", "take all", "take lamp"]:
+                if re.search(rf"\b{re.escape(cmd)}\b", r):
+                    tool_args = {"action": cmd}
+                    break
+        return thought, tool_name, tool_args
+    def _validate_tool_call(self, tool_name: str, tool_args: dict, valid_tools: list[str]) -> tuple[str, dict]:
+        """Validate and fix a tool call.
+        Args:
+            tool_name: Tool requested by the LLM.
+            tool_args: Parsed arguments; non-dict values are tolerated and repaired.
+            valid_tools: Tool names offered by the server.
+        Returns:
+            (tool_name, tool_args). Unknown tools become "play_action". For play_action the
+            "action" value is lower-cased, stripped of markdown asterisks and extra spaces,
+            has unsupported verbs mapped to supported ones, and is "look" when empty.
+        """
+        if tool_name not in valid_tools:
+            tool_name = "play_action"
+        if not isinstance(tool_args, dict):
+            # a bare string is taken as the command; anything else carries no usable arguments
+            tool_args = {"action": tool_args} if isinstance(tool_args, str) else {}
+        if tool_name == "play_action":
+            raw_action = tool_args.get("action", "look")
+            action = raw_action if isinstance(raw_action, str) else "look"
+            # we clean first so that markdown such as "**grab** lamp" still reaches the verb mapping
+            words = action.lower().replace("*", "").split()
+            invalid = {"check": "examine", "inspect": "examine", "search": "look", "grab": "take", "pick": "take"}
+            if words[:2] == ["pick", "up"]:
+                words = ["take"] + words[2:]
+            elif words and words[0] in invalid:
+                words[0] = invalid[words[0]]
+            tool_args = {**tool_args, "action": " ".join(words) or "look"}
+        return tool_name, tool_args
+    def _extract_result(self, result) -> str:
+        """Return the text carried by an MCP tool result.
+        Accepts an object with a non-empty ``content`` sequence, a non-empty list of
+        items, or anything else (which is converted with ``str``).
+        """
+        content = getattr(result, "content", None)
+        if content:
+            return content[0].text
+        if isinstance(result, list) and result:
+            head = result[0]
+            if hasattr(head, "text"):
+                return head.text
+            return str(head)
+        return str(result)
+    def _update_score(self, text: str) -> None:
+        """Raise ``self.score`` to the first score found in the text, never lowering it."""
+        patterns = (r"Score:\s*(\d+)", r"\[Score:\s*(\d+)", r"Total:\s*(\d+)")
+        for pattern in patterns:
+            found = re.search(pattern, text, re.IGNORECASE)
+            if found is None:
+                continue
+            reported = int(found.group(1))
+            if reported > self.score:
+                self.score = reported
+            # we stop at the first pattern that matches
+            return
+    def _is_game_over(self, text: str) -> bool:
+        """Tell whether the text contains a game-over phrase (case-insensitive)."""
+        lowered = text.lower()
+        for phrase in ("game over", "you have died", "you are dead", "*** you have died ***"):
+            if phrase in lowered:
+                return True
+        return False
+    def _is_failure_result(self, result: str, action: str) -> bool:
+        """Tell whether the game text signals a rejected or fruitless action.
+        Only the result text is inspected; ``action`` is accepted but not consulted.
+        """
+        lowered = result.lower()
+        parser_rejections = (
+            "don't understand", "you can't", "can't do that", "can't go that way",
+            "there is no", "no such", "you'll have to", "have to get", "get out of bed first",
+            "verb error", "not recognized", "i don't see", "can't see any",
+        )
+        game_specific = ("get in big trouble", "grunk get in big trouble")
+        for phrase in parser_rejections + game_specific:
+            if phrase in lowered:
+                return True
+        return False
+async def test_agent():
+    """Run a short local game of lostpig against the bundled MCP server and print the outcome."""
+    import sys
+    from pathlib import Path
+    from fastmcp import Client
+    from fastmcp.client.transports import StdioTransport
+    # we launch mcp_server.py (next to this file) as a subprocess with GAME set
+    server_path = Path(__file__).parent / "mcp_server.py"
+    env = os.environ.copy()
+    env["GAME"] = "lostpig"
+    transport = StdioTransport(
+        command=sys.executable,
+        args=[str(server_path)],
+        env=env,
+    )
+    agent = StudentAgent()
+    async with Client(transport) as client:
+        result = await agent.run(
+            client=client,
+            game="lostpig",
+            max_steps=10,
+            seed=42,
+            verbose=True,
+        )
+    print(f"\nFinal: score={result.final_score}, moves={result.moves}")
+# Script entry point: run the local smoke test on an asyncio event loop.
+if __name__ == "__main__":
+    import asyncio
+    asyncio.run(test_agent())

app.py ADDED Viewed

	@@ -0,0 +1,71 @@

+"""
+Hugging Face Space - Text Adventure Agent Submission
+This is a code-only Space for submitting your agent implementation.
+The evaluation is run separately.
+Files in this submission:
+- agent.py: Your ReAct agent implementation
+- mcp_server.py: Your MCP server implementation
+- requirements.txt: Additional dependencies
+To test locally:
+    fastmcp dev mcp_server.py
+    python agent.py
+"""
+import gradio as gr
+from pathlib import Path
+def read_readme() -> str:
+    """Return the text of the README.md that sits next to this file.
+    Returns:
+        The README contents decoded as UTF-8, or a short placeholder
+        document when the file does not exist.
+    """
+    readme_path = Path(__file__).parent / "README.md"
+    try:
+        # Decode explicitly as UTF-8 so the result does not depend on the host
+        # locale; reading directly also avoids an exists()/read race.
+        return readme_path.read_text(encoding="utf-8")
+    except FileNotFoundError:
+        return "# Submission\n\nNo README.md found."
+def read_file_content(filename: str) -> str:
+    """Return the text of a file located next to this module.
+    Args:
+        filename: Name of the file, relative to this module's directory.
+    Returns:
+        The file contents decoded as UTF-8, or a ``# File not found: ...``
+        placeholder line when the file does not exist.
+    """
+    file_path = Path(__file__).parent / filename
+    try:
+        # Decode explicitly as UTF-8 so source files with non-ASCII characters
+        # render the same regardless of the host locale.
+        return file_path.read_text(encoding="utf-8")
+    except FileNotFoundError:
+        return f"# File not found: {filename}"
+# Create the Gradio interface.
+# The Blocks layout is built at import time, so README.md, agent.py and
+# mcp_server.py are read once when this module is loaded.
+with gr.Blocks(title="Text Adventure Agent Submission") as demo:
+    gr.Markdown("# Text Adventure Agent Submission")
+    gr.Markdown(
+        "This Space contains a student submission for the Text Adventure Agent assignment. "
+        "Use the tabs below to view the submitted code."
+    )
+    # One tab per artifact: the README rendered as Markdown, then the two source files.
+    with gr.Tabs():
+        with gr.Tab("README"):
+            gr.Markdown(read_readme())
+        with gr.Tab("Agent Code"):
+            gr.Code(
+                value=read_file_content("agent.py"),
+                language="python",
+                label="agent.py",
+            )
+        with gr.Tab("MCP Server Code"):
+            gr.Code(
+                value=read_file_content("mcp_server.py"),
+                language="python",
+                label="mcp_server.py",
+            )
+    # Footer note shown below the tabs.
+    gr.Markdown(
+        "---\n"
+        "**Note:** This is a code submission Space. "
+        "Evaluation is performed using the evaluation script."
+    )
+# Launch the interface only when this file is executed as a script.
+if __name__ == "__main__":
+    demo.launch()

mcp_server.py ADDED Viewed

	@@ -0,0 +1,277 @@

+"""
+Student MCP Server for Text Adventure Games
+Full Z-machine integration via Jericho: inventory, location, score, moves,
+valid_actions, and state hash come directly from the Z-machine (no LLM parsing).
+"""
+import sys
+import os
+# Put the parent of this file's directory at the front of sys.path
+# (presumably so the `games` package imported below resolves; confirm against the repo layout).
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from fastmcp import FastMCP
+from games.zork_env import TextAdventureEnv
+# =============================================================================
+# Create the MCP Server
+# =============================================================================
+# Server instance on which the @mcp.tool() decorators below register their tools.
+mcp = FastMCP("Student Text Adventure Server")
+# =============================================================================
+# Game State Management (Z-machine direct access via Jericho)
+# =============================================================================
+class GameManager:
+    """
+    Manages game state with direct Z-machine access through Jericho FrotzEnv.
+    All structured data (inventory, location, score, valid_actions) comes from
+    the Z-machine, not from text parsing.
+    """
+    # Commands that count as movement when maintaining the explored-location map.
+    _MOVEMENT_COMMANDS = frozenset({
+        "north", "south", "east", "west", "up", "down", "enter", "exit",
+        "n", "s", "e", "w", "u", "d",
+        "northeast", "northwest", "southeast", "southwest",
+        "ne", "nw", "se", "sw", "in", "out",
+    })
+    # Number of (action, observation) pairs kept in self.history.
+    _HISTORY_LIMIT = 50
+    def __init__(self):
+        """Create an empty manager; no game is loaded until initialize() runs."""
+        self.env: TextAdventureEnv | None = None  # game environment wrapper
+        self.state = None  # latest state returned by env.reset() / env.step()
+        self.game_name: str = ""
+        self.history: list[tuple[str, str]] = []  # (action, observation), newest last
+        self.explored_locations: dict[str, set[str]] = {}  # location -> {"<command> -> <destination>"}
+    def initialize(self, game: str = "zork1"):
+        """Initialize or reset the game and return the opening observation."""
+        self.game_name = game
+        self.env = TextAdventureEnv(game)
+        self.state = self.env.reset()
+        self.history = []
+        self.explored_locations = {}
+        return self.state.observation
+    def step(self, action: str) -> str:
+        """Execute an action and return the resulting observation.
+        Lazily initializes the game (from the GAME environment variable,
+        default "zork1") when no environment exists yet.
+        """
+        if self.env is None:
+            self.initialize(os.environ.get("GAME", "zork1"))
+        # Capture the location BEFORE stepping: once self.state is replaced it
+        # already describes the destination, so the origin would be lost.
+        prev_loc = self._get_location()
+        self.state = self.env.step(action)
+        self.history.append((action, self.state.observation))
+        if len(self.history) > self._HISTORY_LIMIT:
+            self.history = self.history[-self._HISTORY_LIMIT:]
+        self._update_map(action, prev_loc)
+        return self.state.observation
+    def _update_map(self, action: str, prev_loc: str | None = None):
+        """Record an exit in the map when a movement command changed the location.
+        Args:
+            action: The command that was just executed.
+            prev_loc: Location the player was in before the command ran. When
+                omitted, the current location is used, in which case no exit
+                can be detected because self.state already reflects the move.
+        """
+        command = action.strip().lower()
+        if command not in self._MOVEMENT_COMMANDS:
+            return
+        if prev_loc is None:
+            prev_loc = self._get_location()
+        new_loc = getattr(self.state, "location", None) or self._extract_location(self.state.observation)
+        exits = self.explored_locations.setdefault(prev_loc, set())
+        if new_loc != prev_loc:
+            exits.add(f"{command} -> {new_loc}")
+    def _extract_location(self, observation: str) -> str:
+        """Fallback: take the first line of the observation as the location name."""
+        stripped = observation.strip()
+        # str.split never returns an empty list, so test the text itself.
+        if not stripped:
+            return "Unknown"
+        return stripped.split("\n", 1)[0].strip()
+    def _get_location(self) -> str:
+        """Get current location from state.location, else from the last observation."""
+        if self.state and hasattr(self.state, "location") and self.state.location:
+            return self.state.location
+        if self.history:
+            return self._extract_location(self.history[-1][1])
+        return "Unknown"
+    def get_score(self) -> int:
+        """Get current score from the latest state (0 before any state exists)."""
+        return self.state.score if self.state else 0
+    def get_moves(self) -> int:
+        """Get number of moves from the latest state (0 before any state exists)."""
+        return self.state.moves if self.state else 0
+    def get_max_score(self) -> int:
+        """Get max possible score from the state, else from the underlying env, else 0."""
+        if self.state and hasattr(self.state, "max_score"):
+            return self.state.max_score
+        try:
+            return self.env.env.get_max_score() if self.env else 0
+        except Exception:
+            return 0
+    def get_inventory_zmachine(self) -> list:
+        """Get inventory from the underlying env as a list of strings.
+        Falls back to state.inventory (or an empty list) when the env call fails.
+        """
+        try:
+            return [str(obj) for obj in self.env.env.get_inventory()]
+        except Exception:
+            return self.state.inventory if (self.state and hasattr(self.state, "inventory")) else []
+    def get_valid_actions_zmachine(self) -> list[str]:
+        """Get valid actions from the env, or a fixed basic command list on failure."""
+        try:
+            return self.env.get_valid_actions()
+        except Exception:
+            return ["north", "south", "east", "west", "up", "down", "look", "inventory", "take all"]
+    def get_state_hash(self) -> str:
+        """Get a world state hash for loop detection.
+        Returns:
+            The env's own world-state hash when it offers one, otherwise the
+            string form of its raw state, or "" when neither is available.
+        """
+        try:
+            # Prefer a real, fixed-length hash over the string form of the raw state.
+            return str(self.env.env.get_world_state_hash())
+        except Exception:
+            pass
+        try:
+            return str(self.env.env.get_state())
+        except Exception:
+            return ""
+    def get_player_location_zmachine(self) -> str:
+        """Get player location from the underlying env, else from _get_location()."""
+        try:
+            loc = self.env.env.get_player_location()
+            return str(loc) if loc else self._get_location()
+        except Exception:
+            return self._get_location()
+    def format_inventory(self, items: list) -> str:
+        """Format inventory items as one "Inventory: a, b" line.
+        Each item is stringified; text from the word "parent" onwards is
+        dropped and anything up to the first ":" is treated as a prefix and removed.
+        """
+        if not items:
+            return "Inventory: You are empty-handed."
+        names = []
+        for item in items:
+            text = str(item)
+            lowered = text.lower()
+            if "parent" in lowered:
+                name = text[:lowered.index("parent")].strip()
+                if ":" in name:
+                    name = name.split(":", 1)[1].strip()
+                names.append(name)
+            elif ":" in text:
+                names.append(text.split(":", 1)[1].strip())
+            else:
+                names.append(text)
+        return f"Inventory: {', '.join(names)}"
+    def get_memory(self) -> str:
+        """Get game state summary: location, score, moves, last 5 actions, observation."""
+        recent = self.history[-5:] if self.history else []
+        recent_str = "\n".join([f"  > {a} -> {r[:60]}..." for a, r in recent]) if recent else "  (none yet)"
+        loc = self.get_player_location_zmachine()
+        return f"""Current State (Z-machine):
+- Location: {loc}
+- Score: {self.get_score()} / {self.get_max_score()} points
+- Moves: {self.get_moves()}
+- Game: {self.game_name}
+Recent Actions:
+{recent_str}
+Current Observation:
+{self.state.observation if self.state else 'N/A'}"""
+    def get_map(self) -> str:
+        """Get a text map of explored locations and the exits recorded for each."""
+        if not self.explored_locations:
+            return "Map: No locations explored yet. Try moving around!"
+        lines = ["Explored Locations and Exits:"]
+        for loc, exits in sorted(self.explored_locations.items()):
+            lines.append(f"\n* {loc}")
+            for exit_info in sorted(exits):
+                lines.append(f"    -> {exit_info}")
+        lines.append(f"\n[Current] {self.get_player_location_zmachine()}")
+        return "\n".join(lines)
+# Process-wide GameManager singleton, created on first use by get_game().
+_game: GameManager | None = None
+def get_game() -> GameManager:
+    """Return the shared GameManager, creating and initializing it on demand.
+    The game name comes from the GAME environment variable (default "zork1").
+    """
+    global _game
+    manager = _game
+    if manager is None:
+        manager = _game = GameManager()
+    if manager.env is None:
+        manager.initialize(os.environ.get("GAME", "zork1"))
+    return manager
+# =============================================================================
+# MCP Tools (all use Z-machine data where available)
+# =============================================================================
+@mcp.tool()
+def play_action(action: str) -> str:
+    """
+    Execute a game command and return the result.
+    Args:
+        action: The command to execute (e.g., "north", "take lamp", "open mailbox")
+    Returns:
+        The game's response to the action
+    """
+    game = get_game()
+    observation = game.step(action)
+    state = game.state
+    # A positive reward replaces the regular score/moves footer with a points banner.
+    if state and state.reward > 0:
+        footer = f"\n\n+{state.reward} points! (Total: {game.get_score()})"
+    else:
+        footer = f"\n\n[Score: {game.get_score()} | Moves: {game.get_moves()}]"
+    if state and state.done:
+        footer += "\n\nGAME OVER"
+    return observation + footer
+@mcp.tool()
+def memory() -> str:
+    """
+    Get current game state summary (location, score, moves, recent history).
+    Location and score come from Z-machine directly.
+    """
+    # Thin tool wrapper: the summary text is produced by GameManager.get_memory().
+    game = get_game()
+    return game.get_memory()
+@mcp.tool()
+def inventory() -> str:
+    """
+    Check what the player is carrying.
+    Data comes directly from Z-machine get_inventory().
+    """
+    manager = get_game()
+    # Fetch the raw item list, then render it as a single "Inventory: ..." line.
+    return manager.format_inventory(manager.get_inventory_zmachine())
+@mcp.tool()
+def get_map() -> str:
+    """
+    Get a map of explored locations and connections.
+    """
+    # Thin tool wrapper: the map text is produced by GameManager.get_map().
+    game = get_game()
+    return game.get_map()
+@mcp.tool()
+def get_valid_actions() -> str:
+    """
+    Get a list of valid actions from the Z-machine object tree.
+    Used by Critic for fast validation before LLM evaluation.
+    """
+    manager = get_game()
+    try:
+        # Only the first 30 actions are reported.
+        shown = manager.get_valid_actions_zmachine()[:30]
+        return f"Valid actions: {', '.join(shown)}"
+    except Exception:
+        return "Could not get valid actions (spacy may be required)."
+@mcp.tool()
+def get_state_hash() -> str:
+    """
+    Get a hash of the current Z-machine world state for loop detection.
+    """
+    digest = get_game().get_state_hash()
+    # Cap the payload at 80 characters, marking truncation with an ellipsis.
+    if len(digest) > 80:
+        return f"State hash: {digest[:80]}..."
+    return f"State hash: {digest}"
+# =============================================================================
+# Run the server
+# =============================================================================
+# Start the MCP server only when this file is executed as a script
+# (presumably over stdio, matching how the agent's local test launches it; confirm).
+if __name__ == "__main__":
+    mcp.run()

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+# HF Spaces already has gradio and huggingface_hub pre-installed
+# Do not add them here or you may get version conflicts
+# Agent dependencies (these are provided by the evaluation infrastructure)
+# Do not add jericho, fastmcp here - they are installed during evaluation
+# Add any additional packages your agent needs below:
+# numpy
+# requests

run.sh ADDED Viewed

	@@ -0,0 +1,7 @@

+#!/usr/bin/env bash
+# Run from the parent directory (text-adventure-template) so run_agent.py is found.
+# Abort if the directory change fails instead of running from the wrong place.
+cd "$(dirname "$0")/.." || exit 1
+# Export so the default actually reaches the Python child process: a plain
+# assignment only sets a shell variable when USE_LOCAL_MODEL is not already exported.
+export USE_LOCAL_MODEL="${USE_LOCAL_MODEL:-true}"
+# usage: ./run.sh [game] [steps]
+# e.g. ./run.sh lostpig 15  or  ./run.sh zork1 20
+exec uv run python run_agent.py --agent submission_template --game "${1:-lostpig}" -v -n "${2:-15}"