bouhss commited on
Commit
0c8ceb9
·
verified ·
1 Parent(s): 615a63b

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +508 -227
agent.py CHANGED
@@ -1,33 +1,23 @@
1
  """
2
- Student Agent for Text Adventure Games
3
-
4
- This is your submission file. Implement the StudentAgent class to play
5
- text adventure games using the MCP server you also implement.
6
-
7
- Your agent should:
8
- 1. Connect to the MCP server via the provided client
9
- 2. Use the ReAct pattern (Thought -> Action -> Observation)
10
- 3. Call MCP tools to interact with the game
11
- 4. Maximize the game score within the step limit
12
-
13
- Required method:
14
- async def run(self, client, game, max_steps, seed, verbose) -> RunResult
15
-
16
- The 'client' is a FastMCP Client already connected to your MCP server.
17
- Use it to call tools like: await client.call_tool("play_action", {"action": "look"})
18
-
19
- Tips:
20
- - Start by looking around and understanding your environment
21
- - Keep track of visited locations to avoid loops
22
- - Pick up useful items (lamp, sword, etc.)
23
- - The seed parameter should be used to set your LLM's seed for reproducibility
24
  """
25
 
26
  import json
27
  import os
28
  import re
29
  from dataclasses import dataclass, field
30
- from typing import Optional
 
31
 
32
  from dotenv import load_dotenv
33
  from huggingface_hub import InferenceClient
@@ -35,80 +25,29 @@ from huggingface_hub import InferenceClient
35
  # Load environment variables
36
  load_dotenv()
37
 
38
- # Set USE_LOCAL_MODEL=1 in your .env to use a locally downloaded model
39
- USE_LOCAL_MODEL = os.getenv("USE_LOCAL_MODEL", "0").strip() in ("1", "true", "yes")
40
- LOCAL_MODEL_ID = os.getenv("LOCAL_MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")
41
-
42
  # =============================================================================
43
  # LLM Configuration - DO NOT MODIFY
44
  # =============================================================================
45
-
46
- # Model to use (fixed for fair evaluation)
47
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
48
 
49
- # Initialize the LLM client based on mode
50
- _local_pipeline = None
 
 
51
 
52
- if USE_LOCAL_MODEL:
53
- import torch
54
- from transformers import pipeline as _hf_pipeline
55
 
56
- _local_pipeline = _hf_pipeline(
57
- "text-generation",
58
- model=LOCAL_MODEL_ID,
59
- torch_dtype=torch.bfloat16,
60
- device_map="auto",
61
- )
62
- LLM_CLIENT = None
63
- else:
64
- _hf_token = os.getenv("HF_TOKEN")
65
- if not _hf_token:
66
- raise ValueError("HF_TOKEN not found. Set it in your .env file.")
67
- LLM_CLIENT = InferenceClient(token=_hf_token)
68
-
69
-
70
- def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
71
- """
72
- Call the LLM with the given prompt. Use this function in your agent.
73
-
74
- Args:
75
- prompt: The user prompt (current game state, history, etc.)
76
- system_prompt: The system prompt (instructions for the agent)
77
- seed: Random seed for reproducibility
78
- max_tokens: Maximum tokens in response (default: 300)
79
-
80
- Returns:
81
- The LLM's response text
82
-
83
- Example:
84
- response = call_llm(
85
- prompt="You are in a forest. What do you do?",
86
- system_prompt=SYSTEM_PROMPT,
87
- seed=42,
88
- )
89
- """
90
  messages = [
91
  {"role": "system", "content": system_prompt},
92
  {"role": "user", "content": prompt},
93
  ]
94
-
95
- if USE_LOCAL_MODEL and _local_pipeline is not None:
96
- outputs = _local_pipeline(
97
- messages,
98
- max_new_tokens=max_tokens,
99
- temperature=0.0001, # Near-deterministic (0.0 unsupported by some backends)
100
- do_sample=True,
101
- )
102
- return outputs[0]["generated_text"][-1]["content"]
103
-
104
  response = LLM_CLIENT.chat.completions.create(
105
  model=LLM_MODEL,
106
  messages=messages,
107
- temperature=0.0, # Deterministic for reproducibility
108
  max_tokens=max_tokens,
109
  seed=seed,
110
  )
111
-
112
  return response.choices[0].message.content
113
 
114
 
@@ -125,181 +64,523 @@ class RunResult:
125
 
126
 
127
  # =============================================================================
128
- # System Prompt - Customize this for your agent
129
  # =============================================================================
130
 
131
- SYSTEM_PROMPT = """You are playing a classic text adventure game.
132
 
133
- GOAL: Explore the world, solve puzzles, and maximize your score.
134
 
135
- AVAILABLE TOOLS (use via MCP):
136
- - play_action: Execute a game command (north, take lamp, open mailbox, etc.)
137
- - memory: Get current game state and history (if implemented)
138
- - inventory: Check what you're carrying (if implemented)
139
 
140
- VALID GAME COMMANDS for play_action:
141
- - Movement: north, south, east, west, up, down, enter, exit
142
- - Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
143
- - Other: look, inventory, read <thing>, turn on lamp
 
 
144
 
145
- RESPOND IN THIS EXACT FORMAT (no markdown):
146
- THOUGHT: <your reasoning about what to do next>
147
- TOOL: <tool_name>
148
- ARGS: <JSON arguments, e.g., {"action": "look"}>
149
 
150
- Example:
151
- THOUGHT: I should look around to see where I am.
152
- TOOL: play_action
153
- ARGS: {"action": "look"}
154
- """
155
 
 
 
 
156
 
157
- # =============================================================================
158
- # Student Agent - IMPLEMENT THIS CLASS
159
- # =============================================================================
160
 
161
  class StudentAgent:
162
- """
163
- Your ReAct agent implementation.
164
-
165
- TODO:
166
- 1. Implement the run() method with the ReAct loop
167
- 2. Parse LLM responses to extract tool calls
168
- 3. Track state and avoid loops
169
-
170
- Use the provided call_llm() function to interact with the LLM.
171
- """
172
-
173
  def __init__(self):
174
- """Initialize your agent here."""
175
- # TODO: Initialize any state tracking you need
176
- # self.history = []
177
- # self.visited_locations = set()
178
- pass
179
-
180
- async def run(
181
- self,
182
- client, # FastMCP Client connected to your MCP server
183
- game: str,
184
- max_steps: int,
185
- seed: int,
186
- verbose: bool = False,
187
- ) -> RunResult:
188
- """
189
- Run the agent for a game session.
190
-
191
- Args:
192
- client: FastMCP Client connected to your MCP server
193
- game: Name of the game being played (e.g., "zork1")
194
- max_steps: Maximum number of steps to take
195
- seed: Random seed for reproducibility (use for LLM calls)
196
- verbose: Whether to print detailed output
197
-
198
- Returns:
199
- RunResult with final score and statistics
200
- """
201
- # TODO: Implement your ReAct loop here
202
- #
203
- # Basic structure:
204
- # 1. Get initial observation (call play_action with "look")
205
- # 2. Loop for max_steps:
206
- # a. Build prompt with current observation and history
207
- # b. Call LLM to get thought and action
208
- # c. Parse the response to extract tool and args
209
- # d. Call the tool via client.call_tool(tool_name, args)
210
- # e. Update history and state
211
- # f. Check for game over
212
- # 3. Return RunResult with final statistics
213
-
214
- # Example of calling a tool:
215
- # result = await client.call_tool("play_action", {"action": "look"})
216
- # observation = result[0].text if result else "No response"
217
-
218
- # Example of calling the LLM:
219
- # response = call_llm(
220
- # prompt="Current observation: " + observation,
221
- # system_prompt=SYSTEM_PROMPT,
222
- # seed=seed,
223
- # )
224
-
225
- # Placeholder implementation - replace with your code
226
- locations_visited = set()
227
- history = []
228
- final_score = 0
229
- moves = 0
230
-
231
- # TODO: Your implementation here
232
- # ...
233
-
234
- return RunResult(
235
- final_score=final_score,
236
- max_score=350, # Zork1 max score, adjust if needed
237
- moves=moves,
238
- locations_visited=locations_visited,
239
- game_completed=False,
240
- history=history,
241
- )
242
-
243
- def _build_prompt(self, observation: str, history: list) -> str:
244
- """
245
- Build the prompt for the LLM.
246
-
247
- TODO: Implement this to create effective prompts
248
- """
249
- # TODO: Combine system prompt, history, and current observation
250
- pass
251
-
252
- def _parse_response(self, response: str) -> tuple[str, str, dict]:
253
- """
254
- Parse LLM response to extract thought, tool name, and arguments.
255
-
256
- TODO: Implement robust parsing
257
-
258
- Returns:
259
- Tuple of (thought, tool_name, args_dict)
260
- """
261
- # TODO: Parse the response format:
262
- # THOUGHT: ...
263
- # TOOL: ...
264
- # ARGS: {...}
265
- pass
266
-
267
- def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
268
- """
269
- Call the LLM with the given prompt.
270
-
271
- This is a convenience wrapper - you can also use call_llm() directly.
272
- """
273
- return call_llm(prompt, system_prompt, seed)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
 
275
 
276
  # =============================================================================
277
- # For local testing
278
  # =============================================================================
279
-
280
  async def test_agent():
281
- """Test the agent locally."""
282
  from fastmcp import Client
283
-
284
- # Path to your MCP server
285
  server_path = "mcp_server.py"
286
-
287
  agent = StudentAgent()
288
-
289
  async with Client(server_path) as client:
290
  result = await agent.run(
291
  client=client,
292
- game="zork1",
293
- max_steps=10,
294
  seed=42,
295
  verbose=True,
296
  )
297
-
298
- print(f"\nFinal Score: {result.final_score}")
299
  print(f"Moves: {result.moves}")
300
- print(f"Locations: {result.locations_visited}")
301
 
302
 
303
  if __name__ == "__main__":
304
  import asyncio
305
- asyncio.run(test_agent())
 
1
  """
2
+ Student Agent for Text Adventure Games (Strong submission)
3
+
4
+ Key ideas:
5
+ - Deterministic & robust
6
+ - Uses MCP tools if available:
7
+ - get_valid_actions: reduce invalid commands
8
+ - peek_action: simulate actions without committing (safe look-ahead)
9
+ - inventory / memory / get_map: optional extra context
10
+ - Exploration + score oriented:
11
+ utility = score_gain * big_weight + new_location_bonus - loop_penalty - stuck_penalty - death_penalty
12
+ - LLM is used only as fallback, to choose among a candidate list.
 
 
 
 
 
 
 
 
 
 
 
13
  """
14
 
15
  import json
16
  import os
17
  import re
18
  from dataclasses import dataclass, field
19
+ from typing import Optional, Any
20
+ from collections import defaultdict, deque
21
 
22
  from dotenv import load_dotenv
23
  from huggingface_hub import InferenceClient
 
25
  # Load environment variables
26
  load_dotenv()
27
 
 
 
 
 
28
  # =============================================================================
29
  # LLM Configuration - DO NOT MODIFY
30
  # =============================================================================
 
 
31
  LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
32
 
33
+ _hf_token = os.getenv("HF_TOKEN")
34
+ if not _hf_token:
35
+ raise ValueError("HF_TOKEN not found. Set it in your .env file.")
36
+ LLM_CLIENT = InferenceClient(token=_hf_token)
37
 
 
 
 
38
 
39
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 220) -> str:
    """Query the configured chat model deterministically.

    Args:
        prompt: User-turn content (game state, candidates, history).
        system_prompt: System-turn instructions for the agent.
        seed: Seed forwarded to the backend for reproducibility.
        max_tokens: Cap on the length of the reply.

    Returns:
        The raw text of the model's reply.
    """
    chat_turns = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    completion = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=chat_turns,
        temperature=0.0,  # deterministic decoding
        max_tokens=max_tokens,
        seed=seed,
    )
    return completion.choices[0].message.content
52
 
53
 
 
64
 
65
 
66
  # =============================================================================
67
+ # LLM Prompt (fallback only)
68
  # =============================================================================
69
 
70
+ SYSTEM_PROMPT = """You are an expert text-adventure agent.
71
 
72
+ Goal: maximize score and explore new locations while avoiding loops.
73
 
74
+ You MUST output EXACTLY:
75
+ THOUGHT: ...
76
+ TOOL: play_action
77
+ ARGS: {"action": "<one candidate action>"}
78
 
79
+ Rules:
80
+ - Choose one action EXACTLY from the candidate list provided by the user.
81
+ - Avoid repeating the same action if it failed before.
82
+ - If darkness is mentioned, prioritize lamp actions if present in candidates.
83
+ - No markdown, no extra text.
84
+ """
85
 
 
 
 
 
86
 
87
+ MOVE_ACTIONS = ["north", "south", "east", "west", "up", "down", "enter", "exit"]
88
+ MOVE_ALIASES = {"n": "north", "s": "south", "e": "east", "w": "west", "u": "up", "d": "down"}
 
 
 
89
 
90
+ # avoid wasting steps on meta commands
91
+ BAD_PREFIXES = ("save", "restore", "quit", "restart", "help", "verbose", "script", "unscript", "version")
92
+ BAD_EXACT = {"wait", "z"}
93
 
 
 
 
94
 
95
  class StudentAgent:
 
 
 
 
 
 
 
 
 
 
 
96
  def __init__(self):
97
+ # parsed from banner
98
+ self.score = 0
99
+ self.max_score = 0
100
+ self.moves = 0
101
+
102
+ # exploration tracking
103
+ self.locations_visited: set[str] = set()
104
+ self.last_location = "Unknown"
105
+ self.edges = defaultdict(dict) # edges[loc][move] = new_loc
106
+
107
+ # loop avoidance
108
+ self.tried = defaultdict(int) # tried[(loc, action)] += 1
109
+ self.recent_actions = deque(maxlen=10)
110
+ self.recent_obs = deque(maxlen=6)
111
+
112
+ # cached valid actions by location
113
+ self.valid_actions_cache = {} # loc -> list[str]
114
+
115
+ # ---------------------------------------------------------------------
116
+ # Main run loop
117
+ # ---------------------------------------------------------------------
118
    async def run(self, client, game: str, max_steps: int, seed: int, verbose: bool = False) -> RunResult:
        """Play up to ``max_steps`` committed actions and return final statistics.

        Args:
            client: FastMCP client connected to the game's MCP server.
            game: Game name (not used directly here; the server owns game choice).
            max_steps: Hard cap on committed actions.
            seed: Base seed for LLM fallback calls (offset by step number).
            verbose: Print a per-step trace when True.

        Returns:
            RunResult with score/moves/locations; on any exception a partial
            RunResult carrying the error string is returned instead of raising.
        """
        history: list[tuple[str, str, str]] = []

        try:
            # Discover which optional tools this server actually exposes.
            tools = await client.list_tools()
            tool_names = {t.name for t in tools}

            def has(tname: str) -> bool:
                return tname in tool_names

            # initial observation
            obs = await self._call_tool_text(client, "play_action", {"action": "look"})
            self._update_from_text(obs)
            self.last_location = self._extract_location(obs)
            self.locations_visited.add(self.last_location)

            if verbose:
                print(obs)

            for step in range(1, max_steps + 1):
                loc = self._extract_location(obs)
                self.last_location = loc
                self.locations_visited.add(loc)

                stuck = self._is_stuck(obs)

                # Refresh valid actions when stuck, in a new location, or
                # periodically (every 6 steps) — avoids a tool call per step.
                valid_actions = self.valid_actions_cache.get(loc, [])
                if has("get_valid_actions") and (stuck or not valid_actions or step % 6 == 0):
                    va_txt = await self._call_tool_text(client, "get_valid_actions", {"limit": 60})
                    valid_actions = self._parse_valid_actions(va_txt)
                    if valid_actions:
                        self.valid_actions_cache[loc] = valid_actions

                # Optional inventory snapshot (cheap context, fetched sparsely).
                inv_txt = ""
                if has("inventory") and (stuck or step % 8 == 0 or step == 1):
                    inv_txt = await self._call_tool_text(client, "inventory", {})

                # build candidates
                candidates = self._make_candidates(obs, inv_txt, valid_actions, loc)

                # Decide: prefer safe look-ahead via peek_action; otherwise
                # heuristics with an LLM fallback.
                action = None
                thought = ""

                if has("peek_action") and candidates:
                    action, thought = await self._choose_by_lookahead(
                        client=client,
                        loc=loc,
                        obs=obs,
                        candidates=candidates,
                        seed=seed,
                        step=step,
                        verbose=verbose,
                    )

                if not action:
                    action, thought = await self._choose_without_peek(
                        obs=obs,
                        inv_txt=inv_txt,
                        candidates=candidates,
                        seed=seed,
                        step=step,
                    )

                action = self._normalize_action(action or "look")

                # commit the action
                obs2 = await self._call_tool_text(client, "play_action", {"action": action})

                # Record a map edge when a movement action changed location.
                new_loc = self._extract_location(obs2)
                if action.lower() in MOVE_ACTIONS and new_loc and new_loc != loc:
                    self.edges[loc][action.lower()] = new_loc

                # Bookkeeping for loop avoidance and score tracking.
                self.tried[(loc, action.lower())] += 1
                self.recent_actions.append(action.lower())
                self.recent_obs.append((obs2 or "")[:220])
                self._update_from_text(obs2)

                # History entries are truncated to keep RunResult small.
                history.append((thought, f"play_action({action})", (obs2 or "")[:250]))

                if verbose:
                    print(f"\n--- step {step} ---")
                    print(f"THOUGHT: {thought}")
                    print(f"ACTION: {action}")
                    print(obs2)

                obs = obs2

                if self._is_game_over(obs):
                    break

            return RunResult(
                final_score=self.score,
                max_score=self.max_score,
                moves=self.moves,
                locations_visited=set(self.locations_visited),
                game_completed=self._is_game_over(obs),
                history=history,
            )

        except Exception as e:
            # Never crash the evaluator: surface partial progress plus the error.
            return RunResult(
                final_score=self.score,
                max_score=self.max_score,
                moves=self.moves,
                locations_visited=set(self.locations_visited),
                game_completed=False,
                error=f"{type(e).__name__}: {e}",
                history=history,
            )
232
+
233
+ # ---------------------------------------------------------------------
234
+ # Tool / text helpers
235
+ # ---------------------------------------------------------------------
236
+ async def _call_tool_text(self, client, tool: str, args: dict) -> str:
237
+ result = await client.call_tool(tool, args)
238
+ return self._extract_text(result)
239
+
240
+ def _extract_text(self, result: Any) -> str:
241
+ if result is None:
242
+ return ""
243
+ if isinstance(result, list) and result:
244
+ part = result[0]
245
+ if hasattr(part, "text"):
246
+ return part.text or ""
247
+ if isinstance(part, dict) and "text" in part:
248
+ return part["text"] or ""
249
+ return str(part)
250
+ return str(result)
251
+
252
+ def _extract_location(self, text: str) -> str:
253
+ if not text:
254
+ return "Unknown"
255
+ for line in text.splitlines():
256
+ line = line.strip()
257
+ if not line:
258
+ continue
259
+ if line.startswith("[Score:"):
260
+ continue
261
+ return line
262
+ return "Unknown"
263
+
264
+ def _update_from_text(self, text: str) -> None:
265
+ # parse banner: [Score: x/y | Moves: n]
266
+ if not text:
267
+ return
268
+ m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\]", text)
269
+ if m:
270
+ self.score = int(m.group(1))
271
+ self.max_score = int(m.group(2))
272
+ self.moves = int(m.group(3))
273
+
274
+ def _parse_valid_actions(self, txt: str) -> list[str]:
275
+ if not txt:
276
+ return []
277
+ actions = []
278
+ for line in txt.splitlines():
279
+ line = line.strip()
280
+ if line.startswith("- "):
281
+ a = line[2:].strip()
282
+ a = self._normalize_action(a)
283
+ low = a.lower()
284
+ if not a:
285
+ continue
286
+ if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
287
+ continue
288
+ actions.append(a)
289
+ # dedup keep order
290
+ seen = set()
291
+ out = []
292
+ for a in actions:
293
+ if a.lower() not in seen:
294
+ seen.add(a.lower())
295
+ out.append(a)
296
+ return out
297
+
298
+ def _normalize_action(self, action: str) -> str:
299
+ a = (action or "").strip()
300
+ low = a.lower()
301
+ if low in MOVE_ALIASES:
302
+ return MOVE_ALIASES[low]
303
+ return a
304
+
305
+ def _is_game_over(self, text: str) -> bool:
306
+ t = (text or "").lower()
307
+ return ("game over" in t) or ("you have died" in t) or ("you are dead" in t)
308
+
309
+ def _is_stuck(self, text: str) -> bool:
310
+ t = (text or "").lower()
311
+ bad = [
312
+ "i don't understand",
313
+ "you can't go that way",
314
+ "that's not a verb",
315
+ "not a word i know",
316
+ "nothing happens",
317
+ "you can't",
318
+ "can't do that",
319
+ ]
320
+ rep = len(self.recent_obs) >= 3 and all(self.recent_obs[-1] == x for x in list(self.recent_obs)[-3:])
321
+ return any(b in t for b in bad) or rep
322
+
323
+ # ---------------------------------------------------------------------
324
+ # Candidate generation
325
+ # ---------------------------------------------------------------------
326
+ def _make_candidates(self, obs: str, inv_txt: str, valid_actions: list[str], loc: str) -> list[str]:
327
+ obs_l = (obs or "").lower()
328
+ inv_l = (inv_txt or "").lower()
329
+
330
+ candidates = []
331
+ seen = set()
332
+
333
+ def add(a: str):
334
+ a = self._normalize_action(a)
335
+ if not a:
336
+ return
337
+ low = a.lower()
338
+ if low.startswith(BAD_PREFIXES) or low in BAD_EXACT:
339
+ return
340
+ if low not in seen:
341
+ seen.add(low)
342
+ candidates.append(a)
343
+
344
+ # always safe
345
+ add("look")
346
+
347
+ # darkness handling
348
+ if "dark" in obs_l:
349
+ if "lamp" in obs_l or "lamp" in inv_l:
350
+ add("take lamp")
351
+ add("turn on lamp")
352
+
353
+ # split valid actions into move vs object
354
+ move_list = []
355
+ obj_list = []
356
+ for a in valid_actions or []:
357
+ low = a.lower()
358
+ if low in MOVE_ACTIONS:
359
+ move_list.append(a)
360
+ else:
361
+ obj_list.append(a)
362
+
363
+ # prioritize untried moves from this location
364
+ def move_key(m: str):
365
+ return (self.tried[(loc, m.lower())], 0 if m.lower() not in self.edges.get(loc, {}) else 1)
366
+
367
+ for m in sorted(set(move_list), key=move_key):
368
+ add(m)
369
+
370
+ # if no valid moves known, still try generic moves
371
+ if not move_list:
372
+ for m in MOVE_ACTIONS:
373
+ add(m)
374
+
375
+ # prioritize object actions that often give score
376
+ scorey_prefixes = ("take ", "get ", "open ", "read ", "examine ", "look at ", "turn on ", "unlock ", "insert ")
377
+ for a in obj_list:
378
+ if a.lower().startswith(scorey_prefixes):
379
+ add(a)
380
+
381
+ # then the rest (limited)
382
+ for a in obj_list:
383
+ add(a)
384
+ if len(candidates) >= 22:
385
+ break
386
+
387
+ # small generic probes (often good across games)
388
+ add("take all")
389
+ add("inventory")
390
+
391
+ # remove actions repeated too much recently
392
+ cleaned = []
393
+ for a in candidates:
394
+ if list(self.recent_actions).count(a.lower()) >= 3:
395
+ continue
396
+ cleaned.append(a)
397
+
398
+ return cleaned[:20]
399
+
400
+ # ---------------------------------------------------------------------
401
+ # Decision: look-ahead
402
+ # ---------------------------------------------------------------------
403
    async def _choose_by_lookahead(self, client, loc: str, obs: str, candidates: list[str], seed: int, step: int, verbose: bool):
        """Rank candidates by simulating each with ``peek_action`` and scoring a utility.

        utility = 900 * score_gain + 250 (brand-new location) + 40 (moved at all)
                  - 80 * recent repeats - 160 (peek looks stuck),
        with death forced to a huge negative so it can never win. A small
        bonus favors lamp actions in darkness.

        Returns:
            (action, thought) on success, or (None, reason) when no candidate
            scores acceptably — caller then falls back to heuristics/LLM.
        """
        base_score = self.score
        base_loc = loc

        # Prioritize a shortlist for speed: least-tried first, then object
        # interactions, then moves; lowercase form breaks ties.
        priority = []
        for a in candidates:
            low = a.lower()
            is_move = low in MOVE_ACTIONS
            is_obj = low.startswith(("take ", "get ", "open ", "read ", "examine ", "turn on ", "unlock "))
            tried = self.tried[(loc, low)]
            priority.append((tried, 0 if is_obj else 1, 0 if is_move else 1, low, a))
        priority.sort()

        shortlist = [x[-1] for x in priority][:10]  # evaluate at most 10 peeks

        best_a = None
        best_u = -10**18
        best_th = ""

        for a in shortlist:
            low = a.lower()
            if self.tried[(loc, low)] >= 4:
                continue  # hammered too often at this location; skip entirely

            # Simulate without committing a real move.
            peek = await self._call_tool_text(client, "peek_action", {"action": a})
            peek_l = (peek or "").lower()

            if self._is_game_over(peek) or "you have died" in peek_l:
                u = -1_000_000_000  # never walk into death
            else:
                s_after, mx_after, mv_after = self._parse_banner(peek, fallback_score=base_score)
                delta = max(0, s_after - base_score)

                new_loc = self._extract_location(peek)
                changed = (new_loc and new_loc != base_loc)
                new_loc_bonus = 250 if (changed and new_loc not in self.locations_visited) else 0
                changed_bonus = 40 if changed else 0

                loop_pen = 80 * list(self.recent_actions).count(low)
                stuck_pen = 160 if self._is_stuck(peek) else 0

                # MAIN utility: score gain dominates everything else.
                u = delta * 900 + new_loc_bonus + changed_bonus - loop_pen - stuck_pen

                # small preference: in darkness, favor lamp actions
                if "dark" in (obs or "").lower() and ("lamp" in low):
                    u += 120

            if u > best_u:
                best_u = u
                best_a = a
                best_th = f"Look-ahead chose '{a}' (utility={u})."

        # Reject outcomes that are all strongly negative (e.g. only death).
        if best_a is None or best_u < -10000:
            return None, "Look-ahead found no good action; fallback."
        return best_a, best_th
460
+
461
+ def _parse_banner(self, text: str, fallback_score: int):
462
+ score = fallback_score
463
+ mx = self.max_score
464
+ mv = self.moves
465
+ if not text:
466
+ return score, mx, mv
467
+ m = re.search(r"\[Score:\s*(\d+)\s*/\s*(\d+)\s*\|\s*Moves:\s*(\d+)\s*\]", text)
468
+ if m:
469
+ return int(m.group(1)), int(m.group(2)), int(m.group(3))
470
+ return score, mx, mv
471
+
472
+ # ---------------------------------------------------------------------
473
+ # Decision: no peek => heuristic then LLM fallback among candidates
474
+ # ---------------------------------------------------------------------
475
+ async def _choose_without_peek(self, obs: str, inv_txt: str, candidates: list[str], seed: int, step: int):
476
+ loc = self._extract_location(obs)
477
+
478
+ # heuristic: try an untried move
479
+ for m in MOVE_ACTIONS:
480
+ if m in [c.lower() for c in candidates] and self.tried[(loc, m)] == 0:
481
+ return m, "Heuristic: try an untried move to explore."
482
+
483
+ # heuristic: try untried "take/get/open/read/examine"
484
+ for a in candidates:
485
+ low = a.lower()
486
+ if low.startswith(("take ", "get ", "open ", "read ", "examine ", "turn on ")):
487
+ if self.tried[(loc, low)] == 0:
488
+ return a, "Heuristic: try a promising object interaction."
489
+
490
+ # LLM fallback: choose from candidate list exactly
491
+ if not candidates:
492
+ return "look", "No candidates; fallback to look."
493
+
494
+ cand = candidates[:10]
495
+ prompt = self._build_llm_prompt(obs, inv_txt, cand)
496
+ resp = call_llm(prompt, SYSTEM_PROMPT, seed + step, max_tokens=180)
497
+
498
+ thought, tool, args = self._parse_response(resp)
499
+ a = self._normalize_action(str(args.get("action", "")).strip())
500
+
501
+ # force action to be in candidate list
502
+ canon = {x.lower(): x for x in cand}
503
+ if a.lower() in canon:
504
+ return canon[a.lower()], thought or "LLM chose a candidate."
505
+ return cand[0], "LLM invalid; fallback to first candidate."
506
+
507
+ def _build_llm_prompt(self, obs: str, inv_txt: str, candidates: list[str]) -> str:
508
+ obs = (obs or "").strip()[:1100]
509
+ inv_txt = (inv_txt or "").strip()[:350]
510
+
511
+ lines = [
512
+ f"Score: {self.score}/{self.max_score} | Moves: {self.moves}",
513
+ f"Location guess: {self.last_location}",
514
+ ]
515
+ if inv_txt:
516
+ lines.append(f"Inventory:\n{inv_txt}")
517
+ if self.recent_actions:
518
+ lines.append("Recent actions: " + ", ".join(list(self.recent_actions)[-6:]))
519
+
520
+ lines.append("\nCurrent observation:\n" + obs)
521
+ lines.append("\nCandidate actions (choose exactly one):")
522
+ for a in candidates:
523
+ lines.append(f"- {a}")
524
+ lines.append("\nOutput TOOL=play_action and ARGS with one candidate action.")
525
+ return "\n".join(lines)
526
+
527
+ def _parse_response(self, response: str):
528
+ thought = ""
529
+ tool = "play_action"
530
+ args = {"action": "look"}
531
+
532
+ if not response:
533
+ return thought, tool, args
534
+
535
+ m = re.search(r"(?im)^\s*THOUGHT\s*:\s*(.+)$", response)
536
+ if m:
537
+ thought = m.group(1).strip()
538
+
539
+ m = re.search(r"(?im)^\s*TOOL\s*:\s*([a-zA-Z0-9_]+)\s*$", response)
540
+ if m:
541
+ tool = m.group(1).strip()
542
+
543
+ m = re.search(r"(?is)^\s*ARGS\s*:\s*(\{.*\})\s*$", response)
544
+ if m:
545
+ raw = m.group(1).strip()
546
+ try:
547
+ args = json.loads(raw)
548
+ except Exception:
549
+ raw2 = raw.replace("'", '"')
550
+ raw2 = re.sub(r",\s*}", "}", raw2)
551
+ try:
552
+ args = json.loads(raw2)
553
+ except Exception:
554
+ args = {"action": "look"}
555
+
556
+ if not isinstance(args, dict):
557
+ args = {"action": "look"}
558
+
559
+ return thought, tool, args
560
 
561
 
562
  # =============================================================================
563
+ # Local testing
564
  # =============================================================================
 
565
async def test_agent():
    """Smoke-test the agent against a local MCP server (manual run only)."""
    from fastmcp import Client

    agent = StudentAgent()
    server_path = "mcp_server.py"

    async with Client(server_path) as client:
        result = await agent.run(
            client=client,
            game="lostpig",
            max_steps=20,
            seed=42,
            verbose=True,
        )
        print(f"\nFinal Score: {result.final_score}/{result.max_score}")
        print(f"Moves: {result.moves}")
        print(f"Locations visited: {len(result.locations_visited)}")
582
 
583
 
584
  if __name__ == "__main__":
585
  import asyncio
586
+ asyncio.run(test_agent())