Hugo PERCOT commited on
Commit
a9ad18c
·
1 Parent(s): 615a63b
Files changed (3) hide show
  1. README.md +16 -4
  2. agent.py +482 -81
  3. mcp_server.py +242 -52
README.md CHANGED
@@ -18,11 +18,23 @@ This is my submission for the Text Adventure Agent assignment. My agent uses the
18
 
19
  ## Approach
20
 
21
- <!-- Describe your approach here -->
 
 
 
22
 
23
- - What strategy does your agent use?
24
- - What tools did you implement in your MCP server?
25
- - Any interesting techniques or optimizations?
 
 
 
 
 
 
 
 
 
26
 
27
  ## Files
28
 
 
18
 
19
  ## Approach
20
 
21
+ - Strategy: score-first ReAct with explicit anti-loop controls and exploration frontier bias.
22
+ - The agent tracks repeated `(location, action)` pairs and loop signals, then overrides low-yield repeated actions with unexplored movement.
23
+ - Prompting includes compact recent history, score/move/loop diagnostics, inventory snapshot, and map frontier hints.
24
+ - Tool usage is constrained so `play_action` dominates turns; planning tools are used mainly when stagnation appears.
25
 
26
+ Implemented MCP tools in `mcp_server.py`:
27
+ - `play_action(action)` executes commands and appends score + loop diagnostics.
28
+ - `memory()` returns state summary, recent history, and notes.
29
+ - `inventory()` returns current inventory without consuming a move.
30
+ - `get_map()` returns explored transitions and untried frontier directions.
31
+ - `get_stats()` returns compact JSON-like state used by the agent for robust tracking.
32
+ - `remember(key, value)` / `recall(key)` provide persistent note memory for clues.
33
+
34
+ Interesting optimizations:
35
+ - Stagnation-aware action override (`no_progress_streak`, repeated actions, same-location streak).
36
+ - Frontier extraction from map snapshots for systematic exploration.
37
+ - Robust parser/validator for LLM tool calls (malformed JSON and tool alias handling).
38
 
39
  ## Files
40
 
agent.py CHANGED
@@ -26,7 +26,9 @@ Tips:
26
  import json
27
  import os
28
  import re
 
29
  from dataclasses import dataclass, field
 
30
  from typing import Optional
31
 
32
  from dotenv import load_dotenv
@@ -35,36 +37,23 @@ from huggingface_hub import InferenceClient
35
  # Load environment variables
36
  load_dotenv()
37
 
38
- # Set USE_LOCAL_MODEL=1 in your .env to use a locally downloaded model
39
- USE_LOCAL_MODEL = os.getenv("USE_LOCAL_MODEL", "0").strip() in ("1", "true", "yes")
40
- LOCAL_MODEL_ID = os.getenv("LOCAL_MODEL_ID", "Qwen/Qwen2.5-3B-Instruct")
41
-
42
  # =============================================================================
43
  # LLM Configuration - DO NOT MODIFY
44
  # =============================================================================
45
 
46
- # Model to use (fixed for fair evaluation)
47
- LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
48
 
49
- # Initialize the LLM client based on mode
50
- _local_pipeline = None
51
 
52
- if USE_LOCAL_MODEL:
53
- import torch
54
- from transformers import pipeline as _hf_pipeline
55
 
56
- _local_pipeline = _hf_pipeline(
57
- "text-generation",
58
- model=LOCAL_MODEL_ID,
59
- torch_dtype=torch.bfloat16,
60
- device_map="auto",
61
- )
62
- LLM_CLIENT = None
63
- else:
64
- _hf_token = os.getenv("HF_TOKEN")
65
- if not _hf_token:
66
- raise ValueError("HF_TOKEN not found. Set it in your .env file.")
67
- LLM_CLIENT = InferenceClient(token=_hf_token)
68
 
69
 
70
  def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
@@ -92,14 +81,30 @@ def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300)
92
  {"role": "user", "content": prompt},
93
  ]
94
 
95
- if USE_LOCAL_MODEL and _local_pipeline is not None:
96
- outputs = _local_pipeline(
97
- messages,
98
- max_new_tokens=max_tokens,
99
- temperature=0.0001, # Near-deterministic (0.0 unsupported by some backends)
100
- do_sample=True,
 
 
 
 
 
 
 
 
 
 
101
  )
102
- return outputs[0]["generated_text"][-1]["content"]
 
 
 
 
 
 
103
 
104
  response = LLM_CLIENT.chat.completions.create(
105
  model=LLM_MODEL,
@@ -134,8 +139,12 @@ GOAL: Explore the world, solve puzzles, and maximize your score.
134
 
135
  AVAILABLE TOOLS (use via MCP):
136
  - play_action: Execute a game command (north, take lamp, open mailbox, etc.)
137
- - memory: Get current game state and history (if implemented)
138
- - inventory: Check what you're carrying (if implemented)
 
 
 
 
139
 
140
  VALID GAME COMMANDS for play_action:
141
  - Movement: north, south, east, west, up, down, enter, exit
@@ -151,6 +160,12 @@ Example:
151
  THOUGHT: I should look around to see where I am.
152
  TOOL: play_action
153
  ARGS: {"action": "look"}
 
 
 
 
 
 
154
  """
155
 
156
 
@@ -172,10 +187,16 @@ class StudentAgent:
172
 
173
  def __init__(self):
174
  """Initialize your agent here."""
175
- # TODO: Initialize any state tracking you need
176
- # self.history = []
177
- # self.visited_locations = set()
178
- pass
 
 
 
 
 
 
179
 
180
  async def run(
181
  self,
@@ -198,71 +219,255 @@ class StudentAgent:
198
  Returns:
199
  RunResult with final score and statistics
200
  """
201
- # TODO: Implement your ReAct loop here
202
- #
203
- # Basic structure:
204
- # 1. Get initial observation (call play_action with "look")
205
- # 2. Loop for max_steps:
206
- # a. Build prompt with current observation and history
207
- # b. Call LLM to get thought and action
208
- # c. Parse the response to extract tool and args
209
- # d. Call the tool via client.call_tool(tool_name, args)
210
- # e. Update history and state
211
- # f. Check for game over
212
- # 3. Return RunResult with final statistics
213
-
214
- # Example of calling a tool:
215
- # result = await client.call_tool("play_action", {"action": "look"})
216
- # observation = result[0].text if result else "No response"
217
-
218
- # Example of calling the LLM:
219
- # response = call_llm(
220
- # prompt="Current observation: " + observation,
221
- # system_prompt=SYSTEM_PROMPT,
222
- # seed=seed,
223
- # )
224
-
225
- # Placeholder implementation - replace with your code
226
  locations_visited = set()
227
- history = []
228
  final_score = 0
229
  moves = 0
 
 
230
 
231
- # TODO: Your implementation here
232
- # ...
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  return RunResult(
235
  final_score=final_score,
236
- max_score=350, # Zork1 max score, adjust if needed
237
  moves=moves,
238
  locations_visited=locations_visited,
239
- game_completed=False,
 
240
  history=history,
241
  )
242
-
243
- def _build_prompt(self, observation: str, history: list) -> str:
 
 
 
 
 
 
 
 
 
244
  """
245
  Build the prompt for the LLM.
246
-
247
- TODO: Implement this to create effective prompts
248
  """
249
- # TODO: Combine system prompt, history, and current observation
250
- pass
251
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  def _parse_response(self, response: str) -> tuple[str, str, dict]:
253
  """
254
  Parse LLM response to extract thought, tool name, and arguments.
255
-
256
- TODO: Implement robust parsing
257
-
258
  Returns:
259
  Tuple of (thought, tool_name, args_dict)
260
  """
261
- # TODO: Parse the response format:
262
- # THOUGHT: ...
263
- # TOOL: ...
264
- # ARGS: {...}
265
- pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
267
  def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
268
  """
@@ -272,6 +477,202 @@ class StudentAgent:
272
  """
273
  return call_llm(prompt, system_prompt, seed)
274
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
275
 
276
  # =============================================================================
277
  # For local testing
 
26
  import json
27
  import os
28
  import re
29
+ import urllib.request
30
  from dataclasses import dataclass, field
31
+ from collections import deque
32
  from typing import Optional
33
 
34
  from dotenv import load_dotenv
 
37
  # Load environment variables
38
  load_dotenv()
39
 
 
 
 
 
40
  # =============================================================================
41
  # LLM Configuration - DO NOT MODIFY
42
  # =============================================================================
43
 
44
+ # Backend selection
45
+ LLM_BACKEND = os.getenv("LLM_BACKEND", "hf").lower()
46
 
47
+ # HF model (default backend)
48
+ LLM_MODEL = os.getenv("HF_MODEL", "Qwen/Qwen2.5-72B-Instruct")
49
 
50
+ # Ollama model (local backend)
51
+ OLLAMA_MODEL = os.getenv("OLLAMA_MODEL", "qwen2.5:3b")
52
+ OLLAMA_URL = os.getenv("OLLAMA_URL", "http://127.0.0.1:11434/api/chat")
53
 
54
+ # Initialize the LLM client (uses HF_TOKEN from environment)
55
+ _hf_token = os.getenv("HF_TOKEN")
56
+ LLM_CLIENT = InferenceClient(token=_hf_token) if _hf_token else None
 
 
 
 
 
 
 
 
 
57
 
58
 
59
  def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
 
81
  {"role": "user", "content": prompt},
82
  ]
83
 
84
+ if LLM_BACKEND == "ollama":
85
+ payload = {
86
+ "model": OLLAMA_MODEL,
87
+ "messages": messages,
88
+ "stream": False,
89
+ "options": {
90
+ "temperature": 0.0,
91
+ "num_predict": max_tokens,
92
+ "seed": seed,
93
+ },
94
+ }
95
+ req = urllib.request.Request(
96
+ OLLAMA_URL,
97
+ data=json.dumps(payload).encode("utf-8"),
98
+ headers={"Content-Type": "application/json"},
99
+ method="POST",
100
  )
101
+ with urllib.request.urlopen(req, timeout=120) as response:
102
+ body = response.read().decode("utf-8")
103
+ parsed = json.loads(body)
104
+ return parsed.get("message", {}).get("content", "")
105
+
106
+ if not LLM_CLIENT:
107
+ raise ValueError("HF_TOKEN not found. Set it in your .env file or use LLM_BACKEND=ollama.")
108
 
109
  response = LLM_CLIENT.chat.completions.create(
110
  model=LLM_MODEL,
 
139
 
140
  AVAILABLE TOOLS (use via MCP):
141
  - play_action: Execute a game command (north, take lamp, open mailbox, etc.)
142
+ - memory: Get state summary + recent history + loop diagnostics
143
+ - inventory: Check what you're carrying
144
+ - get_map: Get explored locations and frontier directions
145
+ - get_stats: Get compact state JSON (score, moves, done, loop signals)
146
+ - remember: Save a short note as key/value
147
+ - recall: Retrieve saved notes
148
 
149
  VALID GAME COMMANDS for play_action:
150
  - Movement: north, south, east, west, up, down, enter, exit
 
160
  THOUGHT: I should look around to see where I am.
161
  TOOL: play_action
162
  ARGS: {"action": "look"}
163
+
164
+ POLICY:
165
+ 1) Prefer play_action on most turns.
166
+ 2) If score/reward is stagnant or location repeats, prioritize unexplored movement/frontier.
167
+ 3) Avoid repeating the same action in the same location unless new evidence appears.
168
+ 4) Use memory/get_map/get_stats only when needed to break uncertainty.
169
  """
170
 
171
 
 
187
 
188
  def __init__(self):
189
  """Initialize your agent here."""
190
+ self.history: list[dict] = []
191
+ self.recent_actions: deque[str] = deque(maxlen=8)
192
+ self.location_action_counts: dict[tuple[str, str], int] = {}
193
+ self.score: int = 0
194
+ self.max_score: int = 350
195
+ self.last_observation: str = ""
196
+ self.non_play_streak: int = 0
197
+ self.cached_map: str = ""
198
+ self.cached_inventory: str = ""
199
+ self.note_counter: int = 0
200
 
201
  async def run(
202
  self,
 
219
  Returns:
220
  RunResult with final score and statistics
221
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
222
  locations_visited = set()
223
+ history: list[tuple[str, str, str]] = []
224
  final_score = 0
225
  moves = 0
226
+ game_completed = False
227
+ error = None
228
 
229
+ print(f"Starting game '{game}' with seed {seed}, using LLM model '{LLM_MODEL}'.")
 
230
 
231
+ try:
232
+ tools = await client.list_tools()
233
+ tool_names = {t.name for t in tools}
234
+
235
+ async def call_tool(tool: str, args: dict) -> str:
236
+ result = await client.call_tool(tool, args)
237
+ return self._extract_result(result)
238
+
239
+ observation = await call_tool("play_action", {"action": "look"})
240
+ self.last_observation = observation
241
+
242
+ stats = await self._get_stats(client, tool_names)
243
+ self._update_state_from_stats(stats)
244
+ location = stats.get("location") or self._extract_location(observation)
245
+ if location:
246
+ locations_visited.add(location)
247
+
248
+ if "get_map" in tool_names:
249
+ self.cached_map = await call_tool("get_map", {})
250
+ if "inventory" in tool_names:
251
+ self.cached_inventory = await call_tool("inventory", {})
252
+
253
+ if verbose:
254
+ print(f"\n{observation}")
255
+
256
+ print(max_steps)
257
+ for step in range(1, max_steps + 1):
258
+ location = stats.get("location") or self._extract_location(observation)
259
+ no_progress = int(stats.get("no_progress_streak", 0) or 0)
260
+
261
+ if "get_map" in tool_names and (step % 6 == 0 or no_progress >= 3):
262
+ self.cached_map = await call_tool("get_map", {})
263
+ if "inventory" in tool_names and step % 12 == 0:
264
+ self.cached_inventory = await call_tool("inventory", {})
265
+
266
+ prompt = self._build_prompt(
267
+ observation=observation,
268
+ location=location,
269
+ step=step,
270
+ max_steps=max_steps,
271
+ stats=stats,
272
+ map_snapshot=self.cached_map,
273
+ inventory_snapshot=self.cached_inventory,
274
+ )
275
+
276
+ response = call_llm(
277
+ prompt=prompt,
278
+ system_prompt=SYSTEM_PROMPT,
279
+ seed=seed + (step * 31),
280
+ )
281
+ thought, tool_name, tool_args = self._parse_response(response)
282
+ tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)
283
+
284
+ if tool_name != "play_action":
285
+ self.non_play_streak += 1
286
+ else:
287
+ self.non_play_streak = 0
288
+
289
+ if self.non_play_streak >= 2:
290
+ tool_name = "play_action"
291
+ tool_args = {
292
+ "action": self._next_exploration_action(
293
+ current_location=location,
294
+ map_snapshot=self.cached_map,
295
+ )
296
+ }
297
+ self.non_play_streak = 0
298
+
299
+ if tool_name == "play_action":
300
+ action = tool_args.get("action", "look")
301
+ action = self._normalize_action(action)
302
+ action = self._anti_loop_override(action, location, stats)
303
+ tool_args = {"action": action}
304
+ self.recent_actions.append(action)
305
+ moves += 1
306
+
307
+ if verbose:
308
+ print(f"\n--- Step {step} ---")
309
+ print(f"[THOUGHT] {thought}")
310
+ print(f"[TOOL] {tool_name}({tool_args})")
311
+
312
+ try:
313
+ observation = await call_tool(tool_name, tool_args)
314
+ except Exception as tool_exc:
315
+ observation = f"Tool error: {tool_exc}"
316
+ tool_name = "play_action"
317
+ fallback_action = self._next_exploration_action(location, self.cached_map)
318
+ tool_args = {"action": fallback_action}
319
+ observation = await call_tool(tool_name, tool_args)
320
+
321
+ self.last_observation = observation
322
+ stats = await self._get_stats(client, tool_names)
323
+ self._update_state_from_stats(stats)
324
+
325
+ location = stats.get("location") or self._extract_location(observation)
326
+ if location:
327
+ locations_visited.add(location)
328
+
329
+ final_score = int(stats.get("score", self.score) or self.score)
330
+ moves = int(stats.get("moves", moves) or moves)
331
+ self.max_score = int(stats.get("max_score", self.max_score) or self.max_score)
332
+ game_completed = bool(stats.get("done", False)) or self._is_game_over(observation)
333
+
334
+ if tool_name == "play_action":
335
+ loc_key = location or "Unknown"
336
+ act_key = tool_args.get("action", "look")
337
+ key = (loc_key, act_key)
338
+ self.location_action_counts[key] = self.location_action_counts.get(key, 0) + 1
339
+ await self._maybe_store_note(client, tool_names, location, observation)
340
+
341
+ self.history.append(
342
+ {
343
+ "step": step,
344
+ "thought": thought,
345
+ "tool": tool_name,
346
+ "args": tool_args,
347
+ "observation": observation[:220],
348
+ "score": final_score,
349
+ }
350
+ )
351
+ if len(self.history) > 18:
352
+ self.history = self.history[-18:]
353
+
354
+ history.append((thought, f"{tool_name}({tool_args})", observation[:120]))
355
+
356
+ if verbose:
357
+ print(f"[RESULT] {observation[:220]}...")
358
+ print(
359
+ f"[STATE] score={final_score}/{self.max_score} "
360
+ f"moves={moves} loc={location}"
361
+ )
362
+
363
+ if game_completed:
364
+ print(f"Game completed at step {step} with score {final_score}.")
365
+ break
366
+
367
+ except Exception as exc:
368
+ print(f"Error during agent run: {exc}")
369
+ error = str(exc)
370
+
371
+ if final_score == 0:
372
+ print("Agent failed to score any points. Consider improving your action selection and exploration strategy.")
373
+ final_score = self.score
374
+ print("end")
375
  return RunResult(
376
  final_score=final_score,
377
+ max_score=self.max_score,
378
  moves=moves,
379
  locations_visited=locations_visited,
380
+ game_completed=game_completed,
381
+ error=error,
382
  history=history,
383
  )
384
+
385
+ def _build_prompt(
386
+ self,
387
+ observation: str,
388
+ location: str,
389
+ step: int,
390
+ max_steps: int,
391
+ stats: dict,
392
+ map_snapshot: str,
393
+ inventory_snapshot: str,
394
+ ) -> str:
395
  """
396
  Build the prompt for the LLM.
 
 
397
  """
398
+ recent_lines = []
399
+ for item in self.history[-5:]:
400
+ recent_lines.append(
401
+ f"- {item['tool']} {item['args']} => score {item['score']} => {item['observation']}"
402
+ )
403
+ if not recent_lines:
404
+ recent_lines = ["- (none)"]
405
+
406
+ frontier_hint = self._extract_frontier_from_map(map_snapshot)
407
+ no_progress = int(stats.get("no_progress_streak", 0) or 0)
408
+
409
+ prompt = (
410
+ f"Game: current run\n"
411
+ f"Step: {step}/{max_steps}\n"
412
+ f"Location: {location}\n"
413
+ f"Score: {stats.get('score', self.score)}/{stats.get('max_score', self.max_score)}\n"
414
+ f"Loop signals: no_progress={stats.get('no_progress_streak', 0)}, "
415
+ f"same_location={stats.get('same_location_streak', 0)}, "
416
+ f"repeat_action={stats.get('repeated_action_streak', 0)}\n\n"
417
+ f"Recent decisions:\n" + "\n".join(recent_lines) + "\n\n"
418
+ f"Inventory snapshot:\n{inventory_snapshot[:280] if inventory_snapshot else '(unknown)'}\n\n"
419
+ f"Map/frontier snapshot:\n{map_snapshot[:520] if map_snapshot else '(unknown)'}\n\n"
420
+ f"Current observation:\n{observation}\n\n"
421
+ f"Guidance:\n"
422
+ f"- Prefer play_action now unless a planning query is necessary.\n"
423
+ f"- If no_progress >= 3, prioritize an unexplored movement from frontier ({', '.join(frontier_hint)}).\n"
424
+ f"- Avoid repeating recent actions: {', '.join(list(self.recent_actions)[-4:])}.\n"
425
+ f"- If you mention a clue in THOUGHT, keep it concise.\n"
426
+ )
427
+ if no_progress >= 3:
428
+ prompt += "\nYou appear stuck: choose a different movement or interaction than recent attempts.\n"
429
+ return prompt
430
+
431
  def _parse_response(self, response: str) -> tuple[str, str, dict]:
432
  """
433
  Parse LLM response to extract thought, tool name, and arguments.
434
+
 
 
435
  Returns:
436
  Tuple of (thought, tool_name, args_dict)
437
  """
438
+ thought = "No thought"
439
+ tool_name = "play_action"
440
+ args = {"action": "look"}
441
+
442
+ thought_match = re.search(r"THOUGHT\s*:\s*(.+)", response, flags=re.IGNORECASE)
443
+ if thought_match:
444
+ thought = thought_match.group(1).strip()
445
+
446
+ tool_match = re.search(r"TOOL\s*:\s*([^\n]+)", response, flags=re.IGNORECASE)
447
+ if tool_match:
448
+ tool_name = tool_match.group(1).strip().lower()
449
+ tool_name = re.sub(r"[^a-zA-Z0-9_]+", "", tool_name)
450
+
451
+ args_match = re.search(r"ARGS\s*:\s*(\{.*\})", response, flags=re.IGNORECASE | re.DOTALL)
452
+ if args_match:
453
+ raw_args = args_match.group(1).strip()
454
+ try:
455
+ args = json.loads(raw_args)
456
+ except json.JSONDecodeError:
457
+ raw_args = raw_args.replace("'", '"')
458
+ try:
459
+ args = json.loads(raw_args)
460
+ except json.JSONDecodeError:
461
+ action_match = re.search(r'"action"\s*:\s*"([^"]+)"', raw_args)
462
+ if action_match:
463
+ args = {"action": action_match.group(1)}
464
+ else:
465
+ args = {"action": "look"}
466
+
467
+ if not isinstance(args, dict):
468
+ args = {"action": "look"}
469
+
470
+ return thought, tool_name, args
471
 
472
  def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
473
  """
 
477
  """
478
  return call_llm(prompt, system_prompt, seed)
479
 
480
+ async def _get_stats(self, client, tool_names: set[str]) -> dict:
481
+ if "get_stats" not in tool_names:
482
+ return {
483
+ "score": self.score,
484
+ "max_score": self.max_score,
485
+ "moves": 0,
486
+ "done": False,
487
+ }
488
+ try:
489
+ result = await client.call_tool("get_stats", {})
490
+ text = self._extract_result(result)
491
+ return self._parse_stats(text)
492
+ except Exception:
493
+ return {
494
+ "score": self.score,
495
+ "max_score": self.max_score,
496
+ "moves": 0,
497
+ "done": False,
498
+ }
499
+
500
+ def _parse_stats(self, text: str) -> dict:
501
+ text = text.strip()
502
+ try:
503
+ return json.loads(text)
504
+ except Exception:
505
+ data: dict[str, object] = {}
506
+ for key in [
507
+ "score", "max_score", "moves", "reward", "no_progress_streak",
508
+ "same_location_streak", "repeated_action_streak", "unique_locations",
509
+ "unique_recent_observations",
510
+ ]:
511
+ match = re.search(rf'"{key}"\s*:\s*(-?\d+)', text)
512
+ if match:
513
+ data[key] = int(match.group(1))
514
+ for key in ["game", "location"]:
515
+ match = re.search(rf'"{key}"\s*:\s*"([^"]*)"', text)
516
+ if match:
517
+ data[key] = match.group(1)
518
+ done_match = re.search(r'"done"\s*:\s*(true|false)', text, flags=re.IGNORECASE)
519
+ if done_match:
520
+ data["done"] = done_match.group(1).lower() == "true"
521
+ return data
522
+
523
+ def _update_state_from_stats(self, stats: dict) -> None:
524
+ if not stats:
525
+ return
526
+ self.score = int(stats.get("score", self.score) or self.score)
527
+ self.max_score = int(stats.get("max_score", self.max_score) or self.max_score)
528
+
529
+ def _validate_tool_call(self, tool_name: str, tool_args: dict, valid_tools: set[str]) -> tuple[str, dict]:
530
+ aliases = {
531
+ "action": "play_action",
532
+ "act": "play_action",
533
+ "play": "play_action",
534
+ "map": "get_map",
535
+ "stats": "get_stats",
536
+ "state": "memory",
537
+ "inv": "inventory",
538
+ "store": "remember",
539
+ "notes": "recall",
540
+ }
541
+ tool_name = aliases.get(tool_name, tool_name)
542
+ if tool_name not in valid_tools:
543
+ tool_name = "play_action"
544
+
545
+ if not isinstance(tool_args, dict):
546
+ tool_args = {}
547
+
548
+ if tool_name == "play_action":
549
+ action = tool_args.get("action", "look")
550
+ tool_args = {"action": self._normalize_action(action)}
551
+ elif tool_name == "remember":
552
+ key = str(tool_args.get("key", "note")).strip() or "note"
553
+ value = str(tool_args.get("value", "")).strip() or "unknown"
554
+ tool_args = {"key": key[:64], "value": value[:220]}
555
+ elif tool_name == "recall":
556
+ key = str(tool_args.get("key", "")).strip()
557
+ tool_args = {"key": key}
558
+ else:
559
+ tool_args = {}
560
+
561
+ return tool_name, tool_args
562
+
563
+ def _normalize_action(self, action: str) -> str:
564
+ action = str(action).lower().strip()
565
+ action = action.replace("**", "").replace("`", "")
566
+ action = " ".join(action.split())
567
+ invalid_verb_map = {
568
+ "check": "examine",
569
+ "inspect": "examine",
570
+ "search": "look",
571
+ "grab": "take",
572
+ "pick": "take",
573
+ "investigate": "examine",
574
+ }
575
+ words = action.split()
576
+ if words and words[0] in invalid_verb_map:
577
+ words[0] = invalid_verb_map[words[0]]
578
+ action = " ".join(words)
579
+ return action or "look"
580
+
581
+ def _anti_loop_override(self, action: str, location: str, stats: dict) -> str:
582
+ loc = location or "Unknown"
583
+ key = (loc, action)
584
+ no_progress = int(stats.get("no_progress_streak", 0) or 0)
585
+ repeated_action_streak = int(stats.get("repeated_action_streak", 0) or 0)
586
+
587
+ if self.location_action_counts.get(key, 0) >= 2 and no_progress >= 2:
588
+ return self._next_exploration_action(loc, self.cached_map)
589
+
590
+ if repeated_action_streak >= 2 and len(self.recent_actions) >= 2:
591
+ if action == self.recent_actions[-1]:
592
+ return self._next_exploration_action(loc, self.cached_map)
593
+
594
+ if no_progress >= 4 and action in {"look", "inventory", "wait"}:
595
+ return self._next_exploration_action(loc, self.cached_map)
596
+
597
+ return action
598
+
599
+ def _next_exploration_action(self, current_location: str, map_snapshot: str) -> str:
600
+ frontier = self._extract_frontier_from_map(map_snapshot)
601
+ recent = set(list(self.recent_actions)[-4:])
602
+ for direction in frontier:
603
+ if direction not in recent:
604
+ return direction
605
+
606
+ fallback = [
607
+ "north", "south", "east", "west", "up", "down",
608
+ "enter", "exit", "examine room", "look",
609
+ ]
610
+ loc = current_location or "Unknown"
611
+ for action in fallback:
612
+ if self.location_action_counts.get((loc, action), 0) < 2:
613
+ return action
614
+ return "look"
615
+
616
+ def _extract_frontier_from_map(self, map_snapshot: str) -> list[str]:
617
+ if not map_snapshot:
618
+ return ["north", "south", "east", "west"]
619
+ match = re.search(
620
+ r"Frontier directions not yet tried here:\s*(.+)",
621
+ map_snapshot,
622
+ flags=re.IGNORECASE,
623
+ )
624
+ if not match:
625
+ return ["north", "south", "east", "west"]
626
+ dirs = [d.strip().lower() for d in match.group(1).split(",") if d.strip()]
627
+ return dirs or ["north", "south", "east", "west"]
628
+
629
+ def _extract_result(self, result) -> str:
630
+ if hasattr(result, "content") and result.content:
631
+ item = result.content[0]
632
+ if hasattr(item, "text"):
633
+ return item.text
634
+ return str(item)
635
+ if isinstance(result, list) and result:
636
+ first = result[0]
637
+ if hasattr(first, "text"):
638
+ return first.text
639
+ return str(first)
640
+ return str(result)
641
+
642
+ def _extract_location(self, observation: str) -> str:
643
+ lines = (observation or "").strip().split("\n")
644
+ if not lines:
645
+ return "Unknown"
646
+ first = lines[0].strip()
647
+ return first[:120] if first else "Unknown"
648
+
649
+ def _is_game_over(self, text: str) -> bool:
650
+ lower = (text or "").lower()
651
+ endings = [
652
+ "game over",
653
+ "you have died",
654
+ "you are dead",
655
+ "*** you have died ***",
656
+ "[game_over]",
657
+ ]
658
+ return any(token in lower for token in endings)
659
+
660
+ async def _maybe_store_note(self, client, tool_names: set[str], location: str, observation: str) -> None:
661
+ if "remember" not in tool_names:
662
+ return
663
+ text = (observation or "").lower()
664
+ keywords = ["locked", "key", "door", "treasure", "cannot", "need", "dark"]
665
+ if not any(k in text for k in keywords):
666
+ return
667
+
668
+ note_text = " ".join((observation or "").strip().split())[:170]
669
+ key = f"clue_{self.note_counter}_{(location or 'unknown')[:20]}"
670
+ self.note_counter += 1
671
+ try:
672
+ await client.call_tool("remember", {"key": key, "value": note_text})
673
+ except Exception:
674
+ return
675
+
676
 
677
  # =============================================================================
678
  # For local testing
mcp_server.py CHANGED
@@ -26,6 +26,8 @@ Then open the MCP Inspector in your browser to test the tools interactively.
26
 
27
  import sys
28
  import os
 
 
29
 
30
  # Add parent directory to path to import games module
31
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
@@ -59,29 +61,123 @@ class GameManager:
59
  self.env: TextAdventureEnv = None
60
  self.state = None
61
  self.game_name: str = ""
62
- # TODO: Add more state tracking
63
- # self.history: list[tuple[str, str]] = []
64
- # self.explored_locations: dict[str, set[str]] = {}
65
- # self.current_location: str = ""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  def initialize(self, game: str = "zork1"):
68
  """Initialize or reset the game."""
69
  self.game_name = game
70
  self.env = TextAdventureEnv(game)
71
  self.state = self.env.reset()
72
- # TODO: Reset your state tracking here
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  return self.state.observation
74
 
75
  def step(self, action: str) -> str:
76
  """Execute an action and return the result."""
77
  if self.env is None:
78
  self.initialize()
79
-
 
80
  self.state = self.env.step(action)
81
-
82
- # TODO: Update your state tracking here
83
- # self.history.append((action, self.state.observation))
84
- # Update location tracking, etc.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
85
 
86
  return self.state.observation
87
 
@@ -131,57 +227,151 @@ def play_action(action: str) -> str:
131
  - Other: look, inventory, read <thing>, turn on lamp
132
  """
133
  game = get_game()
134
-
135
- # TODO: You might want to add action validation here
136
- # TODO: You might want to include score changes in the response
137
-
138
  result = game.step(action)
139
-
140
- # Optional: Append score info
141
- # result += f"\n[Score: {game.get_score()} | Moves: {game.get_moves()}]"
142
-
 
 
 
 
 
 
 
 
 
 
 
143
  return result
144
 
145
 
146
- # TODO: Implement additional tools to help your agent
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
- # @mcp.tool()
149
- # def memory() -> str:
150
- # """
151
- # Get the current game state summary.
152
- #
153
- # Returns:
154
- # A summary including current location, score, moves, and recent history
155
- # """
156
- # game = get_game()
157
- # # TODO: Return useful state information
158
- # pass
159
 
 
 
 
 
 
 
 
 
 
160
 
161
- # @mcp.tool()
162
- # def inventory() -> str:
163
- # """
164
- # Check what the player is carrying.
165
- #
166
- # Returns:
167
- # List of items in the player's inventory
168
- # """
169
- # game = get_game()
170
- # result = game.step("inventory")
171
- # return result
172
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
- # @mcp.tool()
175
- # def get_map() -> str:
176
- # """
177
- # Get a map of explored locations.
178
- #
179
- # Returns:
180
- # A text representation of explored locations and connections
181
- # """
182
- # game = get_game()
183
- # # TODO: Return map of explored locations
184
- # pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
 
187
  # @mcp.tool()
 
26
 
27
  import sys
28
  import os
29
+ import hashlib
30
+ from collections import deque
31
 
32
  # Add parent directory to path to import games module
33
  sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
61
  self.env: TextAdventureEnv = None
62
  self.state = None
63
  self.game_name: str = ""
64
+ self.history: deque[dict] = deque(maxlen=60)
65
+ self.current_location: str = "Unknown"
66
+ self.location_visits: dict[str, int] = {}
67
+ self.map_edges: dict[str, dict[str, str]] = {}
68
+ self.recent_transitions: deque[tuple[str, str]] = deque(maxlen=16)
69
+ self.notes: dict[str, str] = {}
70
+ self.note_order: deque[str] = deque(maxlen=40)
71
+ self.no_progress_streak: int = 0
72
+ self.same_location_streak: int = 0
73
+ self.repeated_action_streak: int = 0
74
+ self.obs_hash_history: deque[str] = deque(maxlen=8)
75
+ self.last_action: str = ""
76
+ self.action_counts: dict[tuple[str, str], int] = {}
77
+
78
+ @staticmethod
79
+ def _clean_text(text: str) -> str:
80
+ return " ".join((text or "").strip().split())
81
+
82
+ def _extract_location(self, observation: str) -> str:
83
+ candidate = (self.state.location if self.state else "") or ""
84
+ candidate = candidate.strip()
85
+ if candidate and candidate.lower() != "unknown":
86
+ return candidate
87
+ lines = (observation or "").strip().split("\n")
88
+ if not lines:
89
+ return "Unknown"
90
+ first = lines[0].strip()
91
+ if first:
92
+ return first[:120]
93
+ return "Unknown"
94
+
95
+ def _is_movement_action(self, action: str) -> bool:
96
+ movement = {
97
+ "north", "south", "east", "west", "up", "down", "enter", "exit",
98
+ "n", "s", "e", "w", "u", "d", "in", "out",
99
+ "northeast", "northwest", "southeast", "southwest",
100
+ "ne", "nw", "se", "sw",
101
+ }
102
+ return action.strip().lower() in movement
103
+
104
+ def _observation_hash(self, observation: str) -> str:
105
+ clean = self._clean_text(observation).lower()[:400]
106
+ return hashlib.sha1(clean.encode("utf-8")).hexdigest()
107
 
108
  def initialize(self, game: str = "zork1"):
109
  """Initialize or reset the game."""
110
  self.game_name = game
111
  self.env = TextAdventureEnv(game)
112
  self.state = self.env.reset()
113
+ self.history.clear()
114
+ self.map_edges.clear()
115
+ self.location_visits.clear()
116
+ self.recent_transitions.clear()
117
+ self.notes.clear()
118
+ self.note_order.clear()
119
+ self.no_progress_streak = 0
120
+ self.same_location_streak = 0
121
+ self.repeated_action_streak = 0
122
+ self.obs_hash_history.clear()
123
+ self.last_action = ""
124
+ self.action_counts.clear()
125
+
126
+ self.current_location = self._extract_location(self.state.observation)
127
+ self.location_visits[self.current_location] = 1
128
+ self.obs_hash_history.append(self._observation_hash(self.state.observation))
129
  return self.state.observation
130
 
131
  def step(self, action: str) -> str:
132
  """Execute an action and return the result."""
133
  if self.env is None:
134
  self.initialize()
135
+ action = self._clean_text(action).lower() or "look"
136
+ prev_loc = self.current_location
137
  self.state = self.env.step(action)
138
+
139
+ new_loc = self._extract_location(self.state.observation)
140
+ self.current_location = new_loc
141
+ self.location_visits[new_loc] = self.location_visits.get(new_loc, 0) + 1
142
+
143
+ if self._is_movement_action(action):
144
+ if prev_loc not in self.map_edges:
145
+ self.map_edges[prev_loc] = {}
146
+ self.map_edges[prev_loc][action] = new_loc
147
+
148
+ if self.state.reward > 0:
149
+ self.no_progress_streak = 0
150
+ else:
151
+ self.no_progress_streak += 1
152
+
153
+ if new_loc == prev_loc:
154
+ self.same_location_streak += 1
155
+ else:
156
+ self.same_location_streak = 0
157
+
158
+ if action == self.last_action:
159
+ self.repeated_action_streak += 1
160
+ else:
161
+ self.repeated_action_streak = 1
162
+ self.last_action = action
163
+
164
+ self.recent_transitions.append((prev_loc, new_loc))
165
+ self.action_counts[(new_loc, action)] = self.action_counts.get((new_loc, action), 0) + 1
166
+
167
+ obs_hash = self._observation_hash(self.state.observation)
168
+ self.obs_hash_history.append(obs_hash)
169
+
170
+ self.history.append(
171
+ {
172
+ "action": action,
173
+ "location_before": prev_loc,
174
+ "location_after": new_loc,
175
+ "score": self.state.score,
176
+ "reward": self.state.reward,
177
+ "moves": self.state.moves,
178
+ "observation": self.state.observation[:280],
179
+ }
180
+ )
181
 
182
  return self.state.observation
183
 
 
227
  - Other: look, inventory, read <thing>, turn on lamp
228
  """
229
  game = get_game()
230
+
 
 
 
231
  result = game.step(action)
232
+
233
+ score_info = (
234
+ f"\n\n[Score: {game.state.score}/{game.state.max_score} | "
235
+ f"Reward: {game.state.reward:+d} | Moves: {game.state.moves}]"
236
+ )
237
+ loop_info = (
238
+ f"\n[LoopSignals: no_progress={game.no_progress_streak}, "
239
+ f"same_location={game.same_location_streak}, "
240
+ f"repeat_action={game.repeated_action_streak}]"
241
+ )
242
+ if game.state.done:
243
+ score_info += "\n[GAME_OVER]"
244
+
245
+ result = result + score_info + loop_info
246
+
247
  return result
248
 
249
 
250
@mcp.tool()
def memory() -> str:
    """Return current state with compact recent history and loop diagnostics."""
    game = get_game()

    # Last six turns, one line each; placeholder when nothing played yet.
    recent_lines = [
        f"- {item['action']} @ {item['location_before']} -> {item['location_after']} "
        f"(reward {item['reward']:+d}, score {item['score']})"
        for item in list(game.history)[-6:]
    ] or ["- (no actions yet)"]

    # Up to eight most recent stored notes, values truncated for brevity.
    notes = [
        f"- {key}: {game.notes.get(key, '')[:120]}"
        for key in list(game.note_order)[-8:]
    ] or ["- (none)"]

    header = (
        f"Game: {game.game_name}\n"
        f"Location: {game.current_location}\n"
        f"Score: {game.state.score}/{game.state.max_score}\n"
        f"Moves: {game.state.moves}\n"
        f"Done: {game.state.done}\n"
        f"LoopSignals: no_progress={game.no_progress_streak}, "
        f"same_location={game.same_location_streak}, repeat_action={game.repeated_action_streak}\n\n"
    )
    return (
        header
        + "Recent history:\n" + "\n".join(recent_lines) + "\n\n"
        + "Notes:\n" + "\n".join(notes) + "\n\n"
        + f"Observation:\n{game.state.observation}"
    )
283
 
 
 
 
 
 
 
 
 
 
 
 
284
 
285
@mcp.tool()
def inventory() -> str:
    """Return current inventory without spending a move."""
    game = get_game()
    # Read straight from cached state so no game turn is consumed.
    items = (game.state.inventory or []) if game.state else []
    if not items:
        return "Inventory: empty"
    return "Inventory: " + ", ".join(str(entry) for entry in items)
+ return "Inventory: " + ", ".join(item_list)
294
 
 
 
 
 
 
 
 
 
 
 
 
295
 
296
@mcp.tool()
def get_map() -> str:
    """Return explored location graph and candidate frontier directions."""
    game = get_game()
    if not game.location_visits:
        return "Map: empty"

    lines = ["Explored map:"]
    # Locations alphabetically (case-insensitive), each with its known exits.
    for loc in sorted(game.location_visits, key=str.lower):
        lines.append(f"- {loc} (visits: {game.location_visits[loc]})")
        for direction, dst in sorted(game.map_edges.get(loc, {}).items()):
            lines.append(f"  {direction} -> {dst}")

    # Directions not yet attempted from the current room = exploration frontier.
    all_dirs = {
        "north", "south", "east", "west", "up", "down", "enter", "exit",
        "n", "s", "e", "w", "u", "d", "in", "out", "ne", "nw", "se", "sw"
    }
    tried = set(game.map_edges.get(game.current_location, {}))
    frontier = sorted(all_dirs - tried)
    lines.append(f"\nCurrent: {game.current_location}")
    lines.append("Frontier directions not yet tried here: " + ", ".join(frontier[:12]))
    return "\n".join(lines)
320
 
321
+
322
@mcp.tool()
def get_stats() -> str:
    """Return compact machine-readable (JSON-shaped) state for planning.

    Includes score/move counters, loop-detection streaks, and exploration
    coverage metrics derived from the tracked state.
    """
    game = get_game()
    unique_obs = len(set(game.obs_hash_history))
    # Sanitize outside the f-string: a backslash escape inside an f-string
    # expression (replace("\"", "")) is a SyntaxError before Python 3.12.
    location = game.current_location.replace('"', "")
    return (
        "{"
        f'"game":"{game.game_name}",'
        f'"location":"{location}",'
        f'"score":{game.state.score},'
        f'"max_score":{game.state.max_score},'
        f'"moves":{game.state.moves},'
        f'"reward":{game.state.reward},'
        f'"done":{str(game.state.done).lower()},'
        f'"no_progress_streak":{game.no_progress_streak},'
        f'"same_location_streak":{game.same_location_streak},'
        f'"repeated_action_streak":{game.repeated_action_streak},'
        f'"unique_locations":{len(game.location_visits)},'
        f'"unique_recent_observations":{unique_obs}'
        "}"
    )
343
+
344
+
345
@mcp.tool()
def remember(key: str, value: str) -> str:
    """Store a short note for the agent (e.g., puzzle clue, locked door).

    Keys are truncated to 64 chars and values to 320. The note index
    (``note_order``) is a bounded deque; when appending would evict the
    oldest key, that key's entry is also removed from ``notes`` so the
    dict cannot grow without bound (previously evicted notes leaked).
    """
    game = get_game()
    clean_key = GameManager._clean_text(key)[:64]
    clean_value = GameManager._clean_text(value)[:320]
    if not clean_key:
        return "ERROR: key cannot be empty"

    game.notes[clean_key] = clean_value
    if clean_key in game.note_order:
        # Re-remembering a key just refreshes its recency.
        game.note_order.remove(clean_key)
    elif len(game.note_order) == game.note_order.maxlen:
        # The append below will push out the oldest key; drop its note too.
        evicted = game.note_order[0]
        game.notes.pop(evicted, None)
    game.note_order.append(clean_key)
    return f"Stored note '{clean_key}'"
358
+
359
+
360
@mcp.tool()
def recall(key: str = "") -> str:
    """Recall one note by key, or list all recent notes if key is empty."""
    game = get_game()
    wanted = GameManager._clean_text(key)
    if wanted:
        note = game.notes.get(wanted)
        if note is not None:
            return f"{wanted}: {note}"
        return f"No note for key '{wanted}'"
    if not game.note_order:
        return "No notes stored"
    # Show the 12 most recently stored/refreshed notes.
    listing = ["Notes:"] + [
        f"- {k}: {game.notes.get(k, '')}" for k in list(game.note_order)[-12:]
    ]
    return "\n".join(listing)
374
+ return "\n".join(lines)
375
 
376
 
377
  # @mcp.tool()