eyaa99 committed on
Commit
50e7ecd
·
verified ·
1 Parent(s): 68314ab

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +78 -10
agent.py CHANGED
@@ -27,6 +27,13 @@ from typing import Optional
27
  from dotenv import load_dotenv
28
  from huggingface_hub import InferenceClient
29
 
 
 
 
 
 
 
 
30
  # Load environment variables
31
  load_dotenv()
32
 
@@ -88,14 +95,16 @@ def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300)
88
  ]
89
 
90
  if USE_LOCAL_MODEL and _local_pipeline is not None:
 
91
  outputs = _local_pipeline(
92
  messages,
93
- max_new_tokens=max_tokens,
94
  temperature=0.0001, # Near-deterministic (0.0 unsupported by some backends)
95
  do_sample=True,
96
  )
97
  return outputs[0]["generated_text"][-1]["content"]
98
 
 
99
  response = LLM_CLIENT.chat.completions.create(
100
  model=LLM_MODEL,
101
  messages=messages,
@@ -248,25 +257,35 @@ class StudentAgent:
248
  response = self._call_llm(prompt, SYSTEM_PROMPT, seed + step)
249
  thought, tool_name, args = self._parse_response(response)
250
 
 
251
  tool_name = "play_action"
252
  action = str(args.get("action", "look")).strip() if isinstance(args, dict) else "look"
253
  if not action:
254
  action = "look"
255
 
 
256
  if action in self.failed_actions:
257
- action = self._fallback_action()
 
 
 
 
 
 
 
258
 
259
  # avoid repeating exact action too much
260
  if len(self.recent_actions) >= 2 and self.recent_actions[-1] == action and self.recent_actions[-2] == action:
261
- action = self._fallback_action()
262
 
263
  new_observation = str(await client.call_tool("play_action", {"action": action}))
264
  self._update_score(new_observation)
265
 
266
- # mark failure if no change
267
  new_norm = self._norm_obs(new_observation)
268
  if new_norm == self.last_obs_norm:
269
- self.failed_actions.add(action)
 
270
  self.last_obs_norm = new_norm
271
 
272
  self.recent_actions.append(action)
@@ -284,7 +303,7 @@ class StudentAgent:
284
  if "GAME OVER" in observation:
285
  return RunResult(
286
  final_score=final_score,
287
- max_score=350,
288
  moves=moves,
289
  locations_visited=locations_visited,
290
  game_completed=True,
@@ -293,7 +312,7 @@ class StudentAgent:
293
 
294
  return RunResult(
295
  final_score=final_score,
296
- max_score=350,
297
  moves=moves,
298
  locations_visited=locations_visited,
299
  game_completed=False,
@@ -355,11 +374,28 @@ class StudentAgent:
355
  """
356
  return call_llm(prompt, system_prompt, seed)
357
 
358
- def _fallback_action(self) -> str:
359
- # Simple exploration fallback
360
  for a in ["north", "south", "east", "west", "up", "down", "in", "out"]:
361
  if a not in self.failed_actions:
362
  return a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
363
  return "look"
364
 
365
  def _update_score(self, text: str):
@@ -371,4 +407,36 @@ class StudentAgent:
371
  s = re.sub(r"\[Score:.*?\]", "", text, flags=re.I)
372
  s = re.sub(r"Score:\s*\d+|Moves:\s*\d+", "", s, flags=re.I)
373
  s = re.sub(r"\s+", " ", s).strip()
374
- return s[:700]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  from dotenv import load_dotenv
28
  from huggingface_hub import InferenceClient
29
 
30
# Silence transformers warnings in local mode (prevents repeated max_length/max_new_tokens spam)
# Best-effort: transformers may not be installed at all (hosted-inference mode),
# and the logging helper's location can vary between library versions, so we
# catch exactly those two failure modes instead of swallowing every exception.
try:
    import transformers

    transformers.utils.logging.set_verbosity_error()
except (ImportError, AttributeError):
    # transformers absent, or the logging API moved — hosted inference still works.
    pass
36
+
37
  # Load environment variables
38
  load_dotenv()
39
 
 
95
  ]
96
 
97
  if USE_LOCAL_MODEL and _local_pipeline is not None:
98
+ # Keep local generation shorter + quieter
99
  outputs = _local_pipeline(
100
  messages,
101
+ max_new_tokens=min(max_tokens, 128),
102
  temperature=0.0001, # Near-deterministic (0.0 unsupported by some backends)
103
  do_sample=True,
104
  )
105
  return outputs[0]["generated_text"][-1]["content"]
106
 
107
+ # Hosted inference (may fail with 402 if credits depleted)
108
  response = LLM_CLIENT.chat.completions.create(
109
  model=LLM_MODEL,
110
  messages=messages,
 
257
  response = self._call_llm(prompt, SYSTEM_PROMPT, seed + step)
258
  thought, tool_name, args = self._parse_response(response)
259
 
260
+ # Keep it simple: always call play_action
261
  tool_name = "play_action"
262
  action = str(args.get("action", "look")).strip() if isinstance(args, dict) else "look"
263
  if not action:
264
  action = "look"
265
 
266
+ # Simple avoidance: don't repeat known-failed actions
267
  if action in self.failed_actions:
268
+ action = self._fallback_action_from_observation(observation)
269
+
270
+ # Hard anti-stuck rule: if we keep doing "look", force exploration
271
+ if len(self.recent_actions) >= 2 and self.recent_actions[-1] == "look" and self.recent_actions[-2] == "look":
272
+ if "inventory" not in self.failed_actions:
273
+ action = "inventory"
274
+ else:
275
+ action = self._fallback_action_from_observation(observation)
276
 
277
  # avoid repeating exact action too much
278
  if len(self.recent_actions) >= 2 and self.recent_actions[-1] == action and self.recent_actions[-2] == action:
279
+ action = self._fallback_action_from_observation(observation)
280
 
281
  new_observation = str(await client.call_tool("play_action", {"action": action}))
282
  self._update_score(new_observation)
283
 
284
+ # mark failure if no change (but do not mark "look" as failed)
285
  new_norm = self._norm_obs(new_observation)
286
  if new_norm == self.last_obs_norm:
287
+ if action != "look":
288
+ self.failed_actions.add(action)
289
  self.last_obs_norm = new_norm
290
 
291
  self.recent_actions.append(action)
 
303
  if "GAME OVER" in observation:
304
  return RunResult(
305
  final_score=final_score,
306
+ max_score=350, # Zork1 max score, adjust if needed
307
  moves=moves,
308
  locations_visited=locations_visited,
309
  game_completed=True,
 
312
 
313
  return RunResult(
314
  final_score=final_score,
315
+ max_score=350, # Zork1 max score, adjust if needed
316
  moves=moves,
317
  locations_visited=locations_visited,
318
  game_completed=False,
 
374
  """
375
  return call_llm(prompt, system_prompt, seed)
376
 
377
def _fallback_action_from_observation(self, observation: str) -> str:
    """Pick an exploratory action that has not already failed.

    Preference order: compass/vertical movement first, then simple
    ``verb noun`` interactions built from words seen in *observation*.
    Returns ``"look"`` when every candidate has already failed.

    Args:
        observation: Latest game text; mined for candidate nouns.

    Returns:
        A game command string that is not in ``self.failed_actions``
        (or ``"look"`` as the last resort).
    """
    # Try movement first — cheap and the most likely way to make progress.
    for direction in ("north", "south", "east", "west", "up", "down", "in", "out"):
        if direction not in self.failed_actions:
            return direction

    # Try simple object interactions based on words in the observation.
    words = re.findall(r"[A-Za-z]{3,}", observation.lower())
    stop = {
        "the", "and", "you", "are", "with", "that", "this", "from", "your",
        "have", "here", "there", "into", "over", "under", "would", "could",
        "should", "what", "when", "then", "than", "them", "been", "were",
        "will", "just", "about", "some", "where", "which",
    }
    # De-duplicate while preserving first-seen order so the 25-word budget
    # is not wasted on repeated words (the original kept duplicates, which
    # could re-generate identical candidate commands).
    candidates = list(dict.fromkeys(w for w in words if w not in stop))[:25]

    for noun in candidates:
        for verb in ("examine", "take", "open"):
            cmd = f"{verb} {noun}"
            if cmd not in self.failed_actions:
                return cmd

    return "look"
400
 
401
  def _update_score(self, text: str):
 
407
  s = re.sub(r"\[Score:.*?\]", "", text, flags=re.I)
408
  s = re.sub(r"Score:\s*\d+|Moves:\s*\d+", "", s, flags=re.I)
409
  s = re.sub(r"\s+", " ", s).strip()
410
+ return s[:700]
411
+
412
+
413
+ # =============================================================================
414
+ # For local testing
415
+ # =============================================================================
416
+
417
async def test_agent():
    """Run the agent once against a local MCP server and print a summary."""
    from fastmcp import Client

    mcp_script = "mcp_server.py"  # Path to your MCP server
    player = StudentAgent()

    async with Client(mcp_script) as session:
        outcome = await player.run(
            client=session,
            game="zork1",
            max_steps=10,
            seed=42,
            verbose=True,
        )

    for line in (
        f"\nFinal Score: {outcome.final_score}",
        f"Moves: {outcome.moves}",
        f"Locations: {outcome.locations_visited}",
    ):
        print(line)
439
+
440
if __name__ == "__main__":
    # Ad-hoc entry point: run the local smoke test under a fresh event loop.
    import asyncio

    asyncio.run(test_agent())