Spaces:

gbl1357
/

AgenticZork

No application file

App Files Files Community

AgenticZork / agent.py

gbl1357

Update agent.py

d86f7b8 verified about 2 months ago

raw

history blame contribute delete

16.7 kB

	"""
	Optimized MCP ReAct Agent for Generalized Text Adventures
	Designed to maximize score across 51 Jericho games.
	"""

	import json
	import os
	import re
	import sys
	from dataclasses import dataclass, field
	from pathlib import Path
	from typing import Optional, Any

	from dotenv import load_dotenv
	from huggingface_hub import InferenceClient

	# Load variables from a local .env file into the environment before the
	# token lookup below (the error message further down points users at .env).
	load_dotenv()

	# Model identifier sent with every chat-completion request made by call_llm.
	LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

	# The Hugging Face token is mandatory: fail at import time with a clear
	# message instead of on the first request.
	_hf_token = os.getenv("HF_TOKEN")
	if not _hf_token:
	    raise ValueError("HF_TOKEN not found. Set it in your .env file.")

	# Single shared inference client, created once at import time.
	LLM_CLIENT = InferenceClient(token=_hf_token)

	def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
	    """Send one system + user exchange to the chat model and return its reply.

	    The request uses ``temperature=0.0`` together with the caller-supplied
	    ``seed``.

	    Args:
	        prompt: Text sent as the ``user`` message.
	        system_prompt: Text sent as the ``system`` message.
	        seed: Sampling seed forwarded to the API.
	        max_tokens: Upper bound on the length of the generated reply.

	    Returns:
	        The reply text of the first choice, or ``""`` when the API returns no
	        choices or a message without content, so callers can always treat
	        the result as a string.
	    """
	    response = LLM_CLIENT.chat.completions.create(
	        model=LLM_MODEL,
	        messages=[
	            {"role": "system", "content": system_prompt},
	            {"role": "user", "content": prompt},
	        ],
	        temperature=0.0,
	        max_tokens=max_tokens,
	        seed=seed,
	    )
	    choices = response.choices
	    if not choices:
	        return ""
	    # ``content`` may be None (e.g. an empty completion); normalise to "".
	    return choices[0].message.content or ""

	@dataclass
	class RunResult:
	    """Summary of a single game session, as returned by ``StudentAgent.run``."""

	    # Highest score the agent parsed out of the game output.
	    final_score: int
	    # Maximum attainable score as reported by the agent (currently a constant).
	    max_score: int
	    # Number of ``play_action`` commands issued inside the step loop.
	    moves: int
	    # Names of every location the agent recorded during the run.
	    locations_visited: set[str]
	    # True when the last observation contained a game-over phrase.
	    game_completed: bool
	    # Optional error description; None when no error was recorded.
	    error: Optional[str] = None
	    # One (thought, tool call, truncated observation) tuple per step.
	    history: list[tuple[str, str, str]] = field(default_factory=list)

	# System prompt sent with every LLM call. It lists the four tools the agent
	# may request, gives game-independent heuristics for scoring points, and fixes
	# the THOUGHT / TOOL / ARGS reply format that StudentAgent._parse_response reads.
	SYSTEM_PROMPT = """You are an expert AI agent playing a classic text adventure game. Your ultimate goal is to MAXIMIZE YOUR SCORE. To get points, you must explore, interact with objects, solve puzzles, and collect treasures.

	AVAILABLE TOOLS:
	1. play_action - Execute game commands (e.g., 'north', 'take lamp', 'examine door')
	2. memory - Get current game state, score, and recent history
	3. get_map - See explored locations and connections
	4. inventory - Check what you're carrying

	UNIVERSAL HEURISTICS FOR SCORING POINTS:
	1. TAKE EVERYTHING: If a room description mentions an item, your FIRST action should be "take <item>" or "take all".
	2. EXAMINE NOUNS: If you enter a room and see an object (e.g., a rug, a tree, a button), "examine <object>" to find hidden clues.
	3. OPEN CONTAINERS: If you see a door, window, box, chest, or mailbox, try to "open <object>".
	4. EXPLORE UNMAPPED AREAS: Try compass directions (n, s, e, w, u, d, ne, nw, se, sw) to find new rooms.
	5. NEVER PING-PONG: Do not walk back and forth between two rooms (e.g., going East, then immediately West) unless you hit a dead end.
	6. LEARN FROM FAILURE: If a command says "You can't do that" or "I don't understand", NEVER try that exact command again.
	7. USE INVENTORY: If you are stuck, check your inventory. Try to "wear", "eat", "turn on", or "unlock <object> with <item>".

	RESPOND IN THIS EXACT FORMAT:
	THOUGHT: <Identify nouns/objects in the room description to interact with, OR decide which unexplored direction to take>
	TOOL: <tool_name>
	ARGS: <JSON arguments>

	Example of excellent gameplay:
	THOUGHT: The description mentions a mailbox. I should open it to see if there is a treasure or clue inside.
	TOOL: play_action
	ARGS: {"action": "open mailbox"}
	"""

	class StudentAgent:
	    """ReAct-style agent that plays a text adventure through MCP tools.

	    On every step the agent builds a prompt from the latest observation and
	    its own bookkeeping, asks the LLM for a THOUGHT / TOOL / ARGS reply,
	    sanitises the requested tool call and executes it through the client.
	    Exploration and failure bookkeeping is kept per room, keyed by the
	    location name extracted from the game text.
	    """

	    # Directions offered by the forced-exploration fallback, in priority order.
	    _ALL_DIRECTIONS = ("north", "south", "east", "west", "up", "down", "ne", "nw", "se", "sw")

	    # Opposite of every direction; used to detect immediate backtracking.
	    _REVERSE = {
	        "north": "south", "south": "north",
	        "east": "west", "west": "east",
	        "up": "down", "down": "up",
	        "ne": "sw", "sw": "ne",
	        "nw": "se", "se": "nw",
	    }

	    # Alternative spellings folded onto the names used in _ALL_DIRECTIONS so
	    # that "n" and "north" are tracked as the same move.
	    _DIRECTION_ALIASES = {
	        "n": "north", "s": "south", "e": "east", "w": "west", "u": "up", "d": "down",
	        "northeast": "ne", "northwest": "nw", "southeast": "se", "southwest": "sw",
	    }

	    # Verb phrases the model tends to produce -> standard parser verbs.
	    # Matched as a prefix of the whole action so multi-word phrases work.
	    _VERB_ALIASES = (
	        ("pick up", "take"),
	        ("check", "examine"),
	        ("inspect", "examine"),
	        ("investigate", "examine"),
	        ("grab", "take"),
	    )

	    # Substrings that mark an observation as a failed command.
	    _FAILURE_PHRASES = (
	        "can't", "cannot", "don't", "not", "fail", "impossible", "doesn't work",
	        "not allowed", "look dark", "i don't understand", "no such",
	    )

	    # Substrings that disqualify a line from being a room name.
	    _NON_LOCATION_PHRASES = (
	        "you can't go", "you cannot go", "impenetrable", "nothing special",
	        "doesn't seem to work", "i don't understand", "it's pitch black", "locked",
	        "closed", "inventory:", "valid actions:", "there is no", "you hear",
	        "you are empty-handed", "already", "that's not", "what do you want to",
	        "i see no", "failed",
	    )

	    _GAME_OVER_PHRASES = ("game over", "you have died", "you are dead", "* you have died *")

	    # Each pattern contributes its first match; the largest value wins.
	    _SCORE_PATTERNS = tuple(
	        re.compile(pattern, re.IGNORECASE)
	        for pattern in (r"Score:\s*(\d+)", r"score[:\s]+(\d+)", r"\[Score:\s*(\d+)")
	    )

	    # Last-resort extraction of the action from malformed ARGS JSON.
	    _ACTION_PATTERN = re.compile(r'"action"\s*:\s*"([^"]+)"')

	    def __init__(self, logger: Any = None, enable_logging: bool = False):
	        """Create an agent.

	        Args:
	            logger: Optional logger object; stored on ``self.logger``.
	            enable_logging: Stored on ``self.enable_logging``; not otherwise
	                used by this class.
	        """
	        self.logger = logger
	        self.enable_logging = enable_logging
	        self._reset_run_state()

	    def _reset_run_state(self) -> None:
	        """Reset all per-game bookkeeping so one agent can play several games."""
	        self.history: list[dict] = []
	        self.recent_actions: list[str] = []
	        self.score: int = 0
	        self.locations_explored: set[str] = set()
	        self.steps_since_map_check: int = 0
	        self.steps_since_progress: int = 0
	        self.current_map: Optional[str] = None
	        self.walkthrough_hints: Optional[list[str]] = None
	        self.current_inventory: list[str] = []
	        self.last_direction_moved: Optional[str] = None
	        # Per-room memory; the two public attributes below always alias the
	        # entries of the room the agent is currently in.
	        self._untried_by_room: dict[str, list[str]] = {}
	        self._failed_by_room: dict[str, dict[str, int]] = {}
	        self.unexplored_directions: list[str] = []
	        self.failed_actions: dict[str, int] = {}

	    def _enter_room(self, name: str) -> None:
	        """Point the direction/failure bookkeeping at room ``name``."""
	        self.unexplored_directions = self._untried_by_room.setdefault(name, list(self._ALL_DIRECTIONS))
	        self.failed_actions = self._failed_by_room.setdefault(name, {})

	    def _pop_unexplored(self, avoid=()) -> Optional[str]:
	        """Remove and return the first untried direction worth attempting.

	        Directions listed in ``avoid`` or already failed twice in this room
	        are skipped (and left in the list). Returns None when nothing is left.
	        """
	        for index, direction in enumerate(self.unexplored_directions):
	            if direction in avoid or self.failed_actions.get(direction, 0) >= 2:
	                continue
	            return self.unexplored_directions.pop(index)
	        return None

	    async def run(
	        self, client, game: str, max_steps: int, seed: int, verbose: bool = False,
	        walkthrough: Optional[list[str]] = None
	    ) -> "RunResult":
	        """Play one game for at most ``max_steps`` LLM-driven steps.

	        Args:
	            client: MCP client exposing ``list_tools()`` and ``call_tool()``.
	            game: Game identifier; accepted for interface compatibility and
	                not used by the agent itself.
	            max_steps: Maximum number of LLM decisions to take.
	            seed: Base seed; step ``i`` calls the LLM with ``seed + i``.
	            verbose: Print observations and decisions when True.
	            walkthrough: Optional hints, stored on ``self.walkthrough_hints``.

	        Returns:
	            A RunResult summarising score, moves, locations and history.
	        """
	        self._reset_run_state()
	        self.walkthrough_hints = walkthrough
	        locations_visited: set[str] = set()
	        history: list[tuple[str, str, str]] = []
	        moves = 0

	        tools = await client.list_tools()
	        tool_names = [t.name for t in tools]

	        inv_result = await client.call_tool("inventory", {})
	        self.current_inventory = self._parse_inventory(self._extract_result(inv_result))

	        result = await client.call_tool("play_action", {"action": "look"})
	        observation = self._extract_result(result)

	        location = self._extract_location(observation, "Start")
	        locations_visited.add(location)
	        self.locations_explored.add(location)
	        self._enter_room(location)

	        if verbose:
	            print(f"\n{observation}")

	        for step in range(1, max_steps + 1):
	            # Refresh the map periodically, and on every step while stuck.
	            self.steps_since_map_check += 1
	            map_due = self.steps_since_map_check >= 6 or self.steps_since_progress > 3
	            if map_due and "get_map" in tool_names:
	                map_result = await client.call_tool("get_map", {})
	                self.current_map = self._extract_result(map_result)
	                self.steps_since_map_check = 0

	            prompt = self._build_prompt(observation)
	            response = call_llm(prompt, SYSTEM_PROMPT, seed + step)
	            thought, tool_name, tool_args = self._parse_response(response, tool_names)

	            if verbose:
	                print(f"\n--- Step {step} ---")
	                print(f"[THOUGHT] {thought}")
	                print(f"[TOOL] {tool_name}({tool_args})")

	            tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)

	            action = None
	            if tool_name == "play_action":
	                action = tool_args.get("action", "look")
	                self.recent_actions.append(action)
	                self.recent_actions = self.recent_actions[-5:]

	                # Severe anti-loop detection: the same command three times
	                # in a row is replaced by an untried direction (or "look").
	                if len(self.recent_actions) >= 3 and len(set(self.recent_actions[-3:])) == 1:
	                    action = self._pop_unexplored(avoid={action}) or "look"
	                    tool_args = {"action": action}
	                    self.recent_actions[-1] = action

	                # A direction the model picked itself is no longer untried.
	                if action in self.unexplored_directions:
	                    self.unexplored_directions.remove(action)

	                moves += 1

	            call_failed = False
	            try:
	                result = await client.call_tool(tool_name, tool_args)
	                observation = self._extract_result(result)
	                if tool_name == "inventory":
	                    self.current_inventory = self._parse_inventory(observation)
	            except Exception as e:
	                # Best effort: surface the failure to the model as the observation.
	                observation = f"Error: {e}"
	                call_failed = True

	            # Only game output can reveal a room change; map/memory/inventory
	            # text and error messages must not be mistaken for a room name.
	            if tool_name == "play_action" and not call_failed:
	                new_location = self._extract_location(observation, location)
	            else:
	                new_location = location

	            old_score = self.score
	            self._update_score(observation)

	            # Check for TRUE progress (new room or more points).
	            is_new_room = new_location not in self.locations_explored
	            moved = new_location != location
	            if is_new_room or self.score > old_score:
	                self.steps_since_progress = 0
	            else:
	                self.steps_since_progress += 1

	            if tool_name == "play_action":
	                # Remember a direction only when it really took us somewhere,
	                # so a blocked exit never forbids walking the opposite way.
	                self.last_direction_moved = action if moved and action in self._REVERSE else None

	                # A command counts as failed only if it achieved nothing.
	                if not moved and self.score <= old_score:
	                    lowered = observation.lower()
	                    if any(phrase in lowered for phrase in self._FAILURE_PHRASES):
	                        self.failed_actions[action] = self.failed_actions.get(action, 0) + 1

	            if moved:
	                location = new_location
	                locations_visited.add(location)
	                self.locations_explored.add(location)
	                self._enter_room(location)

	            if verbose:
	                print(
	                    f"[LOCATION] {location} | Score: {self.score} | "
	                    f"Explored: {len(self.locations_explored)} | "
	                    f"Progress Steps: {self.steps_since_progress}"
	                )

	            self.history.append({
	                "step": step, "thought": thought, "tool": tool_name, "args": tool_args,
	                "result": observation[:200], "location": location, "score": self.score,
	            })
	            self.history = self.history[-10:]
	            history.append((thought, f"{tool_name}({tool_args})", observation[:100]))

	            if self._is_game_over(observation):
	                if verbose:
	                    print("\n* GAME OVER *")
	                break

	        # NOTE(review): max_score is a hard-coded 350 regardless of the game
	        # being played; confirm whether the evaluator relies on it.
	        return RunResult(
	            final_score=self.score, max_score=350, moves=moves,
	            locations_visited=locations_visited,
	            game_completed=self._is_game_over(observation), history=history,
	        )

	    def _extract_location(self, observation: str, current_location: str = "Unknown") -> str:
	        """Guess the room name from an observation.

	        Returns the first non-empty line that does not start with ``[``,
	        contains none of the known non-location phrases and is not a sentence
	        (ends with ``.`` and has more than three words). Falls back to
	        ``current_location`` when no line qualifies.
	        """
	        if not observation:
	            return current_location
	        for raw_line in observation.strip().split("\n"):
	            line = raw_line.strip()
	            if not line or line.startswith("["):
	                continue
	            lowered = line.lower()
	            if any(phrase in lowered for phrase in self._NON_LOCATION_PHRASES):
	                continue
	            if line.endswith(".") and len(line.split()) > 3:
	                continue
	            return line
	        return current_location

	    def _build_prompt(self, observation: str) -> str:
	        """Assemble the user prompt for the next LLM call.

	        Includes the score, the last three actions, situational advice, the
	        known map while the agent is stuck, the commands that failed at least
	        twice in the current room, and finally the latest observation.
	        """
	        parts = [f"Current Score: {self.score}", f"Locations explored: {len(self.locations_explored)}"]

	        if self.history:
	            parts.append("\nRecent actions:")
	            for entry in self.history[-3:]:
	                action = entry.get("args", {}).get("action", entry["tool"])
	                res = entry["result"].replace("\n", " ")
	                res_short = res[:80] + "..." if len(res) > 80 else res
	                parts.append(f" > {action} -> {res_short}")

	        # Dynamic state injection.
	        if self.steps_since_progress == 0 and observation != "Unknown" and len(self.history) > 0:
	            parts.append("\n[TACTICAL ADVICE: You just discovered a new area!]")
	            parts.append("1. DO NOT move to another room yet.")
	            parts.append("2. Look closely at the description below. Are there any objects mentioned? (e.g., mailbox, chest, sword)")
	            parts.append("3. If yes, you MUST try to 'take', 'open', or 'examine' them right now.")
	        elif self.steps_since_progress > 3:
	            parts.append(f"\n[CRITICAL WARNING: You have made {self.steps_since_progress} moves with NO score increase and NO NEW ROOMS.]")
	            parts.append("You are walking in circles through already-explored areas. STOP WANDERING.")
	            parts.append("To break out of this loop, you MUST do one of the following:")
	            parts.append(" 1. Call the 'get_map' tool to see which directions you haven't tried yet.")
	            parts.append(" 2. Move in a completely unexplored direction (n, s, e, w, u, d).")
	            parts.append(" 3. Examine or interact with an object you previously ignored.")
	            # The map is re-fetched every step while stuck; show it to the model.
	            if self.current_map:
	                parts.append(f"\nKnown map:\n{self.current_map}")

	        # Warn about failed actions.
	        failed_list = [f"'{k}'" for k, v in self.failed_actions.items() if v >= 2]
	        if failed_list:
	            parts.append(f"\n[AVOID: These actions do not work here: {', '.join(failed_list)}]")

	        parts.append(f"\nCurrent situation:\n{observation}\n\nWhat do you do next?")
	        return "\n".join(parts)

	    def _parse_args(self, args_part: str) -> dict:
	        """Decode the ARGS payload, tolerating single quotes and broken JSON."""
	        # Try the text as written first so apostrophes inside values survive.
	        for candidate in (args_part, args_part.replace("'", '"')):
	            try:
	                parsed = json.loads(candidate)
	            except ValueError:
	                continue
	            if isinstance(parsed, dict):
	                return parsed
	        match = self._ACTION_PATTERN.search(args_part)
	        return {"action": match.group(1)} if match else {"action": "look"}

	    def _parse_response(self, response: str, valid_tools: list[str]) -> tuple[str, str, dict]:
	        """Parse a THOUGHT / TOOL / ARGS reply from the LLM.

	        Markdown decoration around the labels (``**TOOL:**``, backticks) is
	        tolerated. Missing parts keep their defaults: a placeholder thought,
	        the ``play_action`` tool and the ``look`` action.

	        Args:
	            response: Raw model reply; None or empty yields the defaults.
	            valid_tools: Unused here; validation happens in
	                ``_validate_tool_call``.

	        Returns:
	            ``(thought, tool_name, tool_args)``.
	        """
	        thought, tool_name, tool_args = "No reasoning provided", "play_action", {"action": "look"}
	        for line in (response or "").strip().split("\n"):
	            label, sep, value = line.strip().lstrip("*#` ").partition(":")
	            if not sep:
	                continue
	            label = label.strip("*` ").upper()
	            value = value.strip()
	            if label == "THOUGHT":
	                thought = value.lstrip("*").strip()
	            elif label == "TOOL":
	                words = value.lower().replace("*", "").replace("`", "").split()
	                tool_name = words[0] if words else "play_action"
	            elif label == "ARGS":
	                tool_args = self._parse_args(value.strip("*`").strip())
	        return thought, tool_name, tool_args

	    def _normalize_action(self, action: str) -> str:
	        """Lower-case an action, map verb aliases and canonicalise directions."""
	        action = " ".join(action.lower().replace("**", "").split())
	        for phrase, verb in self._VERB_ALIASES:
	            if action == phrase or action.startswith(phrase + " "):
	                action = verb + action[len(phrase):]
	                break
	        return self._DIRECTION_ALIASES.get(action, action)

	    def _validate_tool_call(self, tool_name: str, tool_args: dict, valid_tools: list[str]) -> tuple[str, dict]:
	        """Sanitise a tool call before it is executed.

	        Unknown tools become ``play_action``. For ``play_action`` the action
	        is normalised, an immediate reversal of the last successful move is
	        redirected to an untried direction when one exists, and a command
	        that already failed twice in this room is replaced by an untried
	        direction or ``look``.

	        Returns:
	            ``(tool_name, tool_args)``; ``tool_args`` is a new dict when the
	            tool is ``play_action``.
	        """
	        if tool_name not in valid_tools:
	            tool_name = "play_action"
	        if not isinstance(tool_args, dict):
	            tool_args = {}
	        if tool_name != "play_action":
	            return tool_name, tool_args

	        action = self._normalize_action(str(tool_args.get("action") or "look")) or "look"

	        # Prevent immediate backtracking (ping-ponging).
	        last_move = self.last_direction_moved
	        if last_move and action == self._REVERSE.get(last_move):
	            action = self._pop_unexplored(avoid={action}) or action

	        # Prevent repeating failed actions.
	        if self.failed_actions.get(action, 0) >= 2:
	            action = self._pop_unexplored() or "look"

	        return tool_name, {**tool_args, "action": action}

	    def _extract_result(self, result) -> str:
	        """Return the text of a tool result, whatever shape the client uses.

	        Handles objects with a ``content`` list, plain lists of content
	        items, and anything else via ``str()``. Always returns a string.
	        """
	        content = getattr(result, "content", None)
	        items = content if content else (result if isinstance(result, list) else None)
	        if items:
	            text = getattr(items[0], "text", None)
	            return text if isinstance(text, str) else str(items[0])
	        return str(result)

	    def _update_score(self, text: str) -> None:
	        """Raise ``self.score`` to the largest score found in ``text``.

	        The stored score never decreases.
	        """
	        if not text:
	            return
	        for pattern in self._SCORE_PATTERNS:
	            match = pattern.search(text)
	            if match:
	                self.score = max(self.score, int(match.group(1)))

	    def _is_game_over(self, text: str) -> bool:
	        """Return True when ``text`` contains a known game-over phrase."""
	        lowered = (text or "").lower()
	        return any(phrase in lowered for phrase in self._GAME_OVER_PHRASES)

	    def _parse_inventory(self, inv_text: str) -> list[str]:
	        """Split an inventory listing into item names.

	        Everything after the first ``:`` is split on commas and newlines, so
	        both ``"Inventory: lamp, sword"`` and one-item-per-line listings work.
	        Returns ``[]`` for empty-handed or unrecognised text.
	        """
	        if not inv_text:
	            return []
	        lowered = inv_text.lower()
	        if "empty-handed" in lowered or "nothing" in lowered:
	            return []
	        if ":" not in inv_text:
	            return []
	        listing = inv_text.split(":", 1)[1]
	        return [item.strip() for item in re.split(r"[,\n]", listing) if item.strip()]

	async def test_agent():
	    """Smoke test: run the agent on ``zork1`` via ``mcp_server.py`` and print a summary."""
	    from fastmcp import Client

	    agent = StudentAgent()
	    run_options = dict(game="zork1", max_steps=40, seed=42, verbose=True)
	    async with Client("mcp_server.py") as client:
	        result = await agent.run(client=client, **run_options)
	        summary = f"\n{'=' * 50}\nFinal Score: {result.final_score}\nMoves: {result.moves}\nLocations: {len(result.locations_visited)}"
	        print(summary)

	# Script entry point: when executed directly (not imported), run the
	# test_agent coroutine to completion.
	if __name__ == "__main__":
	    import asyncio
	    asyncio.run(test_agent())