Zork-Adventure-RL-Agent

Sleeping

App Files Files Community

InesManelB committed on Feb 22

Commit

ac36746

1 Parent(s): 5cf1bbc

Added runner code

Browse files

Files changed (6) hide show

.env.example +14 -0
agent.py +1 -4
cross_episode_memory.py +0 -1
games/__init__.py +6 -0
games/zork_env.py +219 -0
run_agent.py +226 -0

.env.example ADDED Viewed

	@@ -0,0 +1,14 @@

+# Hugging Face Hub Configuration
+HF_TOKEN=your_huggingface_token_here
+# Model Configuration
+# Default model for all modes (react, function, mcp)
+HF_MODEL=meta-llama/Llama-3.2-3B-Instruct
+# Alternative models to try:
+# HF_MODEL=google/gemma-2-2b-it
+# HF_MODEL=Qwen/Qwen2.5-7B-Instruct
+# Optional API Keys (if using other providers)
+# ANTHROPIC_API_KEY=your_anthropic_key_here
+# OPENAI_API_KEY=your_openai_key_here

agent.py CHANGED Viewed

@@ -193,11 +193,8 @@ class StudentAgent:
             # Build prompt with context
             user_prompt, sys_prompt, memory_text = self._build_prompt(state)
-            # Get response format
-            response_format = self._get_response_format()
             # Call LLM for reasoning (use step-based seed for variety)
-            response = call_llm(user_prompt, sys_prompt, seed=seed + step, response_format=response_format)
             # Parse the response
             action, options_with_confidences = self._parse_response(response)

             # Build prompt with context
             user_prompt, sys_prompt, memory_text = self._build_prompt(state)
             # Call LLM for reasoning (use step-based seed for variety)
+            response = call_llm(user_prompt, sys_prompt, seed=seed + step)
             # Parse the response
             action, options_with_confidences = self._parse_response(response)

cross_episode_memory.py CHANGED Viewed

@@ -22,7 +22,6 @@ from sentence_transformers import SentenceTransformer
 from huggingface_hub import InferenceClient
 import faiss
 import traceback
-from openai import OpenAI
 load_dotenv()

 from huggingface_hub import InferenceClient
 import faiss
 import traceback
 load_dotenv()

games/__init__.py ADDED Viewed

	@@ -0,0 +1,6 @@

+# Re-export the environment API from zork_env so callers can import it
+# directly from the games package.
+from .zork_env import TextAdventureEnv, GameState, list_available_games, discover_games
+# Alias for backwards compatibility: ZorkEnvironment is the very same class
+# object as TextAdventureEnv, not a subclass.
+ZorkEnvironment = TextAdventureEnv
+# Names exported by `from games import *`.
+__all__ = ["TextAdventureEnv", "ZorkEnvironment", "GameState", "list_available_games", "discover_games"]

games/zork_env.py ADDED Viewed

	@@ -0,0 +1,219 @@

+"""
+Text Adventure Game Environment
+Provides a clean interface to text adventure games via Jericho.
+Supports Zork and many other classic Z-machine games.
+"""
+from jericho import FrotzEnv
+from dataclasses import dataclass
+from typing import Optional
+from pathlib import Path
+import os
+@dataclass
+class GameState:
+    """Represents the current state of the game.
+
+    Instances are produced by TextAdventureEnv._make_game_state, which is
+    called from both reset() and step().
+    """
+    observation: str  # Text returned by the environment for the last reset/step
+    score: int  # info['score'] from the environment, 0 when the key is missing
+    max_score: int  # Result of env.get_max_score()
+    moves: int  # info['moves'] from the environment, 0 when the key is missing
+    done: bool  # Flag returned by env.step(); always False right after reset()
+    reward: int  # Points gained from last action
+    inventory: list[str]  # str() of each inventory object; [] if the lookup raised
+    location: str  # str() of the player location; "Unknown" if the lookup raised
+def get_default_games_dir() -> Path:
+    """Return the default game-suite directory.
+
+    The path is built relative to the parent of the directory that contains
+    this module; it is not checked for existence here.
+    """
+    module_file = Path(__file__)
+    root = module_file.parent.parent
+    return root.joinpath("z-machine-games-master", "jericho-game-suite")
+def discover_games(games_dir: Optional[Path] = None) -> dict[str, Path]:
+    """
+    Collect the Z-machine story files found directly inside a directory.
+    Args:
+        games_dir: Directory to scan (default: the jericho-game-suite folder)
+    Returns:
+        Dictionary, ordered by name, mapping each lower-cased file stem to
+        its path. Empty when the directory does not exist.
+    """
+    root = Path(get_default_games_dir() if games_dir is None else games_dir)
+    if not root.exists():
+        return {}
+    # Patterns are scanned in this order; when two files share a stem the
+    # one matched last replaces the earlier entry.
+    found = {
+        story.stem.lower(): story
+        for pattern in ("*.z3", "*.z4", "*.z5", "*.z8")
+        for story in root.glob(pattern)
+    }
+    return {name: found[name] for name in sorted(found)}
+def list_available_games(games_dir: Optional[Path] = None) -> list[str]:
+    """Return the names of all discovered games, in sorted order."""
+    games_by_name = discover_games(games_dir)
+    return list(games_by_name)
+class TextAdventureEnv:
+    """Convenience layer over Jericho's FrotzEnv for text adventure games."""
+    def __init__(self, game: str = "zork1", games_dir: Optional[str] = None):
+        """
+        Set up the environment for one game.
+        Args:
+            game: Name of a discovered game (e.g., 'zork1', 'advent', 'enchanter')
+                  or a path to an existing game file
+            games_dir: Directory searched when resolving a game name (optional)
+        Raises:
+            ValueError: If game is neither an existing file nor a known name
+        """
+        if os.path.isfile(game):
+            # An existing file takes priority; its stem becomes the game name
+            game_path = Path(game)
+            self.game = game_path.stem
+        else:
+            key = game.lower()
+            catalog = discover_games(Path(games_dir) if games_dir else None)
+            if key not in catalog:
+                # Only the first 20 names are listed to keep the message short
+                preview = list(catalog)[:20]
+                raise ValueError(
+                    f"Unknown game: {game}. "
+                    f"Available: {', '.join(preview)}... "
+                    f"({len(catalog)} total)"
+                )
+            game_path = catalog[key]
+            self.game = key
+        self.env = FrotzEnv(str(game_path))
+        self.game_path = game_path
+        self._last_score = 0
+        self._history: list[tuple[str, str]] = []  # (action, observation) pairs
+    def reset(self) -> GameState:
+        """Restart the game and clear the tracked score and history."""
+        observation, info = self.env.reset()
+        self._history = []
+        self._last_score = 0
+        return self._make_game_state(observation, info, done=False, reward=0)
+    def step(self, action: str) -> GameState:
+        """
+        Send one command to the game.
+        Args:
+            action: The text command to execute (e.g., "go north", "take lamp")
+        Returns:
+            GameState describing the outcome of the command
+        """
+        observation, _env_reward, done, info = self.env.step(action)
+        # The reward reported to callers is the score delta since the previous
+        # step, not the value returned by the environment
+        score_now = info.get('score', 0)
+        delta = score_now - self._last_score
+        self._last_score = score_now
+        self._history.append((action, observation))
+        return self._make_game_state(observation, info, done, delta)
+    def _make_game_state(self, observation: str, info: dict, done: bool, reward: int) -> GameState:
+        """Bundle the observation and environment details into a GameState."""
+        # Inventory and location lookups are best-effort (may fail without spacy)
+        try:
+            inventory = list(map(str, self.env.get_inventory()))
+        except Exception:
+            inventory = []
+        try:
+            location = str(self.env.get_player_location())
+        except Exception:
+            location = "Unknown"
+        fields = dict(
+            observation=observation,
+            score=info.get('score', 0),
+            max_score=self.env.get_max_score(),
+            moves=info.get('moves', 0),
+            done=done,
+            reward=reward,
+            inventory=inventory,
+            location=location,
+        )
+        return GameState(**fields)
+    def get_history(self) -> list[tuple[str, str]]:
+        """Return a shallow copy of the recorded (action, observation) pairs."""
+        return list(self._history)
+    def get_valid_actions(self) -> list[str]:
+        """
+        Return the actions the environment reports as valid right now.
+        Falls back to a fixed list of common commands when the lookup raises
+        (for example when spacy is not properly installed).
+        """
+        fallback = [
+            "north", "south", "east", "west",
+            "up", "down", "look", "inventory",
+            "take all", "open mailbox", "read"
+        ]
+        try:
+            return self.env.get_valid_actions(use_ctypes=True, use_parallel=False)
+        except Exception:
+            return fallback
+    def save_state(self):
+        """Return the environment's current state object."""
+        snapshot = self.env.get_state()
+        return snapshot
+    def load_state(self, state):
+        """Restore a state previously obtained from save_state()."""
+        self.env.set_state(state)
+    def get_walkthrough(self) -> list[str]:
+        """Return the game's walkthrough (for debugging/comparison only)."""
+        steps = self.env.get_walkthrough()
+        return steps
+# Alias for backwards compatibility
+ZorkEnvironment = TextAdventureEnv
+# Example usage
+if __name__ == "__main__":
+    import sys
+    # Show how many games were found and a short preview of their names
+    names = list_available_games()
+    print(f"Available games ({len(names)} total):")
+    print(f"  {', '.join(names[:15])}...")
+    print()
+    # The first command line argument picks the game; zork1 otherwise
+    chosen = sys.argv[1] if len(sys.argv) >= 2 else "zork1"
+    env = TextAdventureEnv(chosen)
+    opening = env.reset()
+    print(f"=== {env.game.upper()} ===")
+    print(f"Max Score: {opening.max_score}")
+    print(f"\n{opening.observation}")
+    print(f"\nValid actions: {env.get_valid_actions()[:10]}...")
+    # Issue a couple of commands and print what comes back
+    for command in ("look", "inventory"):
+        print(f"\n> {command}")
+        result = env.step(command)
+        print(result.observation)
+        print(f"Score: {result.score}, Reward: {result.reward}")

run_agent.py ADDED Viewed

	@@ -0,0 +1,226 @@

+#!/usr/bin/env python3
+"""
+Text Adventure Agent Runner
+Run the MCP ReAct agent to play text adventure games like Zork.
+Usage:
+    python run_agent.py
+    python run_agent.py --game advent
+    python run_agent.py --max-steps 50
+    python run_agent.py --agent hidden_submission
+Examples:
+    # Run the default game (lostpig) with the default agent (example_submission)
+    python run_agent.py
+    # Play a different game
+    python run_agent.py --game advent
+    # Use a different agent folder
+    python run_agent.py --agent hidden_submission
+    # List all available games
+    python run_agent.py --list-games
+    # List all available agent folders
+    python run_agent.py --list-agents
+    # Run with verbose output
+    python run_agent.py -v
+"""
+import argparse
+import sys
+import os
+import asyncio
+from pathlib import Path
+# Add games module to path for discovering available games
+sys.path.insert(0, str(Path(__file__).parent))
+from games.zork_env import list_available_games
+def find_agent_folders() -> list[str]:
+    """Return "." plus every sub-folder holding both agent.py and mcp_server.py.
+
+    Only direct children of this script's directory are inspected, and the
+    result is sorted.
+    """
+    root = Path(__file__).parent
+    required = ("agent.py", "mcp_server.py")
+    matches = [
+        entry.name
+        for entry in root.iterdir()
+        if entry.is_dir() and all((entry / fname).exists() for fname in required)
+    ]
+    # "." is always offered, whether or not the root itself holds the files
+    return sorted([".", *matches])
+async def run_mcp_agent(args):
+    """Start the chosen folder's MCP server and let its StudentAgent play.
+    Args:
+        args: Parsed CLI namespace; agent, game, max_steps and verbose are read
+    Returns:
+        Whatever the agent's run() coroutine returns
+    Raises:
+        FileNotFoundError: If the folder, agent.py or mcp_server.py is missing
+    """
+    agent_folder = Path(__file__).parent / args.agent
+    agent_file = agent_folder / "agent.py"
+    server_file = agent_folder / "mcp_server.py"
+    # Check the layout before touching sys.path or importing anything
+    if not agent_folder.exists():
+        raise FileNotFoundError(f"Agent folder not found: {agent_folder}")
+    for required in (agent_file, server_file):
+        if not required.exists():
+            raise FileNotFoundError(f"{required.name} not found in {agent_folder}")
+    # The agent folder is put first on sys.path so it is searched first
+    sys.path.insert(0, str(agent_folder))
+    from agent import StudentAgent
+    from fastmcp import Client
+    from fastmcp.client.transports import StdioTransport
+    print(f"\n[MCP] Running Student Agent with FastMCP")
+    print(f"   Agent: {args.agent}/")
+    print(f"   Game: {args.game}")
+    print()
+    agent = StudentAgent(game=args.game)
+    # The server subprocess learns which game to load through GAME
+    server_env = {**os.environ, "GAME": args.game}
+    transport = StdioTransport(
+        command=sys.executable,
+        args=[str(server_file)],
+        env=server_env,
+    )
+    run_options = dict(
+        game=args.game,
+        max_steps=args.max_steps,
+        seed=42,  # Using a fixed seed for direct running
+        verbose=args.verbose,
+    )
+    async with Client(transport) as client:
+        return await agent.run(client=client, **run_options)
+def main():
+    """Parse the command line, validate the choices and run the agent.
+
+    Exits with status 0 after --list-agents / --list-games, and with status 1
+    when the agent folder or game is unknown or when running the agent raises
+    FileNotFoundError, ValueError or ImportError.
+    Returns:
+        The value returned by run_mcp_agent() on a successful run
+    """
+    # Defaults are named once so the help text below cannot drift away from
+    # the values argparse actually applies.
+    default_agent = "example_submission"
+    default_game = "lostpig"
+    # Find available agent folders
+    agent_folders = find_agent_folders()
+    parser = argparse.ArgumentParser(
+        description="Run the MCP ReAct agent to play text adventure games",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=f"""
+Examples:
+  python run_agent.py                           # Play {default_game} with {default_agent}
+  python run_agent.py --game advent             # Play Adventure
+  python run_agent.py --agent hidden_submission # Use hidden agent
+  python run_agent.py --list-games              # List all games
+  python run_agent.py --list-agents             # List all agent folders
+  python run_agent.py -v                        # Verbose output
+        """
+    )
+    # Get available games for help text
+    available_games = list_available_games()
+    game_help = f"Game to play (default: {default_game}). {len(available_games)} games available."
+    agent_help = f"Agent folder to use (default: {default_agent}). Available: {', '.join(agent_folders)}"
+    parser.add_argument(
+        "--agent", "-a",
+        type=str,
+        default=default_agent,
+        help=agent_help
+    )
+    parser.add_argument(
+        "--game", "-g",
+        type=str,
+        default=default_game,
+        help=game_help
+    )
+    parser.add_argument(
+        "--list-games",
+        action="store_true",
+        help="List all available games and exit"
+    )
+    parser.add_argument(
+        "--list-agents",
+        action="store_true",
+        help="List all available agent folders and exit"
+    )
+    parser.add_argument(
+        "--max-steps", "-n",
+        type=int,
+        default=100,
+        help="Maximum number of steps to run (default: 100)"
+    )
+    parser.add_argument(
+        "--verbose", "-v",
+        action="store_true",
+        help="Show detailed reasoning from the agent"
+    )
+    args = parser.parse_args()
+    # Handle --list-agents
+    if args.list_agents:
+        print(f"\nAvailable agent folders ({len(agent_folders)} total):\n")
+        for folder in agent_folders:
+            print(f"  {folder}/")
+        print("\nEach folder must contain agent.py and mcp_server.py")
+        print()
+        sys.exit(0)
+    # Handle --list-games
+    if args.list_games:
+        print(f"\nAvailable games ({len(available_games)} total):\n")
+        # Print in columns
+        cols = 5
+        for i in range(0, len(available_games), cols):
+            row = available_games[i:i+cols]
+            print("  " + "  ".join(f"{g:<15}" for g in row))
+        print()
+        sys.exit(0)
+    # Validate agent choice
+    if args.agent not in agent_folders:
+        print(f"\nError: Unknown agent folder '{args.agent}'")
+        print(f"Available: {', '.join(agent_folders)}")
+        print("Use --list-agents to see details.")
+        sys.exit(1)
+    # Validate game choice (discovered game names are stored lower-cased)
+    if args.game.lower() not in available_games:
+        print(f"\nError: Unknown game '{args.game}'")
+        print(f"Use --list-games to see {len(available_games)} available options.")
+        sys.exit(1)
+    print("\n" + "=" * 60)
+    print("Text Adventure MCP Agent Runner")
+    print("=" * 60)
+    print(f"Agent: {args.agent}/")
+    print(f"Game: {args.game}")
+    print(f"Max Steps: {args.max_steps}")
+    print(f"Verbose: {args.verbose}")
+    # Run the agent
+    try:
+        results = asyncio.run(run_mcp_agent(args))
+    except FileNotFoundError as e:
+        print(f"\n[Error] {e}")
+        sys.exit(1)
+    except ValueError as e:
+        print(f"\n[Error] {e}")
+        print("\nTo fix this:")
+        print("1. Copy .env.example to .env")
+        print("2. Add your HuggingFace token (HF_TOKEN)")
+        sys.exit(1)
+    except ImportError as e:
+        print(f"\n[Import Error] {e}")
+        print("\nMake sure to install dependencies:")
+        print("  pip install -r requirements.txt")
+        sys.exit(1)
+    return results
+if __name__ == "__main__":
+    main()