InesManelB committed on
Commit
ac36746
·
1 Parent(s): 5cf1bbc

Added runner code

Browse files
Files changed (6) hide show
  1. .env.example +14 -0
  2. agent.py +1 -4
  3. cross_episode_memory.py +0 -1
  4. games/__init__.py +6 -0
  5. games/zork_env.py +219 -0
  6. run_agent.py +226 -0
.env.example ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Hub Configuration
2
+ HF_TOKEN=your_huggingface_token_here
3
+
4
+ # Model Configuration
5
+ # Default model for all modes (react, function, mcp)
6
+ HF_MODEL=meta-llama/Llama-3.2-3B-Instruct
7
+
8
+ # Alternative models to try:
9
+ # HF_MODEL=google/gemma-2-2b-it
10
+ # HF_MODEL=Qwen/Qwen2.5-7B-Instruct
11
+
12
+ # Optional API Keys (if using other providers)
13
+ # ANTHROPIC_API_KEY=your_anthropic_key_here
14
+ # OPENAI_API_KEY=your_openai_key_here
agent.py CHANGED
@@ -193,11 +193,8 @@ class StudentAgent:
193
  # Build prompt with context
194
  user_prompt, sys_prompt, memory_text = self._build_prompt(state)
195
 
196
- # Get response format
197
- response_format = self._get_response_format()
198
-
199
  # Call LLM for reasoning (use step-based seed for variety)
200
- response = call_llm(user_prompt, sys_prompt, seed=seed + step, response_format=response_format)
201
 
202
  # Parse the response
203
  action, options_with_confidences = self._parse_response(response)
 
193
  # Build prompt with context
194
  user_prompt, sys_prompt, memory_text = self._build_prompt(state)
195
 
 
 
 
196
  # Call LLM for reasoning (use step-based seed for variety)
197
+ response = call_llm(user_prompt, sys_prompt, seed=seed + step)
198
 
199
  # Parse the response
200
  action, options_with_confidences = self._parse_response(response)
cross_episode_memory.py CHANGED
@@ -22,7 +22,6 @@ from sentence_transformers import SentenceTransformer
22
  from huggingface_hub import InferenceClient
23
  import faiss
24
  import traceback
25
- from openai import OpenAI
26
 
27
  load_dotenv()
28
 
 
22
  from huggingface_hub import InferenceClient
23
  import faiss
24
  import traceback
 
25
 
26
  load_dotenv()
27
 
games/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from .zork_env import TextAdventureEnv, GameState, list_available_games, discover_games
2
+
3
+ # Alias for backwards compatibility
4
+ ZorkEnvironment = TextAdventureEnv
5
+
6
+ __all__ = ["TextAdventureEnv", "ZorkEnvironment", "GameState", "list_available_games", "discover_games"]
games/zork_env.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Text Adventure Game Environment
3
+
4
+ Provides a clean interface to text adventure games via Jericho.
5
+ Supports Zork and many other classic Z-machine games.
6
+ """
7
+
8
+ from jericho import FrotzEnv
9
+ from dataclasses import dataclass
10
+ from typing import Optional
11
+ from pathlib import Path
12
+ import os
13
+
14
+
15
@dataclass
class GameState:
    """Snapshot of the game after a reset or a single step.

    Field order is part of the interface (positional construction works).
    """

    observation: str      # Text the game printed
    score: int            # Current total score
    max_score: int        # Highest score the game allows
    moves: int            # Move counter reported by the game
    done: bool            # True once the episode has ended
    reward: int           # Points gained from last action
    inventory: list[str]  # Carried objects, rendered as strings
    location: str         # Player location, rendered as a string
26
+
27
+
28
def get_default_games_dir() -> Path:
    """Return the default location of the bundled Jericho game files.

    The path is resolved relative to the parent of the directory that
    contains this module; it is not checked for existence.
    """
    repo_root = Path(__file__).parent.parent
    return repo_root.joinpath("z-machine-games-master", "jericho-game-suite")
32
+
33
+
34
def discover_games(games_dir: Optional[Path] = None) -> dict[str, Path]:
    """
    Discover all available Z-machine games in the games directory.

    Only regular files directly inside ``games_dir`` are considered, and the
    extension match is case-insensitive (``ADVENT.Z3`` is found as
    ``advent``). Sub-directories are never searched.

    Args:
        games_dir: Directory to search for games (default: jericho-game-suite)

    Returns:
        Dictionary mapping lower-cased game name (without extension) to full
        path, ordered alphabetically by name. Empty when ``games_dir`` is not
        an existing directory.
    """
    if games_dir is None:
        games_dir = get_default_games_dir()

    games_dir = Path(games_dir)
    if not games_dir.is_dir():
        return {}

    # Z-machine story file extensions (.z3, .z4, .z5, .z8)
    story_suffixes = {".z3", ".z4", ".z5", ".z8"}

    # One directory scan; skip directories that merely look like story files.
    candidates = [
        path
        for path in games_dir.iterdir()
        if path.suffix.lower() in story_suffixes and path.is_file()
    ]

    # Sort by (extension, filename) so that when two files share a stem the
    # winner is deterministic: the later extension takes precedence.
    candidates.sort(key=lambda path: (path.suffix.lower(), path.name))

    # Use stem (filename without extension) as game name
    games = {path.stem.lower(): path for path in candidates}
    return dict(sorted(games.items()))
60
+
61
+
62
def list_available_games(games_dir: Optional[Path] = None) -> list[str]:
    """List the names of every discoverable game, in sorted order."""
    name_to_path = discover_games(games_dir)
    return list(name_to_path)
65
+
66
+
67
class TextAdventureEnv:
    """Wrapper around Jericho's FrotzEnv for text adventure games.

    The wrapper turns the tuples returned by the underlying environment into
    ``GameState`` objects, derives the per-step reward from the change in
    score, and records every (action, observation) pair since the last reset.

    Instances can be used as context managers; leaving the ``with`` block
    calls :meth:`close`.
    """

    def __init__(self, game: str = "zork1", games_dir: Optional[str] = None):
        """
        Initialize the text adventure environment.

        Args:
            game: Game name (e.g., 'zork1', 'advent', 'enchanter')
                  Can also be a full path to a .z* file
            games_dir: Directory containing game files (optional)

        Raises:
            ValueError: If ``game`` is neither an existing file nor the name
                of a discovered game.
        """
        # Check if game is a full path
        if os.path.isfile(game):
            game_path = Path(game)
            self.game = game_path.stem
        else:
            # Look up game by name (names are stored lower-cased)
            games_path = Path(games_dir) if games_dir else None
            available_games = discover_games(games_path)
            game_key = game.lower()

            if game_key not in available_games:
                names = list(available_games)
                shown = ", ".join(names[:20]) if names else "none"
                # Only hint at more entries when the list really was cut.
                more = "..." if len(names) > 20 else ""
                raise ValueError(
                    f"Unknown game: {game}. "
                    f"Available: {shown}{more} "
                    f"({len(names)} total)"
                )

            game_path = available_games[game_key]
            self.game = game_key

        self.env = FrotzEnv(str(game_path))
        self.game_path = game_path
        self._last_score = 0
        self._history: list[tuple[str, str]] = []  # (action, observation) pairs

    def __enter__(self) -> "TextAdventureEnv":
        """Return the environment itself for use in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the environment; exceptions are not suppressed."""
        self.close()

    def close(self) -> None:
        """Release the underlying environment if it exposes ``close()``."""
        closer = getattr(self.env, "close", None)
        if callable(closer):
            closer()

    def reset(self) -> "GameState":
        """Reset the game to the beginning and clear score/history tracking."""
        observation, info = self.env.reset()
        self._last_score = 0
        self._history = []
        return self._make_game_state(observation, info, done=False, reward=0)

    def step(self, action: str) -> "GameState":
        """
        Take an action in the game.

        Args:
            action: The text command to execute (e.g., "go north", "take lamp")

        Returns:
            GameState with the result of the action
        """
        # The reward reported by the environment is deliberately ignored;
        # the reward exposed to callers is the change in score.
        observation, _, done, info = self.env.step(action)

        # Track reward as score change
        current_score = info.get('score', 0)
        reward = current_score - self._last_score
        self._last_score = current_score

        # Record history
        self._history.append((action, observation))

        return self._make_game_state(observation, info, done, reward)

    def _make_game_state(self, observation: str, info: dict, done: bool, reward: int) -> "GameState":
        """Create a GameState from the environment info.

        Inventory and location lookups are best-effort: if either call fails
        the field falls back to ``[]`` / ``"Unknown"`` instead of raising.
        """
        # Try to get inventory and location (may fail without spacy)
        try:
            inventory = [str(obj) for obj in self.env.get_inventory()]
        except Exception:
            inventory = []

        try:
            location = str(self.env.get_player_location())
        except Exception:
            location = "Unknown"

        return GameState(
            observation=observation,
            score=info.get('score', 0),
            max_score=self.env.get_max_score(),
            moves=info.get('moves', 0),
            done=done,
            reward=reward,
            inventory=inventory,
            location=location,
        )

    def get_history(self) -> list[tuple[str, str]]:
        """Get a copy of the (action, observation) pairs since the last reset."""
        return self._history.copy()

    def get_valid_actions(self) -> list[str]:
        """
        Get a list of valid actions for the current state.
        Note: This requires spacy to be properly installed.

        If the underlying call fails for any reason, a fixed list of common
        commands is returned instead.
        """
        try:
            return self.env.get_valid_actions(use_ctypes=True, use_parallel=False)
        except Exception:
            # Return common actions if spacy isn't available
            return [
                "north", "south", "east", "west",
                "up", "down", "look", "inventory",
                "take all", "open mailbox", "read"
            ]

    def save_state(self):
        """Save the current game state and return the opaque snapshot."""
        return self.env.get_state()

    def load_state(self, state):
        """Load a game state previously returned by ``save_state``."""
        self.env.set_state(state)

    def get_walkthrough(self) -> list[str]:
        """Get the walkthrough for the game (for debugging/comparison only)."""
        return self.env.get_walkthrough()
187
+
188
+
189
+ # Alias for backwards compatibility
190
+ ZorkEnvironment = TextAdventureEnv
191
+
192
+
193
+ # Example usage
194
if __name__ == "__main__":
    import sys

    # Show a short preview of everything that can be played
    all_games = list_available_games()
    preview = ", ".join(all_games[:15])
    print(f"Available games ({len(all_games)} total):")
    print(f" {preview}...")
    print()

    # The first command-line argument picks the game; otherwise use zork1
    game = sys.argv[1] if sys.argv[1:] else "zork1"

    env = TextAdventureEnv(game)
    state = env.reset()

    title = env.game.upper()
    print(f"=== {title} ===")
    print(f"Max Score: {state.max_score}")
    print(f"\n{state.observation}")
    print(f"\nValid actions: {env.get_valid_actions()[:10]}...")

    # Issue a couple of harmless commands and report the outcome of each
    for action in ("look", "inventory"):
        print(f"\n> {action}")
        state = env.step(action)
        print(state.observation)
        print(f"Score: {state.score}, Reward: {state.reward}")
run_agent.py ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Text Adventure Agent Runner
4
+
5
+ Run the MCP ReAct agent to play text adventure games like Zork.
6
+
7
+ Usage:
8
+ python run_agent.py
9
+ python run_agent.py --game advent
10
+ python run_agent.py --max-steps 50
11
+ python run_agent.py --agent hidden_submission
12
+
13
+ Examples:
14
+ # Run the default game (lostpig) with the example agent
15
+ python run_agent.py
16
+
17
+ # Play a different game
18
+ python run_agent.py --game advent
19
+
20
+ # Use a different agent folder
21
+ python run_agent.py --agent hidden_submission
22
+
23
+ # List all available games
24
+ python run_agent.py --list-games
25
+
26
+ # Run with verbose output
27
+ python run_agent.py -v
28
+ """
29
+
30
+ import argparse
31
+ import sys
32
+ import os
33
+ import asyncio
34
+ from pathlib import Path
35
+
36
+ # Add games module to path for discovering available games
37
+ sys.path.insert(0, str(Path(__file__).parent))
38
+ from games.zork_env import list_available_games
39
+
40
+
41
def find_agent_folders() -> list[str]:
    """List agent folders: "." plus each sub-folder with both agent files.

    A sub-folder of this script's directory qualifies when it contains both
    ``agent.py`` and ``mcp_server.py``. The result is sorted.
    """
    root = Path(__file__).parent
    names = ["."]
    names.extend(
        entry.name
        for entry in root.iterdir()
        if entry.is_dir()
        and (entry / "agent.py").exists()
        and (entry / "mcp_server.py").exists()
    )
    names.sort()
    return names
54
+
55
+
56
async def run_mcp_agent(args):
    """Launch the MCP server of the chosen agent folder and run its agent.

    Args:
        args: Parsed command-line namespace; reads ``agent``, ``game``,
            ``max_steps`` and ``verbose``.

    Returns:
        The value returned by ``StudentAgent.run``.

    Raises:
        FileNotFoundError: If the agent folder, its ``agent.py`` or its
            ``mcp_server.py`` is missing.
    """
    folder = Path(__file__).parent / args.agent
    agent_script = folder / "agent.py"
    server_script = folder / "mcp_server.py"

    # Fail early when the folder layout is incomplete
    if not folder.exists():
        raise FileNotFoundError(f"Agent folder not found: {folder}")
    for script in (agent_script, server_script):
        if not script.exists():
            raise FileNotFoundError(f"{script.name} not found in {folder}")

    # Make the chosen folder importable before loading its agent module
    sys.path.insert(0, str(folder))
    from agent import StudentAgent
    from fastmcp import Client
    from fastmcp.client.transports import StdioTransport

    print("\n[MCP] Running Student Agent with FastMCP")
    print(f" Agent: {args.agent}/")
    print(f" Game: {args.game}")
    print()

    agent = StudentAgent(game=args.game)

    # The server subprocess learns which game to load through GAME
    child_env = {**os.environ, "GAME": args.game}
    transport = StdioTransport(
        command=sys.executable,
        args=[str(server_script)],
        env=child_env,
    )

    async with Client(transport) as client:
        outcome = await agent.run(
            client=client,
            game=args.game,
            max_steps=args.max_steps,
            seed=42,  # Using a fixed seed for direct running
            verbose=args.verbose,
        )
        return outcome
101
+
102
+
103
def main():
    """Parse command-line options, validate them, and run the selected agent.

    ``--list-agents`` and ``--list-games`` print their listing and exit with
    status 0. An unknown agent folder or game, a missing file, a
    ``ValueError`` or an ``ImportError`` raised while running print a message
    and exit with status 1.

    The game name is matched case-insensitively and passed on in lower case,
    which is how discovered game names are stored.

    Returns:
        The value returned by ``run_mcp_agent``.
    """
    # Single source of truth so the help text cannot drift from the defaults.
    default_game = "lostpig"
    default_agent = "example_submission"

    # Find available agent folders
    agent_folders = find_agent_folders()

    parser = argparse.ArgumentParser(
        description="Run the MCP ReAct agent to play text adventure games",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python run_agent.py                            # Play the default game with example agent
  python run_agent.py --game advent              # Play Adventure
  python run_agent.py --agent hidden_submission  # Use hidden agent
  python run_agent.py --list-games               # List all games
  python run_agent.py --list-agents              # List all agent folders
  python run_agent.py -v                         # Verbose output
""",
    )

    # Get available games for help text
    available_games = list_available_games()
    game_help = f"Game to play (default: {default_game}). {len(available_games)} games available."
    agent_help = f"Agent folder to use (default: {default_agent}). Available: {', '.join(agent_folders)}"

    parser.add_argument(
        "--agent", "-a",
        type=str,
        default=default_agent,
        help=agent_help,
    )
    parser.add_argument(
        "--game", "-g",
        type=str,
        default=default_game,
        help=game_help,
    )
    parser.add_argument(
        "--list-games",
        action="store_true",
        help="List all available games and exit",
    )
    parser.add_argument(
        "--list-agents",
        action="store_true",
        help="List all available agent folders and exit",
    )
    parser.add_argument(
        "--max-steps", "-n",
        type=int,
        default=100,
        help="Maximum number of steps to run (default: 100)",
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Show detailed reasoning from the agent",
    )

    args = parser.parse_args()

    # Handle --list-agents
    if args.list_agents:
        print(f"\nAvailable agent folders ({len(agent_folders)} total):\n")
        for folder in agent_folders:
            print(f" {folder}/")
        print("\nEach folder must contain agent.py and mcp_server.py")
        print()
        sys.exit(0)

    # Handle --list-games
    if args.list_games:
        print(f"\nAvailable games ({len(available_games)} total):\n")
        # Print in columns
        cols = 5
        for i in range(0, len(available_games), cols):
            row = available_games[i:i + cols]
            print(" " + " ".join(f"{g:<15}" for g in row))
        print()
        sys.exit(0)

    # Validate agent choice
    if args.agent not in agent_folders:
        print(f"\nError: Unknown agent folder '{args.agent}'")
        print(f"Available: {', '.join(agent_folders)}")
        print("Use --list-agents to see details.")
        sys.exit(1)

    # Validate game choice (case-insensitive)
    game_key = args.game.lower()
    if game_key not in available_games:
        print(f"\nError: Unknown game '{args.game}'")
        print(f"Use --list-games to see {len(available_games)} available options.")
        sys.exit(1)
    # Hand the canonical lower-case name to the agent and the server.
    args.game = game_key

    print("\n" + "=" * 60)
    print("Text Adventure MCP Agent Runner")
    print("=" * 60)
    print(f"Agent: {args.agent}/")
    print(f"Game: {args.game}")
    print(f"Max Steps: {args.max_steps}")
    print(f"Verbose: {args.verbose}")

    # Run the agent
    try:
        results = asyncio.run(run_mcp_agent(args))

    except FileNotFoundError as e:
        print(f"\n[Error] {e}")
        sys.exit(1)
    except ValueError as e:
        print(f"\n[Error] {e}")
        print("\nTo fix this:")
        print("1. Copy .env.example to .env")
        print("2. Add your HuggingFace token (HF_TOKEN)")
        sys.exit(1)
    except ImportError as e:
        print(f"\n[Import Error] {e}")
        print("\nMake sure to install dependencies:")
        print(" pip install -r requirements.txt")
        sys.exit(1)

    return results
223
+
224
+
225
+ if __name__ == "__main__":
226
+ main()