nathanael-fijalkow committed on
Commit
e1da269
·
0 Parent(s):

Initial template

Browse files
Files changed (6) hide show
  1. .gitignore +22 -0
  2. README.md +59 -0
  3. agent.py +279 -0
  4. app.py +71 -0
  5. mcp_server.py +209 -0
  6. requirements.txt +9 -0
.gitignore ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ .Python
7
+ *.egg-info/
8
+ dist/
9
+ build/
10
+
11
+ # Environment
12
+ .env
13
+ .venv/
14
+ venv/
15
+
16
+ # IDE
17
+ .vscode/
18
+ .idea/
19
+
20
+ # OS
21
+ .DS_Store
22
+ Thumbs.db
README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Text Adventure Agent Submission
3
+ emoji: "\U0001F5FA"
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: "5.0.0"
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ # Text Adventure Agent Submission
14
+
15
+ ## Overview
16
+
17
+ This is my submission for the Text Adventure Agent assignment. My agent uses the ReAct pattern to play text adventure games via MCP.
18
+
19
+ ## Approach
20
+
21
+ <!-- Describe your approach here -->
22
+
23
+ - What strategy does your agent use?
24
+ - What tools did you implement in your MCP server?
25
+ - Any interesting techniques or optimizations?
26
+
27
+ ## Files
28
+
29
+ | File | Description |
30
+ |------|-------------|
31
+ | `agent.py` | ReAct agent with `StudentAgent` class |
32
+ | `mcp_server.py` | MCP server with game interaction tools |
33
+ | `app.py` | Gradio interface for HF Space |
34
+ | `requirements.txt` | Additional dependencies |
35
+
36
+ ## How to Submit
37
+
38
+ 1. Fork the template Space: `https://huggingface.co/spaces/LLM-course/text-adventure-template`
39
+ 2. Clone your fork locally
40
+ 3. Implement your agent in `agent.py` and `mcp_server.py`
41
+ 4. Test locally (see below)
42
+ 5. Push your changes to your Space
43
+ 6. Submit your Space URL on the course platform
44
+
45
+ ## Local Testing
46
+
47
+ ```bash
48
+ # Install dependencies
49
+ pip install -r requirements.txt
50
+
51
+ # Test the MCP server interactively
52
+ fastmcp dev mcp_server.py
53
+
54
+ # Run your agent on a game
55
+ python run_agent.py --agent . --game lostpig -v -n 20
56
+
57
+ # Run evaluation
58
+ python -m evaluation.evaluate -s . -g lostpig -t 3
59
+ ```
agent.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Student Agent for Text Adventure Games
3
+
4
+ This is your submission file. Implement the StudentAgent class to play
5
+ text adventure games using the MCP server you also implement.
6
+
7
+ Your agent should:
8
+ 1. Connect to the MCP server via the provided client
9
+ 2. Use the ReAct pattern (Thought -> Action -> Observation)
10
+ 3. Call MCP tools to interact with the game
11
+ 4. Maximize the game score within the step limit
12
+
13
+ Required method:
14
+ async def run(self, client, game, max_steps, seed, verbose) -> RunResult
15
+
16
+ The 'client' is a FastMCP Client already connected to your MCP server.
17
+ Use it to call tools like: await client.call_tool("play_action", {"action": "look"})
18
+
19
+ Tips:
20
+ - Start by looking around and understanding your environment
21
+ - Keep track of visited locations to avoid loops
22
+ - Pick up useful items (lamp, sword, etc.)
23
+ - The seed parameter should be used to set your LLM's seed for reproducibility
24
+ """
25
+
26
+ import json
27
+ import os
28
+ import re
29
+ from dataclasses import dataclass, field
30
+ from typing import Optional
31
+
32
+ from dotenv import load_dotenv
33
+ from huggingface_hub import InferenceClient
34
+
35
+ # Load environment variables
36
+ load_dotenv()
37
+
38
+ # =============================================================================
39
+ # LLM Configuration - DO NOT MODIFY
40
+ # =============================================================================
41
+
42
# Model to use (fixed for fair evaluation)
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"

# Initialize the LLM client (uses HF_TOKEN from environment)
# The token is read once, when this module is imported.
_hf_token = os.getenv("HF_TOKEN")
if not _hf_token:
    # Fail fast: importing this module without a token raises immediately
    # instead of failing later on the first LLM call.
    raise ValueError("HF_TOKEN not found. Set it in your .env file.")

# Shared client used by call_llm() for every request.
LLM_CLIENT = InferenceClient(token=_hf_token)
51
+
52
+
53
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """
    Call the LLM with the given prompt. Use this function in your agent.

    Sends a two-message chat (system message, then user message) to
    LLM_MODEL through the module-level LLM_CLIENT and returns the content of
    the first choice. The call is synchronous.

    Args:
        prompt: The user prompt (current game state, history, etc.)
        system_prompt: The system prompt (instructions for the agent)
        seed: Random seed for reproducibility
        max_tokens: Maximum tokens in response (default: 300)

    Returns:
        The LLM's response text

    Example:
        response = call_llm(
            prompt="You are in a forest. What do you do?",
            system_prompt=SYSTEM_PROMPT,
            seed=42,
        )
    """
    # Chat transcript: instructions first, then the current game context.
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.0,  # Deterministic for reproducibility
        max_tokens=max_tokens,
        seed=seed,
    )

    # Only the first choice is used.
    # NOTE(review): content could presumably be None for some responses -
    # confirm against the huggingface_hub documentation.
    return response.choices[0].message.content
87
+
88
+
89
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class."""
    # Score at the end of the run.
    final_score: int
    # Maximum achievable score for the game.
    max_score: int
    # Number of moves taken - presumably game commands issued; TODO confirm
    # how the evaluator interprets it.
    moves: int
    # Names of the locations seen during the run.
    locations_visited: set[str]
    # Whether the game was completed.
    game_completed: bool
    # Optional error description; None by default.
    error: Optional[str] = None
    # Per-step records as 3-tuples of strings - presumably
    # (thought, action, observation); verify against the evaluator.
    history: list[tuple[str, str, str]] = field(default_factory=list)
99
+
100
+
101
+ # =============================================================================
102
+ # System Prompt - Customize this for your agent
103
+ # =============================================================================
104
+
105
# Instructions sent as the system message on LLM calls.
# The reply format requested below (THOUGHT / TOOL / ARGS, with ARGS as JSON)
# is the format the agent's response parser is expected to handle.
# The prompt itself marks the memory and inventory tools as "(if implemented)":
# they only work when the MCP server actually registers them.
SYSTEM_PROMPT = """You are playing a classic text adventure game.

GOAL: Explore the world, solve puzzles, and maximize your score.

AVAILABLE TOOLS (use via MCP):
- play_action: Execute a game command (north, take lamp, open mailbox, etc.)
- memory: Get current game state and history (if implemented)
- inventory: Check what you're carrying (if implemented)

VALID GAME COMMANDS for play_action:
- Movement: north, south, east, west, up, down, enter, exit
- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
- Other: look, inventory, read <thing>, turn on lamp

RESPOND IN THIS EXACT FORMAT (no markdown):
THOUGHT: <your reasoning about what to do next>
TOOL: <tool_name>
ARGS: <JSON arguments, e.g., {"action": "look"}>

Example:
THOUGHT: I should look around to see where I am.
TOOL: play_action
ARGS: {"action": "look"}
"""
129
+
130
+
131
+ # =============================================================================
132
+ # Student Agent - IMPLEMENT THIS CLASS
133
+ # =============================================================================
134
+
135
class StudentAgent:
    """
    ReAct agent that plays a text adventure through MCP tools.

    Each step builds a prompt from the latest observation plus a window of
    recent history, asks the LLM for a THOUGHT / TOOL / ARGS reply, runs the
    requested tool through the MCP client and feeds the tool output back in
    as the next observation.

    Use the provided call_llm() function to interact with the LLM.
    """

    # Number of most recent steps replayed to the LLM in every prompt.
    HISTORY_WINDOW = 8
    # Replayed observations are cut to this many characters to bound prompt size.
    MAX_HISTORY_CHARS = 400
    # Give up once this many tool calls in a row have raised.
    MAX_CONSECUTIVE_TOOL_ERRORS = 3

    # Heuristic end-of-game banners such as "****  You have died  ****".
    # NOTE(review): the exact banners are game specific - extend as needed.
    _GAME_OVER_RE = re.compile(
        r"\*{3,}\s*(?:you have died|you have won|the end)", re.IGNORECASE
    )
    # Matches "Score: 10" as well as "Your score is 10".
    _SCORE_RE = re.compile(r"\bscore(?:\s+is|:)\s*(-?\d+)", re.IGNORECASE)
    _THOUGHT_RE = re.compile(
        r"\bTHOUGHT\s*:\s*(.*?)\s*(?=\b(?:TOOL|ARGS)\s*:|\Z)",
        re.IGNORECASE | re.DOTALL,
    )
    _TOOL_RE = re.compile(r"\bTOOL\s*:\s*([A-Za-z_]\w*)", re.IGNORECASE)
    _ARGS_RE = re.compile(r"\bARGS\s*:\s*(.*)", re.IGNORECASE | re.DOTALL)
    # Fallback for almost-JSON arguments such as {'action': 'take lamp'}.
    _LOOSE_ACTION_RE = re.compile(r"""['"]action['"]\s*:\s*['"]([^'"]+)['"]""")

    def __init__(self):
        """Create empty per-run state; run() resets it at the start of every run."""
        self.history: list[tuple[str, str, str]] = []
        self.visited_locations: set[str] = set()

    async def run(
        self,
        client,  # FastMCP Client connected to your MCP server
        game: str,
        max_steps: int,
        seed: int,
        verbose: bool = False,
    ) -> "RunResult":
        """
        Run the agent for a game session.

        Args:
            client: FastMCP Client connected to your MCP server
            game: Name of the game being played (e.g., "zork1")
            max_steps: Maximum number of LLM-driven steps to take
            seed: Random seed for reproducibility (passed to every LLM call)
            verbose: Whether to print detailed output

        Returns:
            RunResult with final score and statistics. Failures (LLM errors,
            repeated tool errors) are reported through RunResult.error
            together with whatever progress was made, rather than raised.
            The score is only updated when a tool reply contains a score
            fragment; otherwise it stays at 0.
        """
        # Fresh state so one agent instance can be reused for several runs.
        self.history = []
        self.visited_locations = set()
        final_score = 0
        moves = 0
        game_completed = False
        error = None
        tool_failures = 0

        try:
            # Initial observation: look around before asking the LLM anything.
            observation, _ = await self._invoke_tool(
                client, "play_action", {"action": "look"}
            )
            self._record_location(observation)
            final_score = self._extract_score(observation, final_score)
            if verbose:
                print(f"[{game}] initial observation:\n{observation}\n")

            for step in range(1, max_steps + 1):
                prompt = self._build_prompt(observation, self.history)
                response = self._call_llm(prompt, SYSTEM_PROMPT, seed)
                thought, tool_name, tool_args = self._parse_response(response)

                observation, ok = await self._invoke_tool(client, tool_name, tool_args)

                if tool_name == "play_action":
                    action_label = str(tool_args.get("action", ""))
                    if ok:
                        moves += 1
                        self._record_location(observation)
                else:
                    action_label = f"{tool_name} {json.dumps(tool_args)}"

                final_score = self._extract_score(observation, final_score)
                # NOTE(review): stored as (thought, action, observation) -
                # confirm this is the order the evaluator expects.
                self.history.append((thought, action_label, observation))

                if verbose:
                    print(f"--- step {step}/{max_steps} ---")
                    print(f"THOUGHT: {thought}")
                    print(f"TOOL: {tool_name} ARGS: {tool_args}")
                    print(f"OBSERVATION: {observation}\n")

                tool_failures = 0 if ok else tool_failures + 1
                if tool_failures >= self.MAX_CONSECUTIVE_TOOL_ERRORS:
                    # The server is probably gone; stop spending LLM calls.
                    error = f"Aborted after {tool_failures} consecutive tool errors"
                    break

                if ok and self._is_game_over(observation):
                    game_completed = True
                    break
        except Exception as exc:  # top-level boundary: report instead of crashing
            error = f"{type(exc).__name__}: {exc}"
            if verbose:
                print(f"Run aborted: {error}")

        return RunResult(
            final_score=final_score,
            max_score=350,  # Zork1 max score, adjust if needed
            moves=moves,
            locations_visited=set(self.visited_locations),
            game_completed=game_completed,
            error=error,
            history=list(self.history),
        )

    async def _invoke_tool(self, client, tool_name: str, tool_args: dict) -> tuple[str, bool]:
        """Call one MCP tool; return (text, succeeded) and never raise on tool failure."""
        try:
            result = await client.call_tool(tool_name, tool_args)
        except Exception as exc:  # e.g. unknown tool name chosen by the LLM
            # Surface the failure as an observation so the LLM can recover.
            return f"[tool error] {tool_name}: {exc}", False
        return self._extract_text(result), True

    @staticmethod
    def _extract_text(result) -> str:
        """
        Flatten a call_tool() result into plain text.

        Accepts either a sequence of content items exposing `.text` or an
        object carrying such a sequence in `.content`.
        NOTE(review): which shape is returned depends on the installed
        FastMCP version - confirm against its documentation.
        """
        if result is None:
            return "No response"
        content = getattr(result, "content", result)
        if isinstance(content, str):
            return content
        try:
            items = list(content)
        except TypeError:
            return str(content)
        texts = [getattr(item, "text", None) for item in items]
        texts = [text for text in texts if text]
        return "\n".join(texts) if texts else "No response"

    def _extract_score(self, observation: str, current: int) -> int:
        """Return the score mentioned in *observation*, or *current* if none is found."""
        match = self._SCORE_RE.search(observation or "")
        return int(match.group(1)) if match else current

    def _is_game_over(self, observation: str) -> bool:
        """Heuristically detect an end-of-game banner in *observation*."""
        return bool(self._GAME_OVER_RE.search(observation or ""))

    def _record_location(self, observation: str) -> None:
        """
        Remember the room title if the observation appears to start with one.

        Heuristic: the first non-empty line counts as a title when it is
        short and does not end like a sentence.
        """
        for line in (observation or "").splitlines():
            title = line.strip()
            if not title:
                continue
            looks_like_title = (
                len(title) <= 60
                and title[-1] not in ".!?:\"'"
                and not title.startswith("[")
            )
            if looks_like_title:
                self.visited_locations.add(title)
            return  # only the first non-empty line is considered

    def _shorten(self, text: str) -> str:
        """Collapse whitespace and truncate *text* for replay in the prompt."""
        flat = " ".join((text or "").split())
        if len(flat) <= self.MAX_HISTORY_CHARS:
            return flat
        return flat[: self.MAX_HISTORY_CHARS - 3] + "..."

    def _build_prompt(self, observation: str, history: list) -> str:
        """
        Build the user prompt for the LLM.

        The system prompt is passed separately to call_llm(), so this only
        contains the recent steps, the visited locations and the current
        observation.

        Args:
            observation: Text returned by the most recent tool call
            history: Sequence of (thought, action, observation) tuples

        Returns:
            The prompt text
        """
        sections = []

        recent = list(history or [])[-self.HISTORY_WINDOW:]
        if recent:
            lines = ["RECENT STEPS (oldest first):"]
            for _thought, action, outcome in recent:
                lines.append(f"> {action}")
                lines.append(self._shorten(outcome))
            sections.append("\n".join(lines))

            # Nudge the model out of loops when it repeats itself.
            last_action = recent[-1][1]
            if len(recent) >= 2 and recent[-2][1] == last_action:
                sections.append(
                    f"NOTE: you repeated '{last_action}'. Try something different."
                )

        visited = getattr(self, "visited_locations", None)
        if visited:
            sections.append("VISITED LOCATIONS: " + ", ".join(sorted(visited)))

        sections.append("CURRENT OBSERVATION:\n" + (observation or "").strip())
        sections.append("What do you do next?")
        return "\n\n".join(sections)

    def _parse_response(self, response: str) -> tuple[str, str, dict]:
        """
        Parse LLM response to extract thought, tool name, and arguments.

        Expected format:
            THOUGHT: ...
            TOOL: ...
            ARGS: {...}

        The parser tolerates markdown decoration, code fences, a bare
        command after ARGS and single-quoted pseudo-JSON. Anything that
        cannot be parsed falls back to play_action with {"action": "look"}.

        Returns:
            Tuple of (thought, tool_name, args_dict)
        """
        # Drop markdown emphasis and code fences the model may add anyway.
        text = (response or "").replace("*", "").replace("`", "")

        thought_match = self._THOUGHT_RE.search(text)
        thought = thought_match.group(1).strip() if thought_match else ""

        tool_match = self._TOOL_RE.search(text)
        tool_name = tool_match.group(1) if tool_match else "play_action"

        args = None
        args_match = self._ARGS_RE.search(text)
        if args_match:
            raw = args_match.group(1).strip()
            brace = raw.find("{")
            if brace != -1:
                try:
                    # raw_decode ignores trailing text after the JSON object.
                    decoded, _ = json.JSONDecoder().raw_decode(raw[brace:])
                except json.JSONDecodeError:
                    decoded = None
                if isinstance(decoded, dict):
                    args = decoded
                else:
                    loose = self._LOOSE_ACTION_RE.search(raw)
                    if loose:
                        args = {"action": loose.group(1)}
            elif raw and tool_name == "play_action":
                # Bare command, e.g. "ARGS: open mailbox".
                args = {"action": raw.splitlines()[0]}

        if tool_name == "play_action":
            # play_action takes exactly one string argument named "action".
            action = args.get("action") if args else None
            if isinstance(action, str) and action.strip():
                args = {"action": action.strip()}
            else:
                args = {"action": "look"}
        elif args is None:
            args = {}

        return thought, tool_name, args

    def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
        """
        Call the LLM with the given prompt.

        This is a convenience wrapper - you can also use call_llm() directly.
        """
        return call_llm(prompt, system_prompt, seed)
248
+
249
+
250
+ # =============================================================================
251
+ # For local testing
252
+ # =============================================================================
253
+
254
async def test_agent():
    """
    Test the agent locally.

    Starts the MCP server that sits next to this file, plays a short zork1
    session with a fixed seed and prints the resulting statistics.
    """
    from fastmcp import Client

    # Resolve the server relative to this file rather than the current
    # working directory, so the test also works when launched from elsewhere.
    server_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "mcp_server.py"
    )

    agent = StudentAgent()

    async with Client(server_path) as client:
        result = await agent.run(
            client=client,
            game="zork1",
            max_steps=10,
            seed=42,
            verbose=True,
        )

        print(f"\nFinal Score: {result.final_score}")
        print(f"Moves: {result.moves}")
        print(f"Locations: {result.locations_visited}")
        if result.error:
            # RunResult carries failures in .error; make them visible.
            print(f"Error: {result.error}")
275
+
276
+
277
# Script entry point: run the local smoke test when executed directly.
if __name__ == "__main__":
    # Imported here because asyncio is only needed for direct execution.
    import asyncio
    asyncio.run(test_agent())
app.py ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Hugging Face Space - Text Adventure Agent Submission
3
+
4
+ This is a code-only Space for submitting your agent implementation.
5
+ The evaluation is run separately.
6
+
7
+ Files in this submission:
8
+ - agent.py: Your ReAct agent implementation
9
+ - mcp_server.py: Your MCP server implementation
10
+ - requirements.txt: Additional dependencies
11
+
12
+ To test locally:
13
+ fastmcp dev mcp_server.py
14
+ python agent.py
15
+ """
16
+
17
+ import gradio as gr
18
+ from pathlib import Path
19
+
20
+
21
def read_readme() -> str:
    """
    Read the README content.

    Returns:
        The text of README.md located next to this file, or a short
        placeholder document when the file does not exist.
    """
    readme_path = Path(__file__).parent / "README.md"
    if readme_path.exists():
        # Decode explicitly as UTF-8 instead of relying on the locale default.
        return readme_path.read_text(encoding="utf-8")
    return "# Submission\n\nNo README.md found."
27
+
28
+
29
def read_file_content(filename: str) -> str:
    """
    Read a source file's content.

    Args:
        filename: Name of a file located next to this module

    Returns:
        The file's text, or a "# File not found: ..." comment line when the
        file does not exist.
    """
    file_path = Path(__file__).parent / filename
    if file_path.exists():
        # Decode explicitly as UTF-8 instead of relying on the locale default.
        return file_path.read_text(encoding="utf-8")
    return f"# File not found: {filename}"
35
+
36
+
37
+ # Create the Gradio interface
38
# Read-only viewer for the submission: a heading, a short description, one
# tab per submitted file and a closing note.
with gr.Blocks(title="Text Adventure Agent Submission") as demo:
    gr.Markdown("# Text Adventure Agent Submission")
    gr.Markdown(
        "This Space contains a student submission for the Text Adventure Agent assignment. "
        "Use the tabs below to view the submitted code."
    )

    with gr.Tabs():
        with gr.Tab("README"):
            gr.Markdown(read_readme())

        # Both code tabs have the same shape, so build them from a table.
        for _tab_title, _source_name in (
            ("Agent Code", "agent.py"),
            ("MCP Server Code", "mcp_server.py"),
        ):
            with gr.Tab(_tab_title):
                gr.Code(
                    value=read_file_content(_source_name),
                    language="python",
                    label=_source_name,
                )

    gr.Markdown(
        "---\n"
        "**Note:** This is a code submission Space. "
        "Evaluation is performed using the evaluation script."
    )
68
+
69
+
70
# Launch the Gradio app only when this file is executed directly.
if __name__ == "__main__":
    demo.launch()
mcp_server.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Student MCP Server for Text Adventure Games
3
+
4
+ This is your MCP server submission. Implement the tools that your agent
5
+ will use to play text adventure games.
6
+
7
+ Required tool:
8
+ play_action(action: str) -> str
9
+ Execute a game command and return the result.
10
+
11
+ Recommended tools:
12
+ memory() -> str
13
+ Return current game state, score, and recent history.
14
+
15
+ inventory() -> str
16
+ Return the player's current inventory.
17
+
18
+ get_map() -> str
19
+ Return a map of explored locations.
20
+
21
+ Test your server with:
22
+ fastmcp dev mcp_server.py
23
+
24
+ Then open the MCP Inspector in your browser to test the tools interactively.
25
+ """
26
+
27
+ import sys
28
+ import os
29
+
30
+ # Add parent directory to path to import games module
31
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
32
+
33
+ from fastmcp import FastMCP
34
+ from games.zork_env import TextAdventureEnv
35
+
36
+
37
+ # =============================================================================
38
+ # Create the MCP Server
39
+ # =============================================================================
40
+
41
# Server instance; the tools below are registered on it via @mcp.tool()
# and it is started by mcp.run() in the __main__ guard.
mcp = FastMCP("Student Text Adventure Server")
42
+
43
+
44
+ # =============================================================================
45
+ # Game State Management
46
+ # =============================================================================
47
+
48
class GameManager:
    """
    Manages the text adventure game state.

    Wraps a TextAdventureEnv, keeps the most recent state it returned and
    records every (action, observation) pair so tools such as a memory
    tool can report recent history.

    TODO: Extend this class to track:
    - Explored locations (for mapping)
    """

    def __init__(self):
        # None until initialize() has created the environment.
        self.env: "TextAdventureEnv | None" = None
        # Most recent state object returned by env.reset() / env.step().
        self.state = None
        self.game_name: str = ""
        # Chronological (action, observation) pairs for the current game.
        self.history: list[tuple[str, str]] = []
        # TODO: Add more state tracking
        # self.explored_locations: dict[str, set[str]] = {}
        # self.current_location: str = ""

    def initialize(self, game: str = "zork1"):
        """
        Initialize or reset the game.

        Args:
            game: Name of the game to load

        Returns:
            The observation of the freshly reset game
        """
        self.game_name = game
        self.env = TextAdventureEnv(game)
        self.state = self.env.reset()
        # A new game starts with an empty history.
        self.history = []
        return self.state.observation

    def step(self, action: str) -> str:
        """
        Execute an action and return the result.

        Args:
            action: The game command to execute

        Returns:
            The observation produced by the action
        """
        if self.env is None:
            # Lazy start: honour the GAME environment variable, in the same
            # way get_game() does, instead of always loading the default.
            self.initialize(os.environ.get("GAME", "zork1"))

        self.state = self.env.step(action)
        self.history.append((action, self.state.observation))

        return self.state.observation

    def get_score(self) -> int:
        """Get current score (0 before the game has started)."""
        return self.state.score if self.state else 0

    def get_moves(self) -> int:
        """Get number of moves taken (0 before the game has started)."""
        return self.state.moves if self.state else 0
95
+
96
+
97
+ # Global game manager
98
# Single shared manager for this server process; no game is loaded yet -
# get_game() initializes it on first use.
_game = GameManager()
99
+
100
+
101
def get_game() -> GameManager:
    """
    Return the shared game manager, starting the game on first use.

    The game name comes from the GAME environment variable (set by the
    evaluator) and defaults to "zork1".
    """
    if _game.env is not None:
        return _game

    # First call: load the game requested through the environment.
    _game.initialize(os.environ.get("GAME", "zork1"))
    return _game
109
+
110
+
111
+ # =============================================================================
112
+ # MCP Tools - IMPLEMENT THESE
113
+ # =============================================================================
114
+
115
@mcp.tool()
def play_action(action: str) -> str:
    """
    Execute a game command and return the result.

    This is the main tool for interacting with the game.

    Args:
        action: The command to execute (e.g., "north", "take lamp", "open mailbox")

    Returns:
        The game's response to the action

    Valid commands include:
    - Movement: north, south, east, west, up, down, enter, exit
    - Objects: take <item>, drop <item>, open <thing>, examine <thing>
    - Other: look, inventory, read <thing>, turn on lamp
    """
    # The docstring above is kept verbatim: presumably FastMCP publishes it
    # as the tool description that clients see.

    # TODO: You might want to add action validation here
    # TODO: You might want to include score changes in the response
    # Optional: append score info to the reply, e.g.
    #   f"\n[Score: {game.get_score()} | Moves: {game.get_moves()}]"
    return get_game().step(action)
144
+
145
+
146
+ # TODO: Implement additional tools to help your agent
147
+
148
+ # @mcp.tool()
149
+ # def memory() -> str:
150
+ # """
151
+ # Get the current game state summary.
152
+ #
153
+ # Returns:
154
+ # A summary including current location, score, moves, and recent history
155
+ # """
156
+ # game = get_game()
157
+ # # TODO: Return useful state information
158
+ # pass
159
+
160
+
161
+ # @mcp.tool()
162
+ # def inventory() -> str:
163
+ # """
164
+ # Check what the player is carrying.
165
+ #
166
+ # Returns:
167
+ # List of items in the player's inventory
168
+ # """
169
+ # game = get_game()
170
+ # result = game.step("inventory")
171
+ # return result
172
+
173
+
174
+ # @mcp.tool()
175
+ # def get_map() -> str:
176
+ # """
177
+ # Get a map of explored locations.
178
+ #
179
+ # Returns:
180
+ # A text representation of explored locations and connections
181
+ # """
182
+ # game = get_game()
183
+ # # TODO: Return map of explored locations
184
+ # pass
185
+
186
+
187
+ # @mcp.tool()
188
+ # def get_valid_actions() -> str:
189
+ # """
190
+ # Get a list of likely valid actions from the current location.
191
+ #
192
+ # Returns:
193
+ # List of actions that might work here
194
+ # """
195
+ # # This is a hint: Jericho provides get_valid_actions()
196
+ # game = get_game()
197
+ # if game.env and game.env.env:
198
+ # valid = game.env.env.get_valid_actions()
199
+ # return "Valid actions: " + ", ".join(valid[:20])
200
+ # return "Could not determine valid actions"
201
+
202
+
203
+ # =============================================================================
204
+ # Run the server
205
+ # =============================================================================
206
+
207
# Start the server only when this file is executed directly, not on import.
if __name__ == "__main__":
    # This runs the server with stdio transport (for MCP clients)
    mcp.run()
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Required for HF Space display
2
+ gradio>=4.0.0
3
+
4
+ # Agent dependencies (these are provided by the evaluation infrastructure)
5
+ # Do not add jericho, fastmcp, or huggingface_hub here - they are already installed
6
+
7
+ # Add any additional packages your agent needs below:
8
+ # numpy
9
+ # requests