nathanael-fijalkow committed on
Commit
7e0ba48
·
verified ·
1 Parent(s): 1c650c1

Full project upload

Browse files
.env.example ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Hub Configuration
2
+ HF_TOKEN=your_huggingface_token_here
3
+
4
+ # Model Configuration
5
+ # Default model for all modes (react, function, mcp)
6
+ HF_MODEL=meta-llama/Llama-3.2-3B-Instruct
7
+
8
+ # Alternative models to try:
9
+ # HF_MODEL=google/gemma-2-2b-it
10
+ # HF_MODEL=Qwen/Qwen2.5-7B-Instruct
11
+
12
+ # Optional API Keys (if using other providers)
13
+ # ANTHROPIC_API_KEY=your_anthropic_key_here
14
+ # OPENAI_API_KEY=your_openai_key_here
.gitignore ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ master.zip
2
+ .github/
3
+
4
+ # Byte-compiled / optimized / DLL files
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Distribution / packaging
13
+ .Python
14
+ build/
15
+ develop-eggs/
16
+ dist/
17
+ downloads/
18
+ eggs/
19
+ .eggs/
20
+ lib/
21
+ lib64/
22
+ parts/
23
+ sdist/
24
+ var/
25
+ wheels/
26
+ pip-wheel-metadata/
27
+ share/python-wheels/
28
+ *.egg-info/
29
+ .installed.cfg
30
+ *.egg
31
+ MANIFEST
32
+
33
+ # PyInstaller
34
+ *.manifest
35
+ *.spec
36
+
37
+ # Installer logs
38
+ pip-log.txt
39
+ pip-delete-this-directory.txt
40
+
41
+ # Unit test / coverage reports
42
+ htmlcov/
43
+ .tox/
44
+ .nox/
45
+ .coverage
46
+ .coverage.*
47
+ .cache
48
+ nosetests.xml
49
+ coverage.xml
50
+ *.cover
51
+ *.py,cover
52
+ .hypothesis/
53
+ .pytest_cache/
54
+
55
+ # Translations
56
+ *.mo
57
+ *.pot
58
+
59
+ # Django stuff:
60
+ *.log
61
+ local_settings.py
62
+ db.sqlite3
63
+ db.sqlite3-journal
64
+
65
+ # Flask stuff:
66
+ instance/
67
+ .webassets-cache
68
+
69
+ # Scrapy stuff:
70
+ .scrapy
71
+
72
+ # Sphinx documentation
73
+ docs/_build/
74
+
75
+ # PyBuilder
76
+ target/
77
+
78
+ # Jupyter Notebook
79
+ .ipynb_checkpoints
80
+
81
+ # IPython
82
+ profile_default/
83
+ ipython_config.py
84
+
85
+ # pyenv
86
+ .python-version
87
+
88
+ # pipenv
89
+ Pipfile.lock
90
+
91
+ # PEP 582
92
+ __pypackages__/
93
+
94
+ # Celery stuff
95
+ celerybeat-schedule
96
+ celerybeat.pid
97
+
98
+ # SageMath parsed files
99
+ *.sage.py
100
+
101
+ # Environments
102
+ .env
103
+ .venv
104
+ env/
105
+ venv/
106
+ ENV/
107
+ env.bak/
108
+ venv.bak/
109
+
110
+ # Spyder project settings
111
+ .spyderproject
112
+ .spyproject
113
+
114
+ # Rope project settings
115
+ .ropeproject
116
+
117
+ # mkdocs documentation
118
+ /site
119
+
120
+ # mypy
121
+ .mypy_cache/
122
+ .dmypy.json
123
+ dmypy.json
124
+
125
+ # Pyre type checker
126
+ .pyre/
127
+
128
+ # macOS
129
+ .DS_Store
130
+ .AppleDouble
131
+ .LSOverride
132
+ ._*
133
+ .Spotlight-V100
134
+ .Trashes
135
+
136
+ # IDE
137
+ .vscode/
138
+ .idea/
139
+ *.swp
140
+ *.swo
141
+ *~
142
+
143
+ # Game files
144
+ z-machine-games-master/
145
+ *.z3
146
+ *.z4
147
+ *.z5
148
+ *.z8
149
+
150
+ # Temp files
151
+ .mcp_config_temp.json
.gitmodules ADDED
File without changes
README.md CHANGED
@@ -10,8 +10,206 @@ pinned: false
10
  license: mit
11
  ---
12
 
13
- # Playing Zork has never been so boring
14
 
15
- In this assignment, you will build an AI Agent and an MCP server to play text adventure games like Zork.
16
 
17
- See the instructions below to get started!
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
  license: mit
11
  ---
12
 
13
+ # Text Adventure LLM Agent Project
14
 
15
+ Build AI agents to play classic text adventure games (Zork, Colossal Cave, Enchanter, etc.) using the Model Context Protocol (MCP) and HuggingFace models.
16
 
17
+ ## Overview
18
+
19
+ This project provides:
20
+
21
+ 1. **MCP Server** - Exposes text adventure games as MCP tools using FastMCP
22
+ 2. **ReAct Agent** - An agent that uses MCP tools to play games with reasoning
23
+ 3. **Templates** - Starter code for students to implement their own solutions
24
+ 4. **57 Games** - Zork trilogy, Infocom classics, and many more Z-machine games
25
+
26
+ ## Architecture
27
+
28
+ ```
29
+ +-------------------+ MCP Protocol +------------------+
30
+ | | <------------------> | |
31
+ | ReAct Agent | (tool calls) | MCP Server |
32
+ | (FastMCP Client)| | (FastMCP) |
33
+ | | | |
34
+ +-------------------+ +------------------+
35
+ | |
36
+ | LLM API | Game API
37
+ v v
38
+ +-------------------+ +------------------+
39
+ | HuggingFace | | Text Adventure |
40
+ | Inference API | | (Jericho) |
41
+ +-------------------+ +------------------+
42
+ ```
43
+
44
+ ## Quick Start
45
+
46
+ ### 1. Setup
47
+
48
+ ```bash
49
+ # Create a virtual environment (uv is recommended)
50
+ uv venv
51
+ source .venv/bin/activate
52
+
53
+ # Install dependencies
54
+ uv pip install -r requirements.txt
55
+
56
+ # Configure environment
57
+ cp .env.example .env
58
+ # Edit .env and add your HuggingFace token (HF_TOKEN)
59
+ ```
60
+
61
+ Get your HuggingFace token at: https://huggingface.co/settings/tokens
62
+
63
+ ### 2. Run an Agent
64
+
65
+ ```bash
66
+ # MCP mode (recommended) - uses FastMCP Client
67
+ python run_agent.py --mode mcp
68
+
69
+ # Basic ReAct agent (direct game interaction)
70
+ python run_agent.py --mode react
71
+
72
+ # Function calling mode
73
+ python run_agent.py --mode function --simple
74
+ ```
75
+
76
+ ## Project Structure
77
+
78
+ ```
79
+ .
80
+ +-- run_agent.py # Unified agent runner
81
+ +-- mcp_server/
82
+ | +-- zork_server.py # Full MCP server with all tools
83
+ +-- agents/
84
+ | +-- base_agent.py # Abstract base class
85
+ | +-- react_agent.py # Basic ReAct agent (no MCP)
86
+ | +-- mcp_react_agent.py # MCP-enabled ReAct agent
87
+ +-- templates/ # Student templates
88
+ | +-- README.md # Assignment instructions
89
+ | +-- mcp_server_template.py # MCP server starter
90
+ | +-- react_agent_template.py # Agent starter
91
+ +-- function_calling/ # Alternative: function calling
92
+ | +-- controller.py
93
+ | +-- simple_controller.py
94
+ | +-- tools.py
95
+ +-- games/
96
+ | +-- zork_env.py # Jericho wrapper
97
+ +-- z-machine-games-master/ # Game files
98
+ ```
99
+
100
+ ## Agent Modes
101
+
102
+ | Mode | Description | Command |
103
+ |------|-------------|---------|
104
+ | `mcp` | MCP ReAct agent (FastMCP Client) | `--mode mcp` |
105
+ | `react` | Basic ReAct (direct game) | `--mode react` |
106
+ | `function` | Function calling (API) | `--mode function` |
107
+ | `function --simple` | Function calling (text) | `--mode function --simple` |
108
+
109
+ ### Examples
110
+
111
+ ```bash
112
+ # Run MCP agent with verbose output
113
+ python run_agent.py --mode mcp -v
114
+
115
+ # Run with different model
116
+ python run_agent.py --mode mcp --model google/gemma-2-2b-it
117
+
118
+ # Limit steps
119
+ python run_agent.py --mode mcp -n 50
120
+
121
+ # Play different games
122
+ python run_agent.py --mode mcp --game zork2
123
+ python run_agent.py --mode mcp --game advent # Colossal Cave Adventure
124
+ python run_agent.py --mode mcp --game enchanter # Infocom classic
125
+ python run_agent.py --mode mcp --game hhgg # Hitchhiker's Guide
126
+
127
+ # List all 57 available games
128
+ python run_agent.py --list-games
129
+ ```
130
+
131
+ ## MCP Server Tools
132
+
133
+ The MCP server exposes these tools:
134
+
135
+ | Tool | Description |
136
+ |------|-------------|
137
+ | `play_action(action)` | Execute a game command (north, take lamp, etc.) |
138
+ | `memory()` | Get current state (location, score, history) |
139
+ | `get_map()` | View explored locations and connections |
140
+ | `inventory()` | Check items you're carrying |
141
+ | `valid_actions()` | Get command hints |
142
+ | `reset_game(game)` | Start over or switch games |
143
+ | `list_games()` | See all 57 available games |
144
+ | `hint()` | Get contextual hints |
145
+
146
+ ### Testing the MCP Server
147
+
148
+ ```bash
149
+ # Run server directly (stdio transport) - default game is zork1
150
+ python mcp_server/zork_server.py
151
+
152
+ # Run with a specific game
153
+ GAME=advent python mcp_server/zork_server.py
154
+
155
+ # Use MCP Inspector for interactive testing
156
+ npx @modelcontextprotocol/inspector python mcp_server/zork_server.py
157
+
158
+ # Use FastMCP dev mode
159
+ fastmcp dev mcp_server/zork_server.py
160
+ ```
161
+
162
+ ## Student Assignment
163
+
164
+ See [templates/README.md](templates/README.md) for the assignment.
165
+
166
+ Students implement:
167
+ 1. **MCP Server** (`mcp_server_template.py`) - Expose game functionality as MCP tools
168
+ 2. **ReAct Agent** (`react_agent_template.py`) - Play text adventures using MCP
169
+
170
+ ## Configuration
171
+
172
+ ### Environment Variables
173
+
174
+ Create `.env` from `.env.example`:
175
+
176
+ ```bash
177
+ # Required: HuggingFace token
178
+ HF_TOKEN=hf_your_token_here
179
+
180
+ # Optional: Model override (default: meta-llama/Llama-3.2-3B-Instruct)
181
+ HF_MODEL=meta-llama/Llama-3.2-3B-Instruct
182
+ ```
183
+
184
+ ### Recommended Models
185
+
186
+ | Model | Notes |
187
+ |-------|-------|
188
+ | `meta-llama/Llama-3.2-3B-Instruct` | Default, good balance |
189
+ | `google/gemma-2-2b-it` | Smaller, faster |
190
+ | `Qwen/Qwen2.5-7B-Instruct` | Good instruction following |
191
+
192
+ ## Evaluation
193
+
194
+ Run the evaluator to test agent performance:
195
+
196
+ ```bash
197
+ python evaluate.py --mode mcp --games zork1 --runs 3
198
+ ```
199
+
200
+ Metrics:
201
+ - **Score**: Points earned in-game
202
+ - **Score %**: Score / Max possible score
203
+ - **Steps**: Number of actions taken
204
+ - **Time**: Elapsed time
205
+
206
+ ## Resources
207
+
208
+ - [FastMCP Documentation](https://gofastmcp.com/)
209
+ - [MCP Protocol](https://modelcontextprotocol.io/)
210
+ - [Jericho (Text Adventures)](https://github.com/microsoft/jericho)
211
+ - [HuggingFace Inference API](https://huggingface.co/docs/huggingface_hub/guides/inference)
212
+
213
+ ## License
214
+
215
+ MIT
SPACES_README.md ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: Agentic Zork
3
+ emoji: "🎮"
4
+ colorFrom: green
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.0.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
+ ---
12
+
13
+ # Playing Zork has never been so boring
14
+
15
+ In this assignment, you will build an AI Agent and an MCP server to play text adventure games like Zork.
16
+
17
+ See the instructions below to get started!
agents/__init__.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from .base_agent import BaseAgent, AgentConfig
2
+ from .react_agent import ReActAgent, ReActConfig
3
+ from .mcp_react_agent import MCPReActAgent, MCPAgentConfig
4
+
5
+ __all__ = [
6
+ "BaseAgent", "AgentConfig",
7
+ "ReActAgent", "ReActConfig",
8
+ "MCPReActAgent", "MCPAgentConfig",
9
+ ]
agents/base_agent.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Base Agent Abstract Class
3
+
4
+ Defines the interface that all text adventure agents must implement.
5
+ """
6
+
7
+ from abc import ABC, abstractmethod
8
+ from dataclasses import dataclass
9
+ from games.zork_env import GameState
10
+
11
+
12
@dataclass
class AgentConfig:
    """Settings shared by text adventure agents."""

    # Display name of the agent.
    name: str = "BaseAgent"
    # Maximum number of past interactions to remember.
    max_history: int = 20
    # Verbosity switch (presumably enables extra output; confirm in subclasses).
    verbose: bool = False
18
+
19
+
20
class BaseAgent(ABC):
    """
    Abstract base class for text adventure agents.

    Subclasses implement `choose_action`. This class keeps a bounded list of
    (action, observation, state) tuples and can render the most recent ones
    as text for use as context.
    """

    def __init__(self, config: "AgentConfig | None" = None):
        """
        Create the agent with an empty history.

        Args:
            config: Agent settings; a default AgentConfig is used when omitted.
        """
        self.config = config if config is not None else AgentConfig()
        self.history: list[tuple[str, str, "GameState"]] = []  # (action, observation, state)

    @abstractmethod
    def choose_action(self, observation: str, game_state: "GameState") -> str:
        """
        Choose the next action based on the current observation and game state.

        Args:
            observation: The text observation from the game
            game_state: The current GameState object with score, inventory, etc.

        Returns:
            A string action to take in the game (e.g., "go north", "take lamp")
        """

    def update_history(self, action: str, observation: str, game_state: "GameState"):
        """
        Record the outcome of an action and keep the history bounded.

        Args:
            action: The action that was taken
            observation: The resulting observation
            game_state: The resulting game state
        """
        self.history.append((action, observation, game_state))

        # Drop the oldest entries by count. A negative-index slice would keep
        # everything when max_history is 0, because history[-0:] is the whole list.
        overflow = len(self.history) - max(self.config.max_history, 0)
        if overflow > 0:
            self.history = self.history[overflow:]

    def reset(self):
        """Reset the agent's internal state for a new game."""
        self.history = []

    def get_history_text(self) -> str:
        """
        Summarize recent history as text.

        Returns:
            One section per entry for the last 10 entries: the action, the
            observation (cut to 200 characters plus "..." when longer), and a
            score/moves line. A fixed message is returned when history is empty.
        """
        if not self.history:
            return "No previous actions taken."

        lines = []
        for action, observation, state in self.history[-10:]:
            lines.append(f"> {action}")
            # Truncate long observations so the summary stays compact.
            obs_preview = observation[:200] + "..." if len(observation) > 200 else observation
            lines.append(obs_preview)
            lines.append(f"[Score: {state.score}, Moves: {state.moves}]")
            lines.append("")

        return "\n".join(lines)
agents/mcp_react_agent.py ADDED
@@ -0,0 +1,477 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ MCP ReAct Agent for Text Adventure Games
3
+
4
+ A production-ready ReAct agent that uses FastMCP Client to play text adventures via MCP tools.
5
+ This agent connects to the Text Adventure MCP server and uses the LLM to reason and act.
6
+
7
+ Features:
8
+ - FastMCP Client integration for MCP server communication
9
+ - ReAct loop (Thought -> Tool -> Observation)
10
+ - Loop detection and action validation
11
+ - History tracking and memory management
12
+ - Score tracking and game over detection
13
+ """
14
+
15
+ import asyncio
16
+ import json
17
+ import os
18
+ import re
19
+ import sys
20
+ from dataclasses import dataclass, field
21
+ from huggingface_hub import InferenceClient
22
+ from dotenv import load_dotenv
23
+ from fastmcp import Client
24
+ from fastmcp.client.transports import StdioTransport
25
+
26
+
27
@dataclass
class MCPAgentConfig:
    """Settings for the MCP ReAct agent."""

    # Model identifier passed to the LLM client.
    model: str = "meta-llama/Llama-3.2-3B-Instruct"
    # Default game to play.
    game: str = "zork1"
    # Sampling temperature passed to the LLM.
    temperature: float = 0.7
    # Token limit passed to the LLM for each reply.
    max_tokens: int = 300
    # Number of step records the agent keeps.
    max_history: int = 10
    # When true, the agent prints each thought and tool call.
    verbose: bool = True
36
+
37
+
38
+ SYSTEM_PROMPT = """You are an expert text adventure game player. Your goal is to explore, collect treasures, and maximize your score.
39
+
40
+ AVAILABLE TOOLS (use these via MCP):
41
+ 1. play_action - Execute game commands (north, take lamp, open mailbox, etc.)
42
+ 2. memory - Get current game state, score, and recent history
43
+ 3. get_map - See explored locations and connections
44
+ 4. inventory - Check what you're carrying
45
+ 5. hint - Get a hint if stuck
46
+ 6. list_games - See available games
47
+ 7. reset_game - Switch to a different game
48
+
49
+ VALID GAME COMMANDS for play_action:
50
+ - Movement: north, south, east, west, up, down, enter, exit
51
+ - Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
52
+ - Light: turn on lamp, turn off lamp
53
+ - Combat: attack <enemy> with <weapon>
54
+ - Other: inventory, look, read <thing>, wait
55
+
56
+ FORBIDDEN (will NOT work): check, inspect, search, grab, use, help
57
+
58
+ RESPOND IN THIS EXACT FORMAT (no markdown):
59
+ THOUGHT: <brief reasoning about what to do next>
60
+ TOOL: <tool_name>
61
+ ARGS: <JSON arguments>
62
+
63
+ Examples:
64
+ THOUGHT: I need to see what's around me.
65
+ TOOL: play_action
66
+ ARGS: {"action": "look"}
67
+
68
+ THOUGHT: Let me check my current state and score.
69
+ TOOL: memory
70
+ ARGS: {}
71
+
72
+ THOUGHT: The mailbox might contain something useful.
73
+ TOOL: play_action
74
+ ARGS: {"action": "open mailbox"}
75
+
76
+ STRATEGY:
77
+ 1. Start by looking around and checking memory
78
+ 2. Explore systematically - try all directions
79
+ 3. Pick up useful items (lamp, sword, etc.)
80
+ 4. Open containers (mailbox, window, etc.)
81
+ 5. Use get_map to avoid getting lost
82
+ 6. Turn on lamp before dark areas!
83
+
84
+ DO NOT repeat the same action multiple times in a row."""
85
+
86
+
87
class MCPReActAgent:
    """
    A ReAct agent that plays text adventure games using MCP tools via FastMCP Client.

    Each step asks the LLM for a THOUGHT/TOOL/ARGS reply, repairs common
    mistakes in that reply, breaks obvious action loops, executes the tool on
    the MCP server, and tracks score and game-over state from the returned text.
    """

    # (compiled pattern, is_increment). Increments are added to the running
    # score; absolute readings can only raise it.
    _SCORE_PATTERNS = (
        (re.compile(r'\+(\d+) points', re.IGNORECASE), True),
        (re.compile(r'Score:\s*(\d+)', re.IGNORECASE), False),
        (re.compile(r'Total:\s*(\d+)', re.IGNORECASE), False),
    )

    # Tool names the LLM tends to invent, mapped to the real tool.
    _TOOL_ALIASES = {
        "action": "play_action",
        "do": "play_action",
        "command": "play_action",
        "map": "get_map",
        "location": "get_map",
        "mem": "memory",
        "state": "memory",
        "status": "memory",
        "inv": "inventory",
        "items": "inventory",
    }

    # Leading verbs the prompt lists as forbidden, mapped to accepted ones.
    _INVALID_VERB_MAP = {
        "check": "examine",
        "inspect": "examine",
        "search": "look",
        "grab": "take",
        "pick": "take",
        "use": "examine",
        "investigate": "examine",
    }

    def __init__(self, mcp_server_path: str, config: "MCPAgentConfig | None" = None):
        """
        Initialize the MCP ReAct agent.

        Args:
            mcp_server_path: Path to the MCP server script
            config: Agent configuration; defaults are used when omitted. Its
                model is replaced by the HF_MODEL environment variable when set.

        Raises:
            ValueError: If HF_TOKEN is not set in the environment.
        """
        load_dotenv()

        self.mcp_server_path = mcp_server_path
        self.config = config if config is not None else MCPAgentConfig()

        # Override model from environment if set
        env_model = os.getenv("HF_MODEL")
        if env_model:
            self.config.model = env_model

        # Initialize LLM client
        token = os.getenv("HF_TOKEN")
        if not token:
            raise ValueError("HF_TOKEN not found. Set it in your .env file.")
        self.llm = InferenceClient(token=token)

        # Agent state
        self.history: list[dict] = []
        self.thoughts: list[str] = []
        self.score: int = 0
        self.max_score: int = 350
        self.recent_actions: list[str] = []  # For loop detection

    async def run(self, max_steps: int = 100) -> dict:
        """
        Run the ReAct agent loop.

        Starts the MCP server as a subprocess (the game name is passed through
        the GAME environment variable), then alternates LLM reasoning and tool
        execution until the game ends or the step budget is used up.

        Args:
            max_steps: Maximum number of steps to run

        Returns:
            Dictionary with game results (see `_print_summary`)
        """
        import time
        start_time = time.time()
        step = 0
        game_over = False
        game_name = self.config.game

        print("=" * 60)
        print(f"MCP ReAct Agent - Playing {game_name.upper()}")
        print(f"Model: {self.config.model}")
        print("=" * 60)

        # Set game as environment variable for the server
        env = os.environ.copy()
        env["GAME"] = game_name

        # Create transport with environment variables
        transport = StdioTransport(
            command=sys.executable,
            args=[self.mcp_server_path],
            env=env,
        )

        # Connect to MCP server with game environment
        async with Client(transport) as client:
            # List available tools
            tools = await client.list_tools()
            tool_names = [t.name for t in tools]
            print(f"\nConnected to MCP server. Tools: {tool_names}")

            # Get initial observation
            result = await client.call_tool("play_action", {"action": "look"})
            observation = self._extract_result(result)
            print(f"\n{observation}\n")

            # Parse initial score
            self._update_score(observation)

            # Main ReAct loop
            for step in range(1, max_steps + 1):
                print(f"\n{'─' * 50}")
                print(f"Step {step}/{max_steps} | Score: {self.score}")
                print("─" * 50)

                # Build prompt with context
                prompt = self._build_prompt(observation)

                # Call LLM for reasoning
                response = self._call_llm(prompt)

                # Parse response
                thought, tool_name, tool_args = self._parse_response(response, tool_names)

                self.thoughts.append(thought)

                if self.config.verbose:
                    print(f"\n[THOUGHT] {thought}")
                    print(f"[TOOL] {tool_name}({tool_args})")

                # Validate and fix common issues
                tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)

                # Check for loops
                if tool_name == "play_action":
                    action = tool_args.get("action", "look")
                    self.recent_actions.append(action)
                    if len(self.recent_actions) > 5:
                        self.recent_actions = self.recent_actions[-5:]

                    # Three identical actions in a row: force a different one
                    if len(self.recent_actions) >= 3 and len(set(self.recent_actions[-3:])) == 1:
                        print(f"\n[WARNING] Loop detected - repeating '{action}'")
                        tool_args = {"action": "look"}
                        self.recent_actions.append("look")

                # Execute tool via MCP
                try:
                    result = await client.call_tool(tool_name, tool_args)
                    observation = self._extract_result(result)
                    print(f"\n{observation}")
                except Exception as e:
                    # Best effort: report the failure to the LLM as the next observation.
                    observation = f"Error executing tool: {e}"
                    print(f"\n[ERROR] {e}")

                # Update history
                self.history.append({
                    "step": step,
                    "thought": thought,
                    "tool": tool_name,
                    "args": tool_args,
                    "result": observation[:200]
                })
                # Trim by count; history[-0:] would keep everything when max_history is 0.
                overflow = len(self.history) - max(self.config.max_history, 0)
                if overflow > 0:
                    self.history = self.history[overflow:]

                # Update score
                self._update_score(observation)

                # Check for game over
                if self._is_game_over(observation):
                    game_over = True
                    print("\n" + "=" * 60)
                    print("GAME OVER!")
                    break

        elapsed_time = time.time() - start_time

        # Print summary
        return self._print_summary(step, elapsed_time, game_over)

    def _build_prompt(self, observation: str) -> str:
        """
        Build the user prompt for the LLM.

        The prompt contains the score, up to three recent history entries, an
        optional repetition warning, and the current observation.
        """
        parts = []

        # Score info
        parts.append(f"Current Score: {self.score}/{self.max_score}")

        # Recent history (compact)
        if self.history:
            parts.append("\nRecent actions:")
            for entry in self.history[-3:]:
                action = entry.get("args", {}).get("action", entry["tool"])
                result_short = entry["result"][:80] + "..." if len(entry["result"]) > 80 else entry["result"]
                parts.append(f"  > {action} -> {result_short}")

        # Warn only when the last two recorded actions are identical; a single
        # recorded action is not a repetition.
        last_two = self.recent_actions[-2:]
        if len(last_two) == 2 and last_two[0] == last_two[1]:
            parts.append(f"\n[WARNING: You've been doing '{self.recent_actions[-1]}' repeatedly. TRY SOMETHING DIFFERENT!]")

        # Current observation
        parts.append(f"\nCurrent situation:\n{observation}")
        parts.append("\nWhat do you do next?")

        return "\n".join(parts)

    def _call_llm(self, prompt: str) -> str:
        """
        Send the system prompt plus `prompt` to the LLM and return its reply.

        On any error the exception is printed and a canned "look" reply is
        returned so the game loop can continue.
        """
        try:
            messages = [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": prompt}
            ]

            response = self.llm.chat.completions.create(
                model=self.config.model,
                messages=messages,
                temperature=self.config.temperature,
                max_tokens=self.config.max_tokens,
            )
            return response.choices[0].message.content
        except Exception as e:
            print(f"[LLM Error] {e}")
            return "THOUGHT: LLM error, trying look.\nTOOL: play_action\nARGS: {\"action\": \"look\"}"

    def _parse_args(self, args_part: str) -> dict:
        """Turn the text after "ARGS:" into an argument dict, falling back to a "look" action."""
        normalized = args_part.replace("'", '"')
        parsed = None
        # Try the text as written first: blindly swapping quotes corrupts valid
        # JSON that contains an apostrophe (e.g. "read king's note").
        for candidate in (args_part, normalized):
            try:
                parsed = json.loads(candidate)
                break
            except json.JSONDecodeError:
                continue

        if isinstance(parsed, dict):
            return parsed
        if isinstance(parsed, str) and parsed.strip():
            # A bare JSON string is treated as the action itself.
            return {"action": parsed}

        # Try to extract action from text
        match = re.search(r'"action"\s*:\s*"([^"]+)"', normalized)
        if match:
            return {"action": match.group(1)}
        return {"action": "look"}

    def _parse_response(self, response: str, valid_tools: list[str]) -> tuple[str, str, dict]:
        """
        Parse the LLM response to extract thought, tool, and arguments.

        Args:
            response: Raw LLM reply; None or empty yields the defaults.
            valid_tools: Tool names offered by the server (not used here; kept
                for interface compatibility).

        Returns:
            (thought, tool_name, tool_args); missing parts default to a
            play_action "look" call.
        """
        thought = "No reasoning provided"
        tool_name = "play_action"
        tool_args = {"action": "look"}

        for line in (response or "").strip().split("\n"):
            label, sep, value = line.strip().partition(":")
            if not sep:
                continue
            label = label.upper()
            value = value.strip()

            if label == "THOUGHT":
                thought = value

            elif label == "TOOL":
                # Strip markdown decoration and keep only the first word.
                cleaned = value.lower().replace("**", "").replace("*", "").replace("`", "")
                words = cleaned.split()
                tool_name = words[0] if words else "play_action"

            elif label == "ARGS":
                tool_args = self._parse_args(value)

        return thought, tool_name, tool_args

    def _validate_tool_call(self, tool_name: str, tool_args: dict, valid_tools: list[str]) -> tuple[str, dict]:
        """
        Validate and fix common tool call issues.

        Unknown tool names are mapped through known aliases (default
        play_action). For play_action, the action is lower-cased, stripped of
        markdown, whitespace-normalized, and a forbidden leading verb is
        replaced. `tool_args` is updated in place when it is a dict.

        Returns:
            The corrected (tool_name, tool_args) pair.
        """
        if not isinstance(tool_args, dict):
            tool_args = {}

        # Fix tool name
        if tool_name not in valid_tools:
            tool_name = self._TOOL_ALIASES.get(tool_name, "play_action")

        # Fix action in args
        if tool_name == "play_action":
            action = str(tool_args.get("action") or "look")

            # Clean up first so markdown-wrapped verbs still hit the verb map.
            action = action.lower().replace("**", "").replace("*", "").replace("`", "")
            words = action.split()

            # Fix invalid verbs
            if words and words[0] in self._INVALID_VERB_MAP:
                words[0] = self._INVALID_VERB_MAP[words[0]]

            tool_args["action"] = " ".join(words) or "look"

        return tool_name, tool_args

    def _extract_result(self, result) -> str:
        """
        Extract text from MCP tool result.

        Reads the text of the first content item. A plain list of content
        items is also accepted (some client versions may return one — confirm
        against the installed FastMCP). Anything else is returned as str(result).
        """
        content = getattr(result, "content", None)
        if content is None and isinstance(result, (list, tuple)):
            content = result
        if content:
            text = getattr(content[0], "text", None)
            if text is not None:
                return text
        return str(result)

    def _update_score(self, text: str) -> None:
        """
        Update score from game text.

        "+N points" adds N to the score. "Score: N" and "Total: N" are
        absolute readings and only raise the score, never lower it.
        """
        for pattern, is_increment in self._SCORE_PATTERNS:
            match = pattern.search(text)
            if not match:
                continue
            points = int(match.group(1))
            if is_increment:
                self.score += points
            else:
                self.score = max(self.score, points)

    def _is_game_over(self, text: str) -> bool:
        """Check if the game is over (case-insensitive phrase match)."""
        game_over_phrases = [
            "game over",
            "you have died",
            "you are dead",
            "*** you have died ***",
        ]
        text_lower = text.lower()
        return any(phrase in text_lower for phrase in game_over_phrases)

    def _print_summary(self, step: int, elapsed_time: float, game_over: bool) -> dict:
        """
        Print game summary and return results.

        Returns:
            Dict with final_score, max_score, score_percentage, steps,
            elapsed_time and game_over.
        """
        # Guard the division in case max_score is ever set to 0.
        percentage = 100 * self.score / self.max_score if self.max_score else 0.0

        print("\n" + "=" * 60)
        print("GAME SUMMARY")
        print("=" * 60)
        print(f"Final Score: {self.score}/{self.max_score} ({percentage:.1f}%)")
        print(f"Steps Taken: {step}")
        print(f"Time Elapsed: {elapsed_time:.1f} seconds")
        print(f"Game Over: {game_over}")
        print("=" * 60)

        return {
            "final_score": self.score,
            "max_score": self.max_score,
            "score_percentage": percentage,
            "steps": step,
            "elapsed_time": elapsed_time,
            "game_over": game_over,
        }
431
+
432
+
433
+ # =============================================================================
434
+ # Main
435
+ # =============================================================================
436
+
437
async def main():
    """
    Run the MCP ReAct agent from the command line.

    Options: --server/-s (server script path), --max-steps/-n, --model, and
    --verbose/-v, which is on by default and can be disabled with --no-verbose.

    Returns:
        The result dictionary produced by the agent's run.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Run the MCP ReAct Text Adventure Agent")
    parser.add_argument(
        "--server", "-s",
        default="mcp_server/zork_server.py",
        help="Path to the MCP server script"
    )
    parser.add_argument(
        "--max-steps", "-n",
        type=int,
        default=100,
        help="Maximum steps to run"
    )
    parser.add_argument(
        "--model",
        type=str,
        default=None,
        help="HuggingFace model to use"
    )
    # A store_true flag that defaults to True can never be switched off;
    # BooleanOptionalAction keeps -v/--verbose and adds --no-verbose.
    parser.add_argument(
        "--verbose", "-v",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Show detailed output"
    )

    args = parser.parse_args()

    config = MCPAgentConfig(verbose=args.verbose)
    if args.model:
        config.model = args.model

    agent = MCPReActAgent(args.server, config)
    if args.model:
        # The constructor replaces config.model with HF_MODEL when that is set;
        # an explicit --model on the command line takes precedence.
        agent.config.model = args.model
    return await agent.run(max_steps=args.max_steps)
474
+
475
+
476
+ if __name__ == "__main__":
477
+ asyncio.run(main())
agents/react_agent.py ADDED
@@ -0,0 +1,243 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ReAct Agent for Text Adventure Games
3
+
4
+ Implements a ReAct (Reasoning + Acting) loop using an LLM to play text adventures.
5
+ The agent thinks about its situation, decides on an action, and learns from the result.
6
+ """
7
+
8
+ import os
9
+ from dataclasses import dataclass
10
+ from huggingface_hub import InferenceClient
11
+ from dotenv import load_dotenv
12
+
13
+ from agents.base_agent import BaseAgent, AgentConfig
14
+ from games.zork_env import GameState
15
+
16
+
17
@dataclass
class ReActConfig(AgentConfig):
    """Configuration for the ReAct agent.

    Adds the LLM sampling settings used by ``ReActAgent`` on top of the
    fields inherited from ``AgentConfig``.
    """
    # Display name of the agent.
    name: str = "ReActAgent"
    # Hugging Face model id sent with each chat-completion request.
    # ReActAgent.__init__ replaces it with $HF_MODEL when that variable is set.
    model: str = "meta-llama/Llama-3.2-3B-Instruct"
    # Sampling temperature passed to the chat-completion call.
    temperature: float = 0.7
    # Upper bound on tokens generated per LLM response.
    max_tokens: int = 300
    # NOTE(review): not read in ReActAgent itself; presumably consumed by
    # BaseAgent's history handling - confirm.
    max_history: int = 15
25
+
26
+
27
+ SYSTEM_PROMPT = """You are playing a classic text adventure game.
28
+
29
+ GOAL: Explore the world, solve puzzles, collect treasures, and maximize your score.
30
+
31
+ VALID COMMANDS:
32
+ - Movement: north, south, east, west, up, down, enter, exit
33
+ - Looking: look, examine <thing>, read <thing>
34
+ - Objects: take <item>, drop <item>, open <thing>, close <thing>
35
+ - Light: turn on lamp, light match
36
+ - Combat: attack <enemy> with <weapon>
37
+ - Other: inventory, wait, push <thing>, move <thing>
38
+
39
+ INVALID COMMANDS (do NOT use): check, inspect, search, grab, use, help
40
+
41
+ TIPS:
42
+ - Explore systematically - try all directions
43
+ - Examine interesting objects and read documents
44
+ - Pick up useful items (lamp, keys, weapons)
45
+ - Open containers to find hidden items
46
+
47
+ You MUST respond in EXACTLY this format (no markdown, no extra text):
48
+ THOUGHT: <your reasoning in one sentence>
49
+ ACTION: <one valid command>
50
+
51
+ Example response:
52
+ THOUGHT: I see a container here, I should check what is inside.
53
+ ACTION: open container"""
54
+
55
+
56
class ReActAgent(BaseAgent):
    """
    A ReAct (Reasoning + Acting) agent that uses an LLM to play text adventures.

    Uses Hugging Face Hub's Inference API. Each turn the agent builds a compact
    prompt from the game state, asks the model for a THOUGHT/ACTION pair, and
    returns the parsed action.
    """

    def __init__(self, config: ReActConfig = None, token: str = None):
        """Create the agent and its inference client.

        Args:
            config: Agent settings; a default ``ReActConfig`` is used when omitted.
            token: Hugging Face token; falls back to the ``HF_TOKEN``
                environment variable (``.env`` files are loaded first).

        Raises:
            ValueError: If no token is passed and ``HF_TOKEN`` is not set.
        """
        super().__init__(config or ReActConfig())
        self.config: ReActConfig = self.config

        # Load token from environment if not provided
        load_dotenv()
        token = token or os.getenv("HF_TOKEN")
        if not token:
            raise ValueError("HF_TOKEN not found. Set HF_TOKEN environment variable or pass token parameter.")

        # Override model from environment if set
        env_model = os.getenv("HF_MODEL")
        if env_model:
            self.config.model = env_model

        self.client = InferenceClient(token=token)
        self.thoughts: list[str] = []  # Store reasoning history

    def choose_action(self, observation: str, game_state: GameState) -> str:
        """
        Use the LLM to reason about the situation and choose an action.

        The model's thought is appended to ``self.thoughts``; thought and
        action are printed when ``config.verbose`` is true.
        """
        prompt = self._build_prompt(observation, game_state)
        response = self._call_llm(prompt)
        thought, action = self._parse_response(response)

        # Store the thought for history
        self.thoughts.append(thought)

        if self.config.verbose:
            print(f"\n[Thought] {thought}")
            print(f"[Action] {action}")

        return action

    def _build_prompt(self, observation: str, game_state: GameState) -> str:
        """Build the user prompt: status line, inventory, recent turns, current text."""
        parts = []

        # Current status (compact for small models)
        parts.append(f"Score: {game_state.score}/{game_state.max_score} | Moves: {game_state.moves}")

        if game_state.inventory:
            parts.append(f"Inventory: {', '.join(game_state.inventory)}")

        # Recent history (only last 3 for small models)
        if self.history:
            parts.append("\nRecent:")
            recent_actions = []
            for action, obs, _state in self.history[-3:]:
                obs_short = obs[:150] + "..." if len(obs) > 150 else obs
                parts.append(f"> {action}\n{obs_short}")
                recent_actions.append(action)

            # Warn about repeated actions
            if len(recent_actions) >= 2 and len(set(recent_actions)) == 1:
                parts.append(f"\n[WARNING: You've done '{recent_actions[0]}' multiple times. Try something different!]")

        # Current observation
        parts.append(f"\nNow:\n{observation}")
        parts.append("\nWhat do you do next? (Try a NEW action)")

        return "\n".join(parts)

    def _call_llm(self, prompt: str) -> str:
        """Call the Hugging Face Inference API and return the reply text.

        Any exception from the API is printed and replaced by a canned
        THOUGHT/ACTION reply, so a transient failure costs one "look" turn
        instead of ending the game.
        """
        try:
            messages = [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": prompt}
            ]

            response = self.client.chat.completions.create(
                model=self.config.model,
                messages=messages,
                temperature=self.config.temperature,
                max_tokens=self.config.max_tokens,
            )
            # The message content can be None (e.g. an empty completion);
            # normalise to "" so the declared return type holds.
            return response.choices[0].message.content or ""
        except Exception as e:
            print(f"Error calling LLM: {e}")
            return "THOUGHT: Error occurred, trying a safe action.\nACTION: look"

    def _parse_response(self, response: str) -> tuple[str, str]:
        """Parse the LLM response to extract thought and action.

        Returns:
            ``(thought, action)``. ``thought`` is empty when no THOUGHT line
            is present; ``action`` falls back to ``"look"`` when no usable
            ACTION line is found.
        """
        thought = ""
        action = "look"  # Default fallback action

        # Tolerate a None/empty reply instead of raising AttributeError.
        lines = (response or "").strip().split("\n")

        for i, line in enumerate(lines):
            line_upper = line.upper().strip()

            if line_upper.startswith("THOUGHT:"):
                # Extract thought (may span multiple lines until ACTION)
                thought_parts = [line.split(":", 1)[1].strip()]
                for j in range(i + 1, len(lines)):
                    if lines[j].upper().strip().startswith("ACTION:"):
                        break
                    thought_parts.append(lines[j].strip())
                thought = " ".join(thought_parts).strip()

            elif line_upper.startswith("ACTION:"):
                action = line.split(":", 1)[1].strip().lower()
                # Clean up the action - remove quotes, markdown, and extra whitespace
                action = action.strip('"\'')
                # Remove markdown bold/italic markers
                action = action.replace("**", "").replace("*", "").replace("__", "").replace("_", " ")
                # Remove backticks
                action = action.replace("`", "")
                # Clean up whitespace
                action = " ".join(action.split())
                break

        # Validate action isn't empty
        if not action or action.isspace():
            action = "look"

        return thought, action

    def reset(self):
        """Reset the agent for a new game, clearing the recorded thoughts."""
        super().reset()
        self.thoughts = []

    def get_summary(self) -> str:
        """Return the last five recorded thoughts, separated by ``---`` lines."""
        if not self.thoughts:
            return "No thoughts recorded yet."

        return "\n---\n".join(self.thoughts[-5:])
199
+
200
+
201
+ # Example usage and testing
202
if __name__ == "__main__":
    import sys
    from games.zork_env import TextAdventureEnv

    # The game name is the first CLI argument; zork1 when none is given.
    game = "zork1" if len(sys.argv) <= 1 else sys.argv[1]
    banner = "=" * 50

    # Smoke test: play a handful of turns with verbose output enabled.
    config = ReActConfig(verbose=True)

    try:
        agent = ReActAgent(config)
        env = TextAdventureEnv(game)

        state = env.reset()
        print(banner)
        print(f"{game.upper()} (using {agent.config.model})")
        print(banner)
        print(state.observation)

        # Run a few steps
        for step_number in range(1, 6):
            print(f"\n{banner}")
            print(f"Step {step_number}")
            print(banner)

            action = agent.choose_action(state.observation, state)
            print(f"\n> {action}")

            state = env.step(action)
            print(f"\n{state.observation}")
            print(f"\nScore: {state.score}/{state.max_score}")

            agent.update_history(action, state.observation, state)

            if state.done:
                print("\nGAME OVER!")
                break

    except ValueError as e:
        # ReActAgent raises ValueError when no HF token is available.
        print(f"Setup error: {e}")
        print("Make sure to set your HF_TOKEN in .env file")
function_calling/controller.py ADDED
@@ -0,0 +1,291 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Function-Calling Controller for Zork (API-Based)
3
+
4
+ This controller uses the HuggingFace API's native function calling feature.
5
+ The model is given tool schemas and can call them via the tools API.
6
+
7
+ Model: Llama 3.2 3B Instruct (supports native function calling)
8
+
9
+ Compare with simple_controller.py, which uses a text-based parsing approach.
10
+ """
11
+
12
+ import os
13
+ import json
14
+ from dotenv import load_dotenv
15
+ from huggingface_hub import InferenceClient
16
+
17
+ from tools import ALL_TOOLS, set_game_state, add_to_history
18
+
19
+ # Add parent directory to path to import games module
20
+ import sys
21
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
22
+ from games.zork_env import ZorkEnvironment
23
+
24
+
25
+ # System prompt for the agent
26
+ SYSTEM_PROMPT = """You are playing Zork, a classic text adventure game.
27
+
28
+ ## YOUR GOAL
29
+ Explore, collect treasures (bring them to the trophy case), and maximize your score.
30
+
31
+ ## VALID COMMANDS (use ONLY these exact verbs)
32
+
33
+ Movement:
34
+ north, south, east, west, up, down (or n, s, e, w, u, d)
35
+ enter, exit, climb, cross, go <direction>
36
+
37
+ Looking:
38
+ look, examine <thing>, look at <thing>, look in <thing>, read <thing>
39
+
40
+ Objects:
41
+ take <item>, drop <item>, pick up <item>
42
+ open <thing>, close <thing>, unlock <thing> with <key>
43
+ put <item> in <container>, give <item> to <person>
44
+
45
+ Light:
46
+ turn on lamp, turn off lamp, light match
47
+
48
+ Combat:
49
+ attack <enemy> with <weapon>, kill <enemy> with <weapon>
50
+
51
+ Other:
52
+ inventory (or i), wait (or z), score, save, restore
53
+ push <thing>, pull <thing>, move <thing>, tie <rope> to <thing>
54
+ eat <food>, drink <liquid>, wave <item>
55
+
56
+ ## FORBIDDEN (these will NOT work):
57
+ check, inspect, search, investigate, grab, pick, use, interact,
58
+ go to, walk to, head to, travel, proceed
59
+
60
+ ## YOUR TOOLS
61
+ memory() - See current state and recent actions
62
+ get_map() - See explored locations
63
+ inventory() - Check what you're carrying
64
+
65
+ ## RESPONSE FORMAT
66
+ When you want to take a game action, respond with:
67
+ ACTION: <command>
68
+
69
+ Examples:
70
+ ACTION: open mailbox
71
+ ACTION: north
72
+ ACTION: take lamp
73
+ ACTION: examine leaflet"""
74
+
75
+
76
+ # Valid Zork command verbs for validation
77
VALID_VERBS = {
    "north", "south", "east", "west", "up", "down", "n", "s", "e", "w", "u", "d",
    "look", "l", "examine", "x", "read",
    "take", "get", "drop", "put", "give",
    "open", "close", "unlock", "lock",
    "turn", "light", "extinguish", "blow",
    "attack", "kill", "fight", "hit",
    # "cross" is advertised as a valid movement verb in SYSTEM_PROMPT, so the
    # validator must accept it instead of silently replacing it with "look".
    "enter", "exit", "go", "climb", "jump", "cross",
    "inventory", "i", "wait", "z", "score",
    "move", "push", "pull", "tie", "untie",
    "eat", "drink", "smell", "touch", "rub",
    "wave", "raise", "lower", "pour",
    "say", "answer", "yes", "no",
    "pray", "odysseus", "echo", "hello",
}


def validate_action(action: str) -> str:
    """Validate and potentially fix an action.

    The first word is checked against ``VALID_VERBS``. Known near-synonyms
    ("check", "grab", ...) are rewritten to the verb the game understands.

    Args:
        action: Raw command text produced by the model.

    Returns:
        The lower-cased command, possibly with its verb corrected, or
        ``"look"`` when the input is empty or the verb is not recognised.
    """
    action = action.strip().lower()
    if not action:
        return "look"

    words = action.split()
    verb = words[0]

    if verb in VALID_VERBS:
        return action

    # "pick up lamp" must become "take lamp"; replacing only the verb would
    # yield "take up lamp".
    if verb == "pick" and words[1:2] == ["up"]:
        return " ".join(["take", *words[2:]])

    # Common corrections
    corrections = {
        "check": "examine",
        "inspect": "examine",
        "search": "examine",
        "grab": "take",
        "pick": "take",
        "see": "look",
        "view": "look",
        "walk": "go",
    }

    if verb in corrections:
        return corrections[verb] + action[len(verb):]

    return "look"  # Default fallback
121
+
122
+
123
def build_tool_schemas():
    """Describe every tool in ``ALL_TOOLS`` as an OpenAI-style function schema.

    Each schema carries the tool's name and description and declares an
    empty parameter object, since none of the tools take arguments.
    """
    return [
        {
            "type": "function",
            "function": {
                "name": tool.name,
                "description": tool.description,
                "parameters": {
                    "type": "object",
                    "properties": {},
                    "required": []
                }
            }
        }
        for tool in ALL_TOOLS
    ]
141
+
142
+
143
def run_tool(tool_name: str) -> str:
    """Invoke the tool called ``tool_name`` with no arguments.

    Returns the tool's result, or an "Unknown tool" message when no tool in
    ``ALL_TOOLS`` has that exact name.
    """
    selected = next((tool for tool in ALL_TOOLS if tool.name == tool_name), None)
    if selected is None:
        return f"Unknown tool: {tool_name}"
    return selected.invoke({})
149
+
150
+
151
class FunctionCallingController:
    """Controller using LLM API-based function calling."""

    def __init__(self, model: str = "meta-llama/Llama-3.2-3B-Instruct"):
        """Create the inference client and tool schemas.

        Args:
            model: Model id to use when the ``HF_MODEL`` environment variable
                is not set.

        Raises:
            ValueError: If ``HF_TOKEN`` is not set in the environment.
        """
        load_dotenv()
        token = os.getenv("HF_TOKEN")
        if not token:
            raise ValueError("HF_TOKEN not set in environment")

        self.client = InferenceClient(token=token)
        self.model = os.getenv("HF_MODEL", model)
        self.tool_schemas = build_tool_schemas()

    @staticmethod
    def _extract_action(content: str) -> str:
        """Turn the model's text reply into a validated game command.

        The command is the text following the ``ACTION:`` marker. Taking
        everything after the first colon on the line (as before) breaks on
        replies such as ``Thought: go on. ACTION: north``. Surrounding
        quotes, backticks and markdown asterisks are removed. Without a
        marker the whole reply is treated as the command.
        """
        import re

        marker = re.search(r"ACTION:\s*(.*)", content, re.IGNORECASE)
        candidate = marker.group(1) if marker else content
        candidate = candidate.strip().strip("\"'`*").strip()
        # validate_action falls back to "look" for empty or unknown commands.
        return validate_action(candidate)

    def get_action(self, observation: str, game_state) -> str:
        """Get the next action from the LLM.

        The model may call up to three tools before it must answer with a
        command; if it is still calling tools after that, ``"look"`` is
        returned.

        Args:
            observation: Latest text output from the game.
            game_state: Object exposing ``inventory``, ``score`` and ``moves``.

        Returns:
            A validated game command.
        """

        # Update tool state
        set_game_state(
            observation=observation,
            inventory=list(game_state.inventory) if game_state.inventory else [],
            score=game_state.score,
            moves=game_state.moves
        )

        # Build messages fresh each time (simpler than managing tool history)
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"Game output:\n{observation}\n\nWhat do you do?"}
        ]

        # Allow up to 3 tool calls before requiring action
        for _ in range(3):
            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                tools=self.tool_schemas,
                tool_choice="auto",
                max_tokens=300,
            )

            message = response.choices[0].message

            # Check if model wants to use a tool
            if message.tool_calls:
                tool_call = message.tool_calls[0]
                tool_name = tool_call.function.name

                print(f" [Tool] {tool_name}")
                tool_result = run_tool(tool_name)
                print(f" {tool_result[:100]}...")

                # Add tool interaction to messages for next iteration
                messages.append({
                    "role": "assistant",
                    "content": None,
                    "tool_calls": [{
                        "id": tool_call.id,
                        "type": "function",
                        "function": {"name": tool_name, "arguments": "{}"}
                    }]
                })
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call.id,
                    "content": tool_result
                })

                # Continue to get the actual action
                continue

            # Model responded with text - extract action
            return self._extract_action(message.content or "")

        # After 3 tool calls, just return look
        return "look"
247
+
248
+
249
def main():
    """Play up to 30 steps of zork1 with the API function-calling controller."""
    heavy_rule = "=" * 60
    light_rule = "─" * 50

    print(heavy_rule)
    print("Zork - API Function Calling Controller")
    print(" (using Llama 3.2 3B with native tool calling)")
    print(heavy_rule)

    controller = FunctionCallingController()
    env = ZorkEnvironment("zork1")

    state = env.reset()
    print(f"\n{state.observation}\n")

    max_steps = 30

    for step_number in range(1, max_steps + 1):
        print(f"\n{light_rule}")
        print(f"Step {step_number}/{max_steps} | Score: {state.score}")
        print(light_rule)

        action = controller.get_action(state.observation, state)
        print(f"\n> ACTION: {action}")

        # Apply the command, then record it for the memory/get_map tools.
        state = env.step(action)
        add_to_history(action, state.observation)

        print(f"\n{state.observation}")

        if state.reward > 0:
            print(f"\n+{state.reward} points!")

        if state.done:
            print("\nGAME OVER!")
            break

    print(f"\n{heavy_rule}")
    print(f"Final Score: {state.score}")
    print(heavy_rule)
288
+
289
+
290
+ if __name__ == "__main__":
291
+ main()
function_calling/simple_controller.py ADDED
@@ -0,0 +1,268 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Function-Calling Controller for Zork (Text-Based)
3
+
4
+ This controller uses text-based "function calling" - the LLM outputs
5
+ TOOL: <name> or ACTION: <command> and we parse the text response.
6
+
7
+ Model: Qwen 2.5 7B Instruct (any chat model works)
8
+
9
+ This approach is:
10
+ - Simpler and more reliable than API-based function calling
11
+ - Works with any chat model (no special support needed)
12
+
13
+ Compare with controller.py which uses API-based tool calling.
14
+ """
15
+
16
+ import os
17
+ import re
18
+ from dotenv import load_dotenv
19
+ from huggingface_hub import InferenceClient
20
+
21
+ from tools import ALL_TOOLS, set_game_state, add_to_history
22
+
23
+ # Add parent directory to path
24
+ import sys
25
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
26
+ from games.zork_env import ZorkEnvironment
27
+
28
+
29
+ SYSTEM_PROMPT = """You are playing Zork, a classic text adventure game.
30
+
31
+ ## YOUR GOAL
32
+ Explore, collect treasures (bring them to the trophy case), and maximize your score.
33
+
34
+ ## VALID COMMANDS (use ONLY these exact verbs)
35
+
36
+ Movement:
37
+ north, south, east, west, up, down (or n, s, e, w, u, d)
38
+ enter, exit, climb, cross, go <direction>
39
+
40
+ Looking:
41
+ look, examine <thing>, look at <thing>, look in <thing>, read <thing>
42
+
43
+ Objects:
44
+ take <item>, drop <item>, pick up <item>
45
+ open <thing>, close <thing>, unlock <thing> with <key>
46
+ put <item> in <container>, give <item> to <person>
47
+
48
+ Light:
49
+ turn on lamp, turn off lamp, light match
50
+
51
+ Combat:
52
+ attack <enemy> with <weapon>, kill <enemy> with <weapon>
53
+
54
+ Other:
55
+ inventory (or i), wait (or z), score, save, restore
56
+ push <thing>, pull <thing>, move <thing>, tie <rope> to <thing>
57
+ eat <food>, drink <liquid>, wave <item>
58
+
59
+ ## FORBIDDEN (these will NOT work):
60
+ check, inspect, search, investigate, grab, pick, use, interact,
61
+ go to, walk to, head to, travel, proceed
62
+
63
+ ## YOUR TOOLS
64
+ TOOL: memory - See current state and recent actions
65
+ TOOL: get_map - See explored locations
66
+ TOOL: inventory - Check what you're carrying
67
+
68
+ ## RESPONSE FORMAT
69
+ Either use a tool:
70
+ TOOL: memory
71
+
72
+ Or take a game action:
73
+ ACTION: open mailbox
74
+
75
+ Always respond with TOOL: or ACTION: followed by your choice."""
76
+
77
+
78
+ # Valid Zork command verbs for validation
79
VALID_VERBS = {
    "north", "south", "east", "west", "up", "down", "n", "s", "e", "w", "u", "d",
    "look", "l", "examine", "x", "read",
    "take", "get", "drop", "put", "give",
    "open", "close", "unlock", "lock",
    "turn", "light", "extinguish", "blow",
    "attack", "kill", "fight", "hit",
    # "cross" is listed as a valid movement verb in SYSTEM_PROMPT; without it
    # here the validator would reject a command the prompt told the model to use.
    "enter", "exit", "go", "climb", "jump", "cross",
    "inventory", "i", "wait", "z", "score",
    "move", "push", "pull", "tie", "untie",
    "eat", "drink", "smell", "touch", "rub",
    "wave", "raise", "lower", "pour",
    "say", "answer", "yes", "no",
    "pray", "odysseus", "echo", "hello",
}
94
+
95
+
96
def run_tool(tool_name: str) -> str:
    """Invoke a tool by name, tolerating case and spaces in the name.

    The name is trimmed, lower-cased and has spaces replaced by underscores
    before lookup. An unknown name yields a message listing the available tools.
    """
    normalized = tool_name.strip().lower().replace(" ", "_")
    selected = next((tool for tool in ALL_TOOLS if tool.name == normalized), None)
    if selected is None:
        return f"Unknown tool: {normalized}. Available: memory, get_map, inventory"
    return selected.invoke({})
103
+
104
+
105
class SimpleController:
    """Controller using text-based tool calling.

    The model replies with ``TOOL: <name>`` or ``ACTION: <command>``; the
    reply is parsed with regular expressions rather than a tools API.
    """

    def __init__(self, model: str = "Qwen/Qwen2.5-7B-Instruct"):
        """Create the inference client and an empty conversation.

        Args:
            model: Model id to use when the ``HF_MODEL`` environment variable
                is not set.

        Raises:
            ValueError: If ``HF_TOKEN`` is not set in the environment.
        """
        load_dotenv()
        token = os.getenv("HF_TOKEN")
        if not token:
            raise ValueError("HF_TOKEN not set in environment")

        self.client = InferenceClient(token=token)
        self.model = os.getenv("HF_MODEL", model)
        self.messages = []

    def _call_llm(self, user_message: str) -> str:
        """Send ``user_message`` to the model and return its reply.

        Both the message and the reply are appended to ``self.messages``;
        only the 15 most recent entries are kept before each request.
        """
        self.messages.append({"role": "user", "content": user_message})

        # Keep conversation short
        if len(self.messages) > 15:
            self.messages = self.messages[-15:]

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "system", "content": SYSTEM_PROMPT}] + self.messages,
            max_tokens=150,
            temperature=0.7,
        )

        reply = response.choices[0].message.content or ""
        self.messages.append({"role": "assistant", "content": reply})
        return reply

    def _validate_action(self, action: str) -> str | None:
        """Validate and potentially fix an action. Returns None if invalid.

        The first word must be in ``VALID_VERBS``; a few near-synonyms are
        rewritten to the verb the game understands, and the correction is
        printed.
        """
        action = action.strip().lower()
        if not action:
            return None

        # Get the first word (verb)
        words = action.split()
        verb = words[0]

        # Check if it's a valid verb
        if verb in VALID_VERBS:
            return action

        # Try common corrections
        corrections = {
            "check": "examine",
            "inspect": "examine",
            "search": "examine",
            "grab": "take",
            "pick": "take",  # "pick up" -> "take"
            "see": "look",
            "view": "look",
            "walk": "go",
        }

        if verb in corrections:
            replacement = corrections[verb]
            remainder = action[len(verb):]
            if verb == "pick" and words[1:2] == ["up"]:
                # Drop the particle too: "pick up lamp" must become
                # "take lamp", not "take up lamp".
                remainder = "".join(f" {word}" for word in words[2:])
            print(f" [Correcting] '{verb}' -> '{replacement}'")
            return replacement + remainder

        return None

    def get_action(self, observation: str, game_state) -> str:
        """Get the next action, allowing tool use.

        Up to three model turns are spent on tool calls or on retries after
        an invalid command; after that ``"look"`` is returned.

        Args:
            observation: Latest text output from the game.
            game_state: Object exposing ``inventory``, ``score`` and ``moves``.

        Returns:
            A game command.
        """

        # Update tool state
        set_game_state(
            observation=observation,
            inventory=list(game_state.inventory) if game_state.inventory else [],
            score=game_state.score,
            moves=game_state.moves
        )

        prompt = f"Game:\n{observation}\n\nRespond with TOOL: or ACTION:"

        # Allow up to 3 tool calls before requiring an action
        for _ in range(3):
            response = self._call_llm(prompt)

            # Check for TOOL:
            tool_match = re.search(r'TOOL:\s*(\w+)', response, re.IGNORECASE)
            if tool_match:
                tool_name = tool_match.group(1)
                print(f" [Tool] {tool_name}")

                result = run_tool(tool_name)
                print(f" {result[:80]}...")

                # Feed result back
                prompt = f"Tool result:\n{result}\n\nNow respond with TOOL: or ACTION:"
                continue

            # Check for ACTION:
            action_match = re.search(r'ACTION:\s*(.+)', response, re.IGNORECASE)
            if action_match:
                action = action_match.group(1).strip().lower()
                # Clean up action (remove quotes, extra text)
                action = action.split('\n')[0].strip('"\'')

                # Validate the action
                validated = self._validate_action(action)
                if validated:
                    return validated
                else:
                    print(f" [Warning] Invalid action '{action}', asking for retry...")
                    prompt = f"'{action}' is not a valid Zork command. Use verbs like: look, examine, take, open, north, south, etc.\n\nRespond with ACTION:"
                    continue

            # If neither, try to extract a command
            words = response.lower().split()
            for cmd in ["north", "south", "east", "west", "up", "down",
                        "look", "take", "open", "enter", "examine"]:
                if cmd in words:
                    idx = words.index(cmd)
                    return " ".join(words[idx:idx+3])

            return "look"

        return "look"
226
+
227
+
228
def main():
    """Play up to 30 steps of zork1 with the text-based controller."""
    title_rule = "=" * 60
    step_rule = "─" * 50

    print(title_rule)
    print("Zork - Simple Function Calling Demo")
    print(title_rule)

    controller = SimpleController()
    env = ZorkEnvironment("zork1")

    state = env.reset()
    print(f"\n{state.observation}\n")

    max_steps = 30

    for step_number in range(1, max_steps + 1):
        print(f"\n{step_rule}")
        print(f"Step {step_number}/{max_steps} | Score: {state.score}")
        print(step_rule)

        action = controller.get_action(state.observation, state)
        print(f"\n> ACTION: {action}")

        # Apply the command, then record it for the memory/get_map tools.
        state = env.step(action)
        add_to_history(action, state.observation)

        print(f"\n{state.observation}")

        if state.reward > 0:
            print(f"\n+{state.reward} points!")

        if state.done:
            print("\nGAME OVER!")
            break

    print(f"\n{title_rule}")
    print(f"Final Score: {state.score}")
    print(title_rule)
265
+
266
+
267
+ if __name__ == "__main__":
268
+ main()
function_calling/tools.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Simple tools for the Zork agent using LangChain's tool decorator.
3
+ """
4
+
5
+ from langchain_core.tools import tool
6
+
7
+
8
+ # Game state that tools can access (set by the controller)
9
+ _game_state = {
10
+ "observation": "",
11
+ "inventory": [],
12
+ "score": 0,
13
+ "moves": 0,
14
+ "history": [], # List of (action, result) tuples
15
+ }
16
+
17
+
18
def set_game_state(observation: str, inventory: list, score: int, moves: int):
    """Store the latest observation, inventory, score and move count for the tools.

    The recorded action history is left untouched.
    """
    _game_state.update(
        observation=observation,
        inventory=inventory,
        score=score,
        moves=moves,
    )
24
+
25
+
26
def add_to_history(action: str, result: str):
    """Record an ``(action, result)`` pair, keeping only the 10 newest entries."""
    history = _game_state["history"]
    history.append((action, result))
    # Trim by rebinding to a fresh list once the cap is exceeded.
    if len(history) > 10:
        _game_state["history"] = history[-10:]
32
+
33
+
34
@tool
def memory() -> str:
    """Get a summary of the current game state including location, score, and recent actions."""
    # NOTE: the docstring above is the tool description sent to the model;
    # keep further documentation in comments so the description is unchanged.
    obs = _game_state["observation"]
    score = _game_state["score"]
    moves = _game_state["moves"]

    # Extract location (first line of observation). str.split never returns
    # an empty list, so the fallback has to test the first line itself;
    # otherwise an empty observation reports a blank location.
    first_line = obs.strip().split('\n')[0]
    location = first_line or "Unknown"

    # Recent actions: the last five, each result cut to 50 characters.
    recent = _game_state["history"][-5:] if _game_state["history"] else []
    recent_str = "\n".join([f" > {a} → {r[:50]}..." for a, r in recent]) if recent else " (none yet)"

    return f"""Current State:
- Location: {location}
- Score: {score} points
- Moves: {moves}

Recent Actions:
{recent_str}

Current Observation:
{obs}"""
59
+
60
+
61
@tool
def get_map() -> str:
    """Get a map showing known locations and connections based on exploration history."""
    # NOTE: the docstring above is the tool description sent to the model;
    # keep further documentation in comments so the description is unchanged.
    directions = {"north", "south", "east", "west", "up", "down", "enter", "exit"}
    # One-letter forms advertised in the system prompts.
    short_forms = {"n": "north", "s": "south", "e": "east", "w": "west", "u": "up", "d": "down"}

    locations = set()
    connections = []

    prev_loc = None
    for action, result in _game_state["history"]:
        words = action.strip().lower().split()
        # "go north" is the same move as "north".
        if len(words) == 2 and words[0] == "go":
            words = words[1:]
        command = short_forms.get(words[0], words[0]) if len(words) == 1 else None

        moved = command in directions
        looked = command in ("look", "l")
        # Only movement and look results start with a room name. The first
        # line of any other result ("Taken.", "Opening the mailbox ...") is
        # not a location and must not enter the map.
        # Failed moves ("You can't go that way.") are still recorded as
        # before; they cannot be told apart without the game's own state.
        if not (moved or looked):
            continue

        loc = result.strip().split('\n')[0]
        if not loc:
            continue
        locations.add(loc)

        # If this was a movement action, record connection
        if moved and prev_loc and prev_loc != loc:
            connections.append(f" {prev_loc} --{command}--> {loc}")
        prev_loc = loc

    if not locations:
        return "Map: No locations explored yet. Try moving around!"

    loc_list = "\n".join([f" - {loc}" for loc in sorted(locations)])
    conn_list = "\n".join(connections[-10:]) if connections else " (no connections recorded)"

    return f"""Known Locations:
{loc_list}

Connections:
{conn_list}"""
93
+
94
+
95
@tool
def inventory() -> str:
    """Get the list of items currently in your inventory."""
    # NOTE: the docstring above is the tool description sent to the model;
    # keep further documentation in comments so the description is unchanged.
    import re

    items = _game_state["inventory"]

    if not items:
        return "Inventory: You are empty-handed."

    # Clean up item names (Jericho returns objects with metadata)
    item_names = []
    for item in items:
        item_str = str(item)
        # Handle Jericho's object format: "leaflet Parent4 Sibling0..."
        # The metadata starts at a standalone "Parent<number>" token. A plain
        # substring search for "parent" would also hit names such as
        # "transparent vial" and cut them short.
        metadata = re.search(r"\bparent\d+\b", item_str, re.IGNORECASE)
        if metadata:
            name = item_str[:metadata.start()].strip()
            # Remove leading "obj123: " if present
            if ":" in name:
                name = name.split(":", 1)[1].strip()
            item_names.append(name)
        elif ":" in item_str:
            # Split once, as in the branch above, so a name that itself
            # contains a colon is kept whole.
            name = item_str.split(":", 1)[1].strip()
            item_names.append(name)
        else:
            item_names.append(item_str)

    return f"Inventory: {', '.join(item_names)}"
124
+
125
+
126
+ # Export all tools
127
+ ALL_TOOLS = [memory, get_map, inventory]
games/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from .zork_env import TextAdventureEnv, GameState, list_available_games, discover_games
2
+
3
+ # Alias for backwards compatibility
4
+ ZorkEnvironment = TextAdventureEnv
5
+
6
+ __all__ = ["TextAdventureEnv", "ZorkEnvironment", "GameState", "list_available_games", "discover_games"]
games/zork_env.py ADDED
@@ -0,0 +1,219 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Text Adventure Game Environment
3
+
4
+ Provides a clean interface to text adventure games via Jericho.
5
+ Supports Zork and many other classic Z-machine games.
6
+ """
7
+
8
+ from jericho import FrotzEnv
9
+ from dataclasses import dataclass
10
+ from typing import Optional
11
+ from pathlib import Path
12
+ import os
13
+
14
+
15
@dataclass
class GameState:
    """Snapshot of the game produced after a reset or a single step."""

    # Text printed by the game in response to the last command
    observation: str
    # Score reported by the interpreter for this snapshot
    score: int
    # Maximum score reported by the interpreter
    max_score: int
    # Move counter reported by the interpreter
    moves: int
    # True once the game has ended
    done: bool
    # Points gained from last action
    reward: int
    # Stringified inventory objects (empty when unavailable)
    inventory: list[str]
    # Stringified player location ("Unknown" when unavailable)
    location: str
26
+
27
+
28
def get_default_games_dir() -> Path:
    """Return the default game-suite directory, resolved relative to this file.

    The result is ``z-machine-games-master/jericho-game-suite`` under the
    parent of the directory that contains this module. The path is not
    checked for existence.
    """
    module_dir = Path(__file__).parent
    return module_dir.parent.joinpath("z-machine-games-master", "jericho-game-suite")
32
+
33
+
34
def discover_games(games_dir: Optional[Path] = None) -> dict[str, Path]:
    """
    Scan a directory for Z-machine story files.

    Args:
        games_dir: Directory to scan; when None the default game-suite
            directory is used.

    Returns:
        Mapping of lower-cased file stem to file path, ordered by name.
        Empty when the directory does not exist.
    """
    root = get_default_games_dir() if games_dir is None else games_dir
    root = Path(root)
    if not root.exists():
        return {}

    found = {}
    # Story files are matched by extension: .z3, .z4, .z5, .z8
    for pattern in ("*.z3", "*.z4", "*.z5", "*.z8"):
        found.update((story.stem.lower(), story) for story in root.glob(pattern))

    return {name: found[name] for name in sorted(found)}
60
+
61
+
62
def list_available_games(games_dir: Optional[Path] = None) -> list[str]:
    """List the names of all discoverable games, in sorted order."""
    catalogue = discover_games(games_dir)
    return [name for name in catalogue]
65
+
66
+
67
class TextAdventureEnv:
    """Wrapper around Jericho's FrotzEnv for text adventure games.

    The wrapper remembers the last score it saw so that every step can report
    the score delta as its reward, and it keeps a log of (action, observation)
    pairs since the last reset.
    """

    def __init__(self, game: str = "zork1", games_dir: Optional[str] = None):
        """
        Initialize the text adventure environment.

        Args:
            game: Game name (e.g., 'zork1', 'advent', 'enchanter')
                Can also be a full path to a .z* file
            games_dir: Directory containing game files (optional)

        Raises:
            ValueError: If ``game`` is neither an existing file nor a known
                game name.
        """
        if os.path.isfile(game):
            # A path to a story file was given directly
            game_path = Path(game)
            self.game = game_path.stem
        else:
            # Look up game by (case-insensitive) name
            games_path = Path(games_dir) if games_dir else None
            available_games = discover_games(games_path)
            key = game.lower()

            if key not in available_games:
                available = list(available_games.keys())[:20]
                raise ValueError(
                    f"Unknown game: {game}. "
                    f"Available: {', '.join(available)}... "
                    f"({len(available_games)} total)"
                )

            game_path = available_games[key]
            self.game = key

        self.env = FrotzEnv(str(game_path))
        self.game_path = game_path
        self._last_score = 0
        self._history: list[tuple[str, str]] = []  # (action, observation) pairs

    def reset(self) -> "GameState":
        """Reset the game to the beginning and clear score/history tracking."""
        observation, info = self.env.reset()
        self._last_score = 0
        self._history = []
        return self._make_game_state(observation, info, done=False, reward=0)

    def step(self, action: str) -> "GameState":
        """
        Take an action in the game.

        Args:
            action: The text command to execute (e.g., "go north", "take lamp")

        Returns:
            GameState with the result of the action; its ``reward`` is the
            change in score since the previous step.
        """
        # The interpreter's own reward is ignored; the reward is derived from
        # the score delta so it stays consistent with `_last_score`.
        observation, _env_reward, done, info = self.env.step(action)

        current_score = info.get('score', 0)
        reward = current_score - self._last_score
        self._last_score = current_score

        self._history.append((action, observation))

        return self._make_game_state(observation, info, done, reward)

    def _make_game_state(self, observation: str, info: dict, done: bool, reward: int) -> "GameState":
        """Create a GameState from the environment info."""
        # Inventory and location lookups are best-effort (may fail without
        # spacy); fall back to neutral values instead of failing the step.
        try:
            inventory = [str(obj) for obj in self.env.get_inventory()]
        except Exception:
            inventory = []

        try:
            location = str(self.env.get_player_location())
        except Exception:
            location = "Unknown"

        return GameState(
            observation=observation,
            score=info.get('score', 0),
            max_score=self.env.get_max_score(),
            moves=info.get('moves', 0),
            done=done,
            reward=reward,
            inventory=inventory,
            location=location,
        )

    def get_history(self) -> list[tuple[str, str]]:
        """Get a copy of the (action, observation) pairs since the last reset."""
        return self._history.copy()

    def get_valid_actions(self) -> list[str]:
        """
        Get a list of valid actions for the current state.
        Note: This requires spacy to be properly installed.
        """
        try:
            return self.env.get_valid_actions()
        except Exception:
            # Return common actions if spacy isn't available
            return [
                "north", "south", "east", "west",
                "up", "down", "look", "inventory",
                "take all", "open mailbox", "read"
            ]

    def save_state(self):
        """Save the current game state and return the interpreter's snapshot."""
        return self.env.get_state()

    def load_state(self, state):
        """Load a previously saved game state.

        The reward baseline is re-synchronised with the restored score;
        otherwise the first step after a restore would report the difference
        between the restored score and the score before the restore. The
        action history is not rewound.
        """
        self.env.set_state(state)
        self._last_score = self.env.get_score()

    def get_walkthrough(self) -> list[str]:
        """Get the walkthrough for the game (for debugging/comparison only)."""
        return self.env.get_walkthrough()
187
+
188
+
189
+ # Alias for backwards compatibility
190
+ ZorkEnvironment = TextAdventureEnv
191
+
192
+
193
+ # Example usage
194
if __name__ == "__main__":
    import sys

    # Show a short preview of the game catalogue
    catalogue = list_available_games()
    preview = ', '.join(catalogue[:15])
    print(f"Available games ({len(catalogue)} total):")
    print(f" {preview}...")
    print()

    # First command-line argument selects the game; zork1 otherwise
    chosen = "zork1"
    if len(sys.argv) > 1:
        chosen = sys.argv[1]

    demo_env = TextAdventureEnv(chosen)
    snapshot = demo_env.reset()

    print(f"=== {demo_env.game.upper()} ===")
    print(f"Max Score: {snapshot.max_score}")
    print(f"\n{snapshot.observation}")
    print(f"\nValid actions: {demo_env.get_valid_actions()[:10]}...")

    # Smoke-test a couple of harmless commands
    for command in ("look", "inventory"):
        print(f"\n> {command}")
        snapshot = demo_env.step(command)
        print(snapshot.observation)
        print(f"Score: {snapshot.score}, Reward: {snapshot.reward}")
mcp_server/README.md ADDED
@@ -0,0 +1,83 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Zork MCP Server
2
+
3
+ This directory contains an MCP (Model Context Protocol) server that exposes Zork game tools to LLM agents.
4
+
5
+ ## Overview
6
+
7
+ The MCP server wraps the Jericho Zork environment and provides tools that any MCP-compatible agent (like Mini SWE Agent) can use to play the game.
8
+
9
+ ## Tools Available
10
+
11
+ | Tool | Description |
12
+ |------|-------------|
13
+ | `play_action(action)` | Execute a game command (e.g., "north", "take lamp") |
14
+ | `memory()` | Get current state summary (location, score, recent actions) |
15
+ | `get_map()` | View explored locations and connections |
16
+ | `inventory()` | Check items you're carrying |
17
+ | `valid_actions()` | Get hints on available commands |
18
+ | `reset_game(game)` | Start over, or switch to any available game (default: zork1) |
19
+ | `hint()` | Get contextual hints for your situation |
20
+
21
+ ## Resources
22
+
23
+ The server also exposes MCP resources:
24
+ - `game://state` - Current game state
25
+ - `game://history` - Complete action history
26
+ - `game://map` - Explored locations map
27
+
28
+ ## Running the Server
29
+
30
+ ### Standalone (for testing)
31
+ ```bash
32
+ python mcp_server/zork_server.py
33
+ ```
34
+
35
+ ### With MCP Inspector (for debugging)
36
+ ```bash
37
+ npx @modelcontextprotocol/inspector python mcp_server/zork_server.py
38
+ ```
39
+
40
+ ### With Mini SWE Agent
41
+ ```bash
42
+ python play_zork.py
43
+ ```
44
+
45
+ ## Configuration
46
+
47
+ The `mcp_config.json` file configures the server for use with MCP clients:
48
+
49
+ ```json
50
+ {
51
+ "mcpServers": {
52
+ "zork": {
53
+ "command": "python",
54
+ "args": ["mcp_server/zork_server.py"]
55
+ }
56
+ }
57
+ }
58
+ ```
59
+
60
+ ## Architecture
61
+
62
+ ```
63
+ ┌─────────────────────────────────────────┐
64
+ │ MCP Client (Agent) │
65
+ │ (Mini SWE Agent / Claude / etc.) │
66
+ └──────────────────┬──────────────────────┘
67
+ │ MCP Protocol (stdio)
68
+
69
+ ┌─────────────────────────────────────────┐
70
+ │ Zork MCP Server │
71
+ │ (FastMCP - zork_server.py) │
72
+ │ │
73
+ │ Tools: play_action, memory, map, │
74
+ │ inventory, valid_actions, │
75
+ │ reset_game, hint │
76
+ └──────────────────┬──────────────────────┘
77
+
78
+
79
+ ┌─────────────────────────────────────────┐
80
+ │ Jericho + Frotz │
81
+ │ (Z-machine game interpreter) │
82
+ └─────────────────────────────────────────┘
83
+ ```
mcp_server/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ # Text Adventure MCP Server
mcp_server/mcp_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "mcpServers": {
3
+ "zork": {
4
+ "command": "python",
5
+ "args": ["mcp_server/zork_server.py"],
6
+ "cwd": "${workspaceFolder}"
7
+ }
8
+ }
9
+ }
mcp_server/zork_server.py ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Text Adventure MCP Server - Exposes text adventure games via Model Context Protocol.
3
+
4
+ This server allows any MCP-compatible agent to play Zork and other text adventure
5
+ games using tools for game actions, memory, mapping, and inventory.
6
+
7
+ Uses FastMCP for simple, Pythonic MCP server implementation.
8
+
9
+ Usage:
10
+ # Run directly (stdio transport) - default game is zork1
11
+ python mcp_server/zork_server.py
12
+
13
+ # Run with a different game
14
+ GAME=zork2 python mcp_server/zork_server.py
15
+ GAME=advent python mcp_server/zork_server.py
16
+ GAME=enchanter python mcp_server/zork_server.py
17
+
18
+ # Use with FastMCP dev tools
19
+ fastmcp dev mcp_server/zork_server.py
20
+
21
+ # Connect from an MCP client
22
+ from fastmcp import Client
23
+ async with Client("mcp_server/zork_server.py") as client:
24
+ result = await client.call_tool("play_action", {"action": "look"})
25
+ """
26
+
27
+ import sys
28
+ import os
29
+
30
+ # Add parent directory to path to import games module
31
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
32
+
33
+ from fastmcp import FastMCP
34
+ from games.zork_env import TextAdventureEnv, list_available_games
35
+
36
+
37
+ # Get game from environment variable (default: zork1)
38
+ INITIAL_GAME = os.environ.get("GAME", "zork1")
39
+
40
+ # Create the MCP server
41
+ mcp = FastMCP("Text Adventure Server")
42
+
43
+
44
class GameState:
    """Manages the text adventure game state and exploration data.

    Wraps a TextAdventureEnv and layers on top of it a bounded action
    history, a map of explored locations and the current location name.
    """

    # Commands treated as movement when updating the explored-locations map.
    _MOVEMENT_COMMANDS = frozenset({
        "north", "south", "east", "west", "up", "down",
        "enter", "exit", "n", "s", "e", "w", "u", "d",
    })
    # Only this many most recent (action, observation) pairs are kept.
    _HISTORY_LIMIT = 50

    def __init__(self, game: str = "zork1"):
        """Create the environment for ``game`` and reset it to the start."""
        self.game_name = game
        self.env = TextAdventureEnv(game)
        self.state = self.env.reset()
        self.history: list[tuple[str, str]] = []
        self.explored_locations: dict[str, set[str]] = {}  # location -> set of exits
        self.current_location: str = self._extract_location(self.state.observation)

    def _extract_location(self, observation: str) -> str:
        """Extract location name from observation (usually first line).

        Returns "Unknown" for an empty or whitespace-only observation.
        """
        stripped = observation.strip()
        if not stripped:
            return "Unknown"
        return stripped.split('\n', 1)[0]

    @staticmethod
    def _clean_item_name(raw) -> str:
        """Strip Jericho's object metadata from one inventory entry."""
        import re

        text = str(raw)
        # Jericho's object format: "Obj12: leaflet Parent4 Sibling0 ...".
        # The metadata tail begins at "Parent<number>"; requiring a non-letter
        # before it keeps names such as "transparent bottle" intact.
        found = re.search(r"(?<![a-z])parent-?\d", text, flags=re.IGNORECASE)
        if found:
            text = text[:found.start()]
        # Remove leading "obj123:" if present; split once so a colon inside
        # the item name survives.
        if ":" in text:
            text = text.split(":", 1)[1]
        return text.strip()

    def take_action(self, action: str) -> str:
        """Execute a game action and return the result.

        Records the action in the bounded history and, for movement commands,
        updates the explored map and the current location.
        """
        self.state = self.env.step(action)
        result = self.state.observation

        # Track history, keeping only the most recent entries
        self.history.append((action, result))
        if len(self.history) > self._HISTORY_LIMIT:
            self.history = self.history[-self._HISTORY_LIMIT:]

        # Update map; movement detection ignores case and surrounding spaces
        command = action.strip().lower()
        if command in self._MOVEMENT_COMMANDS:
            new_location = self._extract_location(result)
            exits = self.explored_locations.setdefault(self.current_location, set())
            if new_location != self.current_location:
                exits.add(f"{command} -> {new_location}")
            self.current_location = new_location

        return result

    def get_memory(self) -> str:
        """Get a summary of current game state."""
        recent = self.history[-5:] if self.history else []
        recent_str = "\n".join([f" > {a} → {r[:60]}..." for a, r in recent]) if recent else " (none yet)"

        return f"""Current State:
- Location: {self.current_location}
- Score: {self.state.score} points
- Moves: {self.state.moves}
- Game: {self.game_name}

Recent Actions:
{recent_str}

Current Observation:
{self.state.observation}"""

    def get_map(self) -> str:
        """Get a map of explored locations."""
        if not self.explored_locations:
            return "Map: No locations explored yet. Try moving around!"

        lines = ["Explored Locations and Exits:"]
        for loc, exits in sorted(self.explored_locations.items()):
            lines.append(f"\n* {loc}")
            for exit_info in sorted(exits):
                lines.append(f" -> {exit_info}")

        lines.append(f"\n[Current] {self.current_location}")
        return "\n".join(lines)

    def get_inventory(self) -> str:
        """Get current inventory as a single human-readable line."""
        items = getattr(self.state, 'inventory', None) or []

        if not items:
            return "Inventory: You are empty-handed."

        item_names = [self._clean_item_name(item) for item in items]
        return f"Inventory: {', '.join(item_names)}"

    def get_valid_actions(self) -> str:
        """Get list of valid actions in current state."""
        try:
            valid = self.env.get_valid_actions() if hasattr(self.env, 'get_valid_actions') else []
            if valid:
                return f"Valid actions: {', '.join(valid[:20])}"
        except Exception:
            # Best-effort lookup: fall through to the generic suggestion below
            pass
        return "Valid actions: Try standard commands like look, north, south, east, west, take <item>, open <thing>"
150
+
151
+
152
+ # Global game state (initialized on first use)
153
+ _game_state: GameState | None = None
154
+
155
+
156
def get_game() -> GameState:
    """Return the shared game state, creating it on first use."""
    global _game_state
    if _game_state is not None:
        return _game_state
    # First access: start the game selected by INITIAL_GAME
    _game_state = GameState(INITIAL_GAME)
    return _game_state
162
+
163
+
164
+ # ============================================================================
165
+ # MCP Tools
166
+ # ============================================================================
167
+
168
@mcp.tool()
def play_action(action: str) -> str:
    """
    Execute a game action in the text adventure.

    Common commands:
    - Movement: north, south, east, west, up, down, enter, exit (or n, s, e, w, u, d)
    - Objects: take <item>, drop <item>, open <thing>, close <thing>, put <item> in <container>
    - Look: look, examine <thing>, read <thing>
    - Combat: attack <enemy> with <weapon>
    - Light: turn on lamp, light match
    - Other: wait, score, inventory

    Args:
        action: The command to execute (e.g., 'north', 'take lamp', 'open mailbox')

    Returns:
        The game's response to your action
    """
    game = get_game()
    pieces = [game.take_action(action)]

    # Announce points only when the score went up
    if game.state.reward > 0:
        pieces.append(f"\n\n+{game.state.reward} points! (Total: {game.state.score})")

    # Flag the end of the game
    if game.state.done:
        pieces.append("\n\nGAME OVER")

    return "".join(pieces)
200
+
201
+
202
@mcp.tool()
def memory() -> str:
    """
    Get a summary of the current game state.

    Returns your location, score, moves, recent actions, and current observation.
    Use this to understand where you are and what happened recently.
    Very useful for avoiding loops and tracking progress.
    """
    game = get_game()
    return game.get_memory()
212
+
213
+
214
@mcp.tool()
def get_map() -> str:
    """
    Get a map showing all locations you have explored and the connections between them.

    Useful for navigation and planning routes back to previous locations.
    The map builds up as you explore more of the game world.
    """
    game = get_game()
    return game.get_map()
223
+
224
+
225
@mcp.tool()
def inventory() -> str:
    """
    Check what items you are currently carrying.

    Essential before trying to use, drop, or interact with items.
    Most games have an inventory limit, so manage your items wisely.
    """
    game = get_game()
    return game.get_inventory()
234
+
235
+
236
@mcp.tool()
def valid_actions() -> str:
    """
    Get a list of valid actions available in the current game state.

    Helpful when stuck or unsure what commands the game accepts.
    Note: This may not include all possible actions, just common ones.
    """
    game = get_game()
    return game.get_valid_actions()
245
+
246
+
247
@mcp.tool()
def reset_game(game: str = "zork1") -> str:
    """
    Reset the game to the beginning or switch to a different game.

    Use this to start over if you get stuck, die, or want to try a different game.

    Args:
        game: Game name (e.g., 'zork1', 'zork2', 'advent', 'enchanter')
              Use list_games() to see available options.

    Returns:
        The initial game text
    """
    global _game_state
    try:
        fresh = GameState(game)
    except ValueError as e:
        # Unknown game name: the previous session stays in place
        return f"Error: {e}"

    _game_state = fresh
    return f"Game reset to {game}.\n\n{fresh.state.observation}"
267
+
268
+
269
@mcp.tool()
def list_games() -> str:
    """
    List all available text adventure games.

    Returns:
        List of game names that can be passed to reset_game()
    """
    names = list_available_games()
    header = f"Available games ({len(names)} total):\n"
    return header + ", ".join(names)
279
+
280
+
281
@mcp.tool()
def hint() -> str:
    """
    Get a hint about what to do next based on your current situation.

    Provides general guidance without spoiling puzzle solutions.
    """
    game = get_game()
    location = game.current_location.lower()
    inv = game.get_inventory().lower()
    observation = game.state.observation.lower()

    hints = []

    # Darkness detection (common in many games)
    if "dark" in location or "dark" in observation or "pitch black" in observation:
        hints.append("It's dangerous in the dark! You need a light source.")
        hints.append("If you have a lamp, try 'turn on lamp'.")

    # Items worth picking up: mentioned in the room text but not yet carried
    item_hints = (
        ("lamp", "There's a lamp here - light sources are essential!"),
        ("lantern", "There's a lantern here - you'll need light for dark areas!"),
        ("sword", "A sword might be useful for combat encounters."),
        ("key", "A key might unlock something important."),
    )
    for item, message in item_hints:
        if item in observation and item not in inv:
            hints.append(message)

    # Container hints
    if any(word in observation for word in ["mailbox", "chest", "box", "container", "cabinet"]):
        hints.append("Try opening containers to find hidden items.")

    # Door/window hints
    if "door" in observation or "window" in observation:
        hints.append("There might be a way in or out here. Try 'open' commands.")

    # General hints if nothing specific found
    if not hints:
        hints.append("Explore all directions: north, south, east, west, up, down.")
        hints.append("Examine interesting objects with 'examine <thing>'.")
        hints.append("Pick up useful items with 'take <item>'.")
        hints.append("Open containers and read documents for clues.")

    # Use real newlines: the escaped "\\n" used previously produced a literal
    # backslash-n in the output instead of one hint per line.
    return "Hints:\n" + "\n".join(f" - {h}" for h in hints)
326
+
327
+
328
+ # ============================================================================
329
+ # MCP Resources
330
+ # ============================================================================
331
+
332
@mcp.resource("game://state")
def get_state_resource() -> str:
    """Current game state as a resource."""
    game = get_game()
    return game.get_memory()
336
+
337
+
338
@mcp.resource("game://history")
def get_history_resource() -> str:
    """Complete action history as a resource."""
    entries = get_game().history
    if not entries:
        return "No actions taken yet."
    # One numbered line per action, with the result cut to 80 characters
    return "\n".join(
        f"{number}. {action} -> {result[:80]}..."
        for number, (action, result) in enumerate(entries, start=1)
    )
346
+
347
+
348
@mcp.resource("game://map")
def get_map_resource() -> str:
    """Explored map as a resource."""
    game = get_game()
    return game.get_map()
352
+
353
+
354
+ # ============================================================================
355
+ # Game Prompt (for agents)
356
+ # ============================================================================
357
+
358
+ GAME_PROMPT = """You are playing a classic text adventure game.
359
+
360
+ ## YOUR GOAL
361
+ Explore the world, solve puzzles, collect treasures, and maximize your score.
362
+
363
+ ## VALID COMMANDS (use ONLY these exact verbs)
364
+
365
+ Movement:
366
+ north, south, east, west, up, down (or n, s, e, w, u, d)
367
+ enter, exit, climb, cross, go <direction>
368
+
369
+ Looking:
370
+ look, examine <thing>, look at <thing>, look in <thing>, read <thing>
371
+
372
+ Objects:
373
+ take <item>, drop <item>, pick up <item>
374
+ open <thing>, close <thing>, unlock <thing> with <key>
375
+ put <item> in <container>, give <item> to <person>
376
+
377
+ Light:
378
+ turn on lamp, turn off lamp, light match
379
+
380
+ Combat:
381
+ attack <enemy> with <weapon>, kill <enemy> with <weapon>
382
+
383
+ Other:
384
+ inventory (or i), wait (or z), score
385
+ push <thing>, pull <thing>, move <thing>
386
+ tie <rope> to <thing>, eat <food>, wave <item>
387
+
388
+ ## FORBIDDEN VERBS (these will NOT work):
389
+ check, inspect, search, investigate, grab, pick, use, interact,
390
+ go to, walk to, head to, travel, proceed
391
+
392
+ ## STRATEGY TIPS
393
+ 1. Explore systematically - check all directions
394
+ 2. Read everything - open containers, read documents, examine objects
395
+ 3. Use get_map() to track explored locations
396
+ 4. Light is essential - find a light source before dark areas!
397
+ 5. Manage inventory - you can only carry limited items
398
+
399
+ ## GETTING STARTED
400
+ 1. Call memory() to see your current state
401
+ 2. Explore your starting area thoroughly
402
+ 3. Pick up useful items (light sources, weapons, keys)
403
+
404
+ Good luck!
405
+ """
406
+
407
+
408
def get_game_prompt(game: str = "zork1") -> str:
    """Get the system prompt for playing text adventures.

    Args:
        game: Name of the game being played; mentioned in the closing note.

    Returns:
        GAME_PROMPT followed by a note naming the current game and pointing
        to list_games(). The number of games is looked up at call time
        rather than hard-coded, and is omitted when no games are found.
    """
    total = len(list_available_games())
    catalogue = f"all {total} available games" if total else "all available games"
    note = f"\n\nNote: Currently playing {game}. Use list_games() to see {catalogue}."
    return GAME_PROMPT + note
413
+
414
+
415
+ # ============================================================================
416
+ # Main
417
+ # ============================================================================
418
+
419
+ if __name__ == "__main__":
420
+ mcp.run()
requirements.txt CHANGED
@@ -1 +1,14 @@
1
- gradio>=4.0.0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ jericho
3
+ python-dotenv
4
+
5
+ # LLM providers
6
+ huggingface_hub
7
+ openai
8
+ anthropic
9
+
10
+ # MCP Server
11
+ fastmcp
12
+
13
+ # Function calling (optional, for the alternative approach)
14
+ langchain-core
run_agent.py ADDED
@@ -0,0 +1,352 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Unified Text Adventure Agent Runner
4
+
5
+ Run different types of LLM agents to play text adventure games:
6
+ - react: Basic ReAct agent with HuggingFace models
7
+ - function: Function-calling controller (API-based or text-based)
8
+ - mcp: MCP ReAct agent using FastMCP Client
9
+
10
+ Usage:
11
+ python run_agent.py --mode react
12
+ python run_agent.py --mode function
13
+ python run_agent.py --mode mcp
14
+
15
+ Examples:
16
+ # Run the basic ReAct agent
17
+ python run_agent.py --mode react
18
+
19
+ # Run the function-calling controller (API-based)
20
+ python run_agent.py --mode function
21
+
22
+ # Run the function-calling controller (text-based, works with any model)
23
+ python run_agent.py --mode function --simple
24
+
25
+ # Run with MCP ReAct agent (uses FastMCP Client)
26
+ python run_agent.py --mode mcp
27
+
28
+ # Play a different game
29
+ python run_agent.py --mode mcp --game advent
30
+ """
31
+
32
+ import argparse
33
+ import sys
34
+ import os
35
+ import time
36
+ from pathlib import Path
37
+
38
+ # Add games module to path for discovering available games
39
+ sys.path.insert(0, str(Path(__file__).parent))
40
+ from games.zork_env import list_available_games, TextAdventureEnv
41
+
42
+
43
+ # =============================================================================
44
+ # Mode: ReAct Agent
45
+ # =============================================================================
46
+
47
def run_react_agent(args):
    """Play a game with the basic ReAct agent and return the run summary."""
    from agents.react_agent import ReActAgent, ReActConfig

    # Announce the run configuration
    for line in (
        "\n[ReAct] Running ReAct Agent",
        f" Game: {args.game}",
        f" Model: {args.model}",
        "",
    ):
        print(line)

    env = TextAdventureEnv(args.game)
    agent = ReActAgent(ReActConfig(verbose=args.verbose, model=args.model))

    return run_game_loop(env, agent, args.max_steps, args.verbose)
61
+
62
+
63
def run_game_loop(env, agent, max_steps: int, verbose: bool) -> dict:
    """Drive a ReAct-style agent through a game for at most ``max_steps`` steps.

    Each step asks the agent for an action, applies it to the environment,
    prints the outcome and feeds it back to the agent. The loop ends when
    the game reports it is done, the step budget is used up, or the user
    interrupts. Returns whatever ``print_summary`` returns.
    """
    state = env.reset()
    agent.reset()

    banner = "=" * 60
    print(banner)
    print(f"{env.game.upper()} - Starting Game")
    print(f"Max Score: {state.max_score}")
    print(banner)
    print(f"\n{state.observation}\n")

    started_at = time.time()
    step = 0
    divider = "─" * 40

    try:
        for step in range(1, max_steps + 1):
            print(f"\n{divider}")
            print(f"Step {step}")
            print(divider)

            action = agent.choose_action(state.observation, state)
            print(f"\n> {action}")

            state = env.step(action)
            print(f"\n{state.observation}")

            # Report gains and losses explicitly, otherwise the running score
            if state.reward > 0:
                headline = f"+{state.reward} points! (Total: {state.score}/{state.max_score})"
            elif state.reward < 0:
                headline = f"{state.reward} points! (Total: {state.score}/{state.max_score})"
            else:
                headline = f"Score: {state.score}/{state.max_score}"
            print(f"\n{headline}")

            agent.update_history(action, state.observation, state)

            if state.done:
                print("\n" + banner)
                print("GAME OVER!")
                break

    except KeyboardInterrupt:
        print("\n\nGame interrupted by user")

    return print_summary(env.game, state, step, time.time() - started_at)
108
+
109
+
110
+ # =============================================================================
111
+ # Mode: MCP ReAct Agent
112
+ # =============================================================================
113
+
114
def run_mcp_agent(args):
    """Play a game with the MCP ReAct agent, talking to the FastMCP server script."""
    import asyncio
    from agents.mcp_react_agent import MCPReActAgent, MCPAgentConfig

    server_script = "mcp_server/zork_server.py"

    # Announce the run configuration
    print("\n[MCP] Running MCP ReAct Agent with FastMCP")
    print(f" Game: {args.game}")
    print(f" Model: {args.model}")
    print(f" Server: {server_script}")
    print()

    agent = MCPReActAgent(
        server_script,
        MCPAgentConfig(verbose=args.verbose, model=args.model, game=args.game),
    )

    return asyncio.run(agent.run(max_steps=args.max_steps))
129
+
130
+
131
+ # =============================================================================
132
+ # Mode: Function Calling
133
+ # =============================================================================
134
+
135
def run_function_calling(args):
    """Play a game with a function-calling controller and return the run summary.

    ``args.simple`` selects the text-based controller; otherwise the
    API-based one is used.
    """
    # The controller modules live in function_calling/ and are imported by
    # bare name, so that directory must be on the path first.
    sys.path.insert(0, str(Path(__file__).parent / "function_calling"))
    from tools import add_to_history

    if args.simple:
        from simple_controller import SimpleController
        print("\n[Function] Running Function Calling Controller (text-based)")
        controller = SimpleController(model=args.model)
    else:
        from controller import FunctionCallingController
        print("\n[Function] Running Function Calling Controller (API-based)")
        controller = FunctionCallingController(model=args.model)

    print(f" Game: {args.game}")
    print(f" Model: {args.model}")
    print()

    env = TextAdventureEnv(args.game)
    state = env.reset()

    banner = "=" * 60
    print(banner)
    print(f"{args.game.upper()} - Function Calling Mode")
    print(banner)
    print(f"\n{state.observation}\n")

    started_at = time.time()
    step = 0
    divider = "─" * 50

    try:
        for step in range(1, args.max_steps + 1):
            print(f"\n{divider}")
            print(f"Step {step}/{args.max_steps} | Score: {state.score}")
            print(divider)

            action = controller.get_action(state.observation, state)
            print(f"\n> ACTION: {action}")

            state = env.step(action)
            # Share the outcome with the tool-side history
            add_to_history(action, state.observation)

            print(f"\n{state.observation}")

            if state.reward > 0:
                print(f"\n+{state.reward} points!")

            if state.done:
                print("\nGAME OVER!")
                break

    except KeyboardInterrupt:
        print("\n\nGame interrupted by user")

    return print_summary(args.game, state, step, time.time() - started_at)
191
+
192
+
193
+ # =============================================================================
194
+ # Common Utilities
195
+ # =============================================================================
196
+
197
def print_summary(game: str, state, step: int, elapsed_time: float) -> dict:
    """Print game summary and return results dict.

    Args:
        game: Name of the game that was played.
        state: Final game state. Reads ``score``, ``max_score``, ``moves``
            and ``done``.
        step: Number of agent steps taken.
        elapsed_time: Wall-clock duration of the run, in seconds.

    Returns:
        A dict with the keys ``game``, ``final_score``, ``max_score``,
        ``score_percentage``, ``moves``, ``steps``, ``elapsed_time`` and
        ``game_over``.
    """
    # A max score of 0 would otherwise raise ZeroDivisionError and lose the
    # whole summary; report 0% in that case.
    if state.max_score:
        score_percentage = 100 * state.score / state.max_score
    else:
        score_percentage = 0.0

    print("\n" + "=" * 60)
    print("GAME SUMMARY")
    print("=" * 60)
    print(f"Game: {game}")
    print(f"Final Score: {state.score}/{state.max_score} ({score_percentage:.1f}%)")
    print(f"Total Moves: {state.moves}")
    print(f"Steps Taken: {step}")
    print(f"Time Elapsed: {elapsed_time:.1f} seconds")
    print("=" * 60)

    return {
        "game": game,
        "final_score": state.score,
        "max_score": state.max_score,
        "score_percentage": score_percentage,
        "moves": state.moves,
        "steps": step,
        "elapsed_time": elapsed_time,
        "game_over": state.done,
    }
219
+
220
+
221
def main():
    """Parse the command line, run the selected agent mode, and return its results.

    Exits with status 0 after handling ``--list-games``. Exits with status 1
    when the game is unknown, or when the selected runner raises
    ``FileNotFoundError``, ``ValueError`` or ``ImportError``.

    Returns:
        Whatever the selected runner returns.
    """
    parser = argparse.ArgumentParser(
        description="Run an LLM agent to play text adventure games",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Modes:
react Basic ReAct agent (direct game interaction)
function Function-calling controller (use --simple for text-based)
mcp MCP ReAct agent using FastMCP Client (recommended)

Examples:
python run_agent.py --mode react
python run_agent.py --mode function
python run_agent.py --mode function --simple # text-based, any model
python run_agent.py --mode mcp # MCP with FastMCP
python run_agent.py --mode mcp --game advent # Play different game
python run_agent.py --mode mcp --model google/gemma-2-2b-it
""",
    )

    # Get available games for help text
    available_games = list_available_games()
    game_help = f"Game to play (default: zork1). {len(available_games)} games available."

    parser.add_argument(
        "--mode", "-m",
        type=str,
        default="react",
        choices=["react", "function", "mcp"],
        help="Which agent mode to use (default: react)",
    )
    parser.add_argument(
        "--game", "-g",
        type=str,
        default="zork1",
        help=game_help,
    )
    parser.add_argument(
        "--list-games",
        action="store_true",
        help="List all available games and exit",
    )
    parser.add_argument(
        "--max-steps", "-n",
        type=int,
        default=100,
        help="Maximum number of steps to run (default: 100)",
    )
    parser.add_argument(
        "--model",
        type=str,
        default=None,
        help="Model to use (default: meta-llama/Llama-3.2-3B-Instruct)",
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Show detailed reasoning from the agent",
    )
    parser.add_argument(
        "--simple",
        action="store_true",
        help="Use text-based function calling (works with any model, only for --mode function)",
    )

    args = parser.parse_args()

    # Handle --list-games
    if args.list_games:
        print(f"\nAvailable games ({len(available_games)} total):\n")
        # Print in columns
        cols = 5
        for i in range(0, len(available_games), cols):
            row = available_games[i:i + cols]
            print(" " + " ".join(f"{g:<15}" for g in row))
        print()
        sys.exit(0)

    # Validate game choice (case-insensitively)
    normalized_game = args.game.lower()
    if normalized_game not in available_games:
        print(f"\nError: Unknown game '{args.game}'")
        print(f"Use --list-games to see {len(available_games)} available options.")
        sys.exit(1)
    # Hand the runners the exact name that passed validation; otherwise
    # "--game Zork1" is accepted here but reaches them with the wrong case.
    args.game = normalized_game

    # Fall back to the HF_MODEL environment variable, then the built-in default
    if args.model is None:
        args.model = os.getenv("HF_MODEL", "meta-llama/Llama-3.2-3B-Instruct")

    print("\n" + "=" * 60)
    print("Text Adventure LLM Agent Runner")
    print("=" * 60)
    print(f"Mode: {args.mode}" + (" (simple)" if args.simple else ""))
    print(f"Game: {args.game}")
    print(f"Max Steps: {args.max_steps}")
    print(f"Model: {args.model}")
    print(f"Verbose: {args.verbose}")

    # argparse's ``choices`` already rejects any other mode, so every
    # possible value of args.mode has an entry here.
    runners = {
        "react": run_react_agent,
        "function": run_function_calling,
        "mcp": run_mcp_agent,
    }

    # Run the selected mode
    try:
        results = runners[args.mode](args)
    except FileNotFoundError as e:
        print(f"\n[Error] {e}")
        sys.exit(1)
    except ValueError as e:
        print(f"\n[Error] {e}")
        print("\nTo fix this:")
        print("1. Copy .env.example to .env")
        print("2. Add your HuggingFace token (HF_TOKEN)")
        sys.exit(1)
    except ImportError as e:
        print(f"\n[Import Error] {e}")
        print("\nMake sure to install dependencies:")
        print(" pip install -r requirements.txt")
        sys.exit(1)

    return results
349
+
350
+
351
# Run the CLI only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
spaces_requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ gradio>=4.0.0