Spaces:
Running
Running
Full project upload
Browse files- .env.example +14 -0
- .gitignore +151 -0
- .gitmodules +0 -0
- README.md +201 -3
- SPACES_README.md +17 -0
- agents/__init__.py +9 -0
- agents/base_agent.py +78 -0
- agents/mcp_react_agent.py +477 -0
- agents/react_agent.py +243 -0
- function_calling/controller.py +291 -0
- function_calling/simple_controller.py +268 -0
- function_calling/tools.py +127 -0
- games/__init__.py +6 -0
- games/zork_env.py +219 -0
- mcp_server/README.md +83 -0
- mcp_server/__init__.py +1 -0
- mcp_server/mcp_config.json +9 -0
- mcp_server/zork_server.py +420 -0
- requirements.txt +14 -1
- run_agent.py +352 -0
- spaces_requirements.txt +1 -0
.env.example
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hugging Face Hub Configuration
|
| 2 |
+
HF_TOKEN=your_huggingface_token_here
|
| 3 |
+
|
| 4 |
+
# Model Configuration
|
| 5 |
+
# Default model for all modes (react, function, mcp)
|
| 6 |
+
HF_MODEL=meta-llama/Llama-3.2-3B-Instruct
|
| 7 |
+
|
| 8 |
+
# Alternative models to try:
|
| 9 |
+
# HF_MODEL=google/gemma-2-2b-it
|
| 10 |
+
# HF_MODEL=Qwen/Qwen2.5-7B-Instruct
|
| 11 |
+
|
| 12 |
+
# Optional API Keys (if using other providers)
|
| 13 |
+
# ANTHROPIC_API_KEY=your_anthropic_key_here
|
| 14 |
+
# OPENAI_API_KEY=your_openai_key_here
|
.gitignore
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
master.zip
|
| 2 |
+
.github/
|
| 3 |
+
|
| 4 |
+
# Byte-compiled / optimized / DLL files
|
| 5 |
+
__pycache__/
|
| 6 |
+
*.py[cod]
|
| 7 |
+
*$py.class
|
| 8 |
+
|
| 9 |
+
# C extensions
|
| 10 |
+
*.so
|
| 11 |
+
|
| 12 |
+
# Distribution / packaging
|
| 13 |
+
.Python
|
| 14 |
+
build/
|
| 15 |
+
develop-eggs/
|
| 16 |
+
dist/
|
| 17 |
+
downloads/
|
| 18 |
+
eggs/
|
| 19 |
+
.eggs/
|
| 20 |
+
lib/
|
| 21 |
+
lib64/
|
| 22 |
+
parts/
|
| 23 |
+
sdist/
|
| 24 |
+
var/
|
| 25 |
+
wheels/
|
| 26 |
+
pip-wheel-metadata/
|
| 27 |
+
share/python-wheels/
|
| 28 |
+
*.egg-info/
|
| 29 |
+
.installed.cfg
|
| 30 |
+
*.egg
|
| 31 |
+
MANIFEST
|
| 32 |
+
|
| 33 |
+
# PyInstaller
|
| 34 |
+
*.manifest
|
| 35 |
+
*.spec
|
| 36 |
+
|
| 37 |
+
# Installer logs
|
| 38 |
+
pip-log.txt
|
| 39 |
+
pip-delete-this-directory.txt
|
| 40 |
+
|
| 41 |
+
# Unit test / coverage reports
|
| 42 |
+
htmlcov/
|
| 43 |
+
.tox/
|
| 44 |
+
.nox/
|
| 45 |
+
.coverage
|
| 46 |
+
.coverage.*
|
| 47 |
+
.cache
|
| 48 |
+
nosetests.xml
|
| 49 |
+
coverage.xml
|
| 50 |
+
*.cover
|
| 51 |
+
*.py,cover
|
| 52 |
+
.hypothesis/
|
| 53 |
+
.pytest_cache/
|
| 54 |
+
|
| 55 |
+
# Translations
|
| 56 |
+
*.mo
|
| 57 |
+
*.pot
|
| 58 |
+
|
| 59 |
+
# Django stuff:
|
| 60 |
+
*.log
|
| 61 |
+
local_settings.py
|
| 62 |
+
db.sqlite3
|
| 63 |
+
db.sqlite3-journal
|
| 64 |
+
|
| 65 |
+
# Flask stuff:
|
| 66 |
+
instance/
|
| 67 |
+
.webassets-cache
|
| 68 |
+
|
| 69 |
+
# Scrapy stuff:
|
| 70 |
+
.scrapy
|
| 71 |
+
|
| 72 |
+
# Sphinx documentation
|
| 73 |
+
docs/_build/
|
| 74 |
+
|
| 75 |
+
# PyBuilder
|
| 76 |
+
target/
|
| 77 |
+
|
| 78 |
+
# Jupyter Notebook
|
| 79 |
+
.ipynb_checkpoints
|
| 80 |
+
|
| 81 |
+
# IPython
|
| 82 |
+
profile_default/
|
| 83 |
+
ipython_config.py
|
| 84 |
+
|
| 85 |
+
# pyenv
|
| 86 |
+
.python-version
|
| 87 |
+
|
| 88 |
+
# pipenv
|
| 89 |
+
Pipfile.lock
|
| 90 |
+
|
| 91 |
+
# PEP 582
|
| 92 |
+
__pypackages__/
|
| 93 |
+
|
| 94 |
+
# Celery stuff
|
| 95 |
+
celerybeat-schedule
|
| 96 |
+
celerybeat.pid
|
| 97 |
+
|
| 98 |
+
# SageMath parsed files
|
| 99 |
+
*.sage.py
|
| 100 |
+
|
| 101 |
+
# Environments
|
| 102 |
+
.env
|
| 103 |
+
.venv
|
| 104 |
+
env/
|
| 105 |
+
venv/
|
| 106 |
+
ENV/
|
| 107 |
+
env.bak/
|
| 108 |
+
venv.bak/
|
| 109 |
+
|
| 110 |
+
# Spyder project settings
|
| 111 |
+
.spyderproject
|
| 112 |
+
.spyproject
|
| 113 |
+
|
| 114 |
+
# Rope project settings
|
| 115 |
+
.ropeproject
|
| 116 |
+
|
| 117 |
+
# mkdocs documentation
|
| 118 |
+
/site
|
| 119 |
+
|
| 120 |
+
# mypy
|
| 121 |
+
.mypy_cache/
|
| 122 |
+
.dmypy.json
|
| 123 |
+
dmypy.json
|
| 124 |
+
|
| 125 |
+
# Pyre type checker
|
| 126 |
+
.pyre/
|
| 127 |
+
|
| 128 |
+
# macOS
|
| 129 |
+
.DS_Store
|
| 130 |
+
.AppleDouble
|
| 131 |
+
.LSOverride
|
| 132 |
+
._*
|
| 133 |
+
.Spotlight-V100
|
| 134 |
+
.Trashes
|
| 135 |
+
|
| 136 |
+
# IDE
|
| 137 |
+
.vscode/
|
| 138 |
+
.idea/
|
| 139 |
+
*.swp
|
| 140 |
+
*.swo
|
| 141 |
+
*~
|
| 142 |
+
|
| 143 |
+
# Game files
|
| 144 |
+
z-machine-games-master/
|
| 145 |
+
*.z3
|
| 146 |
+
*.z4
|
| 147 |
+
*.z5
|
| 148 |
+
*.z8
|
| 149 |
+
|
| 150 |
+
# Temp files
|
| 151 |
+
.mcp_config_temp.json
|
.gitmodules
ADDED
|
File without changes
|
README.md
CHANGED
|
@@ -10,8 +10,206 @@ pinned: false
|
|
| 10 |
license: mit
|
| 11 |
---
|
| 12 |
|
| 13 |
-
#
|
| 14 |
|
| 15 |
-
|
| 16 |
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
license: mit
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# Text Adventure LLM Agent Project
|
| 14 |
|
| 15 |
+
Build AI agents to play classic text adventure games (Zork, Colossal Cave, Enchanter, etc.) using the Model Context Protocol (MCP) and HuggingFace models.
|
| 16 |
|
| 17 |
+
## Overview
|
| 18 |
+
|
| 19 |
+
This project provides:
|
| 20 |
+
|
| 21 |
+
1. **MCP Server** - Exposes text adventure games as MCP tools using FastMCP
|
| 22 |
+
2. **ReAct Agent** - An agent that uses MCP tools to play games with reasoning
|
| 23 |
+
3. **Templates** - Starter code for students to implement their own solutions
|
| 24 |
+
4. **57 Games** - Zork trilogy, Infocom classics, and many more Z-machine games
|
| 25 |
+
|
| 26 |
+
## Architecture
|
| 27 |
+
|
| 28 |
+
```
|
| 29 |
+
+-------------------+     MCP Protocol     +------------------+
|                   | <------------------> |                  |
|   ReAct Agent     |     (tool calls)     |    MCP Server    |
|   (FastMCP Client)|                      |    (FastMCP)     |
|                   |                      |                  |
+-------------------+                      +------------------+
          |                                          |
          | LLM API                                  | Game API
          v                                          v
+-------------------+                      +------------------+
|   HuggingFace     |                      |  Text Adventure  |
|   Inference API   |                      |    (Jericho)     |
+-------------------+                      +------------------+
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
## Quick Start
|
| 45 |
+
|
| 46 |
+
### 1. Setup
|
| 47 |
+
|
| 48 |
+
```bash
|
| 49 |
+
# Create a virtual environment (uv is recommended)
|
| 50 |
+
uv venv
|
| 51 |
+
source .venv/bin/activate
|
| 52 |
+
|
| 53 |
+
# Install dependencies
|
| 54 |
+
uv pip install -r requirements.txt
|
| 55 |
+
|
| 56 |
+
# Configure environment
|
| 57 |
+
cp .env.example .env
|
| 58 |
+
# Edit .env and add your HuggingFace token (HF_TOKEN)
|
| 59 |
+
```
|
| 60 |
+
|
| 61 |
+
Get your HuggingFace token at: https://huggingface.co/settings/tokens
|
| 62 |
+
|
| 63 |
+
### 2. Run an Agent
|
| 64 |
+
|
| 65 |
+
```bash
|
| 66 |
+
# MCP mode (recommended) - uses FastMCP Client
|
| 67 |
+
python run_agent.py --mode mcp
|
| 68 |
+
|
| 69 |
+
# Basic ReAct agent (direct game interaction)
|
| 70 |
+
python run_agent.py --mode react
|
| 71 |
+
|
| 72 |
+
# Function calling mode (--simple selects the text-based variant)
|
| 73 |
+
python run_agent.py --mode function --simple
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
## Project Structure
|
| 77 |
+
|
| 78 |
+
```
|
| 79 |
+
.
|
| 80 |
+
+-- run_agent.py # Unified agent runner
|
| 81 |
+
+-- mcp_server/
|
| 82 |
+
| +-- zork_server.py # Full MCP server with all tools
|
| 83 |
+
+-- agents/
|
| 84 |
+
| +-- base_agent.py # Abstract base class
|
| 85 |
+
| +-- react_agent.py # Basic ReAct agent (no MCP)
|
| 86 |
+
| +-- mcp_react_agent.py # MCP-enabled ReAct agent
|
| 87 |
+
+-- templates/ # Student templates
|
| 88 |
+
| +-- README.md # Assignment instructions
|
| 89 |
+
| +-- mcp_server_template.py # MCP server starter
|
| 90 |
+
| +-- react_agent_template.py # Agent starter
|
| 91 |
+
+-- function_calling/ # Alternative: function calling
|
| 92 |
+
| +-- controller.py
|
| 93 |
+
| +-- simple_controller.py
|
| 94 |
+
| +-- tools.py
|
| 95 |
+
+-- games/
|
| 96 |
+
| +-- zork_env.py # Jericho wrapper
|
| 97 |
+
+-- z-machine-games-master/ # Game files
|
| 98 |
+
```
|
| 99 |
+
|
| 100 |
+
## Agent Modes
|
| 101 |
+
|
| 102 |
+
| Mode | Description | Command |
|
| 103 |
+
|------|-------------|---------|
|
| 104 |
+
| `mcp` | MCP ReAct agent (FastMCP Client) | `--mode mcp` |
|
| 105 |
+
| `react` | Basic ReAct (direct game) | `--mode react` |
|
| 106 |
+
| `function` | Function calling (API) | `--mode function` |
|
| 107 |
+
| `function --simple` | Function calling (text) | `--mode function --simple` |
|
| 108 |
+
|
| 109 |
+
### Examples
|
| 110 |
+
|
| 111 |
+
```bash
|
| 112 |
+
# Run MCP agent with verbose output
|
| 113 |
+
python run_agent.py --mode mcp -v
|
| 114 |
+
|
| 115 |
+
# Run with different model
|
| 116 |
+
python run_agent.py --mode mcp --model google/gemma-2-2b-it
|
| 117 |
+
|
| 118 |
+
# Limit steps
|
| 119 |
+
python run_agent.py --mode mcp -n 50
|
| 120 |
+
|
| 121 |
+
# Play different games
|
| 122 |
+
python run_agent.py --mode mcp --game zork2
|
| 123 |
+
python run_agent.py --mode mcp --game advent # Colossal Cave Adventure
|
| 124 |
+
python run_agent.py --mode mcp --game enchanter # Infocom classic
|
| 125 |
+
python run_agent.py --mode mcp --game hhgg # Hitchhiker's Guide
|
| 126 |
+
|
| 127 |
+
# List all 57 available games
|
| 128 |
+
python run_agent.py --list-games
|
| 129 |
+
```
|
| 130 |
+
|
| 131 |
+
## MCP Server Tools
|
| 132 |
+
|
| 133 |
+
The MCP server exposes these tools:
|
| 134 |
+
|
| 135 |
+
| Tool | Description |
|
| 136 |
+
|------|-------------|
|
| 137 |
+
| `play_action(action)` | Execute a game command (north, take lamp, etc.) |
|
| 138 |
+
| `memory()` | Get current state (location, score, history) |
|
| 139 |
+
| `get_map()` | View explored locations and connections |
|
| 140 |
+
| `inventory()` | Check items you're carrying |
|
| 141 |
+
| `valid_actions()` | Get command hints |
|
| 142 |
+
| `reset_game(game)` | Start over or switch games |
|
| 143 |
+
| `list_games()` | See all 57 available games |
|
| 144 |
+
| `hint()` | Get contextual hints |
|
| 145 |
+
|
| 146 |
+
### Testing the MCP Server
|
| 147 |
+
|
| 148 |
+
```bash
|
| 149 |
+
# Run server directly (stdio transport) - default game is zork1
|
| 150 |
+
python mcp_server/zork_server.py
|
| 151 |
+
|
| 152 |
+
# Run with a specific game
|
| 153 |
+
GAME=advent python mcp_server/zork_server.py
|
| 154 |
+
|
| 155 |
+
# Use MCP Inspector for interactive testing
|
| 156 |
+
npx @modelcontextprotocol/inspector python mcp_server/zork_server.py
|
| 157 |
+
|
| 158 |
+
# Use FastMCP dev mode
|
| 159 |
+
fastmcp dev mcp_server/zork_server.py
|
| 160 |
+
```
|
| 161 |
+
|
| 162 |
+
## Student Assignment
|
| 163 |
+
|
| 164 |
+
See [templates/README.md](templates/README.md) for the assignment.
|
| 165 |
+
|
| 166 |
+
Students implement:
|
| 167 |
+
1. **MCP Server** (`mcp_server_template.py`) - Expose game functionality as MCP tools
|
| 168 |
+
2. **ReAct Agent** (`react_agent_template.py`) - Play text adventures using MCP
|
| 169 |
+
|
| 170 |
+
## Configuration
|
| 171 |
+
|
| 172 |
+
### Environment Variables
|
| 173 |
+
|
| 174 |
+
Create `.env` from `.env.example`:
|
| 175 |
+
|
| 176 |
+
```bash
|
| 177 |
+
# Required: HuggingFace token
|
| 178 |
+
HF_TOKEN=hf_your_token_here
|
| 179 |
+
|
| 180 |
+
# Optional: Model override (default: meta-llama/Llama-3.2-3B-Instruct)
|
| 181 |
+
HF_MODEL=meta-llama/Llama-3.2-3B-Instruct
|
| 182 |
+
```
|
| 183 |
+
|
| 184 |
+
### Recommended Models
|
| 185 |
+
|
| 186 |
+
| Model | Notes |
|
| 187 |
+
|-------|-------|
|
| 188 |
+
| `meta-llama/Llama-3.2-3B-Instruct` | Default, good balance |
|
| 189 |
+
| `google/gemma-2-2b-it` | Smaller, faster |
|
| 190 |
+
| `Qwen/Qwen2.5-7B-Instruct` | Good instruction following |
|
| 191 |
+
|
| 192 |
+
## Evaluation
|
| 193 |
+
|
| 194 |
+
Run the evaluator to test agent performance:
|
| 195 |
+
|
| 196 |
+
```bash
|
| 197 |
+
python evaluate.py --mode mcp --games zork1 --runs 3
|
| 198 |
+
```
|
| 199 |
+
|
| 200 |
+
Metrics:
|
| 201 |
+
- **Score**: Points earned in-game
|
| 202 |
+
- **Score %**: Score / Max possible score
|
| 203 |
+
- **Steps**: Number of actions taken
|
| 204 |
+
- **Time**: Elapsed time
|
| 205 |
+
|
| 206 |
+
## Resources
|
| 207 |
+
|
| 208 |
+
- [FastMCP Documentation](https://gofastmcp.com/)
|
| 209 |
+
- [MCP Protocol](https://modelcontextprotocol.io/)
|
| 210 |
+
- [Jericho (Text Adventures)](https://github.com/microsoft/jericho)
|
| 211 |
+
- [HuggingFace Inference API](https://huggingface.co/docs/huggingface_hub/guides/inference)
|
| 212 |
+
|
| 213 |
+
## License
|
| 214 |
+
|
| 215 |
+
MIT
|
SPACES_README.md
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: Agentic Zork
|
| 3 |
+
emoji: "🎮"
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: purple
|
| 6 |
+
sdk: gradio
|
| 7 |
+
sdk_version: 5.0.0
|
| 8 |
+
app_file: app.py
|
| 9 |
+
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
+
---
|
| 12 |
+
|
| 13 |
+
# Playing Zork has never been so boring
|
| 14 |
+
|
| 15 |
+
In this assignment, you will build an AI Agent and an MCP server to play text adventure games like Zork.
|
| 16 |
+
|
| 17 |
+
See the instructions below to get started!
|
agents/__init__.py
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .base_agent import BaseAgent, AgentConfig
|
| 2 |
+
from .react_agent import ReActAgent, ReActConfig
|
| 3 |
+
from .mcp_react_agent import MCPReActAgent, MCPAgentConfig
|
| 4 |
+
|
| 5 |
+
__all__ = [
|
| 6 |
+
"BaseAgent", "AgentConfig",
|
| 7 |
+
"ReActAgent", "ReActConfig",
|
| 8 |
+
"MCPReActAgent", "MCPAgentConfig",
|
| 9 |
+
]
|
agents/base_agent.py
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Base Agent Abstract Class
|
| 3 |
+
|
| 4 |
+
Defines the interface that all text adventure agents must implement.
|
| 5 |
+
"""
|
| 6 |
+
|
| 7 |
+
from abc import ABC, abstractmethod
|
| 8 |
+
from dataclasses import dataclass
|
| 9 |
+
from games.zork_env import GameState
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
@dataclass
class AgentConfig:
    """Settings shared by text adventure agents.

    All fields have defaults, so ``AgentConfig()`` is a valid configuration
    and individual values can be overridden by keyword.
    """

    # Display name of the agent.
    name: str = "BaseAgent"
    # Maximum number of past interactions to remember.
    max_history: int = 20
    # Verbosity switch; how it is honoured is up to the concrete agent.
    verbose: bool = False
|
| 18 |
+
|
| 19 |
+
|
| 20 |
+
class BaseAgent(ABC):
    """
    Abstract base class for text adventure agents.

    Students should extend this class and implement the `choose_action` method.
    The base class itself only provides bounded history bookkeeping and a
    text rendering of that history.
    """

    def __init__(self, config: AgentConfig = None):
        """
        Create an agent with an empty history.

        Args:
            config: Agent settings; a default AgentConfig is used when omitted.
        """
        self.config = config or AgentConfig()
        # Entries are (action, observation, state), oldest first.
        self.history: list[tuple[str, str, GameState]] = []

    @abstractmethod
    def choose_action(self, observation: str, game_state: GameState) -> str:
        """
        Choose the next action based on the current observation and game state.

        Args:
            observation: The text observation from the game
            game_state: The current GameState object with score, inventory, etc.

        Returns:
            A string action to take in the game (e.g., "go north", "take lamp")
        """
        pass

    def update_history(self, action: str, observation: str, game_state: GameState):
        """
        Record the outcome of an action and drop the oldest entries if needed.

        Args:
            action: The action that was taken
            observation: The resulting observation
            game_state: The resulting game state
        """
        self.history.append((action, observation, game_state))

        # Keep history bounded. The overflow is computed explicitly because
        # slicing with a negated limit breaks for a limit of 0: lst[-0:] is
        # the whole list, so nothing would ever be discarded.
        overflow = len(self.history) - max(self.config.max_history, 0)
        if overflow > 0:
            self.history = self.history[overflow:]

    def reset(self):
        """Reset the agent's internal state for a new game."""
        self.history = []

    def get_history_text(self) -> str:
        """
        Get a text summary of recent history for context.

        Returns:
            "No previous actions taken." when the history is empty; otherwise
            one paragraph per entry (at most the last 10) consisting of the
            action, the observation cut to 200 characters, and a
            score/moves line, each paragraph followed by a blank line.
        """
        if not self.history:
            return "No previous actions taken."

        lines = []
        for action, observation, state in self.history[-10:]:  # Last 10 actions
            lines.append(f"> {action}")
            # Truncate long observations
            obs_preview = observation[:200] + "..." if len(observation) > 200 else observation
            lines.append(obs_preview)
            lines.append(f"[Score: {state.score}, Moves: {state.moves}]")
            lines.append("")

        return "\n".join(lines)
|
agents/mcp_react_agent.py
ADDED
|
@@ -0,0 +1,477 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
MCP ReAct Agent for Text Adventure Games
|
| 3 |
+
|
| 4 |
+
A production-ready ReAct agent that uses FastMCP Client to play text adventures via MCP tools.
|
| 5 |
+
This agent connects to the Text Adventure MCP server and uses the LLM to reason and act.
|
| 6 |
+
|
| 7 |
+
Features:
|
| 8 |
+
- FastMCP Client integration for MCP server communication
|
| 9 |
+
- ReAct loop (Thought -> Tool -> Observation)
|
| 10 |
+
- Loop detection and action validation
|
| 11 |
+
- History tracking and memory management
|
| 12 |
+
- Score tracking and game over detection
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
import asyncio
|
| 16 |
+
import json
|
| 17 |
+
import os
|
| 18 |
+
import re
|
| 19 |
+
import sys
|
| 20 |
+
from dataclasses import dataclass, field
|
| 21 |
+
from huggingface_hub import InferenceClient
|
| 22 |
+
from dotenv import load_dotenv
|
| 23 |
+
from fastmcp import Client
|
| 24 |
+
from fastmcp.client.transports import StdioTransport
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
@dataclass
class MCPAgentConfig:
    """Settings for the MCP ReAct agent.

    Every field has a default, so ``MCPAgentConfig()`` is directly usable.
    """

    # Model identifier passed to the chat-completion call.
    model: str = "meta-llama/Llama-3.2-3B-Instruct"
    # Default game to play.
    game: str = "zork1"
    # Sampling temperature passed to the chat-completion call.
    temperature: float = 0.7
    # Token limit passed to the chat-completion call.
    max_tokens: int = 300
    # Number of step records the agent keeps in its history.
    max_history: int = 10
    # When true, the agent prints each thought and tool call.
    verbose: bool = True
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
SYSTEM_PROMPT = """You are an expert text adventure game player. Your goal is to explore, collect treasures, and maximize your score.
|
| 39 |
+
|
| 40 |
+
AVAILABLE TOOLS (use these via MCP):
|
| 41 |
+
1. play_action - Execute game commands (north, take lamp, open mailbox, etc.)
|
| 42 |
+
2. memory - Get current game state, score, and recent history
|
| 43 |
+
3. get_map - See explored locations and connections
|
| 44 |
+
4. inventory - Check what you're carrying
|
| 45 |
+
5. hint - Get a hint if stuck
|
| 46 |
+
6. list_games - See available games
|
| 47 |
+
7. reset_game - Switch to a different game
|
| 48 |
+
|
| 49 |
+
VALID GAME COMMANDS for play_action:
|
| 50 |
+
- Movement: north, south, east, west, up, down, enter, exit
|
| 51 |
+
- Objects: take <item>, drop <item>, open <thing>, close <thing>, examine <thing>
|
| 52 |
+
- Light: turn on lamp, turn off lamp
|
| 53 |
+
- Combat: attack <enemy> with <weapon>
|
| 54 |
+
- Other: inventory, look, read <thing>, wait
|
| 55 |
+
|
| 56 |
+
FORBIDDEN (will NOT work): check, inspect, search, grab, use, help
|
| 57 |
+
|
| 58 |
+
RESPOND IN THIS EXACT FORMAT (no markdown):
|
| 59 |
+
THOUGHT: <brief reasoning about what to do next>
|
| 60 |
+
TOOL: <tool_name>
|
| 61 |
+
ARGS: <JSON arguments>
|
| 62 |
+
|
| 63 |
+
Examples:
|
| 64 |
+
THOUGHT: I need to see what's around me.
|
| 65 |
+
TOOL: play_action
|
| 66 |
+
ARGS: {"action": "look"}
|
| 67 |
+
|
| 68 |
+
THOUGHT: Let me check my current state and score.
|
| 69 |
+
TOOL: memory
|
| 70 |
+
ARGS: {}
|
| 71 |
+
|
| 72 |
+
THOUGHT: The mailbox might contain something useful.
|
| 73 |
+
TOOL: play_action
|
| 74 |
+
ARGS: {"action": "open mailbox"}
|
| 75 |
+
|
| 76 |
+
STRATEGY:
|
| 77 |
+
1. Start by looking around and checking memory
|
| 78 |
+
2. Explore systematically - try all directions
|
| 79 |
+
3. Pick up useful items (lamp, sword, etc.)
|
| 80 |
+
4. Open containers (mailbox, window, etc.)
|
| 81 |
+
5. Use get_map to avoid getting lost
|
| 82 |
+
6. Turn on lamp before dark areas!
|
| 83 |
+
|
| 84 |
+
DO NOT repeat the same action multiple times in a row."""
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
class MCPReActAgent:
|
| 88 |
+
"""
|
| 89 |
+
A ReAct agent that plays text adventure games using MCP tools via FastMCP Client.
|
| 90 |
+
|
| 91 |
+
This is the robust/production version with:
|
| 92 |
+
- Full MCP integration
|
| 93 |
+
- Loop detection
|
| 94 |
+
- Action validation
|
| 95 |
+
- Score tracking
|
| 96 |
+
"""
|
| 97 |
+
|
| 98 |
+
def __init__(self, mcp_server_path: str, config: MCPAgentConfig = None):
    """
    Set up configuration, the LLM client and empty agent state.

    Environment variables are loaded via load_dotenv() first. HF_MODEL, when
    set and non-empty, replaces the configured model; HF_TOKEN is mandatory.

    Args:
        mcp_server_path: Path to the MCP server script
        config: Agent configuration (a default MCPAgentConfig when omitted)

    Raises:
        ValueError: If HF_TOKEN is missing or empty.
    """
    load_dotenv()

    self.mcp_server_path = mcp_server_path
    self.config = config or MCPAgentConfig()

    # The environment takes precedence over the configured model.
    model_override = os.getenv("HF_MODEL")
    if model_override:
        self.config.model = model_override

    # The inference client cannot be created without a token.
    hf_token = os.getenv("HF_TOKEN")
    if not hf_token:
        raise ValueError("HF_TOKEN not found. Set it in your .env file.")
    self.llm = InferenceClient(token=hf_token)

    # Per-run bookkeeping.
    self.history: list[dict] = []
    self.thoughts: list[str] = []
    self.score: int = 0
    self.max_score: int = 350
    self.recent_actions: list[str] = []  # window used for loop detection
|
| 128 |
+
|
| 129 |
+
async def run(self, max_steps: int = 100) -> dict:
    """
    Run the ReAct agent loop.

    Connects to the MCP server through a StdioTransport that runs
    `sys.executable <mcp_server_path>` with GAME set to the configured game,
    issues an initial "look", then repeats prompt -> LLM -> tool call until
    max_steps is reached or the game-over check fires.

    Args:
        max_steps: Maximum number of steps to run

    Returns:
        The value returned by self._print_summary for the last step number,
        the elapsed wall-clock time and the game-over flag (annotated as a
        dictionary with game results)
    """
    import time
    start_time = time.time()
    # Pre-set so the summary call is well-defined even if the loop body
    # never runs (max_steps < 1).
    step = 0
    game_over = False
    game_name = self.config.game

    print("=" * 60)
    print(f"MCP ReAct Agent - Playing {game_name.upper()}")
    print(f"Model: {self.config.model}")
    print("=" * 60)

    # Set game as environment variable for the server
    env = os.environ.copy()
    env["GAME"] = game_name

    # Create transport with environment variables
    transport = StdioTransport(
        command=sys.executable,
        args=[self.mcp_server_path],
        env=env,
    )

    # Connect to MCP server with game environment
    async with Client(transport) as client:
        # List available tools
        tools = await client.list_tools()
        tool_names = [t.name for t in tools]
        print(f"\nConnected to MCP server. Tools: {tool_names}")

        # Get initial observation
        result = await client.call_tool("play_action", {"action": "look"})
        observation = self._extract_result(result)
        print(f"\n{observation}\n")

        # Parse initial score
        self._update_score(observation)

        # Main ReAct loop
        for step in range(1, max_steps + 1):
            print(f"\n{'─' * 50}")
            print(f"Step {step}/{max_steps} | Score: {self.score}")
            print("─" * 50)

            # Build prompt with context
            prompt = self._build_prompt(observation)

            # Call LLM for reasoning
            response = self._call_llm(prompt)

            # Parse response
            thought, tool_name, tool_args = self._parse_response(response, tool_names)

            self.thoughts.append(thought)

            # Note: this logs the call as proposed by the LLM, i.e. before
            # validation below may rewrite it.
            if self.config.verbose:
                print(f"\n[THOUGHT] {thought}")
                print(f"[TOOL] {tool_name}({tool_args})")

            # Validate and fix common issues
            tool_name, tool_args = self._validate_tool_call(tool_name, tool_args, tool_names)

            # Check for loops (only game commands are tracked)
            if tool_name == "play_action":
                action = tool_args.get("action", "look")
                self.recent_actions.append(action)
                # Keep a sliding window of the five most recent actions
                if len(self.recent_actions) > 5:
                    self.recent_actions = self.recent_actions[-5:]

                # Detect loops: the last three recorded actions are identical
                if len(self.recent_actions) >= 3 and len(set(self.recent_actions[-3:])) == 1:
                    print(f"\n[WARNING] Loop detected - repeating '{action}'")
                    # Force a different action; recording it also breaks the
                    # run of identical entries (unless the repeated action
                    # was itself "look")
                    tool_args = {"action": "look"}
                    self.recent_actions.append("look")

            # Execute tool via MCP; a failure becomes the next observation
            # instead of aborting the run
            try:
                result = await client.call_tool(tool_name, tool_args)
                observation = self._extract_result(result)
                print(f"\n{observation}")
            except Exception as e:
                observation = f"Error executing tool: {e}"
                print(f"\n[ERROR] {e}")

            # Update history (result is stored truncated to 200 characters)
            self.history.append({
                "step": step,
                "thought": thought,
                "tool": tool_name,
                "args": tool_args,
                "result": observation[:200]
            })
            if len(self.history) > self.config.max_history:
                self.history = self.history[-self.config.max_history:]

            # Update score
            self._update_score(observation)

            # Check for game over
            if self._is_game_over(observation):
                game_over = True
                print("\n" + "=" * 60)
                print("GAME OVER!")
                break

    elapsed_time = time.time() - start_time

    # Print summary
    return self._print_summary(step, elapsed_time, game_over)
|
| 248 |
+
|
| 249 |
+
def _build_prompt(self, observation: str) -> str:
    """Build the user prompt sent to the LLM for the next decision.

    The prompt contains, in order: the current score, up to the last three
    history entries (each result clipped to 80 characters), an optional
    repetition warning, and the current observation.

    Args:
        observation: Latest text returned by the game or tool.

    Returns:
        The prompt sections joined with newlines.
    """
    parts = [f"Current Score: {self.score}/{self.max_score}"]

    # Recent history (compact)
    if self.history:
        parts.append("\nRecent actions:")
        for entry in self.history[-3:]:
            # Tool calls without an "action" argument are shown by tool name.
            action = entry.get("args", {}).get("action", entry["tool"])
            result = entry["result"]
            result_short = result[:80] + "..." if len(result) > 80 else result
            parts.append(f" > {action} -> {result_short}")

    # Warn only when the last THREE actions are identical. Checking the set
    # size alone would also be true for a single recorded action and would
    # tell the model it is looping after its very first move.
    last_three = self.recent_actions[-3:]
    if len(last_three) == 3 and len(set(last_three)) == 1:
        parts.append(f"\n[WARNING: You've been doing '{last_three[-1]}' repeatedly. TRY SOMETHING DIFFERENT!]")

    # Current observation
    parts.append(f"\nCurrent situation:\n{observation}")
    parts.append("\nWhat do you do next?")

    return "\n".join(parts)
|
| 273 |
+
|
| 274 |
+
def _call_llm(self, prompt: str) -> str:
    """Send the system prompt plus *prompt* to the chat model.

    Args:
        prompt: User-turn text, normally produced by ``_build_prompt``.

    Returns:
        The model's reply text. An empty string is returned when the API
        answers without text content, and a canned "look" response in the
        THOUGHT/TOOL/ARGS format is returned when the call itself fails,
        so the caller always receives a ``str``.
    """
    try:
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": prompt},
        ]

        response = self.llm.chat.completions.create(
            model=self.config.model,
            messages=messages,
            temperature=self.config.temperature,
            max_tokens=self.config.max_tokens,
        )
        content = response.choices[0].message.content
    except Exception as e:
        # Best-effort boundary: any API/network failure degrades to a
        # harmless "look" instead of aborting the game loop.
        print(f"[LLM Error] {e}")
        return "THOUGHT: LLM error, trying look.\nTOOL: play_action\nARGS: {\"action\": \"look\"}"

    # The API may return no text content (None); never hand None to the
    # parser, which calls str methods on the reply.
    return content or ""
|
| 292 |
+
|
| 293 |
+
def _parse_response(self, response: str, valid_tools: list[str]) -> tuple[str, str, dict]:
    """Parse the LLM response to extract thought, tool, and arguments.

    The response is scanned line by line for ``THOUGHT:``, ``TOOL:`` and
    ``ARGS:`` prefixes (case-insensitive). Anything missing or unparsable
    keeps its default, so the result is always a usable ``play_action`` /
    ``look`` call.

    Args:
        response: Raw text returned by the LLM (``None`` is treated as empty).
        valid_tools: Accepted for interface compatibility; not consulted
            here (tool names are checked in ``_validate_tool_call``).

    Returns:
        ``(thought, tool_name, tool_args)`` where ``tool_args`` is a dict.
    """
    thought = "No reasoning provided"
    tool_name = "play_action"
    tool_args = {"action": "look"}

    for line in (response or "").strip().split("\n"):
        line_clean = line.strip()
        line_upper = line_clean.upper()

        if line_upper.startswith("THOUGHT:"):
            thought = line_clean.split(":", 1)[1].strip()

        elif line_upper.startswith("TOOL:"):
            raw_tool = line_clean.split(":", 1)[1].strip().lower()
            # Drop markdown emphasis/backticks, then keep the first token.
            # Splitting before indexing avoids an IndexError when only
            # whitespace is left (e.g. "TOOL: * *").
            tokens = raw_tool.replace("**", "").replace("*", "").replace("`", "").split()
            tool_name = tokens[0] if tokens else "play_action"

        elif line_upper.startswith("ARGS:"):
            args_part = line_clean.split(":", 1)[1].strip()
            normalized = args_part.replace("'", '"')

            # Try the text as written first so apostrophes inside valid
            # JSON strings survive; only then retry with single quotes
            # turned into double quotes (Python-dict style output).
            parsed = None
            for candidate in (args_part, normalized):
                try:
                    parsed = json.loads(candidate)
                except json.JSONDecodeError:
                    continue
                break

            if isinstance(parsed, dict):
                tool_args = parsed
            else:
                # Not a JSON object: try to pull an "action" value out of
                # the text, otherwise fall back to a safe "look".
                match = re.search(r'"action"\s*:\s*"([^"]+)"', normalized)
                tool_args = {"action": match.group(1)} if match else {"action": "look"}

    return thought, tool_name, tool_args
|
| 331 |
+
|
| 332 |
+
def _validate_tool_call(self, tool_name: str, tool_args: dict, valid_tools: list[str]) -> tuple[str, dict]:
    """Validate and fix common tool call issues.

    Unknown tool names are mapped through a small alias table and default
    to ``play_action``. For ``play_action`` the ``action`` argument is
    lower-cased, stripped of markdown markers, whitespace-normalised and
    has a few unsupported leading verbs replaced.

    Args:
        tool_name: Tool name as parsed from the LLM output.
        tool_args: Parsed arguments; updated in place when it is a dict.
        valid_tools: Tool names that are accepted unchanged.

    Returns:
        ``(tool_name, tool_args)`` after correction.
    """
    # Fix tool name
    if tool_name not in valid_tools:
        tool_aliases = {
            "action": "play_action",
            "do": "play_action",
            "command": "play_action",
            "map": "get_map",
            "location": "get_map",
            "mem": "memory",
            "state": "memory",
            "status": "memory",
            "inv": "inventory",
            "items": "inventory",
        }
        tool_name = tool_aliases.get(tool_name, "play_action")

    # The parser may hand over non-dict JSON; normalise so .get() is safe.
    if not isinstance(tool_args, dict):
        tool_args = {}

    # Fix action in args
    if tool_name == "play_action":
        action = tool_args.get("action", "look")
        if not isinstance(action, str):
            action = "look"

        # Strip markup BEFORE looking at the verb, otherwise an action such
        # as "**grab** lamp" would never match the verb table.
        action = action.lower().replace("**", "").replace("*", "").replace("`", "")
        words = action.split()

        invalid_verb_map = {
            "check": "examine",
            "inspect": "examine",
            "search": "look",
            "grab": "take",
            "pick": "take",
            "use": "examine",
            "investigate": "examine",
        }
        if words and words[0] in invalid_verb_map:
            # "pick up lamp" should become "take lamp", not "take up lamp".
            if words[0] == "pick" and len(words) > 1 and words[1] == "up":
                del words[1]
            words[0] = invalid_verb_map[words[0]]

        # An action that was only markup/whitespace falls back to "look".
        tool_args["action"] = " ".join(words) or "look"

    return tool_name, tool_args
|
| 376 |
+
|
| 377 |
+
def _extract_result(self, result) -> str:
    """Extract text from an MCP tool result.

    Args:
        result: Object returned by the MCP client's ``call_tool``. It is
            expected to expose a ``content`` sequence whose items may have
            a ``text`` attribute.

    Returns:
        The text of all content parts that carry a string ``text``
        attribute, joined with newlines (a single text part is returned
        unchanged). When no such part exists, ``str(result)``.
    """
    content = getattr(result, "content", None)
    if content:
        # Skip parts without text (e.g. a non-text first item) instead of
        # raising AttributeError on ``content[0].text``.
        texts = [part.text for part in content if isinstance(getattr(part, "text", None), str)]
        if texts:
            return "\n".join(texts)
    return str(result)
|
| 382 |
+
|
| 383 |
+
def _update_score(self, text: str) -> None:
    """Update ``self.score`` from score information found in *text*.

    Two kinds of pattern are recognised (case-insensitive, first match of
    each):

    * a relative gain such as ``+5 points`` is added to the score;
    * an absolute value such as ``Score: 15`` or ``Total: 15`` raises the
      score to that value if it is higher than the current one.

    Args:
        text: Game or tool output to scan.
    """
    # Relative gain. The original distinguished the two kinds with
    # ``"+" in pattern``, which is true for every pattern because they all
    # contain ``\d+`` - so absolute totals were added on every step.
    gain = re.search(r'\+(\d+) points', text, re.IGNORECASE)
    if gain:
        self.score += int(gain.group(1))

    # Absolute values never lower the tracked score.
    for pattern in (r'Score:\s*(\d+)', r'Total:\s*(\d+)'):
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            self.score = max(self.score, int(match.group(1)))
|
| 400 |
+
|
| 401 |
+
def _is_game_over(self, text: str) -> bool:
    """Return True when *text* contains a known end-of-game phrase.

    Matching is a case-insensitive substring test.
    """
    lowered = text.lower()
    for marker in (
        "game over",
        "you have died",
        "you are dead",
        "*** you have died ***",
    ):
        if marker in lowered:
            return True
    return False
|
| 411 |
+
|
| 412 |
+
def _print_summary(self, step: int, elapsed_time: float, game_over: bool) -> dict:
    """Print game summary and return results.

    Args:
        step: Number of steps taken.
        elapsed_time: Wall-clock duration of the run in seconds.
        game_over: Whether the run ended because the game finished.

    Returns:
        A dict with keys ``final_score``, ``max_score``,
        ``score_percentage``, ``steps``, ``elapsed_time`` and ``game_over``.
    """
    # Guard the division: a max score of 0 reports 0% instead of raising
    # ZeroDivisionError at the very end of a run.
    percentage = 100 * self.score / self.max_score if self.max_score else 0.0

    print("\n" + "=" * 60)
    print("GAME SUMMARY")
    print("=" * 60)
    print(f"Final Score: {self.score}/{self.max_score} ({percentage:.1f}%)")
    print(f"Steps Taken: {step}")
    print(f"Time Elapsed: {elapsed_time:.1f} seconds")
    print(f"Game Over: {game_over}")
    print("=" * 60)

    return {
        "final_score": self.score,
        "max_score": self.max_score,
        "score_percentage": percentage,
        "steps": step,
        "elapsed_time": elapsed_time,
        "game_over": game_over,
    }
|
| 431 |
+
|
| 432 |
+
|
| 433 |
+
# =============================================================================
|
| 434 |
+
# Main
|
| 435 |
+
# =============================================================================
|
| 436 |
+
|
| 437 |
+
async def main():
    """Parse command-line options and run the MCP ReAct agent.

    Options: ``--server/-s`` (MCP server script path), ``--max-steps/-n``,
    ``--model`` and ``--verbose/-v`` / ``--no-verbose``.

    Returns:
        Whatever ``MCPReActAgent.run`` returns for the finished run.
    """
    import argparse

    parser = argparse.ArgumentParser(description="Run the MCP ReAct Text Adventure Agent")
    parser.add_argument(
        "--server", "-s",
        default="mcp_server/zork_server.py",
        help="Path to the MCP server script",
    )
    parser.add_argument(
        "--max-steps", "-n",
        type=int,
        default=100,
        help="Maximum steps to run",
    )
    parser.add_argument(
        "--model",
        type=str,
        default=None,
        help="HuggingFace model to use",
    )
    # With ``store_true`` and ``default=True`` the flag could never be
    # switched off. BooleanOptionalAction keeps ``--verbose``/``-v`` and the
    # default of True, and adds ``--no-verbose`` to disable the output.
    parser.add_argument(
        "--verbose", "-v",
        action=argparse.BooleanOptionalAction,
        default=True,
        help="Show detailed output",
    )

    args = parser.parse_args()

    config = MCPAgentConfig(verbose=args.verbose)
    if args.model:
        config.model = args.model

    agent = MCPReActAgent(args.server, config)
    return await agent.run(max_steps=args.max_steps)


if __name__ == "__main__":
    asyncio.run(main())
|
agents/react_agent.py
ADDED
|
@@ -0,0 +1,243 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ReAct Agent for Text Adventure Games
|
| 3 |
+
|
| 4 |
+
Implements a ReAct (Reasoning + Acting) loop using an LLM to play text adventures.
|
| 5 |
+
The agent thinks about its situation, decides on an action, and learns from the result.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
import os
|
| 9 |
+
from dataclasses import dataclass
|
| 10 |
+
from huggingface_hub import InferenceClient
|
| 11 |
+
from dotenv import load_dotenv
|
| 12 |
+
|
| 13 |
+
from agents.base_agent import BaseAgent, AgentConfig
|
| 14 |
+
from games.zork_env import GameState
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
@dataclass
class ReActConfig(AgentConfig):
    """Configuration for the ReAct agent.

    Extends ``AgentConfig`` with the LLM settings that ``ReActAgent``
    passes to the chat-completion call.
    """
    # Agent name.
    name: str = "ReActAgent"
    # Model id sent to the inference API; ReActAgent replaces it with the
    # HF_MODEL environment variable when that is set.
    model: str = "meta-llama/Llama-3.2-3B-Instruct"
    # Sampling temperature passed to the chat-completion call.
    temperature: float = 0.7
    # Maximum number of tokens requested per LLM reply.
    max_tokens: int = 300
    # Presumably the number of history entries the base agent keeps;
    # not read in this module - TODO confirm against BaseAgent.
    max_history: int = 15
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
SYSTEM_PROMPT = """You are playing a classic text adventure game.
|
| 28 |
+
|
| 29 |
+
GOAL: Explore the world, solve puzzles, collect treasures, and maximize your score.
|
| 30 |
+
|
| 31 |
+
VALID COMMANDS:
|
| 32 |
+
- Movement: north, south, east, west, up, down, enter, exit
|
| 33 |
+
- Looking: look, examine <thing>, read <thing>
|
| 34 |
+
- Objects: take <item>, drop <item>, open <thing>, close <thing>
|
| 35 |
+
- Light: turn on lamp, light match
|
| 36 |
+
- Combat: attack <enemy> with <weapon>
|
| 37 |
+
- Other: inventory, wait, push <thing>, move <thing>
|
| 38 |
+
|
| 39 |
+
INVALID COMMANDS (do NOT use): check, inspect, search, grab, use, help
|
| 40 |
+
|
| 41 |
+
TIPS:
|
| 42 |
+
- Explore systematically - try all directions
|
| 43 |
+
- Examine interesting objects and read documents
|
| 44 |
+
- Pick up useful items (lamp, keys, weapons)
|
| 45 |
+
- Open containers to find hidden items
|
| 46 |
+
|
| 47 |
+
You MUST respond in EXACTLY this format (no markdown, no extra text):
|
| 48 |
+
THOUGHT: <your reasoning in one sentence>
|
| 49 |
+
ACTION: <one valid command>
|
| 50 |
+
|
| 51 |
+
Example response:
|
| 52 |
+
THOUGHT: I see a container here, I should check what is inside.
|
| 53 |
+
ACTION: open container"""
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
class ReActAgent(BaseAgent):
    """
    A ReAct (Reasoning + Acting) agent that uses an LLM to play text adventures.

    Uses Hugging Face Hub's Inference API.
    """

    def __init__(self, config: ReActConfig = None, token: str = None):
        """Create the agent and its inference client.

        Args:
            config: Agent configuration; a default ``ReActConfig`` is used
                when omitted.
            token: Hugging Face token; falls back to the ``HF_TOKEN``
                environment variable (``.env`` files are loaded first).

        Raises:
            ValueError: If no token is passed and ``HF_TOKEN`` is not set.
        """
        super().__init__(config or ReActConfig())
        self.config: ReActConfig = self.config

        # Load token from environment if not provided
        load_dotenv()
        token = token or os.getenv("HF_TOKEN")
        if not token:
            raise ValueError("HF_TOKEN not found. Set HF_TOKEN environment variable or pass token parameter.")

        # Override model from environment if set
        env_model = os.getenv("HF_MODEL")
        if env_model:
            self.config.model = env_model

        self.client = InferenceClient(token=token)
        self.thoughts: list[str] = []  # Store reasoning history

    def choose_action(self, observation: str, game_state: GameState) -> str:
        """
        Use the LLM to reason about the situation and choose an action.

        Args:
            observation: Latest game text.
            game_state: Current state (score, moves, inventory are read).

        Returns:
            The cleaned game command to execute.
        """
        prompt = self._build_prompt(observation, game_state)
        response = self._call_llm(prompt)
        thought, action = self._parse_response(response)

        # Store the thought for history
        self.thoughts.append(thought)

        if self.config.verbose:
            print(f"\n[Thought] {thought}")
            print(f"[Action] {action}")

        return action

    def _build_prompt(self, observation: str, game_state: GameState) -> str:
        """Build the prompt for the LLM with current context.

        Includes a status line, the inventory when non-empty, the last
        three history entries (observations clipped to 150 characters), a
        warning when those entries all repeat one action, and the current
        observation.
        """
        parts = []

        # Current status (compact for small models)
        parts.append(f"Score: {game_state.score}/{game_state.max_score} | Moves: {game_state.moves}")

        if game_state.inventory:
            parts.append(f"Inventory: {', '.join(game_state.inventory)}")

        # Recent history (only last 3 for small models)
        if self.history:
            parts.append("\nRecent:")
            recent_actions = []
            for action, obs, _state in self.history[-3:]:
                obs_short = obs[:150] + "..." if len(obs) > 150 else obs
                parts.append(f"> {action}\n{obs_short}")
                recent_actions.append(action)

            # Warn about repeated actions
            if len(recent_actions) >= 2 and len(set(recent_actions)) == 1:
                parts.append(f"\n[WARNING: You've done '{recent_actions[0]}' multiple times. Try something different!]")

        # Current observation
        parts.append(f"\nNow:\n{observation}")
        parts.append("\nWhat do you do next? (Try a NEW action)")

        return "\n".join(parts)

    def _call_llm(self, prompt: str) -> str:
        """Call the Hugging Face Inference API.

        Returns:
            The reply text; an empty string when the reply has no text
            content, or a canned THOUGHT/ACTION "look" reply when the call
            fails.
        """
        try:
            messages = [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": prompt},
            ]

            response = self.client.chat.completions.create(
                model=self.config.model,
                messages=messages,
                temperature=self.config.temperature,
                max_tokens=self.config.max_tokens,
            )
            content = response.choices[0].message.content
        except Exception as e:
            # Best-effort boundary: degrade to a safe action on API errors.
            print(f"Error calling LLM: {e}")
            return "THOUGHT: Error occurred, trying a safe action.\nACTION: look"

        # Content may be None; the parser needs a string.
        return content or ""

    def _parse_response(self, response: str) -> tuple[str, str]:
        """Parse the LLM response to extract thought and action.

        Args:
            response: Raw LLM text in ``THOUGHT: ... / ACTION: ...`` form
                (``None`` is treated as empty).

        Returns:
            ``(thought, action)``; the action defaults to ``"look"`` when
            none is found or it is empty after cleanup.
        """
        thought = ""
        action = "look"  # Default fallback action

        lines = (response or "").strip().split("\n")

        for i, line in enumerate(lines):
            line_upper = line.upper().strip()

            if line_upper.startswith("THOUGHT:"):
                # Extract thought (may span multiple lines until ACTION)
                thought_parts = [line.split(":", 1)[1].strip()]
                for following in lines[i + 1:]:
                    if following.upper().strip().startswith("ACTION:"):
                        break
                    thought_parts.append(following.strip())
                thought = " ".join(thought_parts).strip()

            elif line_upper.startswith("ACTION:"):
                action = line.split(":", 1)[1].strip().lower()
                # Remove markdown bold/italic markers and backticks FIRST so
                # that quotes wrapped in markup (**"take lamp"**) become the
                # outermost characters and can be stripped afterwards.
                action = action.replace("**", "").replace("*", "").replace("__", "").replace("_", " ")
                action = action.replace("`", "")
                action = action.strip().strip('"\'')
                # Clean up whitespace
                action = " ".join(action.split())
                break

        # Validate action isn't empty
        if not action or action.isspace():
            action = "look"

        return thought, action

    def reset(self):
        """Reset the agent for a new game."""
        super().reset()
        self.thoughts = []

    def get_summary(self) -> str:
        """Get a summary of the agent's reasoning (the last five thoughts)."""
        if not self.thoughts:
            return "No thoughts recorded yet."

        return "\n---\n".join(self.thoughts[-5:])
|
| 199 |
+
|
| 200 |
+
|
| 201 |
+
# Example usage and testing
|
| 202 |
+
if __name__ == "__main__":
    import sys
    from games.zork_env import TextAdventureEnv

    # The game name is the first CLI argument; zork1 is the default.
    if len(sys.argv) > 1:
        game = sys.argv[1]
    else:
        game = "zork1"

    rule = "=" * 50

    # Quick smoke test with verbose agent output.
    config = ReActConfig(verbose=True)

    try:
        agent = ReActAgent(config)
        env = TextAdventureEnv(game)

        state = env.reset()
        print(rule)
        print(f"{game.upper()} (using {agent.config.model})")
        print(rule)
        print(state.observation)

        # Let the agent take at most five decisions.
        for step_number in range(1, 6):
            print("\n" + rule)
            print(f"Step {step_number}")
            print(rule)

            action = agent.choose_action(state.observation, state)
            print(f"\n> {action}")

            state = env.step(action)
            print(f"\n{state.observation}")
            print(f"\nScore: {state.score}/{state.max_score}")

            agent.update_history(action, state.observation, state)

            if state.done:
                print("\nGAME OVER!")
                break

    except ValueError as e:
        print(f"Setup error: {e}")
        print("Make sure to set your HF_TOKEN in .env file")
|
function_calling/controller.py
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Function-Calling Controller for Zork (API-Based)
|
| 3 |
+
|
| 4 |
+
This controller uses the HuggingFace API's native function calling feature.
|
| 5 |
+
The model is given tool schemas and can call them via the tools API.
|
| 6 |
+
|
| 7 |
+
Model: Llama 3.2 3B Instruct (supports native function calling)
|
| 8 |
+
|
| 9 |
+
Compare with simple_controller.py which uses text-based "parsing" approach.
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
import os
|
| 13 |
+
import json
|
| 14 |
+
from dotenv import load_dotenv
|
| 15 |
+
from huggingface_hub import InferenceClient
|
| 16 |
+
|
| 17 |
+
from tools import ALL_TOOLS, set_game_state, add_to_history
|
| 18 |
+
|
| 19 |
+
# Add parent directory to path to import games module
|
| 20 |
+
import sys
|
| 21 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 22 |
+
from games.zork_env import ZorkEnvironment
|
| 23 |
+
|
| 24 |
+
|
| 25 |
+
# System prompt for the agent
|
| 26 |
+
SYSTEM_PROMPT = """You are playing Zork, a classic text adventure game.
|
| 27 |
+
|
| 28 |
+
## YOUR GOAL
|
| 29 |
+
Explore, collect treasures (bring them to the trophy case), and maximize your score.
|
| 30 |
+
|
| 31 |
+
## VALID COMMANDS (use ONLY these exact verbs)
|
| 32 |
+
|
| 33 |
+
Movement:
|
| 34 |
+
north, south, east, west, up, down (or n, s, e, w, u, d)
|
| 35 |
+
enter, exit, climb, cross, go <direction>
|
| 36 |
+
|
| 37 |
+
Looking:
|
| 38 |
+
look, examine <thing>, look at <thing>, look in <thing>, read <thing>
|
| 39 |
+
|
| 40 |
+
Objects:
|
| 41 |
+
take <item>, drop <item>, pick up <item>
|
| 42 |
+
open <thing>, close <thing>, unlock <thing> with <key>
|
| 43 |
+
put <item> in <container>, give <item> to <person>
|
| 44 |
+
|
| 45 |
+
Light:
|
| 46 |
+
turn on lamp, turn off lamp, light match
|
| 47 |
+
|
| 48 |
+
Combat:
|
| 49 |
+
attack <enemy> with <weapon>, kill <enemy> with <weapon>
|
| 50 |
+
|
| 51 |
+
Other:
|
| 52 |
+
inventory (or i), wait (or z), score, save, restore
|
| 53 |
+
push <thing>, pull <thing>, move <thing>, tie <rope> to <thing>
|
| 54 |
+
eat <food>, drink <liquid>, wave <item>
|
| 55 |
+
|
| 56 |
+
## FORBIDDEN (these will NOT work):
|
| 57 |
+
check, inspect, search, investigate, grab, pick, use, interact,
|
| 58 |
+
go to, walk to, head to, travel, proceed
|
| 59 |
+
|
| 60 |
+
## YOUR TOOLS
|
| 61 |
+
memory() - See current state and recent actions
|
| 62 |
+
get_map() - See explored locations
|
| 63 |
+
inventory() - Check what you're carrying
|
| 64 |
+
|
| 65 |
+
## RESPONSE FORMAT
|
| 66 |
+
When you want to take a game action, respond with:
|
| 67 |
+
ACTION: <command>
|
| 68 |
+
|
| 69 |
+
Examples:
|
| 70 |
+
ACTION: open mailbox
|
| 71 |
+
ACTION: north
|
| 72 |
+
ACTION: take lamp
|
| 73 |
+
ACTION: examine leaflet"""
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
# Valid Zork command verbs for validation
|
| 77 |
+
VALID_VERBS = {
    "north", "south", "east", "west", "up", "down", "n", "s", "e", "w", "u", "d",
    "look", "l", "examine", "x", "read",
    "take", "get", "drop", "put", "give",
    "open", "close", "unlock", "lock",
    "turn", "light", "extinguish", "blow",
    "attack", "kill", "fight", "hit",
    # "cross" is advertised as a valid movement verb by the system prompt.
    # NOTE(review): the prompt also lists "save"/"restore"; they are left
    # out here on purpose - confirm whether the environment supports them.
    "enter", "exit", "go", "climb", "cross", "jump",
    "inventory", "i", "wait", "z", "score",
    "move", "push", "pull", "tie", "untie",
    "eat", "drink", "smell", "touch", "rub",
    "wave", "raise", "lower", "pour",
    "say", "answer", "yes", "no",
    "pray", "odysseus", "echo", "hello",
}


def validate_action(action: str) -> str:
    """Validate and potentially fix an action.

    The text is lower-cased, stripped of markdown markers and of one pair
    of wrapping quotes, and whitespace-normalised. If its first word is in
    ``VALID_VERBS`` the cleaned command is returned; a few common wrong
    verbs are replaced; anything else becomes ``"look"``.

    Args:
        action: Command text proposed by the model (may be empty or None).

    Returns:
        A non-empty command string.
    """
    text = (action or "").lower()
    # Models often wrap commands in markdown or quotes despite the prompt.
    text = text.replace("**", "").replace("*", "").replace("`", "").strip()
    if len(text) >= 2 and text[0] == text[-1] and text[0] in "\"'":
        text = text[1:-1]

    words = text.split()
    if not words:
        return "look"

    verb = words[0]
    if verb in VALID_VERBS:
        return " ".join(words)

    # Common corrections
    corrections = {
        "check": "examine",
        "inspect": "examine",
        "search": "examine",
        "grab": "take",
        "pick": "take",
        "see": "look",
        "view": "look",
        "walk": "go",
    }

    if verb in corrections:
        # "pick up lamp" must become "take lamp", not "take up lamp".
        rest = words[2:] if verb == "pick" and words[1:2] == ["up"] else words[1:]
        return " ".join([corrections[verb], *rest])

    return "look"  # Default fallback
|
| 121 |
+
|
| 122 |
+
|
| 123 |
+
def build_tool_schemas():
    """Describe every tool in ``ALL_TOOLS`` as a function-calling schema.

    Each entry carries the tool's ``name`` and ``description`` and an
    empty parameter object (no properties, nothing required).
    """
    return [
        {
            "type": "function",
            "function": {
                "name": entry.name,
                "description": entry.description,
                "parameters": {
                    "type": "object",
                    "properties": {},
                    "required": [],
                },
            },
        }
        for entry in ALL_TOOLS
    ]
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def run_tool(tool_name: str) -> str:
    """Invoke the tool in ``ALL_TOOLS`` whose name equals *tool_name*.

    The first matching tool is called with an empty argument dict and its
    result returned; an ``Unknown tool: ...`` message is returned when no
    tool matches.
    """
    selected = next((entry for entry in ALL_TOOLS if entry.name == tool_name), None)
    if selected is None:
        return f"Unknown tool: {tool_name}"
    return selected.invoke({})
|
| 149 |
+
|
| 150 |
+
|
| 151 |
+
class FunctionCallingController:
    """Controller using LLM API-based function calling."""

    def __init__(self, model: str = "meta-llama/Llama-3.2-3B-Instruct"):
        """Create the inference client and tool schemas.

        Args:
            model: Model id to use when the ``HF_MODEL`` environment
                variable is not set.

        Raises:
            ValueError: If ``HF_TOKEN`` is not set (after loading ``.env``).
        """
        load_dotenv()
        token = os.getenv("HF_TOKEN")
        if not token:
            raise ValueError("HF_TOKEN not set in environment")

        self.client = InferenceClient(token=token)
        self.model = os.getenv("HF_MODEL", model)
        self.tool_schemas = build_tool_schemas()

    def get_action(self, observation: str, game_state) -> str:
        """Get the next action from the LLM.

        The model may call an information tool up to three times; each
        tool result is appended to the conversation and the model is asked
        again. The first plain-text reply is converted into a command.

        Args:
            observation: Latest game output.
            game_state: Object providing ``inventory``, ``score``, ``moves``.

        Returns:
            A validated game command; ``"look"`` when the model only made
            tool calls.
        """
        # Update tool state
        set_game_state(
            observation=observation,
            inventory=list(game_state.inventory) if game_state.inventory else [],
            score=game_state.score,
            moves=game_state.moves,
        )

        # Build messages fresh each time (simpler than managing tool history)
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": f"Game output:\n{observation}\n\nWhat do you do?"},
        ]

        # Allow up to 3 tool calls before requiring action
        for _ in range(3):
            response = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                tools=self.tool_schemas,
                tool_choice="auto",
                max_tokens=300,
            )

            message = response.choices[0].message

            # Model responded with text - extract action
            if not message.tool_calls:
                return self._extract_action(message.content or "")

            # Model wants to use a tool; only the first call is served and
            # echoed back, so the conversation stays consistent.
            tool_call = message.tool_calls[0]
            tool_name = tool_call.function.name

            print(f" [Tool] {tool_name}")
            tool_result = run_tool(tool_name)
            print(f" {tool_result[:100]}...")

            # Add tool interaction to messages for next iteration
            messages.append({
                "role": "assistant",
                "content": None,
                "tool_calls": [{
                    "id": tool_call.id,
                    "type": "function",
                    "function": {"name": tool_name, "arguments": "{}"},
                }],
            })
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": tool_result,
            })

        # After 3 tool calls, just return look
        return "look"

    @staticmethod
    def _extract_action(content: str) -> str:
        """Turn a plain-text model reply into a validated command."""
        # Prefer an explicit "ACTION:" marker. The command is whatever
        # follows the marker itself; splitting the line on its first colon
        # would return the wrong text for "Thought: ... ACTION: north".
        for line in content.split("\n"):
            segments = line.split(":")
            for index, segment in enumerate(segments[:-1]):
                if segment.upper().endswith("ACTION"):
                    return validate_action(":".join(segments[index + 1:]))

        # No marker: treat the first non-empty line as the command rather
        # than sending a whole multi-line reply to the game.
        first_line = next((ln.strip() for ln in content.split("\n") if ln.strip()), "")
        return validate_action(first_line)
|
| 247 |
+
|
| 248 |
+
|
| 249 |
+
def main():
    """Play Zork for up to 30 steps with the API function-calling controller.

    Prints each observation and chosen action, reports positive rewards,
    stops early when the game ends, and prints the final score.
    """
    banner = "=" * 60
    divider = "─" * 50

    print(banner)
    print("Zork - API Function Calling Controller")
    print(" (using Llama 3.2 3B with native tool calling)")
    print(banner)

    controller = FunctionCallingController()
    env = ZorkEnvironment("zork1")

    state = env.reset()
    print(f"\n{state.observation}\n")

    max_steps = 30

    for step_number in range(1, max_steps + 1):
        print("\n" + divider)
        print(f"Step {step_number}/{max_steps} | Score: {state.score}")
        print(divider)

        action = controller.get_action(state.observation, state)
        print(f"\n> ACTION: {action}")

        # Apply the command and record it for the memory tool.
        state = env.step(action)
        add_to_history(action, state.observation)

        print(f"\n{state.observation}")

        if state.reward > 0:
            print(f"\n+{state.reward} points!")

        if state.done:
            print("\nGAME OVER!")
            break

    print("\n" + banner)
    print(f"Final Score: {state.score}")
    print(banner)


if __name__ == "__main__":
    main()
|
function_calling/simple_controller.py
ADDED
|
@@ -0,0 +1,268 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Function-Calling Controller for Zork (Text-Based)
|
| 3 |
+
|
| 4 |
+
This controller uses text-based "function calling" - the LLM outputs
|
| 5 |
+
TOOL: <name> or ACTION: <command> and we parse the text response.
|
| 6 |
+
|
| 7 |
+
Model: Qwen 2.5 7B Instruct by default; set the HF_MODEL environment variable to override it (any chat model works)
|
| 8 |
+
|
| 9 |
+
This approach is:
|
| 10 |
+
- Simpler and more reliable than API-based function calling
|
| 11 |
+
- Works with any chat model (no special support needed)
|
| 12 |
+
|
| 13 |
+
Compare with controller.py which uses API-based tool calling.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
import os
|
| 17 |
+
import re
|
| 18 |
+
from dotenv import load_dotenv
|
| 19 |
+
from huggingface_hub import InferenceClient
|
| 20 |
+
|
| 21 |
+
from tools import ALL_TOOLS, set_game_state, add_to_history
|
| 22 |
+
|
| 23 |
+
# Add parent directory to path
|
| 24 |
+
import sys
|
| 25 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 26 |
+
from games.zork_env import ZorkEnvironment
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
SYSTEM_PROMPT = """You are playing Zork, a classic text adventure game.
|
| 30 |
+
|
| 31 |
+
## YOUR GOAL
|
| 32 |
+
Explore, collect treasures (bring them to the trophy case), and maximize your score.
|
| 33 |
+
|
| 34 |
+
## VALID COMMANDS (use ONLY these exact verbs)
|
| 35 |
+
|
| 36 |
+
Movement:
|
| 37 |
+
north, south, east, west, up, down (or n, s, e, w, u, d)
|
| 38 |
+
enter, exit, climb, cross, go <direction>
|
| 39 |
+
|
| 40 |
+
Looking:
|
| 41 |
+
look, examine <thing>, look at <thing>, look in <thing>, read <thing>
|
| 42 |
+
|
| 43 |
+
Objects:
|
| 44 |
+
take <item>, drop <item>, pick up <item>
|
| 45 |
+
open <thing>, close <thing>, unlock <thing> with <key>
|
| 46 |
+
put <item> in <container>, give <item> to <person>
|
| 47 |
+
|
| 48 |
+
Light:
|
| 49 |
+
turn on lamp, turn off lamp, light match
|
| 50 |
+
|
| 51 |
+
Combat:
|
| 52 |
+
attack <enemy> with <weapon>, kill <enemy> with <weapon>
|
| 53 |
+
|
| 54 |
+
Other:
|
| 55 |
+
inventory (or i), wait (or z), score, save, restore
|
| 56 |
+
push <thing>, pull <thing>, move <thing>, tie <rope> to <thing>
|
| 57 |
+
eat <food>, drink <liquid>, wave <item>
|
| 58 |
+
|
| 59 |
+
## FORBIDDEN (these will NOT work):
|
| 60 |
+
check, inspect, search, investigate, grab, pick, use, interact,
|
| 61 |
+
go to, walk to, head to, travel, proceed
|
| 62 |
+
|
| 63 |
+
## YOUR TOOLS
|
| 64 |
+
TOOL: memory - See current state and recent actions
|
| 65 |
+
TOOL: get_map - See explored locations
|
| 66 |
+
TOOL: inventory - Check what you're carrying
|
| 67 |
+
|
| 68 |
+
## RESPONSE FORMAT
|
| 69 |
+
Either use a tool:
|
| 70 |
+
TOOL: memory
|
| 71 |
+
|
| 72 |
+
Or take a game action:
|
| 73 |
+
ACTION: open mailbox
|
| 74 |
+
|
| 75 |
+
Always respond with TOOL: or ACTION: followed by your choice."""
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# Valid Zork command verbs for validation.
# An action is accepted when its first word is in this set, so every verb the
# system prompt advertises as valid must appear here ("cross" was missing,
# which made the validator reject a command the prompt told the model to use).
VALID_VERBS = {
    # Movement
    "north", "south", "east", "west", "up", "down", "n", "s", "e", "w", "u", "d",
    "enter", "exit", "go", "climb", "cross", "jump",
    # Looking
    "look", "l", "examine", "x", "read",
    # Objects
    "take", "get", "drop", "put", "give",
    "open", "close", "unlock", "lock",
    # Light
    "turn", "light", "extinguish", "blow",
    # Combat
    "attack", "kill", "fight", "hit",
    # Meta / status
    "inventory", "i", "wait", "z", "score",
    # Manipulation
    "move", "push", "pull", "tie", "untie",
    "eat", "drink", "smell", "touch", "rub",
    "wave", "raise", "lower", "pour",
    # Speech
    "say", "answer", "yes", "no",
    # Special words
    "pray", "odysseus", "echo", "hello",
}
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def run_tool(tool_name: str) -> str:
    """Execute one of the registered tools by name.

    The name is normalised (trimmed, lower-cased, spaces turned into
    underscores) before it is compared with each tool's ``name`` in
    ``ALL_TOOLS``. The first matching tool is invoked with an empty argument
    dict, because none of the tools take parameters.

    Args:
        tool_name: Tool name as written by the model, e.g. ``"get_map"``.

    Returns:
        The tool's output, or an "Unknown tool" message that lists the names
        currently registered in ``ALL_TOOLS``.
    """
    wanted = tool_name.strip().lower().replace(" ", "_")
    for tool in ALL_TOOLS:
        if tool.name == wanted:
            return tool.invoke({})

    # Build the hint from the registry so it cannot drift from ALL_TOOLS.
    available = ", ".join(tool.name for tool in ALL_TOOLS)
    return f"Unknown tool: {wanted}. Available: {available}"
|
| 103 |
+
|
| 104 |
+
|
| 105 |
+
class SimpleController:
    """Controller using text-based tool calling.

    The model is asked to answer with either ``TOOL: <name>`` or
    ``ACTION: <command>``. Replies are parsed with regular expressions; tool
    results are fed back to the model, and actions are validated against
    ``VALID_VERBS`` before being returned to the game loop.
    """

    def __init__(self, model: str = "Qwen/Qwen2.5-7B-Instruct"):
        """Create the inference client.

        Args:
            model: Model id used when the ``HF_MODEL`` environment variable
                is not set.

        Raises:
            ValueError: If ``HF_TOKEN`` is missing from the environment
                (after loading ``.env``).
        """
        load_dotenv()
        token = os.getenv("HF_TOKEN")
        if not token:
            raise ValueError("HF_TOKEN not set in environment")

        self.client = InferenceClient(token=token)
        # HF_MODEL, when set, takes precedence over the constructor argument.
        self.model = os.getenv("HF_MODEL", model)
        self.messages = []

    def _call_llm(self, user_message: str) -> str:
        """Send ``user_message`` to the model and return its reply.

        Both the user message and the reply are appended to
        ``self.messages``. Only the 15 most recent messages are sent along
        with the system prompt.
        """
        self.messages.append({"role": "user", "content": user_message})

        # Keep conversation short
        if len(self.messages) > 15:
            self.messages = self.messages[-15:]

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[{"role": "system", "content": SYSTEM_PROMPT}] + self.messages,
            max_tokens=150,
            temperature=0.7,
        )

        # content may be None; normalise to an empty string.
        reply = response.choices[0].message.content or ""
        self.messages.append({"role": "assistant", "content": reply})
        return reply

    def _validate_action(self, action: str) -> str | None:
        """Validate and potentially fix an action.

        The action is trimmed and lower-cased. It is accepted unchanged when
        its first word is in ``VALID_VERBS``. Otherwise a small table of
        common synonyms is consulted and the verb is replaced. For ``pick``
        a following ``up`` is dropped as well, so ``"pick up lamp"`` becomes
        ``"take lamp"`` rather than ``"take up lamp"``.

        Returns:
            The (possibly corrected) action, or ``None`` if it is empty or
            its verb is neither valid nor correctable.
        """
        action = action.strip().lower()
        if not action:
            return None

        # Get the first word (verb)
        verb = action.split()[0]

        # Check if it's a valid verb
        if verb in VALID_VERBS:
            return action

        # Try common corrections
        corrections = {
            "check": "examine",
            "inspect": "examine",
            "search": "examine",
            "grab": "take",
            "pick": "take",  # "pick up" -> "take"
            "see": "look",
            "view": "look",
            "walk": "go",
        }

        replacement = corrections.get(verb)
        if replacement is None:
            return None

        remainder = action[len(verb):]
        if verb == "pick":
            # Drop the particle so "pick up lamp" maps to "take lamp".
            tail = remainder.split()
            if tail[:1] == ["up"]:
                remainder = "".join(f" {word}" for word in tail[1:])

        print(f" [Correcting] '{verb}' -> '{replacement}'")
        return replacement + remainder

    def get_action(self, observation: str, game_state) -> str:
        """Get the next action, allowing tool use.

        The shared tool state is refreshed from ``game_state`` first. The
        model then gets up to three turns; each turn is consumed by a tool
        call, by an invalid ``ACTION:`` (which triggers a retry prompt), or
        ends the call by returning an action.

        Args:
            observation: Latest game text shown to the model.
            game_state: Object exposing ``inventory``, ``score`` and
                ``moves`` attributes.

        Returns:
            A validated action, a command scraped from a free-form reply, or
            ``"look"`` when nothing usable was produced.
        """
        # Update tool state
        set_game_state(
            observation=observation,
            inventory=list(game_state.inventory) if game_state.inventory else [],
            score=game_state.score,
            moves=game_state.moves,
        )

        prompt = f"Game:\n{observation}\n\nRespond with TOOL: or ACTION:"

        # Allow up to 3 tool calls before requiring an action
        for _ in range(3):
            response = self._call_llm(prompt)

            # Check for TOOL: (takes precedence over ACTION: in the same reply)
            tool_match = re.search(r'TOOL:\s*(\w+)', response, re.IGNORECASE)
            if tool_match:
                tool_name = tool_match.group(1)
                print(f" [Tool] {tool_name}")

                result = run_tool(tool_name)
                print(f" {result[:80]}...")

                # Feed result back
                prompt = f"Tool result:\n{result}\n\nNow respond with TOOL: or ACTION:"
                continue

            # Check for ACTION:
            action_match = re.search(r'ACTION:\s*(.+)', response, re.IGNORECASE)
            if action_match:
                action = action_match.group(1).strip().lower()
                # Clean up action (remove quotes, extra text)
                action = action.split('\n')[0].strip('"\'')

                # Validate the action
                validated = self._validate_action(action)
                if validated:
                    return validated

                print(f" [Warning] Invalid action '{action}', asking for retry...")
                prompt = f"'{action}' is not a valid Zork command. Use verbs like: look, examine, take, open, north, south, etc.\n\nRespond with ACTION:"
                continue

            # If neither, try to extract a command: take the first known
            # command word found in the reply plus up to two following words.
            words = response.lower().split()
            for cmd in ["north", "south", "east", "west", "up", "down",
                        "look", "take", "open", "enter", "examine"]:
                if cmd in words:
                    idx = words.index(cmd)
                    return " ".join(words[idx:idx+3])

            return "look"

        # All three turns were spent on tools or invalid actions.
        return "look"
|
| 226 |
+
|
| 227 |
+
|
| 228 |
+
def main():
    """Play up to 30 turns of Zork with the text-based SimpleController.

    Every turn requests an action from the controller, steps the
    environment, records the (action, observation) pair in the shared tool
    history and prints the result. The loop ends early when the environment
    reports the game as done; the final score is printed afterwards.
    """
    banner = "=" * 60
    rule = "─" * 50
    max_steps = 30

    print(banner)
    print("Zork - Simple Function Calling Demo")
    print(banner)

    controller = SimpleController()
    env = ZorkEnvironment("zork1")

    state = env.reset()
    print(f"\n{state.observation}\n")

    for turn in range(1, max_steps + 1):
        print(f"\n{rule}")
        print(f"Step {turn}/{max_steps} | Score: {state.score}")
        print(rule)

        # The controller decides based on the previous turn's observation.
        action = controller.get_action(state.observation, state)
        print(f"\n> ACTION: {action}")

        state = env.step(action)
        add_to_history(action, state.observation)

        print(f"\n{state.observation}")

        if state.reward > 0:
            print(f"\n+{state.reward} points!")

        if state.done:
            print("\nGAME OVER!")
            break

    print(f"\n{banner}")
    print(f"Final Score: {state.score}")
    print(banner)
|
| 265 |
+
|
| 266 |
+
|
| 267 |
+
if __name__ == "__main__":
|
| 268 |
+
main()
|
function_calling/tools.py
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Simple tools for the Zork agent using LangChain's tool decorator.
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
from langchain_core.tools import tool
|
| 6 |
+
|
| 7 |
+
|
| 8 |
+
# Game state that tools can access (set by the controller).
# This is a single module-level dict shared by every tool in this file:
# set_game_state() overwrites the first four keys, add_to_history() maintains
# "history" and trims it to the 10 most recent entries.
_game_state = {
    "observation": "",  # Latest game text passed to set_game_state()
    "inventory": [],  # Inventory list passed to set_game_state()
    "score": 0,  # Score passed to set_game_state()
    "moves": 0,  # Move counter passed to set_game_state()
    "history": [],  # List of (action, result) tuples
}
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def set_game_state(observation: str, inventory: list, score: int, moves: int):
    """Overwrite the shared snapshot that the tools read from.

    Replaces the "observation", "inventory", "score" and "moves" entries of
    the module-level state; the recorded history is left untouched.
    """
    _game_state.update(
        observation=observation,
        inventory=inventory,
        score=score,
        moves=moves,
    )
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def add_to_history(action: str, result: str):
    """Record an (action, result) pair, keeping only the 10 newest entries."""
    max_entries = 10
    history = _game_state["history"]
    history.append((action, result))
    if len(history) > max_entries:
        # Rebind to a trimmed copy so older entries are dropped.
        _game_state["history"] = history[-max_entries:]
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
@tool
def memory() -> str:
    """Get a summary of the current game state including location, score, and recent actions."""
    obs = _game_state["observation"]
    score = _game_state["score"]
    moves = _game_state["moves"]

    # Extract location: the first non-blank line of the observation.
    # (str.split never returns an empty list, so the fallback has to be
    # driven by "no non-blank line" rather than by an empty result.)
    location = next(
        (line.strip() for line in obs.splitlines() if line.strip()),
        "Unknown",
    )

    # Recent actions: the last five (action, result) pairs, each result
    # truncated to its first 50 characters.
    recent = _game_state["history"][-5:]
    if recent:
        recent_str = "\n".join(f" > {a} → {r[:50]}..." for a, r in recent)
    else:
        recent_str = " (none yet)"

    return f"""Current State:
- Location: {location}
- Score: {score} points
- Moves: {moves}

Recent Actions:
{recent_str}

Current Observation:
{obs}"""
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def _movement_direction(action: str) -> str:
    """Return the canonical direction for a movement command, or "" if none.

    Accepts the full word ("north"), the one-letter abbreviation ("n") and
    the "go <direction>" form, all case-insensitively.
    """
    aliases = {
        "north": "north", "n": "north",
        "south": "south", "s": "south",
        "east": "east", "e": "east",
        "west": "west", "w": "west",
        "up": "up", "u": "up",
        "down": "down", "d": "down",
        "enter": "enter",
        "exit": "exit",
    }
    words = action.strip().lower().split()
    if words[:1] == ["go"]:
        words = words[1:]
    if len(words) != 1:
        return ""
    return aliases.get(words[0], "")


@tool
def get_map() -> str:
    """Get a map showing known locations and connections based on exploration history."""
    # Build a simple map from history. The first line of each recorded
    # result is treated as the location name (a heuristic: the history holds
    # raw game text, not structured location data).
    locations = set()
    connections = []

    prev_loc = None
    for action, result in _game_state["history"]:
        loc = result.strip().split('\n')[0]
        if not loc:
            # A blank observation carries no location information.
            continue
        locations.add(loc)

        # If this was a movement action that changed the first line, record
        # the connection under its canonical direction name.
        direction = _movement_direction(action)
        if direction and prev_loc and prev_loc != loc:
            connections.append(f" {prev_loc} --{direction}--> {loc}")
        prev_loc = loc

    if not locations:
        return "Map: No locations explored yet. Try moving around!"

    loc_list = "\n".join(f" - {loc}" for loc in sorted(locations))
    # Only the 10 most recent connections are shown.
    conn_list = "\n".join(connections[-10:]) if connections else " (no connections recorded)"

    return f"""Known Locations:
{loc_list}

Connections:
{conn_list}"""
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _item_display_name(item) -> str:
    """Reduce an inventory entry to a bare item name.

    Handles strings such as "leaflet Parent4 Sibling0..." by cutting at the
    word "Parent" (case-insensitive) and removing a leading "obj123: " style
    prefix. Entries without that metadata are returned unchanged apart from
    an optional prefix before the first colon.
    """
    import re  # local import: this module does not import re at file level

    item_str = str(item)

    # Match "parent" only at a word start so that names merely containing
    # the letters (e.g. "transparent box") are not truncated.
    metadata = re.search(r"\bparent", item_str, re.IGNORECASE)
    if metadata:
        name = item_str[:metadata.start()].strip()
    elif ":" in item_str:
        name = item_str
    else:
        return item_str

    # Remove leading "obj123: " if present; split only on the first colon so
    # a name that itself contains a colon is kept whole.
    if ":" in name:
        name = name.split(":", 1)[1].strip()
    return name


@tool
def inventory() -> str:
    """Get the list of items currently in your inventory."""
    items = _game_state["inventory"]

    if not items:
        return "Inventory: You are empty-handed."

    # Clean up item names (Jericho returns objects with metadata)
    item_names = [_item_display_name(item) for item in items]

    return f"Inventory: {', '.join(item_names)}"
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
# Export all tools
|
| 127 |
+
ALL_TOOLS = [memory, get_map, inventory]
|
games/__init__.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from .zork_env import TextAdventureEnv, GameState, list_available_games, discover_games
|
| 2 |
+
|
| 3 |
+
# Alias for backwards compatibility
|
| 4 |
+
ZorkEnvironment = TextAdventureEnv
|
| 5 |
+
|
| 6 |
+
__all__ = ["TextAdventureEnv", "ZorkEnvironment", "GameState", "list_available_games", "discover_games"]
|
games/zork_env.py
ADDED
|
@@ -0,0 +1,219 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Text Adventure Game Environment
|
| 3 |
+
|
| 4 |
+
Provides a clean interface to text adventure games via Jericho.
|
| 5 |
+
Supports Zork and many other classic Z-machine games.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from jericho import FrotzEnv
|
| 9 |
+
from dataclasses import dataclass
|
| 10 |
+
from typing import Optional
|
| 11 |
+
from pathlib import Path
|
| 12 |
+
import os
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
@dataclass
class GameState:
    """Represents the current state of the game.

    Instances are built by ``TextAdventureEnv._make_game_state`` after every
    ``reset()`` and ``step()``.
    """
    observation: str  # Game text returned by the last reset/step
    score: int  # info['score'] from the environment (0 if absent)
    max_score: int  # Value of env.get_max_score()
    moves: int  # info['moves'] from the environment (0 if absent)
    done: bool  # Done flag from env.step(); False right after reset()
    reward: int  # Points gained from last action
    inventory: list[str]  # str() of each inventory object; [] if lookup failed
    location: str  # str() of the player location; "Unknown" if lookup failed
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def get_default_games_dir() -> Path:
    """Return the bundled game-suite directory under the project root.

    The project root is taken to be the parent of the directory that holds
    this module.
    """
    package_dir = Path(__file__).parent
    return package_dir.parent.joinpath("z-machine-games-master", "jericho-game-suite")
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def discover_games(games_dir: Optional[Path] = None) -> dict[str, Path]:
    """
    Find the Z-machine story files directly inside a directory.

    Args:
        games_dir: Directory to scan; the default games directory is used
            when this is None.

    Returns:
        Mapping of lower-cased file stem to file path, ordered by name.
        Empty when the directory does not exist.
    """
    root = Path(get_default_games_dir() if games_dir is None else games_dir)
    if not root.exists():
        return {}

    # Scan one extension at a time (.z3, .z4, .z5, .z8); if two files share a
    # stem, the one found later replaces the earlier entry.
    found = {
        story.stem.lower(): story
        for pattern in ("*.z3", "*.z4", "*.z5", "*.z8")
        for story in root.glob(pattern)
    }

    return {name: found[name] for name in sorted(found)}
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def list_available_games(games_dir: Optional[Path] = None) -> list[str]:
    """List the names of all discoverable games, in sorted order."""
    catalogue = discover_games(games_dir)
    return [*catalogue]
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class TextAdventureEnv:
    """Wrapper around Jericho's FrotzEnv for text adventure games.

    Adds three things on top of the raw environment: lookup of games by name,
    a ``GameState`` snapshot returned from ``reset()``/``step()``, and a
    per-step reward computed as the change in score since the previous step.
    """

    def __init__(self, game: str = "zork1", games_dir: Optional[str] = None):
        """
        Initialize the text adventure environment.

        Args:
            game: Game name (e.g., 'zork1', 'advent', 'enchanter')
                  Can also be a full path to a .z* file
            games_dir: Directory containing game files (optional)

        Raises:
            ValueError: If ``game`` is neither an existing file nor the name
                of a discovered game.
        """
        # Check if game is a full path
        if os.path.isfile(game):
            game_path = Path(game)
            self.game = game_path.stem
        else:
            # Look up game by name
            games_path = Path(games_dir) if games_dir else None
            available_games = discover_games(games_path)

            if game.lower() not in available_games:
                # Show at most 20 names to keep the message readable.
                available = list(available_games.keys())[:20]
                raise ValueError(
                    f"Unknown game: {game}. "
                    f"Available: {', '.join(available)}... "
                    f"({len(available_games)} total)"
                )

            game_path = available_games[game.lower()]
            self.game = game.lower()

        self.env = FrotzEnv(str(game_path))
        self.game_path = game_path
        self._last_score = 0  # Baseline for the next step's reward
        self._history: list[tuple[str, str]] = []  # (action, observation) pairs

    def reset(self) -> "GameState":
        """Reset the game to the beginning and clear score baseline and history."""
        observation, info = self.env.reset()
        self._last_score = 0
        self._history = []
        return self._make_game_state(observation, info, done=False, reward=0)

    def step(self, action: str) -> "GameState":
        """
        Take an action in the game.

        Args:
            action: The text command to execute (e.g., "go north", "take lamp")

        Returns:
            GameState with the result of the action
        """
        observation, reward, done, info = self.env.step(action)

        # Track reward as score change (replaces the reward from the env)
        current_score = info.get('score', 0)
        reward = current_score - self._last_score
        self._last_score = current_score

        # Record history
        self._history.append((action, observation))

        return self._make_game_state(observation, info, done, reward)

    def _make_game_state(self, observation: str, info: dict, done: bool, reward: int) -> "GameState":
        """Create a GameState from the environment info."""
        # Try to get inventory and location (may fail without spacy).
        # Both lookups are best-effort: a failure yields a placeholder
        # instead of aborting the step.
        try:
            inventory = [str(obj) for obj in self.env.get_inventory()]
        except Exception:
            inventory = []

        try:
            location = str(self.env.get_player_location())
        except Exception:
            location = "Unknown"

        return GameState(
            observation=observation,
            score=info.get('score', 0),
            max_score=self.env.get_max_score(),
            moves=info.get('moves', 0),
            done=done,
            reward=reward,
            inventory=inventory,
            location=location,
        )

    def get_history(self) -> list[tuple[str, str]]:
        """Get a copy of the history of (action, observation) pairs."""
        return self._history.copy()

    def get_valid_actions(self) -> list[str]:
        """
        Get a list of valid actions for the current state.
        Note: This requires spacy to be properly installed.
        """
        try:
            return self.env.get_valid_actions()
        except Exception:
            # Return common actions if spacy isn't available
            return [
                "north", "south", "east", "west",
                "up", "down", "look", "inventory",
                "take all", "open mailbox", "read"
            ]

    def save_state(self):
        """Save the current game state (the emulator snapshot from the env)."""
        return self.env.get_state()

    def load_state(self, state):
        """Load a previously saved game state.

        The reward baseline is re-synchronised with the restored game's
        score; without this the first ``step()`` after a restore would report
        the score difference between the two timelines as its reward. The
        recorded action history is not rewound.
        """
        self.env.set_state(state)
        self._last_score = self.env.get_score()

    def get_walkthrough(self) -> list[str]:
        """Get the walkthrough for the game (for debugging/comparison only)."""
        return self.env.get_walkthrough()
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
# Alias for backwards compatibility
|
| 190 |
+
ZorkEnvironment = TextAdventureEnv
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
# Example usage: list the discovered games, start one, and run two commands.
if __name__ == "__main__":
    import sys

    # List available games (only the first 15 names are printed)
    games = list_available_games()
    print(f"Available games ({len(games)} total):")
    print(f" {', '.join(games[:15])}...")
    print()

    # Use command line arg or default to zork1
    game = sys.argv[1] if len(sys.argv) > 1 else "zork1"

    env = TextAdventureEnv(game)
    state = env.reset()

    print(f"=== {env.game.upper()} ===")
    print(f"Max Score: {state.max_score}")
    print(f"\n{state.observation}")
    # Only the first 10 valid actions are shown.
    print(f"\nValid actions: {env.get_valid_actions()[:10]}...")

    # Try a few actions
    for action in ["look", "inventory"]:
        print(f"\n> {action}")
        state = env.step(action)
        print(state.observation)
        print(f"Score: {state.score}, Reward: {state.reward}")
|
mcp_server/README.md
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Zork MCP Server
|
| 2 |
+
|
| 3 |
+
This directory contains an MCP (Model Context Protocol) server that exposes Zork game tools to LLM agents.
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
The MCP server wraps the Jericho Zork environment and provides tools that any MCP-compatible agent (like Mini SWE Agent) can use to play the game.
|
| 8 |
+
|
| 9 |
+
## Tools Available
|
| 10 |
+
|
| 11 |
+
| Tool | Description |
|
| 12 |
+
|------|-------------|
|
| 13 |
+
| `play_action(action)` | Execute a game command (e.g., "north", "take lamp") |
|
| 14 |
+
| `memory()` | Get current state summary (location, score, recent actions) |
|
| 15 |
+
| `get_map()` | View explored locations and connections |
|
| 16 |
+
| `inventory()` | Check items you're carrying |
|
| 17 |
+
| `valid_actions()` | Get hints on available commands |
|
| 18 |
+
| `reset_game(game)` | Start over with zork1, zork2, or zork3 |
|
| 19 |
+
| `hint()` | Get contextual hints for your situation |
|
| 20 |
+
|
| 21 |
+
## Resources
|
| 22 |
+
|
| 23 |
+
The server also exposes MCP resources:
|
| 24 |
+
- `zork://state` - Current game state
|
| 25 |
+
- `zork://history` - Complete action history
|
| 26 |
+
- `zork://map` - Explored locations map
|
| 27 |
+
|
| 28 |
+
## Running the Server
|
| 29 |
+
|
| 30 |
+
### Standalone (for testing)
|
| 31 |
+
```bash
|
| 32 |
+
python mcp_server/zork_server.py
|
| 33 |
+
```
|
| 34 |
+
|
| 35 |
+
### With MCP Inspector (for debugging)
|
| 36 |
+
```bash
|
| 37 |
+
npx @modelcontextprotocol/inspector python mcp_server/zork_server.py
|
| 38 |
+
```
|
| 39 |
+
|
| 40 |
+
### With Mini SWE Agent
|
| 41 |
+
```bash
|
| 42 |
+
python play_zork.py
|
| 43 |
+
```
|
| 44 |
+
|
| 45 |
+
## Configuration
|
| 46 |
+
|
| 47 |
+
The `mcp_config.json` file configures the server for use with MCP clients:
|
| 48 |
+
|
| 49 |
+
```json
|
| 50 |
+
{
|
| 51 |
+
"mcpServers": {
|
| 52 |
+
"zork": {
|
| 53 |
+
"command": "python",
|
| 54 |
+
      "args": ["mcp_server/zork_server.py"],
      "cwd": "${workspaceFolder}"
|
| 55 |
+
}
|
| 56 |
+
}
|
| 57 |
+
}
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
## Architecture
|
| 61 |
+
|
| 62 |
+
```
|
| 63 |
+
┌─────────────────────────────────────────┐
|
| 64 |
+
│ MCP Client (Agent) │
|
| 65 |
+
│ (Mini SWE Agent / Claude / etc.) │
|
| 66 |
+
└──────────────────┬──────────────────────┘
|
| 67 |
+
│ MCP Protocol (stdio)
|
| 68 |
+
▼
|
| 69 |
+
┌─────────────────────────────────────────┐
|
| 70 |
+
│ Zork MCP Server │
|
| 71 |
+
│ (FastMCP - zork_server.py) │
|
| 72 |
+
│ │
|
| 73 |
+
│ Tools: play_action, memory, map, │
|
| 74 |
+
│ inventory, valid_actions, │
|
| 75 |
+
│ reset_game, hint │
|
| 76 |
+
└──────────────────┬──────────────────────┘
|
| 77 |
+
│
|
| 78 |
+
▼
|
| 79 |
+
┌─────────────────────────────────────────┐
|
| 80 |
+
│ Jericho + Frotz │
|
| 81 |
+
│ (Z-machine game interpreter) │
|
| 82 |
+
└─────────────────────────────────────────┘
|
| 83 |
+
```
|
mcp_server/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# Text Adventure MCP Server
|
mcp_server/mcp_config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"mcpServers": {
|
| 3 |
+
"zork": {
|
| 4 |
+
"command": "python",
|
| 5 |
+
"args": ["mcp_server/zork_server.py"],
|
| 6 |
+
"cwd": "${workspaceFolder}"
|
| 7 |
+
}
|
| 8 |
+
}
|
| 9 |
+
}
|
mcp_server/zork_server.py
ADDED
|
@@ -0,0 +1,420 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Text Adventure MCP Server - Exposes text adventure games via Model Context Protocol.
|
| 3 |
+
|
| 4 |
+
This server allows any MCP-compatible agent to play Zork and other text adventure
|
| 5 |
+
games using tools for game actions, memory, mapping, and inventory.
|
| 6 |
+
|
| 7 |
+
Uses FastMCP for simple, Pythonic MCP server implementation.
|
| 8 |
+
|
| 9 |
+
Usage:
|
| 10 |
+
# Run directly (stdio transport) - default game is zork1
|
| 11 |
+
python mcp_server/zork_server.py
|
| 12 |
+
|
| 13 |
+
# Run with a different game
|
| 14 |
+
GAME=zork2 python mcp_server/zork_server.py
|
| 15 |
+
GAME=advent python mcp_server/zork_server.py
|
| 16 |
+
GAME=enchanter python mcp_server/zork_server.py
|
| 17 |
+
|
| 18 |
+
# Use with FastMCP dev tools
|
| 19 |
+
fastmcp dev mcp_server/zork_server.py
|
| 20 |
+
|
| 21 |
+
# Connect from an MCP client
|
| 22 |
+
from fastmcp import Client
|
| 23 |
+
async with Client("mcp_server/zork_server.py") as client:
|
| 24 |
+
result = await client.call_tool("play_action", {"action": "look"})
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
import sys
|
| 28 |
+
import os
|
| 29 |
+
|
| 30 |
+
# Add parent directory to path to import games module
|
| 31 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 32 |
+
|
| 33 |
+
from fastmcp import FastMCP
|
| 34 |
+
from games.zork_env import TextAdventureEnv, list_available_games
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# Get game from environment variable (default: zork1)
|
| 38 |
+
INITIAL_GAME = os.environ.get("GAME", "zork1")
|
| 39 |
+
|
| 40 |
+
# Create the MCP server
|
| 41 |
+
mcp = FastMCP("Text Adventure Server")
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
class GameState:
    """Manages the text adventure game state and exploration data.

    Wraps a ``TextAdventureEnv`` and keeps, alongside the latest environment
    state, a bounded action history, the exits discovered so far, and the
    name of the room the player was last seen in.
    """

    # Only the most recent (action, observation) pairs are retained.
    MAX_HISTORY = 50

    # Commands that are treated as movement when updating the map.
    MOVEMENT_ACTIONS = frozenset({
        "north", "south", "east", "west", "up", "down",
        "enter", "exit", "n", "s", "e", "w", "u", "d",
    })

    def __init__(self, game: str = "zork1"):
        """Start ``game`` and initialise an empty history and map."""
        self.game_name = game
        self.env = TextAdventureEnv(game)
        self.state = self.env.reset()
        self.history: list[tuple[str, str]] = []
        self.explored_locations: dict[str, set[str]] = {}  # location -> set of exits
        self.current_location: str = self._extract_location(self.state.observation)

    def _extract_location(self, observation: str) -> str:
        """Return the first line of ``observation``, or "Unknown" if it is blank.

        The first line is only *usually* the room name; callers that need more
        certainty should also check ``_looks_like_room_name``.
        """
        first_line = observation.strip().split("\n", 1)[0].rstrip()
        # ``"".split("\n")`` yields ``[""]``, so emptiness must be tested on the
        # line itself rather than on the list of lines.
        return first_line or "Unknown"

    @staticmethod
    def _looks_like_room_name(line: str) -> bool:
        """Heuristically tell a room title from a full-sentence game message.

        Responses to failed moves such as "You can't go that way." end with
        sentence punctuation. Room titles are assumed not to; this is a
        heuristic and has not been verified for every supported game.
        """
        return bool(line) and line != "Unknown" and line[-1] not in ".!?"

    @staticmethod
    def _clean_item_name(item) -> str:
        """Reduce an inventory object's string form to its bare name."""
        item_str = str(item)
        # Handle Jericho's object format: "leaflet Parent4 Sibling0..."
        # Look for "Parent" (case-insensitive) to find where metadata starts.
        marker = item_str.lower().find("parent")
        name = item_str[:marker].strip() if marker != -1 else item_str
        # Remove a leading "obj123: " prefix if present. Split only once so
        # that names which themselves contain a colon are not truncated.
        if ":" in name:
            name = name.split(":", 1)[1].strip()
        return name

    def take_action(self, action: str) -> str:
        """Execute a game action, record it, and return the game's response.

        The raw ``action`` is forwarded to the environment unchanged. For
        movement commands (matched case-insensitively, ignoring surrounding
        whitespace) the map is updated, but only when the response starts
        with something that looks like a room name, so that failed moves do
        not create bogus locations.
        """
        self.state = self.env.step(action)
        result = self.state.observation

        # Track history, keeping only the most recent entries.
        self.history.append((action, result))
        if len(self.history) > self.MAX_HISTORY:
            self.history = self.history[-self.MAX_HISTORY:]

        # Update map
        command = action.strip().lower()
        if command in self.MOVEMENT_ACTIONS:
            exits = self.explored_locations.setdefault(self.current_location, set())
            new_location = self._extract_location(result)
            if (self._looks_like_room_name(new_location)
                    and new_location != self.current_location):
                exits.add(f"{command} -> {new_location}")
                self.current_location = new_location

        return result

    def get_memory(self) -> str:
        """Get a summary of current game state.

        Includes location, score, moves, game name, the last five actions
        (each response cut to 60 characters) and the current observation.
        """
        recent = self.history[-5:]
        if recent:
            recent_str = "\n".join(f" > {a} → {r[:60]}..." for a, r in recent)
        else:
            recent_str = " (none yet)"

        return "\n".join([
            "Current State:",
            f"- Location: {self.current_location}",
            f"- Score: {self.state.score} points",
            f"- Moves: {self.state.moves}",
            f"- Game: {self.game_name}",
            "",
            "Recent Actions:",
            recent_str,
            "",
            "Current Observation:",
            f"{self.state.observation}",
        ])

    def get_map(self) -> str:
        """Get a map of explored locations and the exits found from each."""
        if not self.explored_locations:
            return "Map: No locations explored yet. Try moving around!"

        lines = ["Explored Locations and Exits:"]
        for loc, exits in sorted(self.explored_locations.items()):
            lines.append(f"\n* {loc}")
            lines.extend(f" -> {exit_info}" for exit_info in sorted(exits))

        lines.append(f"\n[Current] {self.current_location}")
        return "\n".join(lines)

    def get_inventory(self) -> str:
        """Get current inventory as a comma-separated list of item names."""
        # The state may lack an ``inventory`` attribute or hold a falsy one.
        items = getattr(self.state, "inventory", None) or []
        if not items:
            return "Inventory: You are empty-handed."

        item_names = [self._clean_item_name(item) for item in items]
        return f"Inventory: {', '.join(item_names)}"

    def get_valid_actions(self) -> str:
        """Get up to 20 valid actions for the current state, or generic advice."""
        getter = getattr(self.env, "get_valid_actions", None)
        if getter is not None:
            try:
                valid = getter()
                if valid:
                    return f"Valid actions: {', '.join(str(a) for a in valid[:20])}"
            except Exception:
                # Deliberate best-effort: any failure while querying the
                # environment falls back to the generic advice below.
                pass
        return "Valid actions: Try standard commands like look, north, south, east, west, take <item>, open <thing>"
|
| 150 |
+
|
| 151 |
+
|
| 152 |
+
# Global game state (initialized on first use)
|
| 153 |
+
_game_state: GameState | None = None
|
| 154 |
+
|
| 155 |
+
|
| 156 |
+
def get_game() -> GameState:
    """Return the shared ``GameState``, creating it for ``INITIAL_GAME`` on first use."""
    global _game_state
    if _game_state is not None:
        return _game_state
    # Lazy initialisation: the game is only started when a tool first needs it.
    _game_state = GameState(INITIAL_GAME)
    return _game_state
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
# ============================================================================
|
| 165 |
+
# MCP Tools
|
| 166 |
+
# ============================================================================
|
| 167 |
+
|
| 168 |
+
@mcp.tool()
def play_action(action: str) -> str:
    """
    Execute a game action in the text adventure.

    Common commands:
    - Movement: north, south, east, west, up, down, enter, exit (or n, s, e, w, u, d)
    - Objects: take <item>, drop <item>, open <thing>, close <thing>, put <item> in <container>
    - Look: look, examine <thing>, read <thing>
    - Combat: attack <enemy> with <weapon>
    - Light: turn on lamp, light match
    - Other: wait, score, inventory

    Args:
        action: The command to execute (e.g., 'north', 'take lamp', 'open mailbox')

    Returns:
        The game's response to your action
    """
    game = get_game()
    sections = [game.take_action(action)]
    state = game.state

    # Report score changes in both directions so the agent also learns
    # when an action cost it points.
    if state.reward > 0:
        sections.append(f"+{state.reward} points! (Total: {state.score})")
    elif state.reward < 0:
        sections.append(f"{state.reward} points! (Total: {state.score})")

    if state.done:
        sections.append("GAME OVER")

    # Sections are separated by a blank line, matching the original layout.
    return "\n\n".join(sections)
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
@mcp.tool()
def memory() -> str:
    """
    Get a summary of the current game state.

    Returns your location, score, moves, recent actions, and current observation.
    Use this to understand where you are and what happened recently.
    Very useful for avoiding loops and tracking progress.
    """
    # Thin wrapper: the summary itself is built by GameState.get_memory().
    game = get_game()
    return game.get_memory()
|
| 212 |
+
|
| 213 |
+
|
| 214 |
+
@mcp.tool()
def get_map() -> str:
    """
    Get a map showing all locations you have explored and the connections between them.

    Useful for navigation and planning routes back to previous locations.
    The map builds up as you explore more of the game world.
    """
    # Thin wrapper: the map text is built by GameState.get_map().
    game = get_game()
    return game.get_map()
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
@mcp.tool()
def inventory() -> str:
    """
    Check what items you are currently carrying.

    Essential before trying to use, drop, or interact with items.
    Most games have an inventory limit, so manage your items wisely.
    """
    # Thin wrapper: item names are formatted by GameState.get_inventory().
    game = get_game()
    return game.get_inventory()
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
@mcp.tool()
def valid_actions() -> str:
    """
    Get a list of valid actions available in the current game state.

    Helpful when stuck or unsure what commands the game accepts.
    Note: This may not include all possible actions, just common ones.
    """
    # Thin wrapper: the list (or fallback advice) comes from
    # GameState.get_valid_actions().
    game = get_game()
    return game.get_valid_actions()
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
@mcp.tool()
def reset_game(game: str = "zork1") -> str:
    """
    Reset the game to the beginning or switch to a different game.

    Use this to start over if you get stuck, die, or want to try a different game.

    Args:
        game: Game name (e.g., 'zork1', 'zork2', 'advent', 'enchanter')
              Use list_games() to see available options.

    Returns:
        The initial game text
    """
    global _game_state
    try:
        # The global is only rebound if GameState() succeeds, so a failed
        # reset leaves the previous game in place.
        _game_state = GameState(game)
        opening_text = _game_state.state.observation
    except ValueError as e:
        return f"Error: {e}"
    return f"Game reset to {game}.\n\n{opening_text}"
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
@mcp.tool()
def list_games() -> str:
    """
    List all available text adventure games.

    Returns:
        List of game names that can be passed to reset_game()
    """
    names = list_available_games()
    # One header line with the count, then the names on a single line.
    header = f"Available games ({len(names)} total):\n"
    return header + ", ".join(names)
|
| 279 |
+
|
| 280 |
+
|
| 281 |
+
@mcp.tool()
def hint() -> str:
    """
    Get a hint about what to do next based on your current situation.

    Provides general guidance without spoiling puzzle solutions.
    """
    game = get_game()
    # Everything is lower-cased so the keyword checks are case-insensitive.
    location = game.current_location.lower()
    inv = game.get_inventory().lower()
    observation = game.state.observation.lower()

    hints = []

    # Darkness detection (common in many games)
    if "dark" in location or "dark" in observation or "pitch black" in observation:
        hints.append("It's dangerous in the dark! You need a light source.")
        hints.append("If you have a lamp, try 'turn on lamp'.")

    # Common items to look for: mentioned in the room but not yet carried.
    item_hints = (
        ("lamp", "There's a lamp here - light sources are essential!"),
        ("lantern", "There's a lantern here - you'll need light for dark areas!"),
        ("sword", "A sword might be useful for combat encounters."),
        ("key", "A key might unlock something important."),
    )
    for item, message in item_hints:
        if item in observation and item not in inv:
            hints.append(message)

    # Container hints
    if any(word in observation for word in ["mailbox", "chest", "box", "container", "cabinet"]):
        hints.append("Try opening containers to find hidden items.")

    # Door/window hints
    if "door" in observation or "window" in observation:
        hints.append("There might be a way in or out here. Try 'open' commands.")

    # General hints if nothing specific found
    if not hints:
        hints.append("Explore all directions: north, south, east, west, up, down.")
        hints.append("Examine interesting objects with 'examine <thing>'.")
        hints.append("Pick up useful items with 'take <item>'.")
        hints.append("Open containers and read documents for clues.")

    # Use real newlines: the previous "\\n" emitted a literal backslash-n,
    # collapsing every hint onto one line.
    return "Hints:\n" + "\n".join(f" - {h}" for h in hints)
|
| 326 |
+
|
| 327 |
+
|
| 328 |
+
# ============================================================================
|
| 329 |
+
# MCP Resources
|
| 330 |
+
# ============================================================================
|
| 331 |
+
|
| 332 |
+
@mcp.resource("game://state")
def get_state_resource() -> str:
    """Current game state as a resource."""
    # Same text as the memory() tool, exposed under a resource URI.
    game = get_game()
    return game.get_memory()
|
| 336 |
+
|
| 337 |
+
|
| 338 |
+
@mcp.resource("game://history")
def get_history_resource() -> str:
    """Complete action history as a resource."""
    history = get_game().history
    if not history:
        return "No actions taken yet."

    # One numbered line per recorded action; each response is cut to 80
    # characters and always suffixed with "...".
    entries = []
    for number, (action, result) in enumerate(history, start=1):
        entries.append(f"{number}. {action} -> {result[:80]}...")
    return "\n".join(entries)
|
| 346 |
+
|
| 347 |
+
|
| 348 |
+
@mcp.resource("game://map")
def get_map_resource() -> str:
    """Explored map as a resource."""
    # Same text as the get_map() tool, exposed under a resource URI.
    game = get_game()
    return game.get_map()
|
| 352 |
+
|
| 353 |
+
|
| 354 |
+
# ============================================================================
|
| 355 |
+
# Game Prompt (for agents)
|
| 356 |
+
# ============================================================================
|
| 357 |
+
|
| 358 |
+
GAME_PROMPT = """You are playing a classic text adventure game.
|
| 359 |
+
|
| 360 |
+
## YOUR GOAL
|
| 361 |
+
Explore the world, solve puzzles, collect treasures, and maximize your score.
|
| 362 |
+
|
| 363 |
+
## VALID COMMANDS (use ONLY these exact verbs)
|
| 364 |
+
|
| 365 |
+
Movement:
|
| 366 |
+
north, south, east, west, up, down (or n, s, e, w, u, d)
|
| 367 |
+
enter, exit, climb, cross, go <direction>
|
| 368 |
+
|
| 369 |
+
Looking:
|
| 370 |
+
look, examine <thing>, look at <thing>, look in <thing>, read <thing>
|
| 371 |
+
|
| 372 |
+
Objects:
|
| 373 |
+
take <item>, drop <item>, pick up <item>
|
| 374 |
+
open <thing>, close <thing>, unlock <thing> with <key>
|
| 375 |
+
put <item> in <container>, give <item> to <person>
|
| 376 |
+
|
| 377 |
+
Light:
|
| 378 |
+
turn on lamp, turn off lamp, light match
|
| 379 |
+
|
| 380 |
+
Combat:
|
| 381 |
+
attack <enemy> with <weapon>, kill <enemy> with <weapon>
|
| 382 |
+
|
| 383 |
+
Other:
|
| 384 |
+
inventory (or i), wait (or z), score
|
| 385 |
+
push <thing>, pull <thing>, move <thing>
|
| 386 |
+
tie <rope> to <thing>, eat <food>, wave <item>
|
| 387 |
+
|
| 388 |
+
## FORBIDDEN VERBS (these will NOT work):
|
| 389 |
+
check, inspect, search, investigate, grab, pick, use, interact,
|
| 390 |
+
go to, walk to, head to, travel, proceed
|
| 391 |
+
|
| 392 |
+
## STRATEGY TIPS
|
| 393 |
+
1. Explore systematically - check all directions
|
| 394 |
+
2. Read everything - open containers, read documents, examine objects
|
| 395 |
+
3. Use get_map() to track explored locations
|
| 396 |
+
4. Light is essential - find a light source before dark areas!
|
| 397 |
+
5. Manage inventory - you can only carry limited items
|
| 398 |
+
|
| 399 |
+
## GETTING STARTED
|
| 400 |
+
1. Call memory() to see your current state
|
| 401 |
+
2. Explore your starting area thoroughly
|
| 402 |
+
3. Pick up useful items (light sources, weapons, keys)
|
| 403 |
+
|
| 404 |
+
Good luck!
|
| 405 |
+
"""
|
| 406 |
+
|
| 407 |
+
|
| 408 |
+
def get_game_prompt(game: str = "zork1") -> str:
    """Get the system prompt for playing text adventures.

    Args:
        game: Name of the game being played; mentioned in the closing note.

    Returns:
        ``GAME_PROMPT`` followed by a note naming the current game and the
        number of available games.
    """
    # Count the games at call time instead of hard-coding a number that
    # goes stale whenever the game list changes (list_games() does the same).
    game_count = len(list_available_games())
    note = (
        f"\n\nNote: Currently playing {game}. "
        f"Use list_games() to see all {game_count} available games."
    )
    return GAME_PROMPT + note
|
| 413 |
+
|
| 414 |
+
|
| 415 |
+
# ============================================================================
|
| 416 |
+
# Main
|
| 417 |
+
# ============================================================================
|
| 418 |
+
|
| 419 |
+
if __name__ == "__main__":
|
| 420 |
+
mcp.run()
|
requirements.txt
CHANGED
|
@@ -1 +1,14 @@
|
|
| 1 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Core dependencies
|
| 2 |
+
jericho
|
| 3 |
+
python-dotenv
|
| 4 |
+
|
| 5 |
+
# LLM providers
|
| 6 |
+
huggingface_hub
|
| 7 |
+
openai
|
| 8 |
+
anthropic
|
| 9 |
+
|
| 10 |
+
# MCP Server
|
| 11 |
+
fastmcp
|
| 12 |
+
|
| 13 |
+
# Function calling (optional, for the alternative approach)
|
| 14 |
+
langchain-core
|
run_agent.py
ADDED
|
@@ -0,0 +1,352 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Unified Text Adventure Agent Runner
|
| 4 |
+
|
| 5 |
+
Run different types of LLM agents to play text adventure games:
|
| 6 |
+
- react: Basic ReAct agent with HuggingFace models
|
| 7 |
+
- function: Function-calling controller (API-based or text-based)
|
| 8 |
+
- mcp: MCP ReAct agent using FastMCP Client
|
| 9 |
+
|
| 10 |
+
Usage:
|
| 11 |
+
python run_agent.py --mode react
|
| 12 |
+
python run_agent.py --mode function
|
| 13 |
+
python run_agent.py --mode mcp
|
| 14 |
+
|
| 15 |
+
Examples:
|
| 16 |
+
# Run the basic ReAct agent
|
| 17 |
+
python run_agent.py --mode react
|
| 18 |
+
|
| 19 |
+
# Run the function-calling controller (API-based)
|
| 20 |
+
python run_agent.py --mode function
|
| 21 |
+
|
| 22 |
+
# Run the function-calling controller (text-based, works with any model)
|
| 23 |
+
python run_agent.py --mode function --simple
|
| 24 |
+
|
| 25 |
+
# Run with MCP ReAct agent (uses FastMCP Client)
|
| 26 |
+
python run_agent.py --mode mcp
|
| 27 |
+
|
| 28 |
+
# Play a different game
|
| 29 |
+
python run_agent.py --mode mcp --game advent
|
| 30 |
+
"""
|
| 31 |
+
|
| 32 |
+
import argparse
|
| 33 |
+
import sys
|
| 34 |
+
import os
|
| 35 |
+
import time
|
| 36 |
+
from pathlib import Path
|
| 37 |
+
|
| 38 |
+
# Add games module to path for discovering available games
|
| 39 |
+
sys.path.insert(0, str(Path(__file__).parent))
|
| 40 |
+
from games.zork_env import list_available_games, TextAdventureEnv
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# =============================================================================
|
| 44 |
+
# Mode: ReAct Agent
|
| 45 |
+
# =============================================================================
|
| 46 |
+
|
| 47 |
+
def run_react_agent(args):
    """Play one game with the basic ReAct agent and return the summary dict.

    ``args`` is the parsed command line; ``game``, ``model``, ``verbose`` and
    ``max_steps`` are read from it.
    """
    # Imported here so the agent module is only loaded for this mode.
    from agents.react_agent import ReActAgent, ReActConfig

    banner = (
        "\n[ReAct] Running ReAct Agent",
        f" Game: {args.game}",
        f" Model: {args.model}",
        "",
    )
    for line in banner:
        print(line)

    env = TextAdventureEnv(args.game)
    agent = ReActAgent(ReActConfig(verbose=args.verbose, model=args.model))

    return run_game_loop(env, agent, args.max_steps, args.verbose)
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def run_game_loop(env, agent, max_steps: int, verbose: bool) -> dict:
    """Common game loop for ReAct-style agents.

    Resets ``env`` and ``agent``, then alternates between asking the agent
    for an action and stepping the environment until the game ends,
    ``max_steps`` is reached, or the user presses Ctrl-C. Returns the dict
    produced by ``print_summary``. ``verbose`` is accepted but not used here.
    """
    banner_rule = "=" * 60
    step_rule = "─" * 40

    state = env.reset()
    agent.reset()

    print(banner_rule)
    print(f"{env.game.upper()} - Starting Game")
    print(f"Max Score: {state.max_score}")
    print(banner_rule)
    print(f"\n{state.observation}\n")

    start_time = time.time()
    step = 0  # stays 0 if the loop body never runs

    try:
        for step in range(1, max_steps + 1):
            print(f"\n{step_rule}")
            print(f"Step {step}")
            print(step_rule)

            action = agent.choose_action(state.observation, state)
            print(f"\n> {action}")

            state = env.step(action)
            print(f"\n{state.observation}")

            # Pick the score line: gain, loss, or unchanged.
            totals = f"{state.score}/{state.max_score}"
            if state.reward > 0:
                score_line = f"\n+{state.reward} points! (Total: {totals})"
            elif state.reward < 0:
                score_line = f"\n{state.reward} points! (Total: {totals})"
            else:
                score_line = f"\nScore: {totals}"
            print(score_line)

            agent.update_history(action, state.observation, state)

            if state.done:
                print("\n" + banner_rule)
                print("GAME OVER!")
                break

    except KeyboardInterrupt:
        # Ctrl-C ends the run early but still produces a summary.
        print("\n\nGame interrupted by user")

    return print_summary(env.game, state, step, time.time() - start_time)
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
# =============================================================================
|
| 111 |
+
# Mode: MCP ReAct Agent
|
| 112 |
+
# =============================================================================
|
| 113 |
+
|
| 114 |
+
def run_mcp_agent(args):
    """Run MCP ReAct Agent using FastMCP Client.

    ``args`` is the parsed command line; ``game``, ``model``, ``verbose`` and
    ``max_steps`` are read from it. Returns whatever ``agent.run`` returns.
    """
    import asyncio
    from agents.mcp_react_agent import MCPReActAgent, MCPAgentConfig

    # Resolve the server script next to this file rather than relative to
    # the current working directory, so the command also works when it is
    # launched from outside the project root.
    server_script = Path(__file__).resolve().parent / "mcp_server" / "zork_server.py"

    print("\n[MCP] Running MCP ReAct Agent with FastMCP")
    print(f" Game: {args.game}")
    print(f" Model: {args.model}")
    print(" Server: mcp_server/zork_server.py")
    print()

    config = MCPAgentConfig(verbose=args.verbose, model=args.model, game=args.game)
    agent = MCPReActAgent(str(server_script), config)

    return asyncio.run(agent.run(max_steps=args.max_steps))
|
| 129 |
+
|
| 130 |
+
|
| 131 |
+
# =============================================================================
|
| 132 |
+
# Mode: Function Calling
|
| 133 |
+
# =============================================================================
|
| 134 |
+
|
| 135 |
+
def run_function_calling(args):
    """Run the function-calling controller.

    Uses the text-based ``SimpleController`` when ``args.simple`` is set and
    the API-based ``FunctionCallingController`` otherwise, then plays
    ``args.game`` for at most ``args.max_steps`` steps. Returns the dict
    produced by ``print_summary``.
    """
    # The controllers import their siblings as top-level modules, so the
    # function_calling directory has to be on sys.path before importing.
    sys.path.insert(0, str(Path(__file__).parent / "function_calling"))
    from tools import add_to_history

    if args.simple:
        from simple_controller import SimpleController
        print("\n[Function] Running Function Calling Controller (text-based)")
        controller = SimpleController(model=args.model)
    else:
        from controller import FunctionCallingController
        print("\n[Function] Running Function Calling Controller (API-based)")
        controller = FunctionCallingController(model=args.model)

    print(f" Game: {args.game}")
    print(f" Model: {args.model}")
    print()

    env = TextAdventureEnv(args.game)
    state = env.reset()

    print("=" * 60)
    print(f"{args.game.upper()} - Function Calling Mode")
    print("=" * 60)
    print(f"\n{state.observation}\n")

    start_time = time.time()
    step = 0  # stays 0 if the loop body never runs

    try:
        for step in range(1, args.max_steps + 1):
            print(f"\n{'─' * 50}")
            print(f"Step {step}/{args.max_steps} | Score: {state.score}")
            print("─" * 50)

            action = controller.get_action(state.observation, state)
            print(f"\n> ACTION: {action}")

            state = env.step(action)
            add_to_history(action, state.observation)

            print(f"\n{state.observation}")

            # Report losses as well as gains, as run_game_loop does.
            if state.reward > 0:
                print(f"\n+{state.reward} points!")
            elif state.reward < 0:
                print(f"\n{state.reward} points!")

            if state.done:
                print("\nGAME OVER!")
                break

    except KeyboardInterrupt:
        # Ctrl-C ends the run early but still produces a summary.
        print("\n\nGame interrupted by user")

    elapsed_time = time.time() - start_time
    return print_summary(args.game, state, step, elapsed_time)
|
| 191 |
+
|
| 192 |
+
|
| 193 |
+
# =============================================================================
|
| 194 |
+
# Common Utilities
|
| 195 |
+
# =============================================================================
|
| 196 |
+
|
| 197 |
+
def print_summary(game: str, state, step: int, elapsed_time: float) -> dict:
    """Print game summary and return results dict.

    Args:
        game: Name of the game that was played.
        state: Final game state; ``score``, ``max_score``, ``moves`` and
            ``done`` are read from it.
        step: Number of agent steps taken.
        elapsed_time: Wall-clock duration of the run in seconds.

    Returns:
        A dict with the keys ``game``, ``final_score``, ``max_score``,
        ``score_percentage``, ``moves``, ``steps``, ``elapsed_time`` and
        ``game_over``.
    """
    # Guard against a zero max score, which would otherwise raise
    # ZeroDivisionError and lose the whole summary.
    if state.max_score:
        score_percentage = 100 * state.score / state.max_score
    else:
        score_percentage = 0.0

    rule = "=" * 60
    print("\n" + rule)
    print("GAME SUMMARY")
    print(rule)
    print(f"Game: {game}")
    print(f"Final Score: {state.score}/{state.max_score} ({score_percentage:.1f}%)")
    print(f"Total Moves: {state.moves}")
    print(f"Steps Taken: {step}")
    print(f"Time Elapsed: {elapsed_time:.1f} seconds")
    print(rule)

    return {
        "game": game,
        "final_score": state.score,
        "max_score": state.max_score,
        "score_percentage": score_percentage,
        "moves": state.moves,
        "steps": step,
        "elapsed_time": elapsed_time,
        "game_over": state.done,
    }
|
| 219 |
+
|
| 220 |
+
|
| 221 |
+
def main():
    """Parse command-line arguments and run the selected agent mode.

    Exits with status 0 after handling ``--list-games``. Exits with status 1
    when the game name is unknown or when the selected runner raises
    ``FileNotFoundError``, ``ValueError`` or ``ImportError``.

    Returns:
        Whatever the selected runner (``run_react_agent``,
        ``run_function_calling`` or ``run_mcp_agent``) returns.
    """
    parser = argparse.ArgumentParser(
        description="Run an LLM agent to play text adventure games",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Modes:
react Basic ReAct agent (direct game interaction)
function Function-calling controller (use --simple for text-based)
mcp MCP ReAct agent using FastMCP Client (recommended)

Examples:
python run_agent.py --mode react
python run_agent.py --mode function
python run_agent.py --mode function --simple # text-based, any model
python run_agent.py --mode mcp # MCP with FastMCP
python run_agent.py --mode mcp --game advent # Play different game
python run_agent.py --mode mcp --model google/gemma-2-2b-it
"""
    )

    # Get available games for help text
    available_games = list_available_games()
    game_help = f"Game to play (default: zork1). {len(available_games)} games available."

    parser.add_argument(
        "--mode", "-m",
        type=str,
        default="react",
        choices=["react", "function", "mcp"],
        help="Which agent mode to use (default: react)"
    )
    parser.add_argument(
        "--game", "-g",
        type=str,
        default="zork1",
        help=game_help
    )
    parser.add_argument(
        "--list-games",
        action="store_true",
        help="List all available games and exit"
    )
    parser.add_argument(
        "--max-steps", "-n",
        type=int,
        default=100,
        help="Maximum number of steps to run (default: 100)"
    )
    parser.add_argument(
        "--model",
        type=str,
        default=None,
        help="Model to use (default: meta-llama/Llama-3.2-3B-Instruct)"
    )
    parser.add_argument(
        "--verbose", "-v",
        action="store_true",
        help="Show detailed reasoning from the agent"
    )
    parser.add_argument(
        "--simple",
        action="store_true",
        help="Use text-based function calling (works with any model, only for --mode function)"
    )

    args = parser.parse_args()

    # Handle --list-games
    if args.list_games:
        print(f"\nAvailable games ({len(available_games)} total):\n")
        # Print in columns
        cols = 5
        for i in range(0, len(available_games), cols):
            row = available_games[i:i+cols]
            print(" " + " ".join(f"{g:<15}" for g in row))
        print()
        sys.exit(0)

    # Validate game choice (case-insensitively)
    game_name = args.game.lower()
    if game_name not in available_games:
        print(f"\nError: Unknown game '{args.game}'")
        print(f"Use --list-games to see {len(available_games)} available options.")
        sys.exit(1)
    # Hand the validated, lower-cased name to the runners so that an input
    # such as "ZORK1" resolves to the same game that validation accepted.
    args.game = game_name

    # Get default model from environment
    default_model = os.getenv("HF_MODEL", "meta-llama/Llama-3.2-3B-Instruct")

    # Set model if not specified
    if args.model is None:
        args.model = default_model

    print("\n" + "=" * 60)
    print("Text Adventure LLM Agent Runner")
    print("=" * 60)
    print(f"Mode: {args.mode}" + (" (simple)" if args.simple else ""))
    print(f"Game: {args.game}")
    print(f"Max Steps: {args.max_steps}")
    print(f"Model: {args.model}")
    print(f"Verbose: {args.verbose}")

    # Run the selected mode
    try:
        if args.mode == "react":
            results = run_react_agent(args)
        elif args.mode == "function":
            results = run_function_calling(args)
        elif args.mode == "mcp":
            results = run_mcp_agent(args)
        else:
            # Defensive only: argparse `choices` already restricts --mode.
            print(f"Unknown mode: {args.mode}")
            sys.exit(1)

    except FileNotFoundError as e:
        print(f"\n[Error] {e}")
        sys.exit(1)
    except ValueError as e:
        print(f"\n[Error] {e}")
        print("\nTo fix this:")
        print("1. Copy .env.example to .env")
        print("2. Add your HuggingFace token (HF_TOKEN)")
        sys.exit(1)
    except ImportError as e:
        print(f"\n[Import Error] {e}")
        print("\nMake sure to install dependencies:")
        print(" pip install -r requirements.txt")
        sys.exit(1)

    return results
|
| 349 |
+
|
| 350 |
+
|
| 351 |
+
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
spaces_requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.0.0
|