Upload 6 files
Browse files- README.md +53 -7
- agent.py +1996 -0
- app.py +36 -0
- explanations.md +379 -0
- mcp_server.py +819 -0
- requirements.txt +9 -0
README.md
CHANGED
|
@@ -1,13 +1,59 @@
|
|
| 1 |
---
|
| 2 |
-
title: Text
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
-
|
| 11 |
---
|
| 12 |
|
| 13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Text Adventure Agent Submission
|
| 3 |
+
emoji: "\U0001F5FA"
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: "5.12.0"
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# Text Adventure Agent Submission
|
| 14 |
+
|
| 15 |
+
## Overview
|
| 16 |
+
|
| 17 |
+
This is my submission for the Text Adventure Agent assignment. My agent uses the ReAct pattern to play text adventure games via MCP.
|
| 18 |
+
|
| 19 |
+
## Approach
|
| 20 |
+
|
| 21 |
+
<!-- Describe your approach here -->
|
| 22 |
+
|
| 23 |
+
- What strategy does your agent use?
|
| 24 |
+
- What tools did you implement in your MCP server?
|
| 25 |
+
- Any interesting techniques or optimizations?
|
| 26 |
+
|
| 27 |
+
## Files
|
| 28 |
+
|
| 29 |
+
| File | Description |
|
| 30 |
+
|------|-------------|
|
| 31 |
+
| `agent.py` | ReAct agent with `StudentAgent` class |
|
| 32 |
+
| `mcp_server.py` | MCP server with game interaction tools |
|
| 33 |
+
| `app.py` | Gradio interface for HF Space |
|
| 34 |
+
| `requirements.txt` | Additional dependencies |
|
| 35 |
+
|
| 36 |
+
## How to Submit
|
| 37 |
+
|
| 38 |
+
1. Fork the template Space: `https://huggingface.co/spaces/LLM-course/text-adventure-template`
|
| 39 |
+
2. Clone your fork locally
|
| 40 |
+
3. Implement your agent in `agent.py` and `mcp_server.py`
|
| 41 |
+
4. Test locally (see below)
|
| 42 |
+
5. Push your changes to your Space
|
| 43 |
+
6. Submit your Space URL on the course platform
|
| 44 |
+
|
| 45 |
+
## Local Testing
|
| 46 |
+
|
| 47 |
+
```bash
|
| 48 |
+
# Install dependencies
|
| 49 |
+
pip install -r requirements.txt
|
| 50 |
+
|
| 51 |
+
# Test the MCP server interactively
|
| 52 |
+
fastmcp dev mcp_server.py
|
| 53 |
+
|
| 54 |
+
# Run your agent on a game
|
| 55 |
+
python run_agent.py --agent . --game lostpig -v -n 20
|
| 56 |
+
|
| 57 |
+
# Run evaluation
|
| 58 |
+
python -m evaluation.evaluate -s . -g lostpig -t 3
|
| 59 |
+
```
|
agent.py
ADDED
|
@@ -0,0 +1,1996 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Student Agent for Text Adventure Games
|
| 3 |
+
|
| 4 |
+
This is your submission file. Implement the StudentAgent class to play
|
| 5 |
+
text adventure games using the MCP server you also implement.
|
| 6 |
+
|
| 7 |
+
Your agent should:
|
| 8 |
+
1. Connect to the MCP server via the provided client
|
| 9 |
+
2. Use the ReAct pattern (Thought -> Action -> Observation)
|
| 10 |
+
3. Call MCP tools to interact with the game
|
| 11 |
+
4. Maximize the game score within the step limit
|
| 12 |
+
|
| 13 |
+
Required method:
|
| 14 |
+
async def run(self, client, game, max_steps, seed, verbose) -> RunResult
|
| 15 |
+
|
| 16 |
+
The 'client' is a FastMCP Client already connected to your MCP server.
|
| 17 |
+
Use it to call tools like: await client.call_tool("play_action", {"action": "look"})
|
| 18 |
+
|
| 19 |
+
Tips:
|
| 20 |
+
- Start by looking around and understanding your environment
|
| 21 |
+
- Keep track of visited locations to avoid loops
|
| 22 |
+
- Pick up useful items (lamp, sword, etc.)
|
| 23 |
+
- The seed parameter should be used to set your LLM's seed for reproducibility
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
import json
|
| 27 |
+
import os
|
| 28 |
+
import re
|
| 29 |
+
from dataclasses import dataclass, field
|
| 30 |
+
from typing import Optional
|
| 31 |
+
|
| 32 |
+
from collections import deque
|
| 33 |
+
|
| 34 |
+
from dotenv import load_dotenv
|
| 35 |
+
from huggingface_hub import InferenceClient
|
| 36 |
+
|
| 37 |
+
# Load environment variables
|
| 38 |
+
load_dotenv()
|
| 39 |
+
|
| 40 |
+
# =============================================================================
|
| 41 |
+
# LLM Configuration - DO NOT MODIFY
|
| 42 |
+
# =============================================================================
|
| 43 |
+
|
| 44 |
+
# Model to use (fixed for fair evaluation)
|
| 45 |
+
LLM_MODEL = "Qwen/Qwen2.5-72B-Instruct"
|
| 46 |
+
|
| 47 |
+
# Initialize the LLM client (uses HF_TOKEN from environment)
|
| 48 |
+
_hf_token = os.getenv("HF_TOKEN")
|
| 49 |
+
if not _hf_token:
|
| 50 |
+
raise ValueError("HF_TOKEN not found. Set it in your .env file.")
|
| 51 |
+
|
| 52 |
+
LLM_CLIENT = InferenceClient(token=_hf_token)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def call_llm(prompt: str, system_prompt: str, seed: int, max_tokens: int = 300) -> str:
    """
    Call the LLM with the given prompt. Use this function in your agent.

    Sends a two-message chat (the system prompt, then the user prompt) to
    ``LLM_MODEL`` through the module-level ``LLM_CLIENT`` with temperature 0.0,
    and returns the content of the first choice.

    Args:
        prompt: The user prompt (current game state, history, etc.)
        system_prompt: The system prompt (instructions for the agent)
        seed: Random seed for reproducibility
        max_tokens: Maximum tokens in response (default: 300)

    Returns:
        The LLM's response text

    Example:
        response = call_llm(
            prompt="You are in a forest. What do you do?",
            system_prompt=SYSTEM_PROMPT,
            seed=42,
        )
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    response = LLM_CLIENT.chat.completions.create(
        model=LLM_MODEL,
        messages=messages,
        temperature=0.0,  # Deterministic for reproducibility
        max_tokens=max_tokens,
        seed=seed,
    )

    # Only the first returned choice is used.
    # NOTE(review): the content is returned unchecked; whether it can be None
    # for some responses should be confirmed against the client documentation.
    return response.choices[0].message.content
|
| 89 |
+
|
| 90 |
+
SYNTH_SYSTEM = """You are a memory manager for a Zork-like agent.
|
| 91 |
+
Your job: compress recent experience into DECISION-USEFUL memory: what happened AND why it matters.
|
| 92 |
+
|
| 93 |
+
Return ONLY valid JSON with keys:
|
| 94 |
+
facts, blocking, inventory_goals, open_threads, visited, last_update_move.
|
| 95 |
+
|
| 96 |
+
Rules:
|
| 97 |
+
- Each entry must be <= 12 words, start with a strong noun/verb, no filler.
|
| 98 |
+
- Prefer durable, actionable info: "X is locked -> need key" beats "saw a door".
|
| 99 |
+
- Do NOT restate raw room description unless it implies a new affordance.
|
| 100 |
+
- Track failed attempts as blocking only if they should NOT be retried.
|
| 101 |
+
- If something changed (inventory, location access, score), capture the consequence.
|
| 102 |
+
- Deduplicate aggressively across all lists.
|
| 103 |
+
- Keep each list length <= 6. Keep only highest-value items.
|
| 104 |
+
|
| 105 |
+
Interpretation guide:
|
| 106 |
+
facts: stable world knowledge learned (locations, items, mechanics).
|
| 107 |
+
blocking: obstacles + what is needed; include "avoid retry" if relevant.
|
| 108 |
+
inventory_goals: items/tools to seek next (lamp, key, etc.).
|
| 109 |
+
open_threads: unresolved leads worth returning to.
|
| 110 |
+
visited: important locations only (not every room).
|
| 111 |
+
last_update_move: copy STATE_MINIMAL.moves if present; else use prior value.
|
| 112 |
+
"""
|
| 113 |
+
|
| 114 |
+
def build_synth_prompt(mem_json: Optional[dict], recent_history: str, state_obj: Optional[dict]) -> str:
    """Build the user prompt for the memory-synthesis LLM call.

    The prompt has three sections: the current memory as JSON, the recent
    steps text, and a reduced view of the game state as JSON, followed by the
    instruction to output the updated memory JSON.

    Args:
        mem_json: Current memory object; ``None`` is rendered as ``{}``.
        recent_history: Text inserted verbatim under RECENT_STEPS
            (presumably pre-formatted "action -> observation" lines; verify
            against the caller).
        state_obj: Game state mapping; ``None`` is treated as an empty dict.
            Only ``location``, ``moves``, ``score``, ``inventory``,
            ``visible_objects`` and ``last_observation`` are read.

    Returns:
        The assembled prompt text.
    """
    state_obj = state_obj or {}
    memory = {} if mem_json is None else mem_json

    # Coerce to text before slicing so a non-string observation cannot raise.
    last_obs = state_obj.get("last_observation")
    obs_head = str(last_obs)[:220] if last_obs else ""

    minimal_state = {
        "location": state_obj.get("location"),
        "moves": state_obj.get("moves"),
        "score": state_obj.get("score"),
        "inventory": state_obj.get("inventory"),
        "visible_objects": state_obj.get("visible_objects"),
        "last_observation_head": obs_head,
    }

    return f"""
CURRENT_MEMORY_JSON:
{json.dumps(memory, ensure_ascii=False)}

RECENT_STEPS (action -> observation head):
{recent_history}

STATE_MINIMAL (json):
{json.dumps(minimal_state, ensure_ascii=False)}

Update the memory JSON. Only output JSON.
"""
|
| 137 |
+
|
| 138 |
+
PLANNER_SYSTEM = """You are an objective planner for a Zork-like agent.
|
| 139 |
+
You DO NOT act in the game. You only output a plan.
|
| 140 |
+
|
| 141 |
+
Return ONLY valid JSON with keys:
|
| 142 |
+
- objectives: list of {type, description, priority, status, evidence}
|
| 143 |
+
- suggested_actions: list of strings (game commands)
|
| 144 |
+
- notes: short string
|
| 145 |
+
|
| 146 |
+
Rules:
|
| 147 |
+
- Keep objectives <= 8, deduplicate, prefer durable goals.
|
| 148 |
+
- priority: 0 (highest) .. 5 (lowest)
|
| 149 |
+
- status: "open" | "done" | "blocked"
|
| 150 |
+
- evidence: <= 12 words
|
| 151 |
+
- suggested_actions: max 3 actions; MUST respect the agent command grammar.
|
| 152 |
+
- Use short nouns from observation (mailbox, leaflet, grating, egg, etc.)
|
| 153 |
+
- If valid_actions_list is provided, prefer actions from it exactly.
|
| 154 |
+
"""
|
| 155 |
+
|
| 156 |
+
def build_planner_prompt(
    observation: str,
    state_obj: dict,
    synth_memory: dict,
    objectives_text: str,
    valid_actions_list: list[str],
    tried_here: list[str],
) -> str:
    """Assemble the user prompt for the objective-planner LLM call.

    Six titled sections are emitted in a fixed order (observation, state,
    synthesized memory, current objectives, valid actions, actions already
    tried here), followed by the closing instruction. Falsy dict/list
    arguments are serialized as an empty ``{}`` / ``[]``.

    Returns:
        The assembled prompt text.
    """

    def as_json(value, empty):
        # Falsy inputs fall back to the matching empty container.
        return json.dumps(value or empty, ensure_ascii=False)

    sections = (
        ("OBSERVATION:", observation),
        ("STATE (json):", as_json(state_obj, {})),
        ("SYNTH_MEMORY (json):", as_json(synth_memory, {})),
        ("CURRENT_OBJECTIVES (text):", objectives_text),
        ("VALID_ACTIONS_LIST:", as_json(valid_actions_list, [])),
        ("TRIED_ACTIONS_HERE:", as_json(tried_here, [])),
    )
    body = "\n\n".join(f"{title}\n{content}" for title, content in sections)
    closing = (
        "Update objectives and propose up to 3 suggested_actions.\n"
        "Output ONLY JSON.\n"
    )
    return "\n" + body + "\n\n" + closing
|
| 186 |
+
|
| 187 |
+
|
| 188 |
+
@dataclass
class RunResult:
    """Result of running the agent. Do not modify this class."""
    # NOTE(review): the field meanings below are inferred from their names;
    # confirm against the evaluation harness that consumes this class.
    final_score: int  # presumably the game score when the run ended
    max_score: int  # presumably the maximum achievable score for the game
    moves: int  # presumably the number of moves taken
    locations_visited: set[str]  # presumably the names of visited locations
    game_completed: bool
    error: Optional[str] = None  # defaults to None
    # Each history entry is a tuple of three strings; their roles are not
    # shown here (presumably thought, action, observation) - TODO confirm.
    history: list[tuple[str, str, str]] = field(default_factory=list)
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
@dataclass
class Objective:
    """One goal tracked by the agent's objective list."""
    id: str
    type: str  # "explore", "get_item", "unlock", "solve", "return"
    description: str
    priority: int  # 0 = top
    status: str  # "open" | "done" | "blocked"
    evidence: list[str]  # short traces
|
| 208 |
+
|
| 209 |
+
class ObjectiveManager:
|
| 210 |
+
def __init__(self):
|
| 211 |
+
self.objectives = []
|
| 212 |
+
self.counter = 0
|
| 213 |
+
|
| 214 |
+
def add(self, type_, desc, priority, evidence=""):
|
| 215 |
+
oid = f"obj{self.counter}"
|
| 216 |
+
self.counter += 1
|
| 217 |
+
self.objectives.append(Objective(oid, type_, desc, priority, "open", [evidence] if evidence else []))
|
| 218 |
+
|
| 219 |
+
def update_from_observation(self, obs: str, state_obj: dict):
|
| 220 |
+
low = (obs or "").lower()
|
| 221 |
+
vis = [str(x).lower() for x in (state_obj.get("visible_objects") or [])]
|
| 222 |
+
inv = " ".join([str(x).lower() for x in (state_obj.get("inventory") or [])])
|
| 223 |
+
|
| 224 |
+
# darkness
|
| 225 |
+
if "dark" in low and "lamp" not in inv and "lantern" not in inv:
|
| 226 |
+
if not self._has_open("get_item", "lamp"):
|
| 227 |
+
self.add("get_item", "Find a lamp/lantern", priority=0, evidence="It is dark")
|
| 228 |
+
|
| 229 |
+
# grating locked
|
| 230 |
+
if "grating" in low and "locked" in low:
|
| 231 |
+
if not self._has_open("get_item", "key"):
|
| 232 |
+
self.add("get_item", "Find a key", priority=1, evidence="Grating locked")
|
| 233 |
+
if not self._has_open("unlock", "grating"):
|
| 234 |
+
self.add("unlock", "Unlock the grating", priority=2, evidence="Grating locked")
|
| 235 |
+
|
| 236 |
+
# containers
|
| 237 |
+
for c in ["mailbox", "chest", "box"]:
|
| 238 |
+
if c in low and not self._has_open("open", c):
|
| 239 |
+
self.add("open", f"Open the {c}", priority=2, evidence=f"Seen {c}")
|
| 240 |
+
|
| 241 |
+
# visited
|
| 242 |
+
loc = (state_obj.get("location") or "").strip()
|
| 243 |
+
if loc and not self._has_open("visit", loc) and not self._has_done("visit", loc):
|
| 244 |
+
# pas forcément un "objectif", mais utile si tu veux "return"
|
| 245 |
+
pass
|
| 246 |
+
|
| 247 |
+
def propose_actions(self, state_obj: dict, valid_actions_list: list[str]) -> list[str]:
|
| 248 |
+
"""Return ordered action candidates."""
|
| 249 |
+
cands = []
|
| 250 |
+
# Sort objectives by priority then FIFO
|
| 251 |
+
open_objs = sorted([o for o in self.objectives if o.status == "open"], key=lambda o: (o.priority, o.id))
|
| 252 |
+
|
| 253 |
+
for o in open_objs[:3]:
|
| 254 |
+
if o.type == "get_item":
|
| 255 |
+
target = "lamp" if "lamp" in o.description.lower() else "key"
|
| 256 |
+
# propose "take lamp" if visible
|
| 257 |
+
vis = [str(x).lower() for x in (state_obj.get("visible_objects") or [])]
|
| 258 |
+
if target in vis:
|
| 259 |
+
cands.append(f"take {target}")
|
| 260 |
+
elif o.type == "open":
|
| 261 |
+
noun = o.description.split()[-1]
|
| 262 |
+
cands.append(f"open {noun}")
|
| 263 |
+
elif o.type == "unlock":
|
| 264 |
+
noun = o.description.split()[-1]
|
| 265 |
+
cands.append(f"unlock {noun}")
|
| 266 |
+
|
| 267 |
+
if valid_actions_list:
|
| 268 |
+
va = {re.sub(r"\s+", " ", a.strip().lower()) for a in valid_actions_list}
|
| 269 |
+
cands = [a for a in cands if re.sub(r"\s+", " ", a.strip().lower()) in va]
|
| 270 |
+
return cands
|
| 271 |
+
|
| 272 |
+
def _has_open(self, type_, keyword):
|
| 273 |
+
k = keyword.lower()
|
| 274 |
+
return any(o.status=="open" and o.type==type_ and k in o.description.lower() for o in self.objectives)
|
| 275 |
+
|
| 276 |
+
def _has_done(self, type_, keyword):
|
| 277 |
+
k = keyword.lower()
|
| 278 |
+
return any(o.status=="done" and o.type==type_ and k in o.description.lower() for o in self.objectives)
|
| 279 |
+
|
| 280 |
+
def render(self, k: int = 6) -> str:
|
| 281 |
+
open_objs = sorted([o for o in self.objectives if o.status == "open"], key=lambda o: (o.priority, o.id))
|
| 282 |
+
if not open_objs:
|
| 283 |
+
return "- (none)"
|
| 284 |
+
lines = []
|
| 285 |
+
for o in open_objs[:k]:
|
| 286 |
+
lines.append(f"- [{o.priority}] {o.type}: {o.description}")
|
| 287 |
+
return "\n".join(lines)
|
| 288 |
+
|
| 289 |
+
def mark_done_if_progress(self, before_state: dict, after_state: dict, action: str, obs: str):
|
| 290 |
+
a = (action or "").lower().strip()
|
| 291 |
+
low = (obs or "").lower()
|
| 292 |
+
|
| 293 |
+
# mark "open X" as done if it didn't say "closed/locked/can't"
|
| 294 |
+
if a.startswith("open "):
|
| 295 |
+
noun = a.split(" ", 1)[1]
|
| 296 |
+
if "can't" not in low and "locked" not in low and "does not" not in low:
|
| 297 |
+
for o in self.objectives:
|
| 298 |
+
if o.status == "open" and o.type == "open" and noun in o.description.lower():
|
| 299 |
+
o.status = "done"
|
| 300 |
+
|
| 301 |
+
def replace_from_llm(self, llm_objectives: list[dict]):
    """
    Replace internal objectives with the list coming from the planner LLM.

    Expected dict keys: type, description, priority, status, evidence

    The current objectives and the id counter are always reset first, and at
    most the first 12 entries are considered. Planner output is untrusted, so
    each entry is sanitised field by field:
      - entries that are not dicts, or whose description is missing, null or
        blank, are skipped;
      - a missing, null or blank type falls back to "explore";
      - a priority that cannot be converted to int falls back to 3, and the
        value is clamped to the range 0..5;
      - status is lower-cased; anything other than "open", "done" or
        "blocked" becomes "open";
      - evidence is stored as a list of strings cut to 80 characters (the
        first two items of a list, or the single value when it is truthy).
    """
    self.objectives = []
    self.counter = 0

    # Anything that is not a sequence of entries (None, dict, str, ...) cannot
    # be sliced into objectives, so there is nothing to add.
    if not isinstance(llm_objectives, (list, tuple)):
        return

    for entry in llm_objectives[:12]:
        if not isinstance(entry, dict):
            continue

        # Treat JSON null explicitly: str(None) would store the text "None".
        raw_desc = entry.get("description")
        desc = "" if raw_desc is None else str(raw_desc).strip()
        if not desc:
            continue

        raw_type = entry.get("type")
        type_ = ("" if raw_type is None else str(raw_type).strip()) or "explore"

        # A malformed priority (e.g. "high", null) should not discard the objective.
        try:
            pr = int(entry.get("priority", 3))
        except (TypeError, ValueError, OverflowError):
            pr = 3

        st = str(entry.get("status") or "open").strip().lower()
        if st not in {"open", "done", "blocked"}:
            st = "open"

        ev = entry.get("evidence", "")
        if isinstance(ev, list):
            evidence = [str(x)[:80] for x in ev[:2]]
        else:
            evidence = [str(ev)[:80]] if ev else []

        oid = f"obj{self.counter}"
        self.counter += 1
        self.objectives.append(Objective(
            id=oid,
            type=type_,
            description=desc,
            priority=max(0, min(pr, 5)),
            status=st,
            evidence=evidence
        ))
|
| 338 |
+
|
| 339 |
+
|
| 340 |
+
# =============================================================================
|
| 341 |
+
# System Prompt - Customize this for your agent
|
| 342 |
+
# =============================================================================
|
| 343 |
+
|
| 344 |
+
SYSTEM_PROMPT = """You are playing a Zork-style text adventure.
|
| 345 |
+
|
| 346 |
+
GOAL:
|
| 347 |
+
Explore, solve puzzles, collect treasures, maximize score.
|
| 348 |
+
|
| 349 |
+
YOU CONTROL THE GAME ONLY USING TOOLS.
|
| 350 |
+
You never speak to the game directly.
|
| 351 |
+
|
| 352 |
+
============================================================
|
| 353 |
+
TOOLS (ONLY THESE EXIST)
|
| 354 |
+
- play_action
|
| 355 |
+
- memory
|
| 356 |
+
- get_map
|
| 357 |
+
- inventory
|
| 358 |
+
- valid_actions
|
| 359 |
+
- tried_actions
|
| 360 |
+
- hint
|
| 361 |
+
- state
|
| 362 |
+
- exits
|
| 363 |
+
- graph
|
| 364 |
+
- checkpoint_save
|
| 365 |
+
- checkpoint_restore
|
| 366 |
+
- action_probe
|
| 367 |
+
|
| 368 |
+
ARGS RULE:
|
| 369 |
+
- play_action -> {"action": "<command>"}
|
| 370 |
+
- checkpoint_save/checkpoint_restore -> {"name": "<string>"} (optional)
|
| 371 |
+
- action_probe -> {"action": "<command>"}
|
| 372 |
+
- all others -> {}
|
| 373 |
+
|
| 374 |
+
ABSOLUTE TOOL RULE:
|
| 375 |
+
TOOL must be exactly one of the 13 names above.
|
| 376 |
+
Everything else (look, north, open mailbox, etc.) is a GAME COMMAND used only with play_action.
|
| 377 |
+
============================================================
|
| 378 |
+
|
| 379 |
+
OUTPUT FORMAT (MANDATORY, EXACT)
|
| 380 |
+
THOUGHT: <1 short sentence>
|
| 381 |
+
TOOL: <tool_name>
|
| 382 |
+
ARGS: <json>
|
| 383 |
+
|
| 384 |
+
============================================================
|
| 385 |
+
COMMAND GRAMMAR
|
| 386 |
+
Normally, your play_action command CAN be one of:
|
| 387 |
+
|
| 388 |
+
A) Movement (single word only):
|
| 389 |
+
north / south / east / west / up / down / in / out / northeast / northwest / southeast / southwest
|
| 390 |
+
|
| 391 |
+
B) Simple verb + noun (2–3 words max):
|
| 392 |
+
look
|
| 393 |
+
inventory
|
| 394 |
+
take <noun>
|
| 395 |
+
drop <noun>
|
| 396 |
+
open <noun>
|
| 397 |
+
examine <noun>
|
| 398 |
+
read <noun>
|
| 399 |
+
climb <noun>
|
| 400 |
+
enter <noun>
|
| 401 |
+
pull <noun>
|
| 402 |
+
push <noun>
|
| 403 |
+
unlock <noun>
|
| 404 |
+
|
| 405 |
+
FORBIDDEN (never use):
|
| 406 |
+
- "look around"
|
| 407 |
+
- "go north", "go west", "go northwest"
|
| 408 |
+
- "look south"
|
| 409 |
+
- placeholders like "<item>", "<thing>", "<object>"
|
| 410 |
+
|
| 411 |
+
SPECIAL EXCEPTION:
|
| 412 |
+
If (and only if) you previously called valid_actions, you may use a multi-word command ONLY if it appears EXACTLY in that valid_actions list.
|
| 413 |
+
Example: if valid_actions includes "go around forest", then you may use "go around forest".
|
| 414 |
+
Otherwise, do not invent it.
|
| 415 |
+
|
| 416 |
+
NOUN RULE:
|
| 417 |
+
Use the shortest noun from the latest observation (egg, nest, tree, grating, mailbox, leaflet).
|
| 418 |
+
Do not invent adjectives (say "egg", not "jewel-encrusted egg").
|
| 419 |
+
============================================================
|
| 420 |
+
|
| 421 |
+
TURN POLICY (ANTI-SPAM)
|
| 422 |
+
- Default tool is play_action.
|
| 423 |
+
- Do NOT call valid_actions unless you hit an error or you are stuck.
|
| 424 |
+
- Do NOT call memory unless confused. Never call memory twice in a row.
|
| 425 |
+
- get_map is occasional (only if lost).
|
| 426 |
+
- Call tried_actions only when stuck/looping OR when you have valid_actions and you want to pick a NEW action not yet tried in this location.
|
| 427 |
+
- Call hint when you are stuck or after a parser failure / loop OR after a special description with new possibilities.
|
| 428 |
+
|
| 429 |
+
============================================================
|
| 430 |
+
TREASURE RULE (CRITICAL)
|
| 431 |
+
If you see something valuable/rare (jewels, gold, treasure, ornate, precious, encrusted, crystal, egg, crown, painting):
|
| 432 |
+
YOUR NEXT ACTION MUST BE: take <item>.
|
| 433 |
+
Secure it first. Open/examine later.
|
| 434 |
+
|
| 435 |
+
If you try open/examine and the game says locked / no tools / no expertise:
|
| 436 |
+
STOP trying. KEEP the item. Leave to search for tools/keys elsewhere.
|
| 437 |
+
Do not retry the same blocked action.
|
| 438 |
+
|
| 439 |
+
============================================================
|
| 440 |
+
LOCAL BEFORE LEAVING (CRITICAL)
|
| 441 |
+
When entering a location:
|
| 442 |
+
1) If full description is not shown, do: look
|
| 443 |
+
2) Interact locally ONCE with the most important object(s):
|
| 444 |
+
- take treasure
|
| 445 |
+
- open container
|
| 446 |
+
- examine new object
|
| 447 |
+
3) Only then move.
|
| 448 |
+
|
| 449 |
+
============================================================
|
| 450 |
+
VALID_ACTIONS EXPLORATION (IMPORTANT)
|
| 451 |
+
When you have a valid_actions list for the current location:
|
| 452 |
+
- Before leaving the location, try at most 1–2 NEW high-value actions from that list that you have not tried here yet.
|
| 453 |
+
- High-value actions (try in this order): take*, open*, unlock*, enter*, climb*, up, down, pull*, push*, read*, examine*.
|
| 454 |
+
- Avoid low-value management actions unless clearly needed: "put down ...", "put ... in ...", "close ...".
|
| 455 |
+
- Never repeat the same action in the same location if it produced no progress or an error message.
|
| 456 |
+
- Exception: you may retry an action ONLY if your inventory has changed since the last attempt.
|
| 457 |
+
- Use tried_actions to know which actions you already attempted in this location.
|
| 458 |
+
|
| 459 |
+
============================================================
|
| 460 |
+
EXPLICIT POSSIBILITY OVERRIDE (CRITICAL)
|
| 461 |
+
|
| 462 |
+
If the observation explicitly says something is possible/available
|
| 463 |
+
(e.g., "It is possible to climb down", "You can enter", "A door leads ..."):
|
| 464 |
+
TRY the corresponding canonical command EVEN IF it is not listed in valid_actions.
|
| 465 |
+
|
| 466 |
+
Mapping (canonical):
|
| 467 |
+
- "possible to climb down" / "climb down" / "descend" -> down
|
| 468 |
+
- "possible to climb up" / "climb up" / "ascend" -> up
|
| 469 |
+
- "possible to enter" / "you can enter" / "way in" / "entrance" -> in
|
| 470 |
+
- "possible to leave" / "way out" -> out
|
| 471 |
+
|
| 472 |
+
Do this only once per location; if it fails, do not spam it—switch strategy or call valid_actions.
|
| 473 |
+
|
| 474 |
+
============================================================
|
| 475 |
+
MOVEMENT PRIORITY (IMPORTANT)
|
| 476 |
+
|
| 477 |
+
If you decide to MOVE and multiple movement actions are available, use this priority order:
|
| 478 |
+
|
| 479 |
+
1) Prefer "in", then "up", then "down" (these often unlock new areas/puzzles).
|
| 480 |
+
2) Then prefer a movement you have NOT tried recently from this location.
|
| 481 |
+
3) Only then choose cardinal directions: north / east / south / west (and diagonals if present).
|
| 482 |
+
|
| 483 |
+
Notes:
|
| 484 |
+
- This is only a preference when you are moving (not a rule to always move).
|
| 485 |
+
- If you just arrived in a room, follow LOCAL BEFORE LEAVING first (look + one local interaction), then move.
|
| 486 |
+
|
| 487 |
+
Examples:
|
| 488 |
+
- If valid_actions includes: in, up, north, east -> choose "in" (unless you just tried it and it failed).
|
| 489 |
+
- If valid_actions includes: up, north, south -> choose "up" (unless you just tried it and it failed).
|
| 490 |
+
|
| 491 |
+
============================================================
|
| 492 |
+
EXAMINE POLICY (ANTI-SPAM, CRITICAL)
|
| 493 |
+
|
| 494 |
+
- Do NOT use "examine X" as a default action.
|
| 495 |
+
- Use "examine X" ONLY if:
|
| 496 |
+
A) X is NEW in the latest observation, OR
|
| 497 |
+
B) X looks interactive/blocking (door, window, grating, trapdoor, gate, chest, mailbox, leaves/pile, rope, lever, button), OR
|
| 498 |
+
C) you just got a blocking message ("locked", "not enough to allow entry", "can't", etc.) and you need more detail.
|
| 499 |
+
|
| 500 |
+
- If the game replied "nothing special" (or equivalent) for the same object at the same location:
|
| 501 |
+
DO NOT examine it again there. Change strategy (open/take/pull/enter/move).
|
| 502 |
+
|
| 503 |
+
- Informational items (leaflet, note, inscription):
|
| 504 |
+
Read/examine ONCE, then ignore. Never put them in containers.
|
| 505 |
+
============================================================
|
| 506 |
+
|
| 507 |
+
============================================================
|
| 508 |
+
ERROR RECOVERY (CRITICAL)
|
| 509 |
+
If the game replies:
|
| 510 |
+
- "I don't know the word ..."
|
| 511 |
+
- "That sentence isn't one I recognize"
|
| 512 |
+
- "You can't see any X here"
|
| 513 |
+
- "locked" / "no tools"
|
| 514 |
+
|
| 515 |
+
Then:
|
| 516 |
+
1) Do NOT repeat the same command.
|
| 517 |
+
2) Simplify: shorter noun, simpler verb (look / examine <noun> / take <noun>).
|
| 518 |
+
3) If still stuck: call valid_actions {} ONCE, then pick ONE action from that list.
|
| 519 |
+
|
| 520 |
+
============================================================
|
| 521 |
+
ANTI-LOOP (CRITICAL)
|
| 522 |
+
If you already tried the same interaction with the same object and it gave no progress:
|
| 523 |
+
STOP interacting with that object.
|
| 524 |
+
Move to a new location.
|
| 525 |
+
|
| 526 |
+
Informational items:
|
| 527 |
+
If an item only prints text (like a leaflet), read once then ignore it (do not put in containers, do not shuffle it).
|
| 528 |
+
============================================================
|
| 529 |
+
|
| 530 |
+
EXAMPLES
|
| 531 |
+
|
| 532 |
+
THOUGHT: There is a mailbox; it may contain something.
|
| 533 |
+
TOOL: play_action
|
| 534 |
+
ARGS: {"action": "open mailbox"}
|
| 535 |
+
|
| 536 |
+
THOUGHT: Valuable item spotted; secure it first.
|
| 537 |
+
TOOL: play_action
|
| 538 |
+
ARGS: {"action": "take egg"}
|
| 539 |
+
|
| 540 |
+
THOUGHT: My last command failed; I need valid options.
|
| 541 |
+
TOOL: valid_actions
|
| 542 |
+
ARGS: {}
|
| 543 |
+
|
| 544 |
+
============================================================
|
| 545 |
+
STRATEGY TIPS
|
| 546 |
+
1. Explore systematically, but prefer in/up/down if available; otherwise try one new direction at a time.
|
| 547 |
+
2. Read documents once. Examine only new/blocking/valuable objects.
|
| 548 |
+
3. Use get_map() to track explored locations
|
| 549 |
+
4. Light is essential - find a light source before dark areas!
|
| 550 |
+
5. Manage inventory - you can only carry limited items
|
| 551 |
+
|
| 552 |
+
"""
|
| 553 |
+
|
| 554 |
+
|
| 555 |
+
# =============================================================================
|
| 556 |
+
# Student Agent - IMPLEMENT THIS CLASS
|
| 557 |
+
# =============================================================================
|
| 558 |
+
|
| 559 |
+
class StudentAgent:
|
| 560 |
+
"""
|
| 561 |
+
Your ReAct agent implementation.
|
| 562 |
+
|
| 563 |
+
TODO:
|
| 564 |
+
1. Implement the run() method with the ReAct loop
|
| 565 |
+
2. Parse LLM responses to extract tool calls
|
| 566 |
+
3. Track state and avoid loops
|
| 567 |
+
|
| 568 |
+
Use the provided call_llm() function to interact with the LLM.
|
| 569 |
+
"""
|
| 570 |
+
|
| 571 |
+
def __init__(self):
    """Create the agent's bookkeeping state; no game or LLM call happens here."""

    # --- trace and statistics ------------------------------------------
    # Internal trace used to build prompts.
    # Each entry: {"thought": str, "tool": str, "args": dict, "result": str}
    self.history: list[dict] = []
    # Room titles seen so far (stats/state for RunResult).
    self.locations_visited: set[str] = set()

    # --- repeat / loop guards ------------------------------------------
    # Last room title seen, so a room change can be detected
    # (used to reset the recommendation cache on new rooms).
    self._last_room_line: str | None = None
    # Last play_action command, to prevent trivial repeats.
    self._last_action: str | None = None
    # Sliding window of the 20 most recent server state hashes (loop detection).
    self._recent_state_hashes = deque(maxlen=20)

    # --- checkpoints ---------------------------------------------------
    self._checkpoint_enabled = True
    # Names of the two checkpoint slots.
    self._checkpoint_best, self._checkpoint_loop = "best", "loop"
    self._last_score_seen: int | None = None

    # --- synthetic memory ----------------------------------------------
    self.synth_memory = dict(
        facts=[],
        blocking=[],
        inventory_goals=[],
        open_threads=[],
        visited=[],
        last_update_move=0,
    )

    # --- objectives and LLM planner cache ------------------------------
    self.objman = ObjectiveManager()
    self._planner_last_step = 0
    # Run the planner at most every 5 steps (tunable).
    self._planner_cooldown = 5
    self._planner_suggested_actions: list[str] = []
    self._planner_notes: str = ""
|
| 613 |
+
|
| 614 |
+
async def run(
|
| 615 |
+
self,
|
| 616 |
+
client, # FastMCP Client connected to your MCP server
|
| 617 |
+
game: str,
|
| 618 |
+
max_steps: int,
|
| 619 |
+
seed: int,
|
| 620 |
+
verbose: bool = False,
|
| 621 |
+
) -> RunResult:
|
| 622 |
+
"""
|
| 623 |
+
Run the agent for a game session.
|
| 624 |
+
|
| 625 |
+
Args:
|
| 626 |
+
client: FastMCP Client connected to your MCP server
|
| 627 |
+
game: Name of the game being played (e.g., "zork1")
|
| 628 |
+
max_steps: Maximum number of steps to take
|
| 629 |
+
seed: Random seed for reproducibility (use for LLM calls)
|
| 630 |
+
verbose: Whether to print detailed output
|
| 631 |
+
|
| 632 |
+
Returns:
|
| 633 |
+
RunResult with final score and statistics
|
| 634 |
+
"""
|
| 635 |
+
|
| 636 |
+
# Utilities for robustness
|
| 637 |
+
def _tool_text(res) -> str:
|
| 638 |
+
"""
|
| 639 |
+
FastMCP returns different shapes depending on version:
|
| 640 |
+
- sometimes an object with .content[0].text
|
| 641 |
+
- sometimes a list of parts with .text
|
| 642 |
+
- sometimes already a string
|
| 643 |
+
"""
|
| 644 |
+
if res is None:
|
| 645 |
+
return ""
|
| 646 |
+
if isinstance(res, str):
|
| 647 |
+
return res
|
| 648 |
+
if isinstance(res, dict):
|
| 649 |
+
return json.dumps(res)
|
| 650 |
+
# Newer fastmcp style: result.content[0].text
|
| 651 |
+
content = getattr(res, "content", None)
|
| 652 |
+
if content:
|
| 653 |
+
try:
|
| 654 |
+
if isinstance(content, list) and content and hasattr(content[0], "text"):
|
| 655 |
+
return content[0].text or ""
|
| 656 |
+
except Exception:
|
| 657 |
+
pass
|
| 658 |
+
# Older / alternate: list of parts
|
| 659 |
+
if isinstance(res, list) and res:
|
| 660 |
+
try:
|
| 661 |
+
if hasattr(res[0], "text"):
|
| 662 |
+
return res[0].text or ""
|
| 663 |
+
except Exception:
|
| 664 |
+
pass
|
| 665 |
+
# Fallback
|
| 666 |
+
return str(res)
|
| 667 |
+
|
| 668 |
+
|
| 669 |
+
def _extract_location(obs: str) -> str | None:
|
| 670 |
+
"""Heuristic: first plausible room-title line."""
|
| 671 |
+
if not obs:
|
| 672 |
+
return None
|
| 673 |
+
for line in obs.splitlines():
|
| 674 |
+
s = line.strip()
|
| 675 |
+
if not s:
|
| 676 |
+
continue
|
| 677 |
+
low = s.lower()
|
| 678 |
+
|
| 679 |
+
# Skip common headers
|
| 680 |
+
if low.startswith(("copyright", "revision", "serial number")):
|
| 681 |
+
continue
|
| 682 |
+
if "trademark" in low:
|
| 683 |
+
continue
|
| 684 |
+
|
| 685 |
+
# Zork titles: short, not a full sentence
|
| 686 |
+
if len(s) > 50:
|
| 687 |
+
continue
|
| 688 |
+
if s.endswith((".", "!", "?", ":", ";")):
|
| 689 |
+
continue
|
| 690 |
+
|
| 691 |
+
bad_starts = (
|
| 692 |
+
"you ", "it ", "i ", "there ", "the ", "a ", "an ",
|
| 693 |
+
"what ", "can't ", "i don't", "unknown", "error"
|
| 694 |
+
)
|
| 695 |
+
if low.startswith(bad_starts):
|
| 696 |
+
continue
|
| 697 |
+
|
| 698 |
+
return s
|
| 699 |
+
return None
|
| 700 |
+
|
| 701 |
+
def _parse_score_moves_from_memory(mem: str) -> tuple[int | None, int | None]:
|
| 702 |
+
"""Parse lines like 'Score: X' / 'Moves: Y' (best-effort)."""
|
| 703 |
+
if not mem:
|
| 704 |
+
return (None, None)
|
| 705 |
+
score = None
|
| 706 |
+
moves = None
|
| 707 |
+
m = re.search(r"\bScore:\s*(\d+)\b", mem)
|
| 708 |
+
if m:
|
| 709 |
+
score = int(m.group(1))
|
| 710 |
+
m = re.search(r"\bMoves:\s*(\d+)\b", mem)
|
| 711 |
+
if m:
|
| 712 |
+
moves = int(m.group(1))
|
| 713 |
+
return (score, moves)
|
| 714 |
+
|
| 715 |
+
async def _force_valid_actions_feedback(msg: str) -> str:
    """
    Return ``msg`` followed by the list of valid actions (when available).

    The valid_actions tool is only called if it is among the available tools;
    a failing call is reported inline instead of being raised.
    """
    listing = ""
    if "valid_actions" in available_tool_names:
        try:
            reply = await client.call_tool("valid_actions", {})
            listing = _tool_text(reply)
        except Exception as e:
            listing = f"(valid_actions failed: {e})"
    feedback = f"{msg}\n\nValid actions:\n{listing}"
    return feedback.strip()
|
| 726 |
+
|
| 727 |
+
def should_summarize(step_idx, observation, state_obj):
    """Decide whether this step warrants a summary.

    True on every step index divisible by 10, or when the observation
    (case-insensitively) contains one of the trigger keywords. ``state_obj``
    is accepted but not consulted.
    """
    if step_idx % 10 == 0:
        return True
    text = (observation or "").lower()
    for keyword in (
        "locked", "dark", "can't", "you don't know", "you can't see",
        "grating", "trapdoor", "door", "key", "lamp"
    ):
        if keyword in text:
            return True
    return False
|
| 736 |
+
|
| 737 |
+
def _print_step(step_idx: int, thought: str, tool: str, args: dict):
    """Print a short banner describing one step; does nothing unless ``verbose`` is set."""
    if verbose:
        print("\n" + "─" * 40)
        print(f"Step {step_idx}/{max_steps}")
        for label, value in (("THOUGHT:", thought), ("TOOL:", tool), ("ARGS:", args)):
            print(label, value)
|
| 745 |
+
|
| 746 |
+
def _parse_points_from_obs(obs: str) -> tuple[int | None, int | None]:
|
| 747 |
+
"""
|
| 748 |
+
Returns (delta_points, total_points) if present in observation, else (None, None)
|
| 749 |
+
Matches patterns like: "+10 points! (Total: 15)"
|
| 750 |
+
"""
|
| 751 |
+
if not obs:
|
| 752 |
+
return (None, None)
|
| 753 |
+
delta = None
|
| 754 |
+
total = None
|
| 755 |
+
m = re.search(r"\+(\d+)\s*point(?:s)?!", obs)
|
| 756 |
+
if m:
|
| 757 |
+
delta = int(m.group(1))
|
| 758 |
+
m = re.search(r"\(Total:\s*(\d+)\)", obs)
|
| 759 |
+
if m:
|
| 760 |
+
total = int(m.group(1))
|
| 761 |
+
return (delta, total)
|
| 762 |
+
|
| 763 |
+
|
| 764 |
+
# Discover tools
|
| 765 |
+
default_tools = {
|
| 766 |
+
"play_action",
|
| 767 |
+
"memory",
|
| 768 |
+
"inventory",
|
| 769 |
+
"get_map",
|
| 770 |
+
"valid_actions",
|
| 771 |
+
"tried_actions",
|
| 772 |
+
"hint",
|
| 773 |
+
"state",
|
| 774 |
+
"exits",
|
| 775 |
+
"graph",
|
| 776 |
+
"checkpoint_save",
|
| 777 |
+
"checkpoint_restore",
|
| 778 |
+
"action_probe"
|
| 779 |
+
}
|
| 780 |
+
|
| 781 |
+
available_tool_names = set(default_tools)
|
| 782 |
+
if hasattr(client, "list_tools"):
|
| 783 |
+
try:
|
| 784 |
+
tools = await client.list_tools()
|
| 785 |
+
# tools can be list of objects with `.name`
|
| 786 |
+
names = []
|
| 787 |
+
for t in tools or []:
|
| 788 |
+
n = getattr(t, "name", None)
|
| 789 |
+
if n:
|
| 790 |
+
names.append(n.strip().lower())
|
| 791 |
+
if names:
|
| 792 |
+
available_tool_names = set(names)
|
| 793 |
+
except Exception:
|
| 794 |
+
# If list_tools isn't available / fails, keep defaults
|
| 795 |
+
pass
|
| 796 |
+
|
| 797 |
+
# session initialization
|
| 798 |
+
self.history.clear()
|
| 799 |
+
self.locations_visited.clear()
|
| 800 |
+
self._last_room_line = None
|
| 801 |
+
self._last_action = None
|
| 802 |
+
self._recent_state_hashes.clear()
|
| 803 |
+
self._last_score_seen = None
|
| 804 |
+
self.objman = ObjectiveManager()
|
| 805 |
+
|
| 806 |
+
# 1) Initial look
|
| 807 |
+
try:
|
| 808 |
+
res = await client.call_tool("play_action", {"action": "look"})
|
| 809 |
+
observation = _tool_text(res)
|
| 810 |
+
except Exception as e:
|
| 811 |
+
return RunResult(
|
| 812 |
+
final_score=0,
|
| 813 |
+
max_score=350 if game == "zork1" else 0,
|
| 814 |
+
moves=0,
|
| 815 |
+
locations_visited=set(),
|
| 816 |
+
game_completed=False,
|
| 817 |
+
error=f"Initial call_tool failed: {e}",
|
| 818 |
+
history=[],
|
| 819 |
+
)
|
| 820 |
+
|
| 821 |
+
loc = _extract_location(observation)
|
| 822 |
+
if loc:
|
| 823 |
+
self.locations_visited.add(loc)
|
| 824 |
+
self._last_room_line = loc.strip().lower()
|
| 825 |
+
|
| 826 |
+
# Save an initial checkpoint if supported (for loop recovery)
|
| 827 |
+
if self._checkpoint_enabled and "checkpoint_save" in available_tool_names:
|
| 828 |
+
try:
|
| 829 |
+
await client.call_tool("checkpoint_save", {"name": self._checkpoint_loop})
|
| 830 |
+
except Exception:
|
| 831 |
+
pass
|
| 832 |
+
|
| 833 |
+
if verbose:
|
| 834 |
+
print("=" * 60)
|
| 835 |
+
print(f"Starting agent on game={game} | max_steps={max_steps}")
|
| 836 |
+
print("=" * 60)
|
| 837 |
+
print("\nInitial observation:\n", observation)
|
| 838 |
+
|
| 839 |
+
# If we detect loops / no-ops, force valid actions next
|
| 840 |
+
force_valid_actions_next = False
|
| 841 |
+
|
| 842 |
+
# Track run history for grading
|
| 843 |
+
run_history: list[tuple[str, str, str]] = []
|
| 844 |
+
|
| 845 |
+
# We'll keep best-known score/moves
|
| 846 |
+
best_score: int | None = None
|
| 847 |
+
best_moves: int | None = None
|
| 848 |
+
|
| 849 |
+
# anti-loop state
|
| 850 |
+
# actions that produced "no progress" recently in this location
|
| 851 |
+
blocked_actions_by_loc: dict[str, set[str]] = {}
|
| 852 |
+
# recommended actions in this location (to avoid repeating same suggestions)
|
| 853 |
+
recommended_actions_by_loc: dict[str, set[str]] = {}
|
| 854 |
+
|
| 855 |
+
def _result_short(txt: str) -> str:
|
| 856 |
+
return re.sub(r"\s+", " ", (txt or "").strip())[:180]
|
| 857 |
+
|
| 858 |
+
def _cur_loc_key() -> str:
    """Key identifying the current location for the per-location caches."""
    # Prefer the last known room line; otherwise derive a key from the
    # shortened current observation.
    known = self._last_room_line
    return known if known else "unknown:" + _result_short(observation)
|
| 863 |
+
|
| 864 |
+
def _is_no_progress_result(txt: str) -> bool:
    """Tell whether a game response matches a known "no progress" marker.

    The check is a case-insensitive substring search of ``txt`` against a
    fixed list of phrases; a falsy ``txt`` never matches.
    """
    lowered = txt.lower() if txt else ""
    markers = (
        "but thing not happen",
        "not see any way",
        "too heavy",
        "stuck",
        "not happen",
        "that not thing",
        "grunk not see that there",
        "you can't see any",
        "not know how",
        "nothing special",
    )
    for marker in markers:
        if marker in lowered:
            return True
    return False
|
| 879 |
+
|
| 880 |
+
# 2) ReAct loop
|
| 881 |
+
for step_idx in range(1, max_steps + 1):
|
| 882 |
+
memory_text = None
|
| 883 |
+
map_text = None
|
| 884 |
+
valid_actions_text = None
|
| 885 |
+
hint_text = None
|
| 886 |
+
state_text = None
|
| 887 |
+
state_obj = None
|
| 888 |
+
|
| 889 |
+
if "state" in available_tool_names:
|
| 890 |
+
try:
|
| 891 |
+
state_text = _tool_text(await client.call_tool("state", {}))
|
| 892 |
+
state_obj = json.loads(state_text) if state_text else None
|
| 893 |
+
except Exception:
|
| 894 |
+
state_obj = None
|
| 895 |
+
|
| 896 |
+
# update objectives from latest observation/state
|
| 897 |
+
if state_obj and isinstance(state_obj, dict):
|
| 898 |
+
try:
|
| 899 |
+
self.objman.update_from_observation(observation, state_obj)
|
| 900 |
+
except Exception:
|
| 901 |
+
pass
|
| 902 |
+
|
| 903 |
+
# deterministic overrides (before LLM)
|
| 904 |
+
if state_obj and isinstance(state_obj, dict):
|
| 905 |
+
visible = [str(x).lower() for x in (state_obj.get("visible_objects") or [])]
|
| 906 |
+
inv = " ".join([str(x).lower() for x in (state_obj.get("inventory") or [])])
|
| 907 |
+
|
| 908 |
+
# treasure rule (simple keyword scan)
|
| 909 |
+
treasure_words = {"treasure","gold","jewel","jewels","diamond","emerald","ruby","sapphire","crown","painting","egg","crystal"}
|
| 910 |
+
if any(w in visible for w in treasure_words):
|
| 911 |
+
# pick first matching visible object
|
| 912 |
+
item = next((x for x in visible if x in treasure_words), None)
|
| 913 |
+
if item:
|
| 914 |
+
# force play_action take <item>
|
| 915 |
+
tool_name = "play_action"
|
| 916 |
+
tool_args = {"action": f"take {item}"}
|
| 917 |
+
thought = "Valuable item spotted; secure it first."
|
| 918 |
+
# execute immediately (skip LLM)
|
| 919 |
+
_print_step(step_idx, thought, tool_name, tool_args)
|
| 920 |
+
res = await client.call_tool(tool_name, tool_args)
|
| 921 |
+
observation = _tool_text(res)
|
| 922 |
+
self.history.append({"thought": thought, "tool": tool_name, "args": tool_args, "result": observation})
|
| 923 |
+
run_history.append((thought, f"{tool_name} {json.dumps(tool_args)}", observation))
|
| 924 |
+
continue
|
| 925 |
+
|
| 926 |
+
# darkness handling
|
| 927 |
+
obs_low = (state_obj.get("last_observation") or "").lower()
|
| 928 |
+
if "dark" in obs_low and ("lamp" in inv or "lantern" in inv):
|
| 929 |
+
# try turning on if valid_actions allows it, else skip
|
| 930 |
+
# (only do this if you decide to allow multi-word exotic via valid_actions)
|
| 931 |
+
pass
|
| 932 |
+
|
| 933 |
+
# Loop detection using state_hash (server-side)
|
| 934 |
+
if isinstance(state_obj, dict) and state_obj.get("state_hash"):
|
| 935 |
+
h = str(state_obj["state_hash"])
|
| 936 |
+
self._recent_state_hashes.append(h)
|
| 937 |
+
|
| 938 |
+
# If the exact same hash repeats 3 times IN A ROW, we are looping
|
| 939 |
+
if len(self._recent_state_hashes) >= 3 and \
|
| 940 |
+
self._recent_state_hashes[-1] == self._recent_state_hashes[-2] == self._recent_state_hashes[-3]:
|
| 941 |
+
force_valid_actions_next = True
|
| 942 |
+
|
| 943 |
+
# Try to rollback to last checkpoint if possible (loop detected)
|
| 944 |
+
if self._checkpoint_enabled and "checkpoint_restore" in available_tool_names:
|
| 945 |
+
try:
|
| 946 |
+
# --- DEBUG: before restore ---
|
| 947 |
+
if verbose:
|
| 948 |
+
before_score = state_obj.get("score") if isinstance(state_obj, dict) else None
|
| 949 |
+
before_moves = state_obj.get("moves") if isinstance(state_obj, dict) else None
|
| 950 |
+
before_hash = state_obj.get("state_hash") if isinstance(state_obj, dict) else None
|
| 951 |
+
print(
|
| 952 |
+
f"[DEBUG] RESTORE requested checkpoint={self._checkpoint_loop} "
|
| 953 |
+
f"at step={step_idx} (before score={before_score}, moves={before_moves}, hash={before_hash})"
|
| 954 |
+
)
|
| 955 |
+
|
| 956 |
+
# --- restore checkpoint (capture tool output) ---
|
| 957 |
+
restore_res = await client.call_tool("checkpoint_restore", {"name": self._checkpoint_loop})
|
| 958 |
+
if verbose:
|
| 959 |
+
print("[DEBUG] checkpoint_restore result:", _tool_text(restore_res))
|
| 960 |
+
|
| 961 |
+
# --- refresh observation after restore ---
|
| 962 |
+
res = await client.call_tool("play_action", {"action": "look"})
|
| 963 |
+
observation = _tool_text(res)
|
| 964 |
+
self._recent_state_hashes.clear()
|
| 965 |
+
|
| 966 |
+
# --- fetch state AFTER restore (so debug + correct state_obj) ---
|
| 967 |
+
after_state_obj = None
|
| 968 |
+
if "state" in available_tool_names:
|
| 969 |
+
try:
|
| 970 |
+
st = _tool_text(await client.call_tool("state", {}))
|
| 971 |
+
after_state_obj = json.loads(st) if st else None
|
| 972 |
+
if isinstance(after_state_obj, dict):
|
| 973 |
+
state_obj = after_state_obj
|
| 974 |
+
try:
|
| 975 |
+
if isinstance(after_state_obj, dict) and "score" in after_state_obj:
|
| 976 |
+
self._last_score_seen = int(after_state_obj["score"])
|
| 977 |
+
except Exception:
|
| 978 |
+
pass
|
| 979 |
+
if verbose:
|
| 980 |
+
after_score = after_state_obj.get("score") if isinstance(after_state_obj, dict) else None
|
| 981 |
+
after_moves = after_state_obj.get("moves") if isinstance(after_state_obj, dict) else None
|
| 982 |
+
after_hash = after_state_obj.get("state_hash") if isinstance(after_state_obj, dict) else None
|
| 983 |
+
print(
|
| 984 |
+
f"[DEBUG] state after restore: score={after_score}, moves={after_moves}, hash={after_hash}"
|
| 985 |
+
)
|
| 986 |
+
except Exception as e:
|
| 987 |
+
after_state_obj = None
|
| 988 |
+
if verbose:
|
| 989 |
+
print("[DEBUG] state after restore failed:", e)
|
| 990 |
+
|
| 991 |
+
# --- optional: mark objective progress using AFTER state (not before) ---
|
| 992 |
+
try:
|
| 993 |
+
if isinstance(after_state_obj, dict):
|
| 994 |
+
self.objman.mark_done_if_progress({}, after_state_obj, self._last_action or "", observation)
|
| 995 |
+
except Exception:
|
| 996 |
+
pass
|
| 997 |
+
|
| 998 |
+
except Exception as e:
|
| 999 |
+
if verbose:
|
| 1000 |
+
print("[DEBUG] checkpoint_restore block failed:", e)
|
| 1001 |
+
pass
|
| 1002 |
+
|
| 1003 |
+
# Occasional tools (only if available)
|
| 1004 |
+
if step_idx % 10 == 0 and "memory" in available_tool_names:
|
| 1005 |
+
try:
|
| 1006 |
+
memory_text = _tool_text(await client.call_tool("memory", {}))
|
| 1007 |
+
s, m = _parse_score_moves_from_memory(memory_text)
|
| 1008 |
+
if s is not None:
|
| 1009 |
+
best_score = s
|
| 1010 |
+
if m is not None:
|
| 1011 |
+
best_moves = m
|
| 1012 |
+
except Exception:
|
| 1013 |
+
memory_text = None
|
| 1014 |
+
|
| 1015 |
+
if (force_valid_actions_next or step_idx % 25 == 0) and "get_map" in available_tool_names:
|
| 1016 |
+
try:
|
| 1017 |
+
map_text = _tool_text(await client.call_tool("get_map", {}))
|
| 1018 |
+
except Exception:
|
| 1019 |
+
map_text = None
|
| 1020 |
+
|
| 1021 |
+
if force_valid_actions_next and "hint" in available_tool_names:
|
| 1022 |
+
try:
|
| 1023 |
+
hint_text = _tool_text(await client.call_tool("hint", {}))
|
| 1024 |
+
except Exception:
|
| 1025 |
+
hint_text = None
|
| 1026 |
+
|
| 1027 |
+
tried_here_cached: set[str] | None = None
|
| 1028 |
+
# Forced valid_actions on loop / parser failure
|
| 1029 |
+
force_before = force_valid_actions_next
|
| 1030 |
+
if force_valid_actions_next and "valid_actions" in available_tool_names:
|
| 1031 |
+
try:
|
| 1032 |
+
valid_actions_text = _tool_text(await client.call_tool("valid_actions", {}))
|
| 1033 |
+
va_list = self._extract_valid_actions(valid_actions_text)
|
| 1034 |
+
|
| 1035 |
+
tried_here = set()
|
| 1036 |
+
if "tried_actions" in available_tool_names:
|
| 1037 |
+
try:
|
| 1038 |
+
tried_text = _tool_text(await client.call_tool("tried_actions", {}))
|
| 1039 |
+
tried_here = self._extract_tried_actions_for_current_location(tried_text)
|
| 1040 |
+
except Exception:
|
| 1041 |
+
tried_here = set()
|
| 1042 |
+
tried_here_cached = tried_here
|
| 1043 |
+
|
| 1044 |
+
loc_key = _cur_loc_key()
|
| 1045 |
+
blocked_here = blocked_actions_by_loc.setdefault(loc_key, set())
|
| 1046 |
+
recommended_here = recommended_actions_by_loc.setdefault(loc_key, set())
|
| 1047 |
+
|
| 1048 |
+
# candidates = not tried, not blocked, not already recommended (if possible)
|
| 1049 |
+
cands = [a for a in va_list
|
| 1050 |
+
if self._norm(a) not in tried_here
|
| 1051 |
+
and self._norm(a) not in blocked_here
|
| 1052 |
+
and self._norm(a) not in recommended_here]
|
| 1053 |
+
|
| 1054 |
+
if not cands:
|
| 1055 |
+
# we relax the "recommended_here" constraint
|
| 1056 |
+
cands = [a for a in va_list
|
| 1057 |
+
if self._norm(a) not in tried_here
|
| 1058 |
+
and self._norm(a) not in blocked_here]
|
| 1059 |
+
|
| 1060 |
+
if not cands:
|
| 1061 |
+
cands = va_list
|
| 1062 |
+
|
| 1063 |
+
best_act = await self._choose_with_probe(client, cands, available_tool_names) if cands else None
|
| 1064 |
+
if best_act:
|
| 1065 |
+
recommended_here.add(self._norm(best_act))
|
| 1066 |
+
|
| 1067 |
+
# Inject a recommendation BUT do not execute anything
|
| 1068 |
+
valid_actions_text = (
|
| 1069 |
+
valid_actions_text.strip()
|
| 1070 |
+
+ ("\n\nAlready tried here:\n- " + "\n- ".join(sorted(tried_here)) if tried_here else "")
|
| 1071 |
+
+ ("\n\nBlocked here:\n- " + "\n- ".join(sorted(blocked_here)) if blocked_here else "")
|
| 1072 |
+
+ (f"\n\nRECOMMENDED NEXT (choose exactly ONE from valid_actions):\n- {best_act}" if best_act else "")
|
| 1073 |
+
+ "\n\nSYSTEM: If recommended fails, choose a DIFFERENT action from valid_actions."
|
| 1074 |
+
)
|
| 1075 |
+
|
| 1076 |
+
except Exception as e:
|
| 1077 |
+
valid_actions_text = f"(valid_actions failed: {e})"
|
| 1078 |
+
|
| 1079 |
+
force_valid_actions_next = False
|
| 1080 |
+
|
| 1081 |
+
# Build helpers for planner inputs
|
| 1082 |
+
va_list = self._extract_valid_actions(valid_actions_text) if valid_actions_text else []
|
| 1083 |
+
tried_here = []
|
| 1084 |
+
if tried_here_cached is not None:
|
| 1085 |
+
tried_here = sorted(list(tried_here_cached))
|
| 1086 |
+
elif "tried_actions" in available_tool_names:
|
| 1087 |
+
try:
|
| 1088 |
+
tried_text = _tool_text(await client.call_tool("tried_actions", {}))
|
| 1089 |
+
tried_here = sorted(list(self._extract_tried_actions_for_current_location(tried_text)))
|
| 1090 |
+
except Exception:
|
| 1091 |
+
tried_here = []
|
| 1092 |
+
|
| 1093 |
+
# Update synth memory
|
| 1094 |
+
if should_summarize(step_idx, observation, state_obj):
|
| 1095 |
+
recent = self.history[-8:]
|
| 1096 |
+
recent_lines = "\n".join(
|
| 1097 |
+
f"- {h['args'].get('action','')} -> {(h['result'].splitlines()[0] if h['result'] else '')}"
|
| 1098 |
+
for h in recent if h.get("tool") == "play_action"
|
| 1099 |
+
)
|
| 1100 |
+
prompt = build_synth_prompt(self.synth_memory, recent_lines, state_obj or {})
|
| 1101 |
+
try:
|
| 1102 |
+
txt = call_llm(prompt, SYNTH_SYSTEM, seed=seed + 10_000 + step_idx, max_tokens=350)
|
| 1103 |
+
new_mem = json.loads(txt)
|
| 1104 |
+
|
| 1105 |
+
for k in ["facts", "blocking", "inventory_goals", "open_threads", "visited"]:
|
| 1106 |
+
if k not in new_mem or not isinstance(new_mem[k], list):
|
| 1107 |
+
new_mem[k] = []
|
| 1108 |
+
new_mem["last_update_move"] = int((state_obj or {}).get("moves", step_idx))
|
| 1109 |
+
|
| 1110 |
+
self.synth_memory = new_mem
|
| 1111 |
+
except Exception:
|
| 1112 |
+
pass
|
| 1113 |
+
|
| 1114 |
+
# Run planner after synth memory update
|
| 1115 |
+
if self._planner_should_run(step_idx, observation, force_before):
|
| 1116 |
+
self._run_planner_llm(
|
| 1117 |
+
observation=observation,
|
| 1118 |
+
state_obj=state_obj or {},
|
| 1119 |
+
valid_actions_list=va_list,
|
| 1120 |
+
tried_here_list=tried_here,
|
| 1121 |
+
seed=seed,
|
| 1122 |
+
step_idx=step_idx,
|
| 1123 |
+
)
|
| 1124 |
+
|
| 1125 |
+
# Build prompt + call LLM
|
| 1126 |
+
prompt = self._build_prompt(
|
| 1127 |
+
observation=observation,
|
| 1128 |
+
memory_text=memory_text,
|
| 1129 |
+
map_text=map_text,
|
| 1130 |
+
valid_actions_text=valid_actions_text,
|
| 1131 |
+
hint_text=hint_text,
|
| 1132 |
+
)
|
| 1133 |
+
|
| 1134 |
+
va_list = self._extract_valid_actions(valid_actions_text) if valid_actions_text else []
|
| 1135 |
+
|
| 1136 |
+
llm_response = self._call_llm(prompt=prompt, system_prompt=SYSTEM_PROMPT, seed=seed + step_idx)
|
| 1137 |
+
thought, tool_name, tool_args = self._parse_response(llm_response)
|
| 1138 |
+
tool_name = (tool_name or "").strip().lower()
|
| 1139 |
+
|
| 1140 |
+
if valid_actions_text:
|
| 1141 |
+
va_list = self._extract_valid_actions(valid_actions_text)
|
| 1142 |
+
if tool_name == "play_action":
|
| 1143 |
+
act = self._norm(tool_args.get("action", ""))
|
| 1144 |
+
if va_list and not self._is_allowed_exotic(act, va_list) and not self._is_canonical_action(act):
|
| 1145 |
+
# action non-canonique et pas dans valid_actions -> re-trigger recovery
|
| 1146 |
+
force_valid_actions_next = True
|
| 1147 |
+
|
| 1148 |
+
if force_valid_actions_next:
|
| 1149 |
+
observation = "SYSTEM FEEDBACK: Non-canonical action not in valid_actions. Recomputing valid_actions."
|
| 1150 |
+
continue
|
| 1151 |
+
|
| 1152 |
+
# avoid calling tried_actions/valid_actions twice in a row
|
| 1153 |
+
if self.history:
|
| 1154 |
+
last_tool = (self.history[-1].get("tool") or "").strip().lower()
|
| 1155 |
+
if tool_name in {"tried_actions", "valid_actions"} and last_tool == tool_name:
|
| 1156 |
+
observation = (
|
| 1157 |
+
f"SYSTEM FEEDBACK: Do not call {tool_name} twice in a row. "
|
| 1158 |
+
"Choose ONE concrete play_action from the last valid_actions list, or move."
|
| 1159 |
+
)
|
| 1160 |
+
self.history.append({"thought": thought, "tool": tool_name, "args": tool_args, "result": observation})
|
| 1161 |
+
run_history.append((thought, f"{tool_name} {tool_args}", observation))
|
| 1162 |
+
force_valid_actions_next = True
|
| 1163 |
+
continue
|
| 1164 |
+
|
| 1165 |
+
|
| 1166 |
+
# Hard tool validation: if unknown tool, coach and continue without spending a move
|
| 1167 |
+
if tool_name not in available_tool_names:
|
| 1168 |
+
va_text = ""
|
| 1169 |
+
if "valid_actions" in available_tool_names:
|
| 1170 |
+
try:
|
| 1171 |
+
va_text = _tool_text(await client.call_tool("valid_actions", {}))
|
| 1172 |
+
except Exception as e:
|
| 1173 |
+
va_text = f"(valid_actions failed: {e})"
|
| 1174 |
+
|
| 1175 |
+
observation = (
|
| 1176 |
+
"SYSTEM FEEDBACK: You requested an UNKNOWN TOOL.\n"
|
| 1177 |
+
f"Tool must be one of: {', '.join(sorted(available_tool_names))}.\n"
|
| 1178 |
+
"Use play_action with ARGS {\"action\": \"...\"} for game commands.\n\n"
|
| 1179 |
+
f"Suggested valid actions:\n{va_text}"
|
| 1180 |
+
)
|
| 1181 |
+
self.history.append({"thought": thought, "tool": tool_name, "args": tool_args, "result": observation})
|
| 1182 |
+
run_history.append((thought, f"{tool_name} {tool_args}", observation))
|
| 1183 |
+
force_valid_actions_next = True
|
| 1184 |
+
continue
|
| 1185 |
+
|
| 1186 |
+
# Enforce args shape
|
| 1187 |
+
if tool_name not in {"play_action", "checkpoint_save", "checkpoint_restore", "action_probe"}:
|
| 1188 |
+
tool_args = {}
|
| 1189 |
+
|
| 1190 |
+
if tool_name in {"checkpoint_save", "checkpoint_restore"}:
|
| 1191 |
+
if not isinstance(tool_args, dict):
|
| 1192 |
+
tool_args = {}
|
| 1193 |
+
if "name" in tool_args and not isinstance(tool_args["name"], str):
|
| 1194 |
+
tool_args["name"] = "auto"
|
| 1195 |
+
|
| 1196 |
+
if tool_name == "action_probe":
|
| 1197 |
+
if not isinstance(tool_args, dict):
|
| 1198 |
+
tool_args = {"action": "look"}
|
| 1199 |
+
if not isinstance(tool_args.get("action",""), str) or not tool_args["action"].strip():
|
| 1200 |
+
tool_args["action"] = "look"
|
| 1201 |
+
|
| 1202 |
+
# Normalize / validate play_action command a bit
|
| 1203 |
+
if tool_name == "play_action":
|
| 1204 |
+
if not isinstance(tool_args, dict):
|
| 1205 |
+
# au lieu de "look"
|
| 1206 |
+
observation = await _force_valid_actions_feedback(
|
| 1207 |
+
"SYSTEM FEEDBACK: Invalid ARGS for play_action. Call valid_actions and pick ONE exact action."
|
| 1208 |
+
)
|
| 1209 |
+
self.history.append({"thought": thought, "tool": tool_name, "args": tool_args, "result": observation})
|
| 1210 |
+
run_history.append((thought, f"{tool_name} {tool_args}", observation))
|
| 1211 |
+
force_valid_actions_next = True
|
| 1212 |
+
continue
|
| 1213 |
+
|
| 1214 |
+
raw_action = str(tool_args.get("action", "") or "").strip()
|
| 1215 |
+
if not raw_action:
|
| 1216 |
+
# au lieu de "look"
|
| 1217 |
+
observation = await _force_valid_actions_feedback(
|
| 1218 |
+
"SYSTEM FEEDBACK: Missing action. Call valid_actions and pick ONE exact action."
|
| 1219 |
+
)
|
| 1220 |
+
self.history.append({"thought": thought, "tool": tool_name, "args": tool_args, "result": observation})
|
| 1221 |
+
run_history.append((thought, f"{tool_name} {tool_args}", observation))
|
| 1222 |
+
force_valid_actions_next = True
|
| 1223 |
+
continue
|
| 1224 |
+
|
| 1225 |
+
action = self._normalize_action(raw_action) if hasattr(self, "_normalize_action") else raw_action.lower()
|
| 1226 |
+
action = action.strip().lower()
|
| 1227 |
+
|
| 1228 |
+
if not action:
|
| 1229 |
+
observation = await _force_valid_actions_feedback(
|
| 1230 |
+
"SYSTEM FEEDBACK: Empty/invalid action after normalization. Call valid_actions and pick ONE exact action."
|
| 1231 |
+
)
|
| 1232 |
+
self.history.append({"thought": thought, "tool": tool_name, "args": tool_args, "result": observation})
|
| 1233 |
+
run_history.append((thought, f"{tool_name} {tool_args}", observation))
|
| 1234 |
+
force_valid_actions_next = True
|
| 1235 |
+
continue
|
| 1236 |
+
|
| 1237 |
+
tool_args["action"] = action
|
| 1238 |
+
|
| 1239 |
+
# movement priority (in/up/down) when valid_actions known
|
| 1240 |
+
if valid_actions_text and self._is_move(action):
|
| 1241 |
+
va_list = [self._norm(x) for x in self._extract_valid_actions(valid_actions_text)]
|
| 1242 |
+
if action in {"north","south","east","west"}:
|
| 1243 |
+
if any(m in va_list for m in {"in","up","down"}):
|
| 1244 |
+
observation = (
|
| 1245 |
+
"SYSTEM FEEDBACK: Movement priority: prefer in/up/down when available. "
|
| 1246 |
+
"Call valid_actions and pick one of those if present."
|
| 1247 |
+
)
|
| 1248 |
+
self.history.append({"thought": thought, "tool": tool_name, "args": tool_args, "result": observation})
|
| 1249 |
+
run_history.append((thought, f"{tool_name} {tool_args}", observation))
|
| 1250 |
+
force_valid_actions_next = True
|
| 1251 |
+
continue
|
| 1252 |
+
|
| 1253 |
+
|
| 1254 |
+
# If not canonical, allow only if it appears in valid_actions (when available).
|
| 1255 |
+
# If we don't have valid_actions_text yet, fetch it once and decide.
|
| 1256 |
+
if not self._is_canonical_action(action):
|
| 1257 |
+
va_text = valid_actions_text
|
| 1258 |
+
va_list: list[str] = self._extract_valid_actions(va_text) if va_text else []
|
| 1259 |
+
|
| 1260 |
+
if not va_list and "valid_actions" in available_tool_names:
|
| 1261 |
+
try:
|
| 1262 |
+
va_text = _tool_text(await client.call_tool("valid_actions", {}))
|
| 1263 |
+
va_list = self._extract_valid_actions(va_text)
|
| 1264 |
+
except Exception:
|
| 1265 |
+
va_text = None
|
| 1266 |
+
va_list = []
|
| 1267 |
+
|
| 1268 |
+
if not va_list or not self._is_allowed_exotic(action, va_list):
|
| 1269 |
+
# coach and force valid_actions next
|
| 1270 |
+
observation = (
|
| 1271 |
+
"SYSTEM FEEDBACK: Your command is not canonical and is not allowed unless it appears "
|
| 1272 |
+
"EXACTLY in valid_actions. Simplify (look / north / take X / open X / examine X / read X / etc.) "
|
| 1273 |
+
"or call valid_actions then pick ONE action from it.\n"
|
| 1274 |
+
)
|
| 1275 |
+
if va_text:
|
| 1276 |
+
observation += f"\nvalid_actions for this location:\n{va_text}"
|
| 1277 |
+
|
| 1278 |
+
self.history.append({"thought": thought, "tool": tool_name, "args": tool_args, "result": observation})
|
| 1279 |
+
run_history.append((thought, f"{tool_name} {tool_args}", observation))
|
| 1280 |
+
force_valid_actions_next = True
|
| 1281 |
+
continue
|
| 1282 |
+
|
| 1283 |
+
# Simple anti-repeat (soft): if same as last action and we got no new info previously, force valid actions
|
| 1284 |
+
if self._last_action and action.strip().lower() == self._last_action:
|
| 1285 |
+
# Don’t block always; only if last observation looked identical-ish
|
| 1286 |
+
prev_short = (self.history[-1]["result"] if self.history else "")[:200].strip()
|
| 1287 |
+
cur_short = (observation or "")[:200].strip()
|
| 1288 |
+
if prev_short and prev_short == cur_short:
|
| 1289 |
+
observation = (
|
| 1290 |
+
"SYSTEM FEEDBACK: You are repeating the same action with no progress. "
|
| 1291 |
+
"Call valid_actions and choose ONE different action."
|
| 1292 |
+
)
|
| 1293 |
+
self.history.append({"thought": thought, "tool": tool_name, "args": tool_args, "result": observation})
|
| 1294 |
+
run_history.append((thought, f"{tool_name} {tool_args}", observation))
|
| 1295 |
+
force_valid_actions_next = True
|
| 1296 |
+
continue
|
| 1297 |
+
|
| 1298 |
+
# per-location blocked actions (avoid "pull lever" spam etc.)
|
| 1299 |
+
if tool_name == "play_action":
|
| 1300 |
+
loc_key = _cur_loc_key()
|
| 1301 |
+
blocked = blocked_actions_by_loc.setdefault(loc_key, set())
|
| 1302 |
+
act_norm = self._norm(tool_args.get("action", ""))
|
| 1303 |
+
|
| 1304 |
+
if act_norm in blocked:
|
| 1305 |
+
observation = (
|
| 1306 |
+
"SYSTEM FEEDBACK: This action already produced no progress in this location. "
|
| 1307 |
+
"Do NOT repeat it. Call valid_actions and pick a different action (prefer the recommended one)."
|
| 1308 |
+
)
|
| 1309 |
+
self.history.append({"thought": thought, "tool": tool_name, "args": tool_args, "result": observation})
|
| 1310 |
+
run_history.append((thought, f"{tool_name} {tool_args}", observation))
|
| 1311 |
+
force_valid_actions_next = True
|
| 1312 |
+
continue
|
| 1313 |
+
|
| 1314 |
+
if verbose:
|
| 1315 |
+
print("\n" + "─" * 40)
|
| 1316 |
+
print(f"Step {step_idx}/{max_steps}")
|
| 1317 |
+
print("THOUGHT:", thought)
|
| 1318 |
+
print("TOOL:", tool_name)
|
| 1319 |
+
print("ARGS:", tool_args)
|
| 1320 |
+
|
| 1321 |
+
# Execute tool
|
| 1322 |
+
prev_observation = observation
|
| 1323 |
+
try:
|
| 1324 |
+
res = await client.call_tool(tool_name, tool_args)
|
| 1325 |
+
observation = _tool_text(res)
|
| 1326 |
+
|
| 1327 |
+
# immediate checkpoint on score gain (robust) ---
|
| 1328 |
+
if tool_name == "play_action" and self._checkpoint_enabled and "checkpoint_save" in available_tool_names:
|
| 1329 |
+
delta_pts, total_pts = _parse_points_from_obs(observation)
|
| 1330 |
+
|
| 1331 |
+
if delta_pts is not None and delta_pts > 0:
|
| 1332 |
+
low_obs = (observation or "").lower()
|
| 1333 |
+
if "game over" not in low_obs and "grue" not in low_obs:
|
| 1334 |
+
# prefer authoritative score from state if available
|
| 1335 |
+
cur_total = None
|
| 1336 |
+
try:
|
| 1337 |
+
if isinstance(state_obj, dict) and "score" in state_obj:
|
| 1338 |
+
cur_total = int(state_obj["score"])
|
| 1339 |
+
except Exception:
|
| 1340 |
+
cur_total = None
|
| 1341 |
+
|
| 1342 |
+
# fallback to parsed total if present
|
| 1343 |
+
if cur_total is None and total_pts is not None:
|
| 1344 |
+
cur_total = total_pts
|
| 1345 |
+
|
| 1346 |
+
# monotone rule (only if we have a total); otherwise save anyway
|
| 1347 |
+
should_save = (cur_total is None) or (self._last_score_seen is None) or (cur_total > self._last_score_seen)
|
| 1348 |
+
|
| 1349 |
+
if should_save:
|
| 1350 |
+
if cur_total is not None:
|
| 1351 |
+
self._last_score_seen = cur_total
|
| 1352 |
+
try:
|
| 1353 |
+
if verbose:
|
| 1354 |
+
print(f"[DEBUG] CHECKPOINT_SAVE after +{delta_pts} (total={cur_total}) at step={step_idx}")
|
| 1355 |
+
|
| 1356 |
+
# 1) best
|
| 1357 |
+
await client.call_tool("checkpoint_save", {"name": self._checkpoint_best})
|
| 1358 |
+
|
| 1359 |
+
# 2) loop-safe position
|
| 1360 |
+
try:
|
| 1361 |
+
await client.call_tool("checkpoint_save", {"name": self._checkpoint_loop})
|
| 1362 |
+
except Exception:
|
| 1363 |
+
pass
|
| 1364 |
+
|
| 1365 |
+
except Exception as e:
|
| 1366 |
+
if verbose:
|
| 1367 |
+
print("[DEBUG] checkpoint_save after points failed:", e)
|
| 1368 |
+
|
| 1369 |
+
except Exception as e:
|
| 1370 |
+
observation = f"Error calling tool {tool_name}: {e}"
|
| 1371 |
+
if tool_name == "play_action":
|
| 1372 |
+
loc_key = _cur_loc_key()
|
| 1373 |
+
blocked_actions_by_loc.setdefault(loc_key, set()).add(self._norm(tool_args.get("action", "")))
|
| 1374 |
+
force_valid_actions_next = True
|
| 1375 |
+
|
| 1376 |
+
if self._looks_like_parser_failure(observation):
|
| 1377 |
+
force_valid_actions_next = True
|
| 1378 |
+
|
| 1379 |
+
if tool_name == "play_action":
|
| 1380 |
+
loc_key = _cur_loc_key()
|
| 1381 |
+
act_norm = self._norm(tool_args.get("action", ""))
|
| 1382 |
+
rs = _result_short(observation)
|
| 1383 |
+
|
| 1384 |
+
if _is_no_progress_result(observation):
|
| 1385 |
+
blocked_actions_by_loc.setdefault(loc_key, set()).add(act_norm)
|
| 1386 |
+
|
| 1387 |
+
# Track location changes
|
| 1388 |
+
loc = _extract_location(observation)
|
| 1389 |
+
if loc:
|
| 1390 |
+
self.locations_visited.add(loc)
|
| 1391 |
+
loc_line = loc.strip().lower()
|
| 1392 |
+
if self._last_room_line is None or loc_line != self._last_room_line:
|
| 1393 |
+
self._last_room_line = loc_line
|
| 1394 |
+
|
| 1395 |
+
# Loop detection: same play_action 3x with same short observation
|
| 1396 |
+
if tool_name == "play_action":
|
| 1397 |
+
act = str(tool_args.get("action", "")).strip().lower()
|
| 1398 |
+
self._last_action = act
|
| 1399 |
+
|
| 1400 |
+
last_plays = [
|
| 1401 |
+
h for h in reversed(self.history)
|
| 1402 |
+
if h.get("tool") == "play_action" and isinstance(h.get("args"), dict)
|
| 1403 |
+
][:2]
|
| 1404 |
+
|
| 1405 |
+
if len(last_plays) == 2:
|
| 1406 |
+
last_act1 = str(last_plays[0]["args"].get("action", "")).strip().lower()
|
| 1407 |
+
last_act2 = str(last_plays[1]["args"].get("action", "")).strip().lower()
|
| 1408 |
+
|
| 1409 |
+
cur_short = (observation or "").strip()[:200]
|
| 1410 |
+
last_obs1 = (last_plays[0].get("result") or "").strip()[:200]
|
| 1411 |
+
last_obs2 = (last_plays[1].get("result") or "").strip()[:200]
|
| 1412 |
+
|
| 1413 |
+
if act and act == last_act1 == last_act2 and cur_short and cur_short == last_obs1 == last_obs2:
|
| 1414 |
+
observation = (
|
| 1415 |
+
"SYSTEM FEEDBACK: You repeated the same action 3 times with no new info. "
|
| 1416 |
+
"Stop repeating. Call valid_actions and pick ONE different action."
|
| 1417 |
+
)
|
| 1418 |
+
force_valid_actions_next = True
|
| 1419 |
+
|
| 1420 |
+
# No-op movement detection: movement but observation unchanged
|
| 1421 |
+
if tool_name == "play_action":
|
| 1422 |
+
a = str(tool_args.get("action", "")).strip().lower()
|
| 1423 |
+
if a in {"north", "south", "east", "west", "up", "down", "in", "out"}:
|
| 1424 |
+
prev_short = (prev_observation or "").strip()[:200]
|
| 1425 |
+
cur_short = (observation or "").strip()[:200]
|
| 1426 |
+
if prev_short and cur_short == prev_short:
|
| 1427 |
+
force_valid_actions_next = True
|
| 1428 |
+
|
| 1429 |
+
if verbose:
|
| 1430 |
+
print("\nRESULT:\n", observation)
|
| 1431 |
+
|
| 1432 |
+
# Save traces
|
| 1433 |
+
self.history.append({"thought": thought, "tool": tool_name, "args": tool_args, "result": observation})
|
| 1434 |
+
run_history.append((thought, f"{tool_name} {json.dumps(tool_args) if isinstance(tool_args, dict) else tool_args}", observation))
|
| 1435 |
+
|
| 1436 |
+
# Stop conditions
|
| 1437 |
+
if "game over" in (observation or "").lower() or "*** game over ***" in (observation or "").lower():
|
| 1438 |
+
break
|
| 1439 |
+
|
| 1440 |
+
# 3) Final Stats
|
| 1441 |
+
# Prefer memory tool to get authoritative score/moves if available
|
| 1442 |
+
final_score = best_score if best_score is not None else 0
|
| 1443 |
+
moves = best_moves if best_moves is not None else 0
|
| 1444 |
+
|
| 1445 |
+
if "memory" in available_tool_names:
|
| 1446 |
+
try:
|
| 1447 |
+
mem = _tool_text(await client.call_tool("memory", {}))
|
| 1448 |
+
s, m = _parse_score_moves_from_memory(mem)
|
| 1449 |
+
if s is not None:
|
| 1450 |
+
final_score = s
|
| 1451 |
+
if m is not None:
|
| 1452 |
+
moves = m
|
| 1453 |
+
except Exception:
|
| 1454 |
+
pass
|
| 1455 |
+
|
| 1456 |
+
game_completed = "game over" in (observation or "").lower()
|
| 1457 |
+
|
| 1458 |
+
max_score = 350 if game == "zork1" else 0 # keep simple; adjust later if you want per-game max scores
|
| 1459 |
+
|
| 1460 |
+
return RunResult(
|
| 1461 |
+
final_score=final_score,
|
| 1462 |
+
max_score=max_score, # Zork1 max score, adjust if needed
|
| 1463 |
+
moves=moves,
|
| 1464 |
+
locations_visited=set(self.locations_visited),
|
| 1465 |
+
game_completed=game_completed,
|
| 1466 |
+
history=run_history,
|
| 1467 |
+
)
|
| 1468 |
+
|
| 1469 |
+
|
| 1470 |
+
|
| 1471 |
+
def _build_prompt(
|
| 1472 |
+
self,
|
| 1473 |
+
observation: str,
|
| 1474 |
+
memory_text: str | None = None,
|
| 1475 |
+
map_text: str | None = None,
|
| 1476 |
+
valid_actions_text: str | None = None,
|
| 1477 |
+
hint_text: str | None = None,
|
| 1478 |
+
) -> str:
|
| 1479 |
+
"""
|
| 1480 |
+
Build the prompt for the LLM.
|
| 1481 |
+
|
| 1482 |
+
"""
|
| 1483 |
+
parts: list[str] = []
|
| 1484 |
+
|
| 1485 |
+
|
| 1486 |
+
|
| 1487 |
+
if memory_text:
|
| 1488 |
+
parts.append("Game memory (authoritative):")
|
| 1489 |
+
parts.append(memory_text.strip())
|
| 1490 |
+
parts.append("")
|
| 1491 |
+
|
| 1492 |
+
if map_text:
|
| 1493 |
+
parts.append("Explored map:")
|
| 1494 |
+
parts.append(map_text.strip())
|
| 1495 |
+
parts.append("")
|
| 1496 |
+
|
| 1497 |
+
if valid_actions_text:
|
| 1498 |
+
parts.append("Suggested valid actions (choose EXACTLY if you use one):")
|
| 1499 |
+
parts.append(valid_actions_text.strip())
|
| 1500 |
+
parts.append("")
|
| 1501 |
+
|
| 1502 |
+
if hint_text:
|
| 1503 |
+
parts.append("Hint (non-spoiler):")
|
| 1504 |
+
parts.append(hint_text.strip())
|
| 1505 |
+
parts.append("")
|
| 1506 |
+
|
| 1507 |
+
# Short recent history: last 2 interactions
|
| 1508 |
+
if getattr(self, "history", None):
|
| 1509 |
+
last = self.history[-2:]
|
| 1510 |
+
if last:
|
| 1511 |
+
parts.append("Recent actions (most recent last):")
|
| 1512 |
+
for h in last:
|
| 1513 |
+
tool = h.get("tool", "")
|
| 1514 |
+
args = h.get("args", {})
|
| 1515 |
+
# Keep result short to avoid prompt bloat
|
| 1516 |
+
res = (h.get("result") or "").strip().replace("\n", " ")
|
| 1517 |
+
if len(res) > 160:
|
| 1518 |
+
res = res[:160] + "..."
|
| 1519 |
+
parts.append(f"- {tool} {args} -> {res}")
|
| 1520 |
+
parts.append("")
|
| 1521 |
+
|
| 1522 |
+
# Current observation always last
|
| 1523 |
+
parts.append("Current observation:")
|
| 1524 |
+
parts.append((observation or "").strip())
|
| 1525 |
+
|
| 1526 |
+
# Tiny nudges based on common patterns
|
| 1527 |
+
low = (observation or "").lower()
|
| 1528 |
+
if "contains:" in low:
|
| 1529 |
+
parts.append("")
|
| 1530 |
+
parts.append("Hint: If a container contains items, try 'take <noun>' using the exact noun shown.")
|
| 1531 |
+
if "is closed" in low:
|
| 1532 |
+
parts.append("")
|
| 1533 |
+
parts.append("Hint: If something is closed, try 'open <noun>' using the exact noun shown.")
|
| 1534 |
+
if "dark" in low:
|
| 1535 |
+
parts.append("")
|
| 1536 |
+
parts.append("Hint: If it is dark, prioritize finding/using a light source (take lamp, turn on lamp).")
|
| 1537 |
+
|
| 1538 |
+
parts.append("Synthesized memory (high signal):")
|
| 1539 |
+
parts.append(json.dumps(self.synth_memory, ensure_ascii=False, indent=2))
|
| 1540 |
+
parts.append("")
|
| 1541 |
+
|
| 1542 |
+
parts.append("Current objectives (highest priority first):")
|
| 1543 |
+
parts.append(self.objman.render()) # short text
|
| 1544 |
+
parts.append("")
|
| 1545 |
+
|
| 1546 |
+
# LLM planner suggestions (do not auto-execute)
|
| 1547 |
+
if getattr(self, "_planner_suggested_actions", None):
|
| 1548 |
+
parts.append("Planner suggestions (DO NOT auto-execute; pick one if sensible):")
|
| 1549 |
+
for a in self._planner_suggested_actions[:3]:
|
| 1550 |
+
parts.append(f"- {a}")
|
| 1551 |
+
if getattr(self, "_planner_notes", ""):
|
| 1552 |
+
parts.append(f"Planner notes: {self._planner_notes}")
|
| 1553 |
+
parts.append("")
|
| 1554 |
+
|
| 1555 |
+
|
| 1556 |
+
parts.append("")
|
| 1557 |
+
parts.append("What do you do next? Remember the required output format.")
|
| 1558 |
+
return "\n".join(parts)
|
| 1559 |
+
|
| 1560 |
+
def _parse_response(self, response: str) -> tuple[str, str, dict]:
|
| 1561 |
+
"""
|
| 1562 |
+
Parse LLM response to extract thought, tool name, and arguments.
|
| 1563 |
+
|
| 1564 |
+
Returns:
|
| 1565 |
+
Tuple of (thought, tool_name, args_dict)
|
| 1566 |
+
"""
|
| 1567 |
+
thought = ""
|
| 1568 |
+
tool_name = "play_action"
|
| 1569 |
+
tool_args: dict = {"action": "look"}
|
| 1570 |
+
|
| 1571 |
+
if not response:
|
| 1572 |
+
return ("", "play_action", {"action": "look"})
|
| 1573 |
+
|
| 1574 |
+
text = response.strip()
|
| 1575 |
+
|
| 1576 |
+
# Fast path: try regex extraction that works even with extra text/noise
|
| 1577 |
+
thought_m = re.search(r"(?im)^\s*THOUGHT\s*:\s*(.+?)\s*$", text)
|
| 1578 |
+
tool_m = re.search(r"(?im)^\s*TOOL\s*:\s*([a-zA-Z0-9_]+)\s*$", text)
|
| 1579 |
+
args_m = re.search(r"(?im)^\s*ARGS\s*:\s*(\{.*\}|\[.*\]|.+?)\s*$", text)
|
| 1580 |
+
|
| 1581 |
+
if thought_m:
|
| 1582 |
+
thought = thought_m.group(1).strip()
|
| 1583 |
+
|
| 1584 |
+
if tool_m:
|
| 1585 |
+
tool_name = tool_m.group(1).strip().lower()
|
| 1586 |
+
|
| 1587 |
+
# Parse ARGS (best-effort)
|
| 1588 |
+
raw_args = None
|
| 1589 |
+
if args_m:
|
| 1590 |
+
raw_args = args_m.group(1).strip()
|
| 1591 |
+
|
| 1592 |
+
# If ARGS line exists but JSON is on next lines, try to capture a JSON block
|
| 1593 |
+
if raw_args is None:
|
| 1594 |
+
# Try to find the first JSON object after "ARGS:"
|
| 1595 |
+
idx = text.lower().find("args:")
|
| 1596 |
+
if idx != -1:
|
| 1597 |
+
tail = text[idx + 5 :].strip()
|
| 1598 |
+
# If tail doesn't start with '{', try to find one
|
| 1599 |
+
jstart = tail.find("{")
|
| 1600 |
+
if jstart != -1:
|
| 1601 |
+
tail2 = tail[jstart:]
|
| 1602 |
+
# naive brace matching
|
| 1603 |
+
depth = 0
|
| 1604 |
+
end = None
|
| 1605 |
+
for i, ch in enumerate(tail2):
|
| 1606 |
+
if ch == "{":
|
| 1607 |
+
depth += 1
|
| 1608 |
+
elif ch == "}":
|
| 1609 |
+
depth -= 1
|
| 1610 |
+
if depth == 0:
|
| 1611 |
+
end = i + 1
|
| 1612 |
+
break
|
| 1613 |
+
if end is not None:
|
| 1614 |
+
raw_args = tail2[:end].strip()
|
| 1615 |
+
|
| 1616 |
+
if raw_args is not None:
|
| 1617 |
+
try:
|
| 1618 |
+
parsed = json.loads(raw_args)
|
| 1619 |
+
if isinstance(parsed, dict):
|
| 1620 |
+
tool_args = parsed
|
| 1621 |
+
else:
|
| 1622 |
+
# If model gave a list/string, treat as invalid
|
| 1623 |
+
tool_args = {"action": "look"} if tool_name == "play_action" else {}
|
| 1624 |
+
except Exception:
|
| 1625 |
+
tool_args = {"action": ""} if tool_name == "play_action" else {}
|
| 1626 |
+
|
| 1627 |
+
# Enforce schema expectations
|
| 1628 |
+
if tool_name == "play_action":
|
| 1629 |
+
if not isinstance(tool_args, dict):
|
| 1630 |
+
tool_args = {"action": "look"}
|
| 1631 |
+
action = tool_args.get("action", "")
|
| 1632 |
+
if not isinstance(action, str) or not action.strip():
|
| 1633 |
+
tool_args["action"] = "look"
|
| 1634 |
+
elif tool_name in {"checkpoint_save", "checkpoint_restore"}:
|
| 1635 |
+
if not isinstance(tool_args, dict):
|
| 1636 |
+
tool_args = {}
|
| 1637 |
+
# name is optional; server default = "auto"
|
| 1638 |
+
if "name" in tool_args and not isinstance(tool_args["name"], str):
|
| 1639 |
+
tool_args["name"] = "auto"
|
| 1640 |
+
elif tool_name == "action_probe":
|
| 1641 |
+
if not isinstance(tool_args, dict):
|
| 1642 |
+
tool_args = {}
|
| 1643 |
+
if not isinstance(tool_args.get("action", ""), str) or not tool_args["action"].strip():
|
| 1644 |
+
tool_args["action"] = "look"
|
| 1645 |
+
else:
|
| 1646 |
+
tool_args = {}
|
| 1647 |
+
|
| 1648 |
+
|
| 1649 |
+
return thought, tool_name, tool_args
|
| 1650 |
+
|
| 1651 |
+
def _call_llm(self, prompt: str, system_prompt: str, seed: int) -> str:
    """
    Send one prompt to the LLM through ``call_llm`` and return its reply.

    Thin convenience wrapper: the three arguments are forwarded positionally
    and unchanged, so calling ``call_llm()`` directly is equivalent.
    """
    reply = call_llm(prompt, system_prompt, seed)
    return reply
|
| 1658 |
+
|
| 1659 |
+
def _normalize_action(self, action: str) -> str:
|
| 1660 |
+
"""Soft normalizer: only for very safe rewrites."""
|
| 1661 |
+
a = (action or "").strip().lower()
|
| 1662 |
+
|
| 1663 |
+
# "look around ..." -> "look"
|
| 1664 |
+
if a.startswith("look around"):
|
| 1665 |
+
return "look"
|
| 1666 |
+
|
| 1667 |
+
# common harmless variants -> "look"
|
| 1668 |
+
if a in {"l", "look.", "look!", "look?"}:
|
| 1669 |
+
return "look"
|
| 1670 |
+
|
| 1671 |
+
# "go north" -> "north" (only for cardinal/up/down)
|
| 1672 |
+
if a.startswith("go "):
|
| 1673 |
+
rest = a[3:].strip()
|
| 1674 |
+
if rest in {
|
| 1675 |
+
"north", "south", "east", "west",
|
| 1676 |
+
"up", "down", "in", "out",
|
| 1677 |
+
"northeast", "northwest", "southeast", "southwest",
|
| 1678 |
+
}:
|
| 1679 |
+
return rest
|
| 1680 |
+
|
| 1681 |
+
# "look at X" -> "examine X"
|
| 1682 |
+
m = re.match(r"look at (.+)", a)
|
| 1683 |
+
if m:
|
| 1684 |
+
return f"examine {m.group(1).strip()}"
|
| 1685 |
+
|
| 1686 |
+
# "look X" -> "examine X" ONLY when it's not a "look <preposition> ..." form
|
| 1687 |
+
m = re.match(r"^look\s+(.+)$", a)
|
| 1688 |
+
if m:
|
| 1689 |
+
tail = m.group(1).strip()
|
| 1690 |
+
|
| 1691 |
+
# Keep Zork-ish / parser forms like: look for/in/under/behind/through...
|
| 1692 |
+
first = tail.split(" ", 1)[0]
|
| 1693 |
+
if first in {"for", "in", "inside", "under", "behind", "through", "around", "over", "on", "at"}:
|
| 1694 |
+
# note: "look at X" is handled above already, so here we just keep it
|
| 1695 |
+
return a
|
| 1696 |
+
|
| 1697 |
+
# Otherwise: treat "look <noun>" as "examine <noun>"
|
| 1698 |
+
if tail:
|
| 1699 |
+
return f"examine {tail}"
|
| 1700 |
+
|
| 1701 |
+
return a
|
| 1702 |
+
|
| 1703 |
+
def _is_canonical_action(self, action: str) -> bool:
|
| 1704 |
+
"""True if action matches strict canonical grammar."""
|
| 1705 |
+
a = (action or "").strip().lower()
|
| 1706 |
+
|
| 1707 |
+
# single-word commands
|
| 1708 |
+
if a in {
|
| 1709 |
+
"look", "inventory",
|
| 1710 |
+
"north", "south", "east", "west",
|
| 1711 |
+
"up", "down", "in", "out",
|
| 1712 |
+
"northeast", "northwest", "southeast", "southwest",
|
| 1713 |
+
}:
|
| 1714 |
+
return True
|
| 1715 |
+
|
| 1716 |
+
# verb + noun (2-3 tokens max)
|
| 1717 |
+
parts = a.split()
|
| 1718 |
+
if len(parts) in (2, 3):
|
| 1719 |
+
verb = parts[0]
|
| 1720 |
+
if verb in {"take", "drop", "open", "examine", "read", "climb", "enter", "pull", "push", "unlock"}:
|
| 1721 |
+
# forbid placeholders
|
| 1722 |
+
if any(tok.startswith("<") and tok.endswith(">") for tok in parts[1:]):
|
| 1723 |
+
return False
|
| 1724 |
+
return True
|
| 1725 |
+
|
| 1726 |
+
return False
|
| 1727 |
+
|
| 1728 |
+
def _is_allowed_exotic(self, action: str, valid_actions_list: list[str]) -> bool:
|
| 1729 |
+
"""Exotic commands are allowed only if they appear EXACTLY in valid_actions (spacing/case-tolerant)."""
|
| 1730 |
+
if not action:
|
| 1731 |
+
return False
|
| 1732 |
+
a_norm = re.sub(r"\s+", " ", action.strip().lower())
|
| 1733 |
+
for va in valid_actions_list:
|
| 1734 |
+
va_norm = re.sub(r"\s+", " ", va.strip().lower())
|
| 1735 |
+
if a_norm == va_norm:
|
| 1736 |
+
return True
|
| 1737 |
+
return False
|
| 1738 |
+
|
| 1739 |
+
def _looks_like_parser_failure(self, obs: str) -> bool:
|
| 1740 |
+
"""Detect common parser failure messages."""
|
| 1741 |
+
if not obs:
|
| 1742 |
+
return False
|
| 1743 |
+
o = obs.lower()
|
| 1744 |
+
triggers = [
|
| 1745 |
+
"i don't know the word",
|
| 1746 |
+
"that sentence isn't one i recognize",
|
| 1747 |
+
"you used the word",
|
| 1748 |
+
"there was no verb",
|
| 1749 |
+
"i don't understand",
|
| 1750 |
+
"you must tell me how to do that",
|
| 1751 |
+
"you can't see any",
|
| 1752 |
+
]
|
| 1753 |
+
return any(t in o for t in triggers)
|
| 1754 |
+
|
| 1755 |
+
|
| 1756 |
+
def _extract_valid_actions(self, valid_actions_text: str) -> list[str]:
|
| 1757 |
+
"""
|
| 1758 |
+
Parse MCP valid_actions output into a list of exact commands.
|
| 1759 |
+
Supports formats like:
|
| 1760 |
+
'Valid actions:\n- close mailbox\n- north\n...'
|
| 1761 |
+
"""
|
| 1762 |
+
if not valid_actions_text:
|
| 1763 |
+
return []
|
| 1764 |
+
lines = [ln.strip() for ln in valid_actions_text.splitlines()]
|
| 1765 |
+
actions: list[str] = []
|
| 1766 |
+
for ln in lines:
|
| 1767 |
+
if ln.startswith("- "):
|
| 1768 |
+
actions.append(ln[2:].strip())
|
| 1769 |
+
return actions
|
| 1770 |
+
|
| 1771 |
+
def _norm(self, s: str) -> str:
|
| 1772 |
+
return re.sub(r"\s+", " ", (s or "").strip().lower())
|
| 1773 |
+
|
| 1774 |
+
def _is_move(self, action: str) -> bool:
|
| 1775 |
+
return self._norm(action) in {
|
| 1776 |
+
"north", "south", "east", "west", "up", "down", "in", "out",
|
| 1777 |
+
"northwest", "northeast", "southwest", "southeast",
|
| 1778 |
+
}
|
| 1779 |
+
|
| 1780 |
+
def _extract_tried_actions_for_current_location(self, tried_actions_text: str) -> set[str]:
|
| 1781 |
+
"""
|
| 1782 |
+
Parse output of tried_actions() from the server and return the set of actions
|
| 1783 |
+
already attempted in the current location (best-effort).
|
| 1784 |
+
"""
|
| 1785 |
+
if not tried_actions_text:
|
| 1786 |
+
return set()
|
| 1787 |
+
|
| 1788 |
+
cur = (self._last_room_line or "").strip().lower()
|
| 1789 |
+
if not cur:
|
| 1790 |
+
return set()
|
| 1791 |
+
|
| 1792 |
+
lines = tried_actions_text.splitlines()
|
| 1793 |
+
|
| 1794 |
+
# Look for a block:
|
| 1795 |
+
# - <Location>:
|
| 1796 |
+
# - action
|
| 1797 |
+
in_block = False
|
| 1798 |
+
tried = set()
|
| 1799 |
+
|
| 1800 |
+
for ln in lines:
|
| 1801 |
+
s = ln.rstrip("\n")
|
| 1802 |
+
|
| 1803 |
+
# Start of a location block
|
| 1804 |
+
if re.match(r"^\-\s+.+:\s*$", s.strip()):
|
| 1805 |
+
loc_name = s.strip()[2:-1].strip().lower()
|
| 1806 |
+
in_block = (loc_name == cur)
|
| 1807 |
+
continue
|
| 1808 |
+
|
| 1809 |
+
# Action lines in the block (format " - xxx")
|
| 1810 |
+
if in_block:
|
| 1811 |
+
st = s.strip()
|
| 1812 |
+
if st.startswith("- "):
|
| 1813 |
+
act = st[2:].strip().lower()
|
| 1814 |
+
if act:
|
| 1815 |
+
tried.add(self._norm(act))
|
| 1816 |
+
|
| 1817 |
+
return tried
|
| 1818 |
+
|
| 1819 |
+
def _rank_action_candidate(self, action: str) -> int:
|
| 1820 |
+
"""
|
| 1821 |
+
Smaller is better. Gives a deterministic ranking for probing/choosing.
|
| 1822 |
+
"""
|
| 1823 |
+
a = self._norm(action)
|
| 1824 |
+
if a.startswith("take "): return 0
|
| 1825 |
+
if a.startswith("open "): return 1
|
| 1826 |
+
if a.startswith("unlock "): return 2
|
| 1827 |
+
if a.startswith("enter "): return 3
|
| 1828 |
+
if a in {"in", "up", "down"}: return 4
|
| 1829 |
+
if a.startswith("read "): return 5
|
| 1830 |
+
if a.startswith("examine "): return 6
|
| 1831 |
+
if a in {"north","east","south","west","northeast","northwest","southeast","southwest"}: return 7
|
| 1832 |
+
if a == "look": return 8
|
| 1833 |
+
if a == "inventory": return 9
|
| 1834 |
+
return 50
|
| 1835 |
+
|
| 1836 |
+
async def _choose_with_probe(
|
| 1837 |
+
self,
|
| 1838 |
+
client,
|
| 1839 |
+
candidates: list[str],
|
| 1840 |
+
available_tool_names: set[str],
|
| 1841 |
+
) -> str | None:
|
| 1842 |
+
"""
|
| 1843 |
+
Use action_probe to select the best candidate.
|
| 1844 |
+
Best = positive score_delta, else state_hash change, else first candidate.
|
| 1845 |
+
Probes at most 2 actions to stay cheap.
|
| 1846 |
+
"""
|
| 1847 |
+
if not candidates:
|
| 1848 |
+
return None
|
| 1849 |
+
if "action_probe" not in available_tool_names:
|
| 1850 |
+
return candidates[0]
|
| 1851 |
+
|
| 1852 |
+
# Sort candidates by heuristic rank, then probe top 2
|
| 1853 |
+
candidates_sorted = sorted(candidates, key=lambda x: self._rank_action_candidate(x))
|
| 1854 |
+
to_probe = candidates_sorted[:2]
|
| 1855 |
+
|
| 1856 |
+
best = None
|
| 1857 |
+
best_tuple = None # (score_delta, hash_changed, reward_delta)
|
| 1858 |
+
|
| 1859 |
+
for act in to_probe:
|
| 1860 |
+
try:
|
| 1861 |
+
rep_raw = await client.call_tool("action_probe", {"action": act})
|
| 1862 |
+
rep_txt = self._tool_text_any(rep_raw)
|
| 1863 |
+
rep = json.loads(rep_txt) if rep_txt else {}
|
| 1864 |
+
|
| 1865 |
+
sd = int(rep.get("score_delta", 0) or 0)
|
| 1866 |
+
rd = int(rep.get("reward_delta", 0) or 0)
|
| 1867 |
+
hc = bool(rep.get("hash_changed")) # not perfect
|
| 1868 |
+
tup = (sd, hc, rd)
|
| 1869 |
+
|
| 1870 |
+
if best_tuple is None or tup > best_tuple:
|
| 1871 |
+
best_tuple = tup
|
| 1872 |
+
best = act
|
| 1873 |
+
except Exception:
|
| 1874 |
+
continue
|
| 1875 |
+
|
| 1876 |
+
return best or candidates_sorted[0]
|
| 1877 |
+
|
| 1878 |
+
def _tool_text_any(self, res) -> str:
|
| 1879 |
+
if res is None:
|
| 1880 |
+
return ""
|
| 1881 |
+
if isinstance(res, str):
|
| 1882 |
+
return res
|
| 1883 |
+
if isinstance(res, dict):
|
| 1884 |
+
return json.dumps(res)
|
| 1885 |
+
content = getattr(res, "content", None)
|
| 1886 |
+
if content:
|
| 1887 |
+
try:
|
| 1888 |
+
if isinstance(content, list) and content and hasattr(content[0], "text"):
|
| 1889 |
+
return content[0].text or ""
|
| 1890 |
+
except Exception:
|
| 1891 |
+
pass
|
| 1892 |
+
if isinstance(res, list) and res:
|
| 1893 |
+
try:
|
| 1894 |
+
if hasattr(res[0], "text"):
|
| 1895 |
+
return res[0].text or ""
|
| 1896 |
+
except Exception:
|
| 1897 |
+
pass
|
| 1898 |
+
return str(res)
|
| 1899 |
+
|
| 1900 |
+
def _planner_should_run(self, step_idx: int, observation: str, force: bool) -> bool:
|
| 1901 |
+
if force:
|
| 1902 |
+
return True
|
| 1903 |
+
if (step_idx - self._planner_last_step) < self._planner_cooldown:
|
| 1904 |
+
return False
|
| 1905 |
+
low = (observation or "").lower()
|
| 1906 |
+
triggers = ["locked", "dark", "can't", "i don't know the word", "that sentence isn't one i recognize"]
|
| 1907 |
+
return any(t in low for t in triggers) or (step_idx % self._planner_cooldown == 0)
|
| 1908 |
+
|
| 1909 |
+
def _filter_planner_actions(self, actions: list[str], valid_actions_list: list[str]) -> list[str]:
|
| 1910 |
+
"""
|
| 1911 |
+
Keep only actions that are:
|
| 1912 |
+
- canonical OR appear exactly in valid_actions_list (if provided)
|
| 1913 |
+
- non-empty
|
| 1914 |
+
"""
|
| 1915 |
+
out = []
|
| 1916 |
+
va = [self._norm(x) for x in (valid_actions_list or [])]
|
| 1917 |
+
for a in (actions or []):
|
| 1918 |
+
a = (a or "").strip().lower()
|
| 1919 |
+
if not a:
|
| 1920 |
+
continue
|
| 1921 |
+
if self._is_canonical_action(a):
|
| 1922 |
+
out.append(a)
|
| 1923 |
+
continue
|
| 1924 |
+
# exotic allowed only if in valid_actions
|
| 1925 |
+
if va and self._is_allowed_exotic(a, valid_actions_list):
|
| 1926 |
+
out.append(a)
|
| 1927 |
+
# max 3
|
| 1928 |
+
return out[:3]
|
| 1929 |
+
|
| 1930 |
+
def _run_planner_llm(
    self,
    observation: str,
    state_obj: dict,
    valid_actions_list: list[str],
    tried_here_list: list[str],
    seed: int,
    step_idx: int,
) -> None:
    """
    Ask the planner LLM for updated objectives and suggested actions.

    Builds the planner prompt, calls the LLM, and reads the reply as JSON.
    On success: a list under "objectives" replaces the current objectives,
    "suggested_actions" is filtered through ``_filter_planner_actions``,
    "notes" is stored truncated to 200 characters, and
    ``_planner_last_step`` is set to ``step_idx``.
    On any exception inside the call/parse/update phase the suggestions and
    notes are cleared and nothing is raised.
    """
    planner_prompt = build_planner_prompt(
        observation=observation,
        state_obj=state_obj or {},
        synth_memory=self.synth_memory or {},
        objectives_text=self.objman.render(),
        valid_actions_list=valid_actions_list or [],
        tried_here=tried_here_list or [],
    )

    try:
        reply = call_llm(
            planner_prompt,
            PLANNER_SYSTEM,
            seed=seed + 50_000 + step_idx,
            max_tokens=450,
        )
        plan = json.loads(reply)

        objectives = plan.get("objectives", [])
        if isinstance(objectives, list):
            self.objman.replace_from_llm(objectives)

        suggestions = plan.get("suggested_actions", [])
        suggestions = suggestions if isinstance(suggestions, list) else []
        self._planner_suggested_actions = self._filter_planner_actions(
            suggestions, valid_actions_list
        )

        notes = plan.get("notes", "") or ""
        self._planner_notes = str(notes)[:200]
        self._planner_last_step = step_idx
    except Exception:
        # planner failure should be silent (don't break the run)
        self._planner_suggested_actions = []
        self._planner_notes = ""
|
| 1965 |
+
|
| 1966 |
+
|
| 1967 |
+
# =============================================================================
|
| 1968 |
+
# For local testing
|
| 1969 |
+
# =============================================================================
|
| 1970 |
+
|
| 1971 |
+
async def test_agent():
    """Run the agent locally for a short zork1 session and print a summary."""
    from fastmcp import Client

    # Path to your MCP server
    mcp_server_script = "mcp_server.py"

    student = StudentAgent()

    async with Client(mcp_server_script) as mcp_client:
        outcome = await student.run(
            client=mcp_client,
            game="zork1",
            max_steps=10,
            seed=42,
            verbose=True,
        )

        # Summary of the finished run
        print(f"\nFinal Score: {outcome.final_score}")
        print(f"Moves: {outcome.moves}")
        print(f"Locations: {outcome.locations_visited}")
|
| 1992 |
+
|
| 1993 |
+
|
| 1994 |
+
if __name__ == "__main__":
    # Script entry point: run the local smoke test on a fresh event loop.
    import asyncio

    smoke_test = test_agent()
    asyncio.run(smoke_test)
|
app.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Hugging Face Space - Text Adventure Agent Submission
|
| 3 |
+
|
| 4 |
+
This is a code-only Space for submitting your agent implementation.
|
| 5 |
+
The evaluation is run separately.
|
| 6 |
+
|
| 7 |
+
Files in this submission:
|
| 8 |
+
- agent.py: Your ReAct agent implementation
|
| 9 |
+
- mcp_server.py: Your MCP server implementation
|
| 10 |
+
- requirements.txt: Additional dependencies
|
| 11 |
+
|
| 12 |
+
To test locally:
|
| 13 |
+
fastmcp dev mcp_server.py
|
| 14 |
+
python agent.py
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import gradio as gr
|
| 18 |
+
from pathlib import Path
|
| 19 |
+
|
| 20 |
+
# Static Markdown sections shown on the Space page, in display order.
_MARKDOWN_SECTIONS = (
    "# Text Adventure Agent Submission",
    "This Space contains a template submission for the Text Adventure Agent assignment. ",
    (
        "---\n"
        "**Note:** This is a code submission Space. "
        "Evaluation is performed using the evaluation script.\n\n"
        "[Back to main assignment page](https://huggingface.co/spaces/LLM-course/Agentic-zork)"
    ),
)

# Build the Gradio interface: one Markdown component per section.
with gr.Blocks(title="Text Adventure Agent Submission") as demo:
    for _section in _MARKDOWN_SECTIONS:
        gr.Markdown(_section)


if __name__ == "__main__":
    # Start the web UI when executed as a script.
    demo.launch()
|
explanations.md
ADDED
|
@@ -0,0 +1,379 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Building a Reliable MCP Agent for Zork-Style Text Adventures
|
| 2 |
+
|
| 3 |
+
Text adventures sound trivial: you read a paragraph, type a command, get a new paragraph.
|
| 4 |
+
But once you put an LLM in that loop, you learn quickly that the hardest enemies aren’t in the dungeon—they’re in the interface.
|
| 5 |
+
|
| 6 |
+
What kills most LLM agents in Zork-like games is a predictable set of failure modes:
|
| 7 |
+
|
| 8 |
+
- **Parser brittleness**: the game rejects slightly-wrong phrasing.
|
| 9 |
+
- **Looping**: the model repeats actions, rooms, or “no-op” moves.
|
| 10 |
+
- **Move budget waste**: doing “admin” actions that consume moves.
|
| 11 |
+
- **Prompt bloat**: raw history gets too long and too noisy.
|
| 12 |
+
- **Goal drift**: the model forgets what it was trying to do.
|
| 13 |
+
|
| 14 |
+
Several of these issues, namely memory, coherence, and planning, are also discussed in the paper "TextQuests: How Good are LLMs at Text-Based Video Games?" (https://arxiv.org/pdf/2507.23701).
|
| 15 |
+
|
| 16 |
+
So we didn’t build “a prompt.” We built a **system** with two main components:
|
| 17 |
+
- an **MCP server** that exposes the game through robust tools and instrumentation
|
| 18 |
+
- and an **agent** that treats the LLM as one component among others (memory, planning, recovery policies)
|
| 19 |
+
|
| 20 |
+
Our focus was on the failure modes listed above and on how to design around them with tools and guardrails.
|
| 21 |
+
|
| 22 |
+
This is a high-level tour of the approach, focusing on the big ideas, without getting into implementation details.
|
| 23 |
+
|
| 24 |
+
The code is available in this Hugging Face Space.
|
| 25 |
+
|
| 26 |
+
---
|
| 27 |
+
|
| 28 |
+
## The Setup: Two Pieces, One Loop
|
| 29 |
+
|
| 30 |
+
### 1) `mcp_server.py` — the game adapter + instrumentation layer
|
| 31 |
+
The MCP server acts like the game interface for the agent. It:
|
| 32 |
+
- owns the environment (`TextAdventureEnv`)
|
| 33 |
+
- runs commands (`play_action`)
|
| 34 |
+
- tracks exploration metadata (rooms, transitions, tried actions)
|
| 35 |
+
- exposes tools that help reasoning **without spending moves**
|
| 36 |
+
- provides safety mechanisms like checkpoints and action simulation
|
| 37 |
+
|
| 38 |
+
### 2) `agent.py` — the policy engine + ReAct decision-maker
|
| 39 |
+
The agent:
|
| 40 |
+
- outputs strict **ReAct** steps (THOUGHT -> TOOL -> ARGS)
|
| 41 |
+
- can only interact via MCP tools (never “talks to the game” directly)
|
| 42 |
+
- uses guardrails to keep the LLM from hallucinating tools/commands, looping, spamming, etc.
|
| 43 |
+
- uses *two additional LLM calls* as specialized modules:
|
| 44 |
+
- **memory compression** (long-term, high-signal memory)
|
| 45 |
+
- **objective planning** (goal updates + suggested next actions)
|
| 46 |
+
|
| 47 |
+
---
|
| 48 |
+
|
| 49 |
+
## Why “Tooling” is important: The MCP Server as a Game Interface
|
| 50 |
+
|
| 51 |
+
A Zork parser is not a friendly API. If the model invents commands like *“look around carefully”*, the game will often respond with something like:
|
| 52 |
+
> “That sentence isn’t one I recognize.”
|
| 53 |
+
|
| 54 |
+
If you only expose `play_action`, the agent becomes a guessing machine.
|
| 55 |
+
|
| 56 |
+
So the MCP server provides a richer interface that makes the world “legible”:
|
| 57 |
+
|
| 58 |
+
- **Structured state** (score, moves, inventory, room, “done”, a stable hash)
|
| 59 |
+
- **Inventory without spending a move**
|
| 60 |
+
- **Valid actions** (best-effort list) for recovery
|
| 61 |
+
- **A map/graph** of explored rooms and transitions
|
| 62 |
+
- **Actions tried per room** to avoid repeating
|
| 63 |
+
- **Checkpoints** to rollback after loops or risky moves
|
| 64 |
+
- **Action probing** (simulate before committing)
|
| 65 |
+
|
| 66 |
+
This set of tools is what turns the text game into something the agent can navigate reliably.
|
| 67 |
+
|
| 68 |
+
---
|
| 69 |
+
|
| 70 |
+
# Part 1 — The MCP Server: Turning a Game into a Usable API
|
| 71 |
+
|
| 72 |
+
## The Server’s Core Idea: Track More Than the Game Tracks
|
| 73 |
+
|
| 74 |
+
The environment gives you:
|
| 75 |
+
- observation text
|
| 76 |
+
- score/moves (usually)
|
| 77 |
+
- maybe inventory (depending on wrapper)
|
| 78 |
+
|
| 79 |
+
But it *doesn’t* give you the extra structure an agent needs to be efficient:
|
| 80 |
+
- Where have I been?
|
| 81 |
+
- What did I already try here?
|
| 82 |
+
- How do rooms connect?
|
| 83 |
+
- Am I stuck in a loop?
|
| 84 |
+
|
| 85 |
+
So the server maintains that meta-state itself:
|
| 86 |
+
- a short **history** of actions and results
|
| 87 |
+
- a set of **locations** (rooms) discovered
|
| 88 |
+
- a **transition graph** (`room --action--> room`)
|
| 89 |
+
- an index of **actions tried per location**
|
| 90 |
+
- checkpoint snapshots for rollback
|
| 91 |
+
- a stable-ish **state hash** used to detect loops
|
| 92 |
+
|
| 93 |
+
This is *not* just logging. It becomes actionable tool output the agent can rely on.
|
| 94 |
+
|
| 95 |
+
---
|
| 96 |
+
|
| 97 |
+
## Room Awareness: The Small Heuristic That Makes Everything Work
|
| 98 |
+
|
| 99 |
+
Most downstream reasoning depends on “what room am I in?”
|
| 100 |
+
|
| 101 |
+
The server uses a heuristic to extract the room title from the observation:
|
| 102 |
+
- pick the first plausible “header-like” line
|
| 103 |
+
- ignore copyright/revision boilerplate
|
| 104 |
+
- ignore long narrative sentences
|
| 105 |
+
|
| 106 |
+
This matters because room identity powers:
|
| 107 |
+
- mapping
|
| 108 |
+
- “tried actions” grouping
|
| 109 |
+
- loop detection context
|
| 110 |
+
- objective tracking (“return to grating”, “open mailbox”, etc.)
|
| 111 |
+
|
| 112 |
+
If you don’t have stable room identity, the agent’s memory becomes confused.
|
| 113 |
+
|
| 114 |
+
---
|
| 115 |
+
|
| 116 |
+
## The Minimal but Critical Tools
|
| 117 |
+
|
| 118 |
+
### `play_action(action)`
|
| 119 |
+
The main interaction tool:
|
| 120 |
+
- runs the command
|
| 121 |
+
- returns the observation
|
| 122 |
+
- appends optional “+points” signals and “GAME OVER”
|
| 123 |
+
- never crashes the tool (so the run doesn’t die on edge cases)
|
| 124 |
+
|
| 125 |
+
This tool is deliberately boring—but highly reliable.
|
| 126 |
+
|
| 127 |
+
### `inventory()`
|
| 128 |
+
A huge move-saver: it returns inventory **without advancing the game**.
|
| 129 |
+
In text adventures, calling `inventory` as a game command costs a move in many setups, so treating inventory as a *tool query* is a big advantage.
|
| 130 |
+
|
| 131 |
+
### `memory()`
|
| 132 |
+
A compact summary tool that provides “authoritative state”:
|
| 133 |
+
- location
|
| 134 |
+
- score/moves
|
| 135 |
+
- recent action heads
|
| 136 |
+
- last observation
|
| 137 |
+
|
| 138 |
+
It’s a sanity anchor when the agent gets confused.
|
| 139 |
+
|
| 140 |
+
### `valid_actions()`
|
| 141 |
+
A helpful tool when stuck:
|
| 142 |
+
- tries to fetch the actual valid actions if the environment exposes them
|
| 143 |
+
- otherwise falls back to a canonical action menu
|
| 144 |
+
|
| 145 |
+
The agent uses it sparingly—only when stuck or after parser failures.
|
| 146 |
+
|
| 147 |
+
### `tried_actions()`
|
| 148 |
+
The anti-loop tool:
|
| 149 |
+
- returns actions already attempted in each room
|
| 150 |
+
- helps the agent choose *new* high-value actions instead of repeating `open mailbox` 10 times
|
| 151 |
+
|
| 152 |
+
### `get_map()` and `graph()`
|
| 153 |
+
These expose exploration as:
|
| 154 |
+
- a human-readable map (for prompts)
|
| 155 |
+
- a structured JSON graph (for future logic/visualization)
|
| 156 |
+
|
| 157 |
+
Mapping gives the agent an explicit “where have I been?” memory that the LLM doesn’t have to hallucinate.
|
| 158 |
+
|
| 159 |
+
---
|
| 160 |
+
|
| 161 |
+
## Guardrail Tools That Make the System Feel "Serious"
|
| 162 |
+
|
| 163 |
+
### Checkpoints (`checkpoint_save`, `checkpoint_restore`)
|
| 164 |
+
Checkpoints are a reliability hack with real impact:
|
| 165 |
+
- if the agent detects a loop or makes a catastrophic move, it can rollback
|
| 166 |
+
- we keep at least one “loop” checkpoint as a stable anchor
|
| 167 |
+
- we can also maintain a “best” checkpoint after scoring gains
|
| 168 |
+
|
| 169 |
+
This transforms the exploration strategy:
|
| 170 |
+
- you can take risks, because you can recover
|
| 171 |
+
|
| 172 |
+
### `action_probe(action)` — action simulation without commitment
|
| 173 |
+
This is one of the more original parts of the server.
|
| 174 |
+
|
| 175 |
+
The idea:
|
| 176 |
+
- save a snapshot
|
| 177 |
+
- perform the action
|
| 178 |
+
- record deltas (score, moves, hash, location changes)
|
| 179 |
+
- restore the snapshot
|
| 180 |
+
- restore tracking metadata too (so probing doesn’t poison history/map)
|
| 181 |
+
|
| 182 |
+
It returns a compact JSON “what would happen if…?” report.
|
| 183 |
+
|
| 184 |
+
This enables a surprisingly strong behavior:
|
| 185 |
+
> choose between candidates without committing a move (when snapshot/restore succeeds)
|
| 186 |
+
|
| 187 |
+
We keep it cheap (probe only a couple of actions) but it’s an excellent tie-breaker when stuck.
|
| 188 |
+
|
| 189 |
+
---
|
| 190 |
+
|
| 191 |
+
# Part 2 — The Agent: ReAct, But Constrained and Safe
|
| 192 |
+
|
| 193 |
+
## Strict ReAct as a Contract (Not a Style)
|
| 194 |
+
|
| 195 |
+
The agent uses a strict format:
|
| 196 |
+
- THOUGHT: one short sentence
|
| 197 |
+
- TOOL: one of the allowed tool names
|
| 198 |
+
- ARGS: valid JSON
|
| 199 |
+
|
| 200 |
+
That format is useful for stability:
|
| 201 |
+
- the agent becomes machine-parseable
|
| 202 |
+
- tool calls are consistent
|
| 203 |
+
|
| 204 |
+
---
|
| 205 |
+
|
| 206 |
+
## Important Policy: Command Grammar Discipline
|
| 207 |
+
|
| 208 |
+
Text adventure parsers punish creativity.
|
| 209 |
+
|
| 210 |
+
So the agent enforces a tight grammar:
|
| 211 |
+
- movement is single-word: `north`, `in`, `up`, …
|
| 212 |
+
- interaction is short verb+noun: `open mailbox`, `take lamp`, …
|
| 213 |
+
- exotic multiword commands are allowed **only if** they appear exactly in `valid_actions`
|
| 214 |
+
|
| 215 |
+
That last rule is a big deal:
|
| 216 |
+
- it prevents the LLM from inventing fancy commands
|
| 217 |
+
- it converts “language” into “API calls”
|
| 218 |
+
- it makes the agent much more robust across seeds
|
| 219 |
+
|
| 220 |
+
---
|
| 221 |
+
|
| 222 |
+
## The Agent’s Guardrails: How We Stop Thrashing
|
| 223 |
+
|
| 224 |
+
Here are the big guardrail categories (conceptually, not line-by-line):
|
| 225 |
+
|
| 226 |
+
### 1) Tool validation
|
| 227 |
+
If the model requests an unknown tool:
|
| 228 |
+
- we don’t execute it
|
| 229 |
+
- we inject feedback listing allowed tools
|
| 230 |
+
- we force recovery behavior next
|
| 231 |
+
|
| 232 |
+
### 2) Parser failure detection
|
| 233 |
+
If the observation looks like a parser error (“I don’t know the word…”, “sentence isn’t recognized”):
|
| 234 |
+
- we switch into recovery mode
|
| 235 |
+
- we fetch valid actions (once)
|
| 236 |
+
- we force a simpler action selection
|
| 237 |
+
|
| 238 |
+
### 3) Anti-repeat behavior (local)
|
| 239 |
+
We track:
|
| 240 |
+
- the last action
|
| 241 |
+
- actions blocked in the current room
|
| 242 |
+
- actions tried in the current room
|
| 243 |
+
|
| 244 |
+
If the model repeats a no-progress action:
|
| 245 |
+
- we refuse it
|
| 246 |
+
- we force a new choice
|
| 247 |
+
|
| 248 |
+
### 4) Loop detection (global)
|
| 249 |
+
The agent uses the server’s `state_hash`:
|
| 250 |
+
- if the same hash repeats several times, we’re looping
|
| 251 |
+
|
| 252 |
+
Then we can:
|
| 253 |
+
- restore a checkpoint
|
| 254 |
+
- re-orient with `look`
|
| 255 |
+
- switch strategy
|
| 256 |
+
|
| 257 |
+
### 5) Movement bias (Zork-specific optimization)
|
| 258 |
+
When multiple movement options exist:
|
| 259 |
+
- “in / up / down” tend to unlock deeper progress
|
| 260 |
+
- cardinal directions tend to be broad exploration
|
| 261 |
+
|
| 262 |
+
So we bias toward `in/up/down` (especially after seeing them in valid actions).
|
| 263 |
+
|
| 264 |
+
It’s a small heuristic that often pays off.
|
| 265 |
+
|
| 266 |
+
---
|
| 267 |
+
|
| 268 |
+
## Two Specialized LLM Modules: Memory and Planning
|
| 269 |
+
|
| 270 |
+
This is where the project becomes more than a typical ReAct agent.
|
| 271 |
+
|
| 272 |
+
### Specialized module #1: Memory Compression (Long-Term Memory)
|
| 273 |
+
Raw history is short-term memory. It’s verbose, expensive, and noisy.
|
| 274 |
+
|
| 275 |
+
So we maintain a **synthesized memory JSON**, updated periodically by an LLM whose only job is to compress experience into decision-useful facts:
|
| 276 |
+
|
| 277 |
+
- durable facts learned
|
| 278 |
+
- obstacles + what is needed
|
| 279 |
+
- what items/tools to search for
|
| 280 |
+
- open threads worth returning to
|
| 281 |
+
- important visited places
|
| 282 |
+
|
| 283 |
+
We keep it:
|
| 284 |
+
- short
|
| 285 |
+
- deduplicated
|
| 286 |
+
- structured
|
| 287 |
+
- bounded (so it doesn’t explode)
|
| 288 |
+
|
| 289 |
+
If that LLM call fails or returns invalid JSON:
|
| 290 |
+
- we simply skip the update
|
| 291 |
+
- the run continues safely
|
| 292 |
+
|
| 293 |
+
The goal is to make the agent stay coherent over long runs.
|
| 294 |
+
|
| 295 |
+
### Specialized module #2: Objective Planning (Goal Management)
|
| 296 |
+
Action selection is short-horizon.
|
| 297 |
+
But Zork requires long-horizon intent.
|
| 298 |
+
|
| 299 |
+
So we run a separate “planner” LLM that:
|
| 300 |
+
- updates objectives (explore, open, unlock, acquire key/lamp, return somewhere)
|
| 301 |
+
- proposes up to a few suggested next actions
|
| 302 |
+
- provides short evidence
|
| 303 |
+
|
| 304 |
+
Crucially:
|
| 305 |
+
- planner suggestions are **not auto-executed**
|
| 306 |
+
- they are injected into the prompt as guidance
|
| 307 |
+
- the main ReAct decision still chooses the next tool/action
|
| 308 |
+
|
| 309 |
+
This separation reduces goal drift:
|
| 310 |
+
- the agent behaves like it has a mental TODO list
|
| 311 |
+
- and doesn’t wander aimlessly as often
|
| 312 |
+
|
| 313 |
+
---
|
| 314 |
+
|
| 315 |
+
## Deterministic Overrides: Sometimes We Don’t Ask the LLM
|
| 316 |
+
|
| 317 |
+
Some policies are too important to leave to “model mood.”
|
| 318 |
+
|
| 319 |
+
Example: **treasure acquisition**
|
| 320 |
+
If we see obvious treasure nouns in visible objects:
|
| 321 |
+
- we immediately `take <item>`
|
| 322 |
+
- no debate, no planning, no cleverness
|
| 323 |
+
|
| 324 |
+
---
|
| 325 |
+
|
| 326 |
+
## Checkpoints as a Strategy, Not Just a Feature
|
| 327 |
+
|
| 328 |
+
The agent uses checkpoints like a game speedrunner would:
|
| 329 |
+
- keep a “loop” checkpoint as a stable anchor
|
| 330 |
+
- save a “best” checkpoint after scoring gains
|
| 331 |
+
|
| 332 |
+
That means:
|
| 333 |
+
- progress is protected
|
| 334 |
+
- exploration can be more aggressive
|
| 335 |
+
- loop recovery is fast
|
| 336 |
+
|
| 337 |
+
It’s a pragmatic way to make the system resilient under a move budget.
|
| 338 |
+
|
| 339 |
+
---
|
| 340 |
+
|
| 341 |
+
# What You Get From This Approach
|
| 342 |
+
|
| 343 |
+
Compared to a vanilla “LLM + play_action” loop, this system is:
|
| 344 |
+
|
| 345 |
+
- **more reliable** (fewer parser deaths, fewer infinite loops)
|
| 346 |
+
- **more efficient** (fewer wasted moves, fewer repeated actions)
|
| 347 |
+
- **more scalable** (memory doesn’t balloon)
|
| 348 |
+
- **more coherent** (objectives keep the agent on track)
|
| 349 |
+
- **more intentional** (action_probe and valid_actions are used strategically)
|
| 350 |
+
|
| 351 |
+
---
|
| 352 |
+
|
| 353 |
+
## Final Takeaway
|
| 354 |
+
|
| 355 |
+
Text adventures punish the exact things LLMs love:
|
| 356 |
+
- improvisation in language
|
| 357 |
+
- repetition
|
| 358 |
+
- vague intent
|
| 359 |
+
- verbose context
|
| 360 |
+
|
| 361 |
+
So we respond with the opposite:
|
| 362 |
+
- strict grammar
|
| 363 |
+
- structured state
|
| 364 |
+
- explicit recovery
|
| 365 |
+
- bounded but long-term memory
|
| 366 |
+
- deliberate planning
|
| 367 |
+
|
| 368 |
+
---
|
| 369 |
+
|
| 370 |
+
# Evaluations
|
| 371 |
+
|
| 372 |
+
The evaluation was run for 200 steps on 2 seeds, using lostpig and zork1 as test games.
|
| 373 |
+
|
| 374 |
+
# Potential Improvements
|
| 375 |
+
|
| 376 |
+
- **Navigation tool** — a `go_to(location)` tool that uses the transition graph to find a sequence of moves from the current location to the target location (with a BFS, for example) and applies them automatically instead of letting the LLM guess the path. This would reduce move waste and improve reliability.
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
|
mcp_server.py
ADDED
|
@@ -0,0 +1,819 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Student MCP Server for Text Adventure Games
|
| 3 |
+
|
| 4 |
+
This is your MCP server submission. Implement the tools that your agent
|
| 5 |
+
will use to play text adventure games.
|
| 6 |
+
|
| 7 |
+
Required tool:
|
| 8 |
+
play_action(action: str) -> str
|
| 9 |
+
Execute a game command and return the result.
|
| 10 |
+
|
| 11 |
+
Recommended tools:
|
| 12 |
+
memory() -> str
|
| 13 |
+
Return current game state, score, and recent history.
|
| 14 |
+
|
| 15 |
+
inventory() -> str
|
| 16 |
+
Return the player's current inventory.
|
| 17 |
+
|
| 18 |
+
get_map() -> str
|
| 19 |
+
Return a map of explored locations.
|
| 20 |
+
|
| 21 |
+
Test your server with:
|
| 22 |
+
fastmcp dev submission_template/mcp_server.py
|
| 23 |
+
|
| 24 |
+
Then open the MCP Inspector in your browser to test the tools interactively.
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
import sys
|
| 28 |
+
import os
|
| 29 |
+
import re
|
| 30 |
+
from collections import defaultdict
|
| 31 |
+
import json
|
| 32 |
+
import hashlib
|
| 33 |
+
from copy import deepcopy
|
| 34 |
+
|
| 35 |
+
# Add parent directory to path to import games module
|
| 36 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
|
| 37 |
+
|
| 38 |
+
from fastmcp import FastMCP
|
| 39 |
+
from games.zork_env import TextAdventureEnv
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# =============================================================================
|
| 43 |
+
# Create the MCP Server
|
| 44 |
+
# =============================================================================
|
| 45 |
+
|
| 46 |
+
mcp = FastMCP("Student Text Adventure Server")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
# =============================================================================
|
| 50 |
+
# Game State Management
|
| 51 |
+
# =============================================================================
|
| 52 |
+
|
| 53 |
+
class GameManager:
|
| 54 |
+
"""
|
| 55 |
+
Manages the text adventure game state.
|
| 56 |
+
|
| 57 |
+
TODO: Extend this class to track:
|
| 58 |
+
- Action history (for memory tool)
|
| 59 |
+
- Explored locations (for mapping)
|
| 60 |
+
- Current score and moves
|
| 61 |
+
"""
|
| 62 |
+
|
| 63 |
+
def __init__(self):
|
| 64 |
+
self.env: TextAdventureEnv = None
|
| 65 |
+
self.state = None
|
| 66 |
+
self.game_name: str = ""
|
| 67 |
+
|
| 68 |
+
# History
|
| 69 |
+
self.max_history = 50 # Max number of recent actions to store
|
| 70 |
+
self.history: list[tuple[str, str]] = []
|
| 71 |
+
|
| 72 |
+
# checkpoints
|
| 73 |
+
self.checkpoints = {} # name -> opaque state snapshot
|
| 74 |
+
self.last_reward = 0
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
# Map tracking
|
| 78 |
+
self.locations = set() # Set of explored locations
|
| 79 |
+
self.current_location: str | None = None
|
| 80 |
+
|
| 81 |
+
# Transions
|
| 82 |
+
self.transitions = defaultdict(dict) # location -> action -> new_location
|
| 83 |
+
|
| 84 |
+
# Action tracking
|
| 85 |
+
self.actions_tried_by_location = defaultdict(list) # location -> list of actions tried
|
| 86 |
+
self._actions_tried_set = defaultdict(set)
|
| 87 |
+
|
| 88 |
+
def initialize(self, game: str = "zork1"):
|
| 89 |
+
"""Initialize or reset the game."""
|
| 90 |
+
self.game_name = game
|
| 91 |
+
self.env = TextAdventureEnv(game)
|
| 92 |
+
self.state = self.env.reset()
|
| 93 |
+
|
| 94 |
+
# reset tracking
|
| 95 |
+
self.history = []
|
| 96 |
+
self.locations = set()
|
| 97 |
+
self.transitions = defaultdict(dict)
|
| 98 |
+
self.actions_tried_by_location = defaultdict(list)
|
| 99 |
+
self._actions_tried_set = defaultdict(set)
|
| 100 |
+
|
| 101 |
+
# set initial location
|
| 102 |
+
obs = (self.state.observation or "")
|
| 103 |
+
self.current_location = self._extract_location(obs)
|
| 104 |
+
if self.current_location:
|
| 105 |
+
self.locations.add(self.current_location)
|
| 106 |
+
return obs
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def step(self, action: str) -> str:
|
| 110 |
+
"""Execute an action and return the result."""
|
| 111 |
+
if self.env is None:
|
| 112 |
+
self.initialize()
|
| 113 |
+
|
| 114 |
+
action_clean = (action or "").strip().lower()
|
| 115 |
+
|
| 116 |
+
from_location = self.current_location
|
| 117 |
+
|
| 118 |
+
# track "action tried" in the room
|
| 119 |
+
if from_location and action_clean not in self._actions_tried_set[from_location]:
|
| 120 |
+
self.actions_tried_by_location[from_location].append(action_clean)
|
| 121 |
+
self._actions_tried_set[from_location].add(action_clean)
|
| 122 |
+
|
| 123 |
+
#Execute the requested action
|
| 124 |
+
self.state = self.env.step(action)
|
| 125 |
+
raw_obs = self.state.observation or ""
|
| 126 |
+
|
| 127 |
+
# No forced look: avoid consuming extra moves
|
| 128 |
+
result_obs = raw_obs
|
| 129 |
+
|
| 130 |
+
# Track history (single action only)
|
| 131 |
+
self.history.append((action, result_obs))
|
| 132 |
+
|
| 133 |
+
# Cap history
|
| 134 |
+
while len(self.history) > self.max_history:
|
| 135 |
+
self.history.pop(0)
|
| 136 |
+
|
| 137 |
+
# update last reward
|
| 138 |
+
try:
|
| 139 |
+
self.last_reward = getattr(self.state, "reward", 0) or 0
|
| 140 |
+
except Exception:
|
| 141 |
+
self.last_reward = 0
|
| 142 |
+
|
| 143 |
+
# Track locations + transitions using the best available observation (result_obs)
|
| 144 |
+
new_location = self._extract_location(result_obs)
|
| 145 |
+
if new_location:
|
| 146 |
+
self.locations.add(new_location)
|
| 147 |
+
|
| 148 |
+
# Record transition only if location actually changed
|
| 149 |
+
if from_location and new_location != from_location:
|
| 150 |
+
# store canonical mapping: from -> action -> to (overwrite is OK)
|
| 151 |
+
self.transitions[from_location][action_clean] = new_location
|
| 152 |
+
|
| 153 |
+
# update current location
|
| 154 |
+
self.current_location = new_location
|
| 155 |
+
|
| 156 |
+
return result_obs
|
| 157 |
+
|
| 158 |
+
def _extract_location(self, observation: str) -> str | None:
|
| 159 |
+
"""Extract the current location name from the observation text."""
|
| 160 |
+
# This is a heuristic that works for Zork and similar games where the location is in ALL CAPS at the start
|
| 161 |
+
if not observation:
|
| 162 |
+
return None
|
| 163 |
+
|
| 164 |
+
for line in observation.splitlines():
|
| 165 |
+
s = line.strip()
|
| 166 |
+
if not s:
|
| 167 |
+
continue
|
| 168 |
+
|
| 169 |
+
low = s.lower()
|
| 170 |
+
|
| 171 |
+
# filter common non-room headers / system lines
|
| 172 |
+
if low.startswith("copyright"):
|
| 173 |
+
continue
|
| 174 |
+
if "trademark" in low:
|
| 175 |
+
continue
|
| 176 |
+
if low.startswith("revision"):
|
| 177 |
+
continue
|
| 178 |
+
if low.startswith("serial number"):
|
| 179 |
+
continue
|
| 180 |
+
if "revision" in low and "serial" in low:
|
| 181 |
+
continue
|
| 182 |
+
|
| 183 |
+
# room titles in Zork are typically short and NOT full sentences
|
| 184 |
+
if len(s) > 50:
|
| 185 |
+
continue
|
| 186 |
+
if s.endswith((".", "!", "?", ":", ";")):
|
| 187 |
+
continue
|
| 188 |
+
|
| 189 |
+
# also avoid lines that look like status messages
|
| 190 |
+
bad_starts = (
|
| 191 |
+
"you ", "it ", "i ", "there ", "the ", "a ", "an ",
|
| 192 |
+
"what ", "can't ", "i don't", "unknown", "error"
|
| 193 |
+
)
|
| 194 |
+
if low.startswith(bad_starts):
|
| 195 |
+
continue
|
| 196 |
+
|
| 197 |
+
return s
|
| 198 |
+
|
| 199 |
+
return None
|
| 200 |
+
|
| 201 |
+
def get_memory(self, last_k: int = 10) -> str:
|
| 202 |
+
"""Return a short summary of state + recent history."""
|
| 203 |
+
loc = self.current_location or "Unknown"
|
| 204 |
+
score = self.get_score()
|
| 205 |
+
moves = self.get_moves()
|
| 206 |
+
obs = (self.state.observation or "").strip() if self.state else ""
|
| 207 |
+
|
| 208 |
+
recent = self.history[-last_k:] if self.history else []
|
| 209 |
+
if recent:
|
| 210 |
+
recent_lines = "\n".join(
|
| 211 |
+
f"- {a} -> {(o.splitlines()[0] if o else '')}"
|
| 212 |
+
for a, o in recent
|
| 213 |
+
)
|
| 214 |
+
else:
|
| 215 |
+
recent_lines = "(none)"
|
| 216 |
+
|
| 217 |
+
return (
|
| 218 |
+
f"Game: {self.game_name}\n"
|
| 219 |
+
f"Location: {loc}\n"
|
| 220 |
+
f"Score: {score}\n"
|
| 221 |
+
f"Moves: {moves}\n\n"
|
| 222 |
+
f"Recent actions:\n{recent_lines}\n\n"
|
| 223 |
+
f"Last observation:\n{obs}"
|
| 224 |
+
)
|
| 225 |
+
|
| 226 |
+
def get_score(self) -> int:
|
| 227 |
+
"""Get current score."""
|
| 228 |
+
return self.state.score if self.state else 0
|
| 229 |
+
|
| 230 |
+
def get_moves(self) -> int:
|
| 231 |
+
"""Get number of moves taken."""
|
| 232 |
+
return self.state.moves if self.state else 0
|
| 233 |
+
|
| 234 |
+
def get_map(self) -> str:
|
| 235 |
+
"""Return a simple text map of explored locations with action-labeled transitions."""
|
| 236 |
+
if not self.locations:
|
| 237 |
+
return "No locations explored yet."
|
| 238 |
+
|
| 239 |
+
lines = [f"Current location: {self.current_location or 'Unknown'}", ""]
|
| 240 |
+
|
| 241 |
+
lines.append("Explored locations:")
|
| 242 |
+
for loc in sorted(self.locations):
|
| 243 |
+
lines.append(f"- {loc}")
|
| 244 |
+
|
| 245 |
+
lines.append("")
|
| 246 |
+
lines.append("Transitions (from --action--> to):")
|
| 247 |
+
|
| 248 |
+
any_edge = False
|
| 249 |
+
for frm in sorted(self.transitions.keys()):
|
| 250 |
+
for act, to in sorted(self.transitions[frm].items()):
|
| 251 |
+
any_edge = True
|
| 252 |
+
lines.append(f"- {frm} --{act}--> {to}")
|
| 253 |
+
|
| 254 |
+
if not any_edge:
|
| 255 |
+
lines.append("- (none yet)")
|
| 256 |
+
|
| 257 |
+
return "\n".join(lines)
|
| 258 |
+
|
| 259 |
+
|
| 260 |
+
def _item_name(self, item) -> str:
|
| 261 |
+
"""Best-effort: extract a human-friendly name from a Jericho item object."""
|
| 262 |
+
for attr in ("name", "label", "noun", "text"):
|
| 263 |
+
v = getattr(item, attr, None)
|
| 264 |
+
if isinstance(v, str) and v.strip():
|
| 265 |
+
return v.strip()
|
| 266 |
+
|
| 267 |
+
s = str(item)
|
| 268 |
+
m = re.search(r"Obj\d+:\s*([^\s]+)", s)
|
| 269 |
+
if m:
|
| 270 |
+
return m.group(1)
|
| 271 |
+
|
| 272 |
+
return s.strip() if s.strip() else "unknown"
|
| 273 |
+
|
| 274 |
+
|
| 275 |
+
def get_inventory(self) -> str:
|
| 276 |
+
"""
|
| 277 |
+
Return inventory WITHOUT advancing the game (does not call env.step).
|
| 278 |
+
If state.inventory doesn't exist, returns a fallback message.
|
| 279 |
+
"""
|
| 280 |
+
if not self.state:
|
| 281 |
+
return "Inventory not available (game not initialized)."
|
| 282 |
+
|
| 283 |
+
inv = getattr(self.state, "inventory", None)
|
| 284 |
+
|
| 285 |
+
# Case 0: inventory exposed as a string
|
| 286 |
+
if isinstance(inv, str):
|
| 287 |
+
return inv.strip() if inv.strip() else "You are not carrying anything."
|
| 288 |
+
|
| 289 |
+
# Case 1: inventory exposed as list/tuple of objects
|
| 290 |
+
if isinstance(inv, (list, tuple)):
|
| 291 |
+
if len(inv) == 0:
|
| 292 |
+
return "You are not carrying anything."
|
| 293 |
+
pretty = [self._item_name(x) for x in inv]
|
| 294 |
+
return "You are carrying:\n" + "\n".join(f"- {name}" for name in pretty)
|
| 295 |
+
|
| 296 |
+
return "Inventory not available from state (no state.inventory)."
|
| 297 |
+
|
| 298 |
+
|
| 299 |
+
def get_valid_actions(self, max_actions: int = 30) -> str:
|
| 300 |
+
try:
|
| 301 |
+
# Option A: wrapper exposes it
|
| 302 |
+
if self.env is not None and hasattr(self.env, "get_valid_actions"):
|
| 303 |
+
valid = self.env.get_valid_actions()
|
| 304 |
+
# Option B: underlying Jericho env
|
| 305 |
+
elif self.env is not None and hasattr(self.env, "env") and hasattr(self.env.env, "get_valid_actions"):
|
| 306 |
+
valid = self.env.env.get_valid_actions()
|
| 307 |
+
else:
|
| 308 |
+
valid = None
|
| 309 |
+
|
| 310 |
+
if isinstance(valid, (list, tuple)) and valid:
|
| 311 |
+
valid = [str(v) for v in valid][:max_actions]
|
| 312 |
+
return "Valid actions:\n" + "\n".join(f"- {v}" for v in valid)
|
| 313 |
+
except Exception:
|
| 314 |
+
pass
|
| 315 |
+
|
| 316 |
+
return (
|
| 317 |
+
"Valid actions (fallback):\n"
|
| 318 |
+
"- look\n- inventory\n- north/south/east/west/up/down/in/out\n"
|
| 319 |
+
"- take <noun>\n- drop <noun>\n- open <noun>\n- examine <noun>\n- read <noun>\n"
|
| 320 |
+
)
|
| 321 |
+
|
| 322 |
+
|
| 323 |
+
def get_actions_tried(self, limit_per_room: int = 50) -> str:
|
| 324 |
+
"""Return actions tried per location (most recent last)."""
|
| 325 |
+
if not self.actions_tried_by_location:
|
| 326 |
+
return "No actions tracked yet."
|
| 327 |
+
|
| 328 |
+
lines = [
|
| 329 |
+
f"Current location: {self.current_location or 'Unknown'}",
|
| 330 |
+
"",
|
| 331 |
+
"Actions tried by location:",
|
| 332 |
+
]
|
| 333 |
+
|
| 334 |
+
for loc in sorted(self.actions_tried_by_location.keys()):
|
| 335 |
+
acts = self.actions_tried_by_location[loc]
|
| 336 |
+
if not acts:
|
| 337 |
+
continue
|
| 338 |
+
shown = acts[-limit_per_room:]
|
| 339 |
+
lines.append(f"- {loc}:")
|
| 340 |
+
for a in shown:
|
| 341 |
+
lines.append(f" - {a}")
|
| 342 |
+
|
| 343 |
+
return "\n".join(lines)
|
| 344 |
+
|
| 345 |
+
def _snapshot(self):
|
| 346 |
+
"""
|
| 347 |
+
Best-effort snapshot. Tries env/state native methods if available, else deepcopies state.
|
| 348 |
+
"""
|
| 349 |
+
if self.env is None:
|
| 350 |
+
return None
|
| 351 |
+
|
| 352 |
+
# 1) Native env snapshot if exists
|
| 353 |
+
for obj in (self.env, getattr(self.env, "env", None)):
|
| 354 |
+
if obj is None:
|
| 355 |
+
continue
|
| 356 |
+
if hasattr(obj, "get_state") and callable(obj.get_state):
|
| 357 |
+
try:
|
| 358 |
+
return ("native", obj.get_state())
|
| 359 |
+
except Exception:
|
| 360 |
+
pass
|
| 361 |
+
|
| 362 |
+
# 2) Fallback: deepcopy state object (works often, not always)
|
| 363 |
+
try:
|
| 364 |
+
return ("deepcopy", deepcopy(self.state))
|
| 365 |
+
except Exception:
|
| 366 |
+
# 3) Last resort: keep nothing (restore impossible)
|
| 367 |
+
return ("none", None)
|
| 368 |
+
|
| 369 |
+
|
| 370 |
+
def _restore_snapshot(self, snap):
|
| 371 |
+
"""
|
| 372 |
+
Best-effort restore snapshot created by _snapshot().
|
| 373 |
+
"""
|
| 374 |
+
if self.env is None or snap is None:
|
| 375 |
+
return False
|
| 376 |
+
|
| 377 |
+
kind, payload = snap
|
| 378 |
+
if kind == "native":
|
| 379 |
+
for obj in (self.env, getattr(self.env, "env", None)):
|
| 380 |
+
if obj is None:
|
| 381 |
+
continue
|
| 382 |
+
if hasattr(obj, "set_state") and callable(obj.set_state):
|
| 383 |
+
try:
|
| 384 |
+
obj.set_state(payload)
|
| 385 |
+
# re-sync wrapper state if needed
|
| 386 |
+
if hasattr(self.env, "state"):
|
| 387 |
+
try:
|
| 388 |
+
self.state = self.env.state
|
| 389 |
+
except Exception:
|
| 390 |
+
pass
|
| 391 |
+
return True
|
| 392 |
+
except Exception:
|
| 393 |
+
pass
|
| 394 |
+
return False
|
| 395 |
+
|
| 396 |
+
if kind == "deepcopy":
|
| 397 |
+
try:
|
| 398 |
+
self.state = payload
|
| 399 |
+
# If wrapper uses internal state, try to set it too
|
| 400 |
+
if hasattr(self.env, "state"):
|
| 401 |
+
try:
|
| 402 |
+
self.env.state = payload
|
| 403 |
+
except Exception:
|
| 404 |
+
pass
|
| 405 |
+
return True
|
| 406 |
+
except Exception:
|
| 407 |
+
return False
|
| 408 |
+
|
| 409 |
+
return False
|
| 410 |
+
|
| 411 |
+
def _state_hash(self) -> str:
|
| 412 |
+
"""
|
| 413 |
+
Stable-ish hash to detect loops. Prefer env-provided hash; else hash observation+inv+loc+score+moves.
|
| 414 |
+
"""
|
| 415 |
+
# If Jericho exposes something like state.hash or env.get_world_state_hash, use it (best-effort).
|
| 416 |
+
for obj in (self.state, self.env, getattr(self.env, "env", None)):
|
| 417 |
+
if obj is None:
|
| 418 |
+
continue
|
| 419 |
+
for attr in ("hash", "state_hash", "world_hash"):
|
| 420 |
+
if hasattr(obj, attr):
|
| 421 |
+
try:
|
| 422 |
+
v = getattr(obj, attr)
|
| 423 |
+
if callable(v):
|
| 424 |
+
v = v()
|
| 425 |
+
if isinstance(v, (str, int)):
|
| 426 |
+
return str(v)
|
| 427 |
+
except Exception:
|
| 428 |
+
pass
|
| 429 |
+
|
| 430 |
+
loc = self.current_location or ""
|
| 431 |
+
obs = (getattr(self.state, "observation", "") or "")
|
| 432 |
+
score = self.get_score()
|
| 433 |
+
moves = self.get_moves()
|
| 434 |
+
inv = getattr(self.state, "inventory", None)
|
| 435 |
+
|
| 436 |
+
inv_str = ""
|
| 437 |
+
if isinstance(inv, str):
|
| 438 |
+
inv_str = inv
|
| 439 |
+
elif isinstance(inv, (list, tuple)):
|
| 440 |
+
inv_str = "|".join(self._item_name(x) for x in inv)
|
| 441 |
+
|
| 442 |
+
payload = f"{loc}\n{score}\n{moves}\n{inv_str}\n{obs[:500]}"
|
| 443 |
+
return hashlib.sha1(payload.encode("utf-8", errors="ignore")).hexdigest()
|
| 444 |
+
|
| 445 |
+
|
| 446 |
+
def _extract_visible_objects_heuristic(self, observation: str) -> list[str]:
|
| 447 |
+
"""
|
| 448 |
+
Heuristic object noun extraction. Not perfect but useful.
|
| 449 |
+
Keeps short nouns; removes stopwords; favors known Zork-ish interactables.
|
| 450 |
+
"""
|
| 451 |
+
if not observation:
|
| 452 |
+
return []
|
| 453 |
+
|
| 454 |
+
obs = observation.lower()
|
| 455 |
+
|
| 456 |
+
# quick whitelist of common objects
|
| 457 |
+
common = [
|
| 458 |
+
"mailbox","leaflet","door","window","grating","lamp","lantern","sword","knife",
|
| 459 |
+
"trapdoor","chest","box","table","rug","mat","rope","key","keys","bottle","water",
|
| 460 |
+
"egg","nest","tree","stairs","staircase","gate"
|
| 461 |
+
]
|
| 462 |
+
found = [w for w in common if w in obs]
|
| 463 |
+
|
| 464 |
+
# de-dup
|
| 465 |
+
out = []
|
| 466 |
+
seen = set()
|
| 467 |
+
for x in found:
|
| 468 |
+
if x not in seen:
|
| 469 |
+
out.append(x)
|
| 470 |
+
seen.add(x)
|
| 471 |
+
return out
|
| 472 |
+
|
| 473 |
+
|
| 474 |
+
def get_state_struct(self) -> dict:
|
| 475 |
+
obs = (getattr(self.state, "observation", "") or "")
|
| 476 |
+
inv = getattr(self.state, "inventory", None)
|
| 477 |
+
|
| 478 |
+
inv_list = []
|
| 479 |
+
if isinstance(inv, str):
|
| 480 |
+
# can't parse reliably => keep as one string
|
| 481 |
+
inv_list = [inv.strip()] if inv.strip() else []
|
| 482 |
+
elif isinstance(inv, (list, tuple)):
|
| 483 |
+
inv_list = [self._item_name(x) for x in inv]
|
| 484 |
+
|
| 485 |
+
return {
|
| 486 |
+
"game": self.game_name,
|
| 487 |
+
"location": self.current_location or "Unknown",
|
| 488 |
+
"score": self.get_score(),
|
| 489 |
+
"moves": self.get_moves(),
|
| 490 |
+
"done": bool(getattr(self.state, "done", False)) if self.state else False,
|
| 491 |
+
"last_reward": int(getattr(self, "last_reward", 0) or 0),
|
| 492 |
+
"state_hash": self._state_hash(),
|
| 493 |
+
"inventory": inv_list,
|
| 494 |
+
"visible_objects": self._extract_visible_objects_heuristic(obs),
|
| 495 |
+
"last_observation": obs,
|
| 496 |
+
}
|
| 497 |
+
|
| 498 |
+
|
| 499 |
+
# Global game manager
|
| 500 |
+
_game = GameManager()
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
def get_game() -> GameManager:
    """Return the shared game manager, starting a game on first use."""
    if _game.env is None:
        # The game name comes from the GAME environment variable (set by the
        # evaluator); "zork1" is used when it is absent.
        _game.initialize(os.environ.get("GAME", "zork1"))
    return _game
|
| 511 |
+
|
| 512 |
+
|
| 513 |
+
# =============================================================================
|
| 514 |
+
# MCP Tools - IMPLEMENT THESE
|
| 515 |
+
# =============================================================================
|
| 516 |
+
|
| 517 |
+
@mcp.tool()
def play_action(action: str) -> str:
    """
    Execute a game command and return the result.

    This is the main tool for interacting with the game.

    Args:
        action: The command to execute (e.g., "north", "take lamp", "open mailbox")

    Returns:
        The game's response to the action

    Valid commands include:
    - Movement: north, south, east, west, up, down, enter, exit
    - Objects: take <item>, drop <item>, open <thing>, examine <thing>
    - Other: look, inventory, read <thing>, turn on lamp
    """
    # NOTE: the docstring above is what the tool decorator publishes as the
    # tool description, so its wording is intentionally left untouched.
    manager = get_game()

    # Reject blank input before it reaches the game.
    command = (action or "").strip()
    if not command:
        return "I didn't receive an action. Try: look, north, open mailbox, take lamp."

    text = manager.step(command)

    # Decorate the observation with score gains and the end-of-game marker.
    # This is cosmetic, so any failure here must not hide the observation.
    try:
        gained = getattr(manager.state, "reward", 0) or 0
        total = getattr(manager.state, "score", None)
        finished = bool(getattr(manager.state, "done", False))

        if gained and total is not None and gained > 0:
            text += f"\n\n+{gained} points! (Total: {total})"

        if finished:
            text += "\n\n*** GAME OVER ***"
    except Exception:
        pass

    return text
|
| 561 |
+
|
| 562 |
+
@mcp.tool()
def memory() -> str:
    """
    Return a compact summary of the current game state:
    location, score, moves, recent history, last observation.
    """
    # Docstring wording is kept as-is: it is the published tool description.
    return get_game().get_memory(last_k=10)
|
| 570 |
+
|
| 571 |
+
|
| 572 |
+
@mcp.tool()
def get_map() -> str:
    """
    Return a simple map of explored locations + known transitions,
    as rendered by the game wrapper.
    """
    return get_game().get_map()
|
| 579 |
+
|
| 580 |
+
|
| 581 |
+
@mcp.tool()
def inventory() -> str:
    """
    Return the player's inventory WITHOUT advancing the game.

    Delegates to the game wrapper's get_inventory().
    """
    return get_game().get_inventory()
|
| 588 |
+
|
| 589 |
+
|
| 590 |
+
@mcp.tool()
def valid_actions() -> str:
    """
    Return a list of likely valid actions (best-effort).

    The game wrapper is asked for the list with max_actions=30.
    """
    return get_game().get_valid_actions(max_actions=30)
|
| 597 |
+
|
| 598 |
+
|
| 599 |
+
@mcp.tool()
def tried_actions() -> str:
    """
    Return actions tried, grouped by location, to avoid loops.

    The game wrapper is asked for the listing with limit_per_room=50.
    """
    return get_game().get_actions_tried(limit_per_room=50)
|
| 606 |
+
|
| 607 |
+
|
| 608 |
+
@mcp.tool()
def hint() -> str:
    """
    Get non-spoiler hints based on the current observation/inventory/location.

    The latest observation, the current location name and the inventory are
    lower-cased and scanned for keyword cues (darkness, windows, leaves,
    gratings, containers, trees, keys, weapons, explicit narration about
    climbing/entering/leaving). Each matching cue contributes one hint line.

    Returns:
        A "Hints:" header followed by one "- ..." bullet per hint. When no
        cue matches, two generic suggestions are returned instead.
    """
    game = get_game()

    observation = getattr(game.state, "observation", "") or ""
    obs = observation.lower()
    loc = (game.current_location or "").lower()

    def seen(*cues: str) -> bool:
        """Return True when any of the cues occurs in the observation text."""
        return any(cue in obs for cue in cues)

    def describe(entry) -> str:
        """Lower-cased item name, falling back to str() if the lookup fails."""
        try:
            return game._item_name(entry).lower()
        except Exception:
            return str(entry).lower()

    # Best-effort inventory WITHOUT advancing game: only the cached state is
    # inspected, no command is sent.
    carried = ""
    raw_inventory = getattr(game.state, "inventory", None)
    if isinstance(raw_inventory, str):
        carried = raw_inventory.lower()
    elif isinstance(raw_inventory, (list, tuple)):
        carried = " ".join([describe(entry) for entry in raw_inventory])

    hints: list[str] = []

    # Darkness / light
    if seen("dark", "pitch black") or "dark" in loc:
        hints.append("It is dangerous to move around in the dark. You need a light source.")
        if "lamp" in carried or "lantern" in carried:
            hints.append("You seem to have a lamp/lantern. Try turning it on if that action is available.")
        else:
            hints.append("If you see a lamp or lantern anywhere, pick it up immediately.")

    # Window
    if seen("window"):
        if seen("ajar", "open"):
            hints.append("An open/ajar window may be an entry point. Try 'enter window' or 'in' if allowed.")
        else:
            hints.append("A window often leads somewhere. Try 'open window' or examine it more closely.")

    # Leaves ("pile of leaves" is already covered by the shorter cue)
    if seen("leaves"):
        hints.append("A pile of leaves often hides something. Try moving or taking them.")

    # Grating
    if seen("grating"):
        hints.append("A grating is usually a passage. Try opening or unlocking it, or inspect nearby objects.")

    # Containers
    if seen("mailbox", "chest", "box", "container", "cabinet", "case", "sack"):
        hints.append("Try opening containers. They often contain useful items.")

    # Trees / climbing ("trees" is already covered by "tree")
    if seen("tree"):
        hints.append("Trees may be climbable. Look for branches or try climbing if possible.")
    if seen("climbable", "you can climb"):
        hints.append("Climbing may lead to new areas. Try climbing up or down if available.")

    # Keys / weapons: only suggested while the item is not already carried
    if seen("key") and "key" not in carried:
        hints.append("Keys are important. Pick it up if you can.")
    if seen("sword", "knife") and not ("sword" in carried or "knife" in carried):
        hints.append("A weapon may be useful later. Consider taking it.")

    # Explicit possibility override (narration cues). The "it is possible
    # to ..." phrasings are matched by the shorter "possible to ..." cues.
    narration_cues = (
        (("possible to climb down", "you can climb down"),
         "The narration says you can climb down here — try: 'down'."),
        (("possible to climb up", "you can climb up"),
         "The narration says you can climb up here — try: 'up'."),
        (("possible to enter", "you can enter", "way in"),
         "The narration suggests an entry is possible — try: 'in'."),
        (("way out", "possible to leave", "you can leave"),
         "The narration suggests an exit — try: 'out'."),
    )
    for cues, message in narration_cues:
        if seen(*cues):
            hints.append(message)

    if not hints:
        hints.append("If you feel stuck, call valid_actions and try 1–2 new high-value actions (take/open/enter/climb/pull).")
        hints.append("Avoid repeating actions that produced no new information in the same location.")

    return "Hints:\n" + "\n".join(f"- {h}" for h in hints)
|
| 691 |
+
|
| 692 |
+
@mcp.tool()
def state() -> str:
    """
    Structured state as JSON string.

    Serialises the game wrapper's state struct with 2-space indentation and
    without escaping non-ASCII characters.
    """
    struct = get_game().get_state_struct()
    return json.dumps(struct, ensure_ascii=False, indent=2)
|
| 699 |
+
|
| 700 |
+
|
| 701 |
+
@mcp.tool()
def exits() -> str:
    """
    Return possible movement actions from valid_actions (best-effort).

    The valid-actions listing (requested with max_actions=80) is scanned for
    "- <action>" bullet lines; bullets naming a compass direction, up/down or
    in/out are kept, in listing order.

    Returns:
        A JSON object with the current "location" ("Unknown" when not set)
        and the list of "exits".
    """
    game = get_game()
    listing = game.get_valid_actions(max_actions=80)

    directions = {
        "north", "south", "east", "west", "up", "down", "in", "out",
        "northeast", "northwest", "southeast", "southwest",
    }
    rows = (raw.strip() for raw in listing.splitlines())
    bullets = (row[2:].strip().lower() for row in rows if row.startswith("- "))
    moves = [name for name in bullets if name in directions]

    payload = {"location": game.current_location or "Unknown", "exits": moves}
    return json.dumps(payload, ensure_ascii=False, indent=2)
|
| 716 |
+
|
| 717 |
+
|
| 718 |
+
@mcp.tool()
def graph() -> str:
    """
    Return explored graph as JSON (nodes + edges).

    Returns:
        A JSON object with "current" (the current location, or "Unknown"),
        "nodes" (the sorted known locations) and "edges" (one
        {"from", "action", "to"} record per recorded transition).
    """
    game = get_game()
    nodes = sorted(game.locations)
    edges = [
        {"from": origin, "action": command, "to": destination}
        for origin, outgoing in game.transitions.items()
        for command, destination in outgoing.items()
    ]
    return json.dumps(
        {"current": game.current_location or "Unknown", "nodes": nodes, "edges": edges},
        ensure_ascii=False,
        indent=2,
    )
|
| 731 |
+
|
| 732 |
+
|
| 733 |
+
@mcp.tool()
def checkpoint_save(name: str = "auto") -> str:
    """
    Save an environment snapshot under 'name'.

    The snapshot is stored in game.checkpoints whatever its outcome; the
    reply reports whether it is usable.

    Args:
        name: Key under which the snapshot is stored.

    Returns:
        A JSON object with "ok" (False when the snapshot is None or its
        first element is "none"), the "name" and the snapshot "kind".
    """
    game = get_game()
    snapshot = game._snapshot()
    game.checkpoints[name] = snapshot

    if snapshot is None:
        usable = False
    else:
        usable = snapshot[0] != "none"
    kind = snapshot[0] if snapshot else "none"

    reply = {"ok": bool(usable), "name": name, "kind": kind}
    return json.dumps(reply, ensure_ascii=False, indent=2)
|
| 743 |
+
|
| 744 |
+
|
| 745 |
+
@mcp.tool()
def checkpoint_restore(name: str = "auto") -> str:
    """
    Restore a previously saved snapshot.

    Args:
        name: Key of the checkpoint to restore.

    Returns:
        A JSON object with "ok" (the truthiness of the restore result) and
        the requested "name".
    """
    game = get_game()
    restored = game._restore_snapshot(game.checkpoints.get(name))

    if restored and game.state:
        # Re-derive the location from the restored observation, keeping the
        # previous value when nothing can be extracted.
        text = getattr(game.state, "observation", "") or ""
        game.current_location = game._extract_location(text) or game.current_location
        if game.current_location:
            game.locations.add(game.current_location)

    reply = {"ok": bool(restored), "name": name}
    return json.dumps(reply, ensure_ascii=False, indent=2)
|
| 759 |
+
|
| 760 |
+
|
| 761 |
+
@mcp.tool()
def action_probe(action: str) -> str:
    """
    Simulate an action: save -> step(action) -> capture -> restore.

    Returns a JSON report without committing.

    The environment snapshot and the wrapper's tracking data (history, known
    locations, transitions, tried actions, last reward) are captured before
    the step and put back afterwards, even when the step itself raises.

    Args:
        action: The command to try. Surrounding whitespace is stripped and a
            missing value is treated as an empty command.

    Returns:
        A JSON object with the probed "action", "ok", "restored" (whether the
        environment snapshot was restored), "reward_delta", "score_delta",
        "moves_delta", "done", "new_location", "state_hash",
        "observation_head" (first line of the observation) and "hash_changed".
    """
    game = get_game()

    # Step with the same normalised command that the report shows.
    command = (action or "").strip()

    snap = game._snapshot()
    tracking_backup = {
        "history": list(game.history),
        "locations": set(game.locations),
        "current_location": game.current_location,
        "transitions": deepcopy(game.transitions),
        "actions_tried_by_location": deepcopy(game.actions_tried_by_location),
        "_actions_tried_set": deepcopy(game._actions_tried_set),
        "last_reward": game.last_reward,
    }
    before = game.get_state_struct()

    try:
        obs = game.step(command)
        after = game.get_state_struct()
    finally:
        # Roll back unconditionally so a failing step cannot leak probe
        # side effects into the real game.
        try:
            restored = game._restore_snapshot(snap)
        finally:
            # Restore tracking too (avoid probe side-effects). The current
            # location comes from the backup, so it is not re-derived from
            # the restored observation.
            game.history = tracking_backup["history"]
            game.locations = tracking_backup["locations"]
            game.current_location = tracking_backup["current_location"]
            game.transitions = tracking_backup["transitions"]
            game.actions_tried_by_location = tracking_backup["actions_tried_by_location"]
            game._actions_tried_set = tracking_backup["_actions_tried_set"]
            game.last_reward = tracking_backup["last_reward"]

    def _delta(key: str) -> int:
        """Difference after - before for a numeric field; None counts as 0."""
        return int((after.get(key) or 0) - (before.get(key) or 0))

    text = (obs or "").strip()
    report = {
        "action": command,
        "ok": True,
        "restored": bool(restored),
        "reward_delta": int(after.get("last_reward", 0) or 0),
        "score_delta": _delta("score"),
        "moves_delta": _delta("moves"),
        "done": bool(after.get("done", False)),
        "new_location": after.get("location"),
        "state_hash": after.get("state_hash"),
        "observation_head": text.splitlines()[0] if text else "",
        "hash_changed": before.get("state_hash") != after.get("state_hash"),
    }
    return json.dumps(report, ensure_ascii=False, indent=2)
|
| 811 |
+
|
| 812 |
+
|
| 813 |
+
# =============================================================================
|
| 814 |
+
# Run the server
|
| 815 |
+
# =============================================================================
|
| 816 |
+
|
| 817 |
+
if __name__ == "__main__":
    # Script entry point: start the MCP server. Per the original note, this
    # uses the stdio transport expected by MCP clients.
    mcp.run()
|
requirements.txt
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# HF Spaces already has gradio and huggingface_hub pre-installed
|
| 2 |
+
# Do not add them here or you may get version conflicts
|
| 3 |
+
|
| 4 |
+
# Agent dependencies (these are provided by the evaluation infrastructure)
|
| 5 |
+
# Do not add jericho, fastmcp here - they are installed during evaluation
|
| 6 |
+
|
| 7 |
+
# Add any additional packages your agent needs below:
|
| 8 |
+
# numpy
|
| 9 |
+
# requests
|