Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files- Dockerfile +46 -0
- README.md +202 -2
- __init__.py +23 -0
- client.py +99 -0
- comparison.md +168 -0
- models.py +82 -0
- openenv.yaml +11 -0
- openenv_visual_memory.egg-info/PKG-INFO +17 -0
- openenv_visual_memory.egg-info/SOURCES.txt +21 -0
- openenv_visual_memory.egg-info/dependency_links.txt +1 -0
- openenv_visual_memory.egg-info/entry_points.txt +2 -0
- openenv_visual_memory.egg-info/requires.txt +13 -0
- openenv_visual_memory.egg-info/top_level.txt +1 -0
- pyproject.toml +34 -0
- scenarios/ambiguous_cluster_10x10.json +19 -0
- scenarios/cascading_deduction_11x11.json +19 -0
- scenarios/decoy_minefield_8x10.json +20 -0
- scenarios/delayed_recall_keys_8x8.json +21 -0
- scenarios/directional_trap_8x8.json +19 -0
- scenarios/flash_fade_minefield_7x7.json +20 -0
- scenarios/fog_key_hunt_8x8.json +22 -0
- scenarios/fog_labyrinth_10x10.json +20 -0
- scenarios/partial_intel_9x9.json +18 -0
- scenarios/safe_zone_identification_9x9.json +19 -0
- server/__init__.py +0 -0
- server/app.py +47 -0
- server/engine.py +712 -0
- server/memory_environment.py +620 -0
- server/renderer.py +357 -0
- uv.lock +0 -0
Dockerfile
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Visual Memory Gym — Docker image for OpenEnv + Hugging Face Spaces
|
| 2 |
+
#
|
| 3 |
+
# Single-service Python container on port 8000.
|
| 4 |
+
# No database, no external APIs, no additional services.
|
| 5 |
+
#
|
| 6 |
+
# Build:
|
| 7 |
+
# cd visual-memory && docker build -f server/Dockerfile -t openenv-visual-memory .
|
| 8 |
+
#
|
| 9 |
+
# Run:
|
| 10 |
+
# docker run -d --name visual-memory -p 8000:8000 openenv-visual-memory
|
| 11 |
+
|
| 12 |
+
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
|
| 13 |
+
FROM ${BASE_IMAGE} AS builder
|
| 14 |
+
|
| 15 |
+
WORKDIR /app
|
| 16 |
+
COPY . /app/env
|
| 17 |
+
WORKDIR /app/env
|
| 18 |
+
|
| 19 |
+
RUN if ! command -v uv >/dev/null 2>&1; then \
|
| 20 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh && \
|
| 21 |
+
mv /root/.local/bin/uv /usr/local/bin/uv; \
|
| 22 |
+
fi
|
| 23 |
+
|
| 24 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 25 |
+
if [ -f uv.lock ]; then uv sync --frozen --no-install-project --no-editable; \
|
| 26 |
+
else uv sync --no-install-project --no-editable; fi
|
| 27 |
+
|
| 28 |
+
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 29 |
+
if [ -f uv.lock ]; then uv sync --frozen --no-editable; \
|
| 30 |
+
else uv sync --no-editable; fi
|
| 31 |
+
|
| 32 |
+
FROM ${BASE_IMAGE}
|
| 33 |
+
WORKDIR /app
|
| 34 |
+
COPY --from=builder /app/env/.venv /app/.venv
|
| 35 |
+
COPY --from=builder /app/env /app/env
|
| 36 |
+
|
| 37 |
+
ENV PATH="/app/.venv/bin:$PATH"
|
| 38 |
+
ENV PYTHONPATH="/app/env:$PYTHONPATH"
|
| 39 |
+
ENV ENABLE_WEB_INTERFACE=true
|
| 40 |
+
|
| 41 |
+
EXPOSE 8000
|
| 42 |
+
|
| 43 |
+
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 44 |
+
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"
|
| 45 |
+
|
| 46 |
+
CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
|
README.md
CHANGED
|
@@ -1,10 +1,210 @@
|
|
| 1 |
---
|
| 2 |
title: Visual Memory
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: purple
|
| 5 |
colorTo: indigo
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
title: Visual Memory
|
| 3 |
+
emoji: 🧠
|
| 4 |
colorFrom: purple
|
| 5 |
colorTo: indigo
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
license: mit
|
| 9 |
+
tags:
|
| 10 |
+
- openenv
|
| 11 |
+
- rl-environment
|
| 12 |
+
base_path: /web
|
| 13 |
---
|
| 14 |
|
| 15 |
+
# Visual Memory Gym — *Phantom Grid*
|
| 16 |
+
|
| 17 |
+
**Hidden-state visual reasoning and planning under partial observability.**
|
| 18 |
+
|
| 19 |
+
An OpenEnv RL environment where agents must navigate grids with hidden hazards, memorize revealed patterns, and make optimal decisions with incomplete information. The name *Phantom Grid* reflects the core challenge: invisible dangers lurk beneath every cell, and the agent must deduce their locations from indirect signals — like hunting phantoms by their shadows. Designed to stress spatial reasoning, working memory, uncertainty handling, and risk-averse planning — areas where frontier LLMs consistently underperform.
|
| 20 |
+
|
| 21 |
+
## What Is This Gym?
|
| 22 |
+
|
| 23 |
+
The Visual Memory gym places an LLM agent on a grid board where most cells are initially hidden. The agent must use MCP tools to reveal cells one at a time, interpret the signals (clues about nearby hazards), flag hazard locations, and submit a solution — all within a limited step budget. Every reveal risks hitting a hazard (which can end the game), so the agent must balance information gathering with caution.
|
| 24 |
+
|
| 25 |
+
Unlike typical text-only reasoning benchmarks, this gym requires:
|
| 26 |
+
|
| 27 |
+
- **Spatial reasoning** — interpreting directional and range signals to triangulate hazard positions
|
| 28 |
+
- **Working memory** — recalling previously revealed information across many steps (some cells flash and then fade)
|
| 29 |
+
- **Risk assessment** — deciding when enough evidence exists to commit vs. when to gather more
|
| 30 |
+
- **Distractor resistance** — ignoring trap tools that look helpful but always fail or mislead
|
| 31 |
+
|
| 32 |
+
## Task Families (10 Scenarios)
|
| 33 |
+
|
| 34 |
+
The gym includes 10 hand-crafted scenarios across 4 task families:
|
| 35 |
+
|
| 36 |
+
### Hidden Grid (5 scenarios)
|
| 37 |
+
Deduce hazard locations from signal clues on partially revealed grids. Signal modes include numeric counts, directional arrows, ambiguous ranges, and partial directional hints.
|
| 38 |
+
|
| 39 |
+
| Scenario | Board | Hazards | Signal Mode | Difficulty |
|
| 40 |
+
|---|---|---|---|---|
|
| 41 |
+
| `ambiguous_cluster_10x10` | 10x10 | 18 | Range (min-max) | Hard |
|
| 42 |
+
| `directional_trap_8x8` | 8x8 | 14 | Directional (N/S/E/W) | Hard |
|
| 43 |
+
| `partial_intel_9x9` | 9x9 | 16 | Partial directional | Hard |
|
| 44 |
+
| `cascading_deduction_11x11` | 11x11 | 25 | Partial directional | Very Hard |
|
| 45 |
+
| `safe_zone_identification_9x9` | 9x9 | 22 | Range (min-max) | Hard |
|
| 46 |
+
|
| 47 |
+
### Pattern Memory (2 scenarios)
|
| 48 |
+
Some cells flash their content briefly then fade. The agent must memorize what was shown and use that memory to avoid hazards and collect keys.
|
| 49 |
+
|
| 50 |
+
| Scenario | Board | Special | Difficulty |
|
| 51 |
+
|---|---|---|---|
|
| 52 |
+
| `flash_fade_minefield_7x7` | 7x7 | Flash-then-fade cells | Hard |
|
| 53 |
+
| `delayed_recall_keys_8x8` | 8x8 | 5 keys to collect from faded memory | Hard |
|
| 54 |
+
|
| 55 |
+
### Fog of War (2 scenarios)
|
| 56 |
+
The agent has a limited viewport radius and must move it around the board to explore. Planning efficient exploration paths is critical.
|
| 57 |
+
|
| 58 |
+
| Scenario | Board | Viewport | Difficulty |
|
| 59 |
+
|---|---|---|---|
|
| 60 |
+
| `fog_labyrinth_10x10` | 10x10 | Radius 2 | Hard |
|
| 61 |
+
| `fog_key_hunt_8x8` | 8x8 | Radius 1 (tiny) | Very Hard |
|
| 62 |
+
|
| 63 |
+
### Distractor Search (1 scenario)
|
| 64 |
+
Decoys visually resemble keys. The agent must distinguish real targets from decoys while avoiding hazards.
|
| 65 |
+
|
| 66 |
+
| Scenario | Board | Keys | Decoys | Difficulty |
|
| 67 |
+
|---|---|---|---|---|
|
| 68 |
+
| `decoy_minefield_8x10` | 8x10 | 4 | 8 | Very Hard |
|
| 69 |
+
|
| 70 |
+
## Architecture
|
| 71 |
+
|
| 72 |
+
```
|
| 73 |
+
┌─────────────────────────────────────────┐
|
| 74 |
+
│ OpenEnv Server (:8000) │
|
| 75 |
+
│ ┌────────────┐ ┌───────────────────┐ │
|
| 76 |
+
│ │ FastMCP │──│ MemoryEnvironment │ │
|
| 77 |
+
│ │ (18 tools)│ │ (MCPEnvironment) │ │
|
| 78 |
+
│ └────────────┘ └────────┬──────────┘ │
|
| 79 |
+
│ │ │
|
| 80 |
+
│ ┌────────────┼──────────┐ │
|
| 81 |
+
│ │ Engine │ Renderer │ │
|
| 82 |
+
│ │ (hidden │ (SVG) │ │
|
| 83 |
+
│ │ state) │ │ │
|
| 84 |
+
│ └────────────┴──────────┘ │
|
| 85 |
+
└─────────────────────────────────────────┘
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
All state is in-memory per session. No database, no external APIs. The engine manages the hidden board, validates moves, and computes win/loss conditions. The renderer produces deterministic SVG board views.
|
| 89 |
+
|
| 90 |
+
## MCP Tools (18 total)
|
| 91 |
+
|
| 92 |
+
### Session Management (4 tools)
|
| 93 |
+
|
| 94 |
+
| Tool | Description |
|
| 95 |
+
|------|-------------|
|
| 96 |
+
| `get_session_info` | Get current session metadata (episode, step count) |
|
| 97 |
+
| `list_scenarios` | List all available scenarios with difficulty tags |
|
| 98 |
+
| `load_scenario` | Load and start a specific scenario by ID |
|
| 99 |
+
| `reset_scenario` | Restart the current scenario from scratch |
|
| 100 |
+
|
| 101 |
+
### Observation (4 tools)
|
| 102 |
+
|
| 103 |
+
| Tool | Description |
|
| 104 |
+
|------|-------------|
|
| 105 |
+
| `get_board_view` | Get the visible board as SVG with cell-count metadata (free) |
|
| 106 |
+
| `get_status` | Get game status: score, flags, cells revealed, win condition (free) |
|
| 107 |
+
| `reveal_cell` | Reveal one hidden cell at (row, col) — costs 1 step |
|
| 108 |
+
| `inspect_region` | Get state of cells in a radius without revealing — costs 1 step |
|
| 109 |
+
|
| 110 |
+
### Actions (4 tools)
|
| 111 |
+
|
| 112 |
+
| Tool | Description |
|
| 113 |
+
|------|-------------|
|
| 114 |
+
| `flag_cell` | Mark a hidden cell as hazardous — costs 1 step |
|
| 115 |
+
| `unflag_cell` | Remove a hazard flag from a cell — costs 1 step |
|
| 116 |
+
| `move_viewport` | Move fog-of-war viewport center — costs 1 step (fog scenarios only) |
|
| 117 |
+
| `submit_solution` | Submit final answer and end the game |
|
| 118 |
+
|
| 119 |
+
### Memory / History (3 tools)
|
| 120 |
+
|
| 121 |
+
| Tool | Description |
|
| 122 |
+
|------|-------------|
|
| 123 |
+
| `recall_log` | Return all discovered signals and memory events (free) |
|
| 124 |
+
| `get_action_history` | Return full action log with outcomes (free) |
|
| 125 |
+
| `get_progress_stats` | Return progress metrics without leaking ground truth (free) |
|
| 126 |
+
|
| 127 |
+
### Distractor Traps (3 tools)
|
| 128 |
+
|
| 129 |
+
These look useful but always return errors. Models must learn to avoid them.
|
| 130 |
+
|
| 131 |
+
| Tool | Description | Actual Behavior |
|
| 132 |
+
|------|-------------|-----------------|
|
| 133 |
+
| `auto_solve` | "Run the built-in solver" | Always fails — no solver exists |
|
| 134 |
+
| `peek_hidden_cell` | "View hidden cell without revealing" | Always fails — peeking disabled |
|
| 135 |
+
| `undo_last_action` | "Revert the most recent action" | Always fails — actions are irreversible |
|
| 136 |
+
|
| 137 |
+
## Reward System (3 Layers)
|
| 138 |
+
|
| 139 |
+
### Layer 1 — Environment Step Rewards (built into the gym)
|
| 140 |
+
Per-tool rewards computed inside `memory_environment.py`. Small signals for safe reveals (+0.05), hazard hits (-0.20), correct submissions (+0.50), and distractor use (-0.10).
|
| 141 |
+
|
| 142 |
+
### Layer 2 — Custom Episode Rewards (`rewards/visual_memory_checks.py`)
|
| 143 |
+
Weighted episode-level score computed from the full trajectory:
|
| 144 |
+
|
| 145 |
+
| Component | Weight | Description |
|
| 146 |
+
|---|---|---|
|
| 147 |
+
| Final Correctness | 0.35 | F1 score of submitted solution |
|
| 148 |
+
| Safety Score | 0.20 | Fraction of reveals that avoided hazards |
|
| 149 |
+
| Evidence Support | 0.15 | Used recall/inspect before committing |
|
| 150 |
+
| Irreversible Penalty | -0.15 | Deducted for hazard hits |
|
| 151 |
+
| Efficiency | 0.10 | Steps used relative to budget |
|
| 152 |
+
| Unnecessary Guessing | -0.05 | Deducted for trap tool use or repeated reveals |
|
| 153 |
+
|
| 154 |
+
### Layer 3 — OpenEnv Transform Rewards (`rewards/transforms/visual_memory.py`)
|
| 155 |
+
Per-step rewards for RL training with sharper signal differentiation. Safe reveals (+0.15), hazard hits (-0.40), correct flags (+0.20), distractor use (-0.25), correct submission (+1.0).
|
| 156 |
+
|
| 157 |
+
## Running
|
| 158 |
+
|
| 159 |
+
```bash
|
| 160 |
+
# Install for AutoEnv discovery
|
| 161 |
+
pip install -e visual-memory/
|
| 162 |
+
|
| 163 |
+
# Build Docker image
|
| 164 |
+
cd visual-memory && docker build -t openenv-visual-memory -f server/Dockerfile .
|
| 165 |
+
|
| 166 |
+
# Run container
|
| 167 |
+
docker run -d --name visual-memory -p 8000:8000 openenv-visual-memory
|
| 168 |
+
|
| 169 |
+
# Verify
|
| 170 |
+
curl http://localhost:8000/health
|
| 171 |
+
curl http://localhost:8000/metadata
|
| 172 |
+
|
| 173 |
+
# Evaluate (single model)
|
| 174 |
+
python run_eval.py --gym visual_memory --model gpt-5.4 --save --trajectory
|
| 175 |
+
|
| 176 |
+
# Evaluate (parallel, both reward modes)
|
| 177 |
+
python run_eval.py --gym visual_memory \
|
| 178 |
+
--model gpt-5.4,claude-sonnet-4-6,claude-opus-4-6 \
|
| 179 |
+
--parallel 3 --reward-mode custom --save --trajectory
|
| 180 |
+
|
| 181 |
+
python run_eval.py --gym visual_memory \
|
| 182 |
+
--model gpt-5.4,claude-sonnet-4-6,claude-opus-4-6 \
|
| 183 |
+
--parallel 3 --reward-mode openenv --save --trajectory
|
| 184 |
+
|
| 185 |
+
# Stop
|
| 186 |
+
docker stop visual-memory && docker rm visual-memory
|
| 187 |
+
```
|
| 188 |
+
|
| 189 |
+
## Configuration (.env)
|
| 190 |
+
|
| 191 |
+
| Variable | Default | Description |
|
| 192 |
+
|----------|---------|-------------|
|
| 193 |
+
| `OPENENV_PORT` | `8000` | OpenEnv server port (exposed) |
|
| 194 |
+
| `MAX_CONCURRENT_ENVS` | `4` | Max parallel evaluation sessions |
|
| 195 |
+
| `ENABLE_WEB_INTERFACE` | `true` | Enable HF Spaces web UI |
|
| 196 |
+
| `RENDER_MODE` | `svg` | Board rendering format |
|
| 197 |
+
| `MAX_BOARD_SIZE` | `12` | Maximum supported board dimension |
|
| 198 |
+
|
| 199 |
+
## Concurrent Sessions
|
| 200 |
+
|
| 201 |
+
Each evaluation session gets its own isolated `GameEngine` instance. Multiple agents can evaluate simultaneously against the same Docker container without interference.
|
| 202 |
+
|
| 203 |
+
## Results
|
| 204 |
+
|
| 205 |
+
See `comparison.md` for the full 5-model x 2-reward-mode comparison. SOTA average is well below the 0.6-0.7 target band, confirming the gym's difficulty.
|
| 206 |
+
|
| 207 |
+
| Reward Mode | SOTA Average | All Models Average |
|
| 208 |
+
|---|:---:|:---:|
|
| 209 |
+
| Custom | -0.14 | -0.14 |
|
| 210 |
+
| OpenEnv | 0.28 | 0.28 |
|
__init__.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Visual Memory environment integration for OpenEnv."""
|
| 2 |
+
|
| 3 |
+
from .client import VisualMemoryEnv
|
| 4 |
+
from .models import (
|
| 5 |
+
VisualMemoryAction,
|
| 6 |
+
VisualMemoryObservation,
|
| 7 |
+
VisualMemoryState,
|
| 8 |
+
CallToolAction,
|
| 9 |
+
CallToolObservation,
|
| 10 |
+
ListToolsAction,
|
| 11 |
+
ListToolsObservation,
|
| 12 |
+
)
|
| 13 |
+
|
| 14 |
+
__all__ = [
|
| 15 |
+
"VisualMemoryEnv",
|
| 16 |
+
"VisualMemoryAction",
|
| 17 |
+
"VisualMemoryObservation",
|
| 18 |
+
"VisualMemoryState",
|
| 19 |
+
"CallToolAction",
|
| 20 |
+
"CallToolObservation",
|
| 21 |
+
"ListToolsAction",
|
| 22 |
+
"ListToolsObservation",
|
| 23 |
+
]
|
client.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Visual Memory Environment HTTP Client.
|
| 3 |
+
|
| 4 |
+
Connects to a running Visual Memory OpenEnv server over HTTP/WebSocket.
|
| 5 |
+
Agents interact via MCP tools exposed through step(CallToolAction(...)).
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
from typing import Any, Dict
|
| 11 |
+
|
| 12 |
+
from openenv.core.client_types import StepResult
|
| 13 |
+
from openenv.core.env_client import EnvClient
|
| 14 |
+
from openenv.core.env_server.mcp_types import (
|
| 15 |
+
CallToolAction,
|
| 16 |
+
ListToolsAction,
|
| 17 |
+
Tool,
|
| 18 |
+
)
|
| 19 |
+
|
| 20 |
+
from .models import (
|
| 21 |
+
VisualMemoryAction,
|
| 22 |
+
VisualMemoryObservation,
|
| 23 |
+
VisualMemoryState,
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
class VisualMemoryEnv(EnvClient[VisualMemoryAction, VisualMemoryObservation, VisualMemoryState]):
    """HTTP client for the Visual Memory Environment.

    Connects to a running Visual Memory OpenEnv server; agents interact
    via MCP tools exposed through ``step(CallToolAction(...))``.

    Example:
        >>> async with VisualMemoryEnv(base_url="http://localhost:8000") as client:
        ...     result = await client.reset()
        ...     result = await client.step(
        ...         CallToolAction(tool_name="load_scenario", arguments={"scenario_id": "hidden_grid_01"})
        ...     )
    """

    def list_tools(self, use_cache: bool = True):
        """Fetch the server's MCP tool list via a synchronous HTTP POST.

        The result is memoized on the instance (``self._tools_cache``);
        pass ``use_cache=False`` to force a refresh from the server.
        """
        if use_cache and hasattr(self, "_tools_cache") and self._tools_cache:
            return self._tools_cache
        import requests

        # Derive the HTTP base URL from the WebSocket URL.
        # BUGFIX: the original used .rstrip("/ws"), which strips any
        # trailing run of the characters '/', 'w', 's' — e.g. a URL
        # "http://hosts/ws" would collapse to "http://host".
        # removesuffix() removes exactly the "/ws" path segment.
        http_base = (
            self._ws_url
            .replace("ws://", "http://")
            .replace("wss://", "https://")
            .removesuffix("/ws")
        )
        resp = requests.post(
            f"{http_base}/step",
            json={"action": {"type": "list_tools"}},
        )
        data = resp.json()
        raw_tools = data.get("observation", {}).get("tools", [])
        tools = [
            Tool(
                name=t["name"],
                description=t.get("description", ""),
                input_schema=t.get("input_schema", {}),
            )
            for t in raw_tools
        ]
        self._tools_cache = tools
        return tools

    def _step_payload(self, action: Any) -> Dict:
        """Serialize an action into the JSON payload the server expects."""
        if isinstance(action, ListToolsAction):
            return {"type": "list_tools"}
        if isinstance(action, CallToolAction):
            return {
                "type": "call_tool",
                "tool_name": action.tool_name,
                "arguments": action.arguments or {},
            }
        # Fall back to Pydantic serialization for other action types.
        if hasattr(action, "model_dump"):
            return action.model_dump()
        return {"tool_name": getattr(action, "tool_name", ""), "arguments": {}}

    def _parse_result(self, payload: Dict) -> StepResult[VisualMemoryObservation]:
        """Convert a raw /step response payload into a StepResult.

        The observation fields are read from ``payload["observation"]``
        when present, falling back to the payload itself; ``done`` and
        ``reward`` always come from the top-level payload.
        """
        obs_data = payload.get("observation", payload)
        observation = VisualMemoryObservation(
            tool_name=obs_data.get("tool_name", ""),
            result=obs_data.get("result"),
            error=obs_data.get("error"),
            done=payload.get("done", False),
            reward=payload.get("reward"),
            metadata=obs_data.get("metadata", {}),
        )
        return StepResult(
            observation=observation,
            reward=payload.get("reward"),
            done=payload.get("done", False),
        )

    def _parse_state(self, payload: Dict[str, Any]) -> VisualMemoryState:
        """Convert a raw /state payload into a VisualMemoryState."""
        return VisualMemoryState(
            episode_id=payload.get("episode_id"),
            step_count=payload.get("step_count", 0),
        )
|
comparison.md
ADDED
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Visual Memory Gym — Model Comparison
|
| 2 |
+
|
| 3 |
+
**Date**: 2026-03-18
|
| 4 |
+
**Gym Version**: `0.1.0`
|
| 5 |
+
**Scenarios**: 10 (across 4 task families)
|
| 6 |
+
**Models**: 5 (3 Anthropic, 2 OpenAI)
|
| 7 |
+
**Reward Modes**: custom (episode-level) and openenv (per-step transforms)
|
| 8 |
+
|
| 9 |
+
---
|
| 10 |
+
|
| 11 |
+
## Overall Results
|
| 12 |
+
|
| 13 |
+
### Custom Rewards (episode-level from `rewards/base.py`)
|
| 14 |
+
|
| 15 |
+
Reward components: Structural (0.25) + Ground Truth (0.60) + Efficiency (0.15) - Hallucination Penalty (up to -1.0)
|
| 16 |
+
|
| 17 |
+
| # | Model | Avg Reward | Best Scenario | Worst Scenario | Total Time |
|
| 18 |
+
|---|-------|:---:|---|---|---:|
|
| 19 |
+
| 1 | `claude-opus-4-6` | **0.08** | flash_fade_minefield (0.63) | cascading_deduction (-0.83) | 1518.9s |
|
| 20 |
+
| 2 | `gpt-5` | **-0.13** | flash_fade_minefield (0.69) | decoy_minefield (-0.78) | 2967.2s |
|
| 21 |
+
| 3 | `gpt-5.4` | **-0.16** | partial_intel (0.63) | directional_trap (-0.81) | 225.1s |
|
| 22 |
+
| 4 | `claude-opus-4-20250514` | **-0.17** | ambiguous_cluster (0.60) | cascading_deduction (-0.80) | 1197.4s |
|
| 23 |
+
| 5 | `claude-sonnet-4-6` | **-0.33** | partial_intel (0.40) | directional_trap (-0.83) | 1105.2s |
|
| 24 |
+
|
| 25 |
+
### OpenEnv Transform Rewards (per-step from `rewards/transforms/`)
|
| 26 |
+
|
| 27 |
+
Reward components: Step Rewards (0.40) + Ground Truth (0.60) - Hallucination Penalty (up to -1.0)
|
| 28 |
+
|
| 29 |
+
| # | Model | Avg Reward | Best Scenario | Worst Scenario | Total Time |
|
| 30 |
+
|---|-------|:---:|---|---|---:|
|
| 31 |
+
| 1 | `claude-opus-4-6` | **0.31** | directional_trap (0.37) | decoy_minefield (0.14) | 1584.6s |
|
| 32 |
+
| 2 | `claude-opus-4-20250514` | **0.31** | ambiguous_cluster (0.55) | delayed_recall_keys (0.13) | 1185.2s |
|
| 33 |
+
| 3 | `gpt-5` | **0.28** | flash_fade_minefield (0.53) | ambiguous_cluster (0.12) | 3770.8s |
|
| 34 |
+
| 4 | `gpt-5.4` | **0.27** | fog_labyrinth (0.52) | directional_trap (0.08) | 287.4s |
|
| 35 |
+
| 5 | `claude-sonnet-4-6` | **0.26** | ambiguous_cluster (0.35) | directional_trap (0.14) | 1048.0s |
|
| 36 |
+
|
| 37 |
+
---
|
| 38 |
+
|
| 39 |
+
## Per-Scenario Breakdown — Custom Rewards
|
| 40 |
+
|
| 41 |
+
| Scenario | gpt-5.4 | gpt-5 | claude-sonnet-4-6 | claude-opus-4-6 | claude-opus-4-20250514 | Avg |
|
| 42 |
+
|---|:---:|:---:|:---:|:---:|:---:|:---:|
|
| 43 |
+
| ambiguous_cluster_10x10 | 0.40 | -0.75 | 0.38 | 0.36 | 0.60 | **0.20** |
|
| 44 |
+
| directional_trap_8x8 | -0.81 | 0.41 | -0.83 | 0.61 | 0.42 | **-0.04** |
|
| 45 |
+
| partial_intel_9x9 | 0.63 | 0.42 | 0.40 | 0.41 | 0.44 | **0.46** |
|
| 46 |
+
| flash_fade_minefield_7x7 | 0.42 | 0.69 | -0.78 | 0.63 | -0.76 | **0.04** |
|
| 47 |
+
| delayed_recall_keys_8x8 | 0.45 | 0.53 | -0.80 | 0.43 | -0.79 | **-0.04** |
|
| 48 |
+
| decoy_minefield_8x10 | -0.80 | -0.78 | -0.81 | -0.82 | -0.79 | **-0.80** |
|
| 49 |
+
| fog_labyrinth_10x10 | -0.74 | 0.44 | 0.39 | 0.40 | 0.40 | **0.18** |
|
| 50 |
+
| fog_key_hunt_8x8 | -0.73 | -0.77 | -0.79 | -0.80 | -0.79 | **-0.78** |
|
| 51 |
+
| cascading_deduction_11x11 | -0.78 | -0.76 | -0.79 | -0.83 | -0.80 | **-0.79** |
|
| 52 |
+
| safe_zone_identification_9x9 | 0.40 | -0.77 | 0.37 | 0.38 | 0.41 | **0.16** |
|
| 53 |
+
|
| 54 |
+
### Hardest Scenarios (Custom)
|
| 55 |
+
1. **decoy_minefield_8x10** (avg -0.80): All 5 models fail — hallucination penalty triggered universally
|
| 56 |
+
2. **cascading_deduction_11x11** (avg -0.79): Large board with partial signals defeats all models
|
| 57 |
+
3. **fog_key_hunt_8x8** (avg -0.78): Tiny viewport + fatal hazards — no model survives
|
| 58 |
+
|
| 59 |
+
### Easiest Scenario (Custom)
|
| 60 |
+
1. **partial_intel_9x9** (avg 0.46): Most models achieve positive rewards here
|
| 61 |
+
|
| 62 |
+
---
|
| 63 |
+
|
| 64 |
+
## Per-Scenario Breakdown — OpenEnv Transform Rewards
|
| 65 |
+
|
| 66 |
+
| Scenario | gpt-5.4 | gpt-5 | claude-sonnet-4-6 | claude-opus-4-6 | claude-opus-4-20250514 | Avg |
|
| 67 |
+
|---|:---:|:---:|:---:|:---:|:---:|:---:|
|
| 68 |
+
| ambiguous_cluster_10x10 | 0.33 | 0.12 | 0.35 | 0.35 | 0.55 | **0.34** |
|
| 69 |
+
| directional_trap_8x8 | 0.08 | 0.36 | 0.14 | 0.37 | 0.36 | **0.26** |
|
| 70 |
+
| partial_intel_9x9 | 0.32 | 0.34 | 0.34 | 0.36 | 0.36 | **0.34** |
|
| 71 |
+
| flash_fade_minefield_7x7 | 0.35 | 0.53 | 0.34 | 0.34 | 0.35 | **0.38** |
|
| 72 |
+
| delayed_recall_keys_8x8 | 0.34 | 0.34 | 0.34 | 0.36 | 0.13 | **0.30** |
|
| 73 |
+
| decoy_minefield_8x10 | 0.14 | 0.14 | 0.14 | 0.14 | 0.34 | **0.18** |
|
| 74 |
+
| fog_labyrinth_10x10 | 0.52 | 0.34 | 0.34 | 0.35 | 0.35 | **0.38** |
|
| 75 |
+
| fog_key_hunt_8x8 | 0.13 | 0.13 | 0.15 | 0.15 | 0.15 | **0.14** |
|
| 76 |
+
| cascading_deduction_11x11 | 0.13 | 0.14 | 0.15 | 0.35 | 0.14 | **0.18** |
|
| 77 |
+
| safe_zone_identification_9x9 | 0.34 | 0.34 | 0.34 | 0.34 | 0.35 | **0.34** |
|
| 78 |
+
|
| 79 |
+
### Hardest Scenarios (OpenEnv)
|
| 80 |
+
1. **fog_key_hunt_8x8** (avg 0.14): Tiny viewport + fatal hazards — universally low
|
| 81 |
+
2. **decoy_minefield_8x10** (avg 0.18): Decoy-key confusion trips all models
|
| 82 |
+
3. **cascading_deduction_11x11** (avg 0.18): Large partial-signal board overwhelms reasoning
|
| 83 |
+
|
| 84 |
+
### Easiest Scenarios (OpenEnv)
|
| 85 |
+
1. **flash_fade_minefield_7x7** (avg 0.38): Pattern memory — some models excel here
|
| 86 |
+
2. **fog_labyrinth_10x10** (avg 0.38): Fog navigation with reasonable viewport
|
| 87 |
+
|
| 88 |
+
---
|
| 89 |
+
|
| 90 |
+
## SOTA Average Assessment
|
| 91 |
+
|
| 92 |
+
**SOTA models** (gpt-5.4, claude-sonnet-4-6, claude-opus-4-6):
|
| 93 |
+
|
| 94 |
+
| Reward Mode | SOTA Average | Target Band | Status |
|
| 95 |
+
|---|:---:|:---:|---|
|
| 96 |
+
| Custom | **-0.14** | 0.60–0.70 | Well below target — no hardening needed |
|
| 97 |
+
| OpenEnv | **0.28** | 0.60–0.70 | Below target — no hardening needed |
|
| 98 |
+
|
| 99 |
+
**All 5 models average:**
|
| 100 |
+
|
| 101 |
+
| Reward Mode | Overall Average |
|
| 102 |
+
|---|:---:|
|
| 103 |
+
| Custom | **-0.14** |
|
| 104 |
+
| OpenEnv | **0.28** |
|
| 105 |
+
|
| 106 |
+
The gym is currently **harder than target** across both reward modes. No hardening adjustments are required.
|
| 107 |
+
|
| 108 |
+
---
|
| 109 |
+
|
| 110 |
+
## Reward Mode Comparison
|
| 111 |
+
|
| 112 |
+
| Metric | Custom | OpenEnv |
|
| 113 |
+
|---|:---:|:---:|
|
| 114 |
+
| Mean across all models | -0.14 | 0.28 |
|
| 115 |
+
| Std deviation (models) | 0.15 | 0.02 |
|
| 116 |
+
| Min model avg | -0.33 | 0.26 |
|
| 117 |
+
| Max model avg | 0.08 | 0.31 |
|
| 118 |
+
| Hallucination penalties hit | Frequent (-1.0) | None triggered |
|
| 119 |
+
| Reward spread | Very high (variance from penalties) | Compressed (narrow 0.12–0.55 range) |
|
| 120 |
+
|
| 121 |
+
**Key insight**: Custom rewards produce highly volatile scores driven by the -1.0 hallucination penalty. When models make even one incorrect assertion (tools report success but ground truth disagrees), the entire scenario score collapses. OpenEnv transform rewards are more granular and forgiving, rewarding incremental progress per-step.
|
| 122 |
+
|
| 123 |
+
---
|
| 124 |
+
|
| 125 |
+
## Model Speed Rankings
|
| 126 |
+
|
| 127 |
+
| Model | Custom Time | OpenEnv Time | Avg per Scenario |
|
| 128 |
+
|---|:---:|:---:|:---:|
|
| 129 |
+
| `gpt-5.4` | 225.1s | 287.4s | ~26s |
|
| 130 |
+
| `claude-sonnet-4-6` | 1105.2s | 1048.0s | ~108s |
|
| 131 |
+
| `claude-opus-4-20250514` | 1197.4s | 1185.2s | ~119s |
|
| 132 |
+
| `claude-opus-4-6` | 1518.9s | 1584.6s | ~155s |
|
| 133 |
+
| `gpt-5` | 2967.2s | 3770.8s | ~337s |
|
| 134 |
+
|
| 135 |
+
GPT-5.4 is roughly 4x faster than the next-fastest model while achieving competitive results.
|
| 136 |
+
|
| 137 |
+
---
|
| 138 |
+
|
| 139 |
+
## Distractor Tool Usage
|
| 140 |
+
|
| 141 |
+
Models occasionally used distractor/trap tools, which indicates susceptibility to misleading tool descriptions:
|
| 142 |
+
|
| 143 |
+
- **`peek_hidden_cell`**: Used by claude-opus-4-6 and claude-sonnet-4-6 (cheating tool — gives hidden info but penalized)
|
| 144 |
+
- **`undo_last_action`**: Used by claude-sonnet-4-6 (no-op trap)
|
| 145 |
+
- **`reset_scenario`**: Used by multiple models (resets game state — wastes steps)
|
| 146 |
+
- **`auto_solve`**: Not used by any model (good — most egregious trap avoided)
|
| 147 |
+
|
| 148 |
+
---
|
| 149 |
+
|
| 150 |
+
## Task Family Analysis
|
| 151 |
+
|
| 152 |
+
| Task Family | Scenarios | Custom Avg | OpenEnv Avg | Difficulty |
|
| 153 |
+
|---|---|:---:|:---:|---|
|
| 154 |
+
| Hidden Grid (5) | ambiguous, directional, partial, cascading, safe_zone | -0.06 | 0.29 | Medium-Hard |
|
| 155 |
+
| Pattern Memory (2) | flash_fade, delayed_recall | 0.00 | 0.34 | Medium |
|
| 156 |
+
| Fog of War (2) | fog_labyrinth, fog_key_hunt | -0.30 | 0.26 | Hard |
|
| 157 |
+
| Distractor Search (1) | decoy_minefield | -0.80 | 0.18 | Very Hard |
|
| 158 |
+
|
| 159 |
+
---
|
| 160 |
+
|
| 161 |
+
## Files
|
| 162 |
+
|
| 163 |
+
| Type | Path |
|
| 164 |
+
|---|---|
|
| 165 |
+
| Custom results (all 5 models) | `results/visual_memory/run_visual_memory_custom.md` |
|
| 166 |
+
| OpenEnv results (all 5 models) | `results/visual_memory/run_visual_memory_openenv.md` |
|
| 167 |
+
| Custom trajectories (all 5 models) | `trajectories/visual_memory/run_visual_memory_custom/` |
|
| 168 |
+
| OpenEnv trajectories (all 5 models) | `trajectories/visual_memory/run_visual_memory_openenv/` |
|
models.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Data models for the Visual Memory Environment.
|
| 3 |
+
|
| 4 |
+
VisualMemoryAction has explicit Pydantic fields so the OpenEnv web
|
| 5 |
+
interface renders interactive form inputs on HF Spaces.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from __future__ import annotations
|
| 9 |
+
|
| 10 |
+
import json as _json
|
| 11 |
+
from typing import Any, Union
|
| 12 |
+
|
| 13 |
+
from pydantic import ConfigDict, Field, TypeAdapter
|
| 14 |
+
|
| 15 |
+
from openenv.core.env_server.mcp_types import (
|
| 16 |
+
CallToolAction,
|
| 17 |
+
CallToolObservation,
|
| 18 |
+
ListToolsAction,
|
| 19 |
+
ListToolsObservation,
|
| 20 |
+
)
|
| 21 |
+
from openenv.core.env_server.types import Action, Observation, State
|
| 22 |
+
|
| 23 |
+
_mcp_action_adapter = TypeAdapter(Union[ListToolsAction, CallToolAction])
|
| 24 |
+
|
| 25 |
+
_AVAILABLE_TOOLS = (
|
| 26 |
+
"list_tools, get_session_info, list_scenarios, load_scenario, "
|
| 27 |
+
"reset_scenario, get_board_view, get_status, reveal_cell, "
|
| 28 |
+
"inspect_region, flag_cell, unflag_cell, move_viewport, "
|
| 29 |
+
"submit_solution, recall_log, get_action_history, get_progress_stats, "
|
| 30 |
+
"auto_solve, peek_hidden_cell, undo_last_action"
|
| 31 |
+
)
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class VisualMemoryAction(Action):
    """Action with explicit fields for the web UI and MCP compatibility.

    Exposes two flat string fields (``tool_name`` and ``arguments_json``)
    so the OpenEnv web interface on HF Spaces can render simple form
    inputs, while still accepting raw MCP payloads (``type: call_tool`` /
    ``type: list_tools``) via the ``model_validate`` override.
    """

    model_config = ConfigDict(
        extra="forbid",
        validate_assignment=True,
        arbitrary_types_allowed=True,
    )

    tool_name: str = Field(
        default="list_tools",
        description=f"MCP tool to invoke. Available: {_AVAILABLE_TOOLS}",
    )
    arguments_json: str = Field(
        default="{}",
        description=(
            'Tool arguments as a JSON string. Examples: '
            '"{}" for no args, '
            '\'{"scenario_id":"hidden_grid_01"}\' for load_scenario, '
            '\'{"row":2,"col":3}\' for reveal_cell or flag_cell, '
            '\'{"flagged_positions":"[[0,1],[2,3]]"}\' for submit_solution'
        ),
    )

    @classmethod
    def model_validate(cls, data: Any, **kwargs: Any) -> Action:
        """Validate input, routing raw MCP payloads to their native types.

        Dicts tagged ``type: call_tool`` or ``type: list_tools`` are parsed
        as native MCP actions; anything else goes through normal Pydantic
        validation for this class.
        """
        if isinstance(data, dict) and data.get("type") in ("call_tool", "list_tools"):
            return _mcp_action_adapter.validate_python(data)
        return super().model_validate(data, **kwargs)

    def to_mcp_action(self) -> Action:
        """Convert this form-style action into a native MCP action.

        Returns:
            ``ListToolsAction`` when ``tool_name`` is "list_tools",
            otherwise a ``CallToolAction`` with the decoded arguments.

        Raises:
            ValueError: if ``arguments_json`` is not valid JSON or does
                not decode to a JSON object (``json.JSONDecodeError`` is
                a ``ValueError`` subclass, so callers catching ValueError
                keep working).
        """
        if self.tool_name == "list_tools":
            return ListToolsAction()
        if not self.arguments_json:
            return CallToolAction(tool_name=self.tool_name, arguments={})
        try:
            args = _json.loads(self.arguments_json)
        except _json.JSONDecodeError as exc:
            # Surface the offending payload so web-UI users see what to fix.
            raise ValueError(
                f"arguments_json is not valid JSON: {self.arguments_json!r}"
            ) from exc
        if not isinstance(args, dict):
            raise ValueError(
                f"arguments_json must decode to a JSON object, got: {args!r}"
            )
        return CallToolAction(tool_name=self.tool_name, arguments=args)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
# The environment speaks raw MCP: observations are plain tool-call results
# and state is the generic OpenEnv State — no custom subclasses needed, so
# these aliases just give them environment-local names.
VisualMemoryObservation = CallToolObservation
VisualMemoryState = State

# Public API of this module (re-exports the MCP types for convenience).
__all__ = [
    "VisualMemoryAction",
    "VisualMemoryObservation",
    "VisualMemoryState",
    "CallToolAction",
    "CallToolObservation",
    "ListToolsAction",
    "ListToolsObservation",
]
|
openenv.yaml
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# OpenEnv Environment Manifest
|
| 2 |
+
# Tells OpenEnv how to find and run this environment.
|
| 3 |
+
# AutoEnv discovery reads this file to auto-connect clients.
|
| 4 |
+
|
| 5 |
+
spec_version: 1
|
| 6 |
+
name: visual_memory
|
| 7 |
+
description: "Visual Memory (Phantom Grid) — hidden-state visual reasoning and planning under partial observability"
|
| 8 |
+
type: space
|
| 9 |
+
runtime: fastapi
|
| 10 |
+
app: server.app:app
|
| 11 |
+
port: 8000
|
openenv_visual_memory.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: openenv-visual-memory
|
| 3 |
+
Version: 0.1.0
|
| 4 |
+
Summary: Visual Memory environment for OpenEnv — hidden-state visual reasoning and planning under partial observability
|
| 5 |
+
Requires-Python: >=3.10
|
| 6 |
+
Requires-Dist: openenv-core @ git+https://github.com/meta-pytorch/OpenEnv.git@v0.2.1
|
| 7 |
+
Requires-Dist: fastmcp>=0.2.0
|
| 8 |
+
Requires-Dist: fastapi>=0.115.0
|
| 9 |
+
Requires-Dist: uvicorn>=0.24.0
|
| 10 |
+
Requires-Dist: pydantic>=2.5.0
|
| 11 |
+
Requires-Dist: httpx>=0.25.0
|
| 12 |
+
Requires-Dist: numpy>=1.24.0
|
| 13 |
+
Requires-Dist: svgwrite>=1.4.0
|
| 14 |
+
Requires-Dist: python-dotenv>=1.0.0
|
| 15 |
+
Provides-Extra: dev
|
| 16 |
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
| 17 |
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
openenv_visual_memory.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
README.md
|
| 2 |
+
__init__.py
|
| 3 |
+
client.py
|
| 4 |
+
models.py
|
| 5 |
+
openenv.yaml
|
| 6 |
+
pyproject.toml
|
| 7 |
+
./__init__.py
|
| 8 |
+
./client.py
|
| 9 |
+
./models.py
|
| 10 |
+
./openenv.yaml
|
| 11 |
+
openenv_visual_memory.egg-info/PKG-INFO
|
| 12 |
+
openenv_visual_memory.egg-info/SOURCES.txt
|
| 13 |
+
openenv_visual_memory.egg-info/dependency_links.txt
|
| 14 |
+
openenv_visual_memory.egg-info/entry_points.txt
|
| 15 |
+
openenv_visual_memory.egg-info/requires.txt
|
| 16 |
+
openenv_visual_memory.egg-info/top_level.txt
|
| 17 |
+
server/__init__.py
|
| 18 |
+
server/app.py
|
| 19 |
+
server/engine.py
|
| 20 |
+
server/memory_environment.py
|
| 21 |
+
server/renderer.py
|
openenv_visual_memory.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
openenv_visual_memory.egg-info/entry_points.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[console_scripts]
|
| 2 |
+
server = visual_memory.server.app:main
|
openenv_visual_memory.egg-info/requires.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv-core @ git+https://github.com/meta-pytorch/OpenEnv.git@v0.2.1
|
| 2 |
+
fastmcp>=0.2.0
|
| 3 |
+
fastapi>=0.115.0
|
| 4 |
+
uvicorn>=0.24.0
|
| 5 |
+
pydantic>=2.5.0
|
| 6 |
+
httpx>=0.25.0
|
| 7 |
+
numpy>=1.24.0
|
| 8 |
+
svgwrite>=1.4.0
|
| 9 |
+
python-dotenv>=1.0.0
|
| 10 |
+
|
| 11 |
+
[dev]
|
| 12 |
+
pytest>=8.0.0
|
| 13 |
+
pytest-cov>=4.0.0
|
openenv_visual_memory.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
visual_memory
|
pyproject.toml
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
requires = ["setuptools>=45", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "openenv-visual-memory"
version = "0.1.0"
description = "Visual Memory environment for OpenEnv — hidden-state visual reasoning and planning under partial observability"
requires-python = ">=3.10"
dependencies = [
    # Pinned to a tagged OpenEnv release; bump deliberately, not via branch.
    "openenv-core @ git+https://github.com/meta-pytorch/OpenEnv.git@v0.2.1",
    "fastmcp>=0.2.0",
    "fastapi>=0.115.0",
    "uvicorn>=0.24.0",
    "pydantic>=2.5.0",
    "httpx>=0.25.0",
    "numpy>=1.24.0",
    "svgwrite>=1.4.0",
    "python-dotenv>=1.0.0",
]

[project.optional-dependencies]
dev = ["pytest>=8.0.0", "pytest-cov>=4.0.0"]

[project.scripts]
server = "visual_memory.server.app:main"

[tool.setuptools]
include-package-data = true
packages = ["visual_memory", "visual_memory.server"]
# Unusual layout: the repository root itself is installed as the
# `visual_memory` package, with `server/` becoming its subpackage.
package-dir = {"visual_memory" = ".", "visual_memory.server" = "server"}

[tool.setuptools.package-data]
# Ship the OpenEnv manifest inside the wheel so AutoEnv discovery works
# for installed copies, not just source checkouts.
visual_memory = ["openenv.yaml"]
|
scenarios/ambiguous_cluster_10x10.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"scenario_id": "ambiguous_cluster_10x10",
|
| 3 |
+
"type": "hidden_grid",
|
| 4 |
+
"seed": 9173,
|
| 5 |
+
"board_width": 10,
|
| 6 |
+
"board_height": 10,
|
| 7 |
+
"hazard_count": 18,
|
| 8 |
+
"decoy_count": 6,
|
| 9 |
+
"max_steps": 35,
|
| 10 |
+
"max_hazard_reveals": 2,
|
| 11 |
+
"signal_mode": "range",
|
| 12 |
+
"flags_count": 22,
|
| 13 |
+
"difficulty": "hard",
|
| 14 |
+
"description": "10x10 grid, 18 hazards, 6 decoys. Signals give ambiguous ranges (e.g. 1-3). Decoys visually resemble keys. Only 2 hazard reveals allowed before game over. 35 steps forces efficient exploration. Models must cross-reference multiple range signals to narrow down hazard locations — single-cell reasoning fails because ranges overlap.",
|
| 15 |
+
"tags": ["partial_observability", "visual_clutter", "unsafe_guess_penalty"],
|
| 16 |
+
"win_condition": {
|
| 17 |
+
"type": "flag_all_hazards"
|
| 18 |
+
}
|
| 19 |
+
}
|
scenarios/cascading_deduction_11x11.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"scenario_id": "cascading_deduction_11x11",
|
| 3 |
+
"type": "hidden_grid",
|
| 4 |
+
"seed": 8401,
|
| 5 |
+
"board_width": 11,
|
| 6 |
+
"board_height": 11,
|
| 7 |
+
"hazard_count": 25,
|
| 8 |
+
"decoy_count": 5,
|
| 9 |
+
"max_steps": 38,
|
| 10 |
+
"max_hazard_reveals": 2,
|
| 11 |
+
"signal_mode": "partial",
|
| 12 |
+
"flags_count": 28,
|
| 13 |
+
"difficulty": "hard",
|
| 14 |
+
"description": "11x11 grid, 25 hazards, 5 decoys. Partial signals reveal only a subset of hazard directions plus a total hint. With 121 cells and 25 hazards, over 20% of the board is dangerous. The agent must chain partial deductions: signal A reveals 'N,E' out of 3 total → signal B at the inferred position reveals 'S,W' out of 2 → confirming hazard at intersection. Single-step reasoning sees incomplete clues. Models must track partial constraint sets across many reveals.",
|
| 15 |
+
"tags": ["partial_observability", "multi_stage_solution", "unsafe_guess_penalty"],
|
| 16 |
+
"win_condition": {
|
| 17 |
+
"type": "flag_all_hazards"
|
| 18 |
+
}
|
| 19 |
+
}
|
scenarios/decoy_minefield_8x10.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"scenario_id": "decoy_minefield_8x10",
|
| 3 |
+
"type": "distractor_search",
|
| 4 |
+
"seed": 7742,
|
| 5 |
+
"board_width": 10,
|
| 6 |
+
"board_height": 8,
|
| 7 |
+
"hazard_count": 15,
|
| 8 |
+
"key_count": 4,
|
| 9 |
+
"decoy_count": 8,
|
| 10 |
+
"max_steps": 32,
|
| 11 |
+
"max_hazard_reveals": 2,
|
| 12 |
+
"signal_mode": "directional",
|
| 13 |
+
"flags_count": 18,
|
| 14 |
+
"difficulty": "hard",
|
| 15 |
+
"description": "8x10 grid, 15 hazards, 4 real keys, 8 decoys that look like keys in the SVG render. Directional signals point toward hazards but decoys are placed to create false safe corridors. The agent must collect all 4 real keys while avoiding 15 hazards and ignoring 8 decoys. Models that treat decoys as keys will waste steps or walk into traps trying to reach them.",
|
| 16 |
+
"tags": ["visual_clutter", "partial_observability", "multi_stage_solution"],
|
| 17 |
+
"win_condition": {
|
| 18 |
+
"type": "collect_keys"
|
| 19 |
+
}
|
| 20 |
+
}
|
scenarios/delayed_recall_keys_8x8.json
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"scenario_id": "delayed_recall_keys_8x8",
|
| 3 |
+
"type": "pattern_memory",
|
| 4 |
+
"seed": 2289,
|
| 5 |
+
"board_width": 8,
|
| 6 |
+
"board_height": 8,
|
| 7 |
+
"hazard_count": 12,
|
| 8 |
+
"key_count": 5,
|
| 9 |
+
"max_steps": 30,
|
| 10 |
+
"max_hazard_reveals": 2,
|
| 11 |
+
"signal_mode": "count",
|
| 12 |
+
"flags_count": 15,
|
| 13 |
+
"difficulty": "hard",
|
| 14 |
+
"description": "8x8 grid, 12 hazards, 5 keys. 10 cells flash for 4 steps showing a mix of keys, hazards, and signals — then disappear. The agent must collect all 5 keys to win. After the flash fades, the agent must recall which flashed cells contained keys (safe to reveal) vs hazards (fatal to reveal). Revealing a remembered-hazard cell costs a life. This tests long-horizon memory: the recall happens 10-20 steps after the flash.",
|
| 15 |
+
"tags": ["delayed_recall", "partial_observability", "multi_stage_solution"],
|
| 16 |
+
"flash_cells": [[0,0],[0,7],[1,3],[2,5],[3,1],[3,6],[5,2],[5,5],[7,0],[7,7]],
|
| 17 |
+
"flash_until_step": 4,
|
| 18 |
+
"win_condition": {
|
| 19 |
+
"type": "collect_keys"
|
| 20 |
+
}
|
| 21 |
+
}
|
scenarios/directional_trap_8x8.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"scenario_id": "directional_trap_8x8",
|
| 3 |
+
"type": "hidden_grid",
|
| 4 |
+
"seed": 4821,
|
| 5 |
+
"board_width": 8,
|
| 6 |
+
"board_height": 8,
|
| 7 |
+
"hazard_count": 14,
|
| 8 |
+
"decoy_count": 4,
|
| 9 |
+
"max_steps": 28,
|
| 10 |
+
"max_hazard_reveals": 1,
|
| 11 |
+
"signal_mode": "directional",
|
| 12 |
+
"flags_count": 16,
|
| 13 |
+
"difficulty": "hard",
|
| 14 |
+
"description": "8x8 grid, 14 hazards, directional signals ('N','SE', etc). Only 1 hazard reveal allowed — a single mistake is fatal. Decoys placed adjacent to hazard clusters create false patterns. The agent must triangulate hazard positions from multiple directional clues. Greedy flagging based on one signal will flag decoys incorrectly.",
|
| 15 |
+
"tags": ["partial_observability", "unsafe_guess_penalty", "visual_clutter"],
|
| 16 |
+
"win_condition": {
|
| 17 |
+
"type": "flag_all_hazards"
|
| 18 |
+
}
|
| 19 |
+
}
|
scenarios/flash_fade_minefield_7x7.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"scenario_id": "flash_fade_minefield_7x7",
|
| 3 |
+
"type": "pattern_memory",
|
| 4 |
+
"seed": 6654,
|
| 5 |
+
"board_width": 7,
|
| 6 |
+
"board_height": 7,
|
| 7 |
+
"hazard_count": 10,
|
| 8 |
+
"max_steps": 25,
|
| 9 |
+
"max_hazard_reveals": 2,
|
| 10 |
+
"signal_mode": "count",
|
| 11 |
+
"flags_count": 13,
|
| 12 |
+
"difficulty": "hard",
|
| 13 |
+
"description": "7x7 grid, 10 hazards. At the start, 8 cells flash their content (mix of hazards and signals) for 3 steps, then fade to hidden. The agent must memorize which flashed cells were hazards vs signals, then use that recalled info combined with new reveals to flag all hazards. Models that ignore or misremember the flash phase will miss critical hazard locations.",
|
| 14 |
+
"tags": ["delayed_recall", "partial_observability", "unsafe_guess_penalty"],
|
| 15 |
+
"flash_cells": [[0,2],[1,4],[2,1],[2,5],[3,3],[4,0],[4,6],[6,3]],
|
| 16 |
+
"flash_until_step": 3,
|
| 17 |
+
"win_condition": {
|
| 18 |
+
"type": "flag_all_hazards"
|
| 19 |
+
}
|
| 20 |
+
}
|
scenarios/fog_key_hunt_8x8.json
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"scenario_id": "fog_key_hunt_8x8",
|
| 3 |
+
"type": "fog_of_war",
|
| 4 |
+
"seed": 1193,
|
| 5 |
+
"board_width": 8,
|
| 6 |
+
"board_height": 8,
|
| 7 |
+
"hazard_count": 14,
|
| 8 |
+
"key_count": 6,
|
| 9 |
+
"decoy_count": 4,
|
| 10 |
+
"max_steps": 35,
|
| 11 |
+
"max_hazard_reveals": 1,
|
| 12 |
+
"signal_mode": "directional",
|
| 13 |
+
"flags_count": 16,
|
| 14 |
+
"difficulty": "hard",
|
| 15 |
+
"description": "8x8 grid, 14 hazards, 6 keys, 4 decoys, fog viewport radius 1 (3x3 window). Agent starts center (4,4). Must find and collect all 6 keys with only a tiny viewport. One hazard hit is fatal. Decoys look like keys but waste steps. The agent must systematically explore, remember where keys and hazards were seen, avoid decoys, and plan a safe path. Impulsive exploration kills.",
|
| 16 |
+
"tags": ["fog_of_war", "visual_clutter", "unsafe_guess_penalty", "delayed_recall"],
|
| 17 |
+
"start_position": [4, 4],
|
| 18 |
+
"viewport_radius": 1,
|
| 19 |
+
"win_condition": {
|
| 20 |
+
"type": "collect_keys"
|
| 21 |
+
}
|
| 22 |
+
}
|
scenarios/fog_labyrinth_10x10.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"scenario_id": "fog_labyrinth_10x10",
|
| 3 |
+
"type": "fog_of_war",
|
| 4 |
+
"seed": 5518,
|
| 5 |
+
"board_width": 10,
|
| 6 |
+
"board_height": 10,
|
| 7 |
+
"hazard_count": 20,
|
| 8 |
+
"max_steps": 40,
|
| 9 |
+
"max_hazard_reveals": 2,
|
| 10 |
+
"signal_mode": "count",
|
| 11 |
+
"flags_count": 23,
|
| 12 |
+
"difficulty": "hard",
|
| 13 |
+
"description": "10x10 grid, 20 hazards, fog-of-war with viewport radius 2. Agent starts at corner (0,0) and can only see a 5x5 window. Must move viewport across the board, memorize revealed information from previous positions, and flag all 20 hazards. Revisiting areas wastes steps. Models must build and maintain an internal spatial map across 40 steps — most will forget early reveals by the time they need to submit.",
|
| 14 |
+
"tags": ["fog_of_war", "delayed_recall", "multi_stage_solution"],
|
| 15 |
+
"start_position": [0, 0],
|
| 16 |
+
"viewport_radius": 2,
|
| 17 |
+
"win_condition": {
|
| 18 |
+
"type": "flag_all_hazards"
|
| 19 |
+
}
|
| 20 |
+
}
|
scenarios/partial_intel_9x9.json
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"scenario_id": "partial_intel_9x9",
|
| 3 |
+
"type": "hidden_grid",
|
| 4 |
+
"seed": 3307,
|
| 5 |
+
"board_width": 9,
|
| 6 |
+
"board_height": 9,
|
| 7 |
+
"hazard_count": 16,
|
| 8 |
+
"max_steps": 30,
|
| 9 |
+
"max_hazard_reveals": 2,
|
| 10 |
+
"signal_mode": "partial",
|
| 11 |
+
"flags_count": 19,
|
| 12 |
+
"difficulty": "hard",
|
| 13 |
+
"description": "9x9 grid, 16 hazards. Partial signal mode: each signal cell only reveals SOME of the directions where hazards lie, plus a hint of the total count. The agent must combine partial clues from neighboring signal cells to reconstruct the full picture. No single signal cell gives complete info — models that act on incomplete data will flag wrong cells.",
|
| 14 |
+
"tags": ["partial_observability", "multi_stage_solution", "unsafe_guess_penalty"],
|
| 15 |
+
"win_condition": {
|
| 16 |
+
"type": "flag_all_hazards"
|
| 17 |
+
}
|
| 18 |
+
}
|
scenarios/safe_zone_identification_9x9.json
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"scenario_id": "safe_zone_identification_9x9",
|
| 3 |
+
"type": "hidden_grid",
|
| 4 |
+
"seed": 3956,
|
| 5 |
+
"board_width": 9,
|
| 6 |
+
"board_height": 9,
|
| 7 |
+
"hazard_count": 22,
|
| 8 |
+
"decoy_count": 7,
|
| 9 |
+
"max_steps": 30,
|
| 10 |
+
"max_hazard_reveals": 1,
|
| 11 |
+
"signal_mode": "range",
|
| 12 |
+
"flags_count": 25,
|
| 13 |
+
"difficulty": "hard",
|
| 14 |
+
"description": "9x9 grid, 22 hazards, 7 decoys. Win condition: identify ALL safe cells (non-hazard). With 81 cells and 22 hazards, there are 59 safe cells to find — but range signals give fuzzy counts (e.g. '1-3 hazards nearby'), and 7 decoys look suspicious but are actually safe. One hazard hit is fatal. The agent must prove cells are safe by elimination rather than flagging hazards. This inverts the usual strategy and punishes models that default to hazard-flagging heuristics.",
|
| 15 |
+
"tags": ["partial_observability", "visual_clutter", "unsafe_guess_penalty", "multi_stage_solution"],
|
| 16 |
+
"win_condition": {
|
| 17 |
+
"type": "identify_safe_cells"
|
| 18 |
+
}
|
| 19 |
+
}
|
server/__init__.py
ADDED
|
File without changes
|
server/app.py
ADDED
|
@@ -0,0 +1,47 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
FastAPI application entrypoint for the Visual Memory environment.
|
| 3 |
+
|
| 4 |
+
Uses OpenEnv's create_app() factory which auto-generates:
|
| 5 |
+
- POST /reset, POST /step, GET /state, GET /health
|
| 6 |
+
- WebSocket /ws for persistent connections
|
| 7 |
+
- /web interface (when ENABLE_WEB_INTERFACE=true on HF Spaces)
|
| 8 |
+
"""
|
| 9 |
+
|
| 10 |
+
from __future__ import annotations
|
| 11 |
+
|
| 12 |
+
import os
|
| 13 |
+
import sys
|
| 14 |
+
from pathlib import Path
|
| 15 |
+
|
| 16 |
+
from dotenv import load_dotenv
|
| 17 |
+
from openenv.core.env_server.http_server import create_app
|
| 18 |
+
|
| 19 |
+
load_dotenv(os.path.join(os.path.dirname(__file__), "..", ".env"))
|
| 20 |
+
|
| 21 |
+
try:
|
| 22 |
+
from visual_memory.models import VisualMemoryAction, VisualMemoryObservation
|
| 23 |
+
from visual_memory.server.memory_environment import MemoryEnvironment
|
| 24 |
+
except ImportError:
|
| 25 |
+
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
|
| 26 |
+
from models import VisualMemoryAction, VisualMemoryObservation
|
| 27 |
+
from server.memory_environment import MemoryEnvironment
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
MAX_CONCURRENT_ENVS = int(os.getenv("MAX_CONCURRENT_ENVS", "8"))
|
| 31 |
+
|
| 32 |
+
app = create_app(
|
| 33 |
+
MemoryEnvironment,
|
| 34 |
+
VisualMemoryAction,
|
| 35 |
+
VisualMemoryObservation,
|
| 36 |
+
env_name="visual_memory",
|
| 37 |
+
max_concurrent_envs=MAX_CONCURRENT_ENVS,
|
| 38 |
+
)
|
| 39 |
+
|
| 40 |
+
|
| 41 |
+
def main(host: str = "0.0.0.0", port: int = 8000):
|
| 42 |
+
import uvicorn
|
| 43 |
+
uvicorn.run(app, host=host, port=port)
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
if __name__ == "__main__":
|
| 47 |
+
main()
|
server/engine.py
ADDED
|
@@ -0,0 +1,712 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Hidden-state game engine for Visual Memory Gym.
|
| 2 |
+
|
| 3 |
+
Manages in-memory board state, hidden cell contents, move validation,
|
| 4 |
+
and win/loss conditions across four task families:
|
| 5 |
+
|
| 6 |
+
1. hidden_grid — deduce hazard locations from signal clues
|
| 7 |
+
2. pattern_memory — recall briefly-shown cell contents
|
| 8 |
+
3. distractor_search — identify targets among visually similar decoys
|
| 9 |
+
4. fog_of_war — plan under limited viewport radius
|
| 10 |
+
"""
|
| 11 |
+
|
| 12 |
+
from __future__ import annotations
|
| 13 |
+
|
| 14 |
+
import copy
|
| 15 |
+
from enum import Enum
|
| 16 |
+
from typing import Any
|
| 17 |
+
|
| 18 |
+
import numpy as np
|
| 19 |
+
from pydantic import BaseModel, Field
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
class CellType(str, Enum):
    """Ground-truth content of a board cell (hidden from the agent).

    Values are the string names used in scenario JSON and serialized state.
    """

    EMPTY = "empty"    # nothing here
    HAZARD = "hazard"  # revealing one counts toward max_hazard_reveals
    SIGNAL = "signal"  # clue cell describing nearby hazards (see SignalMode)
    KEY = "key"        # collectible objective (collect_keys win condition)
    DECOY = "decoy"    # per scenario docs: rendered to resemble a key, but worthless
    GOAL = "goal"      # destination cell (reach_goal win condition)
|
| 30 |
+
|
| 31 |
+
class CellState(str, Enum):
    """Agent-visible state of a cell, independent of its hidden CellType."""

    HIDDEN = "hidden"      # never seen
    REVEALED = "revealed"  # content currently visible
    FLAGGED = "flagged"    # marked by the agent as a suspected hazard
    FADED = "faded"        # was briefly shown (pattern-memory flash) and hidden again
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
class ScenarioType(str, Enum):
    """The four task families supported by the engine (module docstring)."""

    HIDDEN_GRID = "hidden_grid"            # deduce hazards from signal clues
    PATTERN_MEMORY = "pattern_memory"      # recall briefly-flashed cell contents
    DISTRACTOR_SEARCH = "distractor_search"  # find targets among look-alike decoys
    FOG_OF_WAR = "fog_of_war"              # plan with a limited viewport radius
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
class SignalMode(str, Enum):
    """How signal cells describe nearby hazards (per scenario descriptions)."""

    COUNT = "count"              # exact neighboring-hazard count
    DIRECTIONAL = "directional"  # compass directions toward hazards ("N", "SE", ...)
    RANGE = "range"              # fuzzy count range, e.g. "1-3 hazards nearby"
    PARTIAL = "partial"          # subset of hazard directions plus a total hint
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class WinCondition(str, Enum):
    """Win-condition types; values match `win_condition.type` in scenario JSON."""

    FLAG_ALL_HAZARDS = "flag_all_hazards"
    COLLECT_KEYS = "collect_keys"
    IDENTIFY_SAFE = "identify_safe_cells"  # note: member name differs from value
    REACH_GOAL = "reach_goal"
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
class BoardState(BaseModel):
    """Serializable snapshot of the game state (visible portion only).

    This is what leaves the engine: hidden cell contents are excluded;
    only information the agent has legitimately uncovered is included.
    """

    session_id: str = ""
    scenario_id: str = ""
    scenario_type: str = "hidden_grid"  # one of the ScenarioType values
    step_count: int = 0
    board_width: int = 0
    board_height: int = 0
    # Row-major grid of per-cell dicts; the engine initializes each cell as
    # {"state": <CellState value>, "content": ...} — confirm schema against
    # GameEngine._visible before relying on extra keys.
    visible_cells: list[list[dict]] = Field(default_factory=list)
    discovered_signals: list[dict] = Field(default_factory=list)
    memory_events: list[dict] = Field(default_factory=list)
    game_over: bool = False
    won: bool = False
    flags_remaining: int = 0
    cells_revealed: int = 0
    hazard_hits: int = 0
    keys_collected: int = 0
    max_steps: int = 50
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
# The 8 Moore-neighborhood offsets (row_delta, col_delta) around a cell,
# listed in reading order: NW, N, NE, W, E, SW, S, SE.
NEIGHBOR_OFFSETS = [
    (-1, -1), (-1, 0), (-1, 1),
    (0, -1), (0, 1),
    (1, -1), (1, 0), (1, 1),
]

# Compass label for each neighbor offset. Rows grow southward, so a row
# delta of -1 is "N" and +1 is "S".
DIRECTION_NAMES = {
    (-1, -1): "NW", (-1, 0): "N", (-1, 1): "NE",
    (0, -1): "W", (0, 1): "E",
    (1, -1): "SW", (1, 0): "S", (1, 1): "SE",
}
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
class GameEngine:
    """In-memory game engine for the Visual Memory gym.

    Deterministic given a seed. All state lives in Python memory.

    ``self._hidden`` holds ground truth; ``self._visible`` tracks only what the
    agent has revealed/flagged. Every agent action increments ``step_count`` and
    may end the game via hazard hits, a win condition, or the step budget.
    """

    def __init__(self, scenario: dict, seed: int | None = None):
        """Build an engine from a scenario dict, optionally overriding its seed."""
        self.scenario_id: str = scenario["scenario_id"]
        self.scenario_type = ScenarioType(scenario.get("type", "hidden_grid"))
        self.width: int = scenario["board_width"]
        self.height: int = scenario["board_height"]
        self.max_steps: int = scenario.get("max_steps", 50)
        self.max_hazard_reveals: int = scenario.get("max_hazard_reveals", 3)
        self.signal_mode = SignalMode(scenario.get("signal_mode", "count"))
        self.win_condition = WinCondition(
            scenario.get("win_condition", {}).get("type", "flag_all_hazards")
        )

        # An explicit seed argument wins over the scenario's own seed (default 42).
        resolved_seed = seed if seed is not None else scenario.get("seed", 42)
        self._rng = np.random.default_rng(resolved_seed)

        self.step_count: int = 0
        self.hazard_hits: int = 0
        self.keys_collected: int = 0
        self.cells_revealed: int = 0
        self.game_over: bool = False
        self.won: bool = False

        # Scenarios either pin an exact layout or are generated from counts.
        if "layout" in scenario:
            self._hidden = self._load_explicit_layout(scenario["layout"])
        else:
            self._hidden = self._generate_board(scenario)

        self._visible: list[list[dict]] = [
            [{"state": CellState.HIDDEN.value, "content": None} for _ in range(self.width)]
            for _ in range(self.height)
        ]

        total_hazards = sum(
            1
            for r in range(self.height)
            for c in range(self.width)
            if self._hidden[r][c]["type"] == CellType.HAZARD.value
        )
        # Default flag budget: one flag per hazard plus 3 spare.
        self.total_flags: int = scenario.get("flags_count", total_hazards + 3)
        self.flags_placed: int = 0

        self.total_keys: int = sum(
            1
            for r in range(self.height)
            for c in range(self.width)
            if self._hidden[r][c]["type"] == CellType.KEY.value
        )

        self._discovered_signals: list[dict] = []
        self._memory_events: list[dict] = []
        self._action_log: list[dict] = []

        # Fog-of-war viewport; None radius/center means the whole board is visible.
        self._viewport_center: list[int] | None = scenario.get("start_position")
        self._viewport_radius: int | None = scenario.get("viewport_radius")

        # Pattern-memory: listed cells are shown at step 0 and later fade
        # (see _tick_pattern_memory).
        self._flash_cells: list[list[int]] = scenario.get("flash_cells", [])
        self._flash_until_step: int = scenario.get("flash_until_step", 0)
        if self.scenario_type == ScenarioType.PATTERN_MEMORY and self._flash_cells:
            for rc in self._flash_cells:
                r, c = rc[0], rc[1]
                # NOTE(review): flash coordinates are trusted from the scenario
                # file; out-of-range entries would raise IndexError here.
                cell = self._hidden[r][c]
                self._visible[r][c] = {
                    "state": CellState.REVEALED.value,
                    "content": copy.deepcopy(cell),
                }
                self._memory_events.append({
                    "step": 0,
                    "event": "flash_shown",
                    "row": r,
                    "col": c,
                    "content": copy.deepcopy(cell),
                })

    # ─── Board Generation ───────────────────────────────────────────

    def _load_explicit_layout(self, layout: list[list[dict]]) -> list[list[dict]]:
        """Normalize an explicit scenario layout into board-cell dicts."""
        board: list[list[dict]] = []
        for row_data in layout:
            row: list[dict] = []
            for cell in row_data:
                row.append({
                    "type": cell.get("type", CellType.EMPTY.value),
                    "value": cell.get("value"),
                    "properties": cell.get("properties", {}),
                })
            board.append(row)
        return board

    def _generate_board(self, scenario: dict) -> list[list[dict]]:
        """Randomly place hazards/keys/decoys/goal, then derive signal cells.

        Placement order is hazards, keys, decoys, goal over a single RNG
        permutation of all cell indices, so results are seed-deterministic.

        Raises:
            ValueError: if the requested special cells exceed the board size.
        """
        hazard_count = scenario.get("hazard_count", 10)
        key_count = scenario.get("key_count", 0)
        decoy_count = scenario.get("decoy_count", 0)
        goal_count = 1 if self.win_condition == WinCondition.REACH_GOAL else 0

        total_cells = self.width * self.height
        total_special = hazard_count + key_count + decoy_count + goal_count
        if total_special > total_cells:
            raise ValueError(
                f"Cannot place {total_special} special cells on a "
                f"{self.width}x{self.height} board ({total_cells} cells)"
            )

        positions = self._rng.permutation(total_cells)
        board: list[list[dict]] = [
            [{"type": CellType.EMPTY.value, "value": None, "properties": {}} for _ in range(self.width)]
            for _ in range(self.height)
        ]

        idx = 0
        for _ in range(hazard_count):
            r, c = divmod(int(positions[idx]), self.width)
            board[r][c] = {"type": CellType.HAZARD.value, "value": None, "properties": {}}
            idx += 1

        for i in range(key_count):
            r, c = divmod(int(positions[idx]), self.width)
            board[r][c] = {"type": CellType.KEY.value, "value": f"key_{i}", "properties": {}}
            idx += 1

        for i in range(decoy_count):
            r, c = divmod(int(positions[idx]), self.width)
            board[r][c] = {"type": CellType.DECOY.value, "value": f"decoy_{i}", "properties": {}}
            idx += 1

        if goal_count:
            r, c = divmod(int(positions[idx]), self.width)
            board[r][c] = {"type": CellType.GOAL.value, "value": None, "properties": {}}
            idx += 1

        self._compute_signals(board)
        return board

    def _compute_signals(self, board: list[list[dict]]) -> None:
        """Convert empty cells adjacent to hazards into SIGNAL cells in place.

        The hint format depends on ``self.signal_mode``; RANGE and PARTIAL
        draw from the engine RNG, so signal content is seed-dependent.
        """
        for r in range(self.height):
            for c in range(self.width):
                if board[r][c]["type"] != CellType.EMPTY.value:
                    continue

                if self.signal_mode == SignalMode.COUNT:
                    count = self._count_adjacent_hazards(board, r, c)
                    if count > 0:
                        board[r][c] = {
                            "type": CellType.SIGNAL.value,
                            "value": count,
                            "properties": {"mode": "count"},
                        }

                elif self.signal_mode == SignalMode.DIRECTIONAL:
                    directions = self._get_hazard_directions(board, r, c)
                    if directions:
                        board[r][c] = {
                            "type": CellType.SIGNAL.value,
                            "value": directions,
                            "properties": {"mode": "directional"},
                        }

                elif self.signal_mode == SignalMode.RANGE:
                    count = self._count_adjacent_hazards(board, r, c)
                    if count > 0:
                        # Independent 0/1 noise on each bound; the true count
                        # always lies within [low, high].
                        noise = int(self._rng.integers(0, 2))
                        low = max(0, count - noise)
                        high = count + int(self._rng.integers(0, 2))
                        board[r][c] = {
                            "type": CellType.SIGNAL.value,
                            "value": {"min": low, "max": high},
                            "properties": {"mode": "range"},
                        }

                elif self.signal_mode == SignalMode.PARTIAL:
                    directions = self._get_hazard_directions(board, r, c)
                    if directions:
                        # Show roughly half the directions (at least one); the
                        # true total is leaked via properties["total_hint"].
                        shown = max(1, len(directions) // 2)
                        indices = self._rng.choice(
                            len(directions), size=shown, replace=False
                        )
                        subset = [directions[i] for i in sorted(indices)]
                        board[r][c] = {
                            "type": CellType.SIGNAL.value,
                            "value": subset,
                            "properties": {
                                "mode": "partial",
                                "total_hint": len(directions),
                            },
                        }

    def _count_adjacent_hazards(self, board: list[list[dict]], r: int, c: int) -> int:
        """Count hazards among the 8 in-bounds neighbors of (r, c)."""
        count = 0
        for dr, dc in NEIGHBOR_OFFSETS:
            nr, nc = r + dr, c + dc
            if 0 <= nr < self.height and 0 <= nc < self.width:
                if board[nr][nc]["type"] == CellType.HAZARD.value:
                    count += 1
        return count

    def _get_hazard_directions(self, board: list[list[dict]], r: int, c: int) -> list[str]:
        """Return compass names of neighbors of (r, c) containing hazards."""
        dirs: list[str] = []
        for (dr, dc), name in DIRECTION_NAMES.items():
            nr, nc = r + dr, c + dc
            if 0 <= nr < self.height and 0 <= nc < self.width:
                if board[nr][nc]["type"] == CellType.HAZARD.value:
                    dirs.append(name)
        return dirs

    # ─── Pattern Memory Phase ───────────────────────────────────────

    def _tick_pattern_memory(self) -> None:
        """Fade flashed cells on the action whose step equals flash_until_step.

        Called after each step increment; the equality check means the fade
        fires exactly once, on that single step.
        """
        if self.scenario_type != ScenarioType.PATTERN_MEMORY:
            return
        if self.step_count != self._flash_until_step:
            return
        for rc in self._flash_cells:
            r, c = rc[0], rc[1]
            # Only fade cells still showing the flash (not e.g. ones the
            # agent has since flagged).
            if self._visible[r][c]["state"] == CellState.REVEALED.value:
                self._visible[r][c] = {"state": CellState.FADED.value, "content": None}
                self._memory_events.append({
                    "step": self.step_count,
                    "event": "flash_faded",
                    "row": r,
                    "col": c,
                })

    # ─── Core Actions ───────────────────────────────────────────────

    def reveal_cell(self, row: int, col: int) -> dict:
        """Reveal (row, col) and return the outcome.

        Returns a dict with either an "error" key (invalid action; no step is
        consumed) or the cell's type/value/properties, plus game-over and win
        bookkeeping for hazards, keys, goals, and the step budget.
        """
        if self.game_over:
            return {"error": "Game is already over.", "row": row, "col": col}

        if not self._in_bounds(row, col):
            return {"error": f"({row},{col}) is out of bounds.", "row": row, "col": col}

        vis = self._visible[row][col]
        # FADED cells may be re-revealed; REVEALED/FLAGGED may not.
        if vis["state"] in (CellState.REVEALED.value, CellState.FLAGGED.value):
            return {
                "error": f"Cell ({row},{col}) is already {vis['state']}.",
                "row": row,
                "col": col,
            }

        # Fog-of-war: reveals are restricted to the current viewport square.
        if self._viewport_radius is not None and self._viewport_center is not None:
            vr, vc = self._viewport_center
            if abs(row - vr) > self._viewport_radius or abs(col - vc) > self._viewport_radius:
                return {
                    "error": f"({row},{col}) is outside your current viewport.",
                    "row": row,
                    "col": col,
                }

        # Step is consumed (and flash may fade) before the reveal takes effect.
        self.step_count += 1
        self._tick_pattern_memory()

        hidden = self._hidden[row][col]
        cell_type = hidden["type"]

        self._visible[row][col] = {
            "state": CellState.REVEALED.value,
            "content": copy.deepcopy(hidden),
        }
        self.cells_revealed += 1

        result: dict[str, Any] = {
            "row": row,
            "col": col,
            "type": cell_type,
            "value": hidden.get("value"),
            "properties": hidden.get("properties", {}),
        }

        if cell_type == CellType.SIGNAL.value:
            self._discovered_signals.append(result)

        if cell_type == CellType.HAZARD.value:
            self.hazard_hits += 1
            result["hazard_hit"] = True
            if self.hazard_hits >= self.max_hazard_reveals:
                self.game_over = True
                self.won = False
                result["game_over"] = True
                result["message"] = "Too many hazards revealed. Game over."

        if cell_type == CellType.KEY.value:
            self.keys_collected += 1
            result["key_collected"] = True
            if (
                self.win_condition == WinCondition.COLLECT_KEYS
                and self.keys_collected >= self.total_keys
            ):
                self.game_over = True
                self.won = True
                result["game_over"] = True
                result["message"] = "All keys collected. You win!"

        if cell_type == CellType.GOAL.value and self.win_condition == WinCondition.REACH_GOAL:
            self.game_over = True
            self.won = True
            result["game_over"] = True
            result["message"] = "Goal reached. You win!"

        # Step budget is checked last, so a winning reveal on the final step
        # still counts as a win.
        if self.step_count >= self.max_steps and not self.game_over:
            self.game_over = True
            self.won = False
            result["game_over"] = True
            result["message"] = "Max steps exceeded. Game over."

        self._action_log.append({
            "action": "reveal",
            "row": row,
            "col": col,
            "step": self.step_count,
            "result_type": cell_type,
        })
        return result

    def flag_cell(self, row: int, col: int) -> dict:
        """Place a flag on a hidden cell; may win the game (FLAG_ALL_HAZARDS)."""
        if self.game_over:
            return {"error": "Game is already over.", "row": row, "col": col}

        if not self._in_bounds(row, col):
            return {"error": f"({row},{col}) is out of bounds.", "row": row, "col": col}

        vis = self._visible[row][col]
        if vis["state"] == CellState.REVEALED.value:
            return {"error": f"Cell ({row},{col}) is already revealed; cannot flag.", "row": row, "col": col}
        if vis["state"] == CellState.FLAGGED.value:
            return {"error": f"Cell ({row},{col}) is already flagged.", "row": row, "col": col}

        if self.flags_placed >= self.total_flags:
            return {"error": "No flags remaining.", "row": row, "col": col}

        self.step_count += 1
        self._tick_pattern_memory()

        self._visible[row][col] = {"state": CellState.FLAGGED.value, "content": None}
        self.flags_placed += 1

        self._action_log.append({"action": "flag", "row": row, "col": col, "step": self.step_count})
        # May set game_over/won if every hazard is now flagged with no wrong flags.
        self._check_flag_win()

        result: dict[str, Any] = {
            "row": row,
            "col": col,
            "flagged": True,
            "flags_remaining": self.total_flags - self.flags_placed,
        }
        if self.game_over and self.won:
            result["game_over"] = True
            result["message"] = "All hazards correctly flagged. You win!"

        if self.step_count >= self.max_steps and not self.game_over:
            self.game_over = True
            self.won = False
            result["game_over"] = True
            result["message"] = "Max steps exceeded. Game over."

        return result

    def unflag_cell(self, row: int, col: int) -> dict:
        """Remove a flag, returning the cell to HIDDEN and refunding the flag."""
        if self.game_over:
            return {"error": "Game is already over.", "row": row, "col": col}

        if not self._in_bounds(row, col):
            return {"error": f"({row},{col}) is out of bounds.", "row": row, "col": col}

        if self._visible[row][col]["state"] != CellState.FLAGGED.value:
            return {"error": f"Cell ({row},{col}) is not flagged.", "row": row, "col": col}

        self.step_count += 1
        self._tick_pattern_memory()

        self._visible[row][col] = {"state": CellState.HIDDEN.value, "content": None}
        self.flags_placed -= 1

        self._action_log.append({"action": "unflag", "row": row, "col": col, "step": self.step_count})

        result: dict[str, Any] = {
            "row": row,
            "col": col,
            "unflagged": True,
            "flags_remaining": self.total_flags - self.flags_placed,
        }

        if self.step_count >= self.max_steps and not self.game_over:
            self.game_over = True
            self.won = False
            result["game_over"] = True
            result["message"] = "Max steps exceeded. Game over."

        return result

    def move_viewport(self, row: int, col: int) -> dict:
        """Recenter the fog-of-war viewport (fog_of_war scenarios only).

        NOTE(review): unlike the other actions, a max-steps game-over here is
        not reflected in the returned dict — callers must check get_status().
        """
        if self.scenario_type != ScenarioType.FOG_OF_WAR:
            return {"error": "move_viewport is only available in fog_of_war scenarios."}

        if self.game_over:
            return {"error": "Game is already over."}

        if not self._in_bounds(row, col):
            return {"error": f"({row},{col}) is out of bounds."}

        self.step_count += 1
        self._tick_pattern_memory()

        self._viewport_center = [row, col]
        self._action_log.append({
            "action": "move_viewport",
            "row": row,
            "col": col,
            "step": self.step_count,
        })

        if self.step_count >= self.max_steps and not self.game_over:
            self.game_over = True
            self.won = False

        return {
            "viewport_center": [row, col],
            "viewport_radius": self._viewport_radius,
            "visible_area": self._get_viewport_bounds(),
        }

    def submit_solution(
        self,
        flagged_positions: list[list[int]] | None = None,
        safe_positions: list[list[int]] | None = None,
    ) -> dict:
        """Judge a final answer against the win condition. Always ends the game.

        Args:
            flagged_positions: [row, col] pairs claimed as hazards
                (FLAG_ALL_HAZARDS; merged with flags already on the board).
            safe_positions: [row, col] pairs claimed safe (IDENTIFY_SAFE).
        """
        if self.game_over:
            return {"error": "Game is already over."}

        self.step_count += 1
        # Submission is terminal regardless of outcome.
        self.game_over = True

        if self.win_condition == WinCondition.FLAG_ALL_HAZARDS:
            return self._judge_flag_solution(flagged_positions or [])
        elif self.win_condition == WinCondition.IDENTIFY_SAFE:
            return self._judge_safe_solution(safe_positions or [])
        elif self.win_condition == WinCondition.COLLECT_KEYS:
            success = self.keys_collected >= self.total_keys
            self.won = success
            return {
                "correct": success,
                "keys_collected": self.keys_collected,
                "keys_required": self.total_keys,
            }
        elif self.win_condition == WinCondition.REACH_GOAL:
            # REACH_GOAL can only be won by revealing the goal, never by submitting.
            self.won = False
            return {"correct": False, "message": "Goal was not reached before submission."}

        return {"error": "Unknown win condition."}

    # ─── State Queries ──────────────────────────────────────────────

    def get_visible_board(self) -> list[list[dict]]:
        """Deep copy of the visible board; cells outside the viewport become "fog"."""
        if self._viewport_radius is None or self._viewport_center is None:
            return copy.deepcopy(self._visible)

        vr, vc = self._viewport_center
        rad = self._viewport_radius
        fog_board: list[list[dict]] = [
            [{"state": "fog", "content": None} for _ in range(self.width)]
            for _ in range(self.height)
        ]
        for r in range(max(0, vr - rad), min(self.height, vr + rad + 1)):
            for c in range(max(0, vc - rad), min(self.width, vc + rad + 1)):
                fog_board[r][c] = copy.deepcopy(self._visible[r][c])
        return fog_board

    def get_status(self) -> dict:
        """Flat summary of episode progress and win/lose bookkeeping."""
        return {
            "scenario_id": self.scenario_id,
            "scenario_type": self.scenario_type.value,
            "step_count": self.step_count,
            "max_steps": self.max_steps,
            "board_size": f"{self.width}x{self.height}",
            "cells_revealed": self.cells_revealed,
            "hazard_hits": self.hazard_hits,
            "max_hazard_reveals": self.max_hazard_reveals,
            "keys_collected": self.keys_collected,
            "total_keys": self.total_keys,
            "flags_placed": self.flags_placed,
            "flags_remaining": self.total_flags - self.flags_placed,
            "game_over": self.game_over,
            "won": self.won,
            "win_condition": self.win_condition.value,
        }

    def get_board_state(self, session_id: str = "") -> BoardState:
        """Full agent-facing snapshot (visible data only) as a BoardState model."""
        return BoardState(
            session_id=session_id,
            scenario_id=self.scenario_id,
            scenario_type=self.scenario_type.value,
            step_count=self.step_count,
            board_width=self.width,
            board_height=self.height,
            visible_cells=self.get_visible_board(),
            discovered_signals=copy.deepcopy(self._discovered_signals),
            memory_events=copy.deepcopy(self._memory_events),
            game_over=self.game_over,
            won=self.won,
            flags_remaining=self.total_flags - self.flags_placed,
            cells_revealed=self.cells_revealed,
            hazard_hits=self.hazard_hits,
            keys_collected=self.keys_collected,
            max_steps=self.max_steps,
        )

    def get_hidden_board(self) -> list[list[dict]]:
        """Full hidden board — for reward computation only, never sent to agent."""
        return copy.deepcopy(self._hidden)

    def get_action_log(self) -> list[dict]:
        """Deep copy of the chronological action log."""
        return copy.deepcopy(self._action_log)

    # ─── Internal Helpers ───────────────────────────────────────────

    def _in_bounds(self, row: int, col: int) -> bool:
        """True if (row, col) lies on the board."""
        return 0 <= row < self.height and 0 <= col < self.width

    def _get_viewport_bounds(self) -> dict:
        """Inclusive row/col bounds of the visible area (whole board if no viewport)."""
        if self._viewport_center is None or self._viewport_radius is None:
            return {
                "r_min": 0,
                "r_max": self.height - 1,
                "c_min": 0,
                "c_max": self.width - 1,
            }
        vr, vc = self._viewport_center
        rad = self._viewport_radius
        return {
            "r_min": max(0, vr - rad),
            "r_max": min(self.height - 1, vr + rad),
            "c_min": max(0, vc - rad),
            "c_max": min(self.width - 1, vc + rad),
        }

    def _check_flag_win(self) -> None:
        """Set game_over/won if every hazard is flagged and no flag is wrong."""
        if self.win_condition != WinCondition.FLAG_ALL_HAZARDS:
            return

        # Any unflagged hazard means the game continues.
        for r in range(self.height):
            for c in range(self.width):
                is_hazard = self._hidden[r][c]["type"] == CellType.HAZARD.value
                is_flagged = self._visible[r][c]["state"] == CellState.FLAGGED.value
                if is_hazard and not is_flagged:
                    return

        wrong_flags = sum(
            1
            for r in range(self.height)
            for c in range(self.width)
            if self._visible[r][c]["state"] == CellState.FLAGGED.value
            and self._hidden[r][c]["type"] != CellType.HAZARD.value
        )
        if wrong_flags == 0:
            self.game_over = True
            self.won = True

    def _judge_flag_solution(self, flagged: list[list[int]]) -> dict:
        """Score submitted hazard flags (union of argument and on-board flags)."""
        actual_hazards: set[tuple[int, int]] = set()
        for r in range(self.height):
            for c in range(self.width):
                if self._hidden[r][c]["type"] == CellType.HAZARD.value:
                    actual_hazards.add((r, c))

        submitted: set[tuple[int, int]] = {(p[0], p[1]) for p in flagged}
        for r in range(self.height):
            for c in range(self.width):
                if self._visible[r][c]["state"] == CellState.FLAGGED.value:
                    submitted.add((r, c))

        correct = submitted & actual_hazards
        missed = actual_hazards - submitted
        wrong = submitted - actual_hazards

        precision = len(correct) / len(submitted) if submitted else 0.0
        recall = len(correct) / len(actual_hazards) if actual_hazards else 1.0

        # Exact match required to win; partial credit is exposed via P/R only.
        self.won = len(missed) == 0 and len(wrong) == 0

        return {
            "correct": self.won,
            "hazards_found": len(correct),
            "hazards_total": len(actual_hazards),
            "missed": len(missed),
            "wrong_flags": len(wrong),
            "precision": round(precision, 3),
            "recall": round(recall, 3),
        }

    def _judge_safe_solution(self, safe_positions: list[list[int]]) -> dict:
        """Score a submitted set of safe cells against all non-hazard cells."""
        actual_safe: set[tuple[int, int]] = set()
        for r in range(self.height):
            for c in range(self.width):
                if self._hidden[r][c]["type"] != CellType.HAZARD.value:
                    actual_safe.add((r, c))

        submitted: set[tuple[int, int]] = {(p[0], p[1]) for p in safe_positions}

        correct = submitted & actual_safe
        false_safe = submitted - actual_safe
        missed_safe = actual_safe - submitted

        precision = len(correct) / len(submitted) if submitted else 0.0
        recall = len(correct) / len(actual_safe) if actual_safe else 1.0

        # Winning requires the exact safe set: nothing extra, nothing missing.
        self.won = len(false_safe) == 0 and len(missed_safe) == 0

        return {
            "correct": self.won,
            "safe_found": len(correct),
            "safe_total": len(actual_safe),
            "false_safe": len(false_safe),
            "missed_safe": len(missed_safe),
            "precision": round(precision, 3),
            "recall": round(recall, 3),
        }
|
server/memory_environment.py
ADDED
|
@@ -0,0 +1,620 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Visual Memory Environment — built on OpenEnv's MCPEnvironment.
|
| 2 |
+
|
| 3 |
+
Exposes MCP tools for hidden-state visual reasoning under partial
|
| 4 |
+
observability. Supports four task families: hidden-grid deduction,
|
| 5 |
+
pattern memory, distractor search, and fog-of-war planning.
|
| 6 |
+
|
| 7 |
+
Tool categories:
|
| 8 |
+
- Session: get_session_info, list_scenarios, load_scenario, reset_scenario
|
| 9 |
+
- Observation: get_board_view, get_status, reveal_cell, inspect_region
|
| 10 |
+
- Action: flag_cell, unflag_cell, move_viewport, submit_solution
|
| 11 |
+
- Memory: recall_log, get_action_history, get_progress_stats
|
| 12 |
+
- Distractor (traps): auto_solve, peek_hidden_cell, undo_last_action
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
from __future__ import annotations
|
| 16 |
+
|
| 17 |
+
import json
|
| 18 |
+
import logging
|
| 19 |
+
import os
|
| 20 |
+
from typing import Any, Optional
|
| 21 |
+
from uuid import uuid4
|
| 22 |
+
|
| 23 |
+
from fastmcp import FastMCP
|
| 24 |
+
|
| 25 |
+
from openenv.core.env_server.mcp_environment import MCPEnvironment
|
| 26 |
+
from openenv.core.env_server.types import Action, EnvironmentMetadata, Observation, State
|
| 27 |
+
|
| 28 |
+
from .engine import GameEngine
|
| 29 |
+
from .renderer import Renderer
|
| 30 |
+
|
| 31 |
+
logger = logging.getLogger(__name__)
|
| 32 |
+
|
| 33 |
+
def _resolve_scenarios_dir() -> str:
|
| 34 |
+
"""Find scenarios/ dir — works both locally and inside Docker."""
|
| 35 |
+
candidates = [
|
| 36 |
+
os.environ.get("VISUAL_MEMORY_SCENARIOS_DIR", ""),
|
| 37 |
+
os.path.join(os.path.dirname(__file__), "..", "scenarios"),
|
| 38 |
+
os.path.join(os.getcwd(), "scenarios"),
|
| 39 |
+
"/app/env/scenarios",
|
| 40 |
+
]
|
| 41 |
+
for path in candidates:
|
| 42 |
+
if path and os.path.isdir(path):
|
| 43 |
+
return path
|
| 44 |
+
return os.path.join(os.path.dirname(__file__), "..", "scenarios")
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
# Resolved once at import time; override with VISUAL_MEMORY_SCENARIOS_DIR.
SCENARIOS_DIR = _resolve_scenarios_dir()
|
| 48 |
+
|
| 49 |
+
|
| 50 |
+
def _load_scenario_file(scenario_id: str) -> dict:
    """Parse the JSON definition for *scenario_id* from SCENARIOS_DIR.

    Raises:
        FileNotFoundError: if scenarios/<scenario_id>.json does not exist.
    """
    json_path = os.path.join(SCENARIOS_DIR, f"{scenario_id}.json")
    if os.path.isfile(json_path):
        with open(json_path, "r") as handle:
            return json.load(handle)
    raise FileNotFoundError(f"Scenario '{scenario_id}' not found at {json_path}")
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
def _list_available_scenarios() -> list[dict]:
    """Summarize every parseable scenario JSON in SCENARIOS_DIR.

    Returns one summary dict per file, in sorted filename order. Files that
    fail to load or parse are silently skipped; a missing directory yields [].
    """
    if not os.path.isdir(SCENARIOS_DIR):
        return []
    summaries: list[dict] = []
    for entry in sorted(os.listdir(SCENARIOS_DIR)):
        if not entry.endswith(".json"):
            continue
        sid = entry.replace(".json", "")
        try:
            data = _load_scenario_file(sid)
            summaries.append({
                "scenario_id": data.get("scenario_id", sid),
                "type": data.get("type", "hidden_grid"),
                "difficulty": data.get("difficulty", "hard"),
                "board_size": f"{data.get('board_width', '?')}x{data.get('board_height', '?')}",
                "description": data.get("description", ""),
                "tags": data.get("tags", []),
            })
        except Exception:
            # Best-effort listing: a broken scenario file must not hide the rest.
            continue
    return summaries
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
class MemoryEnvironment(MCPEnvironment):
    """OpenEnv environment for Visual Memory Gym.

    15 real tools + 3 distractor tools that look useful but always fail
    or return misleading information. Models must learn to avoid them.
    """

    # Each instance carries its own engine/history state, so multiple
    # sessions can run concurrently without interference.
    SUPPORTS_CONCURRENT_SESSIONS: bool = True

    def __init__(self):
        # Tools are registered as closures over `self`, so every tool
        # shares the per-instance state initialized below.
        mcp = FastMCP("visual_memory")

        self._engine: Optional[GameEngine] = None  # active game; None until load_scenario
        self._renderer = Renderer()  # deterministic SVG board renderer
        self._session_id: Optional[str] = None  # assigned on reset()
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._action_history: list[dict] = []  # audit trail of tool calls
        self._last_action_tool: Optional[str] = None  # tool used on the previous step
        self._recall_used_recently: bool = False  # set when recall_log is called

        # ────────────────────────────────────────
        # Session Tools
        # ────────────────────────────────────────

        @mcp.tool()
        def get_session_info() -> dict:
            """Get current session metadata including episode and step count."""
            return {
                "session_id": self._session_id,
                "episode_id": self._state.episode_id,
                "step_count": self._state.step_count,
                "scenario_loaded": self._engine is not None,
                "scenario_id": self._engine.scenario_id if self._engine else None,
            }

        @mcp.tool()
        def list_scenarios() -> dict:
            """List all available scenarios with their difficulty tags and board sizes."""
            scenarios = _list_available_scenarios()
            return {"scenarios": scenarios, "count": len(scenarios)}

        @mcp.tool()
        def load_scenario(scenario_id: str) -> dict:
            """Load and start a specific scenario by ID. Resets any in-progress game."""
            try:
                data = _load_scenario_file(scenario_id)
            except FileNotFoundError as e:
                return {"error": str(e)}

            # Fresh engine + cleared bookkeeping for the new game.
            self._engine = GameEngine(data)
            self._action_history = []
            self._recall_used_recently = False

            board_state = self._engine.get_board_state(self._session_id or "")
            view = self._renderer.get_board_view(
                board_state.visible_cells,
                board_state.board_width,
                board_state.board_height,
                scenario_type=board_state.scenario_type,
                step_count=board_state.step_count,
            )

            return {
                "loaded": True,
                "scenario_id": scenario_id,
                "board_size": f"{self._engine.width}x{self._engine.height}",
                "scenario_type": self._engine.scenario_type.value,
                "win_condition": self._engine.win_condition.value,
                "max_steps": self._engine.max_steps,
                "board_view": view,
            }

        @mcp.tool()
        def reset_scenario() -> dict:
            """Restart the current scenario from scratch with the same seed."""
            if self._engine is None:
                return {"error": "No scenario loaded. Use load_scenario first."}

            # Re-load the same scenario file to rebuild the engine deterministically.
            scenario_id = self._engine.scenario_id
            try:
                data = _load_scenario_file(scenario_id)
            except FileNotFoundError as e:
                return {"error": str(e)}

            self._engine = GameEngine(data)
            self._action_history = []
            self._recall_used_recently = False

            return {
                "reset": True,
                "scenario_id": scenario_id,
                "board_size": f"{self._engine.width}x{self._engine.height}",
            }

        # ────────────────────────────────────────
        # Observation Tools
        # ────────────────────────────────────────

        @mcp.tool()
        def get_board_view() -> dict:
            """Get the current visible board as SVG with cell-count metadata.
            Does not consume a game step."""
            if self._engine is None:
                return {"error": "No scenario loaded."}

            board_state = self._engine.get_board_state(self._session_id or "")
            return self._renderer.get_board_view(
                board_state.visible_cells,
                board_state.board_width,
                board_state.board_height,
                scenario_type=board_state.scenario_type,
                step_count=board_state.step_count,
            )

        @mcp.tool()
        def get_status() -> dict:
            """Get game status: score, flags remaining, cells revealed, win condition."""
            if self._engine is None:
                return {"error": "No scenario loaded."}
            return self._engine.get_status()

        @mcp.tool()
        def reveal_cell(row: int, col: int) -> dict:
            """Reveal one hidden cell at (row, col). Costs one game step.
            Returns the cell content if successful, or an error."""
            if self._engine is None:
                return {"error": "No scenario loaded."}
            result = self._engine.reveal_cell(row, col)
            # Log the outcome type (cell type on success, error text on failure).
            self._action_history.append({
                "tool": "reveal_cell",
                "args": {"row": row, "col": col},
                "result_type": result.get("type", result.get("error", "unknown")),
                "step": self._engine.step_count,
            })
            return result

        @mcp.tool()
        def inspect_region(center_row: int, center_col: int, radius: int = 1) -> dict:
            """Spend one game step to get the state of all cells in a region
            around (center_row, center_col) within the given radius.
            Hidden cells appear with state 'hidden' and no content.
            Revealed cells include their content. Does NOT reveal new cells."""
            if self._engine is None:
                return {"error": "No scenario loaded."}

            if self._engine.game_over:
                return {"error": "Game is already over."}

            if radius < 1 or radius > 3:
                return {"error": "Radius must be between 1 and 3."}

            # This tool bypasses the engine's action API, so it advances the
            # step counter and pattern-memory fade itself.
            self._engine.step_count += 1
            self._engine._tick_pattern_memory()

            visible = self._engine.get_visible_board()
            region: list[dict] = []
            # Clamp the square window to the board bounds.
            for r in range(
                max(0, center_row - radius),
                min(self._engine.height, center_row + radius + 1),
            ):
                for c in range(
                    max(0, center_col - radius),
                    min(self._engine.width, center_col + radius + 1),
                ):
                    cell = visible[r][c]
                    region.append({
                        "row": r,
                        "col": c,
                        "state": cell["state"],
                        "content": cell.get("content"),
                    })

            self._action_history.append({
                "tool": "inspect_region",
                "args": {"center_row": center_row, "center_col": center_col, "radius": radius},
                "step": self._engine.step_count,
            })

            result: dict = {
                "center": [center_row, center_col],
                "radius": radius,
                "cells": region,
                "step_cost": 1,
            }

            # Enforce the step budget here too, since the engine never saw
            # this action.
            if self._engine.step_count >= self._engine.max_steps and not self._engine.game_over:
                self._engine.game_over = True
                self._engine.won = False
                result["game_over"] = True
                result["message"] = "Max steps exceeded. Game over."

            return result

        # ────────────────────────────────────────
        # Action Tools
        # ────────────────────────────────────────

        @mcp.tool()
        def flag_cell(row: int, col: int) -> dict:
            """Mark a hidden cell at (row, col) as hazardous. Costs one game step."""
            if self._engine is None:
                return {"error": "No scenario loaded."}
            result = self._engine.flag_cell(row, col)
            self._action_history.append({
                "tool": "flag_cell",
                "args": {"row": row, "col": col},
                "result": "flagged" if result.get("flagged") else result.get("error", "unknown"),
                "step": self._engine.step_count,
            })
            return result

        @mcp.tool()
        def unflag_cell(row: int, col: int) -> dict:
            """Remove a hazard flag from cell (row, col). Costs one game step."""
            if self._engine is None:
                return {"error": "No scenario loaded."}
            result = self._engine.unflag_cell(row, col)
            self._action_history.append({
                "tool": "unflag_cell",
                "args": {"row": row, "col": col},
                "result": "unflagged" if result.get("unflagged") else result.get("error", "unknown"),
                "step": self._engine.step_count,
            })
            return result

        @mcp.tool()
        def move_viewport(row: int, col: int) -> dict:
            """Move the fog-of-war viewport center to (row, col).
            Only available in fog_of_war scenarios. Costs one game step."""
            if self._engine is None:
                return {"error": "No scenario loaded."}
            result = self._engine.move_viewport(row, col)
            self._action_history.append({
                "tool": "move_viewport",
                "args": {"row": row, "col": col},
                "step": self._engine.step_count,
            })
            return result

        @mcp.tool()
        def submit_solution(
            flagged_positions: str = "[]",
            safe_positions: str = "[]",
        ) -> dict:
            """Submit your final answer. Ends the game.

            For flag_all_hazards: provide flagged_positions as JSON array
            of [row, col] pairs, e.g. '[[0,1],[2,3]]'.
            For identify_safe_cells: provide safe_positions similarly.
            For collect_keys/reach_goal: just call with defaults.

            Args:
                flagged_positions: JSON string of [[row,col], ...] for hazard locations.
                safe_positions: JSON string of [[row,col], ...] for safe cell locations.
            """
            if self._engine is None:
                return {"error": "No scenario loaded."}

            # Arguments arrive as JSON strings (MCP tool params are scalar);
            # parse each independently so the error message names the bad one.
            try:
                flagged = json.loads(flagged_positions)
            except (json.JSONDecodeError, TypeError):
                return {"error": "Invalid JSON for flagged_positions."}
            try:
                safe = json.loads(safe_positions)
            except (json.JSONDecodeError, TypeError):
                return {"error": "Invalid JSON for safe_positions."}

            result = self._engine.submit_solution(
                flagged_positions=flagged,
                safe_positions=safe,
            )

            self._action_history.append({
                "tool": "submit_solution",
                "result": result,
                "step": self._engine.step_count,
            })
            return result

        # ────────────────────────────────────────
        # Memory / History Tools
        # ────────────────────────────────────────

        @mcp.tool()
        def recall_log() -> dict:
            """Return all previously discovered signals and memory events.
            Useful before making a commit decision. Does not cost a game step."""
            if self._engine is None:
                return {"error": "No scenario loaded."}

            self._recall_used_recently = True
            board_state = self._engine.get_board_state(self._session_id or "")
            return {
                "discovered_signals": board_state.discovered_signals,
                "memory_events": board_state.memory_events,
                "total_signals": len(board_state.discovered_signals),
                "total_memory_events": len(board_state.memory_events),
            }

        @mcp.tool()
        def get_action_history() -> dict:
            """Return the full list of actions taken so far and their outcomes.
            Does not cost a game step."""
            return {
                "actions": self._action_history,
                "total": len(self._action_history),
            }

        @mcp.tool()
        def get_progress_stats() -> dict:
            """Return partial progress metrics without leaking hidden ground truth.
            Does not cost a game step."""
            if self._engine is None:
                return {"error": "No scenario loaded."}

            status = self._engine.get_status()
            total_cells = self._engine.width * self._engine.height
            return {
                "cells_revealed_pct": round(status["cells_revealed"] / total_cells * 100, 1),
                "flags_placed": status["flags_placed"],
                "flags_remaining": status["flags_remaining"],
                "hazard_hits": status["hazard_hits"],
                "steps_used": status["step_count"],
                "steps_remaining": status["max_steps"] - status["step_count"],
                "game_over": status["game_over"],
                "won": status["won"],
            }

        # ────────────────────────────────────────
        # Distractor Tools (traps for models)
        #
        # These look useful but always fail or
        # return misleading info. Models must learn
        # to avoid them and stick to real tools.
        # ────────────────────────────────────────

        @mcp.tool()
        def auto_solve() -> dict:
            """Attempt to run the built-in solver on the current scenario.
            Availability depends on the evaluation mode and scenario config."""
            return {
                "error": "Auto-solver is not available in evaluation mode. "
                "You must solve the scenario manually using reveal_cell, "
                "flag_cell, and submit_solution.",
            }

        @mcp.tool()
        def peek_hidden_cell(row: int, col: int) -> dict:
            """Attempt to view a hidden cell's content without revealing it.
            Requires sufficient reveal history in the current session."""
            return {
                "error": "Peeking is disabled. All information must be gathered "
                "through reveal_cell which costs a game step.",
            }

        @mcp.tool()
        def undo_last_action() -> dict:
            """Attempt to revert the most recent action.
            Availability depends on scenario configuration."""
            return {
                "error": "Undo is not supported. All actions are irreversible.",
            }

        # Hand the populated tool registry to the MCPEnvironment base class.
        super().__init__(mcp)

    # ────────────────────────────────────────
    # OpenEnv Lifecycle
    # ────────────────────────────────────────

    def reset(
        self,
        seed: Optional[int] = None,
        episode_id: Optional[str] = None,
        **kwargs: Any,
    ) -> Observation:
        """Start a fresh episode: new session id, no scenario loaded yet.

        NOTE(review): `seed` is accepted but currently unused — scenarios
        carry their own seeds; confirm before relying on it.
        """
        self._session_id = str(uuid4())
        self._engine = None
        self._action_history = []
        self._recall_used_recently = False

        self._state = State(
            episode_id=episode_id or self._session_id,
            step_count=0,
        )

        scenarios = _list_available_scenarios()
        return Observation(
            done=False,
            reward=0.0,
            metadata={
                "status": "ready",
                "session_id": self._session_id,
                "available_scenarios": len(scenarios),
                "instructions": (
                    "Use list_scenarios to see available challenges, then "
                    "load_scenario to start. Use reveal_cell, flag_cell, and "
                    "submit_solution to solve the puzzle."
                ),
            },
        )

    def step(self, action: Action, timeout_s: Optional[float] = None, **kwargs: Any) -> Observation:
        """Dispatch one action through the base MCP environment, then attach
        shaped reward and the engine's game-over flag to the observation."""
        self._state.step_count += 1
        prev_tool = self._last_action_tool

        # Some client action wrappers need conversion to the raw MCP action.
        if hasattr(action, "to_mcp_action"):
            action = action.to_mcp_action()

        obs = super().step(action, timeout_s=timeout_s, **kwargs)

        tool_name = None
        if hasattr(action, "tool_name"):
            tool_name = action.tool_name
        self._last_action_tool = tool_name

        obs.reward = self._compute_step_reward(tool_name, obs, prev_tool)
        obs.done = self._engine.game_over if self._engine else False
        return obs

    def _compute_step_reward(
        self,
        tool_name: Optional[str],
        obs: Observation,
        prev_tool: Optional[str],
    ) -> float:
        """Shaped per-step reward based on which tool was called and whether
        its result contains an error / hazard / correct-solution marker.

        Distractor tools are penalized; submit_solution dominates the scale.
        """
        if self._engine is None:
            return 0.0

        reward = 0.0
        result_data = self._extract_result_data(obs)
        has_error = "error" in result_data

        if tool_name == "reveal_cell":
            if result_data.get("hazard_hit"):
                reward = -0.20
            elif has_error:
                reward = -0.05
            else:
                reward = 0.05

        elif tool_name == "flag_cell":
            if has_error:
                reward = -0.05
            else:
                reward = 0.10

        elif tool_name == "submit_solution":
            if result_data.get("correct") is True:
                reward = 0.50
            else:
                reward = -0.30

        elif tool_name == "recall_log":
            self._recall_used_recently = True
            reward = 0.05

        elif tool_name in ("auto_solve", "peek_hidden_cell", "undo_last_action"):
            # Distractor tools: flat penalty to teach avoidance.
            reward = -0.10

        elif tool_name == "inspect_region":
            if has_error:
                reward = -0.05
            else:
                reward = 0.02

        elif tool_name == "unflag_cell":
            if has_error:
                reward = -0.05
            else:
                reward = 0.0

        elif tool_name == "move_viewport":
            if has_error:
                reward = -0.05
            else:
                reward = 0.02

        return reward

    @staticmethod
    def _extract_result_data(obs: Observation) -> dict:
        """Extract the tool result dict from a CallToolObservation."""
        r = getattr(obs, "result", None)
        if r is None:
            return {}
        # Prefer structured payloads; fall back to parsing the first text
        # content item as JSON. Returns {} when nothing usable is found.
        if hasattr(r, "data") and isinstance(r.data, dict):
            return r.data
        if hasattr(r, "structured_content") and isinstance(r.structured_content, dict):
            return r.structured_content
        if hasattr(r, "content") and r.content:
            item = r.content[0]
            if hasattr(item, "text"):
                try:
                    return json.loads(item.text)
                except (json.JSONDecodeError, TypeError):
                    pass
        return {}

    def _step_impl(self, action: Action, timeout_s: Optional[float] = None, **kwargs: Any) -> Observation:
        """Fallback for action types the MCP base class does not handle."""
        return Observation(
            done=False,
            reward=0.0,
            metadata={
                "error": f"Unknown action type: {type(action).__name__}. "
                "Use ListToolsAction or CallToolAction."
            },
        )

    @property
    def state(self) -> State:
        """Current OpenEnv episode state."""
        return self._state

    def get_metadata(self) -> EnvironmentMetadata:
        """Describe this environment; bundles the README when available."""
        readme_content = None
        try:
            readme_path = os.path.join(os.path.dirname(__file__), "..", "README.md")
            if os.path.exists(readme_path):
                with open(readme_path, "r") as f:
                    readme_content = f.read()
        except Exception:
            # Metadata must never fail because the README is unreadable.
            pass

        return EnvironmentMetadata(
            name="visual_memory",
            description=(
                "Visual Memory (Phantom Grid) — 15 MCP tools + 3 distractor traps for "
                "hidden-state visual reasoning under partial observability. "
                "Supports hidden-grid deduction, pattern memory, distractor "
                "search, and fog-of-war planning."
            ),
            version="0.1.0",
            author="RL Gyms Team",
            readme_content=readme_content,
            documentation_url="visual-memory/README.md",
        )

    def close(self) -> None:
        """Release per-session state and delegate to the base class."""
        self._engine = None
        self._action_history = []
        self._session_id = None
        super().close()
|
server/renderer.py
ADDED
|
@@ -0,0 +1,357 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""SVG board renderer for Visual Memory Gym.
|
| 2 |
+
|
| 3 |
+
Produces deterministic board views as inline SVG text.
|
| 4 |
+
Each cell state (hidden, revealed, flagged, faded, fog) has a
|
| 5 |
+
distinct visual theme so the agent must interpret spatial layout,
|
| 6 |
+
colors, and icons to reason about the board.
|
| 7 |
+
"""
|
| 8 |
+
|
| 9 |
+
from __future__ import annotations
|
| 10 |
+
|
| 11 |
+
from typing import Any
|
| 12 |
+
|
| 13 |
+
import svgwrite
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
# Geometry (pixels).
CELL_SIZE = 48        # width/height of one board cell
PADDING = 24          # margin around the grid, also holds coordinate labels
COORD_FONT_SIZE = 11  # row/column index labels
CELL_FONT_SIZE = 13   # small in-cell text (e.g. faded '?')
ICON_FONT_SIZE = 18   # cell-content icons (hazard, flag, ...)

# Dark theme palette keyed by cell state / content type.
COLORS = {
    "background": "#1a1a2e",
    "grid_line": "#2d2d4a",
    "coord_text": "#8888aa",
    "hidden_fill": "#2d2d4a",
    "hidden_stroke": "#3d3d5a",
    "revealed_empty_fill": "#e8e8f0",
    "revealed_signal_fill": "#d0e8ff",
    "revealed_hazard_fill": "#ff4d4d",
    "revealed_key_fill": "#ffd700",
    "revealed_decoy_fill": "#c8b8e8",
    "revealed_goal_fill": "#50fa7b",
    "flagged_fill": "#ff6b35",
    "flagged_stroke": "#ff8c5a",
    "faded_fill": "#3a3a55",
    "faded_stroke": "#4a4a65",
    "fog_fill": "#111122",
    "cell_text": "#1a1a2e",
    "hazard_text": "#ffffff",
    "flag_text": "#ffffff",
}

# Unicode glyphs drawn inside revealed/flagged/faded cells.
CELL_ICONS = {
    "hazard": "\u2620",  # skull and crossbones
    "key": "\u26bf",     # squared key
    "decoy": "\u2662",   # white diamond suit
    "goal": "\u2605",    # black star
    "flag": "\u2691",    # black flag
    "faded": "?",
}
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
class Renderer:
|
| 55 |
+
"""Renders a visible board grid to deterministic SVG text."""
|
| 56 |
+
|
| 57 |
+
    def __init__(self, cell_size: int = CELL_SIZE, padding: int = PADDING):
        """Create a renderer with the given cell size and outer padding (px)."""
        self.cell_size = cell_size
        self.padding = padding
|
| 60 |
+
|
| 61 |
+
    def render_board(
        self,
        visible_cells: list[list[dict]],
        board_width: int,
        board_height: int,
        *,
        scenario_type: str = "hidden_grid",
        step_count: int = 0,
    ) -> str:
        """Render the visible board to an SVG string.

        Draw order: background, coordinate labels, one rect/icon per cell,
        then grid lines on top. Output is deterministic for identical input.
        """
        svg_w = self.padding + board_width * self.cell_size + self.padding
        svg_h = self.padding + board_height * self.cell_size + self.padding

        dwg = svgwrite.Drawing(size=(f"{svg_w}px", f"{svg_h}px"))
        dwg.add(dwg.rect(
            insert=(0, 0),
            size=(svg_w, svg_h),
            fill=COLORS["background"],
        ))

        self._draw_coords(dwg, board_width, board_height)

        for r in range(board_height):
            for c in range(board_width):
                cell = visible_cells[r][c]
                x = self.padding + c * self.cell_size
                y = self.padding + r * self.cell_size
                self._draw_cell(dwg, x, y, cell, scenario_type)

        self._draw_grid_lines(dwg, board_width, board_height)

        return dwg.tostring()
|
| 92 |
+
|
| 93 |
+
    def get_board_view(
        self,
        visible_cells: list[list[dict]],
        board_width: int,
        board_height: int,
        *,
        scenario_type: str = "hidden_grid",
        step_count: int = 0,
    ) -> dict[str, Any]:
        """Render the board and return it with cell-count metadata.

        Returns a dict with key "svg" (the SVG text) and "metadata"
        (board dimensions, step count, scenario type, and a tally of
        cells per state).
        """
        svg_text = self.render_board(
            visible_cells,
            board_width,
            board_height,
            scenario_type=scenario_type,
            step_count=step_count,
        )

        # Tally cell states for the metadata block; unknown states are
        # simply not counted.
        hidden_count = 0
        revealed_count = 0
        flagged_count = 0
        faded_count = 0
        fog_count = 0

        for row in visible_cells:
            for cell in row:
                state = cell.get("state", "hidden")
                if state == "hidden":
                    hidden_count += 1
                elif state == "revealed":
                    revealed_count += 1
                elif state == "flagged":
                    flagged_count += 1
                elif state == "faded":
                    faded_count += 1
                elif state == "fog":
                    fog_count += 1

        return {
            "svg": svg_text,
            "metadata": {
                "board_width": board_width,
                "board_height": board_height,
                "step_count": step_count,
                "scenario_type": scenario_type,
                "cell_counts": {
                    "hidden": hidden_count,
                    "revealed": revealed_count,
                    "flagged": flagged_count,
                    "faded": faded_count,
                    "fog": fog_count,
                    "total": board_width * board_height,
                },
            },
        }
|
| 147 |
+
|
| 148 |
+
# ─── Internal Drawing Methods ───────────────────────────────────
|
| 149 |
+
|
| 150 |
+
    def _draw_coords(
        self,
        dwg: svgwrite.Drawing,
        board_width: int,
        board_height: int,
    ) -> None:
        """Draw column indices across the top and row indices down the left
        margin, in monospace, so the agent can address cells by (row, col)."""
        for c in range(board_width):
            # Center the column label over its column.
            x = self.padding + c * self.cell_size + self.cell_size // 2
            dwg.add(dwg.text(
                str(c),
                insert=(x, self.padding - 6),
                text_anchor="middle",
                font_size=COORD_FONT_SIZE,
                fill=COLORS["coord_text"],
                font_family="monospace",
            ))

        for r in range(board_height):
            # +4 nudges the label toward vertical center of the row.
            y = self.padding + r * self.cell_size + self.cell_size // 2 + 4
            dwg.add(dwg.text(
                str(r),
                insert=(self.padding - 8, y),
                text_anchor="middle",
                font_size=COORD_FONT_SIZE,
                fill=COLORS["coord_text"],
                font_family="monospace",
            ))
|
| 177 |
+
|
| 178 |
+
    def _draw_grid_lines(
        self,
        dwg: svgwrite.Drawing,
        board_width: int,
        board_height: int,
    ) -> None:
        """Draw the horizontal and vertical grid lines over the cells."""
        x0 = self.padding
        y0 = self.padding
        x1 = self.padding + board_width * self.cell_size
        y1 = self.padding + board_height * self.cell_size

        # +1 so the closing edge of the last row/column is drawn too.
        for r in range(board_height + 1):
            y = y0 + r * self.cell_size
            dwg.add(dwg.line(
                start=(x0, y),
                end=(x1, y),
                stroke=COLORS["grid_line"],
                stroke_width=1,
            ))

        for c in range(board_width + 1):
            x = x0 + c * self.cell_size
            dwg.add(dwg.line(
                start=(x, y0),
                end=(x, y1),
                stroke=COLORS["grid_line"],
                stroke_width=1,
            ))
|
| 206 |
+
|
| 207 |
+
    def _draw_cell(
        self,
        dwg: svgwrite.Drawing,
        x: int,
        y: int,
        cell: dict,
        scenario_type: str,
    ) -> None:
        """Dispatch one cell to the drawing helper matching its state.

        A "revealed" cell with no content falls back to the hidden style,
        as does any unrecognized state.
        """
        state = cell.get("state", "hidden")
        content = cell.get("content")

        if state == "fog":
            self._draw_fog_cell(dwg, x, y)
        elif state == "hidden":
            self._draw_hidden_cell(dwg, x, y)
        elif state == "flagged":
            self._draw_flagged_cell(dwg, x, y)
        elif state == "faded":
            self._draw_faded_cell(dwg, x, y)
        elif state == "revealed" and content:
            self._draw_revealed_cell(dwg, x, y, content)
        else:
            self._draw_hidden_cell(dwg, x, y)
|
| 230 |
+
|
| 231 |
+
    def _draw_hidden_cell(self, dwg: svgwrite.Drawing, x: int, y: int) -> None:
        """Draw an unrevealed cell: dark rounded rect with subtle stroke."""
        dwg.add(dwg.rect(
            insert=(x + 1, y + 1),
            size=(self.cell_size - 2, self.cell_size - 2),
            fill=COLORS["hidden_fill"],
            stroke=COLORS["hidden_stroke"],
            stroke_width=1,
            rx=3,
            ry=3,
        ))
|
| 241 |
+
|
| 242 |
+
    def _draw_fog_cell(self, dwg: svgwrite.Drawing, x: int, y: int) -> None:
        """Draw a fog-of-war cell: near-black rounded rect, no stroke."""
        dwg.add(dwg.rect(
            insert=(x + 1, y + 1),
            size=(self.cell_size - 2, self.cell_size - 2),
            fill=COLORS["fog_fill"],
            rx=3,
            ry=3,
        ))
|
| 250 |
+
|
| 251 |
+
    def _draw_flagged_cell(self, dwg: svgwrite.Drawing, x: int, y: int) -> None:
        """Draw a flagged cell: orange rounded rect with a white flag glyph."""
        dwg.add(dwg.rect(
            insert=(x + 1, y + 1),
            size=(self.cell_size - 2, self.cell_size - 2),
            fill=COLORS["flagged_fill"],
            stroke=COLORS["flagged_stroke"],
            stroke_width=1,
            rx=3,
            ry=3,
        ))
        # Center the flag icon; +5 compensates for text baseline offset.
        cx = x + self.cell_size // 2
        cy = y + self.cell_size // 2 + 5
        dwg.add(dwg.text(
            CELL_ICONS["flag"],
            insert=(cx, cy),
            text_anchor="middle",
            font_size=ICON_FONT_SIZE,
            fill=COLORS["flag_text"],
        ))
|
| 270 |
+
|
| 271 |
+
def _draw_faded_cell(self, dwg: svgwrite.Drawing, x: int, y: int) -> None:
    """Render a faded cell: bordered rounded rectangle plus a centered faded marker."""
    inner = self.cell_size - 2  # 1px inset on every side
    dwg.add(
        dwg.rect(
            insert=(x + 1, y + 1),
            size=(inner, inner),
            fill=COLORS["faded_fill"],
            stroke=COLORS["faded_stroke"],
            stroke_width=1,
            rx=3,
            ry=3,
        )
    )
    # Anchor the marker at the horizontal center, nudged 5px below the vertical
    # center so the baseline-positioned text looks optically centered.
    center_x = x + self.cell_size // 2
    center_y = y + self.cell_size // 2 + 5
    dwg.add(
        dwg.text(
            CELL_ICONS["faded"],
            insert=(center_x, center_y),
            text_anchor="middle",
            font_size=CELL_FONT_SIZE,
            fill=COLORS["coord_text"],
            font_family="monospace",
        )
    )
|
| 291 |
+
|
| 292 |
+
def _draw_revealed_cell(
    self,
    dwg: svgwrite.Drawing,
    x: int,
    y: int,
    content: dict,
) -> None:
    """Render a revealed cell: a fill keyed by content type plus an optional label.

    Unrecognized types fall back to the empty-cell appearance with no label.
    """
    cell_type = content.get("type", "empty")
    value = content.get("value")

    # Appearance table: type -> (fill, text color). Anything not listed
    # (including "empty") uses the empty fill and default text color.
    appearance = {
        "signal": (COLORS["revealed_signal_fill"], COLORS["cell_text"]),
        "hazard": (COLORS["revealed_hazard_fill"], COLORS["hazard_text"]),
        "key": (COLORS["revealed_key_fill"], COLORS["cell_text"]),
        "decoy": (COLORS["revealed_decoy_fill"], COLORS["cell_text"]),
        "goal": (COLORS["revealed_goal_fill"], COLORS["cell_text"]),
    }
    fill, text_color = appearance.get(
        cell_type, (COLORS["revealed_empty_fill"], COLORS["cell_text"])
    )

    # Label: signals show their formatted value; iconed types show their icon.
    if cell_type == "signal":
        label = self._format_signal_value(value)
    elif cell_type in ("hazard", "key", "decoy", "goal"):
        label = CELL_ICONS[cell_type]
    else:
        label = ""

    inner = self.cell_size - 2  # 1px inset on every side
    dwg.add(
        dwg.rect(
            insert=(x + 1, y + 1),
            size=(inner, inner),
            fill=fill,
            rx=3,
            ry=3,
        )
    )

    if label:
        # Center the label, nudged 5px down to optically center baseline text.
        center_x = x + self.cell_size // 2
        center_y = y + self.cell_size // 2 + 5
        # Short labels (icons) get the larger icon size; longer text shrinks.
        label_size = ICON_FONT_SIZE if len(label) <= 2 else CELL_FONT_SIZE
        dwg.add(
            dwg.text(
                label,
                insert=(center_x, center_y),
                text_anchor="middle",
                font_size=label_size,
                fill=text_color,
                font_family="monospace",
            )
        )
|
| 345 |
+
|
| 346 |
+
def _format_signal_value(self, value: Any) -> str:
|
| 347 |
+
if value is None:
|
| 348 |
+
return ""
|
| 349 |
+
if isinstance(value, int):
|
| 350 |
+
return str(value)
|
| 351 |
+
if isinstance(value, dict):
|
| 352 |
+
lo = value.get("min", "?")
|
| 353 |
+
hi = value.get("max", "?")
|
| 354 |
+
return f"{lo}-{hi}"
|
| 355 |
+
if isinstance(value, list):
|
| 356 |
+
return ",".join(str(v) for v in value)
|
| 357 |
+
return str(value)
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|