Spaces:
Runtime error
Runtime error
Upload folder using huggingface_hub
Browse files- Dockerfile +34 -0
- README.md +277 -5
- __init__.py +12 -0
- client.py +115 -0
- models.py +70 -0
- openenv.yaml +6 -0
- openenv_snake_env.egg-info/PKG-INFO +17 -0
- openenv_snake_env.egg-info/SOURCES.txt +13 -0
- openenv_snake_env.egg-info/dependency_links.txt +1 -0
- openenv_snake_env.egg-info/entry_points.txt +2 -0
- openenv_snake_env.egg-info/requires.txt +13 -0
- openenv_snake_env.egg-info/top_level.txt +3 -0
- pyproject.toml +43 -0
- server/__init__.py +7 -0
- server/app.py +59 -0
- server/requirements.txt +5 -0
- server/snake_environment.py +246 -0
- uv.lock +0 -0
Dockerfile
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
# Use the standard openenv base image
|
| 8 |
+
# Built from: docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
|
| 9 |
+
# In GitHub Actions, this is overridden to use the GHCR base image
|
| 10 |
+
ARG BASE_IMAGE=openenv-base:latest
|
| 11 |
+
FROM ${BASE_IMAGE}
|
| 12 |
+
|
| 13 |
+
# Install dependencies
|
| 14 |
+
COPY src/envs/snake_env/server/requirements.txt /tmp/requirements.txt
|
| 15 |
+
RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
|
| 16 |
+
|
| 17 |
+
# Copy only what's needed for this environment
|
| 18 |
+
COPY src/core/ /app/src/core/
|
| 19 |
+
COPY src/envs/snake_env/ /app/src/envs/snake_env/
|
| 20 |
+
|
| 21 |
+
# Copy README for web interface documentation
|
| 22 |
+
COPY src/envs/snake_env/README.md /app/README.md
|
| 23 |
+
|
| 24 |
+
# Expose port
|
| 25 |
+
EXPOSE 8000
|
| 26 |
+
|
| 27 |
+
# Health check
|
| 28 |
+
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 29 |
+
CMD curl -f http://localhost:8000/health || exit 1
|
| 30 |
+
|
| 31 |
+
# Run the FastAPI server
|
| 32 |
+
# CMD ["uvicorn", "envs.snake_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
| 33 |
+
ENV ENABLE_WEB_INTERFACE=true
|
| 34 |
+
CMD ["python", "-m", "uvicorn", "envs.snake_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
|
README.md
CHANGED
|
@@ -1,10 +1,282 @@
|
|
| 1 |
---
|
| 2 |
-
title: Snake
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom: blue
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: Snake Environment Server
|
| 3 |
+
emoji: 🐉
|
| 4 |
+
colorFrom: 'blue'
|
| 5 |
+
colorTo: 'green'
|
| 6 |
sdk: docker
|
| 7 |
pinned: false
|
| 8 |
+
app_port: 8000
|
| 9 |
+
base_path: /web
|
| 10 |
+
tags:
|
| 11 |
+
- openenv
|
| 12 |
---
|
| 13 |
|
| 14 |
+
# Snake Environment
|
| 15 |
+
|
| 16 |
+
A snake game environment for OpenEnv, built on the multi-agent Snake-v1 environment from [marlenv](https://github.com/kc-ml2/marlenv). This environment exposes a single-agent interface to the classic snake game: the snake must navigate a grid, eat fruits, and avoid walls and its own body.
|
| 17 |
+
|
| 18 |
+
## Overview
|
| 19 |
+
|
| 20 |
+
The Snake environment wraps the marlenv Snake-v1 environment to provide a clean OpenEnv-compatible interface. marlenv itself lets multiple snakes battle on a fixed-size grid map, but this implementation focuses on single-agent gameplay.
|
| 21 |
+
|
| 22 |
+
### Features
|
| 23 |
+
|
| 24 |
+
- **Grid-based gameplay**: Configurable grid size (default: 20x20)
|
| 25 |
+
- **Fruit collection**: Snake grows when eating fruits
|
| 26 |
+
- **Partial observability**: Optional vision range for limited field of view
|
| 27 |
+
- **Customizable rewards**: Configurable reward function for different game aspects
|
| 28 |
+
- **Two control modes**:
|
| 29 |
+
- `snake`: Relative actions (turn left/right)
|
| 30 |
+
- `human`: Global directions (up/down/left/right)
|
| 31 |
+
|
| 32 |
+
### Game Rules
|
| 33 |
+
|
| 34 |
+
- Snake dies when its head hits a wall or its own body
|
| 35 |
+
- Snake grows by one unit when it eats a fruit
|
| 36 |
+
- Episode ends when the snake dies or reaches maximum steps
|
| 37 |
+
- Rewards can be customized for: eating fruits, survival time, and death penalty
|
| 38 |
+
|
| 39 |
+
## Quick Start
|
| 40 |
+
|
| 41 |
+
### Using Docker (Recommended)
|
| 42 |
+
|
| 43 |
+
```python
|
| 44 |
+
from envs.snake_env import SnakeAction, SnakeEnv
|
| 45 |
+
|
| 46 |
+
# Start environment from Docker image
|
| 47 |
+
client = SnakeEnv.from_docker_image("snake-env:latest")
|
| 48 |
+
|
| 49 |
+
# Reset to start new episode
|
| 50 |
+
result = client.reset()
|
| 51 |
+
print(f"Snake alive: {result.observation.alive}")
|
| 52 |
+
print(f"Grid shape: {len(result.observation.grid)}x{len(result.observation.grid[0])}")
|
| 53 |
+
|
| 54 |
+
# Take actions
|
| 55 |
+
result = client.step(SnakeAction(action=0)) # Continue straight
|
| 56 |
+
print(f"Reward: {result.reward}")
|
| 57 |
+
print(f"Score: {result.observation.episode_score}")
|
| 58 |
+
|
| 59 |
+
result = client.step(SnakeAction(action=1)) # Turn left
|
| 60 |
+
result = client.step(SnakeAction(action=2)) # Turn right
|
| 61 |
+
|
| 62 |
+
# Check game state
|
| 63 |
+
state = client.state()
|
| 64 |
+
print(f"Episode: {state.episode_id}")
|
| 65 |
+
print(f"Steps: {state.step_count}")
|
| 66 |
+
|
| 67 |
+
# Cleanup
|
| 68 |
+
client.close()
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
### Using Local Server
|
| 72 |
+
|
| 73 |
+
```bash
|
| 74 |
+
# Install dependencies
|
| 75 |
+
cd src/envs/snake_env
|
| 76 |
+
pip install -e .
|
| 77 |
+
|
| 78 |
+
# Run server
|
| 79 |
+
uv run --project . server
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
Then connect from another terminal:
|
| 83 |
+
|
| 84 |
+
```python
|
| 85 |
+
from envs.snake_env import SnakeAction, SnakeEnv
|
| 86 |
+
|
| 87 |
+
# Connect to running server
|
| 88 |
+
client = SnakeEnv(base_url="http://localhost:8000")
|
| 89 |
+
result = client.reset()
|
| 90 |
+
result = client.step(SnakeAction(action=0))
|
| 91 |
+
```
|
| 92 |
+
|
| 93 |
+
## Actions
|
| 94 |
+
|
| 95 |
+
The action space depends on the `observer` mode:
|
| 96 |
+
|
| 97 |
+
### Snake Mode (Default)
|
| 98 |
+
Relative actions based on current direction:
|
| 99 |
+
- `0`: No-op (continue in same direction)
|
| 100 |
+
- `1`: Turn left (90 degrees counterclockwise)
|
| 101 |
+
- `2`: Turn right (90 degrees clockwise)
|
| 102 |
+
|
| 103 |
+
### Human Mode
|
| 104 |
+
Global directional actions:
|
| 105 |
+
- `0`: No-op
|
| 106 |
+
- `1`: Move left
|
| 107 |
+
- `2`: Move right
|
| 108 |
+
- `3`: Move down
|
| 109 |
+
- `4`: Move up
|
| 110 |
+
|
| 111 |
+
## Observations
|
| 112 |
+
|
| 113 |
+
Each observation includes:
|
| 114 |
+
|
| 115 |
+
- `grid`: The full game grid as a 2D array (height × width)
|
| 116 |
+
- `observation`: Encoded observation based on vision range
|
| 117 |
+
- `episode_score`: Cumulative score in current episode
|
| 118 |
+
- `episode_steps`: Number of steps taken
|
| 119 |
+
- `episode_fruits`: Number of fruits eaten
|
| 120 |
+
- `episode_kills`: Number of kills (always 0 in single-agent mode)
|
| 121 |
+
- `alive`: Whether the snake is still alive
|
| 122 |
+
|
| 123 |
+
## Configuration
|
| 124 |
+
|
| 125 |
+
### Environment Parameters
|
| 126 |
+
|
| 127 |
+
```python
|
| 128 |
+
from envs.snake_env.server.snake_environment import SnakeEnvironment
|
| 129 |
+
|
| 130 |
+
env = SnakeEnvironment(
|
| 131 |
+
height=20, # Grid height (default: 20)
|
| 132 |
+
width=20, # Grid width (default: 20)
|
| 133 |
+
snake_length=3, # Initial snake length (default: 3)
|
| 134 |
+
vision_range=5, # Partial observability (None for full grid)
|
| 135 |
+
observer='snake', # 'snake' or 'human' mode
|
| 136 |
+
max_episode_steps=1000, # Maximum steps per episode
|
| 137 |
+
reward_dict={ # Custom reward function
|
| 138 |
+
'fruit': 1.0, # Reward for eating fruit
|
| 139 |
+
'kill': 0.0, # Reward for kills (multi-agent)
|
| 140 |
+
'lose': -1.0, # Penalty for death
|
| 141 |
+
'win': 0.0, # Reward for winning (multi-agent)
|
| 142 |
+
'time': 0.0, # Reward per timestep
|
| 143 |
+
}
|
| 144 |
+
)
|
| 145 |
+
```
|
| 146 |
+
|
| 147 |
+
### Custom Rewards
|
| 148 |
+
|
| 149 |
+
You can customize the reward function to encourage different behaviors:
|
| 150 |
+
|
| 151 |
+
```python
|
| 152 |
+
# Encourage survival
|
| 153 |
+
reward_dict = {
|
| 154 |
+
'fruit': 1.0,
|
| 155 |
+
'lose': -10.0,
|
| 156 |
+
'time': 0.01, # Small reward for staying alive
|
| 157 |
+
}
|
| 158 |
+
|
| 159 |
+
# Fast fruit collection
|
| 160 |
+
reward_dict = {
|
| 161 |
+
'fruit': 10.0,
|
| 162 |
+
'lose': -1.0,
|
| 163 |
+
'time': -0.01, # Penalty for taking too long
|
| 164 |
+
}
|
| 165 |
+
```
|
| 166 |
+
|
| 167 |
+
## Building and Deployment
|
| 168 |
+
|
| 169 |
+
### Build Docker Image
|
| 170 |
+
|
| 171 |
+
From the repository root:
|
| 172 |
+
|
| 173 |
+
```bash
|
| 174 |
+
# Build base image first (if not already built)
|
| 175 |
+
docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
|
| 176 |
+
|
| 177 |
+
# Build snake environment image
|
| 178 |
+
docker build -t snake-env:latest -f src/envs/snake_env/server/Dockerfile .
|
| 179 |
+
```
|
| 180 |
+
|
| 181 |
+
The Dockerfile uses `pip install` with `requirements.txt` for maximum compatibility.
|
| 182 |
+
|
| 183 |
+
### Run Docker Container
|
| 184 |
+
|
| 185 |
+
```bash
|
| 186 |
+
# Run the container
|
| 187 |
+
docker run -p 8000:8000 snake-env:latest
|
| 188 |
+
|
| 189 |
+
# Or with environment variables
|
| 190 |
+
docker run -p 8000:8000 \
|
| 191 |
+
-e ENABLE_WEB_INTERFACE=true \
|
| 192 |
+
snake-env:latest
|
| 193 |
+
```
|
| 194 |
+
|
| 195 |
+
### Web Interface
|
| 196 |
+
|
| 197 |
+
When `ENABLE_WEB_INTERFACE=true` is set, you can access the web interface at `http://localhost:8000/web` to interact with the environment through your browser.
|
| 198 |
+
|
| 199 |
+
## Dependencies
|
| 200 |
+
|
| 201 |
+
The snake environment requires:
|
| 202 |
+
|
| 203 |
+
- `marlenv`: Multi-agent snake game implementation
|
| 204 |
+
- `gym==0.24.1`: OpenAI Gym (required by marlenv)
|
| 205 |
+
- `numpy`: Numerical operations
|
| 206 |
+
- Standard OpenEnv dependencies (fastapi, pydantic, uvicorn)
|
| 207 |
+
|
| 208 |
+
These are automatically installed when using Docker or installing via pip.
|
| 209 |
+
|
| 210 |
+
## Example Training Loop
|
| 211 |
+
|
| 212 |
+
```python
|
| 213 |
+
from envs.snake_env import SnakeAction, SnakeEnv
|
| 214 |
+
import random
|
| 215 |
+
|
| 216 |
+
# Connect to environment
|
| 217 |
+
env = SnakeEnv.from_docker_image("snake-env:latest")
|
| 218 |
+
|
| 219 |
+
# Training loop
|
| 220 |
+
for episode in range(10):
|
| 221 |
+
result = env.reset()
|
| 222 |
+
total_reward = 0
|
| 223 |
+
done = False
|
| 224 |
+
|
| 225 |
+
while not done:
|
| 226 |
+
# Simple random policy (replace with your agent)
|
| 227 |
+
action = SnakeAction(action=random.randint(0, 2))
|
| 228 |
+
result = env.step(action)
|
| 229 |
+
|
| 230 |
+
total_reward += result.reward
|
| 231 |
+
done = result.done
|
| 232 |
+
|
| 233 |
+
print(f"Episode {episode}: Reward={total_reward}, "
|
| 234 |
+
f"Fruits={result.observation.episode_fruits}, "
|
| 235 |
+
f"Steps={result.observation.episode_steps}")
|
| 236 |
+
|
| 237 |
+
env.close()
|
| 238 |
+
```
|
| 239 |
+
|
| 240 |
+
## Troubleshooting
|
| 241 |
+
|
| 242 |
+
### marlenv Installation Issues
|
| 243 |
+
|
| 244 |
+
If you encounter issues installing marlenv, you can install it from source:
|
| 245 |
+
|
| 246 |
+
```bash
|
| 247 |
+
pip install git+https://github.com/kc-ml2/marlenv.git
|
| 248 |
+
```
|
| 249 |
+
|
| 250 |
+
### Import Errors
|
| 251 |
+
|
| 252 |
+
Make sure you're in the correct directory when running the server:
|
| 253 |
+
|
| 254 |
+
```bash
|
| 255 |
+
cd src/envs/snake_env
|
| 256 |
+
uv run --project . server
|
| 257 |
+
```
|
| 258 |
+
|
| 259 |
+
### Docker Build Issues
|
| 260 |
+
|
| 261 |
+
Ensure the base image is built first:
|
| 262 |
+
|
| 263 |
+
```bash
|
| 264 |
+
docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
|
| 265 |
+
```
|
| 266 |
+
|
| 267 |
+
## Citation
|
| 268 |
+
|
| 269 |
+
The underlying snake game is from marlenv:
|
| 270 |
+
|
| 271 |
+
```bibtex
|
| 272 |
+
@MISC{marlenv2021,
|
| 273 |
+
author = {ML2},
|
| 274 |
+
title = {Marlenv, Multi-agent Reinforcement Learning Environment},
|
| 275 |
+
howpublished = {\url{http://github.com/kc-ml2/marlenv}},
|
| 276 |
+
year = {2021}
|
| 277 |
+
}
|
| 278 |
+
```
|
| 279 |
+
|
| 280 |
+
## License
|
| 281 |
+
|
| 282 |
+
BSD 3-Clause License - See LICENSE file in the root directory.
|
__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Snake Environment - A multi-agent snake game environment based on marlenv."""
|
| 8 |
+
|
| 9 |
+
from .client import SnakeEnv
|
| 10 |
+
from .models import SnakeAction, SnakeObservation
|
| 11 |
+
|
| 12 |
+
__all__ = ["SnakeAction", "SnakeObservation", "SnakeEnv"]
|
client.py
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Snake Environment HTTP Client.
|
| 9 |
+
|
| 10 |
+
This module provides the client for connecting to a Snake Environment server
|
| 11 |
+
over HTTP.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from typing import Any, Dict
|
| 15 |
+
|
| 16 |
+
# Support both in-repo and standalone imports
|
| 17 |
+
try:
|
| 18 |
+
# In-repo imports (when running from OpenEnv repository)
|
| 19 |
+
from core.client_types import StepResult
|
| 20 |
+
from core.env_server.types import State
|
| 21 |
+
from core.http_env_client import HTTPEnvClient
|
| 22 |
+
|
| 23 |
+
from .models import SnakeAction, SnakeObservation
|
| 24 |
+
except ImportError:
|
| 25 |
+
from models import SnakeAction, SnakeObservation
|
| 26 |
+
|
| 27 |
+
# Standalone imports (when environment is standalone with openenv-core from pip)
|
| 28 |
+
from openenv_core.client_types import StepResult
|
| 29 |
+
from openenv_core.env_server.types import State
|
| 30 |
+
from openenv_core.http_env_client import HTTPEnvClient
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
class SnakeEnv(HTTPEnvClient[SnakeAction, SnakeObservation]):
    """
    Client-side handle for a Snake Environment served over HTTP.

    This class only supplies the (de)serialization hooks below: it turns a
    SnakeAction into a JSON payload and turns server JSON back into
    SnakeObservation / State objects. The public calls used in the examples
    (reset(), step(), from_docker_image()) are not defined here; they are
    expected to come from the HTTPEnvClient base class.

    Example:
        >>> # Connect to a server that is already running
        >>> client = SnakeEnv(base_url="http://localhost:8000")
        >>> result = client.reset()
        >>> print(result.observation.alive)  # True
        >>>
        >>> # Send one action (1 = turn left in 'snake' observer mode)
        >>> result = client.step(SnakeAction(action=1))
        >>> print(result.observation.episode_score)
        >>> print(result.reward)

    Example with Docker:
        >>> # Start a container from the image and connect to it
        >>> client = SnakeEnv.from_docker_image("snake-env:latest")
        >>> result = client.reset()
        >>> result = client.step(SnakeAction(action=0))  # noop
    """

    def _step_payload(self, action: SnakeAction) -> Dict:
        """
        Build the JSON body for a step request.

        Args:
            action: The SnakeAction to send.

        Returns:
            A dict with a single "action" key holding the action's integer code.
        """
        body = {"action": action.action}
        return body

    def _parse_result(self, payload: Dict) -> StepResult[SnakeObservation]:
        """
        Turn a server response into a StepResult[SnakeObservation].

        Keys missing from the response fall back to neutral defaults
        (empty grid/observation, zeroed counters, alive=True, done=False,
        reward=None, empty metadata).

        Args:
            payload: Decoded JSON response from the server.

        Returns:
            StepResult wrapping the reconstructed SnakeObservation together
            with the top-level reward and done flag.
        """
        # reward/done live at the top level of the response and are mirrored
        # onto both the observation and the StepResult.
        reward = payload.get("reward")
        done = payload.get("done", False)
        obs_data = payload.get("observation", {})

        # Built per call so the mutable list defaults are never shared
        # between observations.
        fallbacks = {
            "grid": [],
            "observation": [],
            "episode_score": 0.0,
            "episode_steps": 0,
            "episode_fruits": 0,
            "episode_kills": 0,
            "alive": True,
        }
        snake_fields = {
            key: obs_data.get(key, fallback) for key, fallback in fallbacks.items()
        }

        observation = SnakeObservation(
            **snake_fields,
            done=done,
            reward=reward,
            metadata=obs_data.get("metadata", {}),
        )

        return StepResult(observation=observation, reward=reward, done=done)

    def _parse_state(self, payload: Dict) -> State:
        """
        Turn a /state response into a State object.

        Args:
            payload: Decoded JSON response from the /state endpoint.

        Returns:
            State carrying episode_id (None if absent) and step_count
            (0 if absent).
        """
        episode_id = payload.get("episode_id")
        step_count = payload.get("step_count", 0)
        return State(episode_id=episode_id, step_count=step_count)
|
models.py
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Data models for the Snake Environment.
|
| 9 |
+
|
| 10 |
+
The Snake environment is a multi-agent reinforcement learning environment
|
| 11 |
+
based on marlenv's Snake-v1. Multiple snakes battle on a fixed size grid map.
|
| 12 |
+
"""
|
| 13 |
+
|
| 14 |
+
from dataclasses import dataclass
|
| 15 |
+
from typing import Any, Dict, List, Optional
|
| 16 |
+
|
| 17 |
+
# Support both in-repo and standalone imports
|
| 18 |
+
try:
|
| 19 |
+
# In-repo imports (when running from OpenEnv repository)
|
| 20 |
+
from core.env_server.types import Action, Observation
|
| 21 |
+
except ImportError:
|
| 22 |
+
# Standalone imports (when environment is standalone with openenv-core from pip)
|
| 23 |
+
from openenv_core.env_server.types import Action, Observation
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass(kw_only=True)
class SnakeAction(Action):
    """
    A single move submitted to the Snake environment.

    The meaning of the integer code depends on the observer mode the
    environment was created with.

    observer='snake' (relative to the current heading), action in [0, 1, 2]:
        0 = noop (keep moving in the same direction)
        1 = turn left (90 degrees)
        2 = turn right (90 degrees)

    observer='human' (absolute directions), action in [0, 1, 2, 3, 4]:
        0 = noop
        1 = left
        2 = right
        3 = down
        4 = up
    """

    # Integer action code; no range check is performed in this class.
    action: int
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
@dataclass(kw_only=True)
class SnakeObservation(Observation):
    """
    What the Snake environment reports back after a reset or a step.

    Attributes:
        grid: Current game grid as a nested list (height x width).
        observation: Encoded view for the snake; either the full grid or
            the area within its vision range.
        episode_score: Score accumulated so far in this episode.
        episode_steps: Steps taken so far in this episode.
        episode_fruits: Fruits eaten so far in this episode.
        episode_kills: Kills so far in this episode.
        alive: Whether the snake is still alive.
    """

    # Required fields (no defaults).
    grid: List[List[int]]
    observation: List[List[List[float]]]  # H x W x C observation

    # Per-episode counters; default to a fresh, just-started episode.
    episode_score: float = 0.0
    episode_steps: int = 0
    episode_fruits: int = 0
    episode_kills: int = 0
    alive: bool = True
|
openenv.yaml
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
spec_version: 1
|
| 2 |
+
name: snake_env
|
| 3 |
+
type: space
|
| 4 |
+
runtime: fastapi
|
| 5 |
+
app: server.app:app
|
| 6 |
+
port: 8000
|
openenv_snake_env.egg-info/PKG-INFO
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Metadata-Version: 2.4
|
| 2 |
+
Name: openenv-snake-env
|
| 3 |
+
Version: 0.1.0
|
| 4 |
+
Summary: Snake Environment for OpenEnv - multi-agent snake game based on marlenv
|
| 5 |
+
Requires-Python: >=3.10
|
| 6 |
+
Requires-Dist: openenv-core>=0.1.0
|
| 7 |
+
Requires-Dist: fastapi>=0.115.0
|
| 8 |
+
Requires-Dist: pydantic>=2.0.0
|
| 9 |
+
Requires-Dist: uvicorn>=0.24.0
|
| 10 |
+
Requires-Dist: requests>=2.31.0
|
| 11 |
+
Requires-Dist: marlenv>=1.0.0
|
| 12 |
+
Requires-Dist: gym==0.24.1
|
| 13 |
+
Requires-Dist: numpy>=1.24.0
|
| 14 |
+
Requires-Dist: Pillow>=10.0.0
|
| 15 |
+
Provides-Extra: dev
|
| 16 |
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
| 17 |
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
openenv_snake_env.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
README.md
|
| 2 |
+
client.py
|
| 3 |
+
models.py
|
| 4 |
+
pyproject.toml
|
| 5 |
+
openenv_snake_env.egg-info/PKG-INFO
|
| 6 |
+
openenv_snake_env.egg-info/SOURCES.txt
|
| 7 |
+
openenv_snake_env.egg-info/dependency_links.txt
|
| 8 |
+
openenv_snake_env.egg-info/entry_points.txt
|
| 9 |
+
openenv_snake_env.egg-info/requires.txt
|
| 10 |
+
openenv_snake_env.egg-info/top_level.txt
|
| 11 |
+
server/__init__.py
|
| 12 |
+
server/app.py
|
| 13 |
+
server/snake_environment.py
|
openenv_snake_env.egg-info/dependency_links.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
|
openenv_snake_env.egg-info/entry_points.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[console_scripts]
|
| 2 |
+
server = server.app:main
|
openenv_snake_env.egg-info/requires.txt
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
openenv-core>=0.1.0
|
| 2 |
+
fastapi>=0.115.0
|
| 3 |
+
pydantic>=2.0.0
|
| 4 |
+
uvicorn>=0.24.0
|
| 5 |
+
requests>=2.31.0
|
| 6 |
+
marlenv>=1.0.0
|
| 7 |
+
gym==0.24.1
|
| 8 |
+
numpy>=1.24.0
|
| 9 |
+
Pillow>=10.0.0
|
| 10 |
+
|
| 11 |
+
[dev]
|
| 12 |
+
pytest>=8.0.0
|
| 13 |
+
pytest-cov>=4.0.0
|
openenv_snake_env.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
client
|
| 2 |
+
models
|
| 3 |
+
server
|
pyproject.toml
ADDED
|
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
[build-system]
|
| 8 |
+
requires = ["setuptools>=45", "wheel"]
|
| 9 |
+
build-backend = "setuptools.build_meta"
|
| 10 |
+
|
| 11 |
+
[project]
|
| 12 |
+
name = "openenv-snake-env"
|
| 13 |
+
version = "0.1.0"
|
| 14 |
+
description = "Snake Environment for OpenEnv - multi-agent snake game based on marlenv"
|
| 15 |
+
requires-python = ">=3.10"
|
| 16 |
+
dependencies = [
|
| 17 |
+
# Core OpenEnv dependencies (required for server functionality)
|
| 18 |
+
"openenv-core>=0.1.0",
|
| 19 |
+
"fastapi>=0.115.0",
|
| 20 |
+
"pydantic>=2.0.0",
|
| 21 |
+
"uvicorn>=0.24.0",
|
| 22 |
+
"requests>=2.31.0",
|
| 23 |
+
# Snake environment specific dependencies
|
| 24 |
+
"marlenv>=1.0.0", # Multi-agent snake game environment
|
| 25 |
+
"gym==0.24.1", # Required by marlenv
|
| 26 |
+
"numpy>=1.24.0",
|
| 27 |
+
"Pillow>=10.0.0", # Required by marlenv for image rendering
|
| 28 |
+
]
|
| 29 |
+
|
| 30 |
+
[project.optional-dependencies]
|
| 31 |
+
dev = [
|
| 32 |
+
"pytest>=8.0.0",
|
| 33 |
+
"pytest-cov>=4.0.0",
|
| 34 |
+
]
|
| 35 |
+
|
| 36 |
+
[project.scripts]
|
| 37 |
+
# Server entry point - enables running via: uv run --project . server
|
| 38 |
+
# or: python -m server.app
|
| 39 |
+
server = "server.app:main"
|
| 40 |
+
|
| 41 |
+
[tool.setuptools]
|
| 42 |
+
py-modules = ["models", "client"]
|
| 43 |
+
packages = ["server"]
|
server/__init__.py
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""Snake Environment Server - FastAPI HTTP server for snake game."""
|
server/app.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
FastAPI application for the Snake Environment.
|
| 9 |
+
|
| 10 |
+
This module creates an HTTP server that exposes the SnakeEnvironment
|
| 11 |
+
over HTTP endpoints, making it compatible with HTTPEnvClient.
|
| 12 |
+
|
| 13 |
+
Usage:
|
| 14 |
+
# Development (with auto-reload):
|
| 15 |
+
uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
|
| 16 |
+
|
| 17 |
+
# Production:
|
| 18 |
+
uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
|
| 19 |
+
|
| 20 |
+
# Or run directly:
|
| 21 |
+
uv run --project . server
|
| 22 |
+
"""
|
| 23 |
+
|
| 24 |
+
# Support both in-repo and standalone imports
|
| 25 |
+
try:
|
| 26 |
+
# In-repo imports (when running from OpenEnv repository)
|
| 27 |
+
from core.env_server.http_server import create_app
|
| 28 |
+
from ..models import SnakeAction, SnakeObservation
|
| 29 |
+
from .snake_environment import SnakeEnvironment
|
| 30 |
+
except ImportError:
|
| 31 |
+
# Standalone imports (when environment is standalone with openenv-core from pip)
|
| 32 |
+
from openenv_core.env_server.http_server import create_app
|
| 33 |
+
from models import SnakeAction, SnakeObservation
|
| 34 |
+
from server.snake_environment import SnakeEnvironment
|
| 35 |
+
|
| 36 |
+
# Create the environment instance
|
| 37 |
+
env = SnakeEnvironment()
|
| 38 |
+
|
| 39 |
+
# Create the app with web interface and README integration
|
| 40 |
+
app = create_app(env, SnakeAction, SnakeObservation, env_name="snake_env")
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def main(host: str = "0.0.0.0", port: int = 8000):
    """
    Start the Snake Environment server with uvicorn.

    Entry point for running the server without Docker:
        uv run --project . server
        python -m envs.snake_env.server.app
        openenv serve snake_env

    Called with no arguments (as the ``server`` console script does), it
    binds to 0.0.0.0:8000 exactly as before; the parameters only exist so
    programmatic callers can pick a different address.

    Args:
        host: Interface address to bind to. Defaults to "0.0.0.0".
        port: TCP port to listen on. Defaults to 8000.
    """
    # Imported inside the function, so uvicorn is only loaded when the
    # server is actually started through this entry point.
    import uvicorn

    uvicorn.run(app, host=host, port=port)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
if __name__ == "__main__":
|
| 59 |
+
main()
|
server/requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Snake environment dependencies
|
| 2 |
+
marlenv>=1.0.0
|
| 3 |
+
gym==0.24.1
|
| 4 |
+
numpy>=1.24.0
|
| 5 |
+
Pillow>=10.0.0
|
server/snake_environment.py
ADDED
|
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
| 2 |
+
# All rights reserved.
|
| 3 |
+
#
|
| 4 |
+
# This source code is licensed under the BSD-style license found in the
|
| 5 |
+
# LICENSE file in the root directory of this source tree.
|
| 6 |
+
|
| 7 |
+
"""
|
| 8 |
+
Snake Environment Implementation.
|
| 9 |
+
|
| 10 |
+
A multi-agent snake game environment that wraps marlenv's Snake-v1.
|
| 11 |
+
This implementation provides a single-agent interface by wrapping the
|
| 12 |
+
multi-agent marlenv environment.
|
| 13 |
+
"""
|
| 14 |
+
|
| 15 |
+
from uuid import uuid4
|
| 16 |
+
|
| 17 |
+
import gym
|
| 18 |
+
import marlenv.envs # Register marlenv environments with gym
|
| 19 |
+
import numpy as np
|
| 20 |
+
|
| 21 |
+
# Support both in-repo and standalone imports
|
| 22 |
+
try:
|
| 23 |
+
# In-repo imports (when running from OpenEnv repository)
|
| 24 |
+
from core.env_server.interfaces import Environment
|
| 25 |
+
from core.env_server.types import State
|
| 26 |
+
|
| 27 |
+
from ..models import SnakeAction, SnakeObservation
|
| 28 |
+
except ImportError:
|
| 29 |
+
from models import SnakeAction, SnakeObservation
|
| 30 |
+
|
| 31 |
+
# Standalone imports (when environment is standalone with openenv-core from pip)
|
| 32 |
+
from openenv_core.env_server.interfaces import Environment
|
| 33 |
+
from openenv_core.env_server.types import State
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
class SingleAgentWrapper(gym.Wrapper):
    """
    Custom wrapper to convert multi-agent marlenv to single-agent.

    The wrapped environment is always stepped with a one-element action
    list, and every per-agent return value (observation, reward, done) is
    reduced to the first agent's entry.

    This wrapper properly handles the conversion without triggering
    gym 0.24.1's strict type checking on done flags.
    """

    def __init__(self, env):
        super().__init__(env)
        # Unwrap observation and action spaces for single agent
        if hasattr(env.observation_space, "__getitem__"):
            self.observation_space = env.observation_space[0]
        if hasattr(env.action_space, "__getitem__"):
            self.action_space = env.action_space[0]

    @staticmethod
    def _unwrap_observation(obs):
        """Return the first agent's observation from a multi-agent result."""
        # Array of shape (1, H, W, C): drop the leading agent axis -> (H, W, C)
        if hasattr(obs, "shape") and len(obs.shape) == 4 and obs.shape[0] == 1:
            return obs[0]
        # Per-agent sequence of observations: keep the first agent's entry
        if isinstance(obs, (list, tuple)):
            return obs[0]
        return obs

    @staticmethod
    def _first_agent(values):
        """Return the first agent's entry of a per-agent reward/done container.

        Lists, tuples and arrays with at least one axis are indexed; plain
        scalars (including 0-d arrays) are returned unchanged.
        """
        if isinstance(values, (list, tuple)):
            return values[0]
        # Duck-typed array check: only index when there is an axis to index
        if getattr(values, "ndim", 0) >= 1:
            return values[0]
        return values

    def reset(self, **kwargs):
        """Reset the wrapped env and return the first agent's observation."""
        return self._unwrap_observation(self.env.reset(**kwargs))

    def step(self, action):
        """Step the wrapped env with a single action.

        Args:
            action: Action for the only agent; it is wrapped in a list
                before being passed to the multi-agent environment.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``obs``, ``reward``
            and ``done`` belong to the first agent and ``info`` is passed
            through unchanged.
        """
        # Wrap action in list for multi-agent env
        obs, rewards, dones, info = self.env.step([action])

        obs = self._unwrap_observation(obs)
        reward = self._first_agent(rewards)
        # Ensure done is a plain Python bool (not a numpy bool)
        done = bool(self._first_agent(dones))

        return obs, reward, done, info
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
class SnakeEnvironment(Environment):
    """
    A snake game environment that wraps marlenv's Snake-v1.

    This environment provides a single-agent interface to the multi-agent
    snake game. The snake must navigate a grid, eat fruits, and avoid walls
    and its own body.

    Args:
        height: Height of the grid map (default: 20)
        width: Width of the grid map (default: 20)
        snake_length: Initial length of the snake (default: 3)
        vision_range: Vision range for partial observability (default: None for full grid)
        observer: 'snake' for relative actions or 'human' for global directions (default: 'snake')
        max_episode_steps: Maximum steps per episode (default: 1000)
        reward_dict: Custom reward function (default: fruit=1.0, kill=0.0,
            lose=-1.0, win=100.0, time=0.001)

    Example:
        >>> env = SnakeEnvironment()
        >>> obs = env.reset()
        >>> print(obs.alive)  # True
        >>>
        >>> obs = env.step(SnakeAction(action=1))  # Turn left
        >>> print(obs.episode_score)
        >>> print(obs.reward)
    """

    def __init__(
        self,
        height: int = 20,
        width: int = 20,
        snake_length: int = 3,
        vision_range: int = None,
        observer: str = "snake",
        max_episode_steps: int = 1000,
        reward_dict: dict = None,
    ):
        """Initialize the snake environment."""
        self._state = State(episode_id=str(uuid4()), step_count=0)

        # Default reward function (a fresh dict per instance, never shared)
        if reward_dict is None:
            reward_dict = {
                "fruit": 1.0,
                "kill": 0.0,
                "lose": -1.0,
                "win": 100.0,
                "time": 0.001,
            }

        # Create the marlenv snake environment for single agent
        # Note: We don't use gym.make directly to avoid gym 0.24.1 wrappers
        from marlenv.envs.snake_env import SnakeEnv as MarlenvSnake

        self.base_env = MarlenvSnake(
            height=height,
            width=width,
            num_snakes=1,  # Single agent
            snake_length=snake_length,
            vision_range=vision_range,
            frame_stack=1,
            observer=observer,
            reward_dict=reward_dict,
            max_episode_steps=max_episode_steps,
        )

        # Wrap with our custom SingleAgent wrapper
        self.env = SingleAgentWrapper(self.base_env)

        # Track episode statistics
        self._episode_score = 0.0
        self._episode_fruits = 0
        self._episode_kills = 0

    @staticmethod
    def _stat_from_info(info, key, fallback):
        """Read an integer episode statistic for the first agent from ``info``.

        Returns ``fallback`` when ``info`` has no usable entry under ``key``.
        The entry may be a per-agent sequence/array (first element is used)
        or a plain scalar.
        """
        try:
            value = info[key]
        except (KeyError, TypeError, IndexError):
            # Key missing, or info is not a mapping at all
            return fallback
        if isinstance(value, (list, tuple)) or getattr(value, "ndim", 0) >= 1:
            if len(value) == 0:
                return fallback
            value = value[0]
        return int(value)

    def _current_grid(self):
        """Return the base environment's grid as nested lists, or [] if absent."""
        grid = getattr(self.base_env, "grid", None)
        if grid is None:
            return []
        return grid.tolist() if hasattr(grid, "tolist") else grid

    def _build_observation(self, obs, reward, done, **extra):
        """Assemble a SnakeObservation from the current episode bookkeeping.

        Args:
            obs: Raw observation from the wrapped env (ndarray or list)
            reward: Reward of the last transition
            done: Whether the episode has ended
            **extra: Additional SnakeObservation fields (e.g. ``metadata``)
        """
        # Convert observation to list format
        obs_list = obs.tolist() if isinstance(obs, np.ndarray) else obs
        return SnakeObservation(
            grid=self._current_grid(),
            observation=obs_list,
            episode_score=self._episode_score,
            episode_steps=self._state.step_count,
            episode_fruits=self._episode_fruits,
            episode_kills=self._episode_kills,
            alive=not done,
            done=done,
            reward=float(reward),
            **extra,
        )

    def reset(self) -> SnakeObservation:
        """
        Reset the environment.

        Starts a new episode (new episode id, zeroed step count and
        statistics) and resets the wrapped marlenv environment.

        Returns:
            SnakeObservation with initial game state
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._episode_score = 0.0
        self._episode_fruits = 0
        self._episode_kills = 0

        # Reset the marlenv environment
        obs = self.env.reset()

        return self._build_observation(obs, reward=0.0, done=False)

    def step(self, action: SnakeAction) -> SnakeObservation:  # type: ignore[override]
        """
        Execute a step in the environment.

        Args:
            action: SnakeAction containing the action to take

        Returns:
            SnakeObservation with the result of the action
        """
        self._state.step_count += 1

        # Execute action in marlenv
        obs, reward, done, info = self.env.step(action.action)

        # Update episode statistics; float() keeps the running score a plain
        # Python float even if the env hands back a numpy scalar
        self._episode_score += float(reward)

        # Persist the counters so a later step whose info lacks these keys
        # falls back to the last known value rather than the initial 0
        self._episode_fruits = self._stat_from_info(
            info, "episode_fruits", self._episode_fruits
        )
        self._episode_kills = self._stat_from_info(
            info, "episode_kills", self._episode_kills
        )

        return self._build_observation(
            obs, reward, done, metadata={"info": info}
        )

    @property
    def state(self) -> State:
        """
        Get the current environment state.

        Returns:
            Current State with episode_id and step_count
        """
        return self._state
|
uv.lock
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|