Paulito Palmes, PhD committed on
Commit
92c98ca
·
1 Parent(s): ebb8f61
Files changed (6) hide show
  1. Dockerfile +26 -0
  2. Dockerfile.backup +33 -0
  3. __init__.py +7 -0
  4. app.py +59 -0
  5. requirements.txt +5 -0
  6. snake_environment.py +246 -0
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Base image
2
+ FROM python:3.11-slim
3
+
4
+ # Set working directory
5
+ WORKDIR /app/env
6
+
7
+ # Install system dependencies (if needed)
8
+ RUN apt-get update && apt-get install -y \
9
+ git \
10
+ && rm -rf /var/lib/apt/lists/*
11
+
12
+ # Copy environment files
13
+ COPY . .
14
+
15
+ # Install Python dependencies
16
+ RUN pip install --no-cache-dir -e .
17
+
18
+ # Expose port
19
+ EXPOSE 8000
20
+
21
+ # Set environment variables
22
+ ENV PYTHONUNBUFFERED=1
23
+ ENV ENABLE_WEB_INTERFACE=true
24
+
25
+ # Run the server
26
+ CMD ["python", "-m", "uvicorn", "snake_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
Dockerfile.backup ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ # Use the standard openenv base image
8
+ # Built from: docker build -t openenv-base:latest -f src/core/containers/images/Dockerfile .
9
+ # In GitHub Actions, this is overridden to use the GHCR base image
10
+ ARG BASE_IMAGE=openenv-base:latest
11
+ FROM ${BASE_IMAGE}
12
+
13
+ # Install dependencies
14
+ COPY src/envs/snake_env/server/requirements.txt /tmp/requirements.txt
15
+ RUN pip install --no-cache-dir -r /tmp/requirements.txt && rm /tmp/requirements.txt
16
+
17
+ # Copy only what's needed for this environment
18
+ COPY src/core/ /app/src/core/
19
+ COPY src/envs/snake_env/ /app/src/envs/snake_env/
20
+
21
+ # Copy README for web interface documentation
22
+ COPY src/envs/snake_env/README.md /app/README.md
23
+
24
+ # Expose port
25
+ EXPOSE 8000
26
+
27
+ # Health check
28
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
29
+ CMD curl -f http://localhost:8000/health || exit 1
30
+
31
+ # Run the FastAPI server
32
+ # CMD ["uvicorn", "envs.snake_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
33
+ CMD ["python", "-m", "uvicorn", "envs.snake_env.server.app:app", "--host", "0.0.0.0", "--port", "8000"]
__init__.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Snake Environment Server - FastAPI HTTP server for snake game."""
app.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ FastAPI application for the Snake Environment.
9
+
10
+ This module creates an HTTP server that exposes the SnakeEnvironment
11
+ over HTTP endpoints, making it compatible with HTTPEnvClient.
12
+
13
+ Usage:
14
+ # Development (with auto-reload):
15
+ uvicorn server.app:app --reload --host 0.0.0.0 --port 8000
16
+
17
+ # Production:
18
+ uvicorn server.app:app --host 0.0.0.0 --port 8000 --workers 4
19
+
20
+ # Or run directly:
21
+ uv run --project . server
22
+ """
23
+
24
+ # Support both in-repo and standalone imports
25
+ try:
26
+ # In-repo imports (when running from OpenEnv repository)
27
+ from core.env_server.http_server import create_app
28
+ from ..models import SnakeAction, SnakeObservation
29
+ from .snake_environment import SnakeEnvironment
30
+ except ImportError:
31
+ # Standalone imports (when environment is standalone with openenv-core from pip)
32
+ from openenv_core.env_server.http_server import create_app
33
+ from models import SnakeAction, SnakeObservation
34
+ from server.snake_environment import SnakeEnvironment
35
+
36
+ # Create the environment instance
37
+ env = SnakeEnvironment()
38
+
39
+ # Create the app with web interface and README integration
40
+ app = create_app(env, SnakeAction, SnakeObservation, env_name="snake_env")
41
+
42
+
43
def main(host: str = "0.0.0.0", port: int = 8000):
    """
    Entry point for direct execution via uv run or python -m.

    This function enables running the server without Docker:
        uv run --project . server
        python -m envs.snake_env.server.app
        openenv serve snake_env

    Args:
        host: Interface address passed to uvicorn (default: "0.0.0.0").
        port: TCP port passed to uvicorn (default: 8000).

    Calling ``main()`` with no arguments behaves exactly as before.
    """
    # Imported lazily so that importing this module (e.g. by an ASGI server
    # that loads ``app`` itself) does not require uvicorn at import time.
    import uvicorn

    uvicorn.run(app, host=host, port=port)
56
+
57
+
58
+ if __name__ == "__main__":
59
+ main()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Snake environment dependencies
2
+ marlenv>=1.0.0
3
+ gym==0.24.1
4
+ numpy>=1.24.0
5
+ Pillow>=10.0.0
snake_environment.py ADDED
@@ -0,0 +1,246 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Snake Environment Implementation.
9
+
10
+ A snake game environment built on marlenv's multi-agent Snake-v1.
11
+ This implementation provides a single-agent interface by wrapping the
12
+ multi-agent marlenv environment.
13
+ """
14
+
15
+ from uuid import uuid4
16
+
17
+ import gym
18
+ import marlenv.envs # Register marlenv environments with gym
19
+ import numpy as np
20
+
21
+ # Support both in-repo and standalone imports
22
+ try:
23
+ # In-repo imports (when running from OpenEnv repository)
24
+ from core.env_server.interfaces import Environment
25
+ from core.env_server.types import State
26
+
27
+ from ..models import SnakeAction, SnakeObservation
28
+ except ImportError:
29
+ from models import SnakeAction, SnakeObservation
30
+
31
+ # Standalone imports (when environment is standalone with openenv-core from pip)
32
+ from openenv_core.env_server.interfaces import Environment
33
+ from openenv_core.env_server.types import State
34
+
35
+
36
class SingleAgentWrapper(gym.Wrapper):
    """
    Custom wrapper to convert multi-agent marlenv to single-agent.

    Actions are wrapped in a one-element list before being forwarded to the
    wrapped environment, and the first agent's entry is extracted from the
    observations, rewards and done flags it returns.

    The done flag is always coerced to a built-in ``bool``; per the original
    author's note this avoids gym 0.24.1's strict type checking on done flags.
    """

    def __init__(self, env):
        """Wrap ``env`` and expose the first agent's spaces when indexable."""
        super().__init__(env)
        # Unwrap observation and action spaces for single agent
        if hasattr(env.observation_space, "__getitem__"):
            self.observation_space = env.observation_space[0]
        if hasattr(env.action_space, "__getitem__"):
            self.action_space = env.action_space[0]

    @staticmethod
    def _unwrap_observation(obs):
        """Return the first agent's observation from a multi-agent result.

        Handles two layouts: an array-like with shape (1, H, W, C), which is
        reduced to (H, W, C), and a plain list, whose first item is returned.
        Anything else is passed through unchanged.
        """
        if hasattr(obs, "shape") and len(obs.shape) == 4 and obs.shape[0] == 1:
            return obs[0]
        if isinstance(obs, list):
            return obs[0]
        return obs

    @staticmethod
    def _first_agent(values):
        """Return the first agent's entry from a per-agent reward/done value.

        Lists, tuples and non-scalar NumPy arrays are indexed at 0; scalars
        (including 0-d arrays) are returned unchanged. Accepting tuples and
        arrays, not only lists, keeps ``bool(done)`` from being applied to a
        whole per-agent container.
        """
        if isinstance(values, (list, tuple)):
            return values[0]
        if isinstance(values, np.ndarray) and values.ndim > 0:
            return values[0]
        return values

    def reset(self, **kwargs):
        """Reset the wrapped environment and return the first agent's observation."""
        return self._unwrap_observation(self.env.reset(**kwargs))

    def step(self, action):
        """Step the wrapped environment with a single agent's action.

        Args:
            action: Action for the single agent; forwarded as ``[action]``.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``obs`` and ``reward``
            are the first agent's values, ``done`` is a built-in ``bool`` and
            ``info`` is passed through unchanged.
        """
        # Wrap action in list for multi-agent env
        obs, rewards, dones, info = self.env.step([action])

        obs = self._unwrap_observation(obs)
        reward = self._first_agent(rewards)
        # Ensure done is a boolean (not numpy bool)
        done = bool(self._first_agent(dones))

        return obs, reward, done, info
80
+
81
+
82
class SnakeEnvironment(Environment):
    """
    A snake game environment that wraps marlenv's Snake-v1.

    This environment provides a single-agent interface to the multi-agent
    snake game. The snake must navigate a grid, eat fruits, and avoid walls
    and its own body.

    Args:
        height: Height of the grid map (default: 20)
        width: Width of the grid map (default: 20)
        snake_length: Initial length of the snake (default: 3)
        vision_range: Vision range for partial observability (default: None for full grid)
        observer: 'snake' for relative actions or 'human' for global directions (default: 'snake')
        max_episode_steps: Maximum steps per episode (default: 1000)
        reward_dict: Custom reward mapping. When None, the defaults are
            fruit=1.0, kill=0.0, lose=-1.0, win=100.0, time=0.001.

    Example:
        >>> env = SnakeEnvironment()
        >>> obs = env.reset()
        >>> print(obs.alive)  # True
        >>>
        >>> obs = env.step(SnakeAction(action=1))
        >>> print(obs.episode_score)
        >>> print(obs.reward)
    """

    def __init__(
        self,
        height: int = 20,
        width: int = 20,
        snake_length: int = 3,
        vision_range: int = None,
        observer: str = "snake",
        max_episode_steps: int = 1000,
        reward_dict: dict = None,
    ):
        """Initialize the snake environment."""
        self._state = State(episode_id=str(uuid4()), step_count=0)

        # Default reward function
        if reward_dict is None:
            reward_dict = {
                "fruit": 1.0,
                "kill": 0.0,
                "lose": -1.0,
                "win": 100.0,
                "time": 0.001,
            }

        # Create the marlenv snake environment for single agent
        # Note: We don't use gym.make directly to avoid gym 0.24.1 wrappers
        from marlenv.envs.snake_env import SnakeEnv as MarlenvSnake

        self.base_env = MarlenvSnake(
            height=height,
            width=width,
            num_snakes=1,  # Single agent
            snake_length=snake_length,
            vision_range=vision_range,
            frame_stack=1,
            observer=observer,
            reward_dict=reward_dict,
            max_episode_steps=max_episode_steps,
        )

        # Wrap with our custom SingleAgent wrapper
        self.env = SingleAgentWrapper(self.base_env)

        # Track episode statistics
        self._episode_score = 0.0
        self._episode_fruits = 0
        self._episode_kills = 0

    @staticmethod
    def _as_list(obs):
        """Convert a NumPy observation to nested lists; pass other values through."""
        return obs.tolist() if isinstance(obs, np.ndarray) else obs

    def _grid_as_list(self):
        """Return the base environment's grid as nested lists, or [] if it has none."""
        # Access the base env directly rather than going through the wrapper.
        return self.base_env.grid.tolist() if hasattr(self.base_env, "grid") else []

    @staticmethod
    def _stat_from_info(info, key, fallback):
        """Read an integer episode statistic for the first agent from ``info``.

        Args:
            info: The info mapping returned by the wrapped environment's step.
            key: Name of the statistic to look up.
            fallback: Value returned when ``key`` is absent or its container is empty.

        Returns:
            ``int`` of the first entry when the value is a list, tuple or
            non-scalar array; ``int`` of the value itself when it is a scalar;
            otherwise ``fallback``.
        """
        if not info or key not in info:
            return fallback
        value = info[key]
        is_sequence = isinstance(value, (list, tuple)) or (
            isinstance(value, np.ndarray) and value.ndim > 0
        )
        if is_sequence:
            if len(value) == 0:
                return fallback
            value = value[0]
        return int(value)

    def reset(self) -> SnakeObservation:
        """
        Reset the environment.

        Starts a new episode id, zeroes the step counter and the tracked
        episode statistics, and resets the wrapped environment.

        Returns:
            SnakeObservation with initial game state
        """
        self._state = State(episode_id=str(uuid4()), step_count=0)
        self._episode_score = 0.0
        self._episode_fruits = 0
        self._episode_kills = 0

        # Reset the marlenv environment
        obs = self.env.reset()

        return SnakeObservation(
            grid=self._grid_as_list(),
            observation=self._as_list(obs),
            episode_score=self._episode_score,
            episode_steps=self._state.step_count,
            episode_fruits=self._episode_fruits,
            episode_kills=self._episode_kills,
            alive=True,
            done=False,
            reward=0.0,
        )

    def step(self, action: SnakeAction) -> SnakeObservation:  # type: ignore[override]
        """
        Execute a step in the environment.

        Args:
            action: SnakeAction containing the action to take

        Returns:
            SnakeObservation with the result of the action
        """
        self._state.step_count += 1

        # Execute action in marlenv
        obs, reward, done, info = self.env.step(action.action)

        # Convert once so the running score stays a built-in float instead of
        # silently becoming whatever numeric type the wrapped env returned.
        reward = float(reward)
        self._episode_score += reward

        # Persist statistics reported via info so that a later step without
        # these keys reports the last known values rather than the reset ones.
        self._episode_fruits = self._stat_from_info(
            info, "episode_fruits", self._episode_fruits
        )
        self._episode_kills = self._stat_from_info(
            info, "episode_kills", self._episode_kills
        )

        return SnakeObservation(
            grid=self._grid_as_list(),
            observation=self._as_list(obs),
            episode_score=self._episode_score,
            episode_steps=self._state.step_count,
            episode_fruits=int(self._episode_fruits),
            episode_kills=int(self._episode_kills),
            alive=not done,
            done=done,
            reward=reward,
            metadata={"info": info},
        )

    @property
    def state(self) -> State:
        """
        Get the current environment state.

        Returns:
            Current State with episode_id and step_count
        """
        return self._state