anushaacharya commited on
Commit
d218d3a
·
verified ·
1 Parent(s): 9c16ca9

Upload folder using huggingface_hub

Browse files
Files changed (11) hide show
  1. Dockerfile +72 -0
  2. README.md +150 -4
  3. __init__.py +13 -0
  4. client.py +118 -0
  5. models.py +144 -0
  6. openenv.yaml +6 -0
  7. pyproject.toml +32 -0
  8. server/__init__.py +12 -0
  9. server/app.py +35 -0
  10. server/build_docker.sh +48 -0
  11. server/minesweeper_environment.py +338 -0
Dockerfile ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
8
+ FROM ${BASE_IMAGE} AS builder
9
+
10
+ WORKDIR /app
11
+
12
+ # Build argument to control whether we're building standalone or in-repo
13
+ ARG BUILD_MODE=in-repo
14
+
15
+ # Copy environment code (always at root of build context)
16
+ COPY . /app/env
17
+
18
+ # For in-repo builds, openenv-core is already in the pyproject.toml dependencies
19
+ # For standalone builds, openenv-core will be installed from pip via pyproject.toml
20
+ WORKDIR /app/env
21
+
22
+ # Ensure uv is available (for local builds where base image lacks it)
23
+ RUN if ! command -v uv >/dev/null 2>&1; then \
24
+ curl -LsSf https://astral.sh/uv/install.sh | sh && \
25
+ mv /root/.local/bin/uv /usr/local/bin/uv && \
26
+ mv /root/.local/bin/uvx /usr/local/bin/uvx; \
27
+ fi
28
+
29
+ # Install git for building from git repos (build-time only)
30
+ RUN apt-get update && apt-get install -y --no-install-recommends \
31
+ git \
32
+ && rm -rf /var/lib/apt/lists/*
33
+
34
+ # Install dependencies using uv sync
35
+ RUN --mount=type=cache,target=/root/.cache/uv \
36
+ if [ -f uv.lock ]; then \
37
+ uv sync --frozen --no-install-project --no-editable; \
38
+ else \
39
+ uv sync --no-install-project --no-editable; \
40
+ fi
41
+
42
+ RUN --mount=type=cache,target=/root/.cache/uv \
43
+ if [ -f uv.lock ]; then \
44
+ uv sync --frozen --no-editable; \
45
+ else \
46
+ uv sync --no-editable; \
47
+ fi
48
+
49
+ # Final runtime stage
50
+ FROM ${BASE_IMAGE}
51
+
52
+ WORKDIR /app
53
+
54
+ # Copy the virtual environment from builder
55
+ COPY --from=builder /app/env/.venv /app/.venv
56
+
57
+ # Copy the environment code
58
+ COPY --from=builder /app/env /app/env
59
+
60
+ # Set PATH to use the virtual environment
61
+ ENV PATH="/app/.venv/bin:$PATH"
62
+
63
+ # Set PYTHONPATH so imports work correctly
64
+ ENV PYTHONPATH="/app/env:$PYTHONPATH"
65
+
66
+ # Health check using Python (more portable than curl/wget)
67
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
68
+ CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
69
+
70
+ # Run the FastAPI server
71
+ ENV ENABLE_WEB_INTERFACE=true
72
+ CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
README.md CHANGED
@@ -1,10 +1,156 @@
1
  ---
2
- title: Minesweeper Env
3
- emoji: 🔥
4
- colorFrom: yellow
5
  colorTo: indigo
6
  sdk: docker
7
  pinned: false
 
 
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: Minesweeper Environment Server
3
+ emoji: 💣
4
+ colorFrom: blue
5
  colorTo: indigo
6
  sdk: docker
7
  pinned: false
8
+ app_port: 8000
9
+ base_path: /web
10
+ tags:
11
+ - openenv
12
  ---
13
 
14
+ # Minesweeper Environment
15
+
16
+ A Minesweeper game environment for reinforcement learning agents. The environment consists of a grid with hidden mines where the agent must reveal all non-mine cells without triggering any mines.
17
+
18
+ ## Overview
19
+
20
+ The agent can perform two types of actions:
21
+ - Reveal cells to uncover numbers indicating adjacent mines
22
+ - Place or remove flags on suspected mine locations
23
+
24
+ The game ends when all non-mine cells are revealed (win) or a mine is revealed (loss).
25
+
26
+ ## Quick Start
27
+
28
+ ```python
29
+ from envs.minesweeper_env import MinesweeperAction, MinesweeperEnv
30
+
31
+ # Create environment from Docker image
32
+ minesweeper_env = MinesweeperEnv.from_docker_image("minesweeper-env:latest")
33
+
34
+ try:
35
+ # Reset the environment
36
+ result = minesweeper_env.reset()
37
+ print(f"Board size: {result.observation.board_height}x{result.observation.board_width}")
38
+ print(f"Number of mines: {result.observation.num_mines}")
39
+
40
+ # Reveal a cell
41
+ result = minesweeper_env.step(MinesweeperAction(row=2, col=2, action_type="reveal"))
42
+ print(f"Cells revealed: {result.observation.cells_revealed}")
43
+ print(f"Reward: {result.observation.reward}")
44
+
45
+ # Place a flag
46
+ result = minesweeper_env.step(MinesweeperAction(row=1, col=1, action_type="flag"))
47
+ print(f"Flags placed: {result.observation.flags_placed}")
48
+
49
+ finally:
50
+ minesweeper_env.close()
51
+ ```
52
+
53
+ ## Building the Docker Image
54
+
55
+ Build the Docker image from the project root:
56
+
57
+ ```bash
58
+ docker build -t minesweeper-env:latest -f src/envs/minesweeper_env/server/Dockerfile .
59
+ ```
60
+
61
+ Or use the build script:
62
+
63
+ ```bash
64
+ cd src/envs/minesweeper_env/server
65
+ ./build_docker.sh latest
66
+ ```
67
+
68
+ ## Environment Details
69
+
70
+ ### Action
71
+
72
+ **MinesweeperAction**: Specifies the cell and action type
73
+ - `row` (int) - Row index (0-indexed)
74
+ - `col` (int) - Column index (0-indexed)
75
+ - `action_type` (str) - Either "reveal" or "flag" ("flag" toggles: it places a flag, or removes one that is already there)
76
+
77
+ ### Observation
78
+
79
+ **MinesweeperObservation**: Current board state and game information
80
+ - `board` (list[list]) - 2D grid showing the current state of each cell:
81
+ - `-1`: Unrevealed cell
82
+ - `0-8`: Number of adjacent mines (revealed cell)
83
+ - `'F'`: Flagged cell
84
+ - `'*'`: Mine (only shown when game is lost)
85
+ - `num_mines` (int) - Total number of mines on the board
86
+ - `flags_placed` (int) - Number of flags currently placed
87
+ - `cells_revealed` (int) - Number of cells that have been revealed
88
+ - `game_status` (GameStatus) - Current game status (ONGOING, WON, or LOST)
89
+ - `done` (bool) - Whether the game has ended
90
+ - `reward` (float) - Reward from the last action
91
+ - `metadata` (dict) - Additional information
92
+
93
+ ### Rewards
94
+
95
+ - Revealing a safe cell: +1.0
96
+ - Placing a flag on a mine: +0.5
97
+ - Revealing a mine (game over): -10.0
98
+ - Revealing an already revealed or flagged cell, or flagging a revealed cell: -0.05
99
+ - Invalid action (row/column outside the board): -0.1
100
+
101
+ ### Game Status
102
+
103
+ - `GameStatus.ONGOING`: Game is still in progress
104
+ - `GameStatus.WON`: All non-mine cells have been revealed
105
+ - `GameStatus.LOST`: A mine was revealed
106
+
107
+ ## Configuration
108
+
109
+ The default configuration is:
110
+ - Board height: 5
111
+ - Board width: 5
112
+ - Number of mines: 5
113
+
114
+ These can be configured when initializing the environment server.
115
+
116
+ ## Connecting to an Existing Server
117
+
118
+ If you have a server already running:
119
+
120
+ ```python
121
+ from envs.minesweeper_env import MinesweeperEnv
122
+
123
+ # Connect to existing server
124
+ minesweeper_env = MinesweeperEnv(base_url="http://localhost:8000")
125
+
126
+ # Use as normal
127
+ result = minesweeper_env.reset()
128
+ ```
129
+
130
+ Note: When connecting to an existing server, `close()` will not stop the server.
131
+
132
+ ## Running Tests
133
+
134
+ Run the test suite:
135
+
136
+ ```bash
137
+ python tests/envs/test_minesweeper_env.py
138
+ ```
139
+
140
+ ## Project Structure
141
+
142
+ ```
143
+ minesweeper_env/
144
+ ├── __init__.py # Module exports
145
+ ├── README.md # This file
146
+ ├── client.py # MinesweeperEnv client implementation
147
+ ├── models.py # Action, Observation, and State models
148
+ ├── openenv.yaml # Environment configuration
149
+ ├── pyproject.toml # Package dependencies
150
+ └── server/
151
+ ├── __init__.py # Server module exports
152
+ ├── minesweeper_environment.py # Core game logic
153
+ ├── app.py # FastAPI application
154
+ ├── Dockerfile # Container image definition
155
+ └── build_docker.sh # Build script
156
+ ```
__init__.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Minesweeper Environment - A Minesweeper game environment for reinforcement learning agents."""
8
+
9
+ from .client import MinesweeperEnv
10
+ from .models import MinesweeperAction, MinesweeperObservation, GameStatus
11
+
12
+ __all__ = ["MinesweeperAction", "MinesweeperObservation", "MinesweeperEnv", "GameStatus"]
13
+
client.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Minesweeper Environment Client.
9
+
10
+ This module provides the client for connecting to a Minesweeper Environment server
11
+ via WebSocket for persistent sessions.
12
+ """
13
+
14
+ from typing import Any, Dict
15
+
16
+ # Support both in-repo and standalone imports
17
+ try:
18
+ # In-repo imports (when running from OpenEnv repository)
19
+ from openenv.core.client_types import StepResult
20
+ from openenv.core.env_server.types import State
21
+ from openenv.core.env_client import EnvClient
22
+ from .models import MinesweeperAction, MinesweeperObservation
23
+ except ImportError:
24
+ # Standalone imports (when environment is standalone with openenv from pip)
25
+ from openenv.core.client_types import StepResult
26
+ from openenv.core.env_server.types import State
27
+ from openenv.core.env_client import EnvClient
28
+ from models import MinesweeperAction, MinesweeperObservation
29
+
30
+
31
class MinesweeperEnv(EnvClient[MinesweeperAction, MinesweeperObservation, State]):
    """
    Client for talking to a Minesweeper Environment server.

    The client keeps a persistent WebSocket connection to the environment
    server so that multi-step interactions stay cheap, and every client
    instance gets its own dedicated environment session on the server.

    Example:
        >>> # Connect to a running server
        >>> with MinesweeperEnv(base_url="http://localhost:8000") as client:
        ...     result = client.reset()
        ...     print(result.observation.board)
        ...     print(result.observation.game_status)
        ...
        ...     # Reveal a cell
        ...     result = client.step(MinesweeperAction(row=0, col=0, action_type="reveal"))
        ...     print(result.observation.board)
        ...     print(result.reward)

    Example with Docker:
        >>> # Automatically start container and connect
        >>> client = MinesweeperEnv.from_docker_image("minesweeper-env:latest")
        >>> try:
        ...     result = client.reset()
        ...     result = client.step(MinesweeperAction(row=2, col=3, action_type="reveal"))
        ... finally:
        ...     client.close()
    """

    def _step_payload(self, action: MinesweeperAction) -> Dict:
        """
        Serialize an action into the JSON body of a step request.

        Args:
            action: The MinesweeperAction to send.

        Returns:
            A dict holding the action's ``row``, ``col`` and ``action_type``.
        """
        wire_fields = ("row", "col", "action_type")
        return {name: getattr(action, name) for name in wire_fields}

    def _parse_result(self, payload: Dict) -> StepResult[MinesweeperObservation]:
        """
        Build a StepResult from the server's JSON response.

        Board information is read from the nested ``observation`` object,
        while ``reward`` and ``done`` are read from the top level of the
        payload. Missing keys fall back to neutral defaults.

        Args:
            payload: Decoded JSON response from the server.

        Returns:
            StepResult wrapping a MinesweeperObservation.
        """
        obs_data = payload.get("observation", {})
        reward = payload.get("reward")
        done = payload.get("done", False)

        observation = MinesweeperObservation(
            board=obs_data.get("board", []),
            num_mines=obs_data.get("num_mines", 0),
            flags_placed=obs_data.get("flags_placed", 0),
            cells_revealed=obs_data.get("cells_revealed", 0),
            game_status=obs_data.get("game_status", "ongoing"),
            done=done,
            reward=reward,
            metadata=obs_data.get("metadata", {}),
        )
        return StepResult(observation=observation, reward=reward, done=done)

    def _parse_state(self, payload: Dict) -> State:
        """
        Build a State object from the server's JSON response.

        Args:
            payload: Decoded JSON response from the /state endpoint.

        Returns:
            State carrying ``episode_id`` and ``step_count`` (0 if absent).
        """
        episode_id = payload.get("episode_id")
        step_count = payload.get("step_count", 0)
        return State(episode_id=episode_id, step_count=step_count)
models.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Data models for the Minesweeper Environment.
9
+
10
+ The minesweeper_env environment is a Minesweeper game where agents reveal cells and place flags
11
+ to identify mines on a grid board.
12
+ """
13
+
14
+ from enum import Enum
15
+ from typing import List, Any, Set, Tuple
16
+ from pydantic import Field, BaseModel
17
+
18
+ # Support both in-repo and standalone imports
19
+ try:
20
+ # In-repo imports (when running from OpenEnv repository)
21
+ from openenv.core.env_server.types import Action, Observation
22
+ except ImportError:
23
+ # Standalone imports (when environment is standalone with openenv from pip)
24
+ from openenv.core.env_server.types import Action, Observation
25
+
26
+
27
class GameStatus(Enum):
    """Lifecycle states of a Minesweeper game."""

    # The game is still being played.
    ONGOING = "ongoing"
    # Every non-mine cell has been revealed.
    WON = "won"
    # A mine was revealed.
    LOST = "lost"
32
+
33
+
34
class MinesweeperAction(Action):
    """
    A single move in the Minesweeper environment.

    Attributes:
        row: 0-indexed row of the targeted cell (must be >= 0).
        col: 0-indexed column of the targeted cell (must be >= 0).
        action_type: 'reveal' to uncover the cell, 'flag' to place/remove a flag.
    """

    row: int = Field(
        ...,
        ge=0,
        description="Row index of the cell",
    )
    col: int = Field(
        ...,
        ge=0,
        description="Column index of the cell",
    )
    action_type: str = Field(
        ...,
        pattern="^(reveal|flag)$",
        description="Type of action: 'reveal' or 'flag'",
    )
46
+
47
+
48
class MinesweeperObservation(Observation):
    """
    What the agent is allowed to see of a Minesweeper game.

    The board is a partial view: mine locations stay hidden unless revealed.

    Attributes:
        board: 2D list with one entry per cell, encoded as:
            - -1: unrevealed
            - 0-8: number of adjacent mines (if revealed)
            - 'F': flagged cell
            - '*': mine (only revealed if game is lost)
        num_mines: Total number of mines on the board.
        flags_placed: Number of flags currently placed by the agent.
        cells_revealed: Number of cells revealed so far.
        game_status: Current status of the game - ongoing, won, or lost.
    """

    board: List[List[Any]] = Field(
        default_factory=list,
        description="2D board state",
    )
    num_mines: int = Field(
        ...,
        ge=0,
        description="Total number of mines",
    )
    flags_placed: int = Field(
        ...,
        ge=0,
        description="Number of flags placed",
    )
    cells_revealed: int = Field(
        ...,
        ge=0,
        description="Number of cells revealed",
    )
    game_status: GameStatus = Field(
        ...,
        description="Current game status",
    )

    @property
    def board_height(self) -> int:
        """Number of rows in the board."""
        return len(self.board)

    @property
    def board_width(self) -> int:
        """Number of columns in the board (0 for an empty board)."""
        if not self.board:
            return 0
        return len(self.board[0])
80
+
81
+
82
class MinesweeperState(BaseModel):
    """
    Complete internal state of a Minesweeper game, hidden information included.

    Attributes:
        episode_id: Unique identifier for the current episode.
        step_count: Number of steps taken in the current episode.
        board_height: Number of rows on the board.
        board_width: Number of columns on the board.
        mine_locations: Set of (row, col) tuples where mines are located.
        revealed_cells: Set of (row, col) tuples that have been revealed.
        flags: Set of (row, col) tuples where flags have been placed.
        mine_counts: 2D list with the adjacent-mine count of each cell.
        game_status: Current status of the game - ongoing, won, or lost.
    """

    episode_id: str
    step_count: int
    board_height: int
    board_width: int
    mine_locations: Set[Tuple[int, int]]
    revealed_cells: Set[Tuple[int, int]]
    flags: Set[Tuple[int, int]]
    mine_counts: List[List[int]]
    game_status: GameStatus

    def to_observation(self) -> MinesweeperObservation:
        """
        Project the full state onto the agent's partial view.

        The returned observation always carries ``reward=0.0``; ``done`` is
        true whenever the game status is anything other than ONGOING.

        Returns:
            MinesweeperObservation representing the agent's view of the board.
        """

        def visible_value(r: int, c: int) -> Any:
            # A revealed cell takes precedence over a flag on the same cell.
            position = (r, c)
            if position in self.revealed_cells:
                if position in self.mine_locations:
                    return '*'
                return self.mine_counts[r][c]
            if position in self.flags:
                return 'F'
            return -1

        board = [
            [visible_value(r, c) for c in range(self.board_width)]
            for r in range(self.board_height)
        ]
        is_finished = self.game_status != GameStatus.ONGOING
        extra = {
            "episode_id": self.episode_id,
            "step_count": self.step_count,
        }

        return MinesweeperObservation(
            board=board,
            num_mines=len(self.mine_locations),
            flags_placed=len(self.flags),
            cells_revealed=len(self.revealed_cells),
            game_status=self.game_status,
            done=is_finished,
            reward=0.0,
            metadata=extra,
        )
openenv.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ spec_version: 1
2
+ name: minesweeper
3
+ type: space
4
+ runtime: fastapi
5
+ app: server.app:app
6
+ port: 8000
pyproject.toml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [build-system]
2
+ requires = ["setuptools>=45", "wheel"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "openenv-minesweeper"
7
+ version = "0.1.0"
8
+ description = "Minesweeper Environment for OpenEnv"
9
+ requires-python = ">=3.10"
10
+ dependencies = [
11
+ "openenv-core>=0.1.0",
12
+ "fastapi>=0.115.0",
13
+ "uvicorn>=0.24.0",
14
+ "pydantic>=2.0.0",
15
+ "requests>=2.31.0",
16
+ ]
17
+
18
+ [project.optional-dependencies]
19
+ dev = [
20
+ "pytest>=8.0.0",
21
+ "pytest-cov>=4.0.0",
22
+ ]
23
+
24
+ [project.scripts]
25
+ # Server entry point - enables running via: uv run --project . server
26
+ # or: python -m minesweeper.server.app
27
+ server = "minesweeper.server.app:main"
28
+
29
+ [tool.setuptools]
30
+ include-package-data = true
31
+ packages = ["minesweeper", "minesweeper.server"]
32
+ package-dir = { "minesweeper" = ".", "minesweeper.server" = "server" }
server/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """Minesweeper environment server components."""
8
+
9
+ from .minesweeper_environment import MinesweeperEnvironment
10
+
11
+ __all__ = ["MinesweeperEnvironment"]
12
+
server/app.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """FastAPI application for the Minesweeper Environment."""
8
+
9
+ # Support both in-repo and standalone imports
10
+ try:
11
+ # In-repo imports (when running from OpenEnv repository)
12
+ from openenv.core.env_server import create_app
13
+ except ImportError:
14
+ # Standalone imports (when environment is standalone with openenv from pip)
15
+ from openenv.core.env_server import create_app
16
+
17
+ try:
18
+ from ..models import MinesweeperAction, MinesweeperObservation
19
+ from .minesweeper_environment import MinesweeperEnvironment
20
+ except ImportError:
21
+ from models import MinesweeperAction, MinesweeperObservation
22
+ from server.minesweeper_environment import MinesweeperEnvironment
23
+
24
+ # Create the FastAPI app
25
+ # Pass the class (factory) instead of an instance for WebSocket session support
26
app = create_app(
    MinesweeperEnvironment,
    MinesweeperAction,
    MinesweeperObservation,
    env_name="minesweeper_env",
)


def main(host: str = "0.0.0.0", port: int = 8000) -> None:
    """
    Serve the Minesweeper FastAPI app with uvicorn.

    pyproject.toml declares the console script ``server`` as
    ``minesweeper.server.app:main``, so this function must exist for that
    entry point to resolve.

    Args:
        host: Interface to bind to.
        port: TCP port to listen on.
    """
    # Imported lazily so importing this module (e.g. ``uvicorn server.app:app``)
    # does not depend on it at import time.
    import uvicorn

    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    main()
server/build_docker.sh ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

# Script to build the Minesweeper environment Docker image
# Usage: ./build_docker.sh [tag]
#   tag  Optional image tag (defaults to "latest").

set -e

TAG="${1:-latest}"
IMAGE_NAME="minesweeper-env:${TAG}"

echo "🐳 Building Minesweeper Environment Docker Image"
echo "================================================"
echo "Image: $IMAGE_NAME"
echo ""

# Get script directory
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Navigate to OpenEnv root (4 levels up from server/)
OPENENV_ROOT="$(cd "$SCRIPT_DIR/../../../.." && pwd)"

echo "📁 OpenEnv root: $OPENENV_ROOT"
echo ""

# Build Minesweeper environment image.
# The build is tested directly in the `if`: under `set -e` a failing
# standalone `docker build` would abort the script before a later `$?`
# check could run, so the failure message would never be printed.
echo "⏳ Building..."
if docker build \
    -f "$SCRIPT_DIR/Dockerfile" \
    -t "$IMAGE_NAME" \
    "$OPENENV_ROOT"; then
    echo ""
    echo "✅ Build successful!"
    echo ""
    echo "🚀 Run with:"
    echo " docker run -p 8000:8000 $IMAGE_NAME"
    echo ""
else
    echo ""
    echo "❌ Build failed!"
    exit 1
fi
server/minesweeper_environment.py ADDED
@@ -0,0 +1,338 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Meta Platforms, Inc. and affiliates.
2
+ # All rights reserved.
3
+ #
4
+ # This source code is licensed under the BSD-style license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+
7
+ """
8
+ Minesweeper Environment Implementation.
9
+
10
+ A Minesweeper game environment where agents must reveal cells and place flags
11
+ to identify mines on a grid board without triggering any mines.
12
+ """
13
+ import random
14
+ from typing import Any, Dict, List, Optional, Set, Tuple
15
+ from uuid import uuid4
16
+
17
+ try:
18
+ from ..models import (
19
+ MinesweeperAction,
20
+ MinesweeperObservation,
21
+ GameStatus,
22
+ MinesweeperState,
23
+ )
24
+ except ImportError:
25
+ from models import (
26
+ MinesweeperAction,
27
+ MinesweeperObservation,
28
+ GameStatus,
29
+ MinesweeperState,
30
+ )
31
+
32
+ # Support both in-repo and standalone imports
33
+ try:
34
+ # In-repo imports (when running from OpenEnv repository)
35
+ from openenv.core.env_server.interfaces import Environment
36
+ from openenv.core.env_server.types import State
37
+ except ImportError:
38
+ # Standalone imports (when environment is standalone with openenv from pip)
39
+ from openenv.core.env_server.interfaces import Environment
40
+ from openenv.core.env_server.types import State
41
+
42
+
43
+ class MinesweeperEnvironment(Environment):
44
+ """
45
+ Minesweeper game environment implementation for Reinforcement Learning.
46
+ The environment consists of a grid with hidden mines. The agent can reveal cells or place flags.
47
+ The goal is to reveal all non-mine cells without triggering a mine.
48
+ The agent must:
49
+ - Reveal cells to uncover numbers indicating adjacent mines.
50
+ - Place flags on suspected mine locations.
51
+ The game ends when all non-mine cells are revealed (win) or a mine is revealed (loss).
52
+
53
+ Observation encoding:
54
+ -1: unrevealed
55
+ 0-8: number of adjacent mines (if revealed)
56
+ 'F': flagged cell
57
+ '*': mine (only revealed if game is lost)
58
+
59
+ Example:
60
+ >>> env = MinesweeperEnvironment(height=5, width=5, num_mines=5)
61
+ >>> obs = env.reset()
62
+ >>> action = MinesweeperAction(row=2, col=3, action_type='reveal')
63
+ """
64
+
65
def __init__(self, height: int = 5, width: int = 5, num_mines: int = 5):
    """Set up a Minesweeper board and start the first game.

    Args:
        height: Number of rows on the board.
        width: Number of columns on the board.
        num_mines: Number of mines to place on the board.
    """
    self.height, self.width, self.num_mines = height, width, num_mines

    # Episode bookkeeping
    self._state = State(episode_id=str(uuid4()), step_count=0)
    self._reset_count = 0

    # Internal game state (populated for real by reset() below)
    self._mine_positions: Set[Tuple[int, int]] = set()
    self._revealed_cells: Set[Tuple[int, int]] = set()
    self._flags_placed: Set[Tuple[int, int]] = set()
    self._mine_counts: List[List[int]] = [[0] * width for _ in range(height)]
    self._game_status = GameStatus.ONGOING

    # Reset right away so a freshly constructed environment is playable.
    self.reset()
88
+
89
def reset(self) -> MinesweeperObservation:
    """
    Reset the environment and start a new game.

    A new episode id is generated, the step counter returns to 0, all
    revealed cells and flags are forgotten, and mines are placed afresh.

    Returns:
        MinesweeperObservation with the initial board state.
    """
    self._state = State(episode_id=str(uuid4()), step_count=0)
    self._reset_count += 1

    # Wipe per-game progress.
    for cell_set in (self._revealed_cells, self._flags_placed):
        cell_set.clear()
    self._game_status = GameStatus.ONGOING

    # Lay out a new random board, then derive the neighbour counts from it.
    self._place_mines()
    self._compute_mine_counts()

    initial_observation = self._create_observation(done=False, reward=0.0)
    return initial_observation
114
+
115
def step(self, action: MinesweeperAction) -> MinesweeperObservation:  # type: ignore[override]
    """
    Apply one action to the board and report the outcome.

    The step counter is incremented on every call, including calls that
    are rejected. An out-of-bounds position is checked before the
    game-over state, so it yields -0.1 even after the game has ended.

    Args:
        action: MinesweeperAction specifying row, col and action_type.

    Returns:
        MinesweeperObservation with updated board state and reward.
    """
    self._state.step_count += 1

    row, col = action.row, action.col

    # Out-of-bounds target: penalise and leave the board untouched.
    if not self._is_valid_position(row, col):
        return self._create_observation(
            done=self._game_status != GameStatus.ONGOING,
            reward=-0.1,
            metadata={"error": "Invalid action"},
        )

    # A finished game accepts no further moves.
    if self._game_status != GameStatus.ONGOING:
        return self._create_observation(
            done=True,
            reward=0.0,
            metadata={"info": "Game already over"},
        )

    handlers = {
        "reveal": self._reveal_cell,
        "flag": self._toggle_flag,
    }
    handler = handlers.get(action.action_type)
    if handler is None:
        reward = -0.1  # Invalid action type
    else:
        reward = handler(row, col)

    self._check_win_condition()

    is_finished = self._game_status != GameStatus.ONGOING
    return self._create_observation(done=is_finished, reward=reward)
161
+
162
def _place_mines(self) -> None:
    """
    Randomly place ``self.num_mines`` mines on distinct cells of the board.

    Any previous mine layout is discarded first.

    Raises:
        ValueError: If ``num_mines`` is negative or larger than the number
            of cells. Rejection sampling would loop forever in the latter
            case, so the configuration is rejected up front.
    """
    total_cells = self.height * self.width
    if not 0 <= self.num_mines <= total_cells:
        raise ValueError(
            f"num_mines must be between 0 and {total_cells} "
            f"for a {self.height}x{self.width} board, got {self.num_mines}"
        )

    self._mine_positions.clear()
    # Sample distinct flat indices, then map each back to (row, col); this
    # always finishes, unlike retrying random cells until enough are unique.
    flat_indices = random.sample(range(total_cells), self.num_mines)
    self._mine_positions.update(divmod(index, self.width) for index in flat_indices)
169
+
170
def _compute_mine_counts(self) -> None:
    """Rebuild the grid of adjacent-mine counts; mine cells themselves keep 0."""
    grid = []
    for r in range(self.height):
        grid.append([
            0 if (r, c) in self._mine_positions else self._count_adjacent_mines(r, c)
            for c in range(self.width)
        ])
    self._mine_counts = grid
178
+
179
def _count_adjacent_mines(self, row: int, col: int) -> int:
    """Return how many of the up-to-eight neighbours of (row, col) hold a mine."""
    offsets = (-1, 0, 1)
    neighbours = (
        (row + dr, col + dc)
        for dr in offsets
        for dc in offsets
        if (dr, dc) != (0, 0)  # the cell itself is not its own neighbour
    )
    return sum(
        1
        for r, c in neighbours
        if self._is_valid_position(r, c) and (r, c) in self._mine_positions
    )
190
+
191
def _reveal_cell(self, row: int, col: int) -> float:
    """
    Reveal the cell at (row, col) and return the reward for doing so.

    Returns:
        -0.05 if the cell is already revealed or flagged (nothing changes),
        -10.0 if the cell is a mine (the game becomes LOST),
        1.0 for a safe reveal, regardless of how many cells it opens up.
    """
    cell = (row, col)

    # Revealing an already revealed or flagged cell is a penalised no-op.
    if cell in self._revealed_cells or cell in self._flags_placed:
        return -0.05

    if cell not in self._mine_positions:
        # Safe cell: reveal it and, if its count is 0, its surroundings.
        self._reveal_recursive(row, col)
        return 1.0

    # Hit a mine: the game is lost and the mine becomes visible.
    self._game_status = GameStatus.LOST
    self._revealed_cells.add(cell)
    return -10.0
205
+
206
def _reveal_recursive(self, row: int, col: int) -> None:
    """
    Reveal (row, col) and flood-fill outward through cells with 0 adjacent mines.

    Out-of-bounds, already revealed, flagged and mine cells are never
    revealed and never expanded. The name is kept for callers, but the
    fill uses an explicit stack rather than recursion, so a large open
    board cannot exceed Python's recursion limit.
    """
    offsets = (-1, 0, 1)
    pending = [(row, col)]
    while pending:
        r, c = pending.pop()
        cell = (r, c)
        if not self._is_valid_position(r, c):
            continue
        if (
            cell in self._revealed_cells
            or cell in self._flags_placed
            or cell in self._mine_positions
        ):
            continue

        self._revealed_cells.add(cell)

        # Only a cell with no adjacent mines opens up its neighbours.
        if self._mine_counts[r][c] == 0:
            pending.extend(
                (r + dr, c + dc)
                for dr in offsets
                for dc in offsets
                if dr or dc
            )
224
+
225
+ def _toggle_flag(self, row: int, col: int) -> float:
226
+ """Toggle a flag on the cell at (row, col). Returns the reward for the action."""
227
+ if (row, col) in self._revealed_cells:
228
+ return -0.05 # Penalty for flagging a revealed cell
229
+
230
+ if (row, col) in self._flags_placed:
231
+ self._flags_placed.remove((row, col))
232
+ return 0.0 # No penalty for removing a flag
233
+ else:
234
+ self._flags_placed.add((row, col))
235
+ if (row, col) in self._mine_positions:
236
+ return 0.5 # Small reward for correctly flagging a mine
237
+ return 0.0 # No reward for flagging a non-mine cell
238
+
239
+ def _check_win_condition(self) -> None:
240
+ """Check if the game has been won."""
241
+ total_cells = self.height * self.width
242
+ revealed_count = len(self._revealed_cells)
243
+ if revealed_count == total_cells - self.num_mines:
244
+ self._game_status = GameStatus.WON
245
+
246
+ def _is_valid_position(self, row: int, col: int) -> bool:
247
+ """Check if the given (row, col) is within board bounds."""
248
+ return 0 <= row < self.height and 0 <= col < self.width
249
+
250
def _create_observation(
    self,
    done: bool,
    reward: Optional[float] = None,
    metadata: Optional[Dict[str, Any]] = None,
) -> MinesweeperObservation:
    """Build the player-visible snapshot of the board.

    Each board entry is one of: the adjacent-mine count for a revealed safe
    cell, '*' for a revealed mine, 'F' for a flagged hidden cell, or -1 for
    an unflagged hidden cell.

    Args:
        done: Whether the episode is done.
        reward: Reward obtained from the last action.
        metadata: Additional metadata to include; an empty dict is used
            when this is None or empty.

    Returns:
        MinesweeperObservation carrying the rendered board, the mine count,
        the number of flags placed and cells revealed, the game status, and
        the ``done`` / ``reward`` / ``metadata`` values.
    """

    def render(r, c):
        # Hidden cells show only their flag state; revealed cells show content.
        cell = (r, c)
        if cell not in self._revealed_cells:
            return 'F' if cell in self._flags_placed else -1
        return '*' if cell in self._mine_positions else self._mine_counts[r][c]

    board = [
        [render(r, c) for c in range(self.width)]
        for r in range(self.height)
    ]
    extra = metadata if metadata else {}

    return MinesweeperObservation(
        board=board,
        num_mines=self.num_mines,
        flags_placed=len(self._flags_placed),
        cells_revealed=len(self._revealed_cells),
        game_status=self._game_status,
        done=done,
        reward=reward,
        metadata=extra,
    )
289
+
290
@property
def state(self) -> State:
    """Expose the environment's tracked State object.

    Returns:
        The State instance held in ``self._state``, which carries the
        episode_id and step_count.
    """
    return self._state
299
+
300
def get_full_state(self) -> MinesweeperState:
    """Assemble a MinesweeperState describing the environment's internals.

    Unlike an observation, this includes hidden information such as the
    mine locations. The mine, revealed-cell, and flag collections and the
    mine-count grid are passed to the constructor as-is (not copied here).

    Returns:
        MinesweeperState built from the episode id (empty string when
        unset), step count, board dimensions, mine locations, revealed
        cells, flags, mine counts, and game status.
    """
    tracker = self._state
    fields = {
        "episode_id": tracker.episode_id or "",
        "step_count": tracker.step_count,
        "board_height": self.height,
        "board_width": self.width,
        "mine_locations": self._mine_positions,
        "revealed_cells": self._revealed_cells,
        "flags": self._flags_placed,
        "mine_counts": self._mine_counts,
        "game_status": self._game_status,
    }
    return MinesweeperState(**fields)
318
+
319
def get_legal_actions(self) -> List[MinesweeperAction]:
    """List every action that is legal in the current state.

    No actions are legal once the game is no longer ongoing. Otherwise, in
    row-major order, each unrevealed cell contributes a "reveal" action
    (only if it is not flagged) followed by a "flag" action (which toggles
    the flag, so it is offered for flagged cells too).

    Returns:
        List of MinesweeperAction instances representing legal actions.
    """
    actions = []

    # A finished game accepts no further moves.
    if self._game_status != GameStatus.ONGOING:
        return actions

    for r in range(self.height):
        for c in range(self.width):
            cell = (r, c)
            if cell in self._revealed_cells:
                continue
            if cell not in self._flags_placed:
                actions.append(MinesweeperAction(row=r, col=c, action_type="reveal"))
            actions.append(MinesweeperAction(row=r, col=c, action_type="flag"))
    return actions