File size: 5,519 Bytes
ca29807
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4eb7f58
 
ca29807
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

"""
Data models for the Minesweeper Environment.

The minesweeper_env environment is a Minesweeper game where agents reveal cells and place flags
to identify mines on a grid board.
"""

from enum import Enum
from typing import List, Any, Set, Tuple
from pydantic import Field, BaseModel

# Support both in-repo and standalone imports
try:
    # In-repo imports (when running from OpenEnv repository)
    from openenv.core.env_server.types import Action, Observation
except ImportError:
    # Standalone imports (when environment is standalone with openenv-core from pip)
    from openenv_core.env_server.types import Action, Observation


class GameStatus(Enum):
    """Lifecycle states a Minesweeper game can be in.

    Each member's value is its lowercase string name.
    """

    # Game is still in progress.
    ONGOING = "ongoing"
    # Game finished with a win.
    WON = "won"
    # Game finished with a loss.
    LOST = "lost"


class MinesweeperAction(Action):
    """
    A single move submitted to the Minesweeper environment.

    A move targets one cell, addressed by ``row`` and ``col``, and states what
    to do with it via ``action_type``.

    Attributes:
        row: Row index of the target cell; must be >= 0.
        col: Column index of the target cell; must be >= 0.
        action_type: Either 'reveal' or 'flag'. Any other string is rejected
            by the field's pattern constraint.
    """

    row: int = Field(
        ...,
        ge=0,
        description="Row index of the cell",
    )
    col: int = Field(
        ...,
        ge=0,
        description="Column index of the cell",
    )
    # The anchored pattern restricts the value to exactly one of the two verbs.
    action_type: str = Field(
        ...,
        pattern="^(reveal|flag)$",
        description="Type of action: 'reveal' or 'flag'",
    )


class MinesweeperObservation(Observation):
    """
    Agent-facing snapshot of a Minesweeper game.

    The observation exposes the board as the agent is allowed to see it,
    together with a few summary counters and the overall game status.

    Attributes:
        board: 2D list (rows of cells) with the visible board. Cell values:
            - -1: the cell has not been revealed
            - 0-8: a revealed cell, holding its adjacent-mine count
            - 'F': the cell carries a flag
            - '*': a mine (only revealed if game is lost)
        num_mines: Total number of mines on the board (>= 0).
        flags_placed: How many flags are currently on the board (>= 0).
        cells_revealed: How many cells have been revealed so far (>= 0).
        game_status: Whether the game is ongoing, won, or lost.
    """

    board: List[List[Any]] = Field(
        default_factory=list,
        description="2D board state",
    )
    num_mines: int = Field(
        ...,
        ge=0,
        description="Total number of mines",
    )
    flags_placed: int = Field(
        ...,
        ge=0,
        description="Number of flags placed",
    )
    cells_revealed: int = Field(
        ...,
        ge=0,
        description="Number of cells revealed",
    )
    game_status: GameStatus = Field(
        ...,
        description="Current game status",
    )

    @property
    def board_height(self) -> int:
        """Number of rows in ``board``."""
        row_count = len(self.board)
        return row_count

    @property
    def board_width(self) -> int:
        """Number of columns in ``board``, taken from its first row (0 if empty)."""
        if not self.board:
            # No rows at all, so there is no first row to measure.
            return 0
        first_row = self.board[0]
        return len(first_row)


class MinesweeperState(BaseModel):
    """
    Complete server-side state of a Minesweeper episode.

    Unlike the observation handed to the agent, this model also carries the
    hidden information (where the mines are).

    Attributes:
        episode_id: Unique identifier for the current episode.
        step_count: Number of steps taken in the current episode.
        board_height: Number of rows on the board.
        board_width: Number of columns on the board.
        mine_locations: Set of (row, col) positions that contain a mine.
        revealed_cells: Set of (row, col) positions that have been revealed.
        flags: Set of (row, col) positions that currently carry a flag.
        mine_counts: 2D list giving the adjacent-mine count of every cell.
        game_status: Whether the game is ongoing, won, or lost.
    """

    episode_id: str
    step_count: int
    board_height: int
    board_width: int
    mine_locations: Set[Tuple[int, int]]
    revealed_cells: Set[Tuple[int, int]]
    flags: Set[Tuple[int, int]]
    mine_counts: List[List[int]]
    game_status: GameStatus

    def to_observation(self) -> MinesweeperObservation:
        """
        Build the agent-visible observation from this full state.

        Every cell is mapped to what the agent may see: '*' for a revealed
        mine, the adjacent-mine count for any other revealed cell, 'F' for a
        flagged cell, and -1 for everything else. The observation's ``done``
        flag is set whenever the game is no longer ongoing, ``reward`` is
        always 0.0, and ``metadata`` carries the episode id and step count.

        Returns:
            MinesweeperObservation describing the agent's view of the board.
        """

        def visible_value(r: int, c: int) -> Any:
            """Return what the agent sees at cell (r, c)."""
            position = (r, c)
            # A revealed cell takes precedence over a flag on the same cell.
            if position in self.revealed_cells:
                if position in self.mine_locations:
                    return '*'
                return self.mine_counts[r][c]
            if position in self.flags:
                return 'F'
            return -1

        visible_board = [
            [visible_value(r, c) for c in range(self.board_width)]
            for r in range(self.board_height)
        ]

        is_finished = self.game_status != GameStatus.ONGOING
        episode_info = {
            "episode_id": self.episode_id,
            "step_count": self.step_count,
        }

        return MinesweeperObservation(
            board=visible_board,
            num_mines=len(self.mine_locations),
            flags_placed=len(self.flags),
            cells_revealed=len(self.revealed_cells),
            game_status=self.game_status,
            done=is_finished,
            reward=0.0,
            metadata=episode_info,
        )