Syndria98 committed on
Commit
c2cac70
·
verified ·
1 Parent(s): a7461aa

Upload game.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. game.py +196 -0
game.py ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Rules and neural-network state encoding for Ultimate Tic-Tac-Toe."""

import numpy as np

# Index triples that form a line on a 3x3 grid: three rows, three columns and
# the two diagonals.  The same table is applied to the nine cells inside one
# small board and to the 3x3 arrangement of the small boards themselves.
WIN_PATTERNS = [
    (0, 1, 2),
    (3, 4, 5),
    (6, 7, 8),
    (0, 3, 6),
    (1, 4, 7),
    (2, 5, 8),
    (0, 4, 8),
    (2, 4, 6),
]


class UltimateTicTacToe:
    """
    Ultimate Tic-Tac-Toe played on a flat array of 81 cells.

    The 81 cells are laid out row-major on a 9x9 grid that is divided into
    nine 3x3 "small boards", themselves numbered row-major from 0 to 8.

    A *board* as passed to the methods below is a tuple
    ``(board_data, active_board)``:

    * ``board_data`` - array of 81 values; 0 is an empty cell, 1 and -1 are
      the stones of the two players.
    * ``active_board`` - index (0-8) of the small board the next move must be
      played in, or ``None`` when any playable small board may be used.
    """

    def __init__(self):
        self.cells = 81
        self.board_width = 9
        self.state_planes = 9

    def get_init_board(self):
        """Return the empty starting board with no active-board restriction."""
        b = np.zeros((self.cells,), dtype=int)
        return (b, None)

    def get_board_size(self):
        """Return the shape ``(planes, height, width)`` produced by ``encode_state``."""
        return (self.state_planes, self.board_width, self.board_width)

    def get_action_size(self):
        """Return the number of distinct actions (one per cell)."""
        return self.cells

    def get_next_state(self, board, player, action, verify_move=False):
        """
        Place ``player``'s stone on cell ``action`` and return the new state.

        The input board is not modified; the cell array is copied first.

        Returns ``((new_board_data, next_board), -player)``.  ``next_board``
        is the small board selected by the position of the move inside its
        own small board, or ``None`` if that small board is already closed
        (won or completely filled).

        When ``verify_move`` is true and ``action`` is not a legal move,
        ``False`` is returned instead of a state tuple.
        """
        if verify_move and self.get_valid_moves(board)[action] == 0:
            return False

        new_board_data = np.copy(board[0])
        new_board_data[action] = player

        # The (row, column) of the move *within* its small board picks the
        # small board the opponent is sent to.
        local_row = (action // self.board_width) % 3
        local_col = (action % self.board_width) % 3
        next_board = local_row * 3 + local_col
        if self.is_board_full(new_board_data, next_board):
            next_board = None

        # Hand the turn to the other player by negating the player marker.
        return ((new_board_data, next_board), -player)

    def is_board_full(self, board_data, next_board):
        """
        Return True when small board ``next_board`` is closed for play.

        A small board is closed once either player has won it or when it has
        no empty cell left.
        """
        return (
            self._is_small_board_win(board_data, next_board, 1)
            or self._is_small_board_win(board_data, next_board, -1)
            or self._is_board_full(board_data, next_board)
        )

    def _small_board_cells(self, inner_board_idx):
        """Return the nine flat cell indices of a small board, row by row."""
        row_block = inner_board_idx // 3
        col_block = inner_board_idx % 3

        # Top-left cell of the small board: 27 cells per band of three rows,
        # 3 cells per small-board column.
        base = row_block * 27 + col_block * 3

        return [
            base, base + 1, base + 2,
            base + 9, base + 10, base + 11,
            base + 18, base + 19, base + 20,
        ]

    def _is_board_full(self, board_data, next_board):
        """Return True when small board ``next_board`` has no empty cell."""
        return all(board_data[cell] != 0 for cell in self._small_board_cells(next_board))

    def _is_playable_small_board(self, board_data, inner_board_idx):
        """Return True when the small board is neither won nor filled."""
        return not self.is_board_full(board_data, inner_board_idx)

    def has_legal_moves(self, board):
        """Return True when at least one cell is a legal move on ``board``."""
        return any(flag == 1 for flag in self.get_valid_moves(board))

    def get_valid_moves(self, board):
        """
        Return a list of 81 flags, 1 for every legal cell and 0 otherwise.

        If the active board is set and still playable, only its empty cells
        are legal.  Otherwise the empty cells of every playable small board
        are legal.
        """
        board_data, active_board = board
        # All moves are invalid by default.
        valid_moves = [0] * self.get_action_size()

        if active_board is not None and self._is_playable_small_board(board_data, active_board):
            candidate_boards = [active_board]
        else:
            candidate_boards = [
                inner_board_idx
                for inner_board_idx in range(9)
                if self._is_playable_small_board(board_data, inner_board_idx)
            ]

        for inner_board_idx in candidate_boards:
            for index in self._small_board_cells(inner_board_idx):
                if board_data[index] == 0:
                    valid_moves[index] = 1

        return valid_moves

    def _is_small_board_win(self, board_data, inner_board_idx, player):
        """Return True when ``player`` holds a full line in the small board."""
        cells = self._small_board_cells(inner_board_idx)

        for a, b, c in WIN_PATTERNS:
            if board_data[cells[a]] == board_data[cells[b]] == board_data[cells[c]] == player:
                return True

        return False

    def is_win(self, board, player):
        """Return True when ``player`` has won three small boards in a line."""
        board_data, _ = board
        won = [self._is_small_board_win(board_data, i, player) for i in range(9)]

        return any(won[a] and won[b] and won[c] for a, b, c in WIN_PATTERNS)

    def get_reward_for_player(self, board, player):
        """
        Score ``board`` from the point of view of ``player``.

        Returns 1 if ``player`` has won, -1 if the opponent has won, ``None``
        if the game is still running (legal moves remain) and 0 when no legal
        move is left and nobody has won.
        """
        if self.is_win(board, player):
            return 1
        if self.is_win(board, -player):
            return -1
        if self.has_legal_moves(board):
            return None

        return 0

    def get_canonical_board_data(self, board_data, player):
        """Return the cell array multiplied by ``player`` (flips stones for -1)."""
        return player * board_data

    def _small_board_mask(self, inner_board_idx):
        """Return a 9x9 float32 grid that is 1.0 on the cells of one small board."""
        mask = np.zeros((self.board_width, self.board_width), dtype=np.float32)
        top = (inner_board_idx // 3) * 3
        left = (inner_board_idx % 3) * 3
        mask[top:top + 3, left:left + 3] = 1.0
        return mask

    def encode_state(self, board):
        """
        Encode ``board`` as a float32 array of shape ``(9, 9, 9)``.

        Planes, in order:

        0. cells holding a ``1`` stone
        1. cells holding a ``-1`` stone
        2. empty cells
        3. legal moves
        4. cells of the active small board (all zero if none is enforced)
        5. cells of small boards won by ``1``
        6. cells of small boards won by ``-1``
        7. cells of small boards that are still playable
        8. constant plane: fraction of the 81 cells that are occupied
        """
        board_data, active_board = board
        grid_shape = (self.board_width, self.board_width)
        board_grid = board_data.reshape(grid_shape)

        current_stones = (board_grid == 1).astype(np.float32)
        opponent_stones = (board_grid == -1).astype(np.float32)
        empty_cells = (board_grid == 0).astype(np.float32)
        legal_moves = np.array(self.get_valid_moves(board), dtype=np.float32).reshape(grid_shape)

        active_board_mask = np.zeros(grid_shape, dtype=np.float32)
        if active_board is not None and self._is_playable_small_board(board_data, active_board):
            active_board_mask = self._small_board_mask(active_board)

        current_won_boards = np.zeros(grid_shape, dtype=np.float32)
        opponent_won_boards = np.zeros(grid_shape, dtype=np.float32)
        playable_boards = np.zeros(grid_shape, dtype=np.float32)

        for inner_board_idx in range(9):
            board_mask = self._small_board_mask(inner_board_idx)
            if self._is_small_board_win(board_data, inner_board_idx, 1):
                current_won_boards += board_mask
            elif self._is_small_board_win(board_data, inner_board_idx, -1):
                opponent_won_boards += board_mask

            if self._is_playable_small_board(board_data, inner_board_idx):
                playable_boards += board_mask

        move_count = np.count_nonzero(board_data) / self.cells
        move_count_plane = np.full(grid_shape, move_count, dtype=np.float32)

        return np.stack(
            (
                current_stones,
                opponent_stones,
                empty_cells,
                legal_moves,
                active_board_mask,
                current_won_boards,
                opponent_won_boards,
                playable_boards,
                move_count_plane,
            ),
            axis=0,
        )