| import numpy as np |
|
|
# Index triples that form a line on a 3x3 grid numbered 0-8 row-major.
# Used both for cells inside one small board and for the nine small boards.
WIN_PATTERNS = [
    *((start, start + 1, start + 2) for start in (0, 3, 6)),  # rows
    *((start, start + 3, start + 6) for start in (0, 1, 2)),  # columns
    (0, 4, 8),  # main diagonal
    (2, 4, 6),  # anti-diagonal
]
|
|
class UltimateTicTacToe:
    """
    Rules engine for Ultimate Tic-Tac-Toe on a 9x9 grid of 81 cells.

    The grid is split into nine 3x3 small boards, numbered 0-8 row-major.
    A game state ("board") is a tuple ``(board_data, active_board)``:

    board_data: flat array of 81 cells indexed ``row * 9 + col``; a cell
        holds 0 when empty, otherwise the player value written there
        (the win checks and the encoder use 1 and -1).
    active_board: index of the small board the next move must be played
        in, or None when any playable small board may be chosen.
    """

    def __init__(self):
        self.cells = 81  # total number of cells; also the number of actions
        self.board_width = 9  # side length of the full grid
        self.state_planes = 9  # number of planes stacked by encode_state

    def get_init_board(self):
        """Return the starting state: an all-zero grid and no forced small board."""
        return (np.zeros((self.cells,), dtype=int), None)

    def get_board_size(self):
        """Return the encoded state shape as (planes, rows, columns)."""
        return (self.state_planes, self.board_width, self.board_width)

    def get_action_size(self):
        """Return the number of actions, one per cell."""
        return self.cells

    def get_next_state(self, board, player, action, verify_move=False):
        """
        Apply ``action`` for ``player`` and return the resulting state.

        Args:
            board: current ``(board_data, active_board)`` state; not modified.
            player: value written into the chosen cell.
            action: flat cell index to play.
            verify_move: when true, reject actions that get_valid_moves
                does not flag as legal.

        Returns:
            ``((new_board_data, next_board), -player)``, or False when
            ``verify_move`` is set and the action is not legal.
        """
        if verify_move and self.get_valid_moves(board)[action] == 0:
            return False

        new_board_data = np.copy(board[0])
        new_board_data[action] = player

        # The cell's position inside its own small board selects the small
        # board the next mover is sent to.
        next_board = ((action // 9) % 3) * 3 + (action % 3)
        if self.is_board_full(new_board_data, next_board):
            # A closed target board frees the next mover to play anywhere.
            next_board = None

        return ((new_board_data, next_board), -player)

    def is_board_full(self, board_data, next_board):
        """
        Return True when small board ``next_board`` is closed for play.

        Despite the name, a board counts as closed when it has been won by
        1 or by -1, as well as when it has no empty cell left.
        """
        return (
            self._is_small_board_win(board_data, next_board, 1)
            or self._is_small_board_win(board_data, next_board, -1)
            or self._is_board_full(board_data, next_board)
        )

    def _small_board_cells(self, inner_board_idx):
        """Return the nine flat cell indices of a small board, row by row."""
        row_block = inner_board_idx // 3
        col_block = inner_board_idx % 3

        # Each block row spans 3 grid rows of 9 cells (27); each block
        # column spans 3 cells.
        base = row_block * 27 + col_block * 3

        return [
            base, base + 1, base + 2,
            base + 9, base + 10, base + 11,
            base + 18, base + 19, base + 20,
        ]

    def _is_board_full(self, board_data, next_board):
        """Return True when small board ``next_board`` has no empty cell."""
        return all(
            board_data[cell] != 0
            for cell in self._small_board_cells(next_board)
        )

    def _is_playable_small_board(self, board_data, inner_board_idx):
        """Return True when the small board is neither won nor completely filled."""
        return not self.is_board_full(board_data, inner_board_idx)

    def has_legal_moves(self, board):
        """Return True when get_valid_moves flags at least one cell."""
        return any(flag == 1 for flag in self.get_valid_moves(board))

    def get_valid_moves(self, board):
        """
        Return a list of 81 flags, 1 for every cell that may be played.

        Moves are limited to the active small board while it is playable;
        otherwise every empty cell of every playable small board is legal.
        """
        board_data, active_board = board
        valid_moves = [0] * self.get_action_size()

        if active_board is not None and self._is_playable_small_board(board_data, active_board):
            target_boards = [active_board]
        else:
            # No forced board, or the forced board is closed: free choice.
            target_boards = [
                inner_board_idx
                for inner_board_idx in range(9)
                if self._is_playable_small_board(board_data, inner_board_idx)
            ]

        for inner_board_idx in target_boards:
            for index in self._small_board_cells(inner_board_idx):
                if board_data[index] == 0:
                    valid_moves[index] = 1

        return valid_moves

    def _is_small_board_win(self, board_data, inner_board_idx, player):
        """Return True when ``player`` holds a full WIN_PATTERNS line in the small board."""
        cells = self._small_board_cells(inner_board_idx)
        return any(
            board_data[cells[a]] == board_data[cells[b]] == board_data[cells[c]] == player
            for a, b, c in WIN_PATTERNS
        )

    def is_win(self, board, player):
        """Return True when ``player`` has won three small boards forming a WIN_PATTERNS line."""
        board_data, _ = board
        won = [self._is_small_board_win(board_data, i, player) for i in range(9)]
        return any(won[a] and won[b] and won[c] for a, b, c in WIN_PATTERNS)

    def get_reward_for_player(self, board, player):
        """
        Return the game outcome from ``player``'s point of view.

        Returns:
            1 if ``player`` has won, -1 if ``-player`` has won, None while
            legal moves remain (game not finished), and 0 otherwise (draw).
        """
        if self.is_win(board, player):
            return 1
        if self.is_win(board, -player):
            return -1
        if self.has_legal_moves(board):
            return None
        return 0

    def get_canonical_board_data(self, board_data, player):
        """Return ``board_data`` multiplied by ``player``, so that player's cells become positive."""
        return player * board_data

    def _small_board_mask(self, inner_board_idx):
        """Return a 9x9 float32 grid with 1.0 on the cells of the given small board."""
        mask = np.zeros((self.board_width, self.board_width), dtype=np.float32)
        for index in self._small_board_cells(inner_board_idx):
            row = index // self.board_width
            col = index % self.board_width
            mask[row, col] = 1.0
        return mask

    def encode_state(self, board):
        """
        Encode a state as a stack of nine 9x9 float32 planes.

        Cells holding 1 are treated as the current player's and cells
        holding -1 as the opponent's, so callers presumably pass canonical
        board data (see get_canonical_board_data) -- confirm at call sites.

        Plane order along axis 0:
            0. cells equal to 1
            1. cells equal to -1
            2. empty cells
            3. legal moves from get_valid_moves
            4. mask of the active small board (all zeros when there is
               none or it is not playable)
            5. small boards won by 1
            6. small boards won by -1
            7. playable small boards
            8. constant plane holding the fraction of non-empty cells
        """
        board_data, active_board = board
        grid_shape = (self.board_width, self.board_width)
        board_grid = board_data.reshape(grid_shape)

        current_stones = (board_grid == 1).astype(np.float32)
        opponent_stones = (board_grid == -1).astype(np.float32)
        empty_cells = (board_grid == 0).astype(np.float32)
        legal_moves = np.array(self.get_valid_moves(board), dtype=np.float32).reshape(grid_shape)

        active_board_mask = np.zeros(grid_shape, dtype=np.float32)
        if active_board is not None and self._is_playable_small_board(board_data, active_board):
            active_board_mask = self._small_board_mask(active_board)

        current_won_boards = np.zeros(grid_shape, dtype=np.float32)
        opponent_won_boards = np.zeros(grid_shape, dtype=np.float32)
        playable_boards = np.zeros(grid_shape, dtype=np.float32)

        for inner_board_idx in range(9):
            board_mask = self._small_board_mask(inner_board_idx)
            if self._is_small_board_win(board_data, inner_board_idx, 1):
                current_won_boards += board_mask
            elif self._is_small_board_win(board_data, inner_board_idx, -1):
                opponent_won_boards += board_mask

            if self._is_playable_small_board(board_data, inner_board_idx):
                playable_boards += board_mask

        # Game progress: share of the 81 cells that are already occupied.
        move_count = np.count_nonzero(board_data) / self.cells
        move_count_plane = np.full(grid_shape, move_count, dtype=np.float32)

        return np.stack(
            (
                current_stones,
                opponent_stones,
                empty_cells,
                legal_moves,
                active_board_mask,
                current_won_boards,
                opponent_won_boards,
                playable_boards,
                move_count_plane,
            ),
            axis=0,
        )
|
|