Instructions to use doraking/AlphaQuoridor with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Keras
How to use doraking/AlphaQuoridor with Keras:
# Available backend options are: "jax", "torch", "tensorflow". import os os.environ["KERAS_BACKEND"] = "jax" import keras model = keras.saving.load_model("hf://doraking/AlphaQuoridor") - Notebooks
- Google Colab
- Kaggle
| # ==================== | |
| # Quoridor (3 x 3), wall = 1 | |
| # ==================== | |
| # Importing packages | |
| import random | |
| import math | |
| from collections import deque | |
| import copy | |
| from copy import deepcopy | |
| # Game state | |
| class State: | |
| def __init__(self, board_size=3, num_walls=1, player=None, enemy=None, walls=None, depth=0): | |
| self.N = board_size | |
| N = self.N | |
| if N % 2 == 0: | |
| raise ValueError('The board size must be an odd number.') | |
| self.directions = [(-1, 0), (1, 0), (0, -1), (0, 1)] | |
| self.player = player if player != None else [0] * 2 # Position, number of walls | |
| self.enemy = enemy if enemy != None else [0] * 2 | |
| self.walls = walls if walls != None else [0] * ((N - 1) ** 2) | |
| self.depth = depth | |
| self.draw_depth = 30 | |
| if player == None or enemy == None: | |
| init_pos = N * (N - 1) + N // 2 | |
| self.player[0] = init_pos | |
| self.player[1] = num_walls | |
| self.enemy[0] = init_pos | |
| self.enemy[1] = num_walls | |
| # Check if it's a loss | |
| def is_lose(self): | |
| if self.enemy[0] // self.N == 0: | |
| return True | |
| return False | |
| # Check if it's a draw | |
| def is_draw(self): | |
| return self.depth >= self.draw_depth | |
| # Check if the game is over | |
| def is_done(self): | |
| return self.is_lose() or self.is_draw() | |
| def pieces_array(self): | |
| N = self.N | |
| def pieces_of(pieces): | |
| tables = [] | |
| table = [0] * (N ** 2) | |
| table[pieces[0]] = 1 | |
| tables.append(table) | |
| table = [pieces[1]] * (N ** 2) | |
| tables.append(table) | |
| return tables | |
| def walls_of(walls): | |
| tables = [] | |
| table_h = [0] * (N ** 2) | |
| table_v = [0] * (N ** 2) | |
| for wp in range((N - 1) ** 2): | |
| x, y = wp // (N - 1), wp % (N - 1) | |
| if x < (N - 1) // 2 and y < (N - 1) // 2: | |
| pos = N * x + y | |
| elif x > (N - 1) // 2 and y < (N - 1) // 2: | |
| pos = N * x + (y + 1) | |
| elif x < (N - 1) // 2 and y > (N - 1) // 2: | |
| pos = N * (x + 1) + y | |
| else: | |
| pos = N * (x + 1) + (y + 1) | |
| if walls[wp] == 1: | |
| table_h[pos] = 1 | |
| elif walls[wp] == 2: | |
| table_v[pos] = 1 | |
| tables.append(table_h) | |
| tables.append(table_v) | |
| return tables | |
| return [pieces_of(self.player), pieces_of(self.enemy), walls_of(self.walls)] | |
| def legal_actions(self): | |
| """ | |
| 0 - (N ** 2 - 1): Move to a position | |
| N ** 2- (N ** 2 + (N - 1) ** 2 - 1): Place a horizontal wall | |
| (N ** 2 + (N - 1) ** 2) - (N ** 2 + 2 * (N - 1) ** 2 - 1): Place a vertical wall | |
| """ | |
| actions = [] | |
| actions.extend(self.legal_actions_pos(self.player[0])) | |
| if self.player[1] > 0: | |
| for pos in range((self.N - 1) ** 2): | |
| actions.extend(self.legal_actions_wall(pos)) | |
| return actions | |
| def legal_actions_pos(self, pos): | |
| actions = [] | |
| N = self.N | |
| walls = self.walls | |
| ep = self.enemy[0] | |
| x, y = pos // N, pos % N | |
| for dx, dy in self.directions: | |
| nx, ny = x + dx, y + dy | |
| if 0 <= nx < N and 0 <= ny < N: | |
| np = N * nx + ny | |
| wp = (N - 1) * nx + ny | |
| if nx < x: | |
| if y == 0: | |
| if walls[wp] != 1: | |
| if np + ep != N ** 2 - 1: | |
| actions.append(np) | |
| else: | |
| if nx > 0 and walls[wp - (N - 1)] != 1: | |
| nnp = np - N | |
| actions.append(nnp) | |
| elif (nx == 0 and walls[wp] != 2) or (nx > 0 and walls[wp - (N - 1)] != 2 and walls[wp] != 2): | |
| nnp = np + 1 | |
| actions.append(nnp) | |
| elif y == (N - 1): | |
| if walls[wp - 1] != 1: | |
| if np + ep != N ** 2 - 1: | |
| actions.append(np) | |
| else: | |
| if nx > 0 and walls[wp - (N - 1) - 1] != 1: | |
| nnp = np - N | |
| actions.append(nnp) | |
| elif (nx == 0 and walls[wp - 1] != 2) or (nx > 0 and walls[wp - (N - 1) - 1] != 2 and walls[wp - 1] != 2): | |
| nnp = np - 1 | |
| actions.append(nnp) | |
| else: | |
| if walls[wp - 1] != 1 and walls[wp] != 1: | |
| if np + ep != N ** 2 - 1: | |
| actions.append(np) | |
| else: | |
| if nx > 0 and walls[wp - (N - 1)] != 1 and walls[wp - (N - 1) - 1] != 1: | |
| nnp = np - N | |
| actions.append(nnp) | |
| else: | |
| if (nx == 0 and walls[wp - 1] != 2) or (nx > 0 and walls[wp - (N - 1) - 1] != 2 and walls[wp - 1] != 2): | |
| nnp = np - 1 | |
| actions.append(nnp) | |
| if (nx == 0 and walls[wp] != 2) or (nx > 0 and walls[wp - (N - 1)] != 2 and walls[wp] != 2): | |
| nnp = np + 1 | |
| actions.append(nnp) | |
| if nx > x: | |
| if y == 0: | |
| if walls[wp - (N - 1)] != 1: | |
| if np + ep != N ** 2 - 1: | |
| actions.append(np) | |
| else: | |
| if nx < (N - 1) and walls[wp] != 1: | |
| nnp = np + N | |
| actions.append(nnp) | |
| elif (nx == (N - 1) and walls[wp - (N - 1)] != 2) or (nx < (N - 1) and walls[wp - (N - 1)] != 2 and walls[wp] != 2): | |
| nnp = np + 1 | |
| actions.append(nnp) | |
| elif y == (N - 1): | |
| if walls[wp - (N - 1) - 1] != 1: | |
| if np + ep != N ** 2 - 1: | |
| actions.append(np) | |
| else: | |
| if nx < (N - 1) and walls[wp - 1] != 1: | |
| nnp = np + N | |
| actions.append(nnp) | |
| elif (nx == (N - 1) and walls[wp - (N - 1) - 1] != 2) or (nx < (N - 1) and walls[wp - (N - 1) - 1] != 2 and walls[wp - 1] != 2): | |
| nnp = np - 1 | |
| actions.append(nnp) | |
| else: | |
| if walls[wp - (N - 1) - 1] != 1 and walls[wp - (N - 1)] != 1: | |
| if np + ep != N ** 2 - 1: | |
| actions.append(np) | |
| else: | |
| if nx < (N - 1) and walls[wp - 1] != 1 and walls[wp] != 1: | |
| nnp = np + N | |
| actions.append(nnp) | |
| else: | |
| if (nx == (N - 1) and walls[wp - (N - 1) - 1] != 2) or (nx < (N - 1) and walls[wp - (N - 1) - 1] != 2 and walls[wp - 1] != 2): | |
| nnp = np - 1 | |
| actions.append(nnp) | |
| if (nx == (N - 1) and walls[wp - (N - 1)] != 2) or (nx < (N - 1) and walls[wp - (N - 1)] != 2 and walls[wp] != 2): | |
| nnp = np + 1 | |
| actions.append(nnp) | |
| if ny < y: | |
| if x == 0: | |
| if walls[wp] != 2: | |
| if np + ep != N ** 2 - 1: | |
| actions.append(np) | |
| else: | |
| if ny > 0 and walls[wp - 1] != 2: | |
| nnp = np - 1 | |
| actions.append(nnp) | |
| elif (ny == 0 and walls[wp] != 1) or (ny > 0 and walls[wp - 1] != 1 and walls[wp] != 1): | |
| nnp = np + N | |
| actions.append(nnp) | |
| elif x == (N - 1): | |
| if walls[wp - (N - 1)] != 2: | |
| if np + ep != N ** 2 - 1: | |
| actions.append(np) | |
| else: | |
| if ny > 0 and walls[wp - (N - 1) - 1] != 2: | |
| nnp = np - 1 | |
| actions.append(nnp) | |
| elif (ny == 0 and walls[wp - (N - 1)] != 1) or (ny > 0 and walls[wp - (N - 1) - 1] != 2 and walls[wp - (N - 1)] != 1): | |
| nnp = np - N | |
| actions.append(nnp) | |
| else: | |
| if walls[wp - (N - 1)] != 2 and walls[wp] != 2: | |
| if np + ep != N ** 2 - 1: | |
| actions.append(np) | |
| else: | |
| if ny > 0 and walls[wp - (N - 1) - 1] != 2 and walls[wp - 1] != 2: | |
| nnp = np - 1 | |
| actions.append(nnp) | |
| else: | |
| if (ny == 0 and walls[wp - (N - 1)] != 1) or (ny > 0 and walls[wp - (N - 1) - 1] != 2 and walls[wp - (N - 1)] != 1): | |
| nnp = np - N | |
| actions.append(nnp) | |
| if (ny == 0 and walls[wp] != 1) or (ny > 0 and (walls[wp - 1] != 1 or walls[wp] != 1)): | |
| nnp = np + N | |
| actions.append(nnp) | |
| if ny > y: | |
| if x == 0: | |
| if walls[wp - 1] != 2: | |
| if np + ep != N ** 2 - 1: | |
| actions.append(np) | |
| else: | |
| if ny < (N - 1) and walls[wp] != 2: | |
| nnp = np + 1 | |
| actions.append(nnp) | |
| elif (ny == (N - 1) and walls[wp - 1] != 1) or (ny < (N - 1) and walls[wp - 1] != 1 and walls[wp] != 1): | |
| nnp = np + N | |
| actions.append(nnp) | |
| elif x == (N - 1): | |
| if walls[wp - (N - 1) - 1] != 2: | |
| if np + ep != N ** 2 - 1: | |
| actions.append(np) | |
| else: | |
| if ny < (N - 1) and walls[wp - (N - 1)] != 2: | |
| nnp = np + 1 | |
| actions.append(nnp) | |
| elif (ny == (N - 1) and walls[wp - (N - 1) - 1] != 1) or (ny < (N - 1) and walls[wp - (N - 1) - 1] != 1 and walls[wp - (N - 1)] != 1): | |
| nnp = np - N | |
| actions.append(nnp) | |
| else: | |
| if walls[wp - (N - 1) - 1] != 2 and walls[wp - 1] != 2: | |
| if np + ep != N ** 2 - 1: | |
| actions.append(np) | |
| else: | |
| if ny < (N - 1) and walls[wp - (N - 1)] != 2 and walls[wp] != 2: | |
| nnp = np + 1 | |
| actions.append(nnp) | |
| else: | |
| if (ny == (N - 1) and walls[wp - (N - 1) - 1] != 1) or (ny < (N - 1) and walls[wp - (N - 1) - 1] != 1 and walls[wp - (N - 1)] != 1): | |
| nnp = np - N | |
| actions.append(nnp) | |
| if (ny == (N - 1) and walls[wp - 1] != 1) or (ny < (N - 1) and (walls[wp - 1] != 1 or walls[wp] != 1)): | |
| nnp = np + N | |
| actions.append(nnp) | |
| return actions | |
| def legal_actions_wall(self, pos): | |
| N = self.N | |
| walls = self.walls | |
| def can_place_wall(orientation, pos): | |
| if walls[pos] != 0: | |
| return False | |
| x, y = pos // (N - 1), pos % (N - 1) | |
| if orientation == 1: | |
| if y == 0: | |
| if walls[pos + 1] == 1: | |
| return False | |
| elif y == (N - 2): | |
| if walls[pos - 1] == 1: | |
| return False | |
| else: | |
| if walls[pos - 1] == 1 or walls[pos + 1] == 1: | |
| return False | |
| else: | |
| if x == 0: | |
| if walls[pos + (N - 1)] == 2: | |
| return False | |
| elif x == (N - 2): | |
| if walls[pos - (N - 1)] == 2: | |
| return False | |
| else: | |
| if walls[pos - (N - 1)] == 2 or walls[pos + (N - 1)] == 2: | |
| return False | |
| return True | |
| def can_reach_goal(orientation, pos): | |
| def bfs(state): | |
| queue = deque([state.player[0]]) | |
| visited = set() | |
| while queue: | |
| pos = queue.popleft() | |
| nps = state.legal_actions_pos(pos) | |
| for np in nps: | |
| x, y = np // N, np % N | |
| if y == 0: | |
| return True | |
| if np not in visited: | |
| visited.add(np) | |
| queue.append(np) | |
| return False | |
| self.walls[pos] = orientation | |
| player_state = State(board_size=N, player=self.player.copy(), enemy=self.enemy.copy(), walls=deepcopy(self.walls), depth=self.depth) | |
| can_reach_player = bfs(player_state) | |
| action = pos | |
| if orientation == 1: | |
| action += N ** 2 | |
| else: | |
| action += N ** 2 + (N - 1) ** 2 | |
| enemy_state = player_state.next(action) | |
| can_reach_enemy = bfs(enemy_state) | |
| self.walls[pos] = 0 | |
| return can_reach_player and can_reach_enemy | |
| actions = [] | |
| if can_place_wall(1, pos) and can_reach_goal(1, pos): | |
| actions.append(N ** 2 + pos) | |
| if can_place_wall(2, pos) and can_reach_goal(2, pos): | |
| actions.append(N ** 2 + (N - 1) ** 2 + pos) | |
| return actions | |
| def rotate_walls(self): | |
| N = self.N | |
| rotated_walls = [0] * len(self.walls) | |
| for i in range((N - 1) ** 2): | |
| rotated_walls[i] = self.walls[(N - 1) ** 2 - 1 - i] | |
| self.walls = rotated_walls | |
| def next(self, action): | |
| N = self.N | |
| # Create the next state | |
| state = State(board_size=N, player=self.player.copy(), enemy=self.enemy.copy(), walls=deepcopy(self.walls), depth=self.depth + 1) | |
| if action < N ** 2: | |
| # Move piece | |
| state.player[0] = action | |
| elif action < N ** 2 + (N - 1) ** 2: | |
| # Place horizontal wall | |
| pos = action - N ** 2 | |
| state.walls[pos] = 1 | |
| state.player[1] -= 1 | |
| else: | |
| # Place vertical wall | |
| pos = action - N ** 2 - (N - 1) ** 2 | |
| state.walls[pos] = 2 | |
| state.player[1] -= 1 | |
| state.rotate_walls() | |
| # Swap players | |
| state.player, state.enemy = state.enemy, state.player | |
| return state | |
| # Check if it's the first player's turn | |
| def is_first_player(self): | |
| return self.depth % 2 == 0 | |
| def __str__(self): | |
| """Display the game state as a string.""" | |
| N = self.N | |
| is_first_player = self.is_first_player() | |
| board = [['o'] * (2 * N - 1) for _ in range(2 * N - 1)] | |
| for i in range(2 * N - 1): | |
| for j in range(2 * N - 1): | |
| if i % 2 == 1 and j % 2 == 1: | |
| board[i][j] = 'x' | |
| p_pos = self.player[0] if is_first_player else self.enemy[0] | |
| e_pos = self.enemy[0] if is_first_player else self.player[0] | |
| e_pos = N ** 2 - 1 - e_pos | |
| p_x, p_y = p_pos // N, p_pos % N | |
| e_x, e_y = e_pos // N, e_pos % N | |
| board[2 * p_x][2 * p_y] = 'P' | |
| board[2 * e_x][2 * e_y] = 'E' | |
| turn_info = "<Enemy's Turn>" if is_first_player else "<Player's Turn>" | |
| if not is_first_player: | |
| self.rotate_walls() | |
| # Set walls | |
| for i in range(N - 1): | |
| for j in range(N - 1): | |
| pos = i * (N - 1) + j | |
| if self.walls[pos] == 1: | |
| board[2 * i + 1][2 * j] = '-' | |
| board[2 * i + 1][2 * (j + 1)] = '-' | |
| if self.walls[pos] == 2: | |
| board[2 * i][2 * j + 1] = '|' | |
| board[2 * (i + 1)][2 * j + 1] = '|' | |
| if not is_first_player: | |
| self.rotate_walls() | |
| board_str = '\n'.join([''.join(row) for row in board]) | |
| return turn_info + '\n' + board_str | |
| # Randomly select an action | |
| def random_action(state): | |
| legal_actions = state.legal_actions() | |
| action = random.randint(0, len(legal_actions) - 1) | |
| return legal_actions[action] | |
| # Calculate state value using alpha-beta pruning | |
| def alpha_beta(state, alpha, beta): | |
| # Loss is -1 | |
| if state.is_lose(): | |
| return -1 | |
| # Draw is 0 | |
| if state.is_draw(): | |
| return 0 | |
| # Calculate state values for legal actions | |
| for action in state.legal_actions(): | |
| score = -alpha_beta(state.next(action), -beta, -alpha) | |
| if score > alpha: | |
| alpha = score | |
| # If the best score for the current node exceeds the parent node, stop the search | |
| if alpha >= beta: | |
| return alpha | |
| # Return the maximum value of the state values for legal actions | |
| return alpha | |
| # Select an action using alpha-beta pruning | |
| def alpha_beta_action(state): | |
| # Calculate state values for legal actions | |
| best_action = 0 | |
| alpha = -float('inf') | |
| for action in state.legal_actions(): | |
| score = -alpha_beta(state.next(action), -float('inf'), -alpha) | |
| if score > alpha: | |
| best_action = action | |
| alpha = score | |
| # Return the action with the maximum state value | |
| return best_action | |
| # Playout | |
| def playout(state): | |
| # Loss is -1 | |
| if state.is_lose(): | |
| return -1 | |
| # Draw is 0 | |
| if state.is_draw(): | |
| return 0 | |
| # Next state value | |
| return -playout(state.next(random_action(state))) | |
| # Return the index of the maximum value | |
| def argmax(collection): | |
| return collection.index(max(collection)) | |
| # Select an action using Monte Carlo Tree Search | |
| def mcts_action(state): | |
| # Node for Monte Carlo Tree Search | |
| class node: | |
| # Initialization | |
| def __init__(self, state): | |
| self.state = state # State | |
| self.w = 0 # Cumulative value | |
| self.n = 0 # Number of trials | |
| self.child_nodes = None # Child nodes | |
| # Evaluation | |
| def evaluate(self): | |
| # When the game ends | |
| if self.state.is_done(): | |
| # Get value from the game result | |
| value = -1 if self.state.is_lose() else 0 # Loss is -1, draw is 0 | |
| # Update cumulative value and number of trials | |
| self.w += value | |
| self.n += 1 | |
| return value | |
| # When there are no child nodes | |
| if not self.child_nodes: | |
| # Get value from playout | |
| value = playout(self.state) | |
| # Update cumulative value and number of trials | |
| self.w += value | |
| self.n += 1 | |
| # Expand child nodes | |
| if self.n == 10: | |
| self.expand() | |
| return value | |
| # When there are child nodes | |
| else: | |
| # Get value from evaluating the child node with the maximum UCB1 | |
| value = -self.next_child_node().evaluate() | |
| # Update cumulative value and number of trials | |
| self.w += value | |
| self.n += 1 | |
| return value | |
| # Expand child nodes | |
| def expand(self): | |
| legal_actions = self.state.legal_actions() | |
| self.child_nodes = [] | |
| for action in legal_actions: | |
| self.child_nodes.append(node(self.state.next(action))) | |
| # Get the child node with the maximum UCB1 | |
| def next_child_node(self): | |
| # Return the child node with n=0 | |
| for child_node in self.child_nodes: | |
| if child_node.n == 0: | |
| return child_node | |
| # Calculate UCB1 | |
| t = 0 | |
| for c in self.child_nodes: | |
| t += c.n | |
| ucb1_values = [] | |
| for child_node in self.child_nodes: | |
| ucb1_values.append(-child_node.w/child_node.n + 2*(2*math.log(t)/child_node.n)**0.5) | |
| # Return the child node with the maximum UCB1 | |
| return self.child_nodes[argmax(ucb1_values)] | |
| # Generate the root node | |
| root_node = node(state) | |
| root_node.expand() | |
| # Evaluate the root node 100 times | |
| for _ in range(100): | |
| root_node.evaluate() | |
| # Return the action with the maximum number of trials | |
| legal_actions = state.legal_actions() | |
| n_list = [] | |
| for c in root_node.child_nodes: | |
| n_list.append(c.n) | |
| return legal_actions[argmax(n_list)] | |
| # Running the function | |
| if __name__ == '__main__': | |
| # Generate the state | |
| state = State() | |
| # Loop until the game ends | |
| while True: | |
| # When the game ends | |
| if state.is_done(): | |
| break | |
| # Get the next state | |
| state = state.next(random_action(state)) | |
| # Display as a string | |
| print(state) | |
| print() | |