Instructions to use doraking/AlphaQuoridor with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Keras
How to use doraking/AlphaQuoridor with Keras:
```python
# Available backend options are: "jax", "torch", "tensorflow".
import os
os.environ["KERAS_BACKEND"] = "jax"

import keras

model = keras.saving.load_model("hf://doraking/AlphaQuoridor")
```
- Notebooks
- Google Colab
- Kaggle
# ====================
# Self-Play Part
# ====================
# Importing packages
import os
import pickle
from copy import deepcopy
from datetime import datetime
from pathlib import Path

import numpy as np
from tensorflow.keras import backend as K
from tensorflow.keras.models import load_model

from dual_network import DN_OUTPUT_SIZE
from game import State
from pv_mcts import pv_mcts_scores
# Preparing parameters
SP_GAME_COUNT = 50  # Number of games for self-play (25000 in the original version)
SP_TEMPERATURE = 1.0  # Temperature parameter for Boltzmann distribution
# Value of the first player
def first_player_value(ended_state):
    """Return the outcome of a finished game from the first player's side.

    Args:
        ended_state: A state object exposing ``is_lose()`` and
            ``is_first_player()``.
            NOTE(review): presumably both describe the side to move in
            ``ended_state`` -- confirm against the game module.

    Returns:
        1 if the first player wins, -1 if the first player loses, 0 for a draw.
    """
    if not ended_state.is_lose():
        # Nobody lost, so the result is a draw.
        return 0
    # The loss belongs to whoever `is_first_player()` describes; flip the sign
    # when that is not the first player.
    return -1 if ended_state.is_first_player() else 1
# Saving training data
def write_data(history):
    """Pickle ``history`` to ``./data/<YYYYMMDDhhmmss>.history``.

    The ``./data/`` folder is created when it does not exist. The file name is
    the current local time at one-second resolution, so two calls within the
    same second write to the same path and the later call overwrites the
    earlier file.

    Args:
        history: The object to serialize with ``pickle.dump``.
    """
    data_dir = Path('./data/')
    data_dir.mkdir(parents=True, exist_ok=True)  # Create folder if it does not exist
    stamp = datetime.now().strftime('%Y%m%d%H%M%S')
    with open(data_dir / (stamp + '.history'), mode='wb') as f:
        pickle.dump(history, f)
# Executing one game
def play(model):
    """Play one self-play game and return its training records.

    Args:
        model: The network handed to ``pv_mcts_scores`` for move evaluation.

    Returns:
        A list with one ``[pieces_array, policies, value]`` entry per move
        played. ``policies`` has length ``DN_OUTPUT_SIZE`` and is zero for
        every action that was not legal. ``value`` starts as
        ``first_player_value`` of the final state for the first entry and
        flips sign on each following entry.
    """
    # Training data
    history = []

    # Generating the state
    state = State()

    # Loop until the game ends
    while not state.is_done():
        # Getting the probability distribution of legal moves.
        # The search receives a copy, so `state` itself is not handed over.
        scores = pv_mcts_scores(model, deepcopy(state), SP_TEMPERATURE)

        # Query the legal moves once per turn and reuse the result below.
        # NOTE(review): assumes legal_actions() returns the same actions, in
        # the same order, on repeated calls for one state -- confirm.
        legal_actions = state.legal_actions()

        # Adding the state and policy to the training data
        policies = [0] * DN_OUTPUT_SIZE
        for action, policy in zip(legal_actions, scores):
            policies[action] = policy
        history.append([state.pieces_array(), policies, None])

        # Getting the action
        action = np.random.choice(legal_actions, p=scores)

        # Getting the next state
        state = state.next(action)

    # Adding the value to the training data; the sign alternates per entry
    value = first_player_value(state)
    for record in history:
        record[2] = value
        value = -value
    return history
# Self-Play
def self_play():
    """Run ``SP_GAME_COUNT`` self-play games and save the combined history.

    Loads the model from ``./model/best.keras``, plays the games with
    ``play``, prints a progress line after each game, and passes the
    concatenated records to ``write_data``. The Keras session is cleared and
    the model reference dropped afterwards, including when a game or the
    write raises.
    """
    # Training data
    history = []

    # Loading the best player's model
    model = load_model('./model/best.keras')

    try:
        # Executing multiple games
        for i in range(SP_GAME_COUNT):
            # Executing one game
            history.extend(play(model))

            # Output
            print('\rSelfPlay {}/{}'.format(i + 1, SP_GAME_COUNT), end='')
        print('')

        # Saving the training data
        write_data(history)
    finally:
        # Clearing the model, on the error path as well
        K.clear_session()
        del model
# Running the function only when this file is executed as a script
if __name__ == '__main__':
    self_play()