# File size: 2,723 Bytes
# 2437f34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# ====================
# Self-Play Part
# ====================

# Importing packages
from game import State
from pv_mcts import pv_mcts_scores
from dual_network import DN_OUTPUT_SIZE
from datetime import datetime
from tensorflow.keras.models import load_model
from tensorflow.keras import backend as K
from pathlib import Path
import numpy as np
import pickle
import os
from copy import deepcopy

# Preparing parameters
# SP_GAME_COUNT is how many games self_play() runs per invocation;
# SP_TEMPERATURE is forwarded to pv_mcts_scores() for every move in play().
SP_GAME_COUNT = 50  # Number of games for self-play (25000 in the original version)
SP_TEMPERATURE = 1.0  # Temperature parameter for Boltzmann distribution

# Outcome of a finished game, seen from the first player's side
def first_player_value(ended_state):
    """Return the result of a finished game from the first player's viewpoint.

    Args:
        ended_state: A game state exposing ``is_lose()`` and
            ``is_first_player()``.

    Returns:
        1 if the first player wins, -1 if the first player loses,
        0 for a draw (i.e. whenever ``is_lose()`` is false).
    """
    # Anything that is not a loss for the side to move is scored as a draw.
    if not ended_state.is_lose():
        return 0

    # The side to move has lost: that is bad for the first player only when
    # the first player is the one to move.
    if ended_state.is_first_player():
        return -1
    return 1

# Persisting self-play records to disk
def write_data(history):
    """Pickle ``history`` into a timestamped file under ``./data/``.

    The file name is the current time (as returned by ``datetime.now()``)
    rendered as ``YYYYMMDDhhmmss`` and followed by ``.history``.

    Args:
        history: Object to serialize with ``pickle.dump``.
    """
    # timetuple()[:6] is (year, month, day, hour, minute, second).
    stamp = datetime.now().timetuple()[:6]

    # Make sure the output folder is there before the file is opened.
    os.makedirs('./data/', exist_ok=True)
    path = './data/{:04}{:02}{:02}{:02}{:02}{:02}.history'.format(*stamp)

    with open(path, mode='wb') as out_file:
        pickle.dump(history, out_file)

# Running a single self-play game
def play(model):
    """Play one game from a fresh ``State`` and return its training records.

    Args:
        model: Forwarded to ``pv_mcts_scores`` to score the legal moves.

    Returns:
        A list with one ``[pieces_array, policies, value]`` entry per move
        played. ``policies`` is a list of ``DN_OUTPUT_SIZE`` numbers holding
        the score of each legal action at that action's index and 0
        elsewhere. ``value`` is ``first_player_value`` of the final state for
        the first entry and flips sign on every following entry.
    """
    records = []
    state = State()

    # Keep moving until the game reports that it is finished.
    while not state.is_done():
        # Scores for the legal moves; the search receives a copy of the state.
        move_probs = pv_mcts_scores(model, deepcopy(state), SP_TEMPERATURE)

        # Spread the per-legal-move scores over a full-size policy vector.
        policy_vector = [0] * DN_OUTPUT_SIZE
        for legal_action, prob in zip(state.legal_actions(), move_probs):
            policy_vector[legal_action] = prob
        # The value slot is not known until the game is over.
        records.append([state.pieces_array(), policy_vector, None])

        # Sample a move according to the scores and advance the game.
        chosen = np.random.choice(state.legal_actions(), p=move_probs)
        state = state.next(chosen)

    # Fill in the value slot, alternating the sign from one entry to the next
    # because consecutive entries belong to opposite players.
    outcome = first_player_value(state)
    for record in records:
        record[2] = outcome
        outcome = -outcome
    return records

# Self-Play driver
def self_play():
    """Run ``SP_GAME_COUNT`` self-play games and save the collected records.

    Loads the model from ``./model/best.keras``, plays the games with
    ``play``, prints a one-line progress counter, hands the combined records
    to ``write_data`` and finally clears the Keras session.
    """
    # The best player's model drives every game.
    model = load_model('./model/best.keras')

    # Records of all games, concatenated in playing order.
    collected = []
    for game_number in range(1, SP_GAME_COUNT + 1):
        collected.extend(play(model))

        # '\r' rewrites the same console line on each update.
        print('\rSelfPlay {}/{}'.format(game_number, SP_GAME_COUNT), end='')
    # Finish the progress line.
    print('')

    write_data(collected)

    # Release the model.
    K.clear_session()
    del model

# Running the function
# Self-play starts only when this file is executed as a script, not on import.
if __name__ == '__main__':
    self_play()