AlphaQuoridor / evaluate_network.py
doraking's picture
Upload 10 files
2437f34 verified
# ====================
# New Parameter Evaluation Section
# ====================
# Import packages
from game import State
from pv_mcts import pv_mcts_action
from tensorflow.keras.models import load_model
from tensorflow.keras import backend as K
from pathlib import Path
from shutil import copy
import numpy as np
# Prepare parameters
EN_GAME_COUNT = 10 # Number of games per evaluation (originally 400)
EN_TEMPERATURE = 1.0 # Temperature of the Boltzmann distribution
# Points for the first player
def first_player_point(ended_state):
# 1: first player wins, 0: first player loses, 0.5: draw
if ended_state.is_lose():
return 0 if ended_state.is_first_player() else 1
return 0.5
# Execute one game
def play(next_actions):
# Generate state
state = State()
# Loop until the game ends
while True:
# When the game ends
if state.is_done():
break
# Get action
next_action = next_actions[0] if state.is_first_player() else next_actions[1]
action = next_action(state)
# Get the next state
state = state.next(action)
# Return points for the first player
return first_player_point(state)
# Replace the best player
def update_best_player():
copy('./model/latest.keras', './model/best.keras')
print('Change BestPlayer')
# Network evaluation
def evaluate_network():
# Load the model of the latest player
model0 = load_model('./model/latest.keras')
# Load the model of the best player
model1 = load_model('./model/best.keras')
# Generate a function to select actions using PV MCTS
next_action0 = pv_mcts_action(model0, EN_TEMPERATURE)
next_action1 = pv_mcts_action(model1, EN_TEMPERATURE)
next_actions = (next_action0, next_action1)
# Repeat multiple matches
total_point = 0
for i in range(EN_GAME_COUNT):
# Execute one game
if i % 2 == 0:
total_point += play(next_actions)
else:
total_point += 1 - play(list(reversed(next_actions)))
# Output
print('\rEvaluate {}/{}'.format(i + 1, EN_GAME_COUNT), end='')
print('')
# Calculate average points
average_point = total_point / EN_GAME_COUNT
print('AveragePoint', average_point)
# Clear models
K.clear_session()
del model0
del model1
# Replace the best player
if average_point > 0.5:
update_best_player()
return True
else:
return False
# Operation check
if __name__ == '__main__':
evaluate_network()