doraking
/

AlphaQuoridor

Model card Files Files and versions

AlphaQuoridor / evaluate_network.py

doraking's picture

Upload 10 files

2437f34 verified almost 2 years ago

history blame contribute delete

2.55 kB

	# ====================
	# New Parameter Evaluation Section
	# ====================

	# Import packages
	from game import State
	from pv_mcts import pv_mcts_action
	from tensorflow.keras.models import load_model
	from tensorflow.keras import backend as K
	from pathlib import Path
	from shutil import copy
	import numpy as np

	# Evaluation settings (read by evaluate_network)
	EN_GAME_COUNT = 10 # Number of games played per evaluation run (originally 400)
	EN_TEMPERATURE = 1.0 # Boltzmann-distribution temperature passed to pv_mcts_action for both players

	# Points for the first player
	def first_player_point(ended_state):
		"""Score a finished game from the first player's point of view.

		Args:
			ended_state: Final game state; must provide ``is_lose()`` and
				``is_first_player()``.

		Returns:
			0.5 when ``is_lose()`` is false (counted as a draw); otherwise 0
			if ``is_first_player()`` is true (first player loses) and 1 if it
			is false (first player wins).
		"""
		# Anything that is not a loss is scored as a draw.
		if not ended_state.is_lose():
			return 0.5

		# A loss was reported: it counts against the first player only when
		# the state says it is the first player's turn.
		if ended_state.is_first_player():
			return 0
		return 1

	# Execute one game
	def play(next_actions):
		"""Play a single game from a fresh ``State`` and score it.

		Args:
			next_actions: Indexable pair of callables. Element 0 chooses the
				move whenever ``state.is_first_player()`` is true, element 1
				otherwise. Each is called with the current state and must
				return an action accepted by ``state.next``.

		Returns:
			The result of ``first_player_point`` for the finished state.
		"""
		state = State()

		# Keep applying moves until the state reports the game is over.
		while not state.is_done():
			# Pick the selector that belongs to the side to move.
			selector_index = 0 if state.is_first_player() else 1
			action = next_actions[selector_index](state)
			state = state.next(action)

		return first_player_point(state)

	# Replace the best player
	def update_best_player():
		"""Copy the latest model file over the best model file and report it."""
		latest_path, best_path = './model/latest.keras', './model/best.keras'
		copy(latest_path, best_path)
		print('Change BestPlayer')

	# Network evaluation
	def evaluate_network():
		"""Play the latest model against the best model and promote it on a win.

		Plays ``EN_GAME_COUNT`` games, alternating which model moves first,
		and averages the latest model's points (1 win, 0.5 draw, 0 loss).

		Returns:
			True if the average exceeded 0.5 and ``update_best_player`` was
			called, False otherwise.
		"""
		# Load the challenger first, then the current best.
		latest_model = load_model('./model/latest.keras')
		best_model = load_model('./model/best.keras')

		# One PV-MCTS move selector per model; index 0 is the latest model.
		next_actions = (
			pv_mcts_action(latest_model, EN_TEMPERATURE),
			pv_mcts_action(best_model, EN_TEMPERATURE),
		)

		total_point = 0
		for game_index in range(EN_GAME_COUNT):
			# Even games: latest model moves first. Odd games: the order is
			# reversed, and since play() scores the first mover, the result
			# is flipped back to the latest model's point of view.
			latest_point = (
				play(next_actions)
				if game_index % 2 == 0
				else 1 - play(list(reversed(next_actions)))
			)
			total_point += latest_point

			# Progress output, overwritten in place on the same line.
			print('\rEvaluate {}/{}'.format(game_index + 1, EN_GAME_COUNT), end='')
		print('')

		average_point = total_point / EN_GAME_COUNT
		print('AveragePoint', average_point)

		# Reset the Keras session and drop the local model references.
		K.clear_session()
		del latest_model
		del best_model

		# Promote only on a strictly better-than-even score.
		if not average_point > 0.5:
			return False
		update_best_player()
		return True

	# Operation check
	if __name__ == '__main__':
		# Run a single evaluation when this file is executed as a script.
		evaluate_network()