connect-4-API / arena.py
Gruhit Patel
init-backend
1fab54b verified
from game import Connect4
from agent import Agent
from model import Model
from mcts import MCTS_NN
from typing import Union
import tqdm
import numpy as np
def play_selfgames(agent: Agent, training_games: int):
for _ in tqdm(range(training_games)):
board = Connect4(row=agent.row, col=agent.col)
agent.reset(state = board)
# a buffer list to store the transition of current episode
episodic_buffer = []
while not board.is_win() and not board.is_draw():
# While getting the action the search is performed
# also the experience is stored in it
action, policy = agent.get_action()
episodic_buffer.append([
board.get_state(),
board.player_1,
policy
])
board, _ = board.drop_piece(action)
# Update the root node of MCTS to one of its child node
agent.update(action)
# When the episode is compelted update the buffer
agent.update_buffer(episodic_buffer)
def get_move_for_bot(state: Connect4, model: Model, tree_iters: int, random_move: bool = False) -> int:
mcts = MCTS_NN(state = state, model = model)
for _ in range(tree_iters):
mcts.selection(mcts.root, random_move)
policy = mcts.get_policy_pie()
act = np.argmax(policy)
return act
def play_game_against_bot(bot1: Model, bot2: Model, tree_iters:int) -> Union[None, int]:
board = Connect4()
player_1 = True
# In function bot1 will be always datagen model to make 1st move
# bot2 will be main_model to make 2nd move
# We randomly allow them to make first move based for 50% of time
flip = False
if np.random.uniform() < 0.5:
flip = True
(bot1, bot2) = (bot2, bot1)
print("Bot has been flipped")
while not board.is_win() and not board.is_draw():
if player_1:
act = get_move_for_bot(board, model=bot1, tree_iters=tree_iters)
player_1 = False
else:
act = get_move_for_bot(board, model=bot2, tree_iters=tree_iters)
player_1 = True
board, win = board.drop_piece(act)
print(board)
# Here returning
# 0 - draw
# 1 - datagen won
# -1 - main_model won
# Hence when flipped we have to handle the values accordingly
if flip:
# Thus if we have flipped then main_model who is player 1 if its has won
# then we want to return -1 for it and vice-versa
return 0 if win == None else win*-1
else:
return 0 if win == None else win