Spaces:

Gomoku-Zero
/

Demo

Sleeping

App Files Community

HuskyDoge committed on Dec 21, 2023

Commit

d97a106

1 Parent(s): ef116e6

AI VS AI bugs

Browse files

Files changed (5) hide show

Gomoku_MCTS/policy_value_net_pytorch_new.py +4 -4
const.py +17 -2
pages/AI_VS_AI.py +256 -188
pages/Player_VS_AI.py +59 -18
pages/Try.py +17 -0

Gomoku_MCTS/policy_value_net_pytorch_new.py CHANGED Viewed

@@ -20,8 +20,6 @@ def set_learning_rate(optimizer, lr):
         param_group['lr'] = lr
 class ResidualBlock(nn.Module):
     def __init__(self, channels):
         super(ResidualBlock, self).__init__()
@@ -37,8 +35,10 @@ class ResidualBlock(nn.Module):
         out += residual
         return F.relu(out)
 class Net(nn.Module):
     """Policy-Value network module for AlphaZero Gomoku."""
     def __init__(self, board_width, board_height, num_residual_blocks=5):
         super(Net, self).__init__()
         self.board_width = board_width
@@ -78,7 +78,7 @@ class PolicyValueNet():
     """policy-value network """
     def __init__(self, board_width, board_height,
-                 model_file=None, use_gpu=False, bias = False):
         self.device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
         self.use_gpu = use_gpu
         self.l2_const = 1e-4  # coef of l2 penalty
@@ -111,7 +111,7 @@ class PolicyValueNet():
                 self.policy_value_net = Net(board_width, board_height)
         self.optimizer = optim.Adam(self.policy_value_net.parameters(),
-                                        weight_decay=self.l2_const)
     def infer_board_size_from_model(self, model):
         # Use the size of the act_fc1 layer to infer board dimensions

         param_group['lr'] = lr
 class ResidualBlock(nn.Module):
     def __init__(self, channels):
         super(ResidualBlock, self).__init__()
         out += residual
         return F.relu(out)
 class Net(nn.Module):
     """Policy-Value network module for AlphaZero Gomoku."""
     def __init__(self, board_width, board_height, num_residual_blocks=5):
         super(Net, self).__init__()
         self.board_width = board_width
     """policy-value network """
     def __init__(self, board_width, board_height,
+                 model_file=None, use_gpu=False, bias=False):
         self.device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
         self.use_gpu = use_gpu
         self.l2_const = 1e-4  # coef of l2 penalty
                 self.policy_value_net = Net(board_width, board_height)
         self.optimizer = optim.Adam(self.policy_value_net.parameters(),
+                                    weight_decay=self.l2_const)
     def infer_board_size_from_model(self, model):
         # Use the size of the act_fc1 layer to infer board dimensions

const.py CHANGED Viewed

@@ -15,13 +15,22 @@ _BLANK = 0
 _BLACK = 1
 _WHITE = 2
 _NEW = 3
-_PLAYER_SYMBOL = {
     _WHITE: "⚪",
     _BLANK: "➕",
     _BLACK: "⚫",
     _NEW: "🔴",
 }
 _PLAYER_COLOR = {
     _WHITE: "AI",
     _BLANK: "Blank",
@@ -70,8 +79,14 @@ _ROOM_COLOR = {
 }
 _MODEL_PATH = {
     "AlphaZero": "Gomoku_MCTS/checkpoint/2023-12-14-18-17-07_test_teaching_learning_collect_epochs=1000_size=9_model=normal/best_policy.model",
-    "duel": "Gomoku_MCTS/checkpoint/2023-12-14-18-16-09_test_teaching_learning_collect_epochs=1000_size=9_model=duel/best_policy.model",
     "Gumbel AlphaZero": "Gomoku_MCTS/checkpoint/2023-12-14-21-19-40_selfplay_epochs=1000_size=9_model=gumbel/best_policy.model",
 }

 _BLACK = 1
 _WHITE = 2
 _NEW = 3
+_PLAYER_SYMBOL1 = {
     _WHITE: "⚪",
     _BLANK: "➕",
     _BLACK: "⚫",
     _NEW: "🔴",
+}
+_PLAYER_SYMBOL2 = {
+    _BLACK: "⚪",
+    _BLANK: "➕",
+    _WHITE: "⚫",
+    _NEW: "🔴",
 }
 _PLAYER_COLOR = {
     _WHITE: "AI",
     _BLANK: "Blank",
 }
+# _MODEL_PATH = {
+#     "AlphaZero": "Gomoku_MCTS/checkpoint/2023-12-14-18-17-07_test_teaching_learning_collect_epochs=1000_size=9_model=normal/best_policy.model",
+#     "duel": "Gomoku_MCTS/checkpoint/2023-12-14-18-16-09_test_teaching_learning_collect_epochs=1000_size=9_model=duel/best_policy.model",
+#     "Gumbel AlphaZero": "Gomoku_MCTS/checkpoint/2023-12-14-21-19-40_selfplay_epochs=1000_size=9_model=gumbel/best_policy.model",
+# }
 _MODEL_PATH = {
     "AlphaZero": "Gomoku_MCTS/checkpoint/2023-12-14-18-17-07_test_teaching_learning_collect_epochs=1000_size=9_model=normal/best_policy.model",
+    "duel": "/Users/husky/GomokuDemo/Gomoku_MCTS/checkpoint/2023-12-14-10-22-12_test_teaching_learning_collect_epochs=1000_size=9_model=duel/best_policy.model",
     "Gumbel AlphaZero": "Gomoku_MCTS/checkpoint/2023-12-14-21-19-40_selfplay_epochs=1000_size=9_model=gumbel/best_policy.model",
 }

pages/AI_VS_AI.py CHANGED Viewed

@@ -8,27 +8,25 @@ Description: this file is used to display our project and add visualization elem
 import time
 import pandas as pd
 from copy import deepcopy
-import torch
-# import torch
 import numpy as np
 import streamlit as st
 from scipy.signal import convolve  # this is used to check if any player wins
 from streamlit import session_state
 from streamlit_server_state import server_state, server_state_lock
-from Gomoku_MCTS import MCTSpure, alphazero, Board, PolicyValueNet
 from Gomoku_Bot import Gomoku_bot
 from Gomoku_Bot import Board as Gomoku_bot_board
-import matplotlib.pyplot as plt
 from const import (
     _BLACK,  # 1, for human
     _WHITE,  # 2 , for AI
     _BLANK,
     _PLAYER_COLOR,
-    _PLAYER_SYMBOL,
     _ROOM_COLOR,
     _VERTICAL,
     _NEW,
@@ -36,41 +34,71 @@ from const import (
     _DIAGONAL_UP_LEFT,
     _DIAGONAL_UP_RIGHT,
     _BOARD_SIZE,
-    _BOARD_SIZE_1D,
-    _AI_AID_INFO
 )
-from ai import (
-    BOS_TOKEN_ID,
-    generate_gpt2,
-    load_model,
-)
-gpt2 = load_model()
 # Utils
 class Room:
     def __init__(self, room_id) -> None:
         self.ROOM_ID = room_id
         # self.BOARD = np.zeros(shape=(_BOARD_SIZE, _BOARD_SIZE), dtype=int)
-        self.BOARD = Board(width=_BOARD_SIZE, height=_BOARD_SIZE, n_in_row=5, players=[_BLACK, _WHITE])
-        self.PLAYER = _BLACK
         self.TURN = self.PLAYER
         self.HISTORY = (0, 0)
         self.WINNER = _BLANK
         self.TIME = time.time()
         self.gomoku_bot_board = Gomoku_bot_board(_BOARD_SIZE, 1)
         self.MCTS_dict = {'Pure MCTS': MCTSpure(c_puct=5, n_playout=1000),
-                          'AlphaZero': alphazero(PolicyValueNet(_BOARD_SIZE, _BOARD_SIZE, 'Gomoku_MCTS/checkpoints/best_policy_8_8_5_2torch.pth').policy_value_fn, c_puct=5, n_playout=100),
                           'Gomoku Bot': Gomoku_bot(self.gomoku_bot_board, -1)}
         self.MCTS = self.MCTS_dict['AlphaZero']
         self.last_mcts = self.MCTS
         self.AID_MCTS = self.MCTS_dict['AlphaZero']
-        self.COORDINATE_1D = [BOS_TOKEN_ID]
         self.current_move = -1
-        self.simula_time_list = []
 def change_turn(cur):
@@ -90,9 +118,9 @@ if "ROOMS" not in server_state:
     with server_state_lock["ROOMS"]:
         server_state.ROOMS = {}
 def handle_oppo_model_selection():
     if st.session_state['selected_oppo_model'] == 'Gomoku Bot':
-        session_state.ROOM.last_mcts = session_state.ROOM.MCTS # since use different mechanism, store previous mcts first
         session_state.ROOM.MCTS = session_state.ROOM.MCTS_dict['Gomoku Bot']
         return
     else:
@@ -100,37 +128,66 @@ def handle_oppo_model_selection():
         new_mct = session_state.ROOM.MCTS_dict[st.session_state['selected_oppo_model']]
         new_mct.mcts._root = deepcopy(TreeNode)
         session_state.ROOM.MCTS = new_mct
-        session_state.ROOM.last_mcts  = new_mct
     return
 def handle_aid_model_selection():
     if st.session_state['selected_aid_model'] == 'None':
         session_state.USE_AIAID = False
         return
     session_state.USE_AIAID = True
-    TreeNode = session_state.ROOM.MCTS.mcts._root # use the same tree node
     new_mct = session_state.ROOM.MCTS_dict[st.session_state['selected_aid_model']]
     new_mct.mcts._root = deepcopy(TreeNode)
     session_state.ROOM.AID_MCTS = new_mct
     return
 if 'selected_oppo_model' not in st.session_state:
     st.session_state['selected_oppo_model'] = 'AlphaZero'  # 默认值
 if 'selected_aid_model' not in st.session_state:
     st.session_state['selected_aid_model'] = 'AlphaZero'  # 默认值
 # Layout
 TITLE = st.empty()
 Model_Switch = st.empty()
 TITLE.header("🤖 AI 3603 Gomoku")
-selected_oppo_option = Model_Switch.selectbox('Select Opponent Model', ['Pure MCTS', 'AlphaZero','Gomoku Bot'], index=1, key='oppo_model')
 if st.session_state['selected_oppo_model'] != selected_oppo_option:
     st.session_state['selected_oppo_model'] = selected_oppo_option
     handle_oppo_model_selection()
 ROUND_INFO = st.empty()
 st.markdown("<br>", unsafe_allow_html=True)
 BOARD_PLATE = [
@@ -149,9 +206,11 @@ MULTIPLAYER_TAG = st.sidebar.empty()
 with st.sidebar.container():
     ANOTHER_ROUND = st.empty()
     RESTART = st.empty()
     AIAID = st.empty()
     EXIT = st.empty()
-selected_aid_option = AIAID.selectbox('Select Assistant Model', ['None', 'Pure MCTS', 'AlphaZero'], index=0, key='aid_model')
 if st.session_state['selected_aid_model'] != selected_aid_option:
     st.session_state['selected_aid_model'] = selected_aid_option
     handle_aid_model_selection()
@@ -174,7 +233,6 @@ GAME_INFO.markdown(
 )
 def restart() -> None:
     """
     Restart the game.
@@ -182,12 +240,52 @@ def restart() -> None:
     session_state.ROOM = Room(session_state.ROOM.ROOM_ID)
     st.session_state['selected_oppo_model'] = 'AlphaZero'
 RESTART.button(
     "Reset",
     on_click=restart,
     help="Clear the board as well as the scores",
 )
 # Draw the board
 def gomoku():
@@ -207,14 +305,25 @@ def gomoku():
         session_state.ROOM.BOARD = Board(width=_BOARD_SIZE, height=_BOARD_SIZE, n_in_row=5)
         session_state.ROOM.gomoku_bot_board = Gomoku_bot_board(_BOARD_SIZE, 1)
         session_state.ROOM.MCTS_dict = {'Pure MCTS': MCTSpure(c_puct=5, n_playout=1000),
-                          'AlphaZero': alphazero(PolicyValueNet(_BOARD_SIZE, _BOARD_SIZE, 'Gomoku_MCTS/checkpoints/best_policy_8_8_5_2torch.pth').policy_value_fn, c_puct=5, n_playout=100),
-                          'Gomoku Bot': Gomoku_bot(session_state.ROOM.gomoku_bot_board, -1)}
         session_state.ROOM.MCTS = session_state.ROOM.MCTS_dict[st.session_state['selected_oppo_model']]
         session_state.ROOM.last_mcts = session_state.ROOM.MCTS
         session_state.ROOM.PLAYER = session_state.ROOM.PLAYER
         session_state.ROOM.TURN = session_state.ROOM.PLAYER
         session_state.ROOM.WINNER = _BLANK  # 0
-        session_state.ROOM.COORDINATE_1D = [BOS_TOKEN_ID]
     # Room status sync
     def sync_room() -> bool:
@@ -235,59 +344,6 @@ def gomoku():
             session_state.ROOM = server_state.ROOMS[room_id]
             return True
-    # Check if winner emerge from move
-    def check_win() -> int:
-        """
-        Use convolution to check if any player wins.
-        """
-        vertical = convolve(
-            session_state.ROOM.BOARD.board_map,
-            _VERTICAL,
-            mode="same",
-        )
-        horizontal = convolve(
-            session_state.ROOM.BOARD.board_map,
-            _HORIZONTAL,
-            mode="same",
-        )
-        diagonal_up_left = convolve(
-            session_state.ROOM.BOARD.board_map,
-            _DIAGONAL_UP_LEFT,
-            mode="same",
-        )
-        diagonal_up_right = convolve(
-            session_state.ROOM.BOARD.board_map,
-            _DIAGONAL_UP_RIGHT,
-            mode="same",
-        )
-        if (
-                np.max(
-                    [
-                        np.max(vertical),
-                        np.max(horizontal),
-                        np.max(diagonal_up_left),
-                        np.max(diagonal_up_right),
-                    ]
-                )
-                == 5 * _BLACK
-        ):
-            winner = _BLACK
-        elif (
-                np.min(
-                    [
-                        np.min(vertical),
-                        np.min(horizontal),
-                        np.min(diagonal_up_left),
-                        np.min(diagonal_up_right),
-                    ]
-                )
-                == 5 * _WHITE
-        ):
-            winner = _WHITE
-        else:
-            winner = _BLANK
-        return winner
     # Triggers the board response on click
     def handle_click(x, y):
         """
@@ -310,7 +366,8 @@ def gomoku():
             session_state.ROOM.current_move = move
             session_state.ROOM.BOARD.do_move(move)
             # Gomoku Bot BOARD
-            session_state.ROOM.MCTS_dict["Gomoku Bot"].board.put(move // _BOARD_SIZE, move % _BOARD_SIZE)
             session_state.ROOM.BOARD.board_map[x][y] = session_state.ROOM.TURN
             session_state.ROOM.COORDINATE_1D.append(x * _BOARD_SIZE + y)
@@ -333,98 +390,121 @@ def gomoku():
     # Draw board
     def draw_board(response: bool):
         """construct each buttons for all cells of the board"""
-        if session_state.USE_AIAID and session_state.ROOM.WINNER == _BLANK and session_state.ROOM.TURN == _BLACK:
-            if session_state.USE_AIAID:
-                copy_mcts = deepcopy(session_state.ROOM.AID_MCTS.mcts)
-                _, acts_aid, probs_aid, simul_mean_time_aid = copy_mcts.get_move_probs(session_state.ROOM.BOARD)
-                sorted_acts_probs = sorted(zip(acts_aid, probs_aid), key=lambda x: x[1], reverse=True)
-                top_five_acts = [act for act, prob in sorted_acts_probs[:5]]
-                top_five_probs = [prob for act, prob in sorted_acts_probs[:5]]
-        if response and session_state.ROOM.TURN == _BLACK:  # human turn
-            print("Your turn")
-            # construction of clickable buttons
-            cur_move = (session_state.ROOM.current_move // _BOARD_SIZE, session_state.ROOM.current_move % _BOARD_SIZE)
-            for i, row in enumerate(session_state.ROOM.BOARD.board_map):
-                # print("row:", row)
-                for j, cell in enumerate(row):
-                    if (
-                            i * _BOARD_SIZE + j
-                            in (session_state.ROOM.COORDINATE_1D)
-                    ):
-                        if i == cur_move[0] and j == cur_move[1]:
-                            BOARD_PLATE[i][j].button(
-                                _PLAYER_SYMBOL[_NEW],
-                                key=f"{i}:{j}",
-                                args=(i, j),
-                                on_click=handle_click,
-                            )
-                        else:
-                            # disable click for GPT choices
-                            BOARD_PLATE[i][j].button(
-                                _PLAYER_SYMBOL[cell],
-                                key=f"{i}:{j}",
-                                args=(i, j),
-                                on_click=forbid_click
-                            )
-                    else:
-                        if session_state.USE_AIAID and i * _BOARD_SIZE + j in top_five_acts:
-                            # enable click for other cells available for human choices
-                            prob = top_five_probs[top_five_acts.index(i * _BOARD_SIZE + j)]
-                            BOARD_PLATE[i][j].button(
-                                _PLAYER_SYMBOL[cell] + f"({round(prob, 2)})",
-                                key=f"{i}:{j}",
-                                on_click=handle_click,
-                                args=(i, j),
-                            )
                         else:
-                            # enable click for other cells available for human choices
-                            BOARD_PLATE[i][j].button(
-                                _PLAYER_SYMBOL[cell],
-                                key=f"{i}:{j}",
-                                on_click=handle_click,
-                                args=(i, j),
-                            )
         elif response and session_state.ROOM.TURN == _WHITE:  # AI turn
             message.empty()
             with st.spinner('🔮✨ Waiting for AI response... ⏳🚀'):
                 time.sleep(0.1)
                 print("AI's turn")
                 print("Below are current board under AI's view")
-                # print(session_state.ROOM.BOARD.board_map)
-                # move = _BOARD_SIZE * _BOARD_SIZE
-                # forbid = []
-                # step = 0.1
-                # tmp = 0.7
-                # while move >= _BOARD_SIZE * _BOARD_SIZE or move in session_state.ROOM.COORDINATE_1D:
-                #
-                #     gpt_predictions = generate_gpt2(
-                #         gpt2,
-                #         torch.tensor(session_state.ROOM.COORDINATE_1D).unsqueeze(0),
-                #         tmp
-                #     )
-                #     print(gpt_predictions)
-                #     move = gpt_predictions[len(session_state.ROOM.COORDINATE_1D)]
-                #     print(move)
-                #     tmp += step
-                #     # if move >= _BOARD_SIZE * _BOARD_SIZE:
-                #     #     forbid.append(move)
-                #     # else:
-                #     #     break
-                #
-                #
-                # gpt_response = move
-                # gpt_i, gpt_j = gpt_response // _BOARD_SIZE, gpt_response % _BOARD_SIZE
-                # print(gpt_i, gpt_j)
-                # # session_state.ROOM.BOARD[gpt_i][gpt_j] = session_state.ROOM.TURN
-                #
-                # simul_time = 0
                 if st.session_state['selected_oppo_model'] != 'Gomoku Bot':
                     move, simul_time = session_state.ROOM.MCTS.get_action(session_state.ROOM.BOARD, return_time=True)
                 else:
                     move, simul_time = session_state.ROOM.MCTS.get_action(return_time=True)
-                session_state.ROOM.simula_time_list.append(simul_time)
                 print("AI takes move: ", move)
                 session_state.ROOM.current_move = move
                 gpt_response = move
@@ -436,7 +516,8 @@ def gomoku():
                 # MCTS BOARD
                 session_state.ROOM.BOARD.do_move(move)
                 # Gomoku Bot BOARD
-                session_state.ROOM.MCTS_dict["Gomoku Bot"].board.put(move // _BOARD_SIZE, move % _BOARD_SIZE)
                 # session_state.ROOM.BOARD[gpt_i][gpt_j] = session_state.ROOM.TURN
                 session_state.ROOM.COORDINATE_1D.append(gpt_i * _BOARD_SIZE + gpt_j)
@@ -457,43 +538,43 @@ def gomoku():
                     for j, cell in enumerate(row):
                         if (
                                 i * _BOARD_SIZE + j
-                                in (session_state.ROOM.COORDINATE_1D)
                         ):
                             if i == gpt_i and j == gpt_j:
                                 BOARD_PLATE[i][j].button(
-                                    _PLAYER_SYMBOL[_NEW],
                                     key=f"{i}:{j}",
                                     args=(i, j),
-                                    on_click=handle_click,
                                 )
                             else:
                                 # disable click for GPT choices
                                 BOARD_PLATE[i][j].button(
-                                    _PLAYER_SYMBOL[cell],
                                     key=f"{i}:{j}",
                                     args=(i, j),
                                     on_click=forbid_click
                                 )
                         else:
-                            if session_state.USE_AIAID and i * _BOARD_SIZE + j in top_five_acts and not session_state.ROOM.BOARD.game_end()[0]:
                                 # enable click for other cells available for human choices
                                 prob = top_five_probs[top_five_acts.index(i * _BOARD_SIZE + j)]
                                 BOARD_PLATE[i][j].button(
-                                    _PLAYER_SYMBOL[cell] + f"({round(prob, 2)})",
                                     key=f"{i}:{j}",
-                                    on_click=handle_click,
                                     args=(i, j),
                                 )
                             else:
                                 # enable click for other cells available for human choices
                                 BOARD_PLATE[i][j].button(
-                                    _PLAYER_SYMBOL[cell],
                                     key=f"{i}:{j}",
-                                    on_click=handle_click,
                                     args=(i, j),
                                 )
             message.markdown(
                 'AI agent has calculated its strategy, which takes <span style="color: blue; font-size: 20px;">{:.3e}</span>s per simulation.'.format(
                     simul_time),
@@ -522,7 +603,7 @@ def gomoku():
             for i, row in enumerate(session_state.ROOM.BOARD.board_map):
                 for j, cell in enumerate(row):
                     BOARD_PLATE[i][j].write(
-                        _PLAYER_SYMBOL[cell],
                         # key=f"{i}:{j}",
                     )
@@ -549,24 +630,11 @@ def gomoku():
             ROUND_INFO.write(
                 f"#### **{_PLAYER_COLOR[session_state.ROOM.WINNER]} WIN!**\n**Click buttons on the left for more plays.**"
             )
-        # elif 0 not in session_state.ROOM.BOARD.board_map:
-        #     ROUND_INFO.write("#### **Tie**")
-        # else:
-        #     ROUND_INFO.write(
-        #         f"#### **{_PLAYER_SYMBOL[session_state.ROOM.TURN]} {_PLAYER_COLOR[session_state.ROOM.TURN]}'s turn...**"
-        #     )
-        # draw the plot for simulation time
-        # 创建一个 DataFrame
-        # print(session_state.ROOM.simula_time_list)
         st.markdown("<br>", unsafe_allow_html=True)
         st.markdown("<br>", unsafe_allow_html=True)
-        chart_data = pd.DataFrame(session_state.ROOM.simula_time_list, columns=["Simulation Time"])
         st.line_chart(chart_data)
     game_control()
     update_info()

 import time
 import pandas as pd
 from copy import deepcopy
 import numpy as np
 import streamlit as st
 from scipy.signal import convolve  # this is used to check if any player wins
 from streamlit import session_state
 from streamlit_server_state import server_state, server_state_lock
+from Gomoku_MCTS import MCTSpure, alphazero, Board, PolicyValueNet_old, PolicyValueNet_new, duel_PolicyValueNet, \
+    Gumbel_MCTSPlayer
 from Gomoku_Bot import Gomoku_bot
 from Gomoku_Bot import Board as Gomoku_bot_board
+import matplotlib.pyplot as plt
 from const import (
     _BLACK,  # 1, for human
     _WHITE,  # 2 , for AI
     _BLANK,
     _PLAYER_COLOR,
+    _PLAYER_SYMBOL1,
+    _PLAYER_SYMBOL2,
     _ROOM_COLOR,
     _VERTICAL,
     _NEW,
     _DIAGONAL_UP_LEFT,
     _DIAGONAL_UP_RIGHT,
     _BOARD_SIZE,
+    _MODEL_PATH
 )
+_PLAYER_SYMBOL = [0, _PLAYER_SYMBOL1, _PLAYER_SYMBOL2]
+# '''
+# from ai import (
+#     BOS_TOKEN_ID,
+#     generate_gpt2,
+#     load_model,
+# )
+#
+# gpt2 = load_model()
+#
+# '''
+if "FirstPlayer" not in session_state:
+    session_state.FirstPlayer = _BLACK
+    session_state.Player = [[], [ _BLACK,_WHITE], [_WHITE,_BLACK]][session_state.FirstPlayer]
+    session_state.Symbol = _PLAYER_SYMBOL[session_state.FirstPlayer]
 # Utils
 class Room:
     def __init__(self, room_id) -> None:
         self.ROOM_ID = room_id
         # self.BOARD = np.zeros(shape=(_BOARD_SIZE, _BOARD_SIZE), dtype=int)
+        self.BOARD = Board(width=_BOARD_SIZE, height=_BOARD_SIZE, n_in_row=5, players=session_state.Player)
+        self.PLAYER = session_state.FirstPlayer
         self.TURN = self.PLAYER
         self.HISTORY = (0, 0)
         self.WINNER = _BLANK
         self.TIME = time.time()
         self.gomoku_bot_board = Gomoku_bot_board(_BOARD_SIZE, 1)
         self.MCTS_dict = {'Pure MCTS': MCTSpure(c_puct=5, n_playout=1000),
+                          'AlphaZero': alphazero(PolicyValueNet_new(_BOARD_SIZE, _BOARD_SIZE,
+                                                                    _MODEL_PATH["AlphaZero"]).policy_value_fn,
+                                                 c_puct=5, n_playout=100),
+                          'duel': alphazero(duel_PolicyValueNet(_BOARD_SIZE, _BOARD_SIZE,
+                                                                _MODEL_PATH["duel"]).policy_value_fn,
+                                            c_puct=5, n_playout=100),
+                          'Gumbel AlphaZero': Gumbel_MCTSPlayer(PolicyValueNet_new(_BOARD_SIZE, _BOARD_SIZE,
+                                                                                   _MODEL_PATH["Gumbel AlphaZero"]).policy_value_fn,
+                                                                c_puct=5, n_playout=100, m_action=8),
+                          'Gomoku Bot': Gomoku_bot(self.gomoku_bot_board, -1)}
+        self.MCTS_dict_ = {'Pure MCTS': MCTSpure(c_puct=5, n_playout=1000),
+                          'AlphaZero': alphazero(PolicyValueNet_new(_BOARD_SIZE, _BOARD_SIZE,
+                                                                    _MODEL_PATH["AlphaZero"]).policy_value_fn,
+                                                 c_puct=5, n_playout=100),
+                          'duel': alphazero(duel_PolicyValueNet(_BOARD_SIZE, _BOARD_SIZE,
+                                                                _MODEL_PATH["duel"]).policy_value_fn,
+                                            c_puct=5, n_playout=100),
+                          'Gumbel AlphaZero': Gumbel_MCTSPlayer(PolicyValueNet_new(_BOARD_SIZE, _BOARD_SIZE,
+                                                                                   _MODEL_PATH["Gumbel AlphaZero"]).policy_value_fn,
+                                                                c_puct=5, n_playout=100, m_action=8),
                           'Gomoku Bot': Gomoku_bot(self.gomoku_bot_board, -1)}
         self.MCTS = self.MCTS_dict['AlphaZero']
+        self.MCTS_ = self.MCTS_dict['AlphaZero']
         self.last_mcts = self.MCTS
         self.AID_MCTS = self.MCTS_dict['AlphaZero']
+        self.COORDINATE_1D = []
         self.current_move = -1
+        self.ai_simula_time_list = []
+        self.ai_simula_time_list_ = []
+        self.human_simula_time_list = []
 def change_turn(cur):
     with server_state_lock["ROOMS"]:
         server_state.ROOMS = {}
 def handle_oppo_model_selection():
     if st.session_state['selected_oppo_model'] == 'Gomoku Bot':
         session_state.ROOM.MCTS = session_state.ROOM.MCTS_dict['Gomoku Bot']
         return
     else:
         new_mct = session_state.ROOM.MCTS_dict[st.session_state['selected_oppo_model']]
         new_mct.mcts._root = deepcopy(TreeNode)
         session_state.ROOM.MCTS = new_mct
+        session_state.ROOM.last_mcts = new_mct
+    return
+def handle_oppo_model_selection_():
+    if st.session_state['selected_oppo_model_'] == 'Gomoku Bot':
+        session_state.ROOM.MCTS_ = session_state.ROOM.MCTS_dict_['Gomoku Bot']
+        return
+    else:
+        TreeNode = session_state.ROOM.last_mcts_.mcts._root
+        new_mct = session_state.ROOM.MCTS_dict_[st.session_state['selected_oppo_model_']]
+        new_mct.mcts._root = deepcopy(TreeNode)
+        session_state.ROOM.MCTS_ = new_mct
+        session_state.ROOM.last_mcts_ = new_mct
     return
 def handle_aid_model_selection():
     if st.session_state['selected_aid_model'] == 'None':
         session_state.USE_AIAID = False
         return
     session_state.USE_AIAID = True
+    TreeNode = session_state.ROOM.MCTS.mcts._root  # use the same tree node
     new_mct = session_state.ROOM.MCTS_dict[st.session_state['selected_aid_model']]
     new_mct.mcts._root = deepcopy(TreeNode)
     session_state.ROOM.AID_MCTS = new_mct
     return
 if 'selected_oppo_model' not in st.session_state:
     st.session_state['selected_oppo_model'] = 'AlphaZero'  # 默认值
+if 'selected_oppo_model_' not in st.session_state:
+    st.session_state['selected_oppo_model_'] = 'AlphaZero'  # 默认值
 if 'selected_aid_model' not in st.session_state:
     st.session_state['selected_aid_model'] = 'AlphaZero'  # 默认值
 # Layout
 TITLE = st.empty()
 Model_Switch = st.empty()
+Model_Switch_ = st.empty()
 TITLE.header("🤖 AI 3603 Gomoku")
+selected_oppo_option = Model_Switch.selectbox('Select Model 1',
+                                              ['Pure MCTS', 'AlphaZero', 'Gomoku Bot', 'duel', 'Gumbel AlphaZero'],
+                                              index=1, key='oppo_model')
+selected_oppo_option_ = Model_Switch_.selectbox('Select Model 2',
+                                              ['Pure MCTS', 'AlphaZero', 'Gomoku Bot', 'duel', 'Gumbel AlphaZero'],
+                                              index=1, key='oppo_model_')
 if st.session_state['selected_oppo_model'] != selected_oppo_option:
     st.session_state['selected_oppo_model'] = selected_oppo_option
     handle_oppo_model_selection()
+if st.session_state['selected_oppo_model_'] != selected_oppo_option_:
+    st.session_state['selected_oppo_model_'] = selected_oppo_option_
+    handle_oppo_model_selection_()
 ROUND_INFO = st.empty()
 st.markdown("<br>", unsafe_allow_html=True)
 BOARD_PLATE = [
 with st.sidebar.container():
     ANOTHER_ROUND = st.empty()
     RESTART = st.empty()
+    CHANGE_PLAYER = st.empty()
     AIAID = st.empty()
     EXIT = st.empty()
+selected_aid_option = AIAID.selectbox('Select Assistant Model', ['None', 'Pure MCTS', 'AlphaZero'], index=0,
+                                      key='aid_model')
 if st.session_state['selected_aid_model'] != selected_aid_option:
     st.session_state['selected_aid_model'] = selected_aid_option
     handle_aid_model_selection()
 )
 def restart() -> None:
     """
     Restart the game.
     session_state.ROOM = Room(session_state.ROOM.ROOM_ID)
     st.session_state['selected_oppo_model'] = 'AlphaZero'
+def swap_players() -> None:
+    """Swap which side moves first and start a fresh game in the current room.
+
+    Used as the ``on_click`` callback of the "Swap players" button. Stores
+    ``change_turn(FirstPlayer)`` back into ``session_state``, re-derives
+    ``Player`` and ``Symbol`` from the new value, then rebuilds the room's
+    board, the Gomoku-bot board and every opponent player, and clears the
+    per-game state. ``ROOM.HISTORY`` is not modified here.
+    """
+    # Presumably change_turn toggles between _BLACK and _WHITE -- confirm against its definition.
+    session_state.update(
+        FirstPlayer=change_turn(session_state.FirstPlayer),
+    )
+    # Second update reads the FirstPlayer value written just above, so the two
+    # calls cannot be merged. Index 0 of the Player table is an empty placeholder.
+    session_state.update(
+        Player=[[], [_BLACK, _WHITE], [_WHITE, _BLACK]][session_state.FirstPlayer],
+        Symbol=_PLAYER_SYMBOL[session_state.FirstPlayer]
+    )
+    # Fresh boards for the new game, using the new player order.
+    session_state.ROOM.BOARD = Board(width=_BOARD_SIZE, height=_BOARD_SIZE, n_in_row=5, players=session_state.Player)
+    session_state.ROOM.PLAYER = session_state.FirstPlayer
+    session_state.ROOM.gomoku_bot_board = Gomoku_bot_board(_BOARD_SIZE, 1)
+    # Re-create every selectable opponent; the network-backed ones are built
+    # from their _MODEL_PATH entry, and 'Gomoku Bot' wraps the board created above.
+    session_state.ROOM.MCTS_dict = {'Pure MCTS': MCTSpure(c_puct=5, n_playout=1000),
+                                    'AlphaZero': alphazero(PolicyValueNet_new(_BOARD_SIZE, _BOARD_SIZE,
+                                                                              _MODEL_PATH["AlphaZero"]).policy_value_fn,
+                                                           c_puct=5, n_playout=100),
+                                    'duel': alphazero(duel_PolicyValueNet(_BOARD_SIZE, _BOARD_SIZE,
+                                                                          _MODEL_PATH["duel"]).policy_value_fn,
+                                                      c_puct=5, n_playout=100),
+                                    'Gumbel AlphaZero': Gumbel_MCTSPlayer(PolicyValueNet_new(_BOARD_SIZE, _BOARD_SIZE,
+                                                                                             _MODEL_PATH[
+                                                                                                 "Gumbel AlphaZero"]).policy_value_fn,
+                                                                          c_puct=5, n_playout=100, m_action=8),
+                                    'Gomoku Bot': Gomoku_bot(session_state.ROOM.gomoku_bot_board, -1)}
+    # Active opponent is whichever model name is currently stored under 'selected_oppo_model'.
+    session_state.ROOM.MCTS = session_state.ROOM.MCTS_dict[st.session_state['selected_oppo_model']]
+    session_state.ROOM.last_mcts = session_state.ROOM.MCTS
+    # NOTE(review): the next line assigns ROOM.PLAYER to itself and has no effect;
+    # ROOM.PLAYER was already set from FirstPlayer above.
+    session_state.ROOM.PLAYER = session_state.ROOM.PLAYER
+    session_state.ROOM.TURN = session_state.ROOM.PLAYER
+    session_state.ROOM.WINNER = _BLANK  # 0
+    # Clear per-game timing samples and the list of occupied cells.
+    # NOTE(review): the trailing-underscore state read by the second AI in
+    # draw_board (MCTS_, MCTS_dict_, ai_simula_time_list_) is not reset here --
+    # confirm whether that is intended.
+    session_state.ROOM.ai_simula_time_list = []
+    session_state.ROOM.human_simula_time_list = []
+    session_state.ROOM.COORDINATE_1D = []
 # Bind the "Reset" button to the restart callback.
 RESTART.button(
     "Reset",
     on_click=restart,
     help="Clear the board as well as the scores",
 )
+# Bind the "Swap players" button to swap_players, which runs when the button is clicked.
+CHANGE_PLAYER.button(
+    "Swap players",
+    on_click=swap_players,
+    help="Swap players",
+)
 # Draw the board
 def gomoku():
         session_state.ROOM.BOARD = Board(width=_BOARD_SIZE, height=_BOARD_SIZE, n_in_row=5)
         session_state.ROOM.gomoku_bot_board = Gomoku_bot_board(_BOARD_SIZE, 1)
         session_state.ROOM.MCTS_dict = {'Pure MCTS': MCTSpure(c_puct=5, n_playout=1000),
+                                        'AlphaZero': alphazero(PolicyValueNet_new(_BOARD_SIZE, _BOARD_SIZE,
+                                                                                  _MODEL_PATH["AlphaZero"]).policy_value_fn,
+                                                               c_puct=5, n_playout=100),
+                                        'duel': alphazero(duel_PolicyValueNet(_BOARD_SIZE, _BOARD_SIZE,
+                                                                              _MODEL_PATH["duel"]).policy_value_fn,
+                                                          c_puct=5, n_playout=100),
+                                        'Gumbel AlphaZero': Gumbel_MCTSPlayer(PolicyValueNet_new(_BOARD_SIZE, _BOARD_SIZE,
+                                                                                   _MODEL_PATH["Gumbel AlphaZero"]).policy_value_fn,
+                                                                c_puct=5, n_playout=100, m_action=8),
+                                        'Gomoku Bot': Gomoku_bot(session_state.ROOM.gomoku_bot_board, -1)}
         session_state.ROOM.MCTS = session_state.ROOM.MCTS_dict[st.session_state['selected_oppo_model']]
         session_state.ROOM.last_mcts = session_state.ROOM.MCTS
         session_state.ROOM.PLAYER = session_state.ROOM.PLAYER
         session_state.ROOM.TURN = session_state.ROOM.PLAYER
         session_state.ROOM.WINNER = _BLANK  # 0
+        session_state.ROOM.ai_simula_time_list = []
+        session_state.ROOM.human_simula_time_list = []
+        session_state.ROOM.COORDINATE_1D = []
     # Room status sync
     def sync_room() -> bool:
             session_state.ROOM = server_state.ROOMS[room_id]
             return True
     # Triggers the board response on click
     def handle_click(x, y):
         """
             session_state.ROOM.current_move = move
             session_state.ROOM.BOARD.do_move(move)
             # Gomoku Bot BOARD
+            session_state.ROOM.MCTS_dict["Gomoku Bot"].board.put(_BOARD_SIZE - move // _BOARD_SIZE - 1,
+                                                                 move % _BOARD_SIZE)  # # this move starts from left up corner (0,0), however, the move in the game starts from left bottom corner (0,0)
             session_state.ROOM.BOARD.board_map[x][y] = session_state.ROOM.TURN
             session_state.ROOM.COORDINATE_1D.append(x * _BOARD_SIZE + y)
     # Draw board
     def draw_board(response: bool):
         """construct each buttons for all cells of the board"""
+        if response and session_state.ROOM.TURN == _BLACK:  # Another AI
+            message.empty()
+            with st.spinner('🔮✨ Waiting for AI response... ⏳🚀'):
+                time.sleep(0.1)
+                print("AI's turn")
+                print("Below are current board under AI's view")
+                if st.session_state['selected_oppo_model_'] != 'Gomoku Bot':
+                    move, simul_time = session_state.ROOM.MCTS_.get_action(session_state.ROOM.BOARD, return_time=True)
+                else:
+                    move, simul_time = session_state.ROOM.MCTS_.get_action(return_time=True)
+                session_state.ROOM.ai_simula_time_list_.append(simul_time)
+                print("AI takes move: ", move)
+                session_state.ROOM.current_move = move
+                gpt_response = move
+                gpt_i, gpt_j = gpt_response // _BOARD_SIZE, gpt_response % _BOARD_SIZE
+                print("AI's move is located at ({}, {}) :".format(gpt_i, gpt_j))
+                move = session_state.ROOM.BOARD.location_to_move((gpt_i, gpt_j))
+                print("Location to move: ", move)
+                # print("Location to move: ", move)
+                # MCTS BOARD
+                session_state.ROOM.BOARD.do_move(move)
+                # Gomoku Bot BOARD
+                session_state.ROOM.MCTS_dict_["Gomoku Bot"].board.put(_BOARD_SIZE - 1 - move // _BOARD_SIZE,
+                                                                     move % _BOARD_SIZE)
+                # session_state.ROOM.BOARD[gpt_i][gpt_j] = session_state.ROOM.TURN
+                session_state.ROOM.COORDINATE_1D.append(gpt_i * _BOARD_SIZE + gpt_j)
+                if not session_state.ROOM.BOARD.game_end()[0]:
+                    if session_state.USE_AIAID:
+                        copy_mcts = deepcopy(session_state.ROOM.AID_MCTS.mcts)
+                        _, acts_aid, probs_aid, simul_mean_time_aid = copy_mcts.get_move_probs(session_state.ROOM.BOARD)
+                        sorted_acts_probs = sorted(zip(acts_aid, probs_aid), key=lambda x: x[1], reverse=True)
+                        top_five_acts = [act for act, prob in sorted_acts_probs[:5]]
+                        top_five_probs = [prob for act, prob in sorted_acts_probs[:5]]
+                else:
+                    top_five_acts = []
+                    top_five_probs = []
+                # construction of clickable buttons
+                for i, row in enumerate(session_state.ROOM.BOARD.board_map):
+                    # print("row:", row)
+                    for j, cell in enumerate(row):
+                        if (
+                                i * _BOARD_SIZE + j
+                                in session_state.ROOM.COORDINATE_1D
+                        ):
+                            if i == gpt_i and j == gpt_j:
+                                BOARD_PLATE[i][j].button(
+                                    session_state.Symbol[_NEW],
+                                    key=f"{i}:{j}",
+                                    args=(i, j),
+                                    on_click=forbid_click,
+                                )
+                            else:
+                                # disable click for GPT choices
+                                BOARD_PLATE[i][j].button(
+                                    session_state.Symbol[cell],
+                                    key=f"{i}:{j}",
+                                    args=(i, j),
+                                    on_click=forbid_click
+                                )
                         else:
+                            if session_state.USE_AIAID and i * _BOARD_SIZE + j in top_five_acts and not \
+                                    session_state.ROOM.BOARD.game_end()[0]:
+                                # enable click for other cells available for human choices
+                                prob = top_five_probs[top_five_acts.index(i * _BOARD_SIZE + j)]
+                                BOARD_PLATE[i][j].button(
+                                    session_state.Symbol[cell] + f"({round(prob, 2)})",
+                                    key=f"{i}:{j}",
+                                    on_click=forbid_click,
+                                    args=(i, j),
+                                )
+                            else:
+                                # enable click for other cells available for human choices
+                                BOARD_PLATE[i][j].button(
+                                    session_state.Symbol[cell],
+                                    key=f"{i}:{j}",
+                                    on_click=forbid_click,
+                                    args=(i, j),
+                                )
+            message.markdown(
+                'AI agent has calculated its strategy, which takes <span style="color: blue; font-size: 20px;">{:.3e}</span>s per simulation.'.format(
+                    simul_time),
+                unsafe_allow_html=True
+            )
+            LOG.subheader("Logs")
+            # change turn
+            session_state.ROOM.TURN = change_turn(session_state.ROOM.TURN)
+            # session_state.ROOM.WINNER = check_win()
+            win, winner = session_state.ROOM.BOARD.game_end()
+            if win:
+                session_state.ROOM.WINNER = winner
+            session_state.ROOM.HISTORY = (
+                session_state.ROOM.HISTORY[0]
+                + int(session_state.ROOM.WINNER == _WHITE),
+                session_state.ROOM.HISTORY[1]
+                + int(session_state.ROOM.WINNER == _BLACK),
+            )
+            session_state.ROOM.TIME = time.time()
         elif response and session_state.ROOM.TURN == _WHITE:  # AI turn
             message.empty()
             with st.spinner('🔮✨ Waiting for AI response... ⏳🚀'):
                 time.sleep(0.1)
                 print("AI's turn")
                 print("Below are current board under AI's view")
                 if st.session_state['selected_oppo_model'] != 'Gomoku Bot':
                     move, simul_time = session_state.ROOM.MCTS.get_action(session_state.ROOM.BOARD, return_time=True)
                 else:
                     move, simul_time = session_state.ROOM.MCTS.get_action(return_time=True)
+                session_state.ROOM.ai_simula_time_list.append(simul_time)
                 print("AI takes move: ", move)
                 session_state.ROOM.current_move = move
                 gpt_response = move
                 # MCTS BOARD
                 session_state.ROOM.BOARD.do_move(move)
                 # Gomoku Bot BOARD
+                session_state.ROOM.MCTS_dict["Gomoku Bot"].board.put(_BOARD_SIZE - 1 - move // _BOARD_SIZE,
+                                                                     move % _BOARD_SIZE)
                 # session_state.ROOM.BOARD[gpt_i][gpt_j] = session_state.ROOM.TURN
                 session_state.ROOM.COORDINATE_1D.append(gpt_i * _BOARD_SIZE + gpt_j)
                     for j, cell in enumerate(row):
                         if (
                                 i * _BOARD_SIZE + j
+                                in session_state.ROOM.COORDINATE_1D
                         ):
                             if i == gpt_i and j == gpt_j:
                                 BOARD_PLATE[i][j].button(
+                                    session_state.Symbol[_NEW],
                                     key=f"{i}:{j}",
                                     args=(i, j),
+                                    on_click=forbid_click,
                                 )
                             else:
                                 # disable click for GPT choices
                                 BOARD_PLATE[i][j].button(
+                                    session_state.Symbol[cell],
                                     key=f"{i}:{j}",
                                     args=(i, j),
                                     on_click=forbid_click
                                 )
                         else:
+                            if session_state.USE_AIAID and i * _BOARD_SIZE + j in top_five_acts and not \
+                                    session_state.ROOM.BOARD.game_end()[0]:
                                 # enable click for other cells available for human choices
                                 prob = top_five_probs[top_five_acts.index(i * _BOARD_SIZE + j)]
                                 BOARD_PLATE[i][j].button(
+                                    session_state.Symbol[cell] + f"({round(prob, 2)})",
                                     key=f"{i}:{j}",
+                                    on_click=forbid_click,
                                     args=(i, j),
                                 )
                             else:
                                 # enable click for other cells available for human choices
                                 BOARD_PLATE[i][j].button(
+                                    session_state.Symbol[cell],
                                     key=f"{i}:{j}",
+                                    on_click=forbid_click,
                                     args=(i, j),
                                 )
             message.markdown(
                 'AI agent has calculated its strategy, which takes <span style="color: blue; font-size: 20px;">{:.3e}</span>s per simulation.'.format(
                     simul_time),
             for i, row in enumerate(session_state.ROOM.BOARD.board_map):
                 for j, cell in enumerate(row):
                     BOARD_PLATE[i][j].write(
+                        session_state.Symbol[cell],
                         # key=f"{i}:{j}",
                     )
             ROUND_INFO.write(
                 f"#### **{_PLAYER_COLOR[session_state.ROOM.WINNER]} WIN!**\n**Click buttons on the left for more plays.**"
             )
         st.markdown("<br>", unsafe_allow_html=True)
         st.markdown("<br>", unsafe_allow_html=True)
+        chart_data = pd.DataFrame(session_state.ROOM.ai_simula_time_list, columns=["Simulation Time"])
         st.line_chart(chart_data)
     game_control()
     update_info()

pages/Player_VS_AI.py CHANGED Viewed

@@ -8,14 +8,10 @@ Description: this file is used to display our project and add visualization elem
 import time
 import pandas as pd
 from copy import deepcopy
-import torch
-# import torch
 import numpy as np
 import streamlit as st
 from scipy.signal import convolve  # this is used to check if any player wins
 from streamlit import session_state
-from streamlit.delta_generator import DeltaGenerator
 from streamlit_server_state import server_state, server_state_lock
 from Gomoku_MCTS import MCTSpure, alphazero, Board, PolicyValueNet_old, PolicyValueNet_new, duel_PolicyValueNet, \
     Gumbel_MCTSPlayer
@@ -29,7 +25,8 @@ from const import (
     _WHITE,  # 2 , for AI
     _BLANK,
     _PLAYER_COLOR,
-    _PLAYER_SYMBOL,
     _ROOM_COLOR,
     _VERTICAL,
     _NEW,
@@ -37,11 +34,11 @@ from const import (
     _DIAGONAL_UP_LEFT,
     _DIAGONAL_UP_RIGHT,
     _BOARD_SIZE,
-    _BOARD_SIZE_1D,
-    _AI_AID_INFO,
     _MODEL_PATH
 )
 # '''
 # from ai import (
 #     BOS_TOKEN_ID,
@@ -54,13 +51,18 @@ from const import (
 # '''
 # Utils
 class Room:
     def __init__(self, room_id) -> None:
         self.ROOM_ID = room_id
         # self.BOARD = np.zeros(shape=(_BOARD_SIZE, _BOARD_SIZE), dtype=int)
-        self.BOARD = Board(width=_BOARD_SIZE, height=_BOARD_SIZE, n_in_row=5, players=[_BLACK, _WHITE])
-        self.PLAYER = _BLACK
         self.TURN = self.PLAYER
         self.HISTORY = (0, 0)
         self.WINNER = _BLANK
@@ -167,6 +169,7 @@ with st.sidebar.container():
     ANOTHER_ROUND = st.empty()
     RESTART = st.empty()
     GIVEIN = st.empty()
     AIAID = st.empty()
     EXIT = st.empty()
 selected_aid_option = AIAID.selectbox('Select Assistant Model', ['None', 'Pure MCTS', 'AlphaZero'], index=0,
@@ -237,6 +240,38 @@ def givein() -> None:
     session_state.ROOM.human_simula_time_list = []
     session_state.ROOM.COORDINATE_1D = []
 RESTART.button(
     "Reset",
@@ -250,6 +285,12 @@ GIVEIN.button(
     help="Give in to AI",
 )
 # Draw the board
 def gomoku():
@@ -428,7 +469,7 @@ def gomoku():
                     ):
                         if i == cur_move[0] and j == cur_move[1]:
                             BOARD_PLATE[i][j].button(
-                                _PLAYER_SYMBOL[_NEW],
                                 key=f"{i}:{j}",
                                 args=(i, j),
                                 on_click=forbid_click,
@@ -436,7 +477,7 @@ def gomoku():
                         else:
                             # disable click for GPT choices
                             BOARD_PLATE[i][j].button(
-                                _PLAYER_SYMBOL[cell],
                                 key=f"{i}:{j}",
                                 args=(i, j),
                                 on_click=forbid_click
@@ -446,7 +487,7 @@ def gomoku():
                             # enable click for other cells available for human choices
                             prob = top_five_probs[top_five_acts.index(i * _BOARD_SIZE + j)]
                             BOARD_PLATE[i][j].button(
-                                _PLAYER_SYMBOL[cell] + f"({round(prob, 2)})",
                                 key=f"{i}:{j}",
                                 on_click=handle_click,
                                 args=(i, j),
@@ -454,7 +495,7 @@ def gomoku():
                         else:
                             # enable click for other cells available for human choices
                             BOARD_PLATE[i][j].button(
-                                _PLAYER_SYMBOL[cell],
                                 key=f"{i}:{j}",
                                 on_click=handle_click,
                                 args=(i, j),
@@ -538,7 +579,7 @@ def gomoku():
                         ):
                             if i == gpt_i and j == gpt_j:
                                 BOARD_PLATE[i][j].button(
-                                    _PLAYER_SYMBOL[_NEW],
                                     key=f"{i}:{j}",
                                     args=(i, j),
                                     on_click=handle_click,
@@ -546,7 +587,7 @@ def gomoku():
                             else:
                                 # disable click for GPT choices
                                 BOARD_PLATE[i][j].button(
-                                    _PLAYER_SYMBOL[cell],
                                     key=f"{i}:{j}",
                                     args=(i, j),
                                     on_click=forbid_click
@@ -557,7 +598,7 @@ def gomoku():
                                 # enable click for other cells available for human choices
                                 prob = top_five_probs[top_five_acts.index(i * _BOARD_SIZE + j)]
                                 BOARD_PLATE[i][j].button(
-                                    _PLAYER_SYMBOL[cell] + f"({round(prob, 2)})",
                                     key=f"{i}:{j}",
                                     on_click=handle_click,
                                     args=(i, j),
@@ -565,7 +606,7 @@ def gomoku():
                             else:
                                 # enable click for other cells available for human choices
                                 BOARD_PLATE[i][j].button(
-                                    _PLAYER_SYMBOL[cell],
                                     key=f"{i}:{j}",
                                     on_click=handle_click,
                                     args=(i, j),
@@ -599,7 +640,7 @@ def gomoku():
             for i, row in enumerate(session_state.ROOM.BOARD.board_map):
                 for j, cell in enumerate(row):
                     BOARD_PLATE[i][j].write(
-                        _PLAYER_SYMBOL[cell],
                         # key=f"{i}:{j}",
                     )

 import time
 import pandas as pd
 from copy import deepcopy
 import numpy as np
 import streamlit as st
 from scipy.signal import convolve  # this is used to check if any player wins
 from streamlit import session_state
 from streamlit_server_state import server_state, server_state_lock
 from Gomoku_MCTS import MCTSpure, alphazero, Board, PolicyValueNet_old, PolicyValueNet_new, duel_PolicyValueNet, \
     Gumbel_MCTSPlayer
     _WHITE,  # 2 , for AI
     _BLANK,
     _PLAYER_COLOR,
+    _PLAYER_SYMBOL1,
+    _PLAYER_SYMBOL2,
     _ROOM_COLOR,
     _VERTICAL,
     _NEW,
     _DIAGONAL_UP_LEFT,
     _DIAGONAL_UP_RIGHT,
     _BOARD_SIZE,
     _MODEL_PATH
 )
+# Symbol tables selected by the first player's id; slot 0 is a filler so the
+# table can be indexed directly with that id.
+# Presumably _BLACK == 1 and _WHITE == 2 -- confirm against const.py.
+_PLAYER_SYMBOL = [0, _PLAYER_SYMBOL1, _PLAYER_SYMBOL2]
 # '''
 # from ai import (
 #     BOS_TOKEN_ID,
 # '''
+# Session defaults: applied only when this session has no FirstPlayer yet,
+# so values already stored in session_state are not overwritten.
+if "FirstPlayer" not in session_state:
+    session_state.FirstPlayer = _BLACK
+    # Index 0 is an empty placeholder; the player order is picked by the first player's id.
+    session_state.Player = [[], [ _BLACK,_WHITE], [_WHITE,_BLACK]][session_state.FirstPlayer]
+    session_state.Symbol = _PLAYER_SYMBOL[session_state.FirstPlayer]
 # Utils
 class Room:
     def __init__(self, room_id) -> None:
         self.ROOM_ID = room_id
         # self.BOARD = np.zeros(shape=(_BOARD_SIZE, _BOARD_SIZE), dtype=int)
+        self.BOARD = Board(width=_BOARD_SIZE, height=_BOARD_SIZE, n_in_row=5, players=session_state.Player)
+        self.PLAYER = session_state.FirstPlayer
         self.TURN = self.PLAYER
         self.HISTORY = (0, 0)
         self.WINNER = _BLANK
     ANOTHER_ROUND = st.empty()
     RESTART = st.empty()
     GIVEIN = st.empty()
+    CHANGE_PLAYER = st.empty()
     AIAID = st.empty()
     EXIT = st.empty()
 selected_aid_option = AIAID.selectbox('Select Assistant Model', ['None', 'Pure MCTS', 'AlphaZero'], index=0,
     session_state.ROOM.human_simula_time_list = []
     session_state.ROOM.COORDINATE_1D = []
+def swap_players() -> None:
+    """Swap which side moves first and start a fresh game in the current room.
+
+    Used as the ``on_click`` callback of the "Swap players" button. Stores
+    ``change_turn(FirstPlayer)`` back into ``session_state``, re-derives
+    ``Player`` and ``Symbol`` from the new value, then rebuilds the room's
+    board, the Gomoku-bot board and every opponent player, and clears the
+    per-game state. ``ROOM.HISTORY`` is not modified here.
+    """
+    # Presumably change_turn toggles between _BLACK and _WHITE -- confirm against its definition.
+    session_state.update(
+        FirstPlayer=change_turn(session_state.FirstPlayer),
+    )
+    # Second update reads the FirstPlayer value written just above, so the two
+    # calls cannot be merged. Index 0 of the Player table is an empty placeholder.
+    session_state.update(
+        Player=[[], [_BLACK, _WHITE], [_WHITE, _BLACK]][session_state.FirstPlayer],
+        Symbol=_PLAYER_SYMBOL[session_state.FirstPlayer]
+    )
+    # Fresh boards for the new game, using the new player order.
+    session_state.ROOM.BOARD = Board(width=_BOARD_SIZE, height=_BOARD_SIZE, n_in_row=5, players=session_state.Player)
+    session_state.ROOM.PLAYER = session_state.FirstPlayer
+    session_state.ROOM.gomoku_bot_board = Gomoku_bot_board(_BOARD_SIZE, 1)
+    # Re-create every selectable opponent; the network-backed ones are built
+    # from their _MODEL_PATH entry, and 'Gomoku Bot' wraps the board created above.
+    session_state.ROOM.MCTS_dict = {'Pure MCTS': MCTSpure(c_puct=5, n_playout=1000),
+                                    'AlphaZero': alphazero(PolicyValueNet_new(_BOARD_SIZE, _BOARD_SIZE,
+                                                                              _MODEL_PATH["AlphaZero"]).policy_value_fn,
+                                                           c_puct=5, n_playout=100),
+                                    'duel': alphazero(duel_PolicyValueNet(_BOARD_SIZE, _BOARD_SIZE,
+                                                                          _MODEL_PATH["duel"]).policy_value_fn,
+                                                      c_puct=5, n_playout=100),
+                                    'Gumbel AlphaZero': Gumbel_MCTSPlayer(PolicyValueNet_new(_BOARD_SIZE, _BOARD_SIZE,
+                                                                                             _MODEL_PATH[
+                                                                                                 "Gumbel AlphaZero"]).policy_value_fn,
+                                                                          c_puct=5, n_playout=100, m_action=8),
+                                    'Gomoku Bot': Gomoku_bot(session_state.ROOM.gomoku_bot_board, -1)}
+    # Active opponent is whichever model name is currently stored under 'selected_oppo_model'.
+    session_state.ROOM.MCTS = session_state.ROOM.MCTS_dict[st.session_state['selected_oppo_model']]
+    session_state.ROOM.last_mcts = session_state.ROOM.MCTS
+    # NOTE(review): the next line assigns ROOM.PLAYER to itself and has no effect;
+    # ROOM.PLAYER was already set from FirstPlayer above.
+    session_state.ROOM.PLAYER = session_state.ROOM.PLAYER
+    session_state.ROOM.TURN = session_state.ROOM.PLAYER
+    session_state.ROOM.WINNER = _BLANK  # 0
+    # Clear per-game timing samples and the list of occupied cells.
+    session_state.ROOM.ai_simula_time_list = []
+    session_state.ROOM.human_simula_time_list = []
+    session_state.ROOM.COORDINATE_1D = []
 RESTART.button(
     "Reset",
     help="Give in to AI",
 )
+# Bind the "Swap players" button to swap_players, which runs when the button is clicked.
+CHANGE_PLAYER.button(
+    "Swap players",
+    on_click=swap_players,
+    help="Swap players",
+)
 # Draw the board
 def gomoku():
                     ):
                         if i == cur_move[0] and j == cur_move[1]:
                             BOARD_PLATE[i][j].button(
+                                session_state.Symbol[_NEW],
                                 key=f"{i}:{j}",
                                 args=(i, j),
                                 on_click=forbid_click,
                         else:
                             # disable click for GPT choices
                             BOARD_PLATE[i][j].button(
+                                session_state.Symbol[cell],
                                 key=f"{i}:{j}",
                                 args=(i, j),
                                 on_click=forbid_click
                             # enable click for other cells available for human choices
                             prob = top_five_probs[top_five_acts.index(i * _BOARD_SIZE + j)]
                             BOARD_PLATE[i][j].button(
+                                session_state.Symbol[cell] + f"{round(prob, 2)}",
                                 key=f"{i}:{j}",
                                 on_click=handle_click,
                                 args=(i, j),
                         else:
                             # enable click for other cells available for human choices
                             BOARD_PLATE[i][j].button(
+                                session_state.Symbol[cell],
                                 key=f"{i}:{j}",
                                 on_click=handle_click,
                                 args=(i, j),
                         ):
                             if i == gpt_i and j == gpt_j:
                                 BOARD_PLATE[i][j].button(
+                                    session_state.Symbol[_NEW],
                                     key=f"{i}:{j}",
                                     args=(i, j),
                                     on_click=handle_click,
                             else:
                                 # disable click for GPT choices
                                 BOARD_PLATE[i][j].button(
+                                    session_state.Symbol[cell],
                                     key=f"{i}:{j}",
                                     args=(i, j),
                                     on_click=forbid_click
                                 # enable click for other cells available for human choices
                                 prob = top_five_probs[top_five_acts.index(i * _BOARD_SIZE + j)]
                                 BOARD_PLATE[i][j].button(
+                                    session_state.Symbol[cell] + f"{round(prob, 2)}",
                                     key=f"{i}:{j}",
                                     on_click=handle_click,
                                     args=(i, j),
                             else:
                                 # enable click for other cells available for human choices
                                 BOARD_PLATE[i][j].button(
+                                    session_state.Symbol[cell],
                                     key=f"{i}:{j}",
                                     on_click=handle_click,
                                     args=(i, j),
             for i, row in enumerate(session_state.ROOM.BOARD.board_map):
                 for j, cell in enumerate(row):
                     BOARD_PLATE[i][j].write(
+                        session_state.Symbol[cell],
                         # key=f"{i}:{j}",
                     )

pages/Try.py ADDED Viewed

	@@ -0,0 +1,17 @@

+import streamlit as st
+# Sample data
+data = {
+    "Player": ["Alice", "Bob", "Charlie"],
+    "Score": [100, 95, 90]
+}
+# Convert the data into a Markdown-formatted string
+def create_leaderboard(data):
+    """Build a Markdown leaderboard string from parallel player/score lists.
+
+    Args:
+        data: Mapping with a "Player" sequence and a "Score" sequence; entries
+            are paired positionally with ``zip``, so any surplus items in the
+            longer sequence are ignored.
+
+    Returns:
+        A string starting with a "### Leaderboard" heading followed by one
+        numbered line per player, with ranks counted from 1 in input order
+        (the entries are not sorted here).
+    """
+    leaderboard = "### Leaderboard\n"
+    for i, (player, score) in enumerate(zip(data["Player"], data["Score"]), start=1):
+        leaderboard += f"{i}. **{player}**: {score} points\n"
+    return leaderboard
+# Display the leaderboard in the Streamlit app
+st.markdown(create_leaderboard(data))