"""Tic Tac Toe with human, minimax, and Q-learning opponents, served via Gradio."""

import random

import numpy as np
import gradio as gr


class TicTacToe:
    """Game state: a flat 9-cell board plus the symbol whose turn it is."""

    def __init__(self):
        self.board = [' '] * 9          # cells 0-8, row-major; ' ' means empty
        self.current_player = 'X'       # X always moves first

    def display_board(self):
        """Print the board to stdout (console debugging; the GUI uses render_board)."""
        print("\n")
        for i in range(3):
            print(" | ".join(self.board[i * 3:(i + 1) * 3]))
            if i < 2:
                print("---------")
        print("\n")

    def make_move(self, position):
        """Place the current player's symbol at *position* (0-8). No legality check —
        callers are expected to verify the cell is empty first."""
        self.board[position] = self.current_player

    def switch_player(self):
        """Toggle the side to move between 'X' and 'O'."""
        self.current_player = 'O' if self.current_player == 'X' else 'X'

    def check_winner(self):
        """Return the winning symbol ('X' or 'O'), or None if no line is complete."""
        winning_combinations = [
            [0, 1, 2], [3, 4, 5], [6, 7, 8],  # Rows
            [0, 3, 6], [1, 4, 7], [2, 5, 8],  # Columns
            [0, 4, 8], [2, 4, 6],             # Diagonals
        ]
        for combo in winning_combinations:
            if self.board[combo[0]] == self.board[combo[1]] == self.board[combo[2]] != ' ':
                return self.board[combo[0]]
        return None

    def is_draw(self):
        """True when every cell is filled. Call after check_winner: a full board
        with a winning line is a win, not a draw."""
        return ' ' not in self.board

    def reset_board(self):
        """Clear the board and give the move back to X."""
        self.board = [' '] * 9
        self.current_player = 'X'


class MinimaxPlayer:
    """Perfect-play opponent using exhaustive minimax search (no depth limit;
    tic-tac-toe's game tree is small enough to search fully)."""

    def __init__(self, symbol):
        self.symbol = symbol  # 'X' or 'O'

    def minimax(self, game, is_maximizing):
        """Score the current position from this player's perspective:
        +1 win, -1 loss, 0 draw. Mutates game.board for trial moves but
        always restores it before returning."""
        winner = game.check_winner()
        if winner == self.symbol:
            return 1
        elif winner == ('O' if self.symbol == 'X' else 'X'):
            return -1
        elif game.is_draw():
            return 0

        if is_maximizing:
            best_score = -float('inf')
            for i in range(9):
                if game.board[i] == ' ':
                    game.board[i] = self.symbol
                    score = self.minimax(game, False)
                    game.board[i] = ' '  # undo trial move
                    best_score = max(score, best_score)
            return best_score
        else:
            best_score = float('inf')
            for i in range(9):
                if game.board[i] == ' ':
                    game.board[i] = 'O' if self.symbol == 'X' else 'X'
                    score = self.minimax(game, True)
                    game.board[i] = ' '  # undo trial move
                    best_score = min(score, best_score)
            return best_score

    def get_move(self, game):
        """Return the index (0-8) of the best empty cell, or None on a full board."""
        best_score = -float('inf')
        best_move = None
        for i in range(9):
            if game.board[i] == ' ':
                game.board[i] = self.symbol
                score = self.minimax(game, False)
                game.board[i] = ' '
                if score > best_score:
                    best_score = score
                    best_move = i
        return best_move


class QLearningPlayer:
    """Tabular Q-learning opponent.

    States are 9-character board strings; each state maps to a length-9 array
    of Q-values, one per cell.
    """

    def __init__(self, symbol, learning_rate=0.1, discount_factor=0.9,
                 exploration_rate=1.0):
        self.symbol = symbol
        self.q_table = {}  # state string -> np.ndarray of 9 Q-values
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate

    def get_state(self, game):
        """Encode the board as a 9-character string key."""
        return ''.join(game.board)

    def choose_action(self, game):
        """Return a legal move index (0-8): random with probability
        exploration_rate, otherwise the highest-Q *empty* cell.

        Raises IndexError (via random.choice) if the board is full; callers
        only invoke this while at least one cell is empty.
        """
        state = self.get_state(game)
        valid_moves = [i for i in range(9) if game.board[i] == ' ']
        if random.random() < self.exploration_rate:
            return random.choice(valid_moves)
        if state not in self.q_table:
            self.q_table[state] = np.zeros(9)
        # BUG FIX: the original took np.argmax over all 9 cells, which can
        # select an occupied cell (e.g. an all-zero Q row argmaxes to 0 even
        # if cell 0 is taken) and silently overwrite it. Restrict the argmax
        # to legal moves; ties resolve to the lowest index, matching argmax.
        q_values = self.q_table[state]
        return max(valid_moves, key=lambda i: q_values[i])

    def update_q_table(self, state, action, reward, next_state):
        """Standard one-step Q-learning update:
        Q(s,a) += lr * (r + gamma * max_a' Q(s',a') - Q(s,a))."""
        if state not in self.q_table:
            self.q_table[state] = np.zeros(9)
        if next_state not in self.q_table:
            self.q_table[next_state] = np.zeros(9)
        self.q_table[state][action] += self.learning_rate * (
            reward + self.discount_factor * np.max(self.q_table[next_state])
            - self.q_table[state][action]
        )

    def train(self, episodes):
        """Train by self-play for *episodes* games.

        NOTE(review): the agent's policy picks moves for BOTH sides (the game
        alternates current_player, and every move updates the Q-table), with
        reward +1 when self.symbol wins, -1 when the other symbol wins and
        0.5 on a draw. This is the original training scheme, preserved as-is.
        """
        for _ in range(episodes):
            game = TicTacToe()
            state = self.get_state(game)
            while True:
                action = self.choose_action(game)
                game.make_move(action)
                next_state = self.get_state(game)
                winner = game.check_winner()
                if winner == self.symbol:
                    reward = 1
                    self.update_q_table(state, action, reward, next_state)
                    break
                elif winner:
                    reward = -1
                    self.update_q_table(state, action, reward, next_state)
                    break
                elif game.is_draw():
                    reward = 0.5
                    self.update_q_table(state, action, reward, next_state)
                    break
                else:
                    reward = 0
                    self.update_q_table(state, action, reward, next_state)
                    game.switch_player()
                    state = next_state


# Global game instance shared by all Gradio callbacks (single-session design).
game = TicTacToe()
game_mode = "human_vs_human"
minimax_player = MinimaxPlayer('O')
qlearning_player = QLearningPlayer('O')
qlearning_player.train(1000)  # Pre-train the Q-learning agent


def render_board():
    """Convert the game board to a 3x3 grid of strings for the DataFrame widget
    (empty cells render as "" rather than a space)."""
    board = []
    for i in range(3):
        row = []
        for j in range(3):
            idx = i * 3 + j
            value = game.board[idx] if game.board[idx] != ' ' else ""
            row.append(value)
        board.append(row)
    return board


def check_game_end():
    """True if the game has ended (either a win or a draw)."""
    return game.check_winner() or game.is_draw()


def get_game_status():
    """Return the status-bar message for the current game state."""
    winner = game.check_winner()
    if winner:
        return f"Player {winner} wins!"
    elif game.is_draw():
        return "It's a draw!"
    else:
        return f"Current player: {game.current_player}"


def reset_game(game_mode, difficulty):
    """Reset the game board. *game_mode* and *difficulty* are accepted to match
    the Gradio event signature; difficulty is currently unused."""
    game.reset_board()
    return render_board(), "Game reset! Current player: X"


def _normalize_mode(game_mode):
    """Map the radio-button label to an internal mode key.

    BUG FIX: the radio supplies "Human_vs_Human" / "AI" / "Q_learning", but the
    handler compared against "human_vs_human" / "minimax" / "q_learning", so
    "AI" silently fell through to the Q-learning branch and the human-vs-human
    branch never matched at all.
    """
    mode = (game_mode or "").lower()
    if mode in ("ai", "minimax"):
        return "minimax"
    if mode == "q_learning":
        return "q_learning"
    return "human_vs_human"


def make_move(evt: gr.SelectData, game_mode, difficulty):
    """Handle a board click: apply the human move, then the AI reply if playing
    against an AI. *difficulty* is currently unused."""
    row, col = evt.index
    position = row * 3 + col
    mode = _normalize_mode(game_mode)

    # Ignore clicks on occupied cells or after the game has already ended.
    if game.board[position] != ' ' or check_game_end():
        return render_board(), get_game_status()

    # Human move.
    game.make_move(position)
    if check_game_end():
        return render_board(), get_game_status()
    game.switch_player()

    # AI reply. BUG FIX: in the original, this code sat on the `elif` branch of
    # the cell-legality check, so the AI only ever moved when the human clicked
    # an OCCUPIED cell; after a valid human move the AI never responded.
    if mode != "human_vs_human":
        if mode == "minimax":
            ai_move = minimax_player.get_move(game)
        else:  # q_learning
            ai_move = qlearning_player.choose_action(game)
        game.make_move(ai_move)
        if check_game_end():
            return render_board(), get_game_status()
        game.switch_player()

    return render_board(), get_game_status()


def create_gui():
    """Create the Gradio interface."""
    with gr.Blocks() as interface:
        gr.Markdown("# Tic Tac Toe with AI")

        with gr.Row():
            game_mode = gr.Radio(
                ["Human_vs_Human", "AI", "Q_learning"],
                label="Game Mode",
                value="human_vs_human"
            )
            difficulty = gr.Radio(
                ["Easy", "Medium", "Hard"],
                label="AI Difficulty",
                value="medium"
            )

        board = gr.DataFrame(
            render_board(),
            headers=[""] * 3,  # Empty headers for 3 columns
            interactive=True,
            col_count=(3, "fixed"),
            row_count=(3, "fixed")
        )
        status = gr.Textbox(value="Current player: X", label="Status")
        reset_btn = gr.Button("Reset Game")

        # Event handlers
        board.select(
            make_move,
            [game_mode, difficulty],
            [board, status]
        )
        reset_btn.click(
            reset_game,
            [game_mode, difficulty],
            [board, status]
        )

    return interface


# Launch the interface
if __name__ == "__main__":
    interface = create_gui()
    interface.launch()