"""Tic Tac Toe with human, minimax, and Q-learning opponents, served via Gradio."""

import random

import numpy as np
import gradio as gr


class TicTacToe:
    """Game state: a flat 9-cell board plus the symbol whose turn it is."""

    def __init__(self):
        self.board = [' '] * 9          # cells 0-8, row-major; ' ' means empty
        self.current_player = 'X'       # X always moves first

    def display_board(self):
        """Print the board to stdout (console debugging; the GUI uses render_board)."""
        print("\n")
        for i in range(3):
            print(" | ".join(self.board[i * 3:(i + 1) * 3]))
            if i < 2:
                print("---------")
        print("\n")

    def make_move(self, position):
        """Place the current player's symbol at *position* (0-8). No legality check —
        callers are expected to verify the cell is empty first."""
        self.board[position] = self.current_player

    def switch_player(self):
        """Toggle the side to move between 'X' and 'O'."""
        self.current_player = 'O' if self.current_player == 'X' else 'X'

    def check_winner(self):
        """Return the winning symbol ('X' or 'O'), or None if no line is complete."""
        winning_combinations = [
            [0, 1, 2], [3, 4, 5], [6, 7, 8],  # Rows
            [0, 3, 6], [1, 4, 7], [2, 5, 8],  # Columns
            [0, 4, 8], [2, 4, 6],             # Diagonals
        ]
        for combo in winning_combinations:
            if self.board[combo[0]] == self.board[combo[1]] == self.board[combo[2]] != ' ':
                return self.board[combo[0]]
        return None

    def is_draw(self):
        """True when every cell is filled. Call after check_winner: a full board
        with a winning line is a win, not a draw."""
        return ' ' not in self.board

    def reset_board(self):
        """Clear the board and give the move back to X."""
        self.board = [' '] * 9
        self.current_player = 'X'


class MinimaxPlayer:
    """Perfect-play opponent using exhaustive minimax search (no depth limit;
    tic-tac-toe's game tree is small enough to search fully)."""

    def __init__(self, symbol):
        self.symbol = symbol  # 'X' or 'O'

    def minimax(self, game, is_maximizing):
        """Score the current position from this player's perspective:
        +1 win, -1 loss, 0 draw. Mutates game.board for trial moves but
        always restores it before returning."""
        winner = game.check_winner()
        if winner == self.symbol:
            return 1
        elif winner == ('O' if self.symbol == 'X' else 'X'):
            return -1
        elif game.is_draw():
            return 0

        if is_maximizing:
            best_score = -float('inf')
            for i in range(9):
                if game.board[i] == ' ':
                    game.board[i] = self.symbol
                    score = self.minimax(game, False)
                    game.board[i] = ' '  # undo trial move
                    best_score = max(score, best_score)
            return best_score
        else:
            best_score = float('inf')
            for i in range(9):
                if game.board[i] == ' ':
                    game.board[i] = 'O' if self.symbol == 'X' else 'X'
                    score = self.minimax(game, True)
                    game.board[i] = ' '  # undo trial move
                    best_score = min(score, best_score)
            return best_score

    def get_move(self, game):
        """Return the index (0-8) of the best empty cell, or None on a full board."""
        best_score = -float('inf')
        best_move = None
        for i in range(9):
            if game.board[i] == ' ':
                game.board[i] = self.symbol
                score = self.minimax(game, False)
                game.board[i] = ' '
                if score > best_score:
                    best_score = score
                    best_move = i
        return best_move


class QLearningPlayer:
    """Tabular Q-learning opponent.

    States are 9-character board strings; each state maps to a length-9 array
    of Q-values, one per cell.
    """

    def __init__(self, symbol, learning_rate=0.1, discount_factor=0.9,
                 exploration_rate=1.0):
        self.symbol = symbol
        self.q_table = {}  # state string -> np.ndarray of 9 Q-values
        self.learning_rate = learning_rate
        self.discount_factor = discount_factor
        self.exploration_rate = exploration_rate

    def get_state(self, game):
        """Encode the board as a 9-character string key."""
        return ''.join(game.board)

    def choose_action(self, game):
        """Return a legal move index (0-8): random with probability
        exploration_rate, otherwise the highest-Q *empty* cell.

        Raises IndexError (via random.choice) if the board is full; callers
        only invoke this while at least one cell is empty.
        """
        state = self.get_state(game)
        valid_moves = [i for i in range(9) if game.board[i] == ' ']
        if random.random() < self.exploration_rate:
            return random.choice(valid_moves)
        if state not in self.q_table:
            self.q_table[state] = np.zeros(9)
        # BUG FIX: the original took np.argmax over all 9 cells, which can
        # select an occupied cell (e.g. an all-zero Q row argmaxes to 0 even
        # if cell 0 is taken) and silently overwrite it. Restrict the argmax
        # to legal moves; ties resolve to the lowest index, matching argmax.
        q_values = self.q_table[state]
        return max(valid_moves, key=lambda i: q_values[i])

    def update_q_table(self, state, action, reward, next_state):
        """Standard one-step Q-learning update:
        Q(s,a) += lr * (r + gamma * max_a' Q(s',a') - Q(s,a))."""
        if state not in self.q_table:
            self.q_table[state] = np.zeros(9)
        if next_state not in self.q_table:
            self.q_table[next_state] = np.zeros(9)
        self.q_table[state][action] += self.learning_rate * (
            reward + self.discount_factor * np.max(self.q_table[next_state])
            - self.q_table[state][action]
        )

    def train(self, episodes):
        """Train by self-play for *episodes* games.

        NOTE(review): the agent's policy picks moves for BOTH sides (the game
        alternates current_player, and every move updates the Q-table), with
        reward +1 when self.symbol wins, -1 when the other symbol wins and
        0.5 on a draw. This is the original training scheme, preserved as-is.
        """
        for _ in range(episodes):
            game = TicTacToe()
            state = self.get_state(game)
            while True:
                action = self.choose_action(game)
                game.make_move(action)
                next_state = self.get_state(game)
                winner = game.check_winner()
                if winner == self.symbol:
                    reward = 1
                    self.update_q_table(state, action, reward, next_state)
                    break
                elif winner:
                    reward = -1
                    self.update_q_table(state, action, reward, next_state)
                    break
                elif game.is_draw():
                    reward = 0.5
                    self.update_q_table(state, action, reward, next_state)
                    break
                else:
                    reward = 0
                    self.update_q_table(state, action, reward, next_state)
                    game.switch_player()
                    state = next_state


# Global game instance shared by all Gradio callbacks (single-session design).
game = TicTacToe()
game_mode = "human_vs_human"
minimax_player = MinimaxPlayer('O')
qlearning_player = QLearningPlayer('O')
qlearning_player.train(1000)  # Pre-train the Q-learning agent


def render_board():
    """Convert the game board to a 3x3 grid of strings for the DataFrame widget
    (empty cells render as "" rather than a space)."""
    board = []
    for i in range(3):
        row = []
        for j in range(3):
            idx = i * 3 + j
            value = game.board[idx] if game.board[idx] != ' ' else ""
            row.append(value)
        board.append(row)
    return board


def check_game_end():
    """True if the game has ended (either a win or a draw)."""
    return game.check_winner() or game.is_draw()


def get_game_status():
    """Return the status-bar message for the current game state."""
    winner = game.check_winner()
    if winner:
        return f"Player {winner} wins!"
    elif game.is_draw():
        return "It's a draw!"
    else:
        return f"Current player: {game.current_player}"


def reset_game(game_mode, difficulty):
    """Reset the game board. *game_mode* and *difficulty* are accepted to match
    the Gradio event signature; difficulty is currently unused."""
    game.reset_board()
    return render_board(), "Game reset! Current player: X"


def _normalize_mode(game_mode):
    """Map the radio-button label to an internal mode key.

    BUG FIX: the radio supplies "Human_vs_Human" / "AI" / "Q_learning", but the
    handler compared against "human_vs_human" / "minimax" / "q_learning", so
    "AI" silently fell through to the Q-learning branch and the human-vs-human
    branch never matched at all.
    """
    mode = (game_mode or "").lower()
    if mode in ("ai", "minimax"):
        return "minimax"
    if mode == "q_learning":
        return "q_learning"
    return "human_vs_human"


def make_move(evt: gr.SelectData, game_mode, difficulty):
    """Handle a board click: apply the human move, then the AI reply if playing
    against an AI. *difficulty* is currently unused."""
    row, col = evt.index
    position = row * 3 + col
    mode = _normalize_mode(game_mode)

    # Ignore clicks on occupied cells or after the game has already ended.
    if game.board[position] != ' ' or check_game_end():
        return render_board(), get_game_status()

    # Human move.
    game.make_move(position)
    if check_game_end():
        return render_board(), get_game_status()
    game.switch_player()

    # AI reply. BUG FIX: in the original, this code sat on the `elif` branch of
    # the cell-legality check, so the AI only ever moved when the human clicked
    # an OCCUPIED cell; after a valid human move the AI never responded.
    if mode != "human_vs_human":
        if mode == "minimax":
            ai_move = minimax_player.get_move(game)
        else:  # q_learning
            ai_move = qlearning_player.choose_action(game)
        game.make_move(ai_move)
        if check_game_end():
            return render_board(), get_game_status()
        game.switch_player()

    return render_board(), get_game_status()


def create_gui():
    """Create the Gradio interface."""
    with gr.Blocks() as interface:
        gr.Markdown("# Tic Tac Toe with AI")

        with gr.Row():
            game_mode = gr.Radio(
                ["Human_vs_Human", "AI", "Q_learning"],
                label="Game Mode",
                value="human_vs_human"
            )
            difficulty = gr.Radio(
                ["Easy", "Medium", "Hard"],
                label="AI Difficulty",
                value="medium"
            )

        board = gr.DataFrame(
            render_board(),
            headers=[""] * 3,  # Empty headers for 3 columns
            interactive=True,
            col_count=(3, "fixed"),
            row_count=(3, "fixed")
        )
        status = gr.Textbox(value="Current player: X", label="Status")
        reset_btn = gr.Button("Reset Game")

        # Event handlers
        board.select(
            make_move,
            [game_mode, difficulty],
            [board, status]
        )
        reset_btn.click(
            reset_game,
            [game_mode, difficulty],
            [board, status]
        )

    return interface


# Launch the interface
if __name__ == "__main__":
    interface = create_gui()
    interface.launch()