import io import os import re import time import math import random from collections import defaultdict from datetime import datetime import numpy as np import pandas as pd import requests from tqdm.auto import tqdm from PIL import Image, ImageDraw, ImageFont import plotly.graph_objects as go from plotly.subplots import make_subplots import gradio as gr import gistyc # Try optional transformers try: from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM import torch TRANSFORMERS_AVAILABLE = True except Exception: TRANSFORMERS_AVAILABLE = False # Constants for the game BOARD_SIZE = 300 MARGIN = 50 CELL_SIZE = BOARD_SIZE // 3 class TicTacToe: def __init__(self): self.board = [[' ' for _ in range(3)] for _ in range(3)] self.current_player = 'X' self.winner = None self.game_over = False self.move_history = [] def make_move(self, row, col): if self.board[row][col] == ' ' and not self.game_over: self.board[row][col] = self.current_player self.move_history.append((row, col, self.current_player)) if self.check_winner(): self.winner = self.current_player self.game_over = True elif self.is_board_full(): self.game_over = True else: self.current_player = 'O' if self.current_player == 'X' else 'X' return True return False def check_winner(self): # Check rows for row in self.board: if row[0] == row[1] == row[2] != ' ': return True # Check columns for col in range(3): if self.board[0][col] == self.board[1][col] == self.board[2][col] != ' ': return True # Check diagonals if self.board[0][0] == self.board[1][1] == self.board[2][2] != ' ': return True if self.board[0][2] == self.board[1][1] == self.board[2][0] != ' ': return True return False def is_board_full(self): for row in self.board: for cell in row: if cell == ' ': return False return True def get_legal_moves(self): moves = [] for i in range(3): for j in range(3): if self.board[i][j] == ' ': moves.append(f"{i+1},{j+1}") return moves def get_board_state_description(self): """Return a textual description of the current board state""" description = "Current board:\n" for i in range(3): row = [] for j in range(3): if self.board[i][j] == ' ': row.append(f"({i+1},{j+1})") else: row.append(self.board[i][j]) description += " | ".join(row) + "\n" description += f"\nYou are playing as {self.current_player}. " description += f"Legal moves: {', '.join(self.get_legal_moves())}" return description def create_board_image(board, highlight_move=None): """Create an image of the current board state""" img = Image.new('RGB', (BOARD_SIZE + 2*MARGIN, BOARD_SIZE + 2*MARGIN), 'white') draw = ImageDraw.Draw(img) # Draw grid lines for i in range(1, 3): # Vertical lines draw.line([(MARGIN + i*CELL_SIZE, MARGIN), (MARGIN + i*CELL_SIZE, MARGIN + BOARD_SIZE)], fill='black', width=3) # Horizontal lines draw.line([(MARGIN, MARGIN + i*CELL_SIZE), (MARGIN + BOARD_SIZE, MARGIN + i*CELL_SIZE)], fill='black', width=3) # Draw X and O try: font = ImageFont.truetype("arial.ttf", 40) except: font = ImageFont.load_default() for i in range(3): for j in range(3): x = MARGIN + j * CELL_SIZE + CELL_SIZE // 2 y = MARGIN + i * CELL_SIZE + CELL_SIZE // 2 if board[i][j] == 'X': draw.text((x-10, y-20), 'X', fill='blue', font=font) elif board[i][j] == 'O': draw.text((x-10, y-20), 'O', fill='red', font=font) # Highlight last move if highlight_move: row, col = highlight_move x1 = MARGIN + (col-1) * CELL_SIZE + 5 y1 = MARGIN + (row-1) * CELL_SIZE + 5 x2 = MARGIN + col * CELL_SIZE - 5 y2 = MARGIN + row * CELL_SIZE - 5 draw.rectangle([x1, y1, x2, y2], outline='green', width=3) return img def write_game_record(moves, model_id_x, model_id_o, result, time_budget, termination): """Write game record in PGN-like format""" current_utc_datetime = datetime.utcnow() utc_date = current_utc_datetime.strftime("%Y.%m.%d") utc_time = current_utc_datetime.strftime("%H:%M:%S") moves_str = " ".join([f"{i+1}.{move}" for i, move in enumerate(moves)]) final_record = f"""[Event 'Tic Tac Toe LLM Arena'] [Site 'HuggingFace Spaces'] [Date '{utc_date}'] [Time '{utc_time}'] [PlayerX '{model_id_x}'] [PlayerO '{model_id_o}'] [Result '{result}'] [TimeControl '{time_budget}+0'] [Termination '{termination}'] {moves_str} {result} """ return final_record def determine_termination(game, time_budget_x, time_budget_o): """Determine how the game ended""" if game.winner: return "Win" elif game.game_over: return "Draw" elif time_budget_x <= 0: return "Timeout - X lost on time" elif time_budget_o <= 0: return "Timeout - O lost on time" else: return "Unknown" def format_elapsed(seconds): """Format elapsed time""" hours, remainder = divmod(int(seconds), 3600) minutes, seconds = divmod(remainder, 60) if hours: return f"{hours:02d}:{minutes:02d}:{seconds:02d}" elif minutes: return f"{minutes:02d}:{seconds:02d}" else: return f"{seconds:02d}" def save_result_file(game_id, model_id_x, model_id_o, termination, result, auth_token, gist_id): """Save result to CSV file""" timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") data_str = f"{game_id},{timestamp},{model_id_x},{model_id_o},{termination},{result}\n" with open("tictactoe_results.csv", "a") as file: file.write(data_str) # Update Gist if token is provided if auth_token and gist_id: gist_api = gistyc.GISTyc(auth_token=auth_token) gist_api.update_gist(file_name="tictactoe_results.csv", gist_id=gist_id) def save_game_record(final_record, file_name, auth_token): """Save game record to Gist""" with open(file_name + ".txt", "w") as file: file.write(final_record) if auth_token: gist_api = gistyc.GISTyc(auth_token=auth_token) response_data = gist_api.create_gist(file_name=file_name + ".txt") return response_data["id"] return "local" def calculate_elo(rank1, rank2, result): """Calculate new ELO rating""" K = 32 expected_score1 = 1 / (1 + 10 ** ((rank2 - rank1) / 400)) new_rank1 = rank1 + K * (result - expected_score1) return round(new_rank1) def update_elo_ratings(game_data): """Update ELO ratings based on game results""" elo_ratings = defaultdict(lambda: 1000) for index, row in game_data.iterrows(): if row["Result"] == "*": continue model1 = row["ModelX"] model2 = row["ModelO"] result = row["Result"] model1_elo = elo_ratings[model1] model2_elo = elo_ratings[model2] if result == "1-0": # ModelX wins elo_ratings[model1] = calculate_elo(model1_elo, model2_elo, 1) elo_ratings[model2] = calculate_elo(model2_elo, model1_elo, 0) elif result == "0-1": # ModelO wins elo_ratings[model1] = calculate_elo(model1_elo, model2_elo, 0) elo_ratings[model2] = calculate_elo(model2_elo, model1_elo, 1) elif result == "1/2-1/2": # Draw elo_ratings[model1] = calculate_elo(model1_elo, model2_elo, 0.5) elo_ratings[model2] = calculate_elo(model2_elo, model1_elo, 0.5) return elo_ratings def get_leaderboard(): """Get leaderboard data""" try: return pd.read_csv("tictactoe_leaderboard.csv") except: # Create initial leaderboard if doesn't exist df = pd.DataFrame(columns=["Model", "ELO Rating", "Games", "Wins", "Losses", "Draws"]) df.to_csv("tictactoe_leaderboard.csv", index=False) return df def load_model_and_tokenizer(model_id): """Load model and tokenizer using transformers""" if not TRANSFORMERS_AVAILABLE: raise ImportError("Transformers library is not available.") tokenizer = AutoTokenizer.from_pretrained(model_id) model = AutoModelForCausalLM.from_pretrained(model_id) # If the model doesn't have a pad token, set it to eos token if tokenizer.pad_token is None: tokenizer.pad_token = tokenizer.eos_token return model, tokenizer def generate_move_with_context(model, tokenizer, game, max_length=50): """Generate a move using the model with full game context""" # Create a detailed prompt with board state and strategy hints board_description = game.get_board_state_description() prompt = f"""You are playing Tic Tac Toe as {game.current_player}. {board_description} Strategy considerations: - Try to win by getting three in a row - Block your opponent if they are about to win - The center (2,2) is a strong position - Corners (1,1), (1,3), (3,1), (3,3) are good positions - Try to create multiple threats at once Make your move by responding with only the coordinates in the format: row,col For example: 2,2 Your move: """ # Encode the prompt inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512) # Generate text with the model with torch.no_grad(): outputs = model.generate( inputs.input_ids, max_new_tokens=max_length, num_return_sequences=1, pad_token_id=tokenizer.pad_token_id, do_sample=True, temperature=0.8, # Higher temperature for more variety top_p=0.9, top_k=50, repetition_penalty=1.1, ) # Decode the generated text generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True) # Extract the generated part (after the prompt) generated_part = generated_text[len(prompt):].strip() # Try to find a move in the format "row,col" move_pattern = r'(\d)\s*[,.\-:;]?\s*(\d)' matches = re.findall(move_pattern, generated_part) # Try to find a valid move legal_moves = game.get_legal_moves() # First, check if any of the found matches are legal moves for match in matches: if len(match) == 2: move_str = f"{match[0]},{match[1]}" if move_str in legal_moves: return move_str # If no valid move found in the matches, try to extract from the text more liberally for move in legal_moves: if move in generated_part: return move # If still no valid move, try a different approach - look for numbers numbers = re.findall(r'\b[1-3]\b', generated_part) if len(numbers) >= 2: move_str = f"{numbers[0]},{numbers[1]}" if move_str in legal_moves: return move_str # Final fallback: choose a random legal move if legal_moves: return random.choice(legal_moves) else: return "2,2" # Should never happen if game isn't over def play_game(model_id_x, model_id_o): """Main game function""" if not TRANSFORMERS_AVAILABLE: gr.Error("Transformers library is not available. Please install it to use this feature.") return TIME_BUDGET = 120 # Increased time budget for more complex reasoning prompt = "Make your move (format: row,col where row and col are 1,2,3):" # Initialize game game = TicTacToe() # Load models try: model_x, tokenizer_x = load_model_and_tokenizer(model_id_x) model_o, tokenizer_o = load_model_and_tokenizer(model_id_o) except Exception as e: gr.Error(f"Error loading models: {e}") return moves = [] game_images = [] time_budget_x = TIME_BUDGET time_budget_o = TIME_BUDGET last_move = None # Create initial board image image = create_board_image(game.board) game_images.append(np.array(image)) yield image # Progress bars x_bar = tqdm(total=time_budget_x, desc=f"{model_id_x.split('/')[-1]}:", bar_format="{desc} {n:.0f}s left | Elapsed: {elapsed}") o_bar = tqdm(total=time_budget_o, desc=f"{model_id_o.split('/')[-1]}:", bar_format="{desc} {n:.0f}s left | Elapsed: {elapsed}") # Game loop max_moves = 9 # Maximum possible moves in tic tac toe move_count = 0 while not game.game_over and move_count < max_moves: current_model = model_x if game.current_player == 'X' else model_o current_tokenizer = tokenizer_x if game.current_player == 'X' else tokenizer_o current_time_budget = time_budget_x if game.current_player == 'X' else time_budget_o # Generate move start_time = time.time() try: move_str = generate_move_with_context(current_model, current_tokenizer, game) except Exception as e: print(f"Error generating move: {e}") # Fallback to random move legal_moves = game.get_legal_moves() if legal_moves: move_str = random.choice(legal_moves) else: break end_time = time.time() move_duration = end_time - start_time # Parse move try: row, col = map(int, move_str.split(',')) row -= 1 col -= 1 if 0 <= row <= 2 and 0 <= col <= 2 and game.board[row][col] == ' ': game.make_move(row, col) moves.append(move_str) last_move = (row+1, col+1) move_count += 1 # Update time budget if game.current_player == 'O': # X just moved time_budget_x -= move_duration x_bar.n = max(0, time_budget_x) x_bar.refresh() if time_budget_x <= 0: game.winner = 'O' game.game_over = True else: # O just moved time_budget_o -= move_duration o_bar.n = max(0, time_budget_o) o_bar.refresh() if time_budget_o <= 0: game.winner = 'X' game.game_over = True # Create new board image image = create_board_image(game.board, last_move) game_images.append(np.array(image)) yield image else: print(f"Illegal move: {move_str}. Choosing random move.") # Fallback to random legal move legal_moves = game.get_legal_moves() if legal_moves: move_str = random.choice(legal_moves) # Retry with the random move row, col = map(int, move_str.split(',')) row -= 1 col -= 1 if game.make_move(row, col): moves.append(move_str) last_move = (row+1, col+1) move_count += 1 image = create_board_image(game.board, last_move) game_images.append(np.array(image)) yield image else: break except ValueError: print(f"Invalid move format: {move_str}. Choosing random move.") # Fallback to random legal move legal_moves = game.get_legal_moves() if legal_moves: move_str = random.choice(legal_moves) # Retry with the random move try: row, col = map(int, move_str.split(',')) row -= 1 col -= 1 if game.make_move(row, col): moves.append(move_str) last_move = (row+1, col+1) move_count += 1 image = create_board_image(game.board, last_move) game_images.append(np.array(image)) yield image except: break x_bar.close() o_bar.close() # Determine result if game.winner == 'X': result = "1-0" elif game.winner == 'O': result = "0-1" else: result = "1/2-1/2" # Save game record termination = determine_termination(game, time_budget_x, time_budget_o) final_record = write_game_record(moves, model_id_x, model_id_o, result, TIME_BUDGET, termination) file_name = f"{model_id_x.split('/')[-1]}_vs_{model_id_o.split('/')[-1]}" game_id = save_game_record(final_record, file_name, os.environ.get("GITHUB_TOKEN")) # Save results save_result_file(game_id, model_id_x, model_id_o, termination, result, os.environ.get("GITHUB_TOKEN"), os.environ.get("RESULT_GIST_ID")) # Update leaderboard try: game_data = pd.read_csv('tictactoe_results.csv') elo_ratings = update_elo_ratings(game_data) elo_ratings_df = pd.DataFrame(elo_ratings.items(), columns=['Model', 'ELO Rating']) elo_ratings_df['ELO Rating'] = elo_ratings_df['ELO Rating'].round().astype(int) elo_ratings_df.sort_values(by='ELO Rating', ascending=False, inplace=True) elo_ratings_df.reset_index(drop=True, inplace=True) elo_ratings_df.to_csv('tictactoe_leaderboard.csv', index=False) # Upload to Gist if token available if os.environ.get("GITHUB_TOKEN") and os.environ.get("LEADERBOARD_GIST_ID"): gist_api = gistyc.GISTyc(auth_token=os.environ.get("GITHUB_TOKEN")) gist_api.update_gist(file_name='tictactoe_leaderboard.csv', gist_id=os.environ.get("LEADERBOARD_GIST_ID")) except Exception as e: print(f"Error updating leaderboard: {e}") # Show result if game.winner: winner_model = model_id_x if game.winner == 'X' else model_id_o result_text = f"{winner_model} wins! ({termination})" else: result_text = f"Draw! ({termination})" gr.Info(result_text) print(result_text) # Return final image yield image # Initialize environment os.environ["TOKENIZERS_PARALLELISM"] = "false" # Create initial files if they don't exist for file in ["tictactoe_results.csv", "tictactoe_leaderboard.csv"]: if not os.path.exists(file): if file == "tictactoe_results.csv": pd.DataFrame(columns=["GameID", "Timestamp", "ModelX", "ModelO", "Termination", "Result"]).to_csv(file, index=False) else: pd.DataFrame(columns=["Model", "ELO Rating"]).to_csv(file, index=False) # Create Gradio interface title = """
⭕ Tic Tac Toe LLM Arena ❌
🤖 Make two LLMs play Tic Tac Toe against each other
Enter the HuggingFace model IDs for two language models and watch them play Tic Tac Toe with adaptive strategies!
LLMs now receive full board context and strategic guidance, making games more varied and interesting.
""" with gr.Blocks(theme=gr.themes.Soft()) as demo: gr.Markdown(title) with gr.Row(): with gr.Column(): model_x = gr.Textbox( label="❌ Player X Model ID", value="Mattimax/DACMini-IT", placeholder="Enter HuggingFace model ID for Player X" ) model_o = gr.Textbox( label="⭕ Player O Model ID", value="Mattimax/DACMini", placeholder="Enter HuggingFace model ID for Player O" ) fight_btn = gr.Button("Start Game! 🎮", variant="primary") with gr.Column(): game_display = gr.Image( value=create_board_image([[' ' for _ in range(3)] for _ in range(3)]), label="Game Board", height=400, width=400 ) gr.Markdown('🏆 Leaderboard