# Source: Hugging Face Space "Mattimax/TrisLLM" (Space status when captured: Sleeping).
# File size: 21,409 bytes; revision 2c8c0d0.

import io
import os
import re
import time
import math
import random
from collections import defaultdict
from datetime import datetime

import numpy as np
import pandas as pd
import requests
from tqdm.auto import tqdm
from PIL import Image, ImageDraw, ImageFont
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import gradio as gr
import gistyc

# Try optional transformers
# The app still imports when transformers/torch are missing; the flag below is
# checked by load_model_and_tokenizer() and play_game() before any model work.
try:
    from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
    import torch
    TRANSFORMERS_AVAILABLE = True
except Exception:
    # Any failure while importing (not only ImportError) disables the feature.
    TRANSFORMERS_AVAILABLE = False

# Constants for the game
# Side length of the 3x3 grid drawn by create_board_image().
BOARD_SIZE = 300
# Blank border added on every side of the grid in the rendered image.
MARGIN = 50
# Side length of a single cell (one third of the grid).
CELL_SIZE = BOARD_SIZE // 3

class TicTacToe:
    """State of a single 3x3 Tic Tac Toe game.

    Cells hold 'X', 'O' or ' ' (empty). 'X' always moves first. Coordinates
    passed to ``make_move`` are 0-based, while the strings produced by
    ``get_legal_moves`` and ``get_board_state_description`` are 1-based
    ("row,col").
    """

    def __init__(self):
        self.board = [[' ' for _ in range(3)] for _ in range(3)]
        self.current_player = 'X'
        self.winner = None
        self.game_over = False
        # Chronological list of (row, col, player) tuples, 0-based.
        self.move_history = []

    def make_move(self, row, col):
        """Place the current player's mark at 0-based (row, col).

        Returns True when the move was applied and False when it was rejected
        (game already over, coordinates outside the board, or cell occupied).
        The turn only passes to the other player when the game continues, so
        after a game-ending move ``current_player`` is still the player who
        made it.
        """
        if self.game_over:
            return False
        # Reject out-of-range coordinates explicitly: negative indexes would
        # otherwise wrap around to the opposite side of the board and indexes
        # above 2 would raise IndexError.
        if not (0 <= row < 3 and 0 <= col < 3):
            return False
        if self.board[row][col] != ' ':
            return False

        self.board[row][col] = self.current_player
        self.move_history.append((row, col, self.current_player))

        if self.check_winner():
            self.winner = self.current_player
            self.game_over = True
        elif self.is_board_full():
            self.game_over = True
        else:
            self.current_player = 'O' if self.current_player == 'X' else 'X'
        return True

    def check_winner(self):
        """Return True if any row, column or diagonal holds three equal marks."""
        board = self.board
        lines = [list(row) for row in board]
        lines.extend([board[r][c] for r in range(3)] for c in range(3))
        lines.append([board[i][i] for i in range(3)])
        lines.append([board[i][2 - i] for i in range(3)])
        return any(
            line[0] != ' ' and line[0] == line[1] == line[2]
            for line in lines
        )

    def is_board_full(self):
        """Return True when no empty cell is left."""
        return all(cell != ' ' for row in self.board for cell in row)

    def get_legal_moves(self):
        """Return the empty cells as 1-based "row,col" strings, row by row."""
        return [
            f"{i+1},{j+1}"
            for i in range(3)
            for j in range(3)
            if self.board[i][j] == ' '
        ]

    def get_board_state_description(self):
        """Return a textual description of the current board state.

        Occupied cells show their mark; empty cells show their 1-based
        coordinates. The description ends with the side to move and the list
        of legal moves.
        """
        description = "Current board:\n"
        for i in range(3):
            cells = [
                f"({i+1},{j+1})" if self.board[i][j] == ' ' else self.board[i][j]
                for j in range(3)
            ]
            description += " | ".join(cells) + "\n"

        description += f"\nYou are playing as {self.current_player}. "
        description += f"Legal moves: {', '.join(self.get_legal_moves())}"
        return description

def create_board_image(board, highlight_move=None):
    """Render the board as a PIL image.

    Args:
        board: 3x3 nested sequence whose cells are 'X', 'O' or anything else
            (treated as empty).
        highlight_move: optional 1-based (row, col) pair; when given, a green
            frame is drawn inside that cell.

    Returns:
        A white RGB image of side BOARD_SIZE + 2*MARGIN with the grid, the
        marks and the optional highlight.
    """
    side = BOARD_SIZE + 2*MARGIN
    img = Image.new('RGB', (side, side), 'white')
    draw = ImageDraw.Draw(img)

    # Draw the two inner vertical and two inner horizontal grid lines.
    for i in range(1, 3):
        offset = MARGIN + i*CELL_SIZE
        draw.line([(offset, MARGIN), (offset, MARGIN + BOARD_SIZE)], fill='black', width=3)
        draw.line([(MARGIN, offset), (MARGIN + BOARD_SIZE, offset)], fill='black', width=3)

    # Prefer Arial; fall back to Pillow's built-in font when the file cannot be
    # read (OSError) or FreeType support is missing (ImportError). A bare
    # except here would also swallow KeyboardInterrupt/SystemExit.
    try:
        font = ImageFont.truetype("arial.ttf", 40)
    except (OSError, ImportError):
        font = ImageFont.load_default()

    mark_colors = {'X': 'blue', 'O': 'red'}
    for i in range(3):
        for j in range(3):
            color = mark_colors.get(board[i][j])
            if color is None:
                continue
            x = MARGIN + j * CELL_SIZE + CELL_SIZE // 2
            y = MARGIN + i * CELL_SIZE + CELL_SIZE // 2
            # Fixed offset from the cell centre used to position the glyph.
            draw.text((x-10, y-20), board[i][j], fill=color, font=font)

    # Highlight last move (coordinates are 1-based, hence the "- 1").
    if highlight_move:
        row, col = highlight_move
        x1 = MARGIN + (col-1) * CELL_SIZE + 5
        y1 = MARGIN + (row-1) * CELL_SIZE + 5
        x2 = MARGIN + col * CELL_SIZE - 5
        y2 = MARGIN + row * CELL_SIZE - 5
        draw.rectangle([x1, y1, x2, y2], outline='green', width=3)

    return img

def write_game_record(moves, model_id_x, model_id_o, result, time_budget, termination):
    """Build a PGN-like text record of a finished game.

    Args:
        moves: sequence of move strings in playing order.
        model_id_x: identifier written to the PlayerX tag.
        model_id_o: identifier written to the PlayerO tag.
        result: result token (e.g. "1-0"), written to the Result tag and
            appended after the move list.
        time_budget: value written to the TimeControl tag as "<budget>+0".
        termination: text written to the Termination tag.

    Returns:
        The record as a single string ending with a newline. Date and Time
        tags carry the current UTC date and time.
    """
    # datetime.utcnow() is deprecated and returns a naive object; an aware
    # UTC timestamp formats to exactly the same strings.
    from datetime import timezone

    now_utc = datetime.now(timezone.utc)
    utc_date = now_utc.strftime("%Y.%m.%d")
    utc_time = now_utc.strftime("%H:%M:%S")

    # Moves are numbered per ply: "1.<move> 2.<move> ...".
    moves_str = " ".join(f"{i+1}.{move}" for i, move in enumerate(moves))

    final_record = f"""[Event 'Tic Tac Toe LLM Arena']
[Site 'HuggingFace Spaces']
[Date '{utc_date}']
[Time '{utc_time}']
[PlayerX '{model_id_x}']
[PlayerO '{model_id_o}']
[Result '{result}']
[TimeControl '{time_budget}+0']
[Termination '{termination}']

{moves_str} {result}
"""
    return final_record

def determine_termination(game, time_budget_x, time_budget_o):
    """Describe how the game ended.

    Args:
        game: object exposing ``winner`` and ``game_over``.
        time_budget_x: remaining time for X; a value <= 0 means X ran out.
        time_budget_o: remaining time for O; a value <= 0 means O ran out.

    Returns:
        "Timeout - X lost on time", "Timeout - O lost on time", "Win", "Draw"
        or "Unknown".
    """
    # Time budgets are checked first: play_game() marks the opponent as the
    # winner when a player runs out of time, so testing game.winner first
    # would label every timeout as a plain "Win".
    if time_budget_x <= 0:
        return "Timeout - X lost on time"
    if time_budget_o <= 0:
        return "Timeout - O lost on time"
    if game.winner:
        return "Win"
    if game.game_over:
        return "Draw"
    return "Unknown"

def format_elapsed(seconds):
    """Render a duration in seconds as "HH:MM:SS", "MM:SS" or "SS".

    The value is truncated to whole seconds; leading units that are zero are
    left out and every shown unit is zero-padded to two digits.
    """
    whole_hours, leftover = divmod(int(seconds), 3600)
    whole_minutes, whole_seconds = divmod(leftover, 60)

    if whole_hours:
        shown = (whole_hours, whole_minutes, whole_seconds)
    elif whole_minutes:
        shown = (whole_minutes, whole_seconds)
    else:
        shown = (whole_seconds,)

    return ":".join(f"{unit:02d}" for unit in shown)

def save_result_file(game_id, model_id_x, model_id_o, termination, result, auth_token, gist_id):
    """Append one game result to tictactoe_results.csv and optionally sync it to a Gist.

    Args:
        game_id: identifier of the game record.
        model_id_x: model that played X.
        model_id_o: model that played O.
        termination: how the game ended.
        result: result token (e.g. "1-0").
        auth_token: GitHub token; the Gist is only updated when both this and
            ``gist_id`` are truthy.
        gist_id: id of the Gist that mirrors the results file.
    """
    import csv  # local import: only this function needs it

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    row = [game_id, timestamp, model_id_x, model_id_o, termination, result]

    # csv.writer quotes fields containing commas, quotes or newlines, so a
    # model ID typed by a user cannot shift the columns of the results file.
    # newline="" plus an explicit "\n" terminator keeps one row per line.
    with open("tictactoe_results.csv", "a", newline="", encoding="utf-8") as file:
        csv.writer(file, lineterminator="\n").writerow(row)

    # Update Gist if token is provided
    if auth_token and gist_id:
        gist_api = gistyc.GISTyc(auth_token=auth_token)
        gist_api.update_gist(file_name="tictactoe_results.csv", gist_id=gist_id)

def save_game_record(final_record, file_name, auth_token):
    """Write the game record to "<file_name>.txt" and optionally publish it as a Gist.

    Returns the id of the created Gist, or the string "local" when no auth
    token was supplied and the record was only written to disk.
    """
    record_path = file_name + ".txt"
    with open(record_path, "w") as handle:
        handle.write(final_record)

    # Without a token the record stays on disk only.
    if not auth_token:
        return "local"

    client = gistyc.GISTyc(auth_token=auth_token)
    created = client.create_gist(file_name=record_path)
    return created["id"]

def calculate_elo(rank1, rank2, result):
    """Return the first player's updated ELO rating, rounded to an integer.

    ``result`` is the first player's score (the callers in this file pass 1,
    0.5 or 0). The update uses a K-factor of 32 and the logistic expected
    score with a 400-point scale.
    """
    k_factor = 32
    rating_gap = rank2 - rank1
    expected = 1 / (1 + 10 ** (rating_gap / 400))
    adjusted = rank1 + k_factor * (result - expected)
    return round(adjusted)

def update_elo_ratings(game_data):
    """Replay every recorded game and return the resulting ELO ratings.

    ``game_data`` is iterated with ``iterrows()`` and must provide the
    "ModelX", "ModelO" and "Result" columns. Every model starts at 1000.
    Rows whose result is "*" are skipped entirely; rows with any other
    unrecognised result still register both models at their current rating
    but do not change it.

    Returns a defaultdict mapping model id to rating.
    """
    # Score obtained by the X player for each recognised result token.
    score_for_x = {"1-0": 1, "0-1": 0, "1/2-1/2": 0.5}
    ratings = defaultdict(lambda: 1000)

    for _, game in game_data.iterrows():
        outcome = game["Result"]
        if outcome == "*":
            continue

        player_x = game["ModelX"]
        player_o = game["ModelO"]

        # Looking the ratings up registers unseen models at the default 1000.
        rating_x = ratings[player_x]
        rating_o = ratings[player_o]

        x_score = score_for_x.get(outcome)
        if x_score is None:
            continue

        # Both updates use the ratings from before this game.
        ratings[player_x] = calculate_elo(rating_x, rating_o, x_score)
        ratings[player_o] = calculate_elo(rating_o, rating_x, 1 - x_score)

    return ratings

def get_leaderboard():
    """Load the leaderboard from tictactoe_leaderboard.csv.

    Returns:
        The file's contents as a DataFrame. When the file is missing or
        completely empty, an empty leaderboard with the default columns is
        written to disk and returned.

    Raises:
        Any other read error (e.g. a malformed file) propagates to the caller.
        Catching everything here would silently replace an existing
        leaderboard with an empty one.
    """
    try:
        return pd.read_csv("tictactoe_leaderboard.csv")
    except (FileNotFoundError, pd.errors.EmptyDataError):
        # Create initial leaderboard if it doesn't exist yet.
        df = pd.DataFrame(columns=["Model", "ELO Rating", "Games", "Wins", "Losses", "Draws"])
        df.to_csv("tictactoe_leaderboard.csv", index=False)
        return df

def load_model_and_tokenizer(model_id):
    """Load a causal language model and its tokenizer by model id.

    Returns a ``(model, tokenizer)`` pair. Raises ImportError when the
    optional transformers dependency could not be imported at start-up.
    """
    if TRANSFORMERS_AVAILABLE:
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForCausalLM.from_pretrained(model_id)

        # Tokenizers without a dedicated pad token reuse the EOS token.
        has_pad_token = tokenizer.pad_token is not None
        if not has_pad_token:
            tokenizer.pad_token = tokenizer.eos_token

        return model, tokenizer

    raise ImportError("Transformers library is not available.")

def generate_move_with_context(model, tokenizer, game, max_length=50):
    """Ask the model for a move and return it as a 1-based "row,col" string.

    Args:
        model: causal LM exposing ``generate``.
        tokenizer: tokenizer matching ``model``.
        game: game object providing ``current_player``,
            ``get_board_state_description()`` and ``get_legal_moves()``.
        max_length: maximum number of new tokens to sample.

    Returns:
        A legal move extracted from the model's completion. If nothing usable
        is found, a random legal move is returned; "2,2" is returned only when
        there are no legal moves at all.
    """
    # Create a detailed prompt with board state and strategy hints
    board_description = game.get_board_state_description()

    prompt = f"""You are playing Tic Tac Toe as {game.current_player}. 
{board_description}

Strategy considerations:
- Try to win by getting three in a row
- Block your opponent if they are about to win
- The center (2,2) is a strong position
- Corners (1,1), (1,3), (3,1), (3,3) are good positions
- Try to create multiple threats at once

Make your move by responding with only the coordinates in the format: row,col
For example: 2,2

Your move: """

    # Encode the prompt
    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=512)
    input_ids = inputs["input_ids"]
    prompt_token_count = input_ids.shape[1]

    # Generate text with the model. The attention mask is passed explicitly
    # because the pad token may be the EOS token, in which case it cannot be
    # inferred reliably from the input ids.
    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            attention_mask=inputs.get("attention_mask"),
            max_new_tokens=max_length,
            num_return_sequences=1,
            pad_token_id=tokenizer.pad_token_id,
            do_sample=True,
            temperature=0.8,  # Higher temperature for more variety
            top_p=0.9,
            top_k=50,
            repetition_penalty=1.1,
        )

    # Decode only the newly generated tokens. Slicing the decoded text by
    # len(prompt) misaligns whenever decoding does not reproduce the prompt
    # character for character (truncation, tokenizer normalisation), which
    # lets coordinates from the prompt itself leak into the move parsing.
    new_tokens = outputs[0][prompt_token_count:]
    generated_part = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()

    legal_moves = game.get_legal_moves()

    # 1) Two digits with an optional separator between them, e.g. "2,2" or "2 - 2".
    move_pattern = r'(\d)\s*[,.\-:;]?\s*(\d)'
    for first, second in re.findall(move_pattern, generated_part):
        move_str = f"{first},{second}"
        if move_str in legal_moves:
            return move_str

    # 2) A legal move appearing verbatim anywhere in the text.
    for move in legal_moves:
        if move in generated_part:
            return move

    # 3) The first two standalone digits in the range 1-3.
    numbers = re.findall(r'\b[1-3]\b', generated_part)
    if len(numbers) >= 2:
        move_str = f"{numbers[0]},{numbers[1]}"
        if move_str in legal_moves:
            return move_str

    # Final fallback: choose a random legal move
    if legal_moves:
        return random.choice(legal_moves)
    return "2,2"  # Should never happen if game isn't over

def _parse_move(move_str):
    """Convert a 1-based "row,col" string to 0-based (row, col); None if malformed."""
    try:
        row, col = map(int, move_str.split(','))
    except (AttributeError, ValueError):
        return None
    return row - 1, col - 1


def play_game(model_id_x, model_id_o):
    """Play one game between two models, yielding a board image after each move.

    Args:
        model_id_x: HuggingFace model id playing X (moves first).
        model_id_o: HuggingFace model id playing O.

    Yields:
        The initial empty board, then the board after every move, and finally
        the last board once more after results have been saved.

    Raises:
        gr.Error: when transformers is unavailable or a model fails to load.

    Side effects: writes the game record and results CSV, recomputes the
    leaderboard CSV, and mirrors them to Gists when the GITHUB_TOKEN /
    RESULT_GIST_ID / LEADERBOARD_GIST_ID environment variables are set.
    """
    if not TRANSFORMERS_AVAILABLE:
        # gr.Error is an exception class; it is only shown in the UI when raised.
        raise gr.Error("Transformers library is not available. Please install it to use this feature.")

    TIME_BUDGET = 120  # Seconds of thinking time available to each player

    # Initialize game
    game = TicTacToe()

    # Load models
    try:
        model_x, tokenizer_x = load_model_and_tokenizer(model_id_x)
        model_o, tokenizer_o = load_model_and_tokenizer(model_id_o)
    except Exception as e:
        raise gr.Error(f"Error loading models: {e}") from e

    engines = {'X': (model_x, tokenizer_x), 'O': (model_o, tokenizer_o)}
    budgets = {'X': TIME_BUDGET, 'O': TIME_BUDGET}
    moves = []

    # Create initial board image
    image = create_board_image(game.board)
    yield image

    # Progress bars showing the remaining time of each player
    bar_format = "{desc} {n:.0f}s left | Elapsed: {elapsed}"
    bars = {
        'X': tqdm(total=TIME_BUDGET, desc=f"{model_id_x.split('/')[-1]}:", bar_format=bar_format),
        'O': tqdm(total=TIME_BUDGET, desc=f"{model_id_o.split('/')[-1]}:", bar_format=bar_format),
    }

    max_moves = 9  # Maximum possible moves in tic tac toe

    try:
        while not game.game_over and len(moves) < max_moves:
            legal_moves = game.get_legal_moves()
            if not legal_moves:
                break

            # Remember who is moving *before* the move: make_move() does not
            # switch current_player on a game-ending move, so reading it
            # afterwards would charge the final move to the wrong player.
            mover = game.current_player
            model, tokenizer = engines[mover]

            # Generate move
            start_time = time.time()
            try:
                move_str = generate_move_with_context(model, tokenizer, game)
            except Exception as e:
                print(f"Error generating move: {e}")
                move_str = random.choice(legal_moves)
            move_duration = time.time() - start_time

            # Validate the move; anything unusable is replaced by a random legal move.
            parsed = _parse_move(move_str)
            if parsed is None:
                print(f"Invalid move format: {move_str}. Choosing random move.")
            elif not (0 <= parsed[0] <= 2 and 0 <= parsed[1] <= 2
                      and game.board[parsed[0]][parsed[1]] == ' '):
                print(f"Illegal move: {move_str}. Choosing random move.")
                parsed = None
            if parsed is None:
                parsed = _parse_move(random.choice(legal_moves))

            row, col = parsed
            if not game.make_move(row, col):
                # Not expected for a validated move; stop rather than loop forever.
                break
            moves.append(f"{row+1},{col+1}")

            # Charge the thinking time to the player who just moved, including
            # when a fallback move had to be used.
            budgets[mover] -= move_duration
            bars[mover].n = max(0, budgets[mover])
            bars[mover].refresh()
            if budgets[mover] <= 0:
                # Running out of time loses the game, whatever the board shows.
                game.winner = 'O' if mover == 'X' else 'X'
                game.game_over = True

            # Create new board image with the last move highlighted (1-based)
            image = create_board_image(game.board, (row+1, col+1))
            yield image
    finally:
        # Also runs when the generator is closed early by the caller.
        for bar in bars.values():
            bar.close()

    # Determine result
    if game.winner == 'X':
        result = "1-0"
    elif game.winner == 'O':
        result = "0-1"
    else:
        result = "1/2-1/2"

    auth_token = os.environ.get("GITHUB_TOKEN")

    # Save game record
    termination = determine_termination(game, budgets['X'], budgets['O'])
    final_record = write_game_record(moves, model_id_x, model_id_o, result, TIME_BUDGET, termination)
    file_name = f"{model_id_x.split('/')[-1]}_vs_{model_id_o.split('/')[-1]}"
    game_id = save_game_record(final_record, file_name, auth_token)

    # Save results
    save_result_file(game_id, model_id_x, model_id_o, termination, result,
                     auth_token, os.environ.get("RESULT_GIST_ID"))

    # Update leaderboard (best effort: a failure here must not hide the game result)
    try:
        game_data = pd.read_csv('tictactoe_results.csv')
        elo_ratings = update_elo_ratings(game_data)

        elo_ratings_df = pd.DataFrame(elo_ratings.items(), columns=['Model', 'ELO Rating'])
        elo_ratings_df['ELO Rating'] = elo_ratings_df['ELO Rating'].round().astype(int)
        elo_ratings_df.sort_values(by='ELO Rating', ascending=False, inplace=True)
        elo_ratings_df.reset_index(drop=True, inplace=True)
        elo_ratings_df.to_csv('tictactoe_leaderboard.csv', index=False)

        # Upload to Gist if token available
        leaderboard_gist_id = os.environ.get("LEADERBOARD_GIST_ID")
        if auth_token and leaderboard_gist_id:
            gist_api = gistyc.GISTyc(auth_token=auth_token)
            gist_api.update_gist(file_name='tictactoe_leaderboard.csv',
                                 gist_id=leaderboard_gist_id)
    except Exception as e:
        print(f"Error updating leaderboard: {e}")

    # Show result
    if game.winner:
        winner_model = model_id_x if game.winner == 'X' else model_id_o
        result_text = f"{winner_model} wins! ({termination})"
    else:
        result_text = f"Draw! ({termination})"

    gr.Info(result_text)
    print(result_text)

    # Return final image
    yield image

# Initialize environment
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Make sure both CSV files exist, each starting with just its header row.
for file, columns in (
    ("tictactoe_results.csv", ["GameID", "Timestamp", "ModelX", "ModelO", "Termination", "Result"]),
    ("tictactoe_leaderboard.csv", ["Model", "ELO Rating"]),
):
    if os.path.exists(file):
        continue
    pd.DataFrame(columns=columns).to_csv(file, index=False)

# Create Gradio interface
# HTML header rendered at the top of the page.
title = """
<div align="center">
  <p style="font-size: 36px;">⭕ Tic Tac Toe LLM Arena ❌</p>
  <p style="font-size: 20px;">🤖 Make two LLMs play Tic Tac Toe against each other</p>
  <p><em>Enter the HuggingFace model IDs for two language models and watch them play Tic Tac Toe with adaptive strategies!</em></p>
</div>
"""

# HTML note rendered below the leaderboard.
footer = """
<p><em>LLMs now receive full board context and strategic guidance, making games more varied and interesting.</em></p>
"""

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(title)
    
    with gr.Row():
        # Left column: the two model IDs and the start button.
        with gr.Column():
            model_x = gr.Textbox(
                label="❌ Player X Model ID", 
                value="Mattimax/DACMini-IT",
                placeholder="Enter HuggingFace model ID for Player X"
            )
            model_o = gr.Textbox(
                label="⭕ Player O Model ID", 
                value="Mattimax/DACMini", 
                placeholder="Enter HuggingFace model ID for Player O"
            )
            fight_btn = gr.Button("Start Game! 🎮", variant="primary")
        
        # Right column: the board, initialised with an empty 3x3 grid.
        with gr.Column():
            game_display = gr.Image(
                value=create_board_image([[' ' for _ in range(3)] for _ in range(3)]),
                label="Game Board",
                height=400,
                width=400
            )
    
    gr.Markdown('<div align="center"><p style="font-size: 30px;">🏆 Leaderboard</p></div>')
    # The function itself (not its result) is passed as the value, together
    # with every=30, so the table is re-read from get_leaderboard() periodically.
    leaderboard_display = gr.Dataframe(
        value=get_leaderboard,
        label="Model Rankings",
        every=30
    )
    
    gr.Markdown(footer)
    
    # Set up interactions
    # play_game is a generator: each yielded image replaces the board display.
    fight_btn.click(
        fn=play_game,
        inputs=[model_x, model_o],
        outputs=game_display
    )

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()