Spaces:

eoeooe
/

OCRArena

Sleeping

File size: 7,078 Bytes

28673b1
 
 
 
 
0f35be0
 
 
28673b1
0f369cb
 
bbd3e11
28673b1
 
 
bbd3e11
28673b1
0f369cb
28673b1
0f369cb
0f35be0
 
 
 
 
28673b1
 
 
 
0f35be0
28673b1
b45bb89
 
 
2d375db
b45bb89
 
28673b1
3e159b8
b45bb89
2d375db
 
 
 
 
 
 
 
 
0f35be0
2d375db
 
28673b1
 
 
 
2d375db
 
 
 
 
 
0f35be0
0f369cb
 
 
 
 
 
 
2d375db
0f369cb
b45bb89
2d375db
 
0f369cb
 
 
2d375db
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0f35be0
90a3c09
28673b1
 
 
 
2d375db
28673b1
 
2d375db
 
28673b1
 
 
 
 
 
2d375db
28673b1
2d375db
 
28673b1
2d375db
28673b1
2d375db
28673b1
2d375db
 
 
0f35be0
bb747c6
2d375db
 
 
 
 
0f35be0
28673b1
bb747c6
28673b1
2d375db
 
 
 
28673b1
bb747c6
2d375db
bb747c6
2d375db
28673b1
bb747c6
2d375db
 
 
 
28673b1
bb747c6
2d375db
bbd3e11
2d375db
 
28673b1
 
2d375db
28673b1
2d375db
 
0f35be0
 
 
2d375db
0f35be0
2d375db
bb747c6
28673b1
2d375db
 
28673b1
2d375db
bbd3e11
28673b1
 
0f35be0
 
2d375db

"""
OCR Arena - Main Application
A Gradio web application for comparing OCR results from different AI models.
"""

import datetime
import html
import logging
import os
import uuid

import gradio as gr
from dotenv import load_dotenv

from storage import upload_file_to_bucket
from db import add_vote, get_all_votes, calculate_elo_ratings_from_votes
from ocr_models import process_model_ocr, initialize_gemini, initialize_mistral, initialize_openai
from ui_helpers import (
    get_model_display_name, select_random_models, format_votes_table,
    format_elo_leaderboard
)

# Read settings from a .env file before anything below looks them up.
load_dotenv()

# Logging: INFO and above, one "<LEVEL>: <message>" line per record.
logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)

# Set up the OCR provider clients (same order as before: Gemini, Mistral, OpenAI).
initialize_gemini()
initialize_mistral()
initialize_openai()

# Supabase credentials; either value is None when the variable is not set.
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")
SUPABASE_URL = os.environ.get("SUPABASE_URL")

# Module-level state describing the most recent OCR run.
# Per-model OCR text:
current_gemini_output = current_mistral_output = ""
current_openai_output = current_gpt5_output = ""
# Public URL of the last uploaded image ("" when there is none):
current_image_url = ""
# Identifiers of the two models picked for the last comparison:
current_model_a = current_model_b = ""


def process_image_single_click(image):
    """Run OCR on an uploaded image with two randomly selected models.

    The image is written to a uniquely named temporary PNG, uploaded to the
    Supabase ``images`` bucket on a best-effort basis, and then handed to both
    selected models.  The selected model names, the uploaded image URL and the
    per-model OCR text are mirrored into the module-level ``current_*`` globals.

    Args:
        image: The image delivered by the Gradio input (which is configured
            with ``type="pil"``), or ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple ``(output_a, output_b, vote_a_update, vote_b_update)``.  The
        first two items are the OCR results for model A and model B (or a
        prompt asking for an image); the last two are ``gr.update`` objects
        that show the vote buttons after a run and hide them when no image
        was supplied.
    """
    # NOTE(review): this state is module-level, so it is shared by every caller
    # of this handler — presumably concurrent sessions can overwrite each
    # other's values; confirm whether per-session state is needed.
    global current_gemini_output, current_mistral_output, current_openai_output, current_gpt5_output
    global current_model_a, current_model_b, current_image_url

    if image is None:
        prompt = "Please upload an image."
        return prompt, prompt, gr.update(visible=False), gr.update(visible=False)

    # Select two random models
    model_a, model_b = select_random_models()
    current_model_a = model_a
    current_model_b = model_b

    # Clear the URL first so a failed upload can never leave the previous
    # image's URL associated with this run.
    current_image_url = ""

    # A timestamp alone has one-second resolution; the random suffix keeps two
    # requests in the same second from sharing a temp file and a storage key
    # (the upload is made with upsert disabled).
    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    temp_filename = f"temp_image_{timestamp}_{uuid.uuid4().hex[:8]}.png"

    # Save and upload are best effort: whatever goes wrong here is logged and
    # OCR still runs, matching how an unsuccessful upload result is treated.
    try:
        image.save(temp_filename)
        upload_result = upload_file_to_bucket(
            file_path=temp_filename,
            bucket_name="images",
            storage_path=f"ocr_images/{temp_filename}",
            file_options={"cache-control": "3600", "upsert": "false"}
        )
        if upload_result["success"]:
            public_url = upload_result.get("public_url")
            if not public_url and SUPABASE_URL:
                # Only build the fallback when the base URL is configured;
                # otherwise the result would start with the text "None".
                public_url = f"{SUPABASE_URL}/storage/v1/object/public/images/ocr_images/{temp_filename}"
            current_image_url = public_url or ""
        else:
            logger.error("Image upload failed: %s", upload_result.get("error"))
    except Exception:
        logger.exception("Image upload failed")
    finally:
        try:
            os.remove(temp_filename)
        except FileNotFoundError:
            # Saving failed before the file was created; nothing to clean up.
            pass
        except OSError as e:
            logger.warning("Could not remove temp file: %s", e)

    # Run OCR for both models
    output_a = process_model_ocr(image, model_a)
    output_b = process_model_ocr(image, model_b)

    # Mirror each result into the global that belongs to its model; model
    # names other than the four below are not stored.
    for model, output in ((model_a, output_a), (model_b, output_b)):
        if model == "gemini":
            current_gemini_output = output
        elif model == "mistral":
            current_mistral_output = output
        elif model == "openai":
            current_openai_output = output
        elif model == "gpt5":
            current_gpt5_output = output

    return output_a, output_b, gr.update(visible=True), gr.update(visible=True)


def load_vote_data():
    """Fetch all recorded votes and render them for the Data tab.

    Returns:
        The value produced by ``format_votes_table`` for the votes returned by
        ``get_all_votes``.  If anything raises, a red HTML error paragraph is
        returned instead, with the exception text HTML-escaped.
    """
    try:
        return format_votes_table(get_all_votes())
    except Exception as e:
        # UI boundary: report the failure in the page instead of raising.
        # logger.exception keeps the traceback that a plain error log dropped.
        logger.exception("Error loading vote data: %s", e)
        # Exception text can contain "<", ">" or "&" and must not be
        # interpreted as markup.
        return f"<p style='color:red;'>Error loading data: {html.escape(str(e))}</p>"


def load_elo_leaderboard():
    """Compute ELO ratings and per-model win counts and render the leaderboard.

    Each vote is read as a mapping with the keys ``"model_a"``, ``"model_b"``
    and ``"vote"``; a vote counts as a win for the model named under the key
    that ``"vote"`` points to (``"model_a"`` or ``"model_b"``).  Only the four
    models listed in the tally are counted; any other vote is ignored.

    Returns:
        The value produced by ``format_elo_leaderboard`` for the ratings and
        win counts.  If anything raises, a red HTML error paragraph is returned
        instead, with the exception text HTML-escaped.
    """
    try:
        votes = get_all_votes()
        elo_ratings = calculate_elo_ratings_from_votes(votes)

        # Wins per model, in the fixed order the formatter has always received.
        win_counts = {"gemini": 0, "mistral": 0, "openai": 0, "gpt5": 0}
        for vote in votes:
            choice = vote.get("vote")
            if choice not in ("model_a", "model_b"):
                continue
            # The chosen side doubles as the key holding the winner's name.
            winner = vote.get(choice)
            if winner in win_counts:
                win_counts[winner] += 1

        return format_elo_leaderboard(elo_ratings, win_counts)
    except Exception as e:
        # UI boundary: report the failure in the page instead of raising.
        # logger.exception keeps the traceback that a plain error log dropped.
        logger.exception("Error loading leaderboard: %s", e)
        # Exception text can contain "<", ">" or "&" and must not be
        # interpreted as markup.
        return f"<p style='color:red;'>Error loading leaderboard: {html.escape(str(e))}</p>"


# Build the three-tab UI (Arena, Data, Leaderboard) and wire up its events.
# The inline CSS defines the `.output-box` class applied to the two OCR output
# panes below, plus `.vote-table` rules (presumably matched by the markup that
# format_votes_table produces — verify in ui_helpers).
with gr.Blocks(title="OCR Comparison", css="""
    .output-box {border:2px solid #e0e0e0; border-radius:8px; padding:15px; margin:10px 0; background-color:#f9f9f9; min-height:200px;}
    .vote-table {border-collapse: collapse; width:100%; margin:10px 0; min-width:800px;}
    .vote-table th, .vote-table td {border:1px solid #ddd; padding:6px; text-align:left; vertical-align:top;}
    .vote-table th {background-color:#f2f2f2; font-weight:bold; position:sticky; top:0; z-index:10;}
    .vote-table tr:nth-child(even){background-color:#f9f9f9;}
""") as demo:

    with gr.Tabs():

        # Arena Tab: upload an image and compare two OCR outputs side by side.
        with gr.Tab("⚔️ Arena"):
            gr.Markdown("# ⚔️ OCR Arena")
            gr.Markdown("Upload an image to compare two randomly selected OCR models.")

            # Model A output, the image input, then Model B output.
            # Despite the variable names, these panes are not tied to Gemini or
            # Mistral: they receive the first and second return values of
            # process_image_single_click, i.e. whichever two models were picked.
            with gr.Row():
                gemini_output = gr.Markdown(label="Model A Output", elem_classes=["output-box"])
                image_input = gr.Image(type="pil", label="Upload or Paste Image")
                mistral_output = gr.Markdown(label="Model B Output", elem_classes=["output-box"])

            # Vote buttons start hidden; process_image_single_click returns
            # visibility updates for them.
            # NOTE(review): no click handler is attached to either vote button
            # in this block, and add_vote is imported but not called here —
            # confirm whether the vote wiring is missing.
            with gr.Row():
                gemini_vote_btn = gr.Button("A is better", variant="primary", size="sm", visible=False)
                mistral_vote_btn = gr.Button("B is better", variant="primary", size="sm", visible=False)

            process_btn = gr.Button("🔍 Run OCR", variant="primary")

        # Data Tab: table of recorded votes, filled in by load_vote_data.
        with gr.Tab("📊 Data"):
            gr.Markdown("# 📊 Vote Data")
            refresh_btn = gr.Button("🔄 Refresh Data", variant="secondary")
            votes_table = gr.HTML("<p>Loading vote data...</p>")

        # Leaderboard Tab: ELO leaderboard, filled in by load_elo_leaderboard.
        with gr.Tab("🏆 Leaderboard"):
            gr.Markdown("# 🏆 ELO Leaderboard")
            refresh_leaderboard_btn = gr.Button("🔄 Refresh Leaderboard", variant="secondary")
            leaderboard_display = gr.HTML("<p>Loading ELO leaderboard...</p>")

    # Event handlers
    # "Run OCR" sends the image to process_image_single_click; its four return
    # values map, in order, onto the two output panes and the two vote buttons.
    process_btn.click(
        process_image_single_click,
        inputs=[image_input],
        outputs=[gemini_output, mistral_output, gemini_vote_btn, mistral_vote_btn]
    )

    # Manual refresh buttons re-run the loaders and replace the HTML content.
    refresh_btn.click(load_vote_data, inputs=None, outputs=[votes_table])
    refresh_leaderboard_btn.click(load_elo_leaderboard, inputs=None, outputs=[leaderboard_display])

    # Load data and leaderboard on start (replaces the "Loading..." placeholders)
    demo.load(fn=load_vote_data, inputs=None, outputs=[votes_table])
    demo.load(fn=load_elo_leaderboard, inputs=None, outputs=[leaderboard_display])

# Script entry point: start the Gradio server only when this file is run
# directly, not when it is imported.
if __name__ == "__main__":
    logger.info("Starting OCR Comparison App...")
    # NOTE(review): share=True presumably requests a public Gradio share link —
    # confirm this is wanted in the environment where the app is hosted.
    demo.launch(share=True)