""" OCR Arena - Main Application A Gradio web application for comparing OCR results from different AI models. """ import gradio as gr import logging import os import datetime from dotenv import load_dotenv from storage import upload_file_to_bucket from db import add_vote, get_all_votes, calculate_elo_ratings_from_votes from ocr_models import process_model_ocr, initialize_gemini, initialize_mistral, initialize_openai from ui_helpers import ( get_model_display_name, select_random_models, format_votes_table, format_elo_leaderboard ) # Load environment variables load_dotenv() # Configure logging logging.basicConfig(level=logging.INFO, format='%(levelname)s: %(message)s') logger = logging.getLogger(__name__) # Initialize API keys and models initialize_gemini() initialize_mistral() initialize_openai() # Get Supabase credentials SUPABASE_URL = os.getenv("SUPABASE_URL") SUPABASE_KEY = os.getenv("SUPABASE_KEY") # Global variables current_gemini_output = "" current_mistral_output = "" current_openai_output = "" current_gpt5_output = "" current_image_url = "" current_model_a = "" current_model_b = "" def process_image_single_click(image): """Process uploaded image and run OCR for both models in one click.""" global current_gemini_output, current_mistral_output, current_openai_output, current_gpt5_output global current_model_a, current_model_b, current_image_url if image is None: return "Please upload an image.", "Please upload an image.", gr.update(visible=False), gr.update(visible=False) # Select two random models model_a, model_b = select_random_models() current_model_a = model_a current_model_b = model_b # Save image temporarily temp_filename = f"temp_image_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}.png" image.save(temp_filename) # Upload image to Supabase try: upload_result = upload_file_to_bucket( file_path=temp_filename, bucket_name="images", storage_path=f"ocr_images/{temp_filename}", file_options={"cache-control": "3600", "upsert": "false"} ) if upload_result["success"]: current_image_url = upload_result.get("public_url") or f"{SUPABASE_URL}/storage/v1/object/public/images/ocr_images/{temp_filename}" else: current_image_url = "" logger.error(f"Image upload failed: {upload_result.get('error')}") finally: try: os.remove(temp_filename) except Exception as e: logger.warning(f"Could not remove temp file: {e}") # Run OCR for both models output_a = process_model_ocr(image, model_a) output_b = process_model_ocr(image, model_b) # Store outputs in globals def store_output(model, output): global current_gemini_output, current_mistral_output, current_openai_output, current_gpt5_output if model == "gemini": current_gemini_output = output elif model == "mistral": current_mistral_output = output elif model == "openai": current_openai_output = output elif model == "gpt5": current_gpt5_output = output store_output(model_a, output_a) store_output(model_b, output_b) return output_a, output_b, gr.update(visible=True), gr.update(visible=True) def load_vote_data(): """Load and format vote data for display.""" try: votes = get_all_votes() return format_votes_table(votes) except Exception as e: logger.error(f"Error loading vote data: {e}") return f"
Error loading data: {e}
" def load_elo_leaderboard(): """Load and format ELO leaderboard data.""" try: votes = get_all_votes() elo_ratings = calculate_elo_ratings_from_votes(votes) vote_counts = {"gemini": 0, "mistral": 0, "openai": 0, "gpt5": 0} for vote in votes: model_a, model_b, vote_choice = vote.get("model_a"), vote.get("model_b"), vote.get("vote") if vote_choice == "model_a" and model_a in vote_counts: vote_counts[model_a] += 1 elif vote_choice == "model_b" and model_b in vote_counts: vote_counts[model_b] += 1 return format_elo_leaderboard(elo_ratings, vote_counts) except Exception as e: logger.error(f"Error loading leaderboard: {e}") return f"Error loading leaderboard: {e}
" with gr.Blocks(title="OCR Comparison", css=""" .output-box {border:2px solid #e0e0e0; border-radius:8px; padding:15px; margin:10px 0; background-color:#f9f9f9; min-height:200px;} .vote-table {border-collapse: collapse; width:100%; margin:10px 0; min-width:800px;} .vote-table th, .vote-table td {border:1px solid #ddd; padding:6px; text-align:left; vertical-align:top;} .vote-table th {background-color:#f2f2f2; font-weight:bold; position:sticky; top:0; z-index:10;} .vote-table tr:nth-child(even){background-color:#f9f9f9;} """) as demo: with gr.Tabs(): # Arena Tab with gr.Tab("⚔️ Arena"): gr.Markdown("# ⚔️ OCR Arena") gr.Markdown("Upload an image to compare two randomly selected OCR models.") with gr.Row(): gemini_output = gr.Markdown(label="Model A Output", elem_classes=["output-box"]) image_input = gr.Image(type="pil", label="Upload or Paste Image") mistral_output = gr.Markdown(label="Model B Output", elem_classes=["output-box"]) with gr.Row(): gemini_vote_btn = gr.Button("A is better", variant="primary", size="sm", visible=False) mistral_vote_btn = gr.Button("B is better", variant="primary", size="sm", visible=False) process_btn = gr.Button("🔍 Run OCR", variant="primary") # Data Tab with gr.Tab("📊 Data"): gr.Markdown("# 📊 Vote Data") refresh_btn = gr.Button("🔄 Refresh Data", variant="secondary") votes_table = gr.HTML("Loading vote data...
") # Leaderboard Tab with gr.Tab("🏆 Leaderboard"): gr.Markdown("# 🏆 ELO Leaderboard") refresh_leaderboard_btn = gr.Button("🔄 Refresh Leaderboard", variant="secondary") leaderboard_display = gr.HTML("Loading ELO leaderboard...
") # Event handlers process_btn.click( process_image_single_click, inputs=[image_input], outputs=[gemini_output, mistral_output, gemini_vote_btn, mistral_vote_btn] ) refresh_btn.click(load_vote_data, inputs=None, outputs=[votes_table]) refresh_leaderboard_btn.click(load_elo_leaderboard, inputs=None, outputs=[leaderboard_display]) # Load data and leaderboard on start demo.load(fn=load_vote_data, inputs=None, outputs=[votes_table]) demo.load(fn=load_elo_leaderboard, inputs=None, outputs=[leaderboard_display]) if __name__ == "__main__": logger.info("Starting OCR Comparison App...") demo.launch(share=True)