|
|
""" |
|
|
OCR Arena - Main Application |
|
|
A Gradio web application for comparing OCR results from different AI models. |
|
|
""" |
|
|
|
|
|
import gradio as gr |
|
|
import logging |
|
|
import os |
|
|
import datetime |
|
|
from dotenv import load_dotenv |
|
|
from storage import upload_file_to_bucket |
|
|
from db import add_vote, get_all_votes, calculate_elo_ratings_from_votes |
|
|
from ocr_models import process_model_ocr, initialize_gemini, initialize_mistral, initialize_openai |
|
|
from ui_helpers import ( |
|
|
get_model_display_name, select_random_models, format_votes_table, |
|
|
format_elo_leaderboard |
|
|
) |
|
|
|
|
|
|
|
|
# Read variables from a .env file (if present) into the process environment
# before anything below looks them up.
load_dotenv()

# Root logging configuration plus this module's logger.
logging.basicConfig(
    format='%(levelname)s: %(message)s',
    level=logging.INFO,
)
logger = logging.getLogger(__name__)

# Set up the OCR backends once, at import time.
initialize_gemini()
initialize_mistral()
initialize_openai()

# Supabase settings; either value is None when the variable is not set.
SUPABASE_URL = os.environ.get("SUPABASE_URL")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY")

# Module-level state describing the most recent OCR run.
# NOTE(review): these globals are shared by every session served by this
# process — confirm that is acceptable for concurrent users.
current_gemini_output = current_mistral_output = ""
current_openai_output = current_gpt5_output = ""
current_image_url = ""
current_model_a = current_model_b = ""
|
|
|
|
|
|
|
|
def _upload_image(image):
    """Upload *image* to the ``images`` bucket and return its public URL.

    The image is saved as a uniquely named temporary PNG in the current
    working directory, handed to ``upload_file_to_bucket`` and then deleted
    again, whatever the outcome.

    Args:
        image: Object exposing ``save(path)`` (the Arena tab supplies a PIL
            image).

    Returns:
        The public URL of the uploaded file, or ``""`` when saving or
        uploading failed. Failures are logged, never raised, so that OCR can
        still run without a stored image.
    """
    import uuid

    # A timestamp alone only has one-second resolution; the random suffix
    # keeps two requests in the same second from sharing a file name (and a
    # storage path, which is uploaded with upsert disabled).
    timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    temp_filename = f"temp_image_{timestamp}_{uuid.uuid4().hex[:8]}.png"

    try:
        image.save(temp_filename)
        upload_result = upload_file_to_bucket(
            file_path=temp_filename,
            bucket_name="images",
            storage_path=f"ocr_images/{temp_filename}",
            file_options={"cache-control": "3600", "upsert": "false"},
        )
    except Exception:
        # Treat a raised error like a reported upload failure: log and go on.
        logger.exception("Image upload failed")
        return ""
    finally:
        try:
            os.remove(temp_filename)
        except OSError as e:
            logger.warning("Could not remove temp file: %s", e)

    if upload_result.get("success"):
        # Fall back to the conventional public URL when none is reported.
        return upload_result.get("public_url") or (
            f"{SUPABASE_URL}/storage/v1/object/public/images/ocr_images/{temp_filename}"
        )

    logger.error("Image upload failed: %s", upload_result.get('error'))
    return ""


def process_image_single_click(image):
    """Process uploaded image and run OCR for both models in one click.

    Picks two models with ``select_random_models``, uploads the image, runs
    ``process_model_ocr`` for each model and records the results in the
    module-level ``current_*`` variables.

    Args:
        image: The uploaded image, or ``None`` when nothing was uploaded.

    Returns:
        A 4-tuple ``(output_a, output_b, update_a, update_b)``: the two OCR
        outputs followed by two ``gr.update`` objects that show the vote
        buttons. When *image* is ``None`` both outputs are a prompt to upload
        an image and the updates hide the buttons.

    NOTE(review): results are kept in module globals, which are shared by all
    sessions of this process — confirm this is acceptable for concurrent use.
    """
    global current_gemini_output, current_mistral_output, current_openai_output, current_gpt5_output
    global current_model_a, current_model_b, current_image_url

    if image is None:
        return "Please upload an image.", "Please upload an image.", gr.update(visible=False), gr.update(visible=False)

    model_a, model_b = select_random_models()
    current_model_a, current_model_b = model_a, model_b

    current_image_url = _upload_image(image)

    output_a = process_model_ocr(image, model_a)
    output_b = process_model_ocr(image, model_b)

    # Remember each output under its model's own variable; unknown model
    # names are ignored.
    for model, output in ((model_a, output_a), (model_b, output_b)):
        if model == "gemini":
            current_gemini_output = output
        elif model == "mistral":
            current_mistral_output = output
        elif model == "openai":
            current_openai_output = output
        elif model == "gpt5":
            current_gpt5_output = output

    return output_a, output_b, gr.update(visible=True), gr.update(visible=True)
|
|
|
|
|
|
|
|
def load_vote_data():
    """Load and format vote data for display.

    Returns:
        The string produced by ``format_votes_table`` for the votes returned
        by ``get_all_votes``. If either call raises, the error is logged and
        a red HTML paragraph describing it is returned instead.
    """
    from html import escape

    try:
        return format_votes_table(get_all_votes())
    except Exception as e:
        logger.exception("Error loading vote data: %s", e)
        # The message ends up inside an HTML component, so characters such
        # as '<' and '&' in the exception text must not be taken as markup.
        return f"<p style='color:red;'>Error loading data: {escape(str(e))}</p>"
|
|
|
|
|
|
|
|
def load_elo_leaderboard():
    """Load and format ELO leaderboard data.

    Fetches all votes, derives ratings with
    ``calculate_elo_ratings_from_votes`` and counts, for each of the four
    known models, how many votes picked it as the winner.

    Returns:
        The string produced by ``format_elo_leaderboard``. If anything
        raises, the error is logged and a red HTML paragraph describing it is
        returned instead.
    """
    from html import escape

    try:
        votes = get_all_votes()
        elo_ratings = calculate_elo_ratings_from_votes(votes)

        vote_counts = {"gemini": 0, "mistral": 0, "openai": 0, "gpt5": 0}
        for vote in votes:
            choice = vote.get("vote")
            if choice not in ("model_a", "model_b"):
                continue
            # The vote value names the field that holds the winning model.
            winner = vote.get(choice)
            if winner in vote_counts:
                vote_counts[winner] += 1

        return format_elo_leaderboard(elo_ratings, vote_counts)
    except Exception as e:
        logger.exception("Error loading leaderboard: %s", e)
        # The message ends up inside an HTML component, so characters such
        # as '<' and '&' in the exception text must not be taken as markup.
        return f"<p style='color:red;'>Error loading leaderboard: {escape(str(e))}</p>"
|
|
|
|
|
|
|
|
# Styles for the OCR output panels and the vote table.
_APP_CSS = """
    .output-box {border:2px solid #e0e0e0; border-radius:8px; padding:15px; margin:10px 0; background-color:#f9f9f9; min-height:200px;}
    .vote-table {border-collapse: collapse; width:100%; margin:10px 0; min-width:800px;}
    .vote-table th, .vote-table td {border:1px solid #ddd; padding:6px; text-align:left; vertical-align:top;}
    .vote-table th {background-color:#f2f2f2; font-weight:bold; position:sticky; top:0; z-index:10;}
    .vote-table tr:nth-child(even){background-color:#f9f9f9;}
"""

with gr.Blocks(title="OCR Comparison", css=_APP_CSS) as demo:

    with gr.Tabs():

        # --- Arena: upload an image and compare two model outputs ---------
        with gr.Tab("⚔️ Arena"):
            gr.Markdown("# ⚔️ OCR Arena")
            gr.Markdown("Upload an image to compare two randomly selected OCR models.")

            # Despite their names, these two panels show "Model A" and
            # "Model B", whichever models were drawn for the run.
            with gr.Row():
                gemini_output = gr.Markdown(label="Model A Output", elem_classes=["output-box"])
                image_input = gr.Image(type="pil", label="Upload or Paste Image")
                mistral_output = gr.Markdown(label="Model B Output", elem_classes=["output-box"])

            # Vote buttons start hidden; the OCR handler reveals them.
            # NOTE(review): no click handler is attached to either vote
            # button in this file — confirm where votes are recorded.
            with gr.Row():
                gemini_vote_btn = gr.Button("A is better", variant="primary", size="sm", visible=False)
                mistral_vote_btn = gr.Button("B is better", variant="primary", size="sm", visible=False)

            process_btn = gr.Button("🔍 Run OCR", variant="primary")

        # --- Data: raw vote listing ---------------------------------------
        with gr.Tab("📊 Data"):
            gr.Markdown("# 📊 Vote Data")
            refresh_btn = gr.Button("🔄 Refresh Data", variant="secondary")
            votes_table = gr.HTML("<p>Loading vote data...</p>")

        # --- Leaderboard: ELO ranking -------------------------------------
        with gr.Tab("🏆 Leaderboard"):
            gr.Markdown("# 🏆 ELO Leaderboard")
            refresh_leaderboard_btn = gr.Button("🔄 Refresh Leaderboard", variant="secondary")
            leaderboard_display = gr.HTML("<p>Loading ELO leaderboard...</p>")

    # Event wiring: run OCR on demand, refresh either view on request.
    process_btn.click(
        fn=process_image_single_click,
        inputs=[image_input],
        outputs=[gemini_output, mistral_output, gemini_vote_btn, mistral_vote_btn],
    )
    refresh_btn.click(fn=load_vote_data, inputs=None, outputs=[votes_table])
    refresh_leaderboard_btn.click(fn=load_elo_leaderboard, inputs=None, outputs=[leaderboard_display])

    # Fill both views as soon as the page loads.
    demo.load(fn=load_vote_data, inputs=None, outputs=[votes_table])
    demo.load(fn=load_elo_leaderboard, inputs=None, outputs=[leaderboard_display])

if __name__ == "__main__":
    logger.info("Starting OCR Comparison App...")
    demo.launch(share=True)
|
|
|