# Fix refresh alignment and tooltip locale issues (commit 4ef02bb, author: SorrowTea)
import json
import os
import uuid
from pathlib import Path
import gradio as gr
import pandas as pd
from src.about import (
EVALUATION_INFO,
INTRODUCTION,
NAVIGATION,
SUBMISSION_GUIDE,
TITLE,
custom_css,
)
from src.evaluator import Evaluator
from src.leaderboard_manager import (
ALL_METRIC_COLS,
DEFAULT_DISPLAY_METRICS,
LeaderboardManager,
)
from src.storage import (
check_rate_limit,
record_submission_time,
save_submission,
)
# Initialize components.
# The leaderboard manager may touch external storage; if construction fails we
# degrade gracefully (manager=None makes the UI show an empty/error table)
# instead of crashing the whole app at import time.
try:
    manager = LeaderboardManager()
except Exception as e:
    print(f"[WARN] Failed to init LeaderboardManager: {e}")
    manager = None
# Evaluator is constructed without a fallback -- presumably cheap and
# side-effect free to build (NOTE(review): confirm it does no I/O on init).
evaluator = Evaluator()
def refresh_leaderboard(sort_by):
    """Fetch the current top-30 "Agent" leaderboard sorted by *sort_by*.

    Returns a pandas DataFrame for display: an empty placeholder frame when
    the manager failed to initialize, or a single "Error" column describing
    the failure when the lookup itself raises.
    """
    # Manager can be None if initialization failed at import time.
    if manager is None:
        return pd.DataFrame(columns=["rank", "model_name"])
    try:
        table = manager.get_display_df(
            method_filter="Agent",
            sort_by=sort_by,
            ascending=False,
            top_n=30,
            metric_cols=DEFAULT_DISPLAY_METRICS,
        )
    except Exception as exc:
        # Surface the failure inside the table rather than crashing the UI.
        table = pd.DataFrame({"Error": [str(exc)]})
    return table
def handle_submission(file_obj, email, model_name, opt_in):
    """Validate, evaluate, persist, and optionally publish one submission.

    Args:
        file_obj: Gradio file upload (object exposing ``.name`` as a path,
            or a plain path-like value).
        email: Submitter's email; lower-cased and used as the rate-limit key.
        model_name: Display name of the submitted model / system.
        opt_in: If True, attempt to publish the result to the leaderboard.

    Returns:
        Tuple ``(summary, leaderboard_df)`` matching the Gradio outputs
        ``[result_json, leaderboard_table]``. On any failure the first
        element is ``{"error": ...}`` and the second is ``None``.
    """
    if manager is None:
        return {"error": "Leaderboard service unavailable."}, None
    if file_obj is None:
        return {"error": "Please upload a JSON file."}, None
    # Minimal email sanity check only; no full RFC validation is attempted.
    if not email or not email.strip() or "@" not in email:
        return {"error": "Please enter a valid email address."}, None
    email = email.strip().lower()
    if not model_name or not model_name.strip():
        return {"error": "Please enter a model / system name."}, None
    # Rate limit check
    allowed, msg = check_rate_limit(email)
    if not allowed:
        return {"error": msg}, None
    # Read uploaded file: Gradio may hand us a tempfile-like object or a path.
    file_path = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        return {"error": f"Failed to parse JSON: {e}"}, None
    # Validate format (non-empty error list means the payload is malformed).
    errors = evaluator.validate_json_format(data)
    if errors:
        return {"error": "Validation failed", "details": errors}, None
    # Run evaluation
    try:
        result = evaluator.evaluate(data)
    except Exception as e:
        return {"error": f"Evaluation failed: {e}"}, None
    # Extract album coverage.
    # Assumes data is a list of dicts each carrying an "album_id" key --
    # presumably guaranteed by validate_json_format above (TODO confirm).
    albums = sorted({str(item["album_id"]) for item in data})
    # Record rate limit.
    # NOTE(review): recorded BEFORE save_submission, so a failed save still
    # consumes the user's rate-limit allowance -- confirm this is intended.
    record_submission_time(email)
    # Save submission under a fresh UUID so retries never collide.
    submission_id = str(uuid.uuid4())
    try:
        save_submission(
            submission_id,
            {
                "meta": {
                    "submission_id": submission_id,
                    "email": email,
                    "method": "Agent",
                    "model_name": model_name.strip(),
                    "albums": albums,
                    "opt_in": opt_in,
                },
                "submission": data,
                "result": result,
            },
        )
    except Exception as e:
        return {"error": f"Failed to save submission: {e}"}, None
    # Update leaderboard only if opted in and full submission.
    # add_result returning None signals ineligibility (manager decides).
    leaderboard_msg = ""
    if opt_in:
        entry = manager.add_result(
            email=email,
            method="Agent",
            model_name=model_name.strip(),
            albums=albums,
            evaluated_queries=result["evaluated_queries"],
            total_gt_queries=result["total_gt_queries"],
            global_metrics=result["global_metrics"],
        )
        if entry is None:
            if result["is_partial"]:
                leaderboard_msg = f"Result saved but NOT eligible for leaderboard: incomplete submission ({result['evaluated_queries']}/{result['total_gt_queries']} queries). Only full submissions across all 3 albums are ranked."
            else:
                leaderboard_msg = "Result saved but NOT eligible for leaderboard. Only full submissions across all 3 albums are ranked."
        else:
            leaderboard_msg = "Result published to leaderboard."
    else:
        leaderboard_msg = "Result recorded privately. Not published to leaderboard."
    # Build per-album breakdown so users can see which albums are incomplete.
    album_breakdown = {}
    for a_id, alb_res in result.get("per_album", {}).items():
        album_breakdown[f"album_{a_id}"] = {
            "submitted": alb_res["evaluated_queries"],
            "total": alb_res["total_gt_queries"],
            "complete": not alb_res["is_partial"],
        }
    # Build result summary (JSON-serializable dict rendered by gr.JSON).
    summary = {
        "status": "Success",
        "submission_id": submission_id,
        "email": email,
        "model_name": model_name.strip(),
        "albums": albums,
        "evaluated_queries": result["evaluated_queries"],
        "total_gt_queries": result["total_gt_queries"],
        "album_breakdown": album_breakdown,
        "metrics": result["global_metrics"],
        "leaderboard_status": leaderboard_msg,
        "notice": "Please download and save your results. Submission data is retained for 30 days only.",
    }
    if result.get("is_partial"):
        summary["warning"] = result["warning"]
    # Refresh the shared leaderboard table with the default sort metric.
    updated_df = refresh_leaderboard("Recall@10")
    return summary, updated_df
# Gradio interface: three tabs (Leaderboard / Submit / About) plus shared
# header content. Statement order here defines the rendered layout.
with gr.Blocks(css=custom_css, title="PhotoBench-Protected Leaderboard") as demo:
    gr.HTML(TITLE)
    gr.HTML(NAVIGATION)
    gr.Markdown(INTRODUCTION, elem_classes="markdown-text")
    with gr.Tabs(elem_classes="tab-buttons"):
        # === Tab 1: Leaderboard ===
        with gr.TabItem("🏅 Leaderboard"):
            # Sort selector (3/4 width) next to the refresh button (1/4 width).
            with gr.Row():
                with gr.Column(scale=3):
                    sort_by = gr.Dropdown(
                        choices=ALL_METRIC_COLS,
                        value="Recall@10",
                        label="Sort by",
                    )
                with gr.Column(scale=1):
                    refresh_btn = gr.Button("Refresh", variant="primary", elem_classes=["refresh-btn"])
            leaderboard_table = gr.DataFrame(
                label="Top 30",
                interactive=False,
                wrap=True,
            )
            # Manual refresh via the button...
            refresh_btn.click(
                refresh_leaderboard,
                inputs=[sort_by],
                outputs=leaderboard_table,
            )
            # ...and an initial population when the page first loads.
            demo.load(
                refresh_leaderboard,
                inputs=[sort_by],
                outputs=leaderboard_table,
            )
        # === Tab 2: Submit ===
        with gr.TabItem("📝 Submit"):
            gr.Markdown(SUBMISSION_GUIDE, elem_classes="markdown-text")
            # Empty 1/5-width side columns center the 3/5-width form area.
            with gr.Row():
                with gr.Column(scale=1):
                    pass
                with gr.Column(scale=3):
                    with gr.Row():
                        # Left: submission form inputs.
                        with gr.Column():
                            upload_file = gr.File(
                                label="Upload results JSON",
                                file_types=[".json"],
                            )
                            email_input = gr.Textbox(
                                label="Email",
                                placeholder="your@email.com",
                            )
                            model_name_input = gr.Textbox(
                                label="Model / System Name",
                                placeholder="e.g., GPT-4V-Agent",
                            )
                            opt_in_toggle = gr.Checkbox(
                                label="Publish to public leaderboard",
                                value=True,
                                elem_classes=["toggle-switch"],
                            )
                            submit_btn = gr.Button("Submit for Evaluation", variant="primary")
                        # Right: evaluation result shown as JSON.
                        with gr.Column():
                            result_json = gr.JSON(label="Evaluation Results")
                with gr.Column(scale=1):
                    pass
            # Submission also refreshes the leaderboard table on Tab 1.
            submit_btn.click(
                handle_submission,
                inputs=[upload_file, email_input, model_name_input, opt_in_toggle],
                outputs=[result_json, leaderboard_table],
            )
        # === Tab 3: About ===
        with gr.TabItem("ℹ️ About"):
            gr.Markdown(EVALUATION_INFO, elem_classes="markdown-text")
# Launch the app (blocking call; HF Spaces invokes this at container start).
demo.launch()