import json
import os
import uuid
from pathlib import Path

import gradio as gr
import pandas as pd

from src.about import (
    EVALUATION_INFO,
    INTRODUCTION,
    NAVIGATION,
    SUBMISSION_GUIDE,
    TITLE,
    custom_css,
)
from src.evaluator import Evaluator
from src.leaderboard_manager import (
    ALL_METRIC_COLS,
    DEFAULT_DISPLAY_METRICS,
    LeaderboardManager,
)
from src.storage import (
    check_rate_limit,
    record_submission_time,
    save_submission,
)

# Initialize components
try:
    manager = LeaderboardManager()
except Exception as e:
    print(f"[WARN] Failed to init LeaderboardManager: {e}")
    manager = None

evaluator = Evaluator()


def refresh_leaderboard(sort_by):
    if manager is None:
        return pd.DataFrame(columns=["rank", "model_name"])
    try:
        return manager.get_display_df(
            method_filter="Agent",
            sort_by=sort_by,
            ascending=False,
            top_n=30,
            metric_cols=DEFAULT_DISPLAY_METRICS,
        )
    except Exception as e:
        return pd.DataFrame({"Error": [str(e)]})


def handle_submission(file_obj, email, model_name, opt_in):
    if manager is None:
        return {"error": "Leaderboard service unavailable."}, None
    if file_obj is None:
        return {"error": "Please upload a JSON file."}, None
    if not email or not email.strip() or "@" not in email:
        return {"error": "Please enter a valid email address."}, None
    email = email.strip().lower()
    if not model_name or not model_name.strip():
        return {"error": "Please enter a model / system name."}, None

    # Rate limit check
    allowed, msg = check_rate_limit(email)
    if not allowed:
        return {"error": msg}, None

    # Read uploaded file
    file_path = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        return {"error": f"Failed to parse JSON: {e}"}, None

    # Validate format
    errors = evaluator.validate_json_format(data)
    if errors:
        return {"error": "Validation failed", "details": errors}, None

    # Run evaluation
    try:
        result = evaluator.evaluate(data)
    except Exception as e:
        return {"error": f"Evaluation failed: {e}"}, None

    # Extract album coverage
    albums = sorted({str(item["album_id"]) for item in data})

    # Record rate limit
    record_submission_time(email)

    # Save submission
    submission_id = str(uuid.uuid4())
    try:
        save_submission(
            submission_id,
            {
                "meta": {
                    "submission_id": submission_id,
                    "email": email,
                    "method": "Agent",
                    "model_name": model_name.strip(),
                    "albums": albums,
                    "opt_in": opt_in,
                },
                "submission": data,
                "result": result,
            },
        )
    except Exception as e:
        return {"error": f"Failed to save submission: {e}"}, None

    # Update leaderboard only if opted in and full submission
    leaderboard_msg = ""
    if opt_in:
        entry = manager.add_result(
            email=email,
            method="Agent",
            model_name=model_name.strip(),
            albums=albums,
            evaluated_queries=result["evaluated_queries"],
            total_gt_queries=result["total_gt_queries"],
            global_metrics=result["global_metrics"],
        )
        if entry is None:
            if result["is_partial"]:
                leaderboard_msg = (
                    "Result saved but NOT eligible for leaderboard: incomplete submission "
                    f"({result['evaluated_queries']}/{result['total_gt_queries']} queries). "
                    "Only full submissions across all 3 albums are ranked."
                )
            else:
                leaderboard_msg = (
                    "Result saved but NOT eligible for leaderboard. "
                    "Only full submissions across all 3 albums are ranked."
                )
        else:
            leaderboard_msg = "Result published to leaderboard."
    else:
        leaderboard_msg = "Result recorded privately. Not published to leaderboard."
    # Build per-album breakdown
    album_breakdown = {}
    for a_id, alb_res in result.get("per_album", {}).items():
        album_breakdown[f"album_{a_id}"] = {
            "submitted": alb_res["evaluated_queries"],
            "total": alb_res["total_gt_queries"],
            "complete": not alb_res["is_partial"],
        }

    # Build result summary
    summary = {
        "status": "Success",
        "submission_id": submission_id,
        "email": email,
        "model_name": model_name.strip(),
        "albums": albums,
        "evaluated_queries": result["evaluated_queries"],
        "total_gt_queries": result["total_gt_queries"],
        "album_breakdown": album_breakdown,
        "metrics": result["global_metrics"],
        "leaderboard_status": leaderboard_msg,
        "notice": "Please download and save your results. Submission data is retained for 30 days only.",
    }
    if result.get("is_partial"):
        summary["warning"] = result["warning"]

    updated_df = refresh_leaderboard("Recall@10")
    return summary, updated_df


# Gradio interface
with gr.Blocks(css=custom_css, title="PhotoBench-Protected Leaderboard") as demo:
    gr.HTML(TITLE)
    gr.HTML(NAVIGATION)
    gr.Markdown(INTRODUCTION, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons"):
        # === Tab 1: Leaderboard ===
        with gr.TabItem("🏅 Leaderboard"):
            with gr.Row():
                with gr.Column(scale=3):
                    sort_by = gr.Dropdown(
                        choices=ALL_METRIC_COLS,
                        value="Recall@10",
                        label="Sort by",
                    )
                with gr.Column(scale=1):
                    refresh_btn = gr.Button(
                        "Refresh", variant="primary", elem_classes=["refresh-btn"]
                    )

            leaderboard_table = gr.DataFrame(
                label="Top 30",
                interactive=False,
                wrap=True,
            )

            refresh_btn.click(
                refresh_leaderboard,
                inputs=[sort_by],
                outputs=leaderboard_table,
            )
            demo.load(
                refresh_leaderboard,
                inputs=[sort_by],
                outputs=leaderboard_table,
            )

        # === Tab 2: Submit ===
        with gr.TabItem("📝 Submit"):
            gr.Markdown(SUBMISSION_GUIDE, elem_classes="markdown-text")
            with gr.Row():
                with gr.Column(scale=1):
                    pass
                with gr.Column(scale=3):
                    with gr.Row():
                        with gr.Column():
                            upload_file = gr.File(
                                label="Upload results JSON",
                                file_types=[".json"],
                            )
                            email_input = gr.Textbox(
                                label="Email",
                                placeholder="your@email.com",
                            )
                            model_name_input = gr.Textbox(
                                label="Model / System Name",
                                placeholder="e.g., GPT-4V-Agent",
                            )
                            opt_in_toggle = gr.Checkbox(
                                label="Publish to public leaderboard",
                                value=True,
                                elem_classes=["toggle-switch"],
                            )
                            submit_btn = gr.Button("Submit for Evaluation", variant="primary")
                        with gr.Column():
                            result_json = gr.JSON(label="Evaluation Results")
                with gr.Column(scale=1):
                    pass

            submit_btn.click(
                handle_submission,
                inputs=[upload_file, email_input, model_name_input, opt_in_toggle],
                outputs=[result_json, leaderboard_table],
            )

        # === Tab 3: About ===
        with gr.TabItem("ℹ️ About"):
            gr.Markdown(EVALUATION_INFO, elem_classes="markdown-text")

demo.launch()