File size: 8,349 Bytes
01f4cb5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4e43175
 
 
 
01f4cb5
 
 
 
 
4e43175
 
 
 
 
 
 
 
 
01f4cb5
 
 
 
 
 
 
 
 
4e43175
01f4cb5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f29fa73
 
 
 
 
 
 
4ef02bb
01f4cb5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f29fa73
01f4cb5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
import json
import os
import uuid
from pathlib import Path

import gradio as gr
import pandas as pd

from src.about import (
    EVALUATION_INFO,
    INTRODUCTION,
    NAVIGATION,
    SUBMISSION_GUIDE,
    TITLE,
    custom_css,
)
from src.evaluator import Evaluator
from src.leaderboard_manager import (
    ALL_METRIC_COLS,
    DEFAULT_DISPLAY_METRICS,
    LeaderboardManager,
)
from src.storage import (
    check_rate_limit,
    record_submission_time,
    save_submission,
)

# Shared service objects used by the UI callbacks.
# The leaderboard backend is optional: if it cannot be constructed the app
# still starts, a warning is printed, and `manager` stays None.
manager = None
try:
    manager = LeaderboardManager()
except Exception as init_error:
    print(f"[WARN] Failed to init LeaderboardManager: {init_error}")

# The evaluator has no fallback; a failure here aborts start-up.
evaluator = Evaluator()


def refresh_leaderboard(sort_by):
    """Return the DataFrame displayed in the leaderboard table.

    Args:
        sort_by: Name of the metric column to sort on (descending).

    Returns:
        The top-30 "Agent" rows from the leaderboard manager. When no manager
        is available an empty frame with ``rank`` / ``model_name`` columns is
        returned; when the manager raises, a one-cell frame with an ``Error``
        column carrying the exception text is returned instead.
    """
    if manager is not None:
        try:
            query = {
                "method_filter": "Agent",
                "sort_by": sort_by,
                "ascending": False,
                "top_n": 30,
                "metric_cols": DEFAULT_DISPLAY_METRICS,
            }
            table = manager.get_display_df(**query)
        except Exception as exc:
            # Surface the failure inside the table rather than raising to the UI.
            table = pd.DataFrame({"Error": [str(exc)]})
        return table

    # Leaderboard backend unavailable: show an empty table skeleton.
    return pd.DataFrame(columns=["rank", "model_name"])


def _submission_error(message, details=None):
    """Build the ``(result_json, leaderboard_table)`` pair for a rejected submission.

    The second element is ``gr.update()`` (a no-op update) instead of ``None``:
    returning ``None`` for the DataFrame output would blank the leaderboard
    table that is already displayed.
    """
    payload = {"error": message}
    if details is not None:
        payload["details"] = details
    return payload, gr.update()


def handle_submission(file_obj, email, model_name, opt_in):
    """Validate, evaluate and store one uploaded results file.

    Args:
        file_obj: Value of the upload component; either an object exposing the
            file path as ``.name`` or something convertible to a path string.
        email: Submitter e-mail. Used (stripped and lower-cased) as the
            rate-limit key and stored with the submission.
        model_name: Display name of the submitted model / system.
        opt_in: When truthy, the result is also offered to the public
            leaderboard via ``manager.add_result``.

    Returns:
        A ``(summary, leaderboard)`` tuple. On success ``summary`` is a dict
        with the metrics and bookkeeping fields and ``leaderboard`` is a
        freshly built leaderboard DataFrame sorted by ``Recall@10``. On any
        rejection ``summary`` is ``{"error": ...}`` (plus ``"details"`` for
        format errors) and ``leaderboard`` is a no-op ``gr.update()`` so the
        table on screen is left untouched.
    """
    if manager is None:
        return _submission_error("Leaderboard service unavailable.")

    if file_obj is None:
        return _submission_error("Please upload a JSON file.")

    if not email or not email.strip() or "@" not in email:
        return _submission_error("Please enter a valid email address.")

    email = email.strip().lower()

    if not model_name or not model_name.strip():
        return _submission_error("Please enter a model / system name.")

    # Normalise once; the stripped name is what gets stored and displayed.
    model_name = model_name.strip()

    # Rate limit check
    allowed, msg = check_rate_limit(email)
    if not allowed:
        return _submission_error(msg)

    # Read uploaded file
    file_path = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        return _submission_error(f"Failed to parse JSON: {e}")

    # Validate format
    errors = evaluator.validate_json_format(data)
    if errors:
        return _submission_error("Validation failed", details=errors)

    # Run evaluation
    try:
        result = evaluator.evaluate(data)
    except Exception as e:
        return _submission_error(f"Evaluation failed: {e}")

    # Extract album coverage
    albums = sorted({str(item["album_id"]) for item in data})

    # Record rate limit (only reached once the evaluation has succeeded)
    record_submission_time(email)

    # Save submission
    submission_id = str(uuid.uuid4())
    try:
        save_submission(
            submission_id,
            {
                "meta": {
                    "submission_id": submission_id,
                    "email": email,
                    "method": "Agent",
                    "model_name": model_name,
                    "albums": albums,
                    "opt_in": opt_in,
                },
                "submission": data,
                "result": result,
            },
        )
    except Exception as e:
        return _submission_error(f"Failed to save submission: {e}")

    # Update leaderboard only if opted in and full submission
    is_partial = bool(result.get("is_partial"))
    if not opt_in:
        leaderboard_msg = "Result recorded privately. Not published to leaderboard."
    else:
        try:
            entry = manager.add_result(
                email=email,
                method="Agent",
                model_name=model_name,
                albums=albums,
                evaluated_queries=result["evaluated_queries"],
                total_gt_queries=result["total_gt_queries"],
                global_metrics=result["global_metrics"],
            )
        except Exception as e:
            # The submission is already saved; report the publishing failure
            # in the summary instead of losing the computed metrics.
            print(f"[WARN] Failed to publish submission {submission_id}: {e}")
            leaderboard_msg = f"Result saved, but publishing to the leaderboard failed: {e}"
        else:
            if entry is not None:
                leaderboard_msg = "Result published to leaderboard."
            elif is_partial:
                leaderboard_msg = f"Result saved but NOT eligible for leaderboard: incomplete submission ({result['evaluated_queries']}/{result['total_gt_queries']} queries). Only full submissions across all 3 albums are ranked."
            else:
                leaderboard_msg = "Result saved but NOT eligible for leaderboard. Only full submissions across all 3 albums are ranked."

    # Build per-album breakdown
    album_breakdown = {
        f"album_{a_id}": {
            "submitted": alb_res["evaluated_queries"],
            "total": alb_res["total_gt_queries"],
            "complete": not alb_res["is_partial"],
        }
        for a_id, alb_res in result.get("per_album", {}).items()
    }

    # Build result summary
    summary = {
        "status": "Success",
        "submission_id": submission_id,
        "email": email,
        "model_name": model_name,
        "albums": albums,
        "evaluated_queries": result["evaluated_queries"],
        "total_gt_queries": result["total_gt_queries"],
        "album_breakdown": album_breakdown,
        "metrics": result["global_metrics"],
        "leaderboard_status": leaderboard_msg,
        "notice": "Please download and save your results. Submission data is retained for 30 days only.",
    }
    # Only attach the warning when the evaluator actually supplied one.
    if is_partial and "warning" in result:
        summary["warning"] = result["warning"]

    return summary, refresh_leaderboard("Recall@10")


# Gradio interface: a Blocks app with three tabs (Leaderboard, Submit, About).
# Components are declared in the order they should appear, so statement order
# inside this block matters.
with gr.Blocks(css=custom_css, title="PhotoBench-Protected Leaderboard") as demo:
    # Page header: title, navigation and introduction text imported from src.about.
    gr.HTML(TITLE)
    gr.HTML(NAVIGATION)
    gr.Markdown(INTRODUCTION, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons"):
        # === Tab 1: Leaderboard ===
        with gr.TabItem("🏅 Leaderboard"):
            with gr.Row():
                with gr.Column(scale=3):
                    # Metric column used for sorting; passed to refresh_leaderboard.
                    sort_by = gr.Dropdown(
                        choices=ALL_METRIC_COLS,
                        value="Recall@10",
                        label="Sort by",
                    )
                with gr.Column(scale=1):
                    refresh_btn = gr.Button("Refresh", variant="primary", elem_classes=["refresh-btn"])
            # Read-only table; filled by refresh_leaderboard and by handle_submission.
            leaderboard_table = gr.DataFrame(
                label="Top 30",
                interactive=False,
                wrap=True,
            )

            # Rebuild the table on demand with the currently selected sort metric.
            refresh_btn.click(
                refresh_leaderboard,
                inputs=[sort_by],
                outputs=leaderboard_table,
            )
            # Also register the same callback on the Blocks load event.
            demo.load(
                refresh_leaderboard,
                inputs=[sort_by],
                outputs=leaderboard_table,
            )

        # === Tab 2: Submit ===
        with gr.TabItem("📝 Submit"):
            gr.Markdown(SUBMISSION_GUIDE, elem_classes="markdown-text")

            with gr.Row():
                # Empty column (scale=1); presumably a spacer that centres the form.
                with gr.Column(scale=1):
                    pass
                with gr.Column(scale=3):
                    with gr.Row():
                        # Left: submission form inputs.
                        with gr.Column():
                            upload_file = gr.File(
                                label="Upload results JSON",
                                file_types=[".json"],
                            )
                            email_input = gr.Textbox(
                                label="Email",
                                placeholder="your@email.com",
                            )
                            model_name_input = gr.Textbox(
                                label="Model / System Name",
                                placeholder="e.g., GPT-4V-Agent",
                            )
                            # Checked by default: results are published unless the user opts out.
                            opt_in_toggle = gr.Checkbox(
                                label="Publish to public leaderboard",
                                value=True,
                                elem_classes=["toggle-switch"],
                            )
                            submit_btn = gr.Button("Submit for Evaluation", variant="primary")

                        # Right: JSON view of the first value returned by handle_submission.
                        with gr.Column():
                            result_json = gr.JSON(label="Evaluation Results")
                # Empty column (scale=1) on the other side of the form.
                with gr.Column(scale=1):
                    pass

            # Input order must match handle_submission(file_obj, email, model_name, opt_in).
            # Its second return value is written to the leaderboard table from Tab 1.
            submit_btn.click(
                handle_submission,
                inputs=[upload_file, email_input, model_name_input, opt_in_toggle],
                outputs=[result_json, leaderboard_table],
            )

        # === Tab 3: About ===
        with gr.TabItem("ℹ️ About"):
            gr.Markdown(EVALUATION_INFO, elem_classes="markdown-text")


# Launch the app unconditionally when this module is executed (no __main__ guard).
demo.launch()