Spaces:
Running
Running
Commit ·
01f4cb5
0
Parent(s):
Initial PhotoBench-Protected Leaderboard
Browse files- .gitignore +20 -0
- README.md +39 -0
- app.py +236 -0
- assets/leaderboard.jsonl +26 -0
- data/leaderboard.jsonl +26 -0
- requirements.txt +3 -0
- src/__init__.py +0 -0
- src/about.py +235 -0
- src/evaluator.py +183 -0
- src/leaderboard_manager.py +166 -0
- src/storage.py +108 -0
.gitignore
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Data files
|
| 2 |
+
data/gt/
|
| 3 |
+
*.DS_Store
|
| 4 |
+
|
| 5 |
+
# Python
|
| 6 |
+
__pycache__/
|
| 7 |
+
*.pyc
|
| 8 |
+
*.pyo
|
| 9 |
+
*.egg-info/
|
| 10 |
+
.venv/
|
| 11 |
+
venv/
|
| 12 |
+
|
| 13 |
+
# IDE
|
| 14 |
+
.vscode/
|
| 15 |
+
.idea/
|
| 16 |
+
|
| 17 |
+
# Editor swap / backup files
|
| 18 |
+
*.swp
|
| 19 |
+
*.swo
|
| 20 |
+
*~
|
README.md
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
title: PhotoBench-Protected
|
| 3 |
+
emoji: 🛡️
|
| 4 |
+
colorFrom: green
|
| 5 |
+
colorTo: green
|
| 6 |
+
sdk: gradio
|
| 7 |
+
app_file: app.py
|
| 8 |
+
pinned: true
|
| 9 |
+
license: apache-2.0
|
| 10 |
+
short_description: PhotoBench-Protected agent-only leaderboard
|
| 11 |
+
tags:
|
| 12 |
+
- leaderboard
|
| 13 |
+
- image-retrieval
|
| 14 |
+
- benchmark
|
| 15 |
+
- agent
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
# PhotoBench-Protected Leaderboard
|
| 19 |
+
|
| 20 |
+
Agent-only leaderboard for PhotoBench-Protected, where only partial captions, embeddings, and metadata are provided.
|
| 21 |
+
|
| 22 |
+
## Quick Start
|
| 23 |
+
|
| 24 |
+
1. Download the protected dataset from [PhotoBench-Protected Dataset](https://huggingface.co/datasets/SorrowTea/PhotoBench-Protected)
|
| 25 |
+
2. Build your agent-based retrieval system using the provided features
|
| 26 |
+
3. Submit predictions in JSON format
|
| 27 |
+
|
| 28 |
+
## Important Notice
|
| 29 |
+
|
| 30 |
+
**PhotoBench-Protected** is our initial open-source release with limited information sources. This benchmark focuses exclusively on **agent planning** ability.
|
| 31 |
+
|
| 32 |
+
- For **unrestricted retrieval with raw images**, please use the [full PhotoBench Leaderboard](https://huggingface.co/spaces/SorrowTea/PhotoBench/).
|
| 33 |
+
- The test sets for PhotoBench-Protected and PhotoBench (full) are **different**.
|
| 34 |
+
- Please confirm you are submitting to the correct leaderboard before uploading.
|
| 35 |
+
|
| 36 |
+
## Evaluation Metrics
|
| 37 |
+
|
| 38 |
+
- **Recall@k** for k ∈ {1, 5, 10, 20, 50, 100}
|
| 39 |
+
- **NDCG@k** for k ∈ {1, 5, 10, 20, 50, 100}
|
app.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
import uuid
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
|
| 6 |
+
import gradio as gr
|
| 7 |
+
import pandas as pd
|
| 8 |
+
|
| 9 |
+
from src.about import (
|
| 10 |
+
EVALUATION_INFO,
|
| 11 |
+
INTRODUCTION,
|
| 12 |
+
NAVIGATION,
|
| 13 |
+
SUBMISSION_GUIDE,
|
| 14 |
+
TITLE,
|
| 15 |
+
custom_css,
|
| 16 |
+
)
|
| 17 |
+
from src.evaluator import Evaluator
|
| 18 |
+
from src.leaderboard_manager import (
|
| 19 |
+
ALL_METRIC_COLS,
|
| 20 |
+
DEFAULT_DISPLAY_METRICS,
|
| 21 |
+
LeaderboardManager,
|
| 22 |
+
)
|
| 23 |
+
from src.storage import (
|
| 24 |
+
check_rate_limit,
|
| 25 |
+
record_submission_time,
|
| 26 |
+
save_submission,
|
| 27 |
+
)
|
| 28 |
+
|
| 29 |
+
# Build the shared service objects once, at import time.
def _create_manager():
    """Return a ``LeaderboardManager``, or ``None`` if constructing it fails.

    A failure is reported on stdout and swallowed so the app can still start;
    the request handlers check for ``manager is None`` themselves.
    """
    try:
        return LeaderboardManager()
    except Exception as exc:
        print(f"[WARN] Failed to init LeaderboardManager: {exc}")
        return None


manager = _create_manager()

evaluator = Evaluator()
|
| 37 |
+
|
| 38 |
+
|
| 39 |
+
def refresh_leaderboard(sort_by):
    """Produce the DataFrame shown in the leaderboard table.

    Args:
        sort_by: Metric column name forwarded to ``manager.get_display_df``
            as its ``sort_by`` argument.

    Returns:
        * An empty frame with ``rank`` and ``model_name`` columns when the
          module-level ``manager`` is ``None``.
        * Otherwise whatever ``manager.get_display_df`` returns for the
          "Agent" method filter, ``ascending=False``, ``top_n=30`` and the
          default display metrics.
        * A one-row frame with a single ``Error`` column holding the
          exception text if that call raises.
    """
    if manager is None:
        return pd.DataFrame(columns=["rank", "model_name"])

    query = {
        "method_filter": "Agent",
        "sort_by": sort_by,
        "ascending": False,
        "top_n": 30,
        "metric_cols": DEFAULT_DISPLAY_METRICS,
    }
    try:
        table = manager.get_display_df(**query)
    except Exception as exc:
        # Show the failure in the table rather than raising out of the
        # Gradio callback.
        table = pd.DataFrame({"Error": [str(exc)]})
    return table
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def handle_submission(file_obj, email, model_name, opt_in):
    """Validate, evaluate, store and optionally publish one uploaded submission.

    Args:
        file_obj: The upload from the file input. Either an object whose
            ``name`` attribute is the path of the uploaded file, or a value
            whose ``str()`` is that path.
        email: Submitter e-mail; trimmed and lower-cased before use.
        model_name: Name of the submitted model / system; trimmed before use.
        opt_in: Truthy to publish the result to the public leaderboard.

    Returns:
        A ``(summary, leaderboard_df)`` pair matching the
        ``[result_json, leaderboard_table]`` outputs.

        On failure ``summary`` is ``{"error": <message>}`` (validation
        failures also carry ``"details"``). On success it contains
        ``status``, ``submission_id``, ``email``, ``model_name``, ``albums``,
        ``evaluated_queries``, ``total_gt_queries``, ``metrics``,
        ``leaderboard_status`` and ``notice``, plus ``warning`` when the
        evaluator flags the result as partial.

        ``leaderboard_df`` is always the current leaderboard sorted by
        ``Recall@10``. Failure paths previously returned ``None`` here,
        which cleared the leaderboard table in the UI after every rejected
        submission.
    """

    def _reject(payload):
        # Pair an error payload with the current table so the DataFrame
        # output is re-populated instead of being blanked by ``None``.
        return payload, refresh_leaderboard("Recall@10")

    if manager is None:
        return _reject({"error": "Leaderboard service unavailable."})

    if file_obj is None:
        return _reject({"error": "Please upload a JSON file."})

    if not email or not email.strip() or "@" not in email:
        return _reject({"error": "Please enter a valid email address."})

    email = email.strip().lower()

    if not model_name or not model_name.strip():
        return _reject({"error": "Please enter a model / system name."})

    # Normalize once; every later use wants the trimmed name.
    model_name = model_name.strip()

    # Rate limit check
    allowed, msg = check_rate_limit(email)
    if not allowed:
        return _reject({"error": msg})

    # Read uploaded file
    file_path = file_obj.name if hasattr(file_obj, "name") else str(file_obj)
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        # Covers unreadable files as well as malformed JSON; the message is
        # shown to the submitter.
        return _reject({"error": f"Failed to parse JSON: {e}"})

    # Validate format
    errors = evaluator.validate_json_format(data)
    if errors:
        return _reject({"error": "Validation failed", "details": errors})

    # Run evaluation
    try:
        result = evaluator.evaluate(data)
    except Exception as e:
        return _reject({"error": f"Evaluation failed: {e}"})

    # Extract album coverage.
    # Presumably validate_json_format guarantees every item carries
    # "album_id" -- TODO confirm against src/evaluator.py.
    albums = sorted({str(item["album_id"]) for item in data})

    # Record rate limit.
    # NOTE(review): this runs before the save below, so a storage failure
    # still counts against the submitter's quota -- confirm that is intended.
    record_submission_time(email)

    # Save submission
    submission_id = str(uuid.uuid4())
    try:
        save_submission(
            submission_id,
            {
                "meta": {
                    "submission_id": submission_id,
                    "email": email,
                    "method": "Agent",
                    "model_name": model_name,
                    "albums": albums,
                    "opt_in": opt_in,
                },
                "submission": data,
                "result": result,
            },
        )
    except Exception as e:
        return _reject({"error": f"Failed to save submission: {e}"})

    # Update leaderboard only if opted in; add_result returns None when the
    # submission is not eligible for ranking.
    if opt_in:
        entry = manager.add_result(
            email=email,
            method="Agent",
            model_name=model_name,
            albums=albums,
            evaluated_queries=result["evaluated_queries"],
            total_gt_queries=result["total_gt_queries"],
            global_metrics=result["global_metrics"],
        )
        if entry is None:
            leaderboard_msg = "Result saved but not eligible for leaderboard (incomplete submission). Only full submissions across all 3 albums are ranked."
        else:
            leaderboard_msg = "Result published to leaderboard."
    else:
        leaderboard_msg = "Result recorded privately. Not published to leaderboard."

    # Build result summary
    summary = {
        "status": "Success",
        "submission_id": submission_id,
        "email": email,
        "model_name": model_name,
        "albums": albums,
        "evaluated_queries": result["evaluated_queries"],
        "total_gt_queries": result["total_gt_queries"],
        "metrics": result["global_metrics"],
        "leaderboard_status": leaderboard_msg,
        "notice": "Please download and save your results. Submission data is retained for 30 days only.",
    }
    if result.get("is_partial"):
        summary["warning"] = result["warning"]

    updated_df = refresh_leaderboard("Recall@10")
    return summary, updated_df
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
# Gradio interface
|
| 160 |
+
with gr.Blocks(css=custom_css, title="PhotoBench-Protected Leaderboard") as demo:
|
| 161 |
+
gr.HTML(TITLE)
|
| 162 |
+
gr.HTML(NAVIGATION)
|
| 163 |
+
gr.Markdown(INTRODUCTION, elem_classes="markdown-text")
|
| 164 |
+
|
| 165 |
+
with gr.Tabs(elem_classes="tab-buttons"):
|
| 166 |
+
# === Tab 1: Leaderboard ===
|
| 167 |
+
with gr.TabItem("🏅 Leaderboard"):
|
| 168 |
+
with gr.Row():
|
| 169 |
+
sort_by = gr.Dropdown(
|
| 170 |
+
choices=ALL_METRIC_COLS,
|
| 171 |
+
value="Recall@10",
|
| 172 |
+
label="Sort by",
|
| 173 |
+
)
|
| 174 |
+
refresh_btn = gr.Button("Refresh", variant="primary")
|
| 175 |
+
leaderboard_table = gr.DataFrame(
|
| 176 |
+
label="Top 30",
|
| 177 |
+
interactive=False,
|
| 178 |
+
wrap=True,
|
| 179 |
+
)
|
| 180 |
+
|
| 181 |
+
refresh_btn.click(
|
| 182 |
+
refresh_leaderboard,
|
| 183 |
+
inputs=[sort_by],
|
| 184 |
+
outputs=leaderboard_table,
|
| 185 |
+
)
|
| 186 |
+
demo.load(
|
| 187 |
+
refresh_leaderboard,
|
| 188 |
+
inputs=[sort_by],
|
| 189 |
+
outputs=leaderboard_table,
|
| 190 |
+
)
|
| 191 |
+
|
| 192 |
+
# === Tab 2: Submit ===
|
| 193 |
+
with gr.TabItem("📝 Submit"):
|
| 194 |
+
gr.Markdown(SUBMISSION_GUIDE, elem_classes="markdown-text")
|
| 195 |
+
|
| 196 |
+
with gr.Row():
|
| 197 |
+
with gr.Column(scale=1):
|
| 198 |
+
pass
|
| 199 |
+
with gr.Column(scale=3):
|
| 200 |
+
with gr.Row():
|
| 201 |
+
with gr.Column():
|
| 202 |
+
upload_file = gr.File(
|
| 203 |
+
label="Upload results JSON",
|
| 204 |
+
file_types=[".json"],
|
| 205 |
+
)
|
| 206 |
+
email_input = gr.Textbox(
|
| 207 |
+
label="Email",
|
| 208 |
+
placeholder="your@email.com",
|
| 209 |
+
)
|
| 210 |
+
model_name_input = gr.Textbox(
|
| 211 |
+
label="Model / System Name",
|
| 212 |
+
placeholder="e.g., GPT-4V-Agent",
|
| 213 |
+
)
|
| 214 |
+
opt_in_toggle = gr.Checkbox(
|
| 215 |
+
label="Publish to public leaderboard",
|
| 216 |
+
value=True,
|
| 217 |
+
)
|
| 218 |
+
submit_btn = gr.Button("Submit for Evaluation", variant="primary")
|
| 219 |
+
|
| 220 |
+
with gr.Column():
|
| 221 |
+
result_json = gr.JSON(label="Evaluation Results")
|
| 222 |
+
with gr.Column(scale=1):
|
| 223 |
+
pass
|
| 224 |
+
|
| 225 |
+
submit_btn.click(
|
| 226 |
+
handle_submission,
|
| 227 |
+
inputs=[upload_file, email_input, model_name_input, opt_in_toggle],
|
| 228 |
+
outputs=[result_json, leaderboard_table],
|
| 229 |
+
)
|
| 230 |
+
|
| 231 |
+
# === Tab 3: About ===
|
| 232 |
+
with gr.TabItem("ℹ️ About"):
|
| 233 |
+
gr.Markdown(EVALUATION_INFO, elem_classes="markdown-text")
|
| 234 |
+
|
| 235 |
+
|
| 236 |
+
demo.launch()
|
assets/leaderboard.jsonl
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"submission_id": "dbd32abd6ef16eef", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "clip-ViT-B-32-multilingual-v1", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 1.2, "Recall@5": 4.1, "Recall@10": 6.1, "Recall@20": 8.8, "NDCG@1": 3.2, "NDCG@5": 4, "NDCG@10": 4.7, "NDCG@20": 5.5}
|
| 2 |
+
{"submission_id": "a59247d2809373ba", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "siglip2-base-patch16-224", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 16.4, "Recall@5": 33.7, "Recall@10": 40, "Recall@20": 47.3, "NDCG@1": 33.1, "NDCG@5": 34.6, "NDCG@10": 36, "NDCG@20": 38.2}
|
| 3 |
+
{"submission_id": "95f663a7c7d53495", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "siglip2-giant-opt-patch16-256", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 20.8, "Recall@5": 40.5, "Recall@10": 47.6, "Recall@20": 54.5, "NDCG@1": 42, "NDCG@5": 42.4, "NDCG@10": 44.1, "NDCG@20": 46}
|
| 4 |
+
{"submission_id": "695d2f72f79a2e1b", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "VLM2Vec", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 23.1, "Recall@5": 44.4, "Recall@10": 52.4, "Recall@20": 60, "NDCG@1": 46, "NDCG@5": 47, "NDCG@10": 48.9, "NDCG@20": 51.3}
|
| 5 |
+
{"submission_id": "141053c18e438d49", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "B3_Qwen2_7B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 20.9, "Recall@5": 41.4, "Recall@10": 49.9, "Recall@20": 57.1, "NDCG@1": 41.9, "NDCG@5": 43.6, "NDCG@10": 45.6, "NDCG@20": 47.8}
|
| 6 |
+
{"submission_id": "200e8177772d5001", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "Qwen3-VL-Embedding-2B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 22.7, "Recall@5": 42.5, "Recall@10": 50.6, "Recall@20": 58.2, "NDCG@1": 44.6, "NDCG@5": 45.4, "NDCG@10": 47.4, "NDCG@20": 49.5}
|
| 7 |
+
{"submission_id": "4b46272277c90449", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "Qwen3-VL-Embedding-8B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 24.9, "Recall@5": 46.2, "Recall@10": 53, "Recall@20": 59.2, "NDCG@1": 49.7, "NDCG@5": 49.7, "NDCG@10": 50.9, "NDCG@20": 52.6}
|
| 8 |
+
{"submission_id": "d30ad9a666c5a684", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "Ops-MM-embedding-v1", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 25.8, "Recall@5": 48.7, "Recall@10": 56.6, "Recall@20": 63.7, "NDCG@1": 49.8, "NDCG@5": 51.7, "NDCG@10": 53.5, "NDCG@20": 55.5}
|
| 9 |
+
{"submission_id": "4bd67195881c1ba9", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "RzenEmbed-v2-7B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 27.2, "Recall@5": 49.9, "Recall@10": 58, "Recall@20": 65.1, "NDCG@1": 54.1, "NDCG@5": 54.3, "NDCG@10": 56, "NDCG@20": 57.9}
|
| 10 |
+
{"submission_id": "1a6b01520b759117", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "QQMM-embed-v2", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 26.6, "Recall@5": 50, "Recall@10": 57.8, "Recall@20": 65.4, "NDCG@1": 52.3, "NDCG@5": 53.7, "NDCG@10": 55.4, "NDCG@20": 57.6}
|
| 11 |
+
{"submission_id": "2da78d02a132aec3", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Caption", "model_name": "multilingual-e5-small", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 21.6, "Recall@5": 37.5, "Recall@10": 42.8, "Recall@20": 48.8, "NDCG@1": 40.7, "NDCG@5": 39.6, "NDCG@10": 40.7, "NDCG@20": 42.5}
|
| 12 |
+
{"submission_id": "c3a345e59fb8a397", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Caption", "model_name": "multilingual-e5-base", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 21.8, "Recall@5": 38.1, "Recall@10": 44.8, "Recall@20": 51.9, "NDCG@1": 41.6, "NDCG@5": 40.5, "NDCG@10": 42.2, "NDCG@20": 44.3}
|
| 13 |
+
{"submission_id": "4e2e72470575fb93", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Caption", "model_name": "multilingual-e5-large", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 24.2, "Recall@5": 40.6, "Recall@10": 47.1, "Recall@20": 54.6, "NDCG@1": 46.3, "NDCG@5": 44.7, "NDCG@10": 45.9, "NDCG@20": 48}
|
| 14 |
+
{"submission_id": "ee3448449553ac08", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Caption", "model_name": "bge-m3", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 23, "Recall@5": 41.6, "Recall@10": 48.5, "Recall@20": 56.6, "NDCG@1": 43.8, "NDCG@5": 44.5, "NDCG@10": 46, "NDCG@20": 48.5}
|
| 15 |
+
{"submission_id": "b0fa2bb87791bb47", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Caption", "model_name": "Qwen3-Embedding-0.6B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 24, "Recall@5": 42, "Recall@10": 48.4, "Recall@20": 54.9, "NDCG@1": 45.9, "NDCG@5": 45.5, "NDCG@10": 46.8, "NDCG@20": 48.6}
|
| 16 |
+
{"submission_id": "7b6b91a8b64da303", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Caption", "model_name": "Qwen3-Embedding-4B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 25.6, "Recall@5": 44.5, "Recall@10": 51.6, "Recall@20": 57.9, "NDCG@1": 49.1, "NDCG@5": 48.3, "NDCG@10": 49.8, "NDCG@20": 51.4}
|
| 17 |
+
{"submission_id": "b38199eded78c794", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Caption", "model_name": "Qwen3-Embedding-8B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 25.4, "Recall@5": 44.8, "Recall@10": 51.7, "Recall@20": 57.8, "NDCG@1": 48.3, "NDCG@5": 48.2, "NDCG@10": 49.7, "NDCG@20": 51.2}
|
| 18 |
+
{"submission_id": "0dbc32eb7e40e180", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "ToolACE-2-Llama-3.1-8B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 21.3, "Recall@5": 44, "Recall@10": 47.7, "Recall@20": 51.2, "NDCG@1": 48.8, "NDCG@5": 50.9, "NDCG@10": 50.2, "NDCG@20": 50.7}
|
| 19 |
+
{"submission_id": "62d9047f0402776f", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "Qwen3-8B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 14.5, "Recall@5": 51, "Recall@10": 60.4, "Recall@20": 64.1, "NDCG@1": 35, "NDCG@5": 53.5, "NDCG@10": 56.4, "NDCG@20": 56.4}
|
| 20 |
+
{"submission_id": "6921c1b285657dfe", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "Qwen3-32B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 28.3, "Recall@5": 54.1, "Recall@10": 62.5, "Recall@20": 64.5, "NDCG@1": 63.6, "NDCG@5": 63, "NDCG@10": 63.5, "NDCG@20": 62.4}
|
| 21 |
+
{"submission_id": "60fd3fdf06d3f313", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "DeepSeek-v3", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 26.8, "Recall@5": 51.6, "Recall@10": 60.1, "Recall@20": 62.4, "NDCG@1": 59.6, "NDCG@5": 59.9, "NDCG@10": 61, "NDCG@20": 60.6}
|
| 22 |
+
{"submission_id": "45249ed7e1ebc2f2", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "Qwen3-235B-A22B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 31, "Recall@5": 60.8, "Recall@10": 67, "Recall@20": 69.6, "NDCG@1": 70, "NDCG@5": 71.4, "NDCG@10": 72, "NDCG@20": 71.8}
|
| 23 |
+
{"submission_id": "695f07e99fefa554", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "GPT-4o", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 28, "Recall@5": 53.1, "Recall@10": 58.5, "Recall@20": 60.3, "NDCG@1": 60.3, "NDCG@5": 61.1, "NDCG@10": 60.5, "NDCG@20": 59.5}
|
| 24 |
+
{"submission_id": "2750290902c58d51", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "OpenAI-o3", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 31.2, "Recall@5": 59.5, "Recall@10": 68.7, "Recall@20": 71.4, "NDCG@1": 70.2, "NDCG@5": 69.8, "NDCG@10": 70.6, "NDCG@20": 69.9}
|
| 25 |
+
{"submission_id": "d80bf05df2437697", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "Claude-Sonnet-4-5", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 30.5, "Recall@5": 60.4, "Recall@10": 69.5, "Recall@20": 73.1, "NDCG@1": 69, "NDCG@5": 69.7, "NDCG@10": 70.3, "NDCG@20": 70}
|
| 26 |
+
{"submission_id": "16b8c0a0ce88851b", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "Claude-Opus-4-5", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 32.1, "Recall@5": 60.6, "Recall@10": 68.7, "Recall@20": 71.7, "NDCG@1": 69.8, "NDCG@5": 69.9, "NDCG@10": 70.3, "NDCG@20": 69.9}
|
data/leaderboard.jsonl
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{"submission_id": "dbd32abd6ef16eef", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "clip-ViT-B-32-multilingual-v1", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 1.2, "Recall@5": 4.1, "Recall@10": 6.1, "Recall@20": 8.8, "NDCG@1": 3.2, "NDCG@5": 4, "NDCG@10": 4.7, "NDCG@20": 5.5}
|
| 2 |
+
{"submission_id": "a59247d2809373ba", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "siglip2-base-patch16-224", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 16.4, "Recall@5": 33.7, "Recall@10": 40, "Recall@20": 47.3, "NDCG@1": 33.1, "NDCG@5": 34.6, "NDCG@10": 36, "NDCG@20": 38.2}
|
| 3 |
+
{"submission_id": "95f663a7c7d53495", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "siglip2-giant-opt-patch16-256", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 20.8, "Recall@5": 40.5, "Recall@10": 47.6, "Recall@20": 54.5, "NDCG@1": 42, "NDCG@5": 42.4, "NDCG@10": 44.1, "NDCG@20": 46}
|
| 4 |
+
{"submission_id": "695d2f72f79a2e1b", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "VLM2Vec", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 23.1, "Recall@5": 44.4, "Recall@10": 52.4, "Recall@20": 60, "NDCG@1": 46, "NDCG@5": 47, "NDCG@10": 48.9, "NDCG@20": 51.3}
|
| 5 |
+
{"submission_id": "141053c18e438d49", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "B3_Qwen2_7B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 20.9, "Recall@5": 41.4, "Recall@10": 49.9, "Recall@20": 57.1, "NDCG@1": 41.9, "NDCG@5": 43.6, "NDCG@10": 45.6, "NDCG@20": 47.8}
|
| 6 |
+
{"submission_id": "200e8177772d5001", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "Qwen3-VL-Embedding-2B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 22.7, "Recall@5": 42.5, "Recall@10": 50.6, "Recall@20": 58.2, "NDCG@1": 44.6, "NDCG@5": 45.4, "NDCG@10": 47.4, "NDCG@20": 49.5}
|
| 7 |
+
{"submission_id": "4b46272277c90449", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "Qwen3-VL-Embedding-8B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 24.9, "Recall@5": 46.2, "Recall@10": 53, "Recall@20": 59.2, "NDCG@1": 49.7, "NDCG@5": 49.7, "NDCG@10": 50.9, "NDCG@20": 52.6}
|
| 8 |
+
{"submission_id": "d30ad9a666c5a684", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "Ops-MM-embedding-v1", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 25.8, "Recall@5": 48.7, "Recall@10": 56.6, "Recall@20": 63.7, "NDCG@1": 49.8, "NDCG@5": 51.7, "NDCG@10": 53.5, "NDCG@20": 55.5}
|
| 9 |
+
{"submission_id": "4bd67195881c1ba9", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "RzenEmbed-v2-7B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 27.2, "Recall@5": 49.9, "Recall@10": 58, "Recall@20": 65.1, "NDCG@1": 54.1, "NDCG@5": 54.3, "NDCG@10": 56, "NDCG@20": 57.9}
|
| 10 |
+
{"submission_id": "1a6b01520b759117", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Embedding", "model_name": "QQMM-embed-v2", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 26.6, "Recall@5": 50, "Recall@10": 57.8, "Recall@20": 65.4, "NDCG@1": 52.3, "NDCG@5": 53.7, "NDCG@10": 55.4, "NDCG@20": 57.6}
|
| 11 |
+
{"submission_id": "2da78d02a132aec3", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Caption", "model_name": "multilingual-e5-small", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 21.6, "Recall@5": 37.5, "Recall@10": 42.8, "Recall@20": 48.8, "NDCG@1": 40.7, "NDCG@5": 39.6, "NDCG@10": 40.7, "NDCG@20": 42.5}
|
| 12 |
+
{"submission_id": "c3a345e59fb8a397", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Caption", "model_name": "multilingual-e5-base", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 21.8, "Recall@5": 38.1, "Recall@10": 44.8, "Recall@20": 51.9, "NDCG@1": 41.6, "NDCG@5": 40.5, "NDCG@10": 42.2, "NDCG@20": 44.3}
|
| 13 |
+
{"submission_id": "4e2e72470575fb93", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Caption", "model_name": "multilingual-e5-large", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 24.2, "Recall@5": 40.6, "Recall@10": 47.1, "Recall@20": 54.6, "NDCG@1": 46.3, "NDCG@5": 44.7, "NDCG@10": 45.9, "NDCG@20": 48}
|
| 14 |
+
{"submission_id": "ee3448449553ac08", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Caption", "model_name": "bge-m3", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 23, "Recall@5": 41.6, "Recall@10": 48.5, "Recall@20": 56.6, "NDCG@1": 43.8, "NDCG@5": 44.5, "NDCG@10": 46, "NDCG@20": 48.5}
|
| 15 |
+
{"submission_id": "b0fa2bb87791bb47", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Caption", "model_name": "Qwen3-Embedding-0.6B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 24, "Recall@5": 42, "Recall@10": 48.4, "Recall@20": 54.9, "NDCG@1": 45.9, "NDCG@5": 45.5, "NDCG@10": 46.8, "NDCG@20": 48.6}
|
| 16 |
+
{"submission_id": "7b6b91a8b64da303", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Caption", "model_name": "Qwen3-Embedding-4B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 25.6, "Recall@5": 44.5, "Recall@10": 51.6, "Recall@20": 57.9, "NDCG@1": 49.1, "NDCG@5": 48.3, "NDCG@10": 49.8, "NDCG@20": 51.4}
|
| 17 |
+
{"submission_id": "b38199eded78c794", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Caption", "model_name": "Qwen3-Embedding-8B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 25.4, "Recall@5": 44.8, "Recall@10": 51.7, "Recall@20": 57.8, "NDCG@1": 48.3, "NDCG@5": 48.2, "NDCG@10": 49.7, "NDCG@20": 51.2}
|
| 18 |
+
{"submission_id": "0dbc32eb7e40e180", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "ToolACE-2-Llama-3.1-8B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 21.3, "Recall@5": 44, "Recall@10": 47.7, "Recall@20": 51.2, "NDCG@1": 48.8, "NDCG@5": 50.9, "NDCG@10": 50.2, "NDCG@20": 50.7}
|
| 19 |
+
{"submission_id": "62d9047f0402776f", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "Qwen3-8B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 14.5, "Recall@5": 51, "Recall@10": 60.4, "Recall@20": 64.1, "NDCG@1": 35, "NDCG@5": 53.5, "NDCG@10": 56.4, "NDCG@20": 56.4}
|
| 20 |
+
{"submission_id": "6921c1b285657dfe", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "Qwen3-32B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 28.3, "Recall@5": 54.1, "Recall@10": 62.5, "Recall@20": 64.5, "NDCG@1": 63.6, "NDCG@5": 63, "NDCG@10": 63.5, "NDCG@20": 62.4}
|
| 21 |
+
{"submission_id": "60fd3fdf06d3f313", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "DeepSeek-v3", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 26.8, "Recall@5": 51.6, "Recall@10": 60.1, "Recall@20": 62.4, "NDCG@1": 59.6, "NDCG@5": 59.9, "NDCG@10": 61, "NDCG@20": 60.6}
|
| 22 |
+
{"submission_id": "45249ed7e1ebc2f2", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "Qwen3-235B-A22B", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 31, "Recall@5": 60.8, "Recall@10": 67, "Recall@20": 69.6, "NDCG@1": 70, "NDCG@5": 71.4, "NDCG@10": 72, "NDCG@20": 71.8}
|
| 23 |
+
{"submission_id": "695f07e99fefa554", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "GPT-4o", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 28, "Recall@5": 53.1, "Recall@10": 58.5, "Recall@20": 60.3, "NDCG@1": 60.3, "NDCG@5": 61.1, "NDCG@10": 60.5, "NDCG@20": 59.5}
|
| 24 |
+
{"submission_id": "2750290902c58d51", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "OpenAI-o3", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 31.2, "Recall@5": 59.5, "Recall@10": 68.7, "Recall@20": 71.4, "NDCG@1": 70.2, "NDCG@5": 69.8, "NDCG@10": 70.6, "NDCG@20": 69.9}
|
| 25 |
+
{"submission_id": "d80bf05df2437697", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "Claude-Sonnet-4-5", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 30.5, "Recall@5": 60.4, "Recall@10": 69.5, "Recall@20": 73.1, "NDCG@1": 69, "NDCG@5": 69.7, "NDCG@10": 70.3, "NDCG@20": 70}
|
| 26 |
+
{"submission_id": "16b8c0a0ce88851b", "timestamp": "2026-04-22T17:31:10.846331Z", "email": "init@example.com", "method": "Agent", "model_name": "Claude-Opus-4-5", "albums": "1,2,3", "evaluated_queries": 0, "is_paper_data": true, "Recall@1": 32.1, "Recall@5": 60.6, "Recall@10": 68.7, "Recall@20": 71.7, "NDCG@1": 69.8, "NDCG@5": 69.9, "NDCG@10": 70.3, "NDCG@20": 69.9}
|
requirements.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio
|
| 2 |
+
pandas
|
| 3 |
+
numpy
|
src/__init__.py
ADDED
|
File without changes
|
src/about.py
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
NAVIGATION = """
|
| 2 |
+
<div style="text-align:center; margin-bottom: 24px;">
|
| 3 |
+
<div style="display:inline-flex; flex-wrap:wrap; gap:8px; justify-content:center;">
|
| 4 |
+
<a href="https://github.com/LaVieEnRose365/PhotoBench" target="_blank" style="text-decoration:none;">
|
| 5 |
+
<span style="display:inline-block; padding:8px 14px; background:#fff; border:1px solid #d4e0c8; border-radius:10px; color:#555; font-size:0.85em; font-weight:500; transition:all 0.2s;">🏠 GitHub</span>
|
| 6 |
+
</a>
|
| 7 |
+
<a href="https://arxiv.org/abs/2603.01493v1" target="_blank" style="text-decoration:none;">
|
| 8 |
+
<span style="display:inline-block; padding:8px 14px; background:#fff; border:1px solid #d4e0c8; border-radius:10px; color:#555; font-size:0.85em; font-weight:500;">📄 arXiv</span>
|
| 9 |
+
</a>
|
| 10 |
+
<a href="https://huggingface.co/spaces/SorrowTea/PhotoBench/" target="_blank" style="text-decoration:none;">
|
| 11 |
+
<span style="display:inline-block; padding:8px 14px; background:#fff; border:1px solid #d4e0c8; border-radius:10px; color:#555; font-size:0.85em; font-weight:500;">🏅 Leaderboard</span>
|
| 12 |
+
</a>
|
| 13 |
+
<a href="https://huggingface.co/datasets/SorrowTea/PhotoBench" target="_blank" style="text-decoration:none;">
|
| 14 |
+
<span style="display:inline-block; padding:8px 14px; background:#fff; border:1px solid #d4e0c8; border-radius:10px; color:#555; font-size:0.85em; font-weight:500;">📊 Dataset</span>
|
| 15 |
+
</a>
|
| 16 |
+
<span style="display:inline-block; padding:8px 14px; background:#7CB342; border:1px solid #7CB342; border-radius:10px; color:#fff; font-size:0.85em; font-weight:600;">🛡️ Protected LB</span>
|
| 17 |
+
<a href="https://huggingface.co/datasets/SorrowTea/PhotoBench-Protected" target="_blank" style="text-decoration:none;">
|
| 18 |
+
<span style="display:inline-block; padding:8px 14px; background:#fff; border:1px solid #d4e0c8; border-radius:10px; color:#555; font-size:0.85em; font-weight:500;">📁 Protected Data</span>
|
| 19 |
+
</a>
|
| 20 |
+
<a href="https://sbox.myoas.com/l/Be5be4053f6b43840" target="_blank" style="text-decoration:none;">
|
| 21 |
+
<span style="display:inline-block; padding:8px 14px; background:#fff; border:1px solid #d4e0c8; border-radius:10px; color:#555; font-size:0.85em; font-weight:500;">🔒 Full Data</span>
|
| 22 |
+
</a>
|
| 23 |
+
</div>
|
| 24 |
+
</div>
|
| 25 |
+
"""
|
| 26 |
+
|
| 27 |
+
TITLE = """
|
| 28 |
+
<div style="text-align:center; padding: 48px 20px; background: linear-gradient(160deg, #f5f9f0 0%, #e8f0e0 100%); border-radius: 20px; margin-bottom: 32px; border: 1px solid #d4e0c8;">
|
| 29 |
+
<h1 style="color:#1a1a1a; font-size:3em; font-weight:600; letter-spacing:-1px; margin:0; font-family:-apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;">
|
| 30 |
+
PhotoBench-Protected
|
| 31 |
+
</h1>
|
| 32 |
+
<p style="color:#7CB342; font-size:1.1em; font-weight:500; letter-spacing:2px; margin:12px 0 0 0;">
|
| 33 |
+
AGENT-ONLY LEADERBOARD
|
| 34 |
+
</p>
|
| 35 |
+
<div style="width:60px; height:3px; background:#7CB342; margin:20px auto; border-radius:2px;"></div>
|
| 36 |
+
<p style="color:#666; font-size:0.95em; margin-top:12px;">
|
| 37 |
+
Limited Information Source Benchmark
|
| 38 |
+
</p>
|
| 39 |
+
</div>
|
| 40 |
+
"""
|
| 41 |
+
|
| 42 |
+
INTRODUCTION = """
|
| 43 |
+
<div style="text-align:center; max-width:720px; margin:0 auto 40px; color:#444; line-height:1.8;">
|
| 44 |
+
|
| 45 |
+
<strong>PhotoBench-Protected</strong> is our initial open-source release.
|
| 46 |
+
Because only partial model captions, embeddings, and metadata are provided,
|
| 47 |
+
this benchmark focuses exclusively on <strong>agent planning</strong> ability.
|
| 48 |
+
|
| 49 |
+
<p style="margin-top:16px; color:#7CB342; font-weight:600;">
|
| 50 |
+
⚠️ Please confirm you are submitting to the correct leaderboard.
|
| 51 |
+
</p>
|
| 52 |
+
|
| 53 |
+
<p style="margin-top:12px;">
|
| 54 |
+
The test sets for PhotoBench-Protected and <a href="https://huggingface.co/spaces/SorrowTea/PhotoBench/" target="_blank" style="color:#7CB342; font-weight:600; text-decoration:none;">PhotoBench (full) ↗</a> are different.
|
| 55 |
+
For unrestricted retrieval with raw images, please use the
|
| 56 |
+
<a href="https://huggingface.co/spaces/SorrowTea/PhotoBench/" target="_blank" style="color:#7CB342; font-weight:600; text-decoration:none;">full PhotoBench leaderboard ↗</a>.
|
| 57 |
+
Full dataset download: <a href="https://sbox.myoas.com/l/Be5be4053f6b43840" target="_blank" style="color:#7CB342; font-weight:600; text-decoration:none;">OneBox ↗</a>.
|
| 58 |
+
</p>
|
| 59 |
+
|
| 60 |
+
</div>
|
| 61 |
+
"""
|
| 62 |
+
|
| 63 |
+
SUBMISSION_GUIDE = """
|
| 64 |
+
### Submission Format
|
| 65 |
+
|
| 66 |
+
Upload a JSON file containing an array of prediction objects:
|
| 67 |
+
|
| 68 |
+
```json
|
| 69 |
+
[
|
| 70 |
+
{
|
| 71 |
+
"album_id": "1",
|
| 72 |
+
"query_en": "cluttered desk",
|
| 73 |
+
"pred": ["IMG_1234.jpg", "IMG_5678.jpg", ...]
|
| 74 |
+
}
|
| 75 |
+
]
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
**Required fields:**
|
| 79 |
+
- `album_id`: Album number (1, 2, or 3)
|
| 80 |
+
- `query_en`: The English query text (must match exactly)
|
| 81 |
+
- `pred`: Ordered list of predicted image filenames
|
| 82 |
+
|
| 83 |
+
You may submit results for any subset of albums. Partial submissions are accepted and evaluated.
|
| 84 |
+
"""
|
| 85 |
+
|
| 86 |
+
EVALUATION_INFO = """
|
| 87 |
+
### Evaluation Metrics
|
| 88 |
+
|
| 89 |
+
| Metric | Description |
|
| 90 |
+
|--------|-------------|
|
| 91 |
+
| **Recall@k** | Proportion of ground-truth images found in top-k predictions |
|
| 92 |
+
| **NDCG@k** | Normalized Discounted Cumulative Gain at rank k |
|
| 93 |
+
|
| 94 |
+
Supported k values: **1, 5, 10, 20, 50, 100**
|
| 95 |
+
|
| 96 |
+
Results are averaged across all evaluated queries per album, then averaged across albums for the final leaderboard score.
|
| 97 |
+
"""
|
| 98 |
+
|
| 99 |
+
custom_css = """
|
| 100 |
+
/* Grass-green clean theme */
|
| 101 |
+
body {
|
| 102 |
+
background: #f5f9f0 !important;
|
| 103 |
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif !important;
|
| 104 |
+
font-size: 17px !important;
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
/* Tab buttons */
|
| 108 |
+
.tab-buttons button {
|
| 109 |
+
font-weight: 500 !important;
|
| 110 |
+
font-size: 0.9em !important;
|
| 111 |
+
border-radius: 10px 10px 0 0 !important;
|
| 112 |
+
padding: 12px 24px !important;
|
| 113 |
+
background: #e0ead8 !important;
|
| 114 |
+
color: #555 !important;
|
| 115 |
+
border: none !important;
|
| 116 |
+
transition: all 0.25s ease !important;
|
| 117 |
+
}
|
| 118 |
+
|
| 119 |
+
.tab-buttons button.selected {
|
| 120 |
+
background: #fff !important;
|
| 121 |
+
color: #1a1a1a !important;
|
| 122 |
+
box-shadow: 0 -2px 0 #7CB342 inset !important;
|
| 123 |
+
}
|
| 124 |
+
|
| 125 |
+
/* Primary buttons */
|
| 126 |
+
.gr-button-primary {
|
| 127 |
+
background: #7CB342 !important;
|
| 128 |
+
border: none !important;
|
| 129 |
+
border-radius: 10px !important;
|
| 130 |
+
color: #fff !important;
|
| 131 |
+
font-weight: 600 !important;
|
| 132 |
+
font-size: 0.95em !important;
|
| 133 |
+
padding: 12px 28px !important;
|
| 134 |
+
transition: all 0.25s ease !important;
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
.gr-button-primary:hover {
|
| 138 |
+
background: #6ba32e !important;
|
| 139 |
+
transform: translateY(-1px) !important;
|
| 140 |
+
box-shadow: 0 6px 20px rgba(124,179,66,0.25) !important;
|
| 141 |
+
}
|
| 142 |
+
|
| 143 |
+
/* Markdown */
|
| 144 |
+
.markdown-text {
|
| 145 |
+
max-width: 780px;
|
| 146 |
+
margin: 0 auto;
|
| 147 |
+
color: #333;
|
| 148 |
+
line-height: 1.8;
|
| 149 |
+
font-size: 1.05em;
|
| 150 |
+
}
|
| 151 |
+
|
| 152 |
+
/* DataFrame Table */
|
| 153 |
+
.gr-dataframe {
|
| 154 |
+
border-radius: 14px !important;
|
| 155 |
+
overflow: hidden !important;
|
| 156 |
+
box-shadow: 0 2px 16px rgba(0,0,0,0.06) !important;
|
| 157 |
+
border: 1px solid #d4e0c8 !important;
|
| 158 |
+
font-size: 0.95em !important;
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
.gr-dataframe th {
|
| 162 |
+
background: #e8f0e0 !important;
|
| 163 |
+
color: #444 !important;
|
| 164 |
+
font-weight: 600 !important;
|
| 165 |
+
font-size: 0.8em !important;
|
| 166 |
+
text-transform: uppercase !important;
|
| 167 |
+
letter-spacing: 0.5px !important;
|
| 168 |
+
padding: 14px 10px !important;
|
| 169 |
+
border-bottom: 2px solid #d4e0c8 !important;
|
| 170 |
+
}
|
| 171 |
+
|
| 172 |
+
.gr-dataframe td {
|
| 173 |
+
padding: 12px 10px !important;
|
| 174 |
+
border-bottom: 1px solid #e0ead8 !important;
|
| 175 |
+
color: #333 !important;
|
| 176 |
+
}
|
| 177 |
+
|
| 178 |
+
.gr-dataframe tr:hover td {
|
| 179 |
+
background: #f0f7e8 !important;
|
| 180 |
+
}
|
| 181 |
+
|
| 182 |
+
/* Inputs */
|
| 183 |
+
input, textarea, select {
|
| 184 |
+
border-radius: 10px !important;
|
| 185 |
+
border: 1px solid #c4d4b4 !important;
|
| 186 |
+
background: #fff !important;
|
| 187 |
+
font-size: 1em !important;
|
| 188 |
+
padding: 10px 14px !important;
|
| 189 |
+
}
|
| 190 |
+
|
| 191 |
+
input:focus, textarea:focus, select:focus {
|
| 192 |
+
border-color: #7CB342 !important;
|
| 193 |
+
box-shadow: 0 0 0 3px rgba(124,179,66,0.12) !important;
|
| 194 |
+
outline: none !important;
|
| 195 |
+
}
|
| 196 |
+
|
| 197 |
+
/* Form containers */
|
| 198 |
+
.gr-form .gr-box {
|
| 199 |
+
border-radius: 14px !important;
|
| 200 |
+
background: #fff !important;
|
| 201 |
+
border: 1px solid #d4e0c8 !important;
|
| 202 |
+
padding: 24px !important;
|
| 203 |
+
}
|
| 204 |
+
|
| 205 |
+
/* Labels */
|
| 206 |
+
.gr-input-label, .gr-dropdown-label {
|
| 207 |
+
font-weight: 500 !important;
|
| 208 |
+
color: #444 !important;
|
| 209 |
+
font-size: 0.9em !important;
|
| 210 |
+
margin-bottom: 6px !important;
|
| 211 |
+
}
|
| 212 |
+
|
| 213 |
+
/* JSON output */
|
| 214 |
+
.gr-json {
|
| 215 |
+
border-radius: 12px !important;
|
| 216 |
+
background: #f5f9f0 !important;
|
| 217 |
+
border: 1px solid #d4e0c8 !important;
|
| 218 |
+
font-size: 0.9em !important;
|
| 219 |
+
}
|
| 220 |
+
|
| 221 |
+
/* Center submit form */
|
| 222 |
+
#submit-form-container {
|
| 223 |
+
max-width: 600px;
|
| 224 |
+
margin: 0 auto;
|
| 225 |
+
}
|
| 226 |
+
|
| 227 |
+
/* Section headers */
|
| 228 |
+
.gr-tab-item h3 {
|
| 229 |
+
color: #1a1a1a !important;
|
| 230 |
+
font-weight: 600 !important;
|
| 231 |
+
font-size: 1.2em !important;
|
| 232 |
+
margin-top: 24px;
|
| 233 |
+
margin-bottom: 12px;
|
| 234 |
+
}
|
| 235 |
+
"""
|
src/evaluator.py
ADDED
|
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import logging
|
| 3 |
+
from math import log2
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Any, Dict, List
|
| 6 |
+
|
| 7 |
+
import numpy as np
|
| 8 |
+
|
| 9 |
+
logger = logging.getLogger(__name__)
|
| 10 |
+
|
| 11 |
+
GT_DIR = Path("/data") / "gt"
|
| 12 |
+
|
| 13 |
+
K_VALUES = [1, 5, 10, 20, 50, 100]
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
class Evaluator:
|
| 17 |
+
def __init__(self, gt_dir: str | Path | None = None):
|
| 18 |
+
self.gt_dir = Path(gt_dir) if gt_dir else GT_DIR
|
| 19 |
+
self._gt_cache: Dict[str, list] = {}
|
| 20 |
+
|
| 21 |
+
def _load_gt(self, album_id: str) -> list:
|
| 22 |
+
if album_id in self._gt_cache:
|
| 23 |
+
return self._gt_cache[album_id]
|
| 24 |
+
gt_file = self.gt_dir / f"album{album_id}_test_answer.json"
|
| 25 |
+
if not gt_file.exists():
|
| 26 |
+
raise FileNotFoundError(f"Ground truth file not found: {gt_file}")
|
| 27 |
+
with open(gt_file, "r", encoding="utf-8") as f:
|
| 28 |
+
data = json.load(f)
|
| 29 |
+
self._gt_cache[album_id] = data
|
| 30 |
+
return data
|
| 31 |
+
|
| 32 |
+
def validate_json_format(self, data: Any) -> list[str]:
|
| 33 |
+
errors = []
|
| 34 |
+
if not isinstance(data, list):
|
| 35 |
+
return ["Root must be a JSON array"]
|
| 36 |
+
if len(data) == 0:
|
| 37 |
+
return ["Submission is empty"]
|
| 38 |
+
for i, item in enumerate(data):
|
| 39 |
+
if not isinstance(item, dict):
|
| 40 |
+
errors.append(f"Item #{i} must be an object")
|
| 41 |
+
continue
|
| 42 |
+
if "album_id" not in item or str(item["album_id"]) not in ["1", "2", "3"]:
|
| 43 |
+
errors.append(f"Item #{i} 'album_id' must be '1', '2', or '3'")
|
| 44 |
+
if "query_en" not in item or not isinstance(item["query_en"], str):
|
| 45 |
+
errors.append(f"Item #{i} 'query_en' must be a string")
|
| 46 |
+
if (
|
| 47 |
+
"pred" not in item
|
| 48 |
+
or not isinstance(item["pred"], list)
|
| 49 |
+
or not all(isinstance(x, str) for x in item["pred"])
|
| 50 |
+
):
|
| 51 |
+
errors.append(f"Item #{i} 'pred' must be a list of strings")
|
| 52 |
+
return errors
|
| 53 |
+
|
| 54 |
+
def _dcg_at_k(self, r, k):
|
| 55 |
+
r = np.asarray(r, dtype=float)[:k]
|
| 56 |
+
if r.size:
|
| 57 |
+
return np.sum(r / np.log2(np.arange(2, r.size + 2)))
|
| 58 |
+
return 0.0
|
| 59 |
+
|
| 60 |
+
def _ndcg_at_k(self, r, k):
|
| 61 |
+
dcg_max = self._dcg_at_k(sorted(r, reverse=True), k)
|
| 62 |
+
if not dcg_max:
|
| 63 |
+
return 0.0
|
| 64 |
+
return self._dcg_at_k(r, k) / dcg_max
|
| 65 |
+
|
| 66 |
+
def _recall_at_k(self, ground_truth, predictions, k):
|
| 67 |
+
k_preds = predictions[:k]
|
| 68 |
+
hits = len(set(ground_truth) & set(k_preds))
|
| 69 |
+
if len(ground_truth) == 0:
|
| 70 |
+
return 0.0
|
| 71 |
+
return hits / len(ground_truth)
|
| 72 |
+
|
| 73 |
+
def _evaluate_album(self, album_submissions: dict, album_id: str) -> dict:
|
| 74 |
+
"""Evaluate a single album."""
|
| 75 |
+
gt_data = self._load_gt(album_id)
|
| 76 |
+
gt_map = {item["query_en"]: item for item in gt_data}
|
| 77 |
+
|
| 78 |
+
metrics_accum = {f"Recall@{k}": [] for k in K_VALUES}
|
| 79 |
+
metrics_accum.update({f"NDCG@{k}": [] for k in K_VALUES})
|
| 80 |
+
metrics_accum["Recall"] = []
|
| 81 |
+
metrics_accum["NDCG"] = []
|
| 82 |
+
source_accum = {}
|
| 83 |
+
empty_gt_queries = 0
|
| 84 |
+
evaluated_queries = 0
|
| 85 |
+
|
| 86 |
+
for q, pred in album_submissions.items():
|
| 87 |
+
if q not in gt_map:
|
| 88 |
+
continue
|
| 89 |
+
|
| 90 |
+
gt_item = gt_map[q]
|
| 91 |
+
gt_answers = gt_item.get("ground_truth", [])
|
| 92 |
+
source = gt_item.get("Source")
|
| 93 |
+
evaluated_queries += 1
|
| 94 |
+
|
| 95 |
+
if not gt_answers:
|
| 96 |
+
empty_gt_queries += 1
|
| 97 |
+
continue
|
| 98 |
+
|
| 99 |
+
r = [1 if p in gt_answers else 0 for p in pred]
|
| 100 |
+
dcg_r = [1.0] * len(gt_answers)
|
| 101 |
+
m = {}
|
| 102 |
+
|
| 103 |
+
for k in K_VALUES:
|
| 104 |
+
m[f"Recall@{k}"] = self._recall_at_k(gt_answers, pred, k)
|
| 105 |
+
idcg = self._dcg_at_k(dcg_r, k)
|
| 106 |
+
ndcg = self._dcg_at_k(r, k) / idcg if idcg > 0 else 0.0
|
| 107 |
+
m[f"NDCG@{k}"] = ndcg
|
| 108 |
+
metrics_accum[f"Recall@{k}"].append(m[f"Recall@{k}"])
|
| 109 |
+
metrics_accum[f"NDCG@{k}"].append(m[f"NDCG@{k}"])
|
| 110 |
+
|
| 111 |
+
m["Recall"] = sum(r) / len(gt_answers)
|
| 112 |
+
idcg_all = self._dcg_at_k(dcg_r, len(gt_answers))
|
| 113 |
+
ndcg_all = self._dcg_at_k(r, len(r)) / idcg_all if idcg_all > 0 else 0.0
|
| 114 |
+
m["NDCG"] = ndcg_all
|
| 115 |
+
metrics_accum["Recall"].append(m["Recall"])
|
| 116 |
+
metrics_accum["NDCG"].append(m["NDCG"])
|
| 117 |
+
|
| 118 |
+
if source is not None:
|
| 119 |
+
if source not in source_accum:
|
| 120 |
+
source_accum[source] = {f"Recall@{_k}": [] for _k in K_VALUES}
|
| 121 |
+
source_accum[source].update({f"NDCG@{_k}": [] for _k in K_VALUES})
|
| 122 |
+
source_accum[source]["Recall"] = []
|
| 123 |
+
source_accum[source]["NDCG"] = []
|
| 124 |
+
for k in K_VALUES:
|
| 125 |
+
source_accum[source][f"Recall@{k}"].append(m[f"Recall@{k}"])
|
| 126 |
+
source_accum[source][f"NDCG@{k}"].append(m[f"NDCG@{k}"])
|
| 127 |
+
source_accum[source]["Recall"].append(m["Recall"])
|
| 128 |
+
source_accum[source]["NDCG"].append(m["NDCG"])
|
| 129 |
+
|
| 130 |
+
global_metrics = {
|
| 131 |
+
k: float(np.mean(v)) if v else 0.0 for k, v in metrics_accum.items()
|
| 132 |
+
}
|
| 133 |
+
return {
|
| 134 |
+
"global_metrics": global_metrics,
|
| 135 |
+
"source_metrics": {
|
| 136 |
+
src: {k: float(np.mean(v)) if v else 0.0 for k, v in m_dict.items()}
|
| 137 |
+
for src, m_dict in source_accum.items()
|
| 138 |
+
},
|
| 139 |
+
"empty_gt_ratio": empty_gt_queries / evaluated_queries if evaluated_queries > 0 else 0.0,
|
| 140 |
+
"evaluated_queries": evaluated_queries,
|
| 141 |
+
"total_gt_queries": len(gt_data),
|
| 142 |
+
"is_partial": evaluated_queries < len(gt_data),
|
| 143 |
+
}
|
| 144 |
+
|
| 145 |
+
def evaluate(self, submission_data: list) -> dict:
|
| 146 |
+
albums = {}
|
| 147 |
+
for item in submission_data:
|
| 148 |
+
a_id = str(item["album_id"])
|
| 149 |
+
if a_id not in albums:
|
| 150 |
+
albums[a_id] = {}
|
| 151 |
+
albums[a_id][item["query_en"]] = item["pred"]
|
| 152 |
+
|
| 153 |
+
if not albums:
|
| 154 |
+
raise ValueError("No valid albums found in submission.")
|
| 155 |
+
|
| 156 |
+
# Evaluate each album separately
|
| 157 |
+
per_album = {}
|
| 158 |
+
for a_id in sorted(albums.keys()):
|
| 159 |
+
per_album[a_id] = self._evaluate_album(albums[a_id], a_id)
|
| 160 |
+
|
| 161 |
+
# Compute averaged metrics across all albums
|
| 162 |
+
avg_metrics = {}
|
| 163 |
+
for metric_key in per_album[list(per_album.keys())[0]]["global_metrics"].keys():
|
| 164 |
+
values = [alb["global_metrics"][metric_key] for alb in per_album.values() if metric_key in alb["global_metrics"]]
|
| 165 |
+
avg_metrics[metric_key] = float(np.mean(values)) if values else 0.0
|
| 166 |
+
|
| 167 |
+
total_evaluated = sum(alb["evaluated_queries"] for alb in per_album.values())
|
| 168 |
+
total_gt = sum(alb["total_gt_queries"] for alb in per_album.values())
|
| 169 |
+
|
| 170 |
+
result = {
|
| 171 |
+
"per_album": per_album,
|
| 172 |
+
"global_metrics": avg_metrics,
|
| 173 |
+
"evaluated_queries": total_evaluated,
|
| 174 |
+
"total_gt_queries": total_gt,
|
| 175 |
+
"is_partial": total_evaluated < total_gt,
|
| 176 |
+
"albums": sorted(albums.keys()),
|
| 177 |
+
}
|
| 178 |
+
|
| 179 |
+
if result["is_partial"]:
|
| 180 |
+
missing = [a for a in ["1", "2", "3"] if a not in albums]
|
| 181 |
+
result["warning"] = f"Submission incomplete. Missing albums: {', '.join(missing)}. Averaged results across submitted albums shown below."
|
| 182 |
+
|
| 183 |
+
return result
|
src/leaderboard_manager.py
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import hashlib
|
| 2 |
+
import json
|
| 3 |
+
from datetime import datetime, timedelta
|
| 4 |
+
from typing import Any
|
| 5 |
+
|
| 6 |
+
import pandas as pd
|
| 7 |
+
|
| 8 |
+
from src.storage import load_leaderboard, save_leaderboard
|
| 9 |
+
|
| 10 |
+
# All available metric columns (computed)
|
| 11 |
+
ALL_METRIC_COLS = [
|
| 12 |
+
"Recall@1", "Recall@5", "Recall@10", "Recall@20", "Recall@50", "Recall@100",
|
| 13 |
+
"NDCG@1", "NDCG@5", "NDCG@10", "NDCG@20", "NDCG@50", "NDCG@100",
|
| 14 |
+
]
|
| 15 |
+
|
| 16 |
+
# Default columns shown on leaderboard
|
| 17 |
+
DEFAULT_DISPLAY_METRICS = [
|
| 18 |
+
"Recall@1", "Recall@5", "Recall@20", "Recall@50",
|
| 19 |
+
"NDCG@1", "NDCG@5", "NDCG@20", "NDCG@50",
|
| 20 |
+
]
|
| 21 |
+
|
| 22 |
+
# Base columns always shown
|
| 23 |
+
BASE_COLS = ["rank", "model_name"]
|
| 24 |
+
|
| 25 |
+
_DEFAULT_SORT = "Recall@10"
|
| 26 |
+
_TOP_N = 30
|
| 27 |
+
_RETENTION_DAYS = 30
|
| 28 |
+
|
| 29 |
+
|
| 30 |
+
def make_id(email: str, model_name: str) -> str:
|
| 31 |
+
return hashlib.sha256(f"{email}:{model_name}".encode()).hexdigest()[:16]
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
class LeaderboardManager:
|
| 35 |
+
def __init__(self):
|
| 36 |
+
self._entries: list[dict] = []
|
| 37 |
+
self._load()
|
| 38 |
+
self._cleanup()
|
| 39 |
+
|
| 40 |
+
def _load(self):
|
| 41 |
+
raw = load_leaderboard()
|
| 42 |
+
self._entries = raw
|
| 43 |
+
|
| 44 |
+
def _save(self):
|
| 45 |
+
save_leaderboard(self._entries)
|
| 46 |
+
|
| 47 |
+
def _cleanup(self):
|
| 48 |
+
"""Remove non-paper entries older than 30 days that are not in top 30."""
|
| 49 |
+
if not self._entries:
|
| 50 |
+
return
|
| 51 |
+
|
| 52 |
+
df = pd.DataFrame(self._entries)
|
| 53 |
+
if _DEFAULT_SORT in df.columns:
|
| 54 |
+
top_ids = set(
|
| 55 |
+
df.sort_values(by=_DEFAULT_SORT, ascending=False)
|
| 56 |
+
.head(_TOP_N)["submission_id"]
|
| 57 |
+
.tolist()
|
| 58 |
+
)
|
| 59 |
+
else:
|
| 60 |
+
top_ids = set()
|
| 61 |
+
|
| 62 |
+
cutoff = datetime.utcnow() - timedelta(days=_RETENTION_DAYS)
|
| 63 |
+
kept = []
|
| 64 |
+
for e in self._entries:
|
| 65 |
+
sid = e.get("submission_id", "")
|
| 66 |
+
is_paper = e.get("is_paper_data", False)
|
| 67 |
+
ts_str = e.get("timestamp", "")
|
| 68 |
+
try:
|
| 69 |
+
ts = datetime.fromisoformat(ts_str.replace("Z", "+00:00"))
|
| 70 |
+
except Exception:
|
| 71 |
+
ts = datetime.utcnow()
|
| 72 |
+
|
| 73 |
+
if is_paper or sid in top_ids or ts >= cutoff:
|
| 74 |
+
kept.append(e)
|
| 75 |
+
|
| 76 |
+
removed = len(self._entries) - len(kept)
|
| 77 |
+
if removed > 0:
|
| 78 |
+
print(f"[CLEANUP] Removed {removed} expired entries")
|
| 79 |
+
self._entries = kept
|
| 80 |
+
self._save()
|
| 81 |
+
|
| 82 |
+
def add_result(
|
| 83 |
+
self,
|
| 84 |
+
email: str,
|
| 85 |
+
method: str,
|
| 86 |
+
model_name: str,
|
| 87 |
+
albums: list[str],
|
| 88 |
+
evaluated_queries: int,
|
| 89 |
+
total_gt_queries: int,
|
| 90 |
+
global_metrics: dict,
|
| 91 |
+
) -> dict | None:
|
| 92 |
+
"""Add a new evaluation result. Returns entry if added, None if not eligible."""
|
| 93 |
+
# Must be a full submission (all 3 albums, all queries matched)
|
| 94 |
+
if set(albums) != {"1", "2", "3"}:
|
| 95 |
+
return None
|
| 96 |
+
if evaluated_queries < total_gt_queries:
|
| 97 |
+
return None
|
| 98 |
+
|
| 99 |
+
submission_id = make_id(email, model_name)
|
| 100 |
+
entry = {
|
| 101 |
+
"submission_id": submission_id,
|
| 102 |
+
"timestamp": datetime.utcnow().isoformat() + "Z",
|
| 103 |
+
"email": email,
|
| 104 |
+
"method": method,
|
| 105 |
+
"model_name": model_name,
|
| 106 |
+
"albums": ",".join(albums),
|
| 107 |
+
"is_paper_data": False,
|
| 108 |
+
**{k: round(v, 4) for k, v in global_metrics.items() if k in ALL_METRIC_COLS or k in ("Recall", "NDCG")},
|
| 109 |
+
}
|
| 110 |
+
|
| 111 |
+
# Keep best score per (email, model_name)
|
| 112 |
+
key = (email, model_name)
|
| 113 |
+
existing_idx = None
|
| 114 |
+
for i, e in enumerate(self._entries):
|
| 115 |
+
if (e.get("email"), e.get("model_name")) == key:
|
| 116 |
+
existing_idx = i
|
| 117 |
+
break
|
| 118 |
+
|
| 119 |
+
if existing_idx is not None:
|
| 120 |
+
old = self._entries[existing_idx]
|
| 121 |
+
if global_metrics.get(_DEFAULT_SORT, 0) >= old.get(_DEFAULT_SORT, 0):
|
| 122 |
+
self._entries[existing_idx] = entry
|
| 123 |
+
else:
|
| 124 |
+
self._entries.append(entry)
|
| 125 |
+
|
| 126 |
+
self._save()
|
| 127 |
+
return entry
|
| 128 |
+
|
| 129 |
+
def get_display_df(
|
| 130 |
+
self,
|
| 131 |
+
method_filter: str | None = None,
|
| 132 |
+
sort_by: str = _DEFAULT_SORT,
|
| 133 |
+
ascending: bool = False,
|
| 134 |
+
top_n: int = _TOP_N,
|
| 135 |
+
metric_cols: list[str] | None = None,
|
| 136 |
+
) -> pd.DataFrame:
|
| 137 |
+
"""Return a pandas DataFrame ready for gr.DataFrame."""
|
| 138 |
+
cols_to_show = BASE_COLS + (metric_cols or DEFAULT_DISPLAY_METRICS)
|
| 139 |
+
|
| 140 |
+
if not self._entries:
|
| 141 |
+
return pd.DataFrame(columns=cols_to_show)
|
| 142 |
+
|
| 143 |
+
df = pd.DataFrame(self._entries)
|
| 144 |
+
|
| 145 |
+
if method_filter and method_filter != "All":
|
| 146 |
+
df = df[df["method"] == method_filter]
|
| 147 |
+
|
| 148 |
+
if sort_by not in df.columns:
|
| 149 |
+
sort_by = _DEFAULT_SORT
|
| 150 |
+
|
| 151 |
+
df = df.sort_values(by=sort_by, ascending=ascending)
|
| 152 |
+
df = df.head(top_n).reset_index(drop=True)
|
| 153 |
+
df["rank"] = df.index + 1
|
| 154 |
+
|
| 155 |
+
available = [c for c in cols_to_show if c in df.columns]
|
| 156 |
+
df = df[available]
|
| 157 |
+
return df
|
| 158 |
+
|
| 159 |
+
def remove_entry(self, submission_id: str) -> bool:
|
| 160 |
+
"""Remove an entry by submission_id. Returns True if removed."""
|
| 161 |
+
original_len = len(self._entries)
|
| 162 |
+
self._entries = [e for e in self._entries if e.get("submission_id") != submission_id]
|
| 163 |
+
if len(self._entries) < original_len:
|
| 164 |
+
self._save()
|
| 165 |
+
return True
|
| 166 |
+
return False
|
src/storage.py
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import json
|
| 2 |
+
import os
|
| 3 |
+
from datetime import datetime, timedelta
|
| 4 |
+
from pathlib import Path
|
| 5 |
+
from typing import Any
|
| 6 |
+
|
| 7 |
+
STORAGE_DIR = Path("/data")
|
| 8 |
+
SUBMISSIONS_DIR = STORAGE_DIR / "submissions"
|
| 9 |
+
LEADERBOARD_FILE = STORAGE_DIR / "leaderboard.jsonl"
|
| 10 |
+
RATE_LIMIT_FILE = STORAGE_DIR / "rate_limits.json"
|
| 11 |
+
|
| 12 |
+
# Seed data bundled with the app (used on first boot)
|
| 13 |
+
SEED_LEADERBOARD = Path(__file__).parent.parent / "assets" / "leaderboard.jsonl"
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
def ensure_dirs():
    """Create the storage root and the submissions directory if missing."""
    for directory in (STORAGE_DIR, SUBMISSIONS_DIR):
        directory.mkdir(parents=True, exist_ok=True)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
def _seed_leaderboard():
    """Copy bundled leaderboard data to /data on first boot.

    Does nothing when the leaderboard file already exists or when no seed
    file is bundled with the app.
    """
    if LEADERBOARD_FILE.exists() or not SEED_LEADERBOARD.exists():
        return

    import shutil

    # The storage directory may not exist yet on a fresh volume; copying into
    # a missing directory would fail.
    ensure_dirs()
    shutil.copy(SEED_LEADERBOARD, LEADERBOARD_FILE)
    print(f"[SEED] Copied leaderboard data from {SEED_LEADERBOARD} to {LEADERBOARD_FILE}")
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def save_submission(submission_id: str, payload: dict) -> str:
    """Write *payload* as pretty-printed JSON under the submissions directory.

    The file is named ``<submission_id>.json``; its path is returned as a string.
    """
    ensure_dirs()
    target = SUBMISSIONS_DIR / f"{submission_id}.json"
    with open(target, "w", encoding="utf-8") as out:
        json.dump(payload, out, ensure_ascii=False, indent=2)
    return str(target)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def list_submissions() -> list[dict]:
    """List all submission metadata.

    Reads every ``*.json`` file in the submissions directory (sorted by
    path) and returns each file's ``meta`` object with an added ``file`` key
    holding the file name. Files that cannot be read or parsed, or whose
    payload/``meta`` is not a JSON object, are skipped.
    """
    ensure_dirs()
    results = []
    for path in sorted(SUBMISSIONS_DIR.glob("*.json")):
        try:
            with open(path, "r", encoding="utf-8") as fp:
                data = json.load(fp)
        except (OSError, ValueError) as exc:
            # Best effort: one bad file must not hide the others, but say so.
            print(f"[STORAGE] Skipping unreadable submission {path.name}: {exc}")
            continue

        if not isinstance(data, dict):
            continue
        meta = data.get("meta", {})
        if not isinstance(meta, dict):
            continue
        meta["file"] = str(path.name)
        results.append(meta)
    return results
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def load_leaderboard() -> list[dict]:
    """Return the leaderboard entries stored as JSON Lines.

    Seeds the file from the bundled data first when it does not exist yet;
    returns an empty list when there is still no file. Blank lines are ignored.
    """
    _seed_leaderboard()
    if not LEADERBOARD_FILE.exists():
        return []
    with open(LEADERBOARD_FILE, "r", encoding="utf-8") as handle:
        stripped_lines = (raw.strip() for raw in handle)
        return [json.loads(text) for text in stripped_lines if text]
|
| 68 |
+
|
| 69 |
+
|
| 70 |
+
def save_leaderboard(entries: list[dict]) -> None:
    """Overwrite leaderboard file with the current entries.

    Entries are written one JSON object per line. Everything is serialised
    before any file is touched and the result is written to a temporary
    sibling file that then replaces the real one, so a serialisation error
    or an interrupted write leaves the previous leaderboard intact.
    """
    ensure_dirs()
    payload = "".join(json.dumps(entry, ensure_ascii=False) + "\n" for entry in entries)
    tmp_file = LEADERBOARD_FILE.with_name(LEADERBOARD_FILE.name + ".tmp")
    with open(tmp_file, "w", encoding="utf-8") as f:
        f.write(payload)
    os.replace(tmp_file, LEADERBOARD_FILE)
|
| 76 |
+
|
| 77 |
+
|
| 78 |
+
# ---- Rate limiting ----
|
| 79 |
+
|
| 80 |
+
def check_rate_limit(email: str, cooldown_minutes: int = 60) -> tuple[bool, str]:
    """Check if email is allowed to submit. Returns (allowed, message).

    The last submission time recorded for *email* is compared with the
    current UTC time. A missing, unreadable or malformed record is treated
    as "no previous submission", so a damaged rate-limit file cannot block
    every submission.
    """
    ensure_dirs()
    limits = {}
    if RATE_LIMIT_FILE.exists():
        try:
            with open(RATE_LIMIT_FILE, "r", encoding="utf-8") as f:
                limits = json.load(f)
        except (OSError, ValueError):
            limits = {}
    if not isinstance(limits, dict):
        limits = {}

    last_str = limits.get(email)
    if not last_str:
        return True, ""
    try:
        last_time = datetime.fromisoformat(last_str)
    except (TypeError, ValueError):
        return True, ""

    now = datetime.utcnow()
    next_allowed = last_time + timedelta(minutes=cooldown_minutes)
    if now < next_allowed:
        seconds_left = (next_allowed - now).total_seconds()
        # Round up so the message never says "wait 0 minutes".
        remaining = max(1, int(-(-seconds_left // 60)))
        window = "hour" if cooldown_minutes == 60 else f"{cooldown_minutes} minutes"
        return False, f"This email has already submitted within the last {window}. Please wait {remaining} minutes."

    return True, ""
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def record_submission_time(email: str) -> None:
    """Record the current submission time for an email.

    The time is stored as a naive UTC ISO-8601 string keyed by *email*. An
    unreadable or malformed rate-limit file is replaced by a fresh mapping,
    and the update is written to a temporary sibling file that then replaces
    the real one so an interrupted write cannot leave it half-written.
    """
    ensure_dirs()
    limits = {}
    if RATE_LIMIT_FILE.exists():
        try:
            with open(RATE_LIMIT_FILE, "r", encoding="utf-8") as f:
                limits = json.load(f)
        except (OSError, ValueError):
            limits = {}
    if not isinstance(limits, dict):
        limits = {}

    limits[email] = datetime.utcnow().isoformat()

    tmp_file = RATE_LIMIT_FILE.with_name(RATE_LIMIT_FILE.name + ".tmp")
    with open(tmp_file, "w", encoding="utf-8") as f:
        json.dump(limits, f, ensure_ascii=False, indent=2)
    os.replace(tmp_file, RATE_LIMIT_FILE)
|