Commit ·
f11734c
1
Parent(s): 42b6362
Add persistent storage via HF Dataset repo
Browse files:
- dabstep_benchmark/leaderboard.py: +79 −0
- requirements.txt: +1 −0
dabstep_benchmark/leaderboard.py
CHANGED
|
@@ -13,6 +13,7 @@ from pathlib import Path
|
|
| 13 |
|
| 14 |
import gradio as gr
|
| 15 |
import pandas as pd
|
|
|
|
| 16 |
|
| 17 |
from dabstep_benchmark.utils import (
|
| 18 |
evaluate,
|
|
@@ -22,6 +23,9 @@ from dabstep_benchmark.utils import (
|
|
| 22 |
is_valid_https_url,
|
| 23 |
)
|
| 24 |
|
|
|
|
|
|
|
|
|
|
| 25 |
# Paths
|
| 26 |
DATA_DIR = Path("data")
|
| 27 |
SUBMISSIONS_DIR = DATA_DIR / "submissions"
|
|
@@ -148,6 +152,72 @@ def load_task_scores() -> pd.DataFrame:
|
|
| 148 |
return TASK_SCORES_DF
|
| 149 |
|
| 150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
def load_metadata() -> pd.DataFrame:
|
| 152 |
"""Load submission metadata from the small metadata file."""
|
| 153 |
global METADATA_DF
|
|
@@ -245,6 +315,9 @@ def refresh(only_leaderboard: bool = False) -> tuple[pd.DataFrame, pd.DataFrame]
|
|
| 245 |
|
| 246 |
ensure_directories()
|
| 247 |
|
|
|
|
|
|
|
|
|
|
| 248 |
if not only_leaderboard:
|
| 249 |
GROUND_TRUTH_DF = None
|
| 250 |
load_ground_truth()
|
|
@@ -408,6 +481,12 @@ def process_submission(
|
|
| 408 |
validated=False
|
| 409 |
)
|
| 410 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
return format_log(f"""
|
| 412 |
Agent {agent_name} submitted by {organisation} successfully!
|
| 413 |
Please refresh the leaderboard to see your score.
|
|
|
|
| 13 |
|
| 14 |
import gradio as gr
|
| 15 |
import pandas as pd
|
| 16 |
+
from huggingface_hub import HfApi, hf_hub_download
|
| 17 |
|
| 18 |
from dabstep_benchmark.utils import (
|
| 19 |
evaluate,
|
|
|
|
| 23 |
is_valid_https_url,
|
| 24 |
)
|
| 25 |
|
| 26 |
+
# HuggingFace Dataset repo for persistent storage
|
| 27 |
+
HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "justinlangsethgenesis/dabstep-submissions")
|
| 28 |
+
|
| 29 |
# Paths
|
| 30 |
DATA_DIR = Path("data")
|
| 31 |
SUBMISSIONS_DIR = DATA_DIR / "submissions"
|
|
|
|
| 152 |
return TASK_SCORES_DF
|
| 153 |
|
| 154 |
|
| 155 |
+
def sync_from_hf_dataset():
    """Download latest metadata and scores from HF Dataset repo.

    Best-effort startup sync: fetches the two persistence files from
    HF_DATASET_REPO into DATA_DIR. Any failure (network, missing repo or
    file) is reported as a warning and the app falls back to whatever
    local copies already exist.
    """
    ensure_directories()

    # Both files live at the dataset-repo root; the second element of each
    # pair is only used to keep the log messages human-readable.
    wanted = (
        ("metadata.jsonl", "metadata"),
        ("scores_summary.jsonl", "scores summary"),
    )
    try:
        for remote_name, label in wanted:
            fetched_path = hf_hub_download(
                repo_id=HF_DATASET_REPO,
                filename=remote_name,
                repo_type="dataset",
                local_dir=DATA_DIR,
                local_dir_use_symlinks=False,
            )
            print(f"Downloaded {label} from HF Dataset: {fetched_path}")
    except Exception as e:
        # Deliberate broad catch: syncing is optional, never fatal.
        print(f"Warning: Could not sync from HF Dataset: {e}")
        print("Using local files if available...")
+
def push_to_hf_dataset():
    """Push updated metadata and scores to HF Dataset repo.

    Uploads whichever of the two persistence files exist locally to
    HF_DATASET_REPO. Returns True on success (including when neither file
    exists), False when HF_TOKEN is unset or any upload raises.
    """
    token = os.environ.get("HF_TOKEN")
    if not token:
        # Without a write token there is nothing we can do.
        print("Warning: HF_TOKEN not set, cannot push to dataset repo")
        return False

    # (local path, name in the dataset repo) — missing files are skipped.
    uploads = (
        (METADATA_FILE, "metadata.jsonl"),
        (SCORES_SUMMARY_FILE, "scores_summary.jsonl"),
    )
    try:
        client = HfApi(token=token)
        for local_file, repo_name in uploads:
            if not local_file.exists():
                continue
            client.upload_file(
                path_or_fileobj=str(local_file),
                path_in_repo=repo_name,
                repo_id=HF_DATASET_REPO,
                repo_type="dataset",
            )
            print(f"Pushed {repo_name} to {HF_DATASET_REPO}")
        return True
    except Exception as e:
        # Broad catch by design: persistence failure must not crash the app.
        print(f"Error pushing to HF Dataset: {e}")
        return False
| 221 |
def load_metadata() -> pd.DataFrame:
|
| 222 |
"""Load submission metadata from the small metadata file."""
|
| 223 |
global METADATA_DF
|
|
|
|
| 315 |
|
| 316 |
ensure_directories()
|
| 317 |
|
| 318 |
+
# Sync latest data from HF Dataset repo
|
| 319 |
+
sync_from_hf_dataset()
|
| 320 |
+
|
| 321 |
if not only_leaderboard:
|
| 322 |
GROUND_TRUTH_DF = None
|
| 323 |
load_ground_truth()
|
|
|
|
| 481 |
validated=False
|
| 482 |
)
|
| 483 |
|
| 484 |
+
# Push updated files to HF Dataset for persistence
|
| 485 |
+
if push_to_hf_dataset():
|
| 486 |
+
print(f"Submission {submission_id} persisted to HF Dataset")
|
| 487 |
+
else:
|
| 488 |
+
print(f"Warning: Submission {submission_id} saved locally but not persisted to HF Dataset")
|
| 489 |
+
|
| 490 |
return format_log(f"""
|
| 491 |
Agent {agent_name} submitted by {organisation} successfully!
|
| 492 |
Please refresh the leaderboard to see your score.
|
requirements.txt
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
gradio>=4.0.0
|
| 2 |
pandas>=2.0.0
|
| 3 |
numpy>=1.24.0
|
|
|
|
| 4 |
|
|
|
|
| 1 |
gradio>=4.0.0
|
| 2 |
pandas>=2.0.0
|
| 3 |
numpy>=1.24.0
|
| 4 |
+
huggingface_hub>=0.20.0
|
| 5 |
|