Commit ·
f11734c
1
Parent(s): 42b6362
Add persistent storage via HF Dataset repo
Browse files:
- dabstep_benchmark/leaderboard.py: +79 −0
- requirements.txt: +1 −0
dabstep_benchmark/leaderboard.py
CHANGED
|
@@ -13,6 +13,7 @@ from pathlib import Path
|
|
| 13 |
|
| 14 |
import gradio as gr
|
| 15 |
import pandas as pd
|
|
|
|
| 16 |
|
| 17 |
from dabstep_benchmark.utils import (
|
| 18 |
evaluate,
|
|
@@ -22,6 +23,9 @@ from dabstep_benchmark.utils import (
|
|
| 22 |
is_valid_https_url,
|
| 23 |
)
|
| 24 |
|
|
|
|
|
|
|
|
|
|
| 25 |
# Paths
|
| 26 |
DATA_DIR = Path("data")
|
| 27 |
SUBMISSIONS_DIR = DATA_DIR / "submissions"
|
|
@@ -148,6 +152,72 @@ def load_task_scores() -> pd.DataFrame:
|
|
| 148 |
return TASK_SCORES_DF
|
| 149 |
|
| 150 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
def load_metadata() -> pd.DataFrame:
|
| 152 |
"""Load submission metadata from the small metadata file."""
|
| 153 |
global METADATA_DF
|
|
@@ -245,6 +315,9 @@ def refresh(only_leaderboard: bool = False) -> tuple[pd.DataFrame, pd.DataFrame]
|
|
| 245 |
|
| 246 |
ensure_directories()
|
| 247 |
|
|
|
|
|
|
|
|
|
|
| 248 |
if not only_leaderboard:
|
| 249 |
GROUND_TRUTH_DF = None
|
| 250 |
load_ground_truth()
|
|
@@ -408,6 +481,12 @@ def process_submission(
|
|
| 408 |
validated=False
|
| 409 |
)
|
| 410 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
return format_log(f"""
|
| 412 |
Agent {agent_name} submitted by {organisation} successfully!
|
| 413 |
Please refresh the leaderboard to see your score.
|
|
|
|
| 13 |
|
| 14 |
import gradio as gr
|
| 15 |
import pandas as pd
|
| 16 |
+
from huggingface_hub import HfApi, hf_hub_download
|
| 17 |
|
| 18 |
from dabstep_benchmark.utils import (
|
| 19 |
evaluate,
|
|
|
|
| 23 |
is_valid_https_url,
|
| 24 |
)
|
| 25 |
|
| 26 |
+
# HuggingFace Dataset repo for persistent storage
|
| 27 |
+
HF_DATASET_REPO = os.environ.get("HF_DATASET_REPO", "justinlangsethgenesis/dabstep-submissions")
|
| 28 |
+
|
| 29 |
# Paths
|
| 30 |
DATA_DIR = Path("data")
|
| 31 |
SUBMISSIONS_DIR = DATA_DIR / "submissions"
|
|
|
|
| 152 |
return TASK_SCORES_DF
|
| 153 |
|
| 154 |
|
| 155 |
+
def sync_from_hf_dataset():
    """Download latest metadata and scores from HF Dataset repo.

    Best-effort startup sync: fetches the two persistence files from
    HF_DATASET_REPO into DATA_DIR. Any failure (network, missing repo or
    file) is reported as a warning and the app falls back to whatever
    local copies already exist.
    """
    ensure_directories()

    # Both files live at the dataset-repo root; the second element of each
    # pair is only used to keep the log messages human-readable.
    wanted = (
        ("metadata.jsonl", "metadata"),
        ("scores_summary.jsonl", "scores summary"),
    )
    try:
        for remote_name, label in wanted:
            fetched_path = hf_hub_download(
                repo_id=HF_DATASET_REPO,
                filename=remote_name,
                repo_type="dataset",
                local_dir=DATA_DIR,
                local_dir_use_symlinks=False,
            )
            print(f"Downloaded {label} from HF Dataset: {fetched_path}")
    except Exception as e:
        # Deliberate broad catch: syncing is optional, never fatal.
        print(f"Warning: Could not sync from HF Dataset: {e}")
        print("Using local files if available...")
+
def push_to_hf_dataset():
    """Push updated metadata and scores to HF Dataset repo.

    Uploads whichever of the two persistence files exist locally to
    HF_DATASET_REPO. Returns True on success (including when neither file
    exists), False when HF_TOKEN is unset or any upload raises.
    """
    token = os.environ.get("HF_TOKEN")
    if not token:
        # Without a write token there is nothing we can do.
        print("Warning: HF_TOKEN not set, cannot push to dataset repo")
        return False

    # (local path, name in the dataset repo) — missing files are skipped.
    uploads = (
        (METADATA_FILE, "metadata.jsonl"),
        (SCORES_SUMMARY_FILE, "scores_summary.jsonl"),
    )
    try:
        client = HfApi(token=token)
        for local_file, repo_name in uploads:
            if not local_file.exists():
                continue
            client.upload_file(
                path_or_fileobj=str(local_file),
                path_in_repo=repo_name,
                repo_id=HF_DATASET_REPO,
                repo_type="dataset",
            )
            print(f"Pushed {repo_name} to {HF_DATASET_REPO}")
        return True
    except Exception as e:
        # Broad catch by design: persistence failure must not crash the app.
        print(f"Error pushing to HF Dataset: {e}")
        return False
| 221 |
def load_metadata() -> pd.DataFrame:
|
| 222 |
"""Load submission metadata from the small metadata file."""
|
| 223 |
global METADATA_DF
|
|
|
|
| 315 |
|
| 316 |
ensure_directories()
|
| 317 |
|
| 318 |
+
# Sync latest data from HF Dataset repo
|
| 319 |
+
sync_from_hf_dataset()
|
| 320 |
+
|
| 321 |
if not only_leaderboard:
|
| 322 |
GROUND_TRUTH_DF = None
|
| 323 |
load_ground_truth()
|
|
|
|
| 481 |
validated=False
|
| 482 |
)
|
| 483 |
|
| 484 |
+
# Push updated files to HF Dataset for persistence
|
| 485 |
+
if push_to_hf_dataset():
|
| 486 |
+
print(f"Submission {submission_id} persisted to HF Dataset")
|
| 487 |
+
else:
|
| 488 |
+
print(f"Warning: Submission {submission_id} saved locally but not persisted to HF Dataset")
|
| 489 |
+
|
| 490 |
return format_log(f"""
|
| 491 |
Agent {agent_name} submitted by {organisation} successfully!
|
| 492 |
Please refresh the leaderboard to see your score.
|
requirements.txt
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
gradio>=4.0.0
|
| 2 |
pandas>=2.0.0
|
| 3 |
numpy>=1.24.0
|
|
|
|
| 4 |
|
|
|
|
| 1 |
gradio>=4.0.0
|
| 2 |
pandas>=2.0.0
|
| 3 |
numpy>=1.24.0
|
| 4 |
+
huggingface_hub>=0.20.0
|
| 5 |
|