Lev Israel committed on
Commit
1d86fb8
·
1 Parent(s): 112f081

Improve leaderboard loading with better error handling and force refresh

Browse files
Files changed (3) hide show
  1. app.py +7 -1
  2. leaderboard.py +19 -3
  3. upload_leaderboard.py +44 -0
app.py CHANGED
@@ -462,8 +462,14 @@ def create_app():
462
  show_progress="hidden",
463
  )
464
 
 
 
 
 
 
 
465
  refresh_btn.click(
466
- fn=lambda: (format_leaderboard_df(), create_leaderboard_comparison()),
467
  outputs=[leaderboard_table, comparison_plot],
468
  )
469
 
 
462
  show_progress="hidden",
463
  )
464
 
465
+ def refresh_leaderboard():
466
+ """Force refresh leaderboard from Hub."""
467
+ from leaderboard import clear_cache
468
+ clear_cache() # Clear cache to force fresh load
469
+ return (format_leaderboard_df(), create_leaderboard_comparison())
470
+
471
  refresh_btn.click(
472
+ fn=refresh_leaderboard,
473
  outputs=[leaderboard_table, comparison_plot],
474
  )
475
 
leaderboard.py CHANGED
@@ -48,14 +48,16 @@ def load_leaderboard(force_refresh: bool = False) -> list[dict]:
48
  try:
49
  from huggingface_hub import hf_hub_download
50
 
51
- # Download the leaderboard file
 
52
  local_path = hf_hub_download(
53
  repo_id=LEADERBOARD_DATASET_ID,
54
  filename=LEADERBOARD_FILENAME,
55
  repo_type="dataset",
56
- token=_get_hf_token(),
57
  )
58
 
 
59
  with open(local_path, "r", encoding="utf-8") as f:
60
  _leaderboard_cache = json.load(f)
61
  _cache_time = datetime.now()
@@ -63,13 +65,27 @@ def load_leaderboard(force_refresh: bool = False) -> list[dict]:
63
  # Ensure sorted by MRR
64
  _leaderboard_cache.sort(key=lambda x: x.get("mrr", 0), reverse=True)
65
 
 
66
  return _leaderboard_cache
67
 
 
 
 
 
 
 
 
 
68
  except Exception as e:
69
- print(f"Could not load leaderboard from Hub: {e}")
 
 
 
70
  # Return cached data if available, otherwise empty list
71
  if _leaderboard_cache is not None:
 
72
  return _leaderboard_cache
 
73
  return []
74
 
75
 
 
48
  try:
49
  from huggingface_hub import hf_hub_download
50
 
51
+ print(f"Loading leaderboard from {LEADERBOARD_DATASET_ID}...")
52
+ # Download the leaderboard file (no token needed for public datasets)
53
  local_path = hf_hub_download(
54
  repo_id=LEADERBOARD_DATASET_ID,
55
  filename=LEADERBOARD_FILENAME,
56
  repo_type="dataset",
57
+ token=_get_hf_token(), # Optional - works without token for public datasets
58
  )
59
 
60
+ print(f"Downloaded to: {local_path}")
61
  with open(local_path, "r", encoding="utf-8") as f:
62
  _leaderboard_cache = json.load(f)
63
  _cache_time = datetime.now()
 
65
  # Ensure sorted by MRR
66
  _leaderboard_cache.sort(key=lambda x: x.get("mrr", 0), reverse=True)
67
 
68
+ print(f"✅ Loaded {len(_leaderboard_cache)} entries from leaderboard dataset")
69
  return _leaderboard_cache
70
 
71
+ except FileNotFoundError as e:
72
+ error_msg = f"Leaderboard file not found in dataset. Make sure {LEADERBOARD_FILENAME} exists in {LEADERBOARD_DATASET_ID}"
73
+ print(error_msg)
74
+ if _leaderboard_cache is not None:
75
+ print(f"Using cached leaderboard ({len(_leaderboard_cache)} entries)")
76
+ return _leaderboard_cache
77
+ print("Returning empty leaderboard")
78
+ return []
79
  except Exception as e:
80
+ import traceback
81
+ error_msg = f"Could not load leaderboard from Hub: {type(e).__name__}: {e}"
82
+ print(error_msg)
83
+ traceback.print_exc() # Print full traceback for debugging
84
  # Return cached data if available, otherwise empty list
85
  if _leaderboard_cache is not None:
86
+ print(f"Using cached leaderboard ({len(_leaderboard_cache)} entries)")
87
  return _leaderboard_cache
88
+ print("Returning empty leaderboard")
89
  return []
90
 
91
 
upload_leaderboard.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Upload existing leaderboard.json to HuggingFace dataset.
4
+
5
+ Usage:
6
+ export HF_TOKEN=your_token
7
+ python upload_leaderboard.py
8
+ """
9
+
10
+ import json
11
+ from pathlib import Path
12
+ from leaderboard import LEADERBOARD_DATASET_ID, save_leaderboard, create_leaderboard_dataset
13
+
14
+ def main():
15
+ # Load local leaderboard
16
+ local_path = Path("benchmark_data/leaderboard.json")
17
+ if not local_path.exists():
18
+ print(f"Error: {local_path} not found")
19
+ return 1
20
+
21
+ print(f"Loading leaderboard from {local_path}...")
22
+ with open(local_path, "r", encoding="utf-8") as f:
23
+ leaderboard = json.load(f)
24
+
25
+ print(f"Found {len(leaderboard)} entries")
26
+
27
+ # Ensure dataset exists
28
+ print(f"\nEnsuring dataset exists: {LEADERBOARD_DATASET_ID}")
29
+ create_leaderboard_dataset()
30
+
31
+ # Upload all entries
32
+ print(f"\nUploading {len(leaderboard)} entries to {LEADERBOARD_DATASET_ID}...")
33
+ success = save_leaderboard(leaderboard)
34
+
35
+ if success:
36
+ print(f"\n✅ Successfully uploaded leaderboard!")
37
+ print(f" View at: https://huggingface.co/datasets/{LEADERBOARD_DATASET_ID}")
38
+ return 0
39
+ else:
40
+ print("\n❌ Failed to upload. Check HF_TOKEN and permissions.")
41
+ return 1
42
+
43
+ if __name__ == "__main__":
44
+ exit(main())