""" Data loading utilities for the leaderboard. Loads data from HuggingFace dataset and integrates provider logos. """ import json import os import pandas as pd from datasets import load_dataset def load_provider_logos(): """ Load provider logos from data/provider_logos.json Returns: dict: Provider name -> logo URL mapping """ logos_path = os.path.join( os.path.dirname(__file__), "..", "data", "provider_logos.json" ) try: with open(logos_path, "r") as f: logos = json.load(f) return logos except FileNotFoundError: print(f"Warning: Provider logos file not found at {logos_path}") return {} except json.JSONDecodeError as e: print(f"Warning: Could not parse provider logos JSON: {e}") return {} def format_params(param_billions): """ Format parameter count for display. Args: param_billions: Parameter count in billions (float or None) Returns: str: Formatted parameter string (e.g., "72.7B", "Unknown") """ if pd.isna(param_billions) or param_billions is None: return "Unknown" if param_billions >= 1000: return f"{param_billions:.0f}B" elif param_billions >= 100: return f"{param_billions:.0f}B" elif param_billions >= 10: return f"{param_billions:.1f}B" else: return f"{param_billions:.2f}B" def load_leaderboard_data(): """ Load leaderboard data from HuggingFace dataset. Returns: pandas.DataFrame: Complete leaderboard data with: - All model metadata - All benchmark scores - Provider logos - Formatted parameters """ print("Loading leaderboard data from HuggingFace dataset...") # Load dataset from HF try: ds = load_dataset("OpenEvals/leaderboard-data", split="train") df = ds.to_pandas() print(f"✓ Loaded {len(df)} models from dataset") except Exception as e: print(f"✗ Error loading dataset: {e}") raise # Load provider logos logos = load_provider_logos() print(f"✓ Loaded {len(logos)} provider logos") # Add logo URLs to dataframe df["logo_url"] = df["provider"].map(logos) # Format parameters for display df["parameters_display"] = df["parameters_billions"].apply(format_params) # Sort by model name by default df = df.sort_values("model_name").reset_index(drop=True) print(f"✓ Data loaded successfully: {len(df)} models, {df.columns.size} columns") return df def get_benchmark_columns(): """ Get list of all benchmark score column names. Returns: list: Column names for benchmark scores """ return [ "gsm8k_score", "mmluPro_score", "gpqa_score", "hle_score", "olmOcr_score", "sweVerified_score", "swePro_score", "aime2026_score", "terminalBench_score", "evasionBench_score", "hmmt2026_score", ] def get_benchmark_info(): """ Get metadata about each benchmark. Returns: dict: Benchmark key -> metadata mapping """ return { "gsm8k": { "name": "GSM8K", "full_name": "Grade School Math 8K", "category": "math", "color": "#7c3aed", "url": "https://huggingface.co/datasets/openai/gsm8k", }, "mmluPro": { "name": "MMLU-Pro", "full_name": "Massive Multi-task Language Understanding Pro", "category": "knowledge", "color": "#2563eb", "url": "https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro", }, "gpqa": { "name": "GPQA◆", "full_name": "PhD-level Expert Questions", "category": "knowledge", "color": "#2563eb", "url": "https://huggingface.co/datasets/Idavidrein/gpqa", }, "hle": { "name": "HLE", "full_name": "Humanity's Last Exam", "category": "knowledge", "color": "#2563eb", "url": "https://lastexam.ai", }, "olmOcr": { "name": "olmOCR", "full_name": "OCR Evaluation Benchmark", "category": "vision", "color": "#db2777", "url": "https://huggingface.co/datasets/allenai/olmOCR-bench", }, "sweVerified": { "name": "SWE-V", "full_name": "SWE-bench Verified", "category": "coding", "color": "#059669", "url": "https://www.swebench.com", }, "swePro": { "name": "SWE-Pro", "full_name": "SWE-bench Pro", "category": "coding", "color": "#059669", "url": "https://scale.com/leaderboard/swe_bench_pro_public", }, "aime2026": { "name": "AIME 2026", "full_name": "American Invitational Mathematics Examination 2026", "category": "math", "color": "#7c3aed", "url": "https://matharena.ai/?comp=aime--aime_2026", }, "terminalBench": { "name": "TB 2.0", "full_name": "Terminal-Bench 2.0", "category": "agent", "color": "#0d9488", "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.0", }, "evasionBench": { "name": "EvasionB", "full_name": "EvasionBench", "category": "language", "color": "#ea580c", "url": "https://huggingface.co/datasets/FutureMa/EvasionBench", }, "hmmt2026": { "name": "HMMT", "full_name": "Harvard-MIT Mathematics Tournament Feb 2026", "category": "math", "color": "#7c3aed", "url": "https://matharena.ai/?comp=hmmt--hmmt_feb_2026", }, }