"""
HTML table generator for the leaderboard.

Generates styled HTML tables with client-side sorting and provider logos.
"""

from html import escape
from typing import Dict, List
from urllib.parse import quote

import pandas as pd

from .data_loader import get_benchmark_info

# NOTE(review): the reviewed copy of this file had the HTML tags stripped out
# of its string literals. The markup below was reconstructed from the CSS
# class names and the docstrings. The attribute hooks (`data-col`,
# `data-value`, `data-model`) are assumptions -- confirm them against the
# sorting JavaScript that app.py passes to Gradio before merging.

# Benchmark to category mapping (for color coding)
BENCHMARK_CATEGORIES = {
    "gsm8k": "math",
    "aime2026": "math",
    "hmmt2026": "math",
    "mmluPro": "knowledge",
    "gpqa": "knowledge",
    "hle": "knowledge",
    "sweVerified": "coding",
    "swePro": "coding",
    "olmOcr": "vision",
    "terminalBench": "agent",
    "evasionBench": "language",
}

# Category color mapping (for score styling)
CATEGORY_COLORS = {
    "math": "#7c3aed",  # purple
    "knowledge": "#2563eb",  # blue
    "coding": "#059669",  # green
    "agent": "#0d9488",  # teal
    "language": "#ea580c",  # orange
    "vision": "#db2777",  # pink
}


def get_table_css() -> str:
    """
    Return the CSS styles for the leaderboard table (light mode only).

    Extracted from index.html and adapted for Gradio embedding.

    Returns:
        str: CSS rules, without a surrounding <style> element
    """
    return """
*{margin:0;padding:0;box-sizing:border-box;}
:root{
  --bg:#f9fafb;--bg2:#f3f4f6;--surface:#ffffff;--surface-alt:#f9fafb;
  --border:#e5e7eb;--border-hover:#d1d5db;
  --shadow-sm:0 1px 3px rgba(15,23,42,.04),0 1px 2px rgba(15,23,42,.06);
  --shadow:0 4px 16px rgba(15,23,42,.06),0 1px 3px rgba(15,23,42,.08);
  --shadow-lg:0 12px 40px rgba(15,23,42,.08),0 4px 12px rgba(15,23,42,.06);
  --text:#111827;--text-sec:#6b7280;--text-muted:#9ca3af;
  --ac:#6366f1;--ac2:#4f46e5;--ac-bg:rgba(99,102,241,.06);
  --teal:#0d9488;--amber:#d97706;--green:#16a34a;--rose:#e11d48;--purple:#7c3aed;
  --radius:16px;--radius-sm:10px;--radius-xs:6px;
  --font:'Source Sans Pro',sans-serif;--font-mono:'IBM Plex Mono',monospace;
  --tr:0.22s cubic-bezier(0.4,0,0.2,1);
}

/* TABLE */
.tw{background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);overflow-x:auto;box-shadow:var(--shadow);margin-bottom:20px;}
table{width:100%;border-collapse:collapse;font-size:11px;font-family:var(--font);}
thead{background:var(--surface-alt);position:sticky;top:0;z-index:100;box-shadow:0 2px 4px rgba(0,0,0,0.1);}
thead tr{border-bottom:2px solid var(--border);}
th{padding:12px 8px;text-align:center;font-size:11px;font-family:var(--font-mono);text-transform:uppercase;letter-spacing:.5px;color:var(--text-muted);white-space:nowrap;cursor:pointer;user-select:none;vertical-align:bottom;line-height:1.6;font-weight:700;transition:var(--tr);}
th.c-model{text-align:left;padding-left:14px;min-width:180px;position:sticky;left:0;background:var(--surface-alt);z-index:101;}
th:hover{color:var(--ac);background:rgba(99,102,241,.08);transform:translateY(-1px);}
th.sorted{color:var(--ac);font-weight:800;}
.sa{opacity:.6;font-size:7px;margin-left:3px;}
th a{color:inherit;text-decoration:none;}
th a:hover{color:var(--ac);text-decoration:underline;}
tbody tr{border-bottom:1px solid var(--border);transition:background var(--tr);}
tbody tr:last-child{border-bottom:none;}
tbody tr:hover{background:rgba(99,102,241,.025);}
td{padding:10px 6px;text-align:center;vertical-align:middle;}
td.c-model{text-align:left;padding-left:14px;position:sticky;left:0;background:var(--surface);z-index:9;border-right:1px solid var(--border);}
tbody tr:hover td.c-model{background:rgba(99,102,241,.025);}

/* MODEL CELL */
.mc{display:flex;flex-direction:column;gap:2px;}
.mn{font-weight:700;font-size:12px;color:var(--text);display:flex;align-items:center;gap:5px;flex-wrap:wrap;}
.mn a{color:var(--text);text-decoration:none;transition:var(--tr);position:relative;}
.mn a:hover{color:var(--ac);text-decoration:none;}
.mn a::after{content:'';position:absolute;bottom:-2px;left:0;width:0;height:1px;background:var(--ac);transition:width 0.3s ease;}
.mn a:hover::after{width:100%;}
.ms{display:flex;gap:4px;align-items:center;margin-top:2px;}
.mp{font-size:8px;color:var(--text-muted);font-family:var(--font-mono);}

/* PROVIDER LOGO */
.provider-logo-inline{width:16px;height:16px;border-radius:50%;object-fit:cover;border:1px solid var(--border);box-shadow:var(--shadow-sm);margin-right:6px;vertical-align:middle;display:inline-block;}
.provider-logo-fallback-inline{width:16px;height:16px;border-radius:50%;background:var(--ac-bg);border:1px solid var(--border);display:inline-flex;align-items:center;justify-content:center;font-size:8px;font-weight:700;color:var(--ac);font-family:var(--font-mono);margin-right:6px;vertical-align:middle;}

/* SCORE CELL */
.sc{display:flex;flex-direction:column;align-items:center;gap:2px;}
.sn{font-family:var(--font-mono);font-size:11px;font-weight:700;}
.na{color:var(--text-muted);font-size:9px;font-family:var(--font-mono);}

/* EMPTY STATE */
.empty-state{text-align:center;padding:40px 20px;color:var(--text-muted);font-size:13px;}
.empty-state strong{color:var(--text-sec);font-size:15px;display:block;margin-bottom:8px;}
"""


def get_benchmark_category_color(benchmark_key: str) -> str:
    """
    Get the color for a benchmark based on its category.

    Benchmarks missing from BENCHMARK_CATEGORIES are treated as "knowledge";
    categories missing from CATEGORY_COLORS fall back to "#6366f1".

    Args:
        benchmark_key: The benchmark key (e.g., 'gsm8k', 'mmluPro')

    Returns:
        str: Hex color code for the category
    """
    category = BENCHMARK_CATEGORIES.get(benchmark_key, "knowledge")
    return CATEGORY_COLORS.get(category, "#6366f1")


def _is_missing(value) -> bool:
    """Return True when value is None or a pandas/NumPy missing scalar."""
    # is_scalar() guards pd.isna(), which returns an array for list-likes and
    # would otherwise make the truth test ambiguous.
    return bool(pd.api.types.is_scalar(value) and pd.isna(value))


def _clean_text(value, default: str = "") -> str:
    """Return value as text, or default when it is missing or blank."""
    if _is_missing(value):
        return default
    text = str(value)
    return text if text.strip() else default


# Cell shown when a benchmark has no usable score.
# NOTE(review): the placeholder glyph is an assumption; the reviewed text
# showed no visible content for this cell.
_MISSING_SCORE_CELL = (
    '<td data-value="">\n'
    '<div class="sc"><span class="na">&mdash;</span></div>\n'
    "</td>"
)


def generate_table_headers(selected_benchmarks: List[str]) -> str:
    """
    Generate HTML for table headers with sorting functionality.

    Args:
        selected_benchmarks: List of benchmark keys to display

    Returns:
        str: HTML string for the <thead> element
    """
    benchmarks_info = get_benchmark_info()

    # Model header is column 0; benchmark headers are columns 1+.
    parts = ["<thead>", "<tr>", '<th class="c-model" data-col="0">Model</th>']
    for idx, bench_key in enumerate(selected_benchmarks, start=1):
        bench_info = benchmarks_info.get(bench_key, {})
        bench_name = bench_info.get("name", bench_key)
        parts.append(f'<th data-col="{idx}">{escape(str(bench_name))}</th>')
    parts.extend(["</tr>", "</thead>"])

    return "\n".join(parts) + "\n"


def generate_model_cell(row: pd.Series, provider_logos: Dict[str, str]) -> str:
    """
    Generate HTML for the model cell (sticky first column).

    Missing values (None/NaN/blank) fall back to defaults instead of being
    rendered as "nan", and every data value is HTML-escaped before it is
    interpolated into the markup.

    Args:
        row: DataFrame row containing model data
        provider_logos: Dictionary mapping provider names to logo URLs

    Returns:
        str: HTML string for the model <td> element
    """
    model_id = _clean_text(row.get("model_id"))
    model_name = _clean_text(row.get("model_name"), model_id)
    provider = _clean_text(row.get("provider"), "Unknown")

    # Try parameters_display first (formatted), then the raw parameters column.
    params = _clean_text(
        row.get("parameters_display"),
        _clean_text(row.get("parameters"), "Unknown"),
    )

    # Provider logo: the row's logo_url column wins, then the provider_logos dict.
    logo_url = _clean_text(row.get("logo_url")) or _clean_text(
        provider_logos.get(provider)
    )

    if logo_url:
        logo_html = (
            f'<img class="provider-logo-inline" src="{escape(logo_url)}" '
            f'alt="{escape(provider)}">'
        )
    else:
        # Fallback: show first 2 letters of provider name.
        initials = provider[:2].upper() if provider != "Unknown" else "??"
        logo_html = (
            f'<span class="provider-logo-fallback-inline">{escape(initials)}</span>'
        )

    # HuggingFace link: model_name contains the repo path (e.g. "Meta/Llama-3").
    # quote() keeps "/" and percent-encodes anything unsafe in a URL path.
    hf_link = f"https://huggingface.co/{quote(model_name)}" if model_name else "#"

    return (
        '<td class="c-model">\n'
        '<div class="mc">\n'
        f'<div class="mn">{logo_html}'
        f'<a href="{escape(hf_link)}" target="_blank" rel="noopener noreferrer">'
        f"{escape(model_name)}</a></div>\n"
        f'<div class="ms"><span class="mp">{escape(provider)}</span>'
        f'<span class="mp">{escape(params)}</span></div>\n'
        "</div>\n"
        "</td>"
    )


def generate_score_cell(score, benchmark_key: str) -> str:
    """
    Generate HTML for a score cell with category-specific color.

    Args:
        score: The benchmark score (float, None, or NaN)
        benchmark_key: The benchmark key (for color coding)

    Returns:
        str: HTML string for the score <td> element; a placeholder cell when
            the score is missing, non-numeric or not finite
    """
    if _is_missing(score):
        return _MISSING_SCORE_CELL

    try:
        score_float = float(score)
    except (ValueError, TypeError):
        return _MISSING_SCORE_CELL

    # Strings such as "nan" or "inf" parse as floats but are not real scores.
    # NaN != NaN, and the two infinities are the only other non-finite values.
    if score_float != score_float or score_float in (float("inf"), float("-inf")):
        return _MISSING_SCORE_CELL

    color = get_benchmark_category_color(benchmark_key)
    score_display = f"{score_float:.1f}"
    return (
        f'<td data-value="{score_float}">\n'
        f'<div class="sc"><span class="sn" style="color:{color}">'
        f"{score_display}</span></div>\n"
        "</td>"
    )


def generate_table_rows(
    df: pd.DataFrame,
    selected_benchmarks: List[str],
    provider_logos: Dict[str, str],
) -> str:
    """
    Generate HTML for all table rows.

    Args:
        df: DataFrame containing leaderboard data
        selected_benchmarks: List of benchmark keys to display
        provider_logos: Dictionary mapping provider names to logo URLs

    Returns:
        str: HTML string for the <tbody> element; a single empty-state row
            when df has no rows
    """
    if df.empty:
        # Span the model column plus every benchmark column.
        colspan = len(selected_benchmarks) + 1
        return (
            "<tbody>\n"
            f'<tr><td colspan="{colspan}">\n'
            '<div class="empty-state">\n'
            "<strong>No models match your criteria</strong>\n"
            "Try adjusting your search or filter settings\n"
            "</div>\n"
            "</td></tr>\n"
            "</tbody>\n"
        )

    parts = ["<tbody>"]
    for _, row in df.iterrows():
        model_name = _clean_text(
            row.get("model_name"), _clean_text(row.get("model_id"), "Unknown")
        )
        parts.append(f'<tr data-model="{escape(model_name)}">')

        # Model cell (sticky first column)
        parts.append(generate_model_cell(row, provider_logos))

        # Score cells for each selected benchmark
        parts.extend(
            generate_score_cell(row.get(f"{bench_key}_score"), bench_key)
            for bench_key in selected_benchmarks
        )
        parts.append("</tr>")
    parts.append("</tbody>")

    return "\n".join(parts) + "\n"


def generate_leaderboard_html(
    df: pd.DataFrame,
    selected_benchmarks: List[str],
    provider_logos: Dict[str, str],
) -> str:
    """
    Generate complete HTML table for the leaderboard.

    Args:
        df: DataFrame containing filtered leaderboard data
        selected_benchmarks: List of benchmark keys to display
        provider_logos: Dictionary mapping provider names to logo URLs

    Returns:
        str: Complete HTML string with styles and table markup
    """
    css = get_table_css()
    headers = generate_table_headers(selected_benchmarks)
    rows = generate_table_rows(df, selected_benchmarks, provider_logos)

    # Note: JavaScript for sorting is loaded via Gradio's js parameter in app.py
    return (
        f"<style>{css}</style>\n"
        '<div class="tw">\n'
        "<table>\n"
        f"{headers}{rows}"
        "</table>\n"
        "</div>\n"
    )