Spaces:
Running
Running
| """ | |
| HTML table generator for the leaderboard. | |
| Generates styled HTML tables with client-side sorting and provider logos. | |
| """ | |
| import pandas as pd | |
| from typing import Dict, List | |
| from .data_loader import get_benchmark_info | |
# Maps each benchmark key to the category used for score color coding.
# Declared grouped by category; flattened into {benchmark_key: category}.
BENCHMARK_CATEGORIES = {
    benchmark: category
    for category, benchmarks in (
        ("math", ("gsm8k", "aime2026", "hmmt2026")),
        ("knowledge", ("mmluPro", "gpqa", "hle")),
        ("coding", ("sweVerified", "swePro")),
        ("vision", ("olmOcr",)),
        ("agent", ("terminalBench",)),
        ("language", ("evasionBench",)),
    )
    for benchmark in benchmarks
}
# Hex color applied to score text for each benchmark category.
CATEGORY_COLORS = dict(
    math="#7c3aed",  # purple
    knowledge="#2563eb",  # blue
    coding="#059669",  # green
    agent="#0d9488",  # teal
    language="#ea580c",  # orange
    vision="#db2777",  # pink
)
def get_table_css() -> str:
    """
    Return the stylesheet text embedded with the leaderboard table.

    The stylesheet is assembled from one literal per section (variables,
    table, model cell, provider logo, score cell, empty state) and is
    returned as a single string. Per the original note, the rules are
    light-mode only and were extracted from index.html for Gradio embedding.

    Returns:
        str: CSS source, without surrounding <style> tags
    """
    # Global reset and the custom properties referenced by every rule below.
    reset_and_variables = """
*{margin:0;padding:0;box-sizing:border-box;}
:root{
--bg:#f9fafb;--bg2:#f3f4f6;--surface:#ffffff;--surface-alt:#f9fafb;
--border:#e5e7eb;--border-hover:#d1d5db;
--shadow-sm:0 1px 3px rgba(15,23,42,.04),0 1px 2px rgba(15,23,42,.06);
--shadow:0 4px 16px rgba(15,23,42,.06),0 1px 3px rgba(15,23,42,.08);
--shadow-lg:0 12px 40px rgba(15,23,42,.08),0 4px 12px rgba(15,23,42,.06);
--text:#111827;--text-sec:#6b7280;--text-muted:#9ca3af;
--ac:#6366f1;--ac2:#4f46e5;--ac-bg:rgba(99,102,241,.06);
--teal:#0d9488;--amber:#d97706;--green:#16a34a;--rose:#e11d48;--purple:#7c3aed;
--radius:16px;--radius-sm:10px;--radius-xs:6px;
--font:'Source Sans Pro',sans-serif;--font-mono:'IBM Plex Mono',monospace;
--tr:0.22s cubic-bezier(0.4,0,0.2,1);
}
"""
    # Table wrapper, sticky header row and sticky first (model) column.
    table_rules = """/* TABLE */
.tw{background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);overflow-x:auto;box-shadow:var(--shadow);margin-bottom:20px;}
table{width:100%;border-collapse:collapse;font-size:11px;font-family:var(--font);}
thead{background:var(--surface-alt);position:sticky;top:0;z-index:100;box-shadow:0 2px 4px rgba(0,0,0,0.1);}
thead tr{border-bottom:2px solid var(--border);}
th{padding:12px 8px;text-align:center;font-size:11px;font-family:var(--font-mono);text-transform:uppercase;letter-spacing:.5px;color:var(--text-muted);white-space:nowrap;cursor:pointer;user-select:none;vertical-align:bottom;line-height:1.6;font-weight:700;transition:var(--tr);}
th.c-model{text-align:left;padding-left:14px;min-width:180px;position:sticky;left:0;background:var(--surface-alt);z-index:101;}
th:hover{color:var(--ac);background:rgba(99,102,241,.08);transform:translateY(-1px);}
th.sorted{color:var(--ac);font-weight:800;}
.sa{opacity:.6;font-size:7px;margin-left:3px;}
th a{color:inherit;text-decoration:none;}
th a:hover{color:var(--ac);text-decoration:underline;}
tbody tr{border-bottom:1px solid var(--border);transition:background var(--tr);}
tbody tr:last-child{border-bottom:none;}
tbody tr:hover{background:rgba(99,102,241,.025);}
td{padding:10px 6px;text-align:center;vertical-align:middle;}
td.c-model{text-align:left;padding-left:14px;position:sticky;left:0;background:var(--surface);z-index:9;border-right:1px solid var(--border);}
tbody tr:hover td.c-model{background:rgba(99,102,241,.025);}
"""
    # Model name link, provider and parameter-count captions.
    model_cell_rules = """/* MODEL CELL */
.mc{display:flex;flex-direction:column;gap:2px;}
.mn{font-weight:700;font-size:12px;color:var(--text);display:flex;align-items:center;gap:5px;flex-wrap:wrap;}
.mn a{color:var(--text);text-decoration:none;transition:var(--tr);position:relative;}
.mn a:hover{color:var(--ac);text-decoration:none;}
.mn a::after{content:'';position:absolute;bottom:-2px;left:0;width:0;height:1px;background:var(--ac);transition:width 0.3s ease;}
.mn a:hover::after{width:100%;}
.ms{display:flex;gap:4px;align-items:center;margin-top:2px;}
.mp{font-size:8px;color:var(--text-muted);font-family:var(--font-mono);}
"""
    # Round provider logo image and its text-initials fallback.
    provider_logo_rules = """/* PROVIDER LOGO */
.provider-logo-inline{width:16px;height:16px;border-radius:50%;object-fit:cover;border:1px solid var(--border);box-shadow:var(--shadow-sm);margin-right:6px;vertical-align:middle;display:inline-block;}
.provider-logo-fallback-inline{width:16px;height:16px;border-radius:50%;background:var(--ac-bg);border:1px solid var(--border);display:inline-flex;align-items:center;justify-content:center;font-size:8px;font-weight:700;color:var(--ac);font-family:var(--font-mono);margin-right:6px;vertical-align:middle;}
"""
    # Numeric score and the muted placeholder for missing scores.
    score_cell_rules = """/* SCORE CELL */
.sc{display:flex;flex-direction:column;align-items:center;gap:2px;}
.sn{font-family:var(--font-mono);font-size:11px;font-weight:700;}
.na{color:var(--text-muted);font-size:9px;font-family:var(--font-mono);}
"""
    # Message row shown when the table has no data.
    empty_state_rules = """/* EMPTY STATE */
.empty-state{text-align:center;padding:40px 20px;color:var(--text-muted);font-size:13px;}
.empty-state strong{color:var(--text-sec);font-size:15px;display:block;margin-bottom:8px;}
"""
    return (
        reset_and_variables
        + table_rules
        + model_cell_rules
        + provider_logo_rules
        + score_cell_rules
        + empty_state_rules
    )
def get_benchmark_category_color(benchmark_key: str) -> str:
    """
    Look up the display color for a benchmark via its category.

    Args:
        benchmark_key: The benchmark key (e.g., 'gsm8k', 'mmluPro')

    Returns:
        str: Hex color code. Benchmarks missing from BENCHMARK_CATEGORIES are
        treated as "knowledge"; a category missing from CATEGORY_COLORS
        yields "#6366f1".
    """
    return CATEGORY_COLORS.get(
        BENCHMARK_CATEGORIES.get(benchmark_key, "knowledge"),
        "#6366f1",
    )
def generate_table_headers(selected_benchmarks: List[str]) -> str:
    """
    Build the <thead> markup for the leaderboard table.

    Column 0 is always the model column; each selected benchmark follows in
    order. Every header cell carries an onclick that calls sortTable with its
    column index (sortTable itself is not defined in this module).

    Args:
        selected_benchmarks: List of benchmark keys to display

    Returns:
        str: HTML string for <thead> element
    """
    info_by_key = get_benchmark_info()

    # Model header first, then one header per benchmark (columns 1+).
    cells = [' <th class="c-model" onclick="sortTable(0)">Model <span class="sa">↕</span></th>\n']
    for column, key in enumerate(selected_benchmarks, start=1):
        # Fall back to the raw key when no display name is registered.
        label = info_by_key.get(key, {}).get("name", key)
        cells.append(f' <th onclick="sortTable({column})">{label} <span class="sa">↕</span></th>\n')

    return "<thead><tr>\n" + "".join(cells) + "</tr></thead>\n"
def _text_or_default(value, default: str) -> str:
    """Return *value* as a string, or *default* when it is None, NaN/NA or blank."""
    if value is None:
        return default
    if isinstance(value, str):
        return value if value.strip() else default
    try:
        if pd.isna(value):
            return default
    except (TypeError, ValueError):
        # Non-scalar values have no single "missing" answer; render via str().
        pass
    return str(value)


def generate_model_cell(row: pd.Series, provider_logos: Dict[str, str]) -> str:
    """
    Generate HTML for the model cell (sticky first column).

    All values taken from the row are HTML-escaped before being placed in
    text or attribute positions, and missing values (None/NaN/blank) fall
    back to defaults instead of rendering as "nan" or raising.

    Args:
        row: DataFrame row containing model data. Reads "model_id",
            "model_name", "provider", "parameters_display", "parameters"
            and "logo_url" when present.
        provider_logos: Dictionary mapping provider names to logo URLs,
            consulted only when the row has no usable "logo_url"

    Returns:
        str: HTML string for model <td> element
    """
    # Local import so the block does not rely on a new file-level import.
    from html import escape

    model_id = _text_or_default(row.get("model_id"), "")
    model_name = _text_or_default(row.get("model_name"), model_id)
    provider = _text_or_default(row.get("provider"), "Unknown")
    # Prefer the pre-formatted parameters_display, then the raw parameters column.
    params = _text_or_default(
        row.get("parameters_display"),
        _text_or_default(row.get("parameters"), "Unknown"),
    )

    # Logo: the row's own logo_url wins, otherwise look the provider up.
    logo_url = _text_or_default(row.get("logo_url"), "") or provider_logos.get(provider)

    safe_provider = escape(provider, quote=True)
    safe_name = escape(model_name, quote=True)
    safe_params = escape(params, quote=True)

    if logo_url:
        safe_logo_url = escape(str(logo_url), quote=True)
        logo_html = (
            f'<img src="{safe_logo_url}" alt="{safe_provider}" '
            f'class="provider-logo-inline" title="{safe_provider}" '
            'onerror="this.style.display=\'none\';">'
        )
    else:
        # Fallback: show first 2 letters of provider name
        initials = escape(provider[:2].upper()) if provider != "Unknown" else "??"
        logo_html = f'<span class="provider-logo-fallback-inline" title="{safe_provider}">{initials}</span>'

    # model_name holds the Hugging Face repo path (e.g., "Meta/Llama-3").
    hf_link = f"https://huggingface.co/{safe_name}" if model_name else "#"

    return f''' <td class="c-model">
  <div class="mc">
    <div class="mn">
      {logo_html}
      <a href="{hf_link}" target="_blank" rel="noopener noreferrer">{safe_name}</a>
    </div>
    <div class="ms">
      <span class="mp">{safe_provider}</span>
      <span class="mp">{safe_params}</span>
    </div>
  </div>
 </td>'''
def generate_score_cell(score, benchmark_key: str) -> str:
    """
    Render one benchmark score as a <td>, colored by the benchmark's category.

    Args:
        score: The benchmark score (float, None, or NaN)
        benchmark_key: The benchmark key (for color coding)

    Returns:
        str: HTML string for score <td> element. Missing scores and values
        that cannot be converted to float render as an em-dash placeholder;
        valid scores are shown with one decimal place.
    """
    placeholder = ' <td><div class="sc"><span class="na">—</span></div></td>'

    # Guard clause: nothing to format when the score is absent.
    if score is None or pd.isna(score):
        return placeholder

    try:
        numeric = float(score)
        hue = get_benchmark_category_color(benchmark_key)
    except (ValueError, TypeError):
        return placeholder

    return f' <td><div class="sc"><div class="sn" style="color: {hue};">{numeric:.1f}</div></div></td>'
def generate_table_rows(
    df: pd.DataFrame, selected_benchmarks: List[str], provider_logos: Dict[str, str]
) -> str:
    """
    Generate HTML for all table rows.

    Each row gets a data-name attribute holding the (HTML-escaped) model
    name, one model cell, and one score cell per selected benchmark read
    from the "<benchmark_key>_score" column. An empty DataFrame yields a
    single "no results" row spanning every column.

    Args:
        df: DataFrame containing leaderboard data
        selected_benchmarks: List of benchmark keys to display
        provider_logos: Dictionary mapping provider names to logo URLs

    Returns:
        str: HTML string for <tbody> element
    """
    # Local import so the block does not rely on a new file-level import.
    from html import escape

    if df.empty:
        # Span exactly the rendered columns: the model column plus one per benchmark.
        column_count = len(selected_benchmarks) + 1
        return f"""<tbody>
<tr>
<td colspan="{column_count}" class="empty-state">
<strong>No models match your criteria</strong>
Try adjusting your search or filter settings
</td>
</tr>
</tbody>"""

    # Collect fragments and join once instead of repeated string concatenation.
    parts = ["<tbody>\n"]
    for _, row in df.iterrows():
        model_name = row.get("model_name", row.get("model_id", "Unknown"))
        # Escape so quotes or markup in a name cannot break out of the attribute.
        parts.append(f'<tr data-name="{escape(str(model_name), quote=True)}">\n')
        # Model cell (sticky first column)
        parts.append(generate_model_cell(row, provider_logos) + "\n")
        # Score cells for each selected benchmark
        parts.extend(
            generate_score_cell(row.get(f"{bench_key}_score"), bench_key) + "\n"
            for bench_key in selected_benchmarks
        )
        parts.append("</tr>\n")
    parts.append("</tbody>\n")
    return "".join(parts)
def generate_leaderboard_html(
    df: pd.DataFrame, selected_benchmarks: List[str], provider_logos: Dict[str, str]
) -> str:
    """
    Assemble the full leaderboard fragment: stylesheet plus table.

    Args:
        df: DataFrame containing filtered leaderboard data
        selected_benchmarks: List of benchmark keys to display
        provider_logos: Dictionary mapping provider names to logo URLs

    Returns:
        str: HTML string containing a <style> element followed by the table
        (id "leaderboardTable") wrapped in a "tw" container. No script is
        emitted here; the sorting JavaScript is supplied separately (via
        Gradio's js parameter in app.py, per the original note).
    """
    stylesheet = get_table_css()
    thead = generate_table_headers(selected_benchmarks)
    tbody = generate_table_rows(df, selected_benchmarks, provider_logos)

    return f"""
<style>
{stylesheet}
</style>
<div class="tw">
<table id="leaderboardTable">
{thead}
{tbody}
</table>
</div>
"""