Spaces:

OpenEvals
/

every-leaderboards

Running

App Files Files Community

every-leaderboards / utils /html_generator.py

SaylorTwift HF Staff

Migrate to Gradio app with interactive features

fa808e0 verified 17 days ago

raw

history blame contribute delete

11.1 kB

	"""
	HTML table generator for the leaderboard.
	Generates styled HTML tables with client-side sorting and provider logos.
	"""

	import math
	from html import escape
	from typing import Dict, List

	import pandas as pd

	from .data_loader import get_benchmark_info


	# Category assigned to each benchmark key; the category selects the score color.
	# The flat lookup is expanded from (category, benchmark keys) groups, and the
	# order of groups and members fixes the insertion order of the resulting dict.
	BENCHMARK_CATEGORIES = {
	    bench_key: category
	    for category, bench_keys in (
	        ("math", ("gsm8k", "aime2026", "hmmt2026")),
	        ("knowledge", ("mmluPro", "gpqa", "hle")),
	        ("coding", ("sweVerified", "swePro")),
	        ("vision", ("olmOcr",)),
	        ("agent", ("terminalBench",)),
	        ("language", ("evasionBench",)),
	    )
	    for bench_key in bench_keys
	}

	# Hex color used when rendering the scores of each benchmark category.
	CATEGORY_COLORS = dict(
	    math="#7c3aed",  # purple
	    knowledge="#2563eb",  # blue
	    coding="#059669",  # green
	    agent="#0d9488",  # teal
	    language="#ea580c",  # orange
	    vision="#db2777",  # pink
	)


	def get_table_css() -> str:
	"""
	Return the stylesheet for the leaderboard table (light mode only).

	Extracted from index.html and adapted for Gradio embedding. The returned
	text is plain CSS with no surrounding <style> tag. It contains, in order:

	- a universal reset and the custom properties (colors, shadows, radii,
	fonts, transition timing) declared on :root
	- TABLE: the .tw wrapper, the sticky <thead>, clickable <th> styling
	(including the .sorted state and the .sa sort arrow) and the sticky
	.c-model first column
	- MODEL CELL: .mc / .mn / .ms / .mp
	- PROVIDER LOGO: .provider-logo-inline and .provider-logo-fallback-inline
	- SCORE CELL: .sc / .sn / .na
	- EMPTY STATE: .empty-state

	Returns:
	str: The CSS text.
	"""
	# NOTE(review): the `*`, `:root`, `table`, `thead`, `th`, `td` and `tbody` selectors
	# below are not scoped to `.tw`; presumably they can also affect the rest of the
	# page this CSS is injected into - confirm against the Gradio embedding.
	return """
	*{margin:0;padding:0;box-sizing:border-box;}
	:root{
	--bg:#f9fafb;--bg2:#f3f4f6;--surface:#ffffff;--surface-alt:#f9fafb;
	--border:#e5e7eb;--border-hover:#d1d5db;
	--shadow-sm:0 1px 3px rgba(15,23,42,.04),0 1px 2px rgba(15,23,42,.06);
	--shadow:0 4px 16px rgba(15,23,42,.06),0 1px 3px rgba(15,23,42,.08);
	--shadow-lg:0 12px 40px rgba(15,23,42,.08),0 4px 12px rgba(15,23,42,.06);
	--text:#111827;--text-sec:#6b7280;--text-muted:#9ca3af;
	--ac:#6366f1;--ac2:#4f46e5;--ac-bg:rgba(99,102,241,.06);
	--teal:#0d9488;--amber:#d97706;--green:#16a34a;--rose:#e11d48;--purple:#7c3aed;
	--radius:16px;--radius-sm:10px;--radius-xs:6px;
	--font:'Source Sans Pro',sans-serif;--font-mono:'IBM Plex Mono',monospace;
	--tr:0.22s cubic-bezier(0.4,0,0.2,1);
	}

	/* TABLE */
	.tw{background:var(--surface);border:1px solid var(--border);border-radius:var(--radius);overflow-x:auto;box-shadow:var(--shadow);margin-bottom:20px;}
	table{width:100%;border-collapse:collapse;font-size:11px;font-family:var(--font);}
	thead{background:var(--surface-alt);position:sticky;top:0;z-index:100;box-shadow:0 2px 4px rgba(0,0,0,0.1);}
	thead tr{border-bottom:2px solid var(--border);}
	th{padding:12px 8px;text-align:center;font-size:11px;font-family:var(--font-mono);text-transform:uppercase;letter-spacing:.5px;color:var(--text-muted);white-space:nowrap;cursor:pointer;user-select:none;vertical-align:bottom;line-height:1.6;font-weight:700;transition:var(--tr);}
	th.c-model{text-align:left;padding-left:14px;min-width:180px;position:sticky;left:0;background:var(--surface-alt);z-index:101;}
	th:hover{color:var(--ac);background:rgba(99,102,241,.08);transform:translateY(-1px);}
	th.sorted{color:var(--ac);font-weight:800;}
	.sa{opacity:.6;font-size:7px;margin-left:3px;}
	th a{color:inherit;text-decoration:none;}
	th a:hover{color:var(--ac);text-decoration:underline;}
	tbody tr{border-bottom:1px solid var(--border);transition:background var(--tr);}
	tbody tr:last-child{border-bottom:none;}
	tbody tr:hover{background:rgba(99,102,241,.025);}
	td{padding:10px 6px;text-align:center;vertical-align:middle;}
	td.c-model{text-align:left;padding-left:14px;position:sticky;left:0;background:var(--surface);z-index:9;border-right:1px solid var(--border);}
	tbody tr:hover td.c-model{background:rgba(99,102,241,.025);}

	/* MODEL CELL */
	.mc{display:flex;flex-direction:column;gap:2px;}
	.mn{font-weight:700;font-size:12px;color:var(--text);display:flex;align-items:center;gap:5px;flex-wrap:wrap;}
	.mn a{color:var(--text);text-decoration:none;transition:var(--tr);position:relative;}
	.mn a:hover{color:var(--ac);text-decoration:none;}
	.mn a::after{content:'';position:absolute;bottom:-2px;left:0;width:0;height:1px;background:var(--ac);transition:width 0.3s ease;}
	.mn a:hover::after{width:100%;}
	.ms{display:flex;gap:4px;align-items:center;margin-top:2px;}
	.mp{font-size:8px;color:var(--text-muted);font-family:var(--font-mono);}

	/* PROVIDER LOGO */
	.provider-logo-inline{width:16px;height:16px;border-radius:50%;object-fit:cover;border:1px solid var(--border);box-shadow:var(--shadow-sm);margin-right:6px;vertical-align:middle;display:inline-block;}
	.provider-logo-fallback-inline{width:16px;height:16px;border-radius:50%;background:var(--ac-bg);border:1px solid var(--border);display:inline-flex;align-items:center;justify-content:center;font-size:8px;font-weight:700;color:var(--ac);font-family:var(--font-mono);margin-right:6px;vertical-align:middle;}

	/* SCORE CELL */
	.sc{display:flex;flex-direction:column;align-items:center;gap:2px;}
	.sn{font-family:var(--font-mono);font-size:11px;font-weight:700;}
	.na{color:var(--text-muted);font-size:9px;font-family:var(--font-mono);}

	/* EMPTY STATE */
	.empty-state{text-align:center;padding:40px 20px;color:var(--text-muted);font-size:13px;}
	.empty-state strong{color:var(--text-sec);font-size:15px;display:block;margin-bottom:8px;}
	"""


	def get_benchmark_category_color(benchmark_key: str) -> str:
	    """
	    Resolve the display color of a benchmark through its category.

	    Args:
	        benchmark_key: The benchmark key (e.g., 'gsm8k', 'mmluPro')

	    Returns:
	        str: Hex color code of the benchmark's category. A key that is not in
	        BENCHMARK_CATEGORIES is treated as "knowledge"; a category that is not
	        in CATEGORY_COLORS yields "#6366f1".
	    """
	    if benchmark_key in BENCHMARK_CATEGORIES:
	        category_name = BENCHMARK_CATEGORIES[benchmark_key]
	    else:
	        category_name = "knowledge"

	    if category_name in CATEGORY_COLORS:
	        return CATEGORY_COLORS[category_name]
	    return "#6366f1"


	def generate_table_headers(selected_benchmarks: List[str]) -> str:
	    """
	    Generate HTML for table headers with sorting functionality.

	    Column 0 is always the "Model" header; each selected benchmark gets one
	    further header whose ``onclick`` calls ``sortTable`` with its column index.
	    The label is the ``"name"`` entry returned by ``get_benchmark_info()`` for
	    that key, or the key itself when no such entry exists. Labels are
	    HTML-escaped so characters such as ``&`` or ``<`` show as text instead of
	    being parsed as markup.

	    Args:
	        selected_benchmarks: List of benchmark keys to display

	    Returns:
	        str: HTML string for <thead> element
	    """
	    benchmarks_info = get_benchmark_info()

	    # Model header (column 0)
	    header_cells = [
	        ' <th class="c-model" onclick="sortTable(0)">Model <span class="sa">↕</span></th>'
	    ]

	    # Benchmark headers (columns 1+)
	    for idx, bench_key in enumerate(selected_benchmarks, start=1):
	        bench_name = benchmarks_info.get(bench_key, {}).get("name", bench_key)
	        label = escape(str(bench_name), quote=False)
	        header_cells.append(
	            f' <th onclick="sortTable({idx})">{label} <span class="sa">↕</span></th>'
	        )

	    return "<thead><tr>\n" + "\n".join(header_cells) + "\n</tr></thead>\n"


	def _first_present(row: pd.Series, keys, default):
	    """Return the first ``row[key]`` that exists and is not None/NaN, else ``default``."""
	    for key in keys:
	        value = row.get(key)
	        if not pd.isna(value):
	            return value
	    return default


	def generate_model_cell(row: pd.Series, provider_logos: Dict[str, str]) -> str:
	    """
	    Generate HTML for the model cell (sticky first column).

	    The cell shows a provider logo (or a two-letter fallback badge), the model
	    name linked to ``https://huggingface.co/<model_name>``, and a second line
	    with the provider and the parameter count. Every value taken from the row
	    is HTML-escaped before being placed in text or attribute positions, and
	    missing values (None/NaN) fall back instead of rendering as "nan".

	    Args:
	        row: DataFrame row containing model data. Columns read: model_name
	            (fallback model_id), provider, parameters_display (fallback
	            parameters) and logo_url.
	        provider_logos: Dictionary mapping provider names to logo URLs

	    Returns:
	        str: HTML string for model <td> element
	    """
	    model_name = str(_first_present(row, ("model_name", "model_id"), ""))
	    provider = str(_first_present(row, ("provider",), "Unknown"))
	    # Prefer the pre-formatted parameters_display column, then parameters
	    params = str(_first_present(row, ("parameters_display", "parameters"), "Unknown"))

	    # Provider logo: the row's logo_url column first, then the provider_logos dict
	    provider_logo_url = _first_present(row, ("logo_url",), None) or provider_logos.get(provider)

	    # quote=True also escapes quotes, so the values are safe inside attributes
	    safe_name = escape(model_name, quote=True)
	    safe_provider = escape(provider, quote=True)
	    safe_params = escape(params, quote=True)

	    if provider_logo_url:
	        safe_logo_url = escape(str(provider_logo_url), quote=True)
	        logo_html = f'<img src="{safe_logo_url}" alt="{safe_provider}" class="provider-logo-inline" title="{safe_provider}" onerror="this.style.display=\'none\';">'
	    else:
	        # Fallback: show first 2 letters of provider name
	        initials = provider[:2].upper() if provider and provider != "Unknown" else "??"
	        logo_html = f'<span class="provider-logo-fallback-inline" title="{safe_provider}">{escape(initials)}</span>'

	    # HuggingFace link - model_name holds the repo path (e.g., "Meta/Llama-3")
	    hf_link = f"https://huggingface.co/{safe_name}" if model_name else "#"

	    cell_html = f''' <td class="c-model">
	<div class="mc">
	<div class="mn">
	{logo_html}
	<a href="{hf_link}" target="_blank" rel="noopener noreferrer">{safe_name}</a>
	</div>
	<div class="ms">
	<span class="mp">{safe_provider}</span>
	<span class="mp">{safe_params}</span>
	</div>
	</div>
	</td>'''

	    return cell_html


	def generate_score_cell(score, benchmark_key: str) -> str:
	    """
	    Generate HTML for a score cell with category-specific color.

	    A score that cannot be converted with ``float()`` (None, pandas NA,
	    non-numeric strings, containers) or that converts to NaN or +/-infinity is
	    rendered as a muted dash. Any other score is shown with one decimal place
	    in the color of the benchmark's category.

	    Args:
	        score: The benchmark score (float, None, or NaN)
	        benchmark_key: The benchmark key (for color coding)

	    Returns:
	        str: HTML string for score <td> element
	    """
	    missing_cell = ' <td><div class="sc"><span class="na">—</span></div></td>'

	    # Only the conversion can raise; keep the try body to that one call.
	    try:
	        score_float = float(score)
	    except (ValueError, TypeError):
	        return missing_cell

	    # float() accepts NaN/inf (also as the strings "nan"/"inf"); they are not scores.
	    if not math.isfinite(score_float):
	        return missing_cell

	    color = get_benchmark_category_color(benchmark_key)
	    score_display = f"{score_float:.1f}"
	    return f' <td><div class="sc"><div class="sn" style="color: {color};">{score_display}</div></div></td>'


	def generate_table_rows(
	    df: pd.DataFrame, selected_benchmarks: List[str], provider_logos: Dict[str, str]
	) -> str:
	    """
	    Generate HTML for all table rows.

	    Each DataFrame row becomes one ``<tr>`` whose ``data-name`` attribute holds
	    the HTML-escaped model name (model_name, else model_id, else "Unknown";
	    None/NaN count as missing). The row contains the model cell followed by one
	    score cell per selected benchmark, read from the ``<benchmark>_score``
	    column. An empty DataFrame yields a single "no models match" row.

	    Args:
	        df: DataFrame containing leaderboard data
	        selected_benchmarks: List of benchmark keys to display
	        provider_logos: Dictionary mapping provider names to logo URLs

	    Returns:
	        str: HTML string for <tbody> element
	    """
	    if df.empty:
	        return """<tbody>
	<tr>
	<td colspan="100" class="empty-state">
	<strong>No models match your criteria</strong>
	Try adjusting your search or filter settings
	</td>
	</tr>
	</tbody>"""

	    # Collect fragments and join once instead of growing a string in a loop.
	    fragments = ["<tbody>"]

	    for _, row in df.iterrows():
	        model_name = row.get("model_name")
	        if pd.isna(model_name):
	            model_name = row.get("model_id")
	        if pd.isna(model_name):
	            model_name = "Unknown"

	        # quote=True keeps quotes in the name from terminating the attribute
	        fragments.append(f'<tr data-name="{escape(str(model_name), quote=True)}">')

	        # Model cell (sticky first column)
	        fragments.append(generate_model_cell(row, provider_logos))

	        # Score cells for each selected benchmark
	        fragments.extend(
	            generate_score_cell(row.get(f"{bench_key}_score"), bench_key)
	            for bench_key in selected_benchmarks
	        )

	        fragments.append("</tr>")

	    fragments.append("</tbody>")
	    return "\n".join(fragments) + "\n"


	def generate_leaderboard_html(
	    df: pd.DataFrame, selected_benchmarks: List[str], provider_logos: Dict[str, str]
	) -> str:
	    """
	    Assemble the full leaderboard markup: stylesheet plus table.

	    Args:
	        df: DataFrame containing filtered leaderboard data
	        selected_benchmarks: List of benchmark keys to display
	        provider_logos: Dictionary mapping provider names to logo URLs

	    Returns:
	        str: A <style> element holding the table CSS, followed by the
	        <table id="leaderboardTable"> (header and body) wrapped in
	        <div class="tw">.
	    """
	    stylesheet = get_table_css()
	    thead_html = generate_table_headers(selected_benchmarks)
	    tbody_html = generate_table_rows(df, selected_benchmarks, provider_logos)

	    # No <script> is emitted here: the sorting JavaScript is loaded via
	    # Gradio's js parameter in app.py.
	    return f"""
	<style>
	{stylesheet}
	</style>

	<div class="tw">
	<table id="leaderboardTable">
	{thead_html}
	{tbody_html}
	</table>
	</div>
	"""