import gradio as gr
import plotly.graph_objects as go

from data_loader import get_eval_metadata


def get_theme():
    """Build the light Gradio theme used by the app.

    Starts from ``gr.themes.Base`` with a blue primary hue and a slate
    neutral hue, then overrides individual colour tokens (page background,
    text, blocks, inputs, primary/secondary buttons) via ``.set(...)``.

    Returns:
        The value returned by ``gr.themes.Base(...).set(...)``.
    """
    return gr.themes.Base(
        primary_hue="blue",
        neutral_hue="slate",
    ).set(
        body_background_fill="#f5f5f5",
        body_text_color="#0a0a0a",
        body_text_color_subdued="#525252",
        block_background_fill="#ffffff",
        block_border_color="#e5e5e5",
        block_label_text_color="#525252",
        block_title_text_color="#0a0a0a",
        input_background_fill="#ffffff",
        input_border_color="#e5e5e5",
        button_primary_background_fill="#3b82f6",
        button_primary_text_color="#ffffff",
        button_secondary_background_fill="#ffffff",
        button_secondary_text_color="#0a0a0a",
        button_secondary_border_color="#e5e5e5",
    )


def get_custom_css() -> str:
    """Return the app's custom stylesheet as one CSS string.

    The stylesheet covers, in order: brand variables and container resets,
    "match pill" checkboxes, the header and info banner, leaderboard header
    and eval tags, pagination, collapsible metric cards, the heatmap table,
    model chips, global form/button/tab/dropdown overrides, generic tables,
    the ``.dataframe`` component, dropdown tokens, and WebKit scrollbars.

    Rule order is significant: several selectors (for example
    ``.heatmap-table`` and ``.gradio-container footer``) are declared twice
    and the later ``!important`` declarations are meant to win, so rules must
    not be reordered or merged.

    Returns:
        The CSS source text. Presumably passed to Gradio as custom CSS by the
        caller -- the call site is not visible in this file section.
    """
    # The `.dataframe` container uses an 8px radius, matching the
    # `.heatmap-table` and `.accordion` containers below. A bare `px` with no
    # number is invalid CSS and makes the browser drop the whole declaration.
    return """
    :root {
        --brand-black: #0a0a0a;
        --brand-dark: #1a1a1a;
        --brand-gray: #2a2a2a;
        --brand-light: #f5f5f5;
        --brand-accent: #3b82f6;
    }

    body, .gradio-container {
        background: var(--brand-light) !important;
        color: var(--brand-black) !important;
    }

    .gradio-container {
        max-width: 100%;
        padding: 1.25rem 2.5rem 2rem;
    }

    .gradio-container *:focus-visible {
        outline: none !important;
        box-shadow: inset 0 0 0 1.5px #3b82f6 !important;
    }

    .gradio-container .block,
    .gradio-container .wrap,
    .gradio-container .form,
    .gradio-container .container {
        box-shadow: none !important;
    }

    /* Match pill styling */
    .match-pills .wrap,
    .match-pills .container {
        display: flex !important;
        flex-wrap: wrap !important;
        gap: 0.35rem !important;
    }

    .match-pills .wrap > div,
    .match-pills .container > div {
        margin: 0 !important;
    }

    .match-pills input[type="checkbox"] {
        display: none;
    }

    .match-pills label {
        display: inline-flex;
        align-items: center;
        border: 1px solid #d6d9de;
        background: #f5f7fb;
        border-radius: 999px;
        padding: 0.28rem 0.75rem;
        font-weight: 500;
        color: #0a0a0a;
        transition: all 120ms ease;
        cursor: pointer;
    }

    .match-pills label:hover {
        border-color: #3b82f6;
        background: #eef4ff;
    }

    .match-pills input[type="checkbox"]:checked + label {
        border-color: #3b82f6;
        background: rgba(59, 130, 246, 0.12);
        color: #0a0a0a;
        font-weight: 600;
    }

    .app-header {
        display: flex;
        align-items: center;
        gap: 1rem;
        margin-bottom: 1.5rem;
        padding: 1rem 1.25rem;
        background: #ffffff;
        border: 1px solid #e5e5e5;
        border-radius: 12px;
    }

    .logo-mark {
        width: 48px;
        height: 48px;
        border-radius: 12px;
        display: flex;
        align-items: center;
        justify-content: center;
        font-weight: 800;
        font-size: 1.1rem;
        color: #ffffff;
    }

    .brand h1 {
        margin: 0;
        font-size: 1.5rem;
        font-weight: 700;
        color: #0a0a0a;
    }

    .brand .tagline {
        color: #525252;
        font-size: 0.9rem;
    }

    .header-right {
        margin-left: auto;
    }

    .version-badge {
        background: rgba(59, 130, 246, 0.1);
        border: 1px solid #3b82f6;
        border-radius: 8px;
        padding: 0.35rem 0.6rem;
        font-size: 0.78rem;
        color: #3b82f6;
    }

    .info-banner {
        background: #ffffff;
        border: 1px solid #e5e5e5;
        border-left: 3px solid #3b82f6;
        border-radius: 10px;
        padding: 1rem 1.25rem;
        margin-bottom: 1rem;
    }

    .info-banner h3 {
        margin: 0;
        font-weight: 600;
        color: #0a0a0a;
    }

    .leaderboard-header {
        display: flex;
        justify-content: space-between;
        align-items: center;
        gap: 1rem;
        flex-wrap: wrap;
        margin-bottom: 0.4rem;
    }

    .lb-title {
        font-size: 1.2rem;
        font-weight: 700;
        color: #0a0a0a;
        margin: 0;
        line-height: 1.35;
    }

    .lb-by {
        font-size: 0.9rem;
        color: #525252;
        margin: 0.1rem 0 0 0;
        line-height: 1.35;
    }

    .lb-meta {
        display: flex;
        flex-direction: column;
        gap: 0.1rem;
    }

    .eval-tags {
        display: flex;
        flex-wrap: wrap;
        gap: 0.4rem;
    }

    .eval-tags {
        margin-top: 0.35rem;
    }

    .eval-tag {
        border-radius: 10px;
        padding: 0.3rem 0.65rem;
        font-size: 0.82rem;
        font-weight: 600;
        color: #0a0a0a;
        border: 1px solid #e5e5e5;
        background: #f8fafc;
    }

    .eval-tag:nth-child(5n + 1) {
        border-color: #3b82f6;
        background: rgba(59, 130, 246, 0.12);
        color: #0a1d4a;
    }

    .eval-tag:nth-child(5n + 2) {
        border-color: #10b981;
        background: rgba(16, 185, 129, 0.12);
        color: #0b3b2b;
    }

    .eval-tag:nth-child(5n + 3) {
        border-color: #f97316;
        background: rgba(249, 115, 22, 0.12);
        color: #4b1f07;
    }

    .eval-tag:nth-child(5n + 4) {
        border-color: #8b5cf6;
        background: rgba(139, 92, 246, 0.12);
        color: #2f0f5a;
    }

    .eval-tag:nth-child(5n) {
        border-color: #06b6d4;
        background: rgba(6, 182, 212, 0.12);
        color: #053f46;
    }

    .source-link {
        font-size: 0.75rem;
        color: #3b82f6;
        text-decoration: none;
        padding: 0.375rem 0.75rem;
        border: 1px solid #3b82f6;
        border-radius: 6px;
    }

    .source-link:hover {
        background: rgba(59, 130, 246, 0.1);
    }

    .pagination-bar {
        margin-top: 0.75rem;
        padding: 0.85rem 0 0.25rem;
        display: flex;
        justify-content: center;
        align-items: center;
        gap: 0.85rem;
    }

    .page-info {
        font-size: 1rem;
        min-width: 80px;
        text-align: center;
        color: #0a0a0a;
    }

    .metrics-section {
        margin-top: 1.25rem;
        padding: 1.25rem 1rem;
        border-top: 1px solid #e5e5e5;
    }

    .metrics-section h3 {
        font-size: 0.9rem;
        font-weight: 700;
        color: #525252;
        margin: 0 0 0.9rem 0;
        text-transform: uppercase;
        letter-spacing: 0.05em;
    }

    .metrics-grid {
        display: grid;
        grid-template-columns: repeat(auto-fill, minmax(280px, 1fr));
        gap: 0.75rem;
    }

    @media (max-width: 768px) {
        .metrics-grid {
            grid-template-columns: repeat(auto-fill, minmax(160px, 1fr));
            gap: 0.5rem;
        }

        .metric-card-header {
            padding: 0.65rem 0.8rem;
            flex-direction: column;
            align-items: flex-start;
            gap: 0.25rem;
        }

        .metric-card-body {
            padding: 0.65rem 0.8rem;
            font-size: 0.85rem;
        }

        .metrics-section {
            padding: 1rem 0.5rem;
        }
    }

    .metrics-grid .metric-card {
        align-self: start;
    }

    .metric-card {
        background: #ffffff;
        border: 1px solid #e5e5e5;
        border-radius: 10px;
        overflow: hidden;
        position: relative;
    }

    .metric-card-header {
        display: flex;
        justify-content: space-between;
        align-items: center;
        padding: 0.85rem 1rem;
        cursor: pointer;
    }

    .metric-card-header:hover {
        background: #f9f9f9;
    }

    .metric-card-name {
        font-weight: 600;
        color: #0a0a0a;
    }

    .metric-card-direction {
        font-size: 0.82rem;
        color: #525252;
    }

    .metric-card-direction .arrow {
        color: #22c55e;
        font-weight: 700;
    }

    .metric-card-body {
        display: none;
        padding: 0.85rem 1rem;
        border-top: 1px solid #e5e5e5;
        color: #0a0a0a;
    }

    .metric-card input.metric-toggle {
        display: none;
    }

    .metric-card input.metric-toggle:checked ~ .metric-card-body {
        display: block;
    }

    .metric-card input.metric-toggle:checked ~ .metric-card-header {
        background: #f9f9f9;
        border-bottom: 1px solid #e5e5e5;
    }

    .metric-card input.metric-toggle:checked ~ .metric-card-header .metric-card-name,
    .metric-card input.metric-toggle:checked ~ .metric-card-header .metric-card-direction {
        color: #0a0a0a;
    }

    /* Ensure multiple cards can be open at once and are closable */
    .metric-card input.metric-toggle:not(:checked) ~ .metric-card-body {
        display: none;
    }

    .metric-type-badge {
        font-size: 0.68rem;
        text-transform: uppercase;
        padding: 0.2rem 0.45rem;
        background: rgba(59, 130, 246, 0.1);
        border: 1px solid #3b82f6;
        border-radius: 6px;
        color: #3b82f6;
    }

    .heatmap-table {
        width: 100%;
        border-collapse: collapse;
        font-size: 0.85rem;
    }

    .heatmap-table th {
        padding: 0.55rem 0.65rem;
        font-weight: 700;
        font-size: 0.72rem;
        text-transform: uppercase;
        color: #525252;
        background: #f5f5f5;
    }

    .heatmap-table td {
        padding: 0.45rem 0.65rem;
        text-align: center;
        border-bottom: 1px solid #e5e5e5;
    }

    .heatmap-table td.metric-name {
        text-align: left;
        font-weight: 600;
        color: #0a0a0a;
    }

    .heatmap-table td.score-cell {
        font-weight: 600;
    }

    .heatmap-table td.score-cell.best {
        background: rgba(34, 197, 94, 0.15);
        color: #16a34a;
    }

    .heatmap-table td.score-cell.good {
        background: rgba(34, 197, 94, 0.08);
        color: #16a34a;
    }

    .heatmap-table td.score-cell.mid {
        background: rgba(234, 179, 8, 0.15);
        color: #ca8a04;
    }

    .heatmap-table td.score-cell.low {
        background: rgba(239, 68, 68, 0.12);
        color: #dc2626;
    }

    .heatmap-table td.score-cell.worst {
        background: rgba(239, 68, 68, 0.18);
        color: #b91c1c;
    }

    .heatmap-table td.score-cell.na {
        color: #525252;
        font-style: italic;
    }

    /* Model chips */
    .selected-models-group label {
        display: inline-flex !important;
        background: #ffffff;
        border: 1px solid #e5e5e5;
        border-radius: 16px;
        padding: 0.35rem 0.85rem;
        font-size: 0.88rem;
        color: #0a0a0a;
        cursor: pointer;
        margin: 0.18rem 0.32rem 0.18rem 0 !important;
    }

    .selected-models-group input[type="checkbox"] {
        display: none;
    }

    .no-results {
        text-align: center;
        padding: 2.5rem 1rem;
        color: #525252;
    }

    .gradio-container footer {
        display: none;
    }

    .block, .form, .wrap, .container {
        background: #ffffff !important;
    }

    body, .gradio-container, p, span, div, h1, h2, h3, h4, h5, h6, label, td, th {
        color: #0a0a0a !important;
    }

    .label-wrap span, .prose, .markdown, .prose p, .prose li, .markdown p, .markdown li {
        color: #525252 !important;
    }

    input, textarea, select {
        background: #ffffff !important;
        color: #0a0a0a !important;
        border: 1px solid #e5e5e5 !important;
        border-radius: 8px !important;
    }

    input::placeholder, textarea::placeholder {
        color: #a1a1a1 !important;
    }

    input:focus, textarea:focus, select:focus {
        border-color: #3b82f6 !important;
        outline: none !important;
        box-shadow: inset 0 0 0 1.5px #3b82f6 !important;
    }

    select, .wrap select, .wrap input, input[type="text"], textarea {
        min-height: 44px !important;
        padding: 0.55rem 0.75rem !important;
        font-size: 0.96rem !important;
    }

    button {
        border-radius: 8px !important;
        font-weight: 500 !important;
        transition: all 0.15s ease !important;
    }

    button.primary, button[variant="primary"] {
        background: #3b82f6 !important;
        color: #ffffff !important;
        border: none !important;
    }

    button.primary:hover, button[variant="primary"]:hover {
        background: #2563eb !important;
    }

    button.secondary, button[variant="secondary"], button:not(.primary):not([variant="primary"]) {
        background: #ffffff !important;
        color: #0a0a0a !important;
        border: 1px solid #e5e5e5 !important;
    }

    button.secondary:hover, button[variant="secondary"]:hover {
        border-color: #3b82f6 !important;
        background: #f5f5f5 !important;
    }

    .tab-nav, .tabs {
        border-bottom: 1px solid #e5e5e5 !important;
    }

    .tab-nav button, .tabs button {
        color: #525252 !important;
        background: transparent !important;
        border: none !important;
        border-bottom: 2px solid transparent !important;
    }

    .tab-nav button.selected, .tabs button.selected {
        color: #3b82f6 !important;
        border-bottom-color: #3b82f6 !important;
    }

    .wrap, .secondary-wrap, .primary-wrap {
        background: transparent !important;
        border: none !important;
        border-radius: 0 !important;
        box-shadow: none !important;
        padding: 0 !important;
    }

    ul[role="listbox"], .dropdown, .options {
        background: #ffffff !important;
        border: 1px solid #e5e5e5 !important;
        border-radius: 8px !important;
        box-shadow: 0 4px 12px rgba(0, 0, 0, 0.1) !important;
    }

    ul[role="listbox"] li, .dropdown li, .options li {
        color: #0a0a0a !important;
    }

    ul[role="listbox"] li:hover, .dropdown li:hover, .options li:hover {
        background: #f5f5f5 !important;
    }

    ul[role="listbox"] li.active, .dropdown li.active, .options li.active {
        background: #f5f5f5 !important;
        color: #0a0a0a !important;
    }

    ul[role="listbox"] li.selected, .dropdown li.selected {
        background: rgba(59, 130, 246, 0.1) !important;
        color: #3b82f6 !important;
    }

    .accordion {
        border: 1px solid #e5e5e5 !important;
        border-radius: 8px !important;
        background: #ffffff !important;
    }

    .accordion > button {
        color: #0a0a0a !important;
    }

    .selected-models-group label, .checkbox-group label {
        display: inline-flex !important;
        background: #ffffff;
        border: 1px solid #e5e5e5;
        border-radius: 20px !important;
        padding: 0.4rem 0.9rem !important;
        font-size: 0.88rem !important;
        color: #0a0a0a !important;
        cursor: pointer !important;
        margin: 0.2rem !important;
        transition: all 0.15s ease !important;
    }

    .selected-models-group label:hover, .checkbox-group label:hover {
        border-color: #3b82f6 !important;
        background: #f5f5f5 !important;
    }

    .selected-models-group input[type="checkbox"], .checkbox-group input[type="checkbox"] {
        display: none !important;
    }

    table {
        width: 100% !important;
        border-collapse: collapse !important;
        background: #ffffff !important;
    }

    table th {
        background: #f5f5f5 !important;
        color: #525252 !important;
        font-weight: 600 !important;
        text-transform: uppercase !important;
        font-size: 0.75rem !important;
        padding: 0.75rem !important;
        border-bottom: 1px solid #e5e5e5 !important;
        text-align: left !important;
    }

    table td {
        padding: 0.65rem 0.75rem !important;
        border-bottom: 1px solid #e5e5e5 !important;
        color: #0a0a0a !important;
    }

    table tr:hover td {
        background: #f9f9f9 !important;
    }

    .dataframe {
        background: #ffffff !important;
        border: 1px solid #e5e5e5 !important;
        box-shadow: none !important;
        border-radius: 8px !important;
        overflow: hidden !important;
    }

    .dataframe table {
        width: 100% !important;
        border-collapse: collapse !important;
        font-size: 0.75rem !important;
        table-layout: auto !important;
        background: #ffffff !important;
    }

    .dataframe thead, .dataframe thead tr {
        background: #ffffff !important;
        position: sticky !important;
        top: 0 !important;
        z-index: 10 !important;
    }

    .dataframe thead th {
        padding: 0.875rem 1rem !important;
        font-weight: 700 !important;
        font-size: 0.75rem !important;
        text-transform: uppercase !important;
        letter-spacing: 0.05em !important;
        color: #0a0a0a !important;
        border-bottom: 2px solid #e5e5e5 !important;
        border-top: none !important;
        text-align: left !important;
        background: #ffffff !important;
        white-space: nowrap !important;
        border-radius: 0 !important;
    }

    .dataframe thead th span,
    .dataframe thead th div,
    .dataframe thead th button {
        background: transparent !important;
        border: none !important;
        border-radius: 0 !important;
        box-shadow: none !important;
        margin: 0 !important;
        outline: none !important;
    }

    .dataframe thead th span[role="button"],
    .dataframe thead th span[class*="svelte"] {
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
        outline: none !important;
        padding: 0 !important;
        width: auto !important;
    }

    /* Also target the SVG icon if it exists to ensure it doesn't have a background */
    .dataframe thead th svg {
        background: transparent !important;
        box-shadow: none !important;
    }

    .dataframe thead th span:hover,
    .dataframe thead th span[role="button"]:hover,
    .dataframe thead th span[class*="svelte"]:hover,
    .dataframe thead th button:hover {
        background: transparent !important;
        border: none !important;
        box-shadow: none !important;
        color: #3b82f6 !important;
    }

    .token {
        background-color: rgba(59, 130, 246, 0.12) !important;
        border: 1px solid rgba(59, 130, 246, 0.3) !important;
        color: #1e3a8a !important;
        border-radius: 6px !important;
        padding: 2px 8px !important;
        gap: 4px !important;
    }

    .token-remove {
        background-color: rgba(255, 255, 255, 0.4) !important;
        border: 1px solid rgba(30, 58, 138, 0.5) !important; /* Dark blue outline */
        color: #1e3a8a !important;
        border-radius: 4px !important;
        margin-left: 6px !important;
        padding: 1px !important;
        opacity: 0.9 !important;
        min-width: 18px !important;
        min-height: 18px !important;
        display: flex !important;
        align-items: center !important;
        justify-content: center !important;
    }

    .token-remove svg {
        width: 12px !important;
        height: 12px !important;
    }

    .token-remove:hover {
        background-color: #1e3a8a !important;
        color: #ffffff !important;
        border-color: #1e3a8a !important;
    }

    .selector-item {
        border-radius: 6px !important;
    }

    .gradio-container .token {
        box-shadow: none !important;
        font-weight: 500 !important;
    }

    .gradio-container .token span {
        color: #1e3a8a !important;
    }

    .dataframe tbody, .dataframe tbody tr {
        background: #ffffff !important;
    }

    .dataframe tbody tr {
        border-bottom: 1px solid #e5e5e5 !important;
    }

    .dataframe tbody tr:hover {
        background: #f9f9f9 !important;
    }

    .dataframe tbody td {
        padding: 0.75rem 1rem !important;
        color: #0a0a0a !important;
        background: #ffffff !important;
        border: none !important;
        border-bottom: 1px solid #e5e5e5 !important;
    }

    .dataframe tbody td:first-child {
        font-weight: 700 !important;
        color: #0a0a0a !important;
        white-space: normal !important;
        word-break: break-word !important;
        max-width: 400px;
        min-width: 250px;
    }

    .dataframe tbody td:not(:first-child) {
        font-family: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace !important;
        text-align: left !important;
        white-space: nowrap !important;
        min-width: 80px !important;
    }

    .dataframe td:nth-child(2), .dataframe th:nth-child(2) {
        max-width: 220px;
        min-width: 140px;
    }

    .column-selector-dropdown {
        min-width: 300px;
    }

    .column-selector-dropdown .wrap {
        flex-wrap: nowrap !important;
        overflow-x: auto !important;
        gap: 0.25rem !important;
        padding: 0.5rem !important;
    }

    .column-selector-dropdown .wrap input {
        width: 100% !important;
        padding-left: 0.5rem !important;
        border: none !important;
        box-shadow: none !important;
    }

    .heatmap-table {
        border: 1px solid #e5e5e5 !important;
        border-radius: 8px !important;
        overflow: hidden !important;
    }

    .heatmap-table th {
        background: #f5f5f5 !important;
        color: #525252 !important;
        padding: 0.6rem 0.75rem !important;
        font-size: 0.72rem !important;
        border-bottom: 2px solid #e5e5e5 !important;
    }

    .heatmap-table td {
        padding: 0.5rem 0.75rem !important;
        border-bottom: 1px solid #e5e5e5 !important;
    }

    .heatmap-table td.metric-name {
        background: #f5f5f5 !important;
        font-weight: 600 !important;
    }

    .heatmap-table td.score-cell.best {
        background: rgba(34, 197, 94, 0.2) !important;
        color: #15803d !important;
    }

    .heatmap-table td.score-cell.good {
        background: rgba(34, 197, 94, 0.1) !important;
        color: #16a34a !important;
    }

    .heatmap-table td.score-cell.mid {
        background: rgba(234, 179, 8, 0.15) !important;
        color: #a16207 !important;
    }

    .heatmap-table td.score-cell.low {
        background: rgba(239, 68, 68, 0.12) !important;
        color: #dc2626 !important;
    }

    .heatmap-table td.score-cell.worst {
        background: rgba(239, 68, 68, 0.2) !important;
        color: #b91c1c !important;
    }

    .heatmap-table td.score-cell.na {
        color: #a1a1a1 !important;
        font-style: italic !important;
    }

    .gradio-container footer {
        display: none !important;
    }

    ::-webkit-scrollbar {
        width: 8px;
        height: 8px;
    }

    ::-webkit-scrollbar-track {
        background: #f5f5f5;
    }

    ::-webkit-scrollbar-thumb {
        background: #d4d4d4;
        border-radius: 4px;
    }

    ::-webkit-scrollbar-thumb:hover {
        background: #a1a1a1;
    }
    """


# NOTE(review): the original line continues here with the opening of
# `format_leaderboard_header`. Its string literal is unterminated and the rest
# of the body is garbled in this copy of the file (the embedded markup was
# stripped), so it cannot be kept as live code. The visible fragment is
# preserved below; restore the full function from version control.
#
#     def format_leaderboard_header(selected_leaderboard, metadata):
#         if not selected_leaderboard:
#             return '
Try a different model name
Choose models from the dropdown
| Metric | ' for model_name in selected_models: short = model_name[:20] + "…" if len(model_name) > 20 else model_name html += f'{short} | ' html += '|
|---|---|---|
| {metric_name} | ' scores = {} for m in selected_models: if m in model_data_dict and leaderboard_name in model_data_dict[m]: scores[m] = model_data_dict[m][leaderboard_name].get("results", {}).get(metric_name) valid = [v for v in scores.values() if v is not None] max_s = max(valid) if valid else None min_s = min(valid) if valid else None for model_name in selected_models: score = scores.get(model_name) if score is not None: if len(valid) > 1 and max_s and min_s: if score == max_s: cls = "best" elif max_s > min_s: pct = (score - min_s) / (max_s - min_s) cls = "good" if pct >= 0.75 else "mid" if pct >= 0.5 else "low" if pct >= 0.25 else "worst" else: cls = "" else: cls = "" html += f'{score:.2f} | ' else: html += '— | ' html += '