every_eval_ever_space / ui_components.py
deepmage121's picture
moving to EEE hf org
a92080e
raw
history blame
35 kB
"""
UI Components: Themes, CSS, and HTML formatters for the Gradio interface.
Nord color theme with balanced contrast.
"""
import gradio as gr
def get_theme():
    """Build the Gradio theme used by the interface.

    The theme starts from ``gr.themes.Base`` (blue primary hue, slate neutral
    hue, DM Sans / JetBrains Mono fonts) and overrides a set of theme
    variables with Nord hex colours. Each colour is passed for both the plain
    variable and its ``_dark`` twin with the same value, so the two variants
    are configured identically.

    Returns:
        The configured theme object returned by ``Base.set``.
    """
    # (theme variable, colour) pairs; each one is mirrored into "<name>_dark".
    mirrored_colours = (
        ("body_background_fill", "#2E3440"),
        ("body_text_color", "#ECEFF4"),
        ("body_text_color_subdued", "#4C566A"),
        ("block_background_fill", "#3B4252"),
        ("block_border_color", "#434C5E"),
        ("block_label_text_color", "#D8DEE9"),
        ("block_title_text_color", "#ECEFF4"),
        ("input_background_fill", "#2E3440"),
        ("input_border_color", "#4C566A"),
        ("button_primary_background_fill", "#88C0D0"),
        ("button_primary_text_color", "#2E3440"),
        ("button_secondary_background_fill", "#434C5E"),
        ("button_secondary_text_color", "#ECEFF4"),
    )

    overrides = {}
    for variable, colour in mirrored_colours:
        overrides[variable] = colour
        overrides[variable + "_dark"] = colour
    # The border width is the only override without a "_dark" counterpart.
    overrides["block_border_width"] = "1px"

    base_theme = gr.themes.Base(
        primary_hue="blue",
        neutral_hue="slate",
        font=[gr.themes.GoogleFont("DM Sans"), "system-ui", "sans-serif"],
        font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "monospace"],
    )
    return base_theme.set(**overrides)
def get_custom_css():
    """Return the custom stylesheet for the interface as one CSS string.

    The stylesheet uses the Nord palette listed in its leading comment and
    covers the header, tabs, controls bar, info banner, dataframe, pagination,
    model card, comparison view, heat-map table, buttons, inputs, accordion,
    metric cards, scrollbars and a small responsive section.

    The "remove" glyph shown on hover over a selected-model chip is the
    multiplication sign (U+00D7); it was previously stored as a mis-decoded
    two-character sequence.

    Returns:
        str: The CSS source.
    """
    return """
/* === Nord Theme ===
Polar Night: #2E3440 (bg), #3B4252 (surface), #434C5E, #4C566A
Snow Storm: #D8DEE9, #E5E9F0, #ECEFF4
Frost: #8FBCBB, #88C0D0, #81A1C1, #5E81AC
Aurora: #BF616A, #D08770, #EBCB8B, #A3BE8C, #B48EAD
*/
/* Lock the UI to dark Nord regardless of OS preference */
:root {
color-scheme: dark;
background-color: #2E3440;
}
body {
background: #2E3440 !important;
color: #ECEFF4 !important;
}
/* === Base === */
.gradio-container {
max-width: 100% !important;
margin: 0 !important;
padding: 1.25rem 2.5rem 2rem !important;
background: #2E3440 !important;
color: #ECEFF4 !important;
font-family: 'DM Sans', system-ui, sans-serif !important;
font-size: 16px !important;
}
/* === Header === */
.app-header {
display: flex;
align-items: center;
gap: 1rem;
margin-bottom: 1.5rem;
padding: 1.25rem 1.5rem;
background: #3B4252;
border: 1px solid #434C5E;
border-radius: 12px;
}
.app-header .logo-mark {
width: 48px;
height: 48px;
background: linear-gradient(135deg, #88C0D0 0%, #81A1C1 100%);
border-radius: 12px;
display: flex;
align-items: center;
justify-content: center;
font-weight: 800;
font-size: 1.1rem;
color: #2E3440;
}
.app-header .brand {
display: flex;
flex-direction: column;
gap: 0.125rem;
}
.app-header h1 {
margin: 0;
font-size: 1.5rem;
font-weight: 700;
color: #ECEFF4;
letter-spacing: -0.02em;
}
.app-header .tagline {
color: #D8DEE9;
font-size: 0.85rem;
}
.app-header .header-right {
margin-left: auto;
display: flex;
align-items: center;
gap: 0.75rem;
}
.app-header .version-badge {
background: rgba(136, 192, 208, 0.2);
border: 1px solid rgba(136, 192, 208, 0.4);
border-radius: 6px;
padding: 0.25rem 0.625rem;
font-size: 0.7rem;
font-family: 'JetBrains Mono', monospace;
color: #88C0D0;
}
/* === Tabs === */
.tabs {
border: none !important;
background: transparent !important;
}
.tab-nav {
background: #3B4252 !important;
border: 1px solid #434C5E !important;
border-radius: 10px !important;
padding: 0.25rem !important;
gap: 0.25rem !important;
margin-bottom: 1.25rem !important;
display: inline-flex !important;
}
.tab-nav button {
background: transparent !important;
border: none !important;
color: #D8DEE9 !important;
padding: 0.75rem 1.5rem !important;
font-size: 0.95rem !important;
font-weight: 500 !important;
border-radius: 8px !important;
transition: all 0.15s ease !important;
}
.tab-nav button.selected {
color: #2E3440 !important;
background: #88C0D0 !important;
}
.tab-nav button:hover:not(.selected) {
background: #434C5E !important;
color: #ECEFF4 !important;
}
.tabitem {
background: transparent !important;
border: none !important;
padding: 0 !important;
}
/* === Controls bar === */
.controls-bar {
background: #3B4252 !important;
border: 1px solid #434C5E !important;
border-radius: 10px !important;
padding: 0.75rem 1.25rem !important;
margin-bottom: 1rem !important;
gap: 0.75rem !important;
}
.controls-bar label {
font-size: 0.75rem !important;
text-transform: uppercase !important;
letter-spacing: 0.04em !important;
color: #D8DEE9 !important;
font-weight: 500 !important;
}
/* === Info banner === */
.info-banner {
background: #3B4252 !important;
border: 1px solid #434C5E !important;
border-left: 3px solid #88C0D0 !important;
border-radius: 0 10px 10px 0 !important;
padding: 0.75rem 1rem !important;
margin-bottom: 1rem !important;
}
.info-banner h3 {
margin: 0;
font-size: 1.1rem;
font-weight: 600;
color: #ECEFF4;
}
.info-banner .eval-tags {
display: flex;
flex-wrap: wrap;
gap: 0.375rem;
}
.info-banner .eval-tag {
background: rgba(143, 188, 187, 0.15);
border: 1px solid rgba(143, 188, 187, 0.3);
border-radius: 4px;
padding: 0.3rem 0.6rem;
font-size: 0.8rem;
font-family: 'JetBrains Mono', monospace;
color: #8FBCBB;
}
/* === Dataframe - seamless styling === */
.dataframe,
.dataframe > div,
.dataframe > div > div,
.dataframe .table-wrap,
.dataframe .svelte-1gfkn6j {
background: #2E3440 !important;
border: none !important;
box-shadow: none !important;
border-radius: 0 !important;
}
.dataframe table {
width: 100% !important;
border-collapse: collapse !important;
font-size: 0.95rem !important;
table-layout: auto !important;
background: #2E3440 !important;
}
.dataframe thead,
.dataframe thead tr {
background: #2E3440 !important;
position: sticky;
top: 0;
z-index: 10;
}
.dataframe thead th {
padding: 0.875rem 1rem !important;
font-weight: 600 !important;
font-size: 0.75rem !important;
text-transform: uppercase !important;
letter-spacing: 0.05em !important;
color: #81A1C1 !important;
border-bottom: 1px solid #434C5E !important;
border-top: none !important;
text-align: left !important;
background: #2E3440 !important;
}
.dataframe tbody,
.dataframe tbody tr {
background: #2E3440 !important;
}
.dataframe tbody tr {
border-bottom: 1px solid #3B4252 !important;
}
.dataframe tbody tr:hover {
background: rgba(136, 192, 208, 0.04) !important;
}
.dataframe tbody td {
padding: 0.75rem 1rem !important;
color: #E5E9F0 !important;
background: #2E3440 !important;
overflow: hidden !important;
text-overflow: ellipsis !important;
border: none !important;
}
/* === Pagination bar === */
.pagination-bar {
margin-top: 1rem !important;
padding: 1rem 0 !important;
border-top: 1px solid #3B4252 !important;
display: flex !important;
justify-content: center !important;
align-items: center !important;
gap: 1rem !important;
}
.page-info {
font-family: 'JetBrains Mono', monospace !important;
font-size: 1rem !important;
color: #D8DEE9 !important;
min-width: 80px !important;
text-align: center !important;
}
/* Model name - white, readable */
.dataframe tbody td:first-child {
font-weight: 500 !important;
color: #ECEFF4 !important;
white-space: nowrap !important;
}
/* All other columns - use monospace for numbers */
.dataframe tbody td:not(:first-child) {
font-family: 'JetBrains Mono', monospace !important;
color: #8FBCBB !important;
text-align: left !important;
}
.dataframe tbody td:nth-child(2) {
color: #88C0D0 !important;
white-space: nowrap !important;
}
.dataframe tbody td:nth-child(3) {
color: #D08770 !important;
}
.dataframe tbody td:nth-child(4) {
font-weight: 600 !important;
color: #A3BE8C !important;
}
.dataframe tbody td:nth-child(n+5) {
white-space: nowrap !important;
}
/* === Status text === */
.status-text {
font-size: 0.9rem !important;
color: #D8DEE9 !important;
padding: 0.5rem 0 !important;
font-family: 'JetBrains Mono', monospace !important;
}
/* === Model Card === */
.model-card-container {
display: flex;
flex-direction: column;
gap: 1.25rem;
}
.model-card-header {
background: #3B4252;
border: 1px solid #434C5E;
border-radius: 12px;
padding: 1.5rem 2rem;
}
.model-card-header h2 {
margin: 0 0 0.5rem 0;
font-size: 1.5rem;
font-weight: 600;
color: #ECEFF4;
}
.model-card-header .model-meta {
display: flex;
gap: 1.5rem;
color: #D8DEE9;
font-size: 0.95rem;
}
.model-card-header .model-meta strong {
color: #8FBCBB;
}
.leaderboard-section {
background: #3B4252;
border: 1px solid #434C5E;
border-radius: 10px;
overflow: hidden;
}
.leaderboard-section-header {
background: #434C5E;
padding: 1rem 1.25rem;
border-bottom: 1px solid #4C566A;
display: flex;
justify-content: space-between;
align-items: center;
}
.leaderboard-section-header h3 {
margin: 0;
font-size: 1rem;
font-weight: 600;
color: #88C0D0;
}
.leaderboard-section-header .lb-avg {
background: rgba(163, 190, 140, 0.15);
border: 1px solid rgba(163, 190, 140, 0.3);
border-radius: 8px;
padding: 0.5rem 1rem;
font-size: 0.85rem;
color: #D8DEE9;
}
.leaderboard-section-header .lb-avg strong {
color: #A3BE8C;
font-family: 'JetBrains Mono', monospace;
font-size: 1.1rem;
font-weight: 700;
}
.scores-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
gap: 1px;
background: #434C5E;
}
.score-item {
background: #3B4252;
padding: 1rem 1.25rem;
}
.score-item .score-label {
font-size: 0.8rem;
text-transform: uppercase;
letter-spacing: 0.05em;
color: #D8DEE9;
margin-bottom: 0.375rem;
}
.score-item .score-value {
font-size: 1.5rem;
font-weight: 600;
font-family: 'JetBrains Mono', monospace;
color: #A3BE8C;
}
.score-item.highlight .score-value {
color: #88C0D0;
}
.no-results {
text-align: center;
padding: 3rem 1rem;
color: #D8DEE9;
}
.no-results h3 {
color: #ECEFF4;
margin-bottom: 0.5rem;
}
/* === New Comparison View === */
.comparison-container {
display: flex;
flex-direction: column;
gap: 1.5rem;
}
.comparison-summary {
background: #3B4252;
border: 1px solid #434C5E;
border-radius: 12px;
padding: 1.5rem;
}
.comparison-summary h2 {
margin: 0 0 1rem 0;
color: #ECEFF4;
font-size: 1.25rem;
}
.summary-cards {
display: flex;
gap: 1rem;
flex-wrap: wrap;
}
.summary-card {
flex: 1;
min-width: 200px;
background: #2E3440;
border-radius: 8px;
padding: 1rem;
}
.summary-card-header {
display: flex;
align-items: center;
gap: 0.5rem;
margin-bottom: 0.75rem;
}
.model-dot {
width: 10px;
height: 10px;
border-radius: 50%;
}
.model-name {
font-weight: 600;
color: #ECEFF4;
font-size: 0.9rem;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
}
.summary-card-body {
display: flex;
flex-direction: column;
gap: 0.5rem;
}
.summary-stat {
display: flex;
justify-content: space-between;
align-items: center;
}
.summary-stat .stat-label {
font-size: 0.75rem;
color: #D8DEE9;
text-transform: uppercase;
letter-spacing: 0.05em;
}
.summary-stat .stat-value {
font-family: 'JetBrains Mono', monospace;
color: #8FBCBB;
}
.summary-stat.primary .stat-value.large {
font-size: 1.5rem;
font-weight: 700;
color: #A3BE8C;
}
.leaderboard-comparison-card {
background: #3B4252;
border: 1px solid #434C5E;
border-radius: 12px;
overflow: hidden;
}
.lb-card-header {
background: #434C5E;
padding: 0.875rem 1.25rem;
}
.lb-card-header h3 {
margin: 0;
color: #88C0D0;
font-size: 1rem;
font-weight: 600;
}
.lb-card-body {
padding: 1rem 1.25rem;
display: flex;
flex-direction: column;
gap: 0.75rem;
}
.metric-comparison {
display: flex;
flex-direction: column;
gap: 0.375rem;
}
.metric-name-row {
margin-bottom: 0.25rem;
}
.metric-title {
font-size: 0.85rem;
font-weight: 600;
color: #ECEFF4;
}
.metric-title.sub {
font-size: 0.75rem;
font-weight: 500;
color: #D8DEE9;
}
.model-score-row {
display: flex;
align-items: center;
gap: 0.5rem;
padding: 0.375rem 0;
}
.model-score-row.compact {
padding: 0.25rem 0;
}
.model-score-row.best-score {
background: rgba(163, 190, 140, 0.1);
border-radius: 4px;
padding-left: 0.5rem;
margin-left: -0.5rem;
}
.model-score-row.no-data {
opacity: 0.5;
}
.model-indicator {
width: 8px;
height: 8px;
border-radius: 2px;
flex-shrink: 0;
}
.model-indicator.small {
width: 6px;
height: 6px;
}
.score-bar-container {
flex: 1;
display: flex;
align-items: center;
gap: 0.75rem;
height: 24px;
background: #2E3440;
border-radius: 4px;
padding: 0 0.5rem;
position: relative;
}
.score-bar {
position: absolute;
left: 0;
top: 0;
bottom: 0;
border-radius: 4px;
opacity: 0.3;
}
.score-bar.thin {
opacity: 0.2;
}
.score-value {
position: relative;
font-family: 'JetBrains Mono', monospace;
font-size: 0.9rem;
font-weight: 600;
color: #ECEFF4;
z-index: 1;
}
.score-value.small {
font-size: 0.8rem;
font-weight: 500;
}
.score-value.dim {
color: #4C566A;
}
/* === Selected Models Chips === */
.selected-models-group label {
display: inline-flex !important;
align-items: center !important;
background: #434C5E;
border: 1px solid #4C566A;
border-radius: 16px;
padding: 0.35rem 0.85rem;
font-size: 0.85rem;
color: #ECEFF4;
gap: 0.4rem;
cursor: pointer;
margin: 0.15rem 0.3rem 0.15rem 0 !important;
}
.selected-models-group label span::before {
content: "×";
font-size: 0.75rem;
color: #EBCB8B;
opacity: 0;
transition: opacity 0.15s ease;
}
.selected-models-group label:hover span::before {
opacity: 1;
}
.selected-models-group input[type="checkbox"] {
display: none;
}
/* === Heat Map Table === */
.heatmap-table-wrapper {
overflow-x: auto;
margin-top: 1rem;
}
.heatmap-table {
width: 100%;
border-collapse: collapse;
font-size: 0.85rem;
}
.heatmap-table thead {
position: sticky;
top: 0;
z-index: 10;
}
.heatmap-table th {
background: #434C5E;
padding: 0.625rem 0.75rem;
font-weight: 600;
font-size: 0.7rem;
text-transform: uppercase;
letter-spacing: 0.05em;
color: #81A1C1;
text-align: left;
border-bottom: 2px solid #4C566A;
white-space: nowrap;
}
.heatmap-table th.metric-header {
min-width: 120px;
}
.heatmap-table th.model-header {
text-align: center;
max-width: 150px;
overflow: hidden;
text-overflow: ellipsis;
}
.heatmap-table td {
padding: 0.5rem 0.75rem;
border-bottom: 1px solid #3B4252;
}
.heatmap-table td.metric-name {
font-weight: 500;
color: #D8DEE9;
background: #2E3440;
}
.heatmap-table td.score-cell {
text-align: center;
font-family: 'JetBrains Mono', monospace;
font-weight: 500;
transition: all 0.15s ease;
}
.heatmap-table td.score-cell.best {
background: rgba(163, 190, 140, 0.25);
color: #A3BE8C;
font-weight: 700;
}
.heatmap-table td.score-cell.good {
background: rgba(163, 190, 140, 0.12);
color: #A3BE8C;
}
.heatmap-table td.score-cell.mid {
background: rgba(235, 203, 139, 0.12);
color: #EBCB8B;
}
.heatmap-table td.score-cell.low {
background: rgba(208, 135, 112, 0.12);
color: #D08770;
}
.heatmap-table td.score-cell.worst {
background: rgba(191, 97, 106, 0.15);
color: #BF616A;
}
.heatmap-table td.score-cell.na {
color: #4C566A;
font-style: italic;
}
.heatmap-table tr.avg-row {
background: rgba(136, 192, 208, 0.08);
}
.heatmap-table tr.avg-row td.metric-name {
font-weight: 700;
color: #88C0D0;
background: rgba(136, 192, 208, 0.08);
}
/* === Buttons === */
button {
border-radius: 8px !important;
font-weight: 500 !important;
font-size: 0.95rem !important;
transition: all 0.15s ease !important;
}
button.primary {
background: #88C0D0 !important;
color: #2E3440 !important;
border: none !important;
}
button.primary:hover:not(:disabled) {
background: #8FBCBB !important;
}
button.secondary,
button[variant="secondary"] {
background: #434C5E !important;
color: #ECEFF4 !important;
border: 1px solid #4C566A !important;
}
button.secondary:hover:not(:disabled),
button[variant="secondary"]:hover:not(:disabled) {
background: #4C566A !important;
}
button:disabled {
opacity: 0.35 !important;
}
/* === Inputs === */
input[type="text"],
select {
background: #2E3440 !important;
border: 1px solid #4C566A !important;
border-radius: 8px !important;
color: #ECEFF4 !important;
font-size: 1rem !important;
}
input[type="text"]:focus,
select:focus {
border-color: #88C0D0 !important;
box-shadow: 0 0 0 3px rgba(136, 192, 208, 0.15) !important;
outline: none !important;
}
input::placeholder {
color: #4C566A !important;
}
/* === Accordion === */
.accordion {
background: #3B4252 !important;
border: 1px solid #434C5E !important;
border-radius: 10px !important;
margin-top: 1.5rem !important;
}
.accordion > .label-wrap {
background: transparent !important;
padding: 1rem 1.25rem !important;
color: #D8DEE9 !important;
font-size: 0.95rem !important;
}
.accordion > .wrap {
padding: 0.5rem 1.25rem 1.25rem !important;
color: #D8DEE9 !important;
font-size: 0.95rem !important;
line-height: 1.6 !important;
}
.accordion code {
background: #434C5E !important;
padding: 0.125rem 0.375rem !important;
border-radius: 4px !important;
font-family: 'JetBrains Mono', monospace !important;
font-size: 0.8rem !important;
color: #8FBCBB !important;
}
/* === Metrics section === */
.metrics-section {
margin-top: 1.5rem;
padding-top: 1.5rem;
border-top: 1px solid #434C5E;
}
.metrics-section h3 {
font-size: 0.85rem;
font-weight: 600;
color: #D8DEE9;
margin: 0 0 1rem 0;
text-transform: uppercase;
letter-spacing: 0.05em;
}
.metrics-grid {
display: grid;
grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
gap: 0.75rem;
}
.metric-card {
background: #3B4252;
border: 1px solid #434C5E;
border-radius: 8px;
overflow: hidden;
}
.metric-card-header {
display: flex;
justify-content: space-between;
align-items: center;
padding: 0.75rem 1rem;
cursor: pointer;
list-style: none;
}
.metric-card-header::-webkit-details-marker {
display: none;
}
.metric-card-name {
font-weight: 500;
font-size: 0.95rem;
color: #ECEFF4;
}
.metric-card-direction {
font-size: 0.8rem;
color: #D8DEE9;
}
.metric-card-direction .arrow {
color: #A3BE8C;
font-weight: 600;
}
.metric-card-body {
padding: 0.875rem 1.25rem;
border-top: 1px solid #434C5E;
font-size: 0.9rem;
color: #D8DEE9;
line-height: 1.5;
}
.metric-type-badge {
font-size: 0.65rem;
text-transform: uppercase;
letter-spacing: 0.05em;
padding: 0.15rem 0.4rem;
background: rgba(180, 142, 173, 0.2);
border: 1px solid rgba(180, 142, 173, 0.35);
border-radius: 4px;
color: #B48EAD;
font-family: 'JetBrains Mono', monospace;
}
/* === Scrollbar === */
::-webkit-scrollbar {
width: 8px;
height: 8px;
}
::-webkit-scrollbar-track {
background: #2E3440;
}
::-webkit-scrollbar-thumb {
background: #4C566A;
border-radius: 4px;
}
::-webkit-scrollbar-thumb:hover {
background: #5E81AC;
}
/* === Responsive === */
@media (max-width: 768px) {
.gradio-container {
padding: 1rem !important;
}
.scores-grid {
grid-template-columns: repeat(2, 1fr);
}
}
/* === Overrides === */
.gradio-container footer {
display: none !important;
}
.block {
background: #3B4252 !important;
}
.gradio-radio label {
background: #434C5E !important;
border: 1px solid #4C566A !important;
color: #ECEFF4 !important;
border-radius: 8px !important;
font-size: 0.85rem !important;
}
.gradio-radio label.selected {
background: #88C0D0 !important;
border-color: #88C0D0 !important;
color: #2E3440 !important;
}
"""
def format_leaderboard_header(selected_leaderboard, metadata):
    """Render the info banner shown above a leaderboard table.

    Args:
        selected_leaderboard: Name of the chosen leaderboard. A falsy value
            yields a "Select a leaderboard to explore" placeholder.
        metadata: Mapping read via ``metadata["evals"]`` (iterated for the
            eval names) and ``metadata["source_info"]`` (``organization`` and
            ``url`` keys). When it is falsy or has no evals, only the
            leaderboard title is rendered.

    Returns:
        str: An HTML fragment.

    All interpolated values are HTML-escaped so that names or URLs containing
    ``<``, ``&`` or quotes cannot break the markup or the ``href`` attribute.
    """
    # Local import keeps this block self-contained.
    from html import escape

    if not selected_leaderboard:
        return """
<div style="text-align: center; padding: 2rem 1rem; color: #D8DEE9;">
<div style="font-size: 1.1rem;">Select a leaderboard to explore</div>
</div>
"""

    title = escape(str(selected_leaderboard))

    if not metadata or not metadata.get("evals"):
        return f"""
<div class="info-banner">
<h3>{title}</h3>
</div>
"""

    # Tolerate a missing or None "source_info" entry and empty field values.
    source_info = metadata.get("source_info") or {}
    org = escape(str(source_info.get("organization") or "Unknown"))
    url = escape(str(source_info.get("url") or "#"))

    eval_tags = "".join(
        f'<span class="eval-tag">{escape(str(name))}</span>'
        for name in metadata["evals"]
    )

    # rel="noopener noreferrer" stops the opened page from reaching back into
    # this one through window.opener.
    return f"""
<div class="info-banner">
<div style="display: flex; justify-content: space-between; align-items: center; gap: 1rem;">
<div style="display: flex; align-items: center; gap: 1rem; flex-wrap: wrap;">
<h3 style="margin: 0;">{title}</h3>
<span style="color: #D8DEE9; font-size: 0.8rem;">by {org}</span>
<div class="eval-tags" style="margin: 0;">{eval_tags}</div>
</div>
<a href="{url}" target="_blank" rel="noopener noreferrer" style="
font-size: 0.75rem;
color: #88C0D0;
text-decoration: none;
padding: 0.375rem 0.75rem;
border: 1px solid rgba(136, 192, 208, 0.4);
border-radius: 6px;
white-space: nowrap;
">Source →</a>
</div>
</div>
"""
def format_metric_details(selected_leaderboard, metadata):
    """Render one collapsible reference card per eval metric.

    Args:
        selected_leaderboard: Name of the chosen leaderboard; only its
            truthiness is used.
        metadata: Mapping whose ``"evals"`` entry maps each eval name to an
            info dict. The keys read from each info dict are ``score_type``,
            ``lower_is_better``, ``min_score``, ``max_score``,
            ``level_names`` and ``description``.

    Returns:
        str: An HTML fragment, or ``""`` when there is nothing to show.

    Interpolated values are HTML-escaped. A range is shown only when both
    ``min_score`` and ``max_score`` are present, so an info dict that lacks
    ``max_score`` no longer raises ``KeyError``.
    """
    # Local import keeps this block self-contained.
    from html import escape

    if not selected_leaderboard or not metadata or not metadata.get("evals"):
        return ""

    evals = metadata.get("evals", {})
    parts = [
        """
<div class="metrics-section">
<h3>Metric Reference</h3>
<div class="metrics-grid">
"""
    ]

    for eval_name, info in evals.items():
        kind = info.get("score_type")
        score_type = escape(str(kind).upper()) if kind else "—"

        lower_is_better = info.get("lower_is_better")
        direction = "Lower is better" if lower_is_better else "Higher is better"
        arrow = "↓" if lower_is_better else "↑"

        details = ""
        low, high = info.get("min_score"), info.get("max_score")
        if kind == "continuous" and low is not None and high is not None:
            details = f"Range: [{low} – {high}]"
        elif kind == "levels" and info.get("level_names"):
            details = f"Levels: {', '.join(str(level) for level in info['level_names'])}"

        description = escape(str(info.get("description", "No description")))

        parts.append(
            f"""
<details class="metric-card">
<summary class="metric-card-header">
<span class="metric-card-name">{escape(str(eval_name))}</span>
<span class="metric-card-direction"><span class="arrow">{arrow}</span> {direction}</span>
</summary>
<div class="metric-card-body">
<div>{description}</div>
<div style="display: flex; justify-content: space-between; align-items: center; margin-top: 0.5rem;">
<span style="font-size: 0.75rem; color: #D8DEE9;">{escape(details)}</span>
<span class="metric-type-badge">{score_type}</span>
</div>
</div>
</details>
"""
        )

    parts.append("</div></div>")
    return "".join(parts)
def format_model_card(model_name, model_data):
    """Render a card listing one model's scores on every leaderboard.

    Args:
        model_name: Display name used as the card title.
        model_data: Mapping of leaderboard name to an entry dict. The keys
            read from each entry are ``developer``, ``params``,
            ``architecture`` and ``results`` (metric name to score, where a
            score may be ``None``). Header metadata comes from the first
            entry. A falsy ``model_data`` yields a "No results found"
            placeholder.

    Returns:
        str: An HTML fragment.

    Notes:
        * An average of exactly ``0.0`` is rendered as ``0.00`` rather than
          being mistaken for "no data".
        * Metrics are listed by descending score, with missing scores last.
        * Interpolated values are HTML-escaped.
    """
    # Local import keeps this block self-contained.
    from html import escape

    if not model_data:
        return """
<div class="no-results">
<h3>No results found</h3>
<p>Try searching for a different model name</p>
</div>
"""

    first = next(iter(model_data.values()))
    developer = escape(str(first.get("developer", "Unknown")))
    params = first.get("params")
    arch = escape(str(first.get("architecture", "Unknown")))
    params_str = escape(f"{params}B") if params else "—"

    parts = [
        f"""
<div class="model-card-container">
<div class="model-card-header">
<h2>{escape(str(model_name))}</h2>
<div class="model-meta">
<span><strong>Developer:</strong> {developer}</span>
<span><strong>Parameters:</strong> {params_str}</span>
<span><strong>Architecture:</strong> {arch}</span>
</div>
</div>
"""
    ]

    for leaderboard_name, data in model_data.items():
        results = data.get("results", {})
        if not results:
            continue

        scores = [value for value in results.values() if value is not None]
        avg = sum(scores) / len(scores) if scores else None
        # Compare against None: a genuine average of 0.0 must still be shown.
        avg_str = f"{avg:.2f}" if avg is not None else "—"

        parts.append(
            f"""
<div class="leaderboard-section">
<div class="leaderboard-section-header">
<h3>{escape(str(leaderboard_name))}</h3>
<span class="lb-avg">Avg: <strong>{avg_str}</strong></span>
</div>
<div class="scores-grid">
"""
        )

        # Present scores first (descending); missing scores sink to the end
        # instead of being ranked as if they were 0.
        ranked = sorted(
            results.items(),
            key=lambda item: (item[1] is not None, item[1] if item[1] is not None else 0),
            reverse=True,
        )
        for position, (metric_name, score) in enumerate(ranked):
            score_display = f"{score:.2f}" if score is not None else "—"
            highlight_class = "highlight" if position == 0 else ""
            parts.append(
                f"""
<div class="score-item {highlight_class}">
<div class="score-label">{escape(str(metric_name))}</div>
<div class="score-value">{score_display}</div>
</div>
"""
            )

        parts.append("</div></div>")

    parts.append("</div>")
    return "".join(parts)
def format_model_comparison(selected_models, all_results):
"""Formats a comparison view showing multiple models with visual indicators."""
if not selected_models or not all_results:
return """
<div class="no-results">
<h3>Select models to compare</h3>
<p>Choose multiple models from the dropdown to see a side-by-side comparison</p>
</div>
"""
# Get all unique leaderboards across selected models
all_leaderboards = set()
model_data_dict = {}
for model_name in selected_models:
if model_name in all_results:
model_data_dict[model_name] = all_results[model_name]
for leaderboard_name in all_results[model_name].keys():
all_leaderboards.add(leaderboard_name)
if not model_data_dict:
return """
<div class="no-results">
<h3>No data found for selected models</h3>
<p>Try selecting different models</p>
</div>
"""
all_leaderboards = sorted(all_leaderboards)
model_colors = ['#88C0D0', '#A3BE8C', '#EBCB8B', '#D08770', '#B48EAD', '#8FBCBB', '#81A1C1', '#BF616A']
# Calculate overall averages for summary
overall_avgs = {}
for model_name in selected_models:
if model_name in model_data_dict:
all_scores = []
for lb_data in model_data_dict[model_name].values():
all_scores.extend([v for v in lb_data.get("results", {}).values() if v is not None])
overall_avgs[model_name] = sum(all_scores) / len(all_scores) if all_scores else None
html = """
<div class="comparison-container">
<div class="comparison-summary">
<h2>Model Comparison</h2>
<div class="summary-cards">
"""
# Summary cards for each model
for i, model_name in enumerate(selected_models):
color = model_colors[i % len(model_colors)]
avg = overall_avgs.get(model_name)
avg_str = f"{avg:.2f}" if avg is not None else "β€”"
# Get model info
model_info = list(model_data_dict.get(model_name, {}).values())
developer = model_info[0].get("developer", "Unknown") if model_info else "Unknown"
html += f"""
<div class="summary-card" style="border-left: 4px solid {color};">
<div class="summary-card-header">
<span class="model-dot" style="background: {color};"></span>
<span class="model-name">{model_name}</span>
</div>
<div class="summary-card-body">
<div class="summary-stat">
<span class="stat-label">Developer</span>
<span class="stat-value">{developer}</span>
</div>
<div class="summary-stat primary">
<span class="stat-label">Overall Avg</span>
<span class="stat-value large">{avg_str}</span>
</div>
</div>
</div>
"""
html += """
</div>
</div>
"""
# Leaderboard comparison cards
for leaderboard_name in all_leaderboards:
leaderboard_metrics = set()
for model_data in model_data_dict.values():
if leaderboard_name in model_data:
results = model_data[leaderboard_name].get("results", {})
leaderboard_metrics.update(results.keys())
leaderboard_metrics = sorted(leaderboard_metrics)
if not leaderboard_metrics:
continue
# Calculate averages for ranking
model_avgs = {}
for model_name in selected_models:
if model_name in model_data_dict and leaderboard_name in model_data_dict[model_name]:
results = model_data_dict[model_name][leaderboard_name].get("results", {})
scores = [v for v in results.values() if v is not None]
model_avgs[model_name] = sum(scores) / len(scores) if scores else None
html += f"""
<div class="leaderboard-comparison-card">
<div class="lb-card-header">
<h3>{leaderboard_name}</h3>
</div>
<div class="lb-card-body">
"""
# Compact heat-map table
html += '<div class="heatmap-table-wrapper">'
html += '<table class="heatmap-table">'
# Header with model names
html += '<thead><tr><th class="metric-header">Metric</th>'
for i, model_name in enumerate(selected_models):
# Truncate long names
short_name = model_name if len(model_name) <= 20 else model_name[:18] + "…"
html += f'<th class="model-header" title="{model_name}">{short_name}</th>'
html += '</tr></thead>'
html += '<tbody>'
# Average row first
html += '<tr class="avg-row"><td class="metric-name">Average</td>'
valid_avgs_list = [model_avgs.get(m) for m in selected_models if model_avgs.get(m) is not None]
max_avg_val = max(valid_avgs_list) if valid_avgs_list else None
for model_name in selected_models:
avg = model_avgs.get(model_name)
if avg is not None:
cell_class = "best" if avg == max_avg_val and len(valid_avgs_list) > 1 else ""
html += f'<td class="score-cell {cell_class}">{avg:.2f}</td>'
else:
html += '<td class="score-cell na">β€”</td>'
html += '</tr>'
# Individual metric rows
for metric_name in leaderboard_metrics:
html += f'<tr><td class="metric-name">{metric_name}</td>'
# Get all scores for this metric
metric_scores = {}
for model_name in selected_models:
if model_name in model_data_dict and leaderboard_name in model_data_dict[model_name]:
results = model_data_dict[model_name][leaderboard_name].get("results", {})
metric_scores[model_name] = results.get(metric_name)
valid_scores = [v for v in metric_scores.values() if v is not None]
if valid_scores:
max_score = max(valid_scores)
min_score = min(valid_scores)
score_range = max_score - min_score if max_score > min_score else 1
else:
max_score = min_score = score_range = None
for model_name in selected_models:
score = metric_scores.get(model_name)
if score is not None and score_range is not None:
# Determine color class based on relative position
if len(valid_scores) > 1:
pct = (score - min_score) / score_range if score_range > 0 else 1
if score == max_score:
cell_class = "best"
elif pct >= 0.75:
cell_class = "good"
elif pct >= 0.5:
cell_class = "mid"
elif pct >= 0.25:
cell_class = "low"
else:
cell_class = "worst"
else:
cell_class = ""
html += f'<td class="score-cell {cell_class}">{score:.2f}</td>'
else:
html += '<td class="score-cell na">β€”</td>'
html += '</tr>'
html += '</tbody></table></div>'
html += """
</div>
</div>
"""
html += "</div>"
return html