Spaces:
Running
Running
| """ | |
| UI Components: Themes, CSS, and HTML formatters for the Gradio interface. | |
| Nord color theme with balanced contrast. | |
| """ | |
| import gradio as gr | |
def get_theme():
    """Build the Gradio theme used by the app.

    Starts from ``gr.themes.Base`` (blue primary hue, slate neutral hue,
    DM Sans / JetBrains Mono fonts) and overrides the colour tokens with
    Nord hex values. Every colour token is set to the same value for both
    its plain and its ``_dark`` variant, so the result looks identical in
    light and dark mode.
    """
    # Colour tokens; each one is mirrored onto its "<token>_dark" twin below.
    nord_colors = {
        "body_background_fill": "#2E3440",
        "body_text_color": "#ECEFF4",
        "body_text_color_subdued": "#4C566A",
        "block_background_fill": "#3B4252",
        "block_border_color": "#434C5E",
        "block_label_text_color": "#D8DEE9",
        "block_title_text_color": "#ECEFF4",
        "input_background_fill": "#2E3440",
        "input_border_color": "#4C566A",
        "button_primary_background_fill": "#88C0D0",
        "button_primary_text_color": "#2E3440",
        "button_secondary_background_fill": "#434C5E",
        "button_secondary_text_color": "#ECEFF4",
    }

    # The border width is the only override without a dark-mode counterpart.
    overrides = {"block_border_width": "1px"}
    for token, hex_value in nord_colors.items():
        overrides[token] = hex_value
        overrides[f"{token}_dark"] = hex_value

    sans_stack = [gr.themes.GoogleFont("DM Sans"), "system-ui", "sans-serif"]
    mono_stack = [gr.themes.GoogleFont("JetBrains Mono"), "monospace"]
    base_theme = gr.themes.Base(
        primary_hue="blue",
        neutral_hue="slate",
        font=sans_stack,
        font_mono=mono_stack,
    )
    return base_theme.set(**overrides)
def get_custom_css():
    """Return the custom CSS stylesheet for the interface as a single string.

    The stylesheet uses the Nord palette throughout and covers the page
    base, header, tabs, controls bar, info banner, dataframe, pagination,
    model card, comparison view, selected-model chips, heat-map table,
    buttons, inputs, accordion, metric cards, scrollbars, one responsive
    breakpoint (768px) and a few Gradio overrides.

    Returns:
        str: CSS text, ready to be handed to Gradio as custom CSS.
    """
    return """
    /* === Nord Theme ===
    Polar Night: #2E3440 (bg), #3B4252 (surface), #434C5E, #4C566A
    Snow Storm: #D8DEE9, #E5E9F0, #ECEFF4
    Frost: #8FBCBB, #88C0D0, #81A1C1, #5E81AC
    Aurora: #BF616A, #D08770, #EBCB8B, #A3BE8C, #B48EAD
    */
    /* Lock the UI to dark Nord regardless of OS preference */
    :root {
        color-scheme: dark;
        background-color: #2E3440;
    }
    body {
        background: #2E3440 !important;
        color: #ECEFF4 !important;
    }
    /* === Base === */
    .gradio-container {
        max-width: 100% !important;
        margin: 0 !important;
        padding: 1.25rem 2.5rem 2rem !important;
        background: #2E3440 !important;
        color: #ECEFF4 !important;
        font-family: 'DM Sans', system-ui, sans-serif !important;
        font-size: 16px !important;
    }
    /* === Header === */
    .app-header {
        display: flex;
        align-items: center;
        gap: 1rem;
        margin-bottom: 1.5rem;
        padding: 1.25rem 1.5rem;
        background: #3B4252;
        border: 1px solid #434C5E;
        border-radius: 12px;
    }
    .app-header .logo-mark {
        width: 48px;
        height: 48px;
        background: linear-gradient(135deg, #88C0D0 0%, #81A1C1 100%);
        border-radius: 12px;
        display: flex;
        align-items: center;
        justify-content: center;
        font-weight: 800;
        font-size: 1.1rem;
        color: #2E3440;
    }
    .app-header .brand {
        display: flex;
        flex-direction: column;
        gap: 0.125rem;
    }
    .app-header h1 {
        margin: 0;
        font-size: 1.5rem;
        font-weight: 700;
        color: #ECEFF4;
        letter-spacing: -0.02em;
    }
    .app-header .tagline {
        color: #D8DEE9;
        font-size: 0.85rem;
    }
    .app-header .header-right {
        margin-left: auto;
        display: flex;
        align-items: center;
        gap: 0.75rem;
    }
    .app-header .version-badge {
        background: rgba(136, 192, 208, 0.2);
        border: 1px solid rgba(136, 192, 208, 0.4);
        border-radius: 6px;
        padding: 0.25rem 0.625rem;
        font-size: 0.7rem;
        font-family: 'JetBrains Mono', monospace;
        color: #88C0D0;
    }
    /* === Tabs === */
    .tabs {
        border: none !important;
        background: transparent !important;
    }
    .tab-nav {
        background: #3B4252 !important;
        border: 1px solid #434C5E !important;
        border-radius: 10px !important;
        padding: 0.25rem !important;
        gap: 0.25rem !important;
        margin-bottom: 1.25rem !important;
        display: inline-flex !important;
    }
    .tab-nav button {
        background: transparent !important;
        border: none !important;
        color: #D8DEE9 !important;
        padding: 0.75rem 1.5rem !important;
        font-size: 0.95rem !important;
        font-weight: 500 !important;
        border-radius: 8px !important;
        transition: all 0.15s ease !important;
    }
    .tab-nav button.selected {
        color: #2E3440 !important;
        background: #88C0D0 !important;
    }
    .tab-nav button:hover:not(.selected) {
        background: #434C5E !important;
        color: #ECEFF4 !important;
    }
    .tabitem {
        background: transparent !important;
        border: none !important;
        padding: 0 !important;
    }
    /* === Controls bar === */
    .controls-bar {
        background: #3B4252 !important;
        border: 1px solid #434C5E !important;
        border-radius: 10px !important;
        padding: 0.75rem 1.25rem !important;
        margin-bottom: 1rem !important;
        gap: 0.75rem !important;
    }
    .controls-bar label {
        font-size: 0.75rem !important;
        text-transform: uppercase !important;
        letter-spacing: 0.04em !important;
        color: #D8DEE9 !important;
        font-weight: 500 !important;
    }
    /* === Info banner === */
    .info-banner {
        background: #3B4252 !important;
        border: 1px solid #434C5E !important;
        border-left: 3px solid #88C0D0 !important;
        border-radius: 0 10px 10px 0 !important;
        padding: 0.75rem 1rem !important;
        margin-bottom: 1rem !important;
    }
    .info-banner h3 {
        margin: 0;
        font-size: 1.1rem;
        font-weight: 600;
        color: #ECEFF4;
    }
    .info-banner .eval-tags {
        display: flex;
        flex-wrap: wrap;
        gap: 0.375rem;
    }
    .info-banner .eval-tag {
        background: rgba(143, 188, 187, 0.15);
        border: 1px solid rgba(143, 188, 187, 0.3);
        border-radius: 4px;
        padding: 0.3rem 0.6rem;
        font-size: 0.8rem;
        font-family: 'JetBrains Mono', monospace;
        color: #8FBCBB;
    }
    /* === Dataframe - seamless styling === */
    .dataframe,
    .dataframe > div,
    .dataframe > div > div,
    .dataframe .table-wrap,
    .dataframe .svelte-1gfkn6j {
        background: #2E3440 !important;
        border: none !important;
        box-shadow: none !important;
        border-radius: 0 !important;
    }
    .dataframe table {
        width: 100% !important;
        border-collapse: collapse !important;
        font-size: 0.95rem !important;
        table-layout: auto !important;
        background: #2E3440 !important;
    }
    .dataframe thead,
    .dataframe thead tr {
        background: #2E3440 !important;
        position: sticky;
        top: 0;
        z-index: 10;
    }
    .dataframe thead th {
        padding: 0.875rem 1rem !important;
        font-weight: 600 !important;
        font-size: 0.75rem !important;
        text-transform: uppercase !important;
        letter-spacing: 0.05em !important;
        color: #81A1C1 !important;
        border-bottom: 1px solid #434C5E !important;
        border-top: none !important;
        text-align: left !important;
        background: #2E3440 !important;
    }
    .dataframe tbody,
    .dataframe tbody tr {
        background: #2E3440 !important;
    }
    .dataframe tbody tr {
        border-bottom: 1px solid #3B4252 !important;
    }
    .dataframe tbody tr:hover {
        background: rgba(136, 192, 208, 0.04) !important;
    }
    /* Cells paint their own opaque background (see the td rule below), which
       would hide the row tint above; clear it while the row is hovered. */
    .dataframe tbody tr:hover td {
        background: transparent !important;
    }
    .dataframe tbody td {
        padding: 0.75rem 1rem !important;
        color: #E5E9F0 !important;
        background: #2E3440 !important;
        overflow: hidden !important;
        text-overflow: ellipsis !important;
        border: none !important;
    }
    /* === Pagination bar === */
    .pagination-bar {
        margin-top: 1rem !important;
        padding: 1rem 0 !important;
        border-top: 1px solid #3B4252 !important;
        display: flex !important;
        justify-content: center !important;
        align-items: center !important;
        gap: 1rem !important;
    }
    .page-info {
        font-family: 'JetBrains Mono', monospace !important;
        font-size: 1rem !important;
        color: #D8DEE9 !important;
        min-width: 80px !important;
        text-align: center !important;
    }
    /* Model name - white, readable */
    .dataframe tbody td:first-child {
        font-weight: 500 !important;
        color: #ECEFF4 !important;
        white-space: nowrap !important;
    }
    /* All other columns - use monospace for numbers */
    .dataframe tbody td:not(:first-child) {
        font-family: 'JetBrains Mono', monospace !important;
        color: #8FBCBB !important;
        text-align: left !important;
    }
    .dataframe tbody td:nth-child(2) {
        color: #88C0D0 !important;
        white-space: nowrap !important;
    }
    .dataframe tbody td:nth-child(3) {
        color: #D08770 !important;
    }
    .dataframe tbody td:nth-child(4) {
        font-weight: 600 !important;
        color: #A3BE8C !important;
    }
    .dataframe tbody td:nth-child(n+5) {
        white-space: nowrap !important;
    }
    /* === Status text === */
    .status-text {
        font-size: 0.9rem !important;
        color: #D8DEE9 !important;
        padding: 0.5rem 0 !important;
        font-family: 'JetBrains Mono', monospace !important;
    }
    /* === Model Card === */
    .model-card-container {
        display: flex;
        flex-direction: column;
        gap: 1.25rem;
    }
    .model-card-header {
        background: #3B4252;
        border: 1px solid #434C5E;
        border-radius: 12px;
        padding: 1.5rem 2rem;
    }
    .model-card-header h2 {
        margin: 0 0 0.5rem 0;
        font-size: 1.5rem;
        font-weight: 600;
        color: #ECEFF4;
    }
    .model-card-header .model-meta {
        display: flex;
        gap: 1.5rem;
        color: #D8DEE9;
        font-size: 0.95rem;
    }
    .model-card-header .model-meta strong {
        color: #8FBCBB;
    }
    .leaderboard-section {
        background: #3B4252;
        border: 1px solid #434C5E;
        border-radius: 10px;
        overflow: hidden;
    }
    .leaderboard-section-header {
        background: #434C5E;
        padding: 1rem 1.25rem;
        border-bottom: 1px solid #4C566A;
        display: flex;
        justify-content: space-between;
        align-items: center;
    }
    .leaderboard-section-header h3 {
        margin: 0;
        font-size: 1rem;
        font-weight: 600;
        color: #88C0D0;
    }
    .leaderboard-section-header .lb-avg {
        background: rgba(163, 190, 140, 0.15);
        border: 1px solid rgba(163, 190, 140, 0.3);
        border-radius: 8px;
        padding: 0.5rem 1rem;
        font-size: 0.85rem;
        color: #D8DEE9;
    }
    .leaderboard-section-header .lb-avg strong {
        color: #A3BE8C;
        font-family: 'JetBrains Mono', monospace;
        font-size: 1.1rem;
        font-weight: 700;
    }
    .scores-grid {
        display: grid;
        grid-template-columns: repeat(auto-fill, minmax(180px, 1fr));
        gap: 1px;
        background: #434C5E;
    }
    .score-item {
        background: #3B4252;
        padding: 1rem 1.25rem;
    }
    .score-item .score-label {
        font-size: 0.8rem;
        text-transform: uppercase;
        letter-spacing: 0.05em;
        color: #D8DEE9;
        margin-bottom: 0.375rem;
    }
    .score-item .score-value {
        font-size: 1.5rem;
        font-weight: 600;
        font-family: 'JetBrains Mono', monospace;
        color: #A3BE8C;
    }
    .score-item.highlight .score-value {
        color: #88C0D0;
    }
    .no-results {
        text-align: center;
        padding: 3rem 1rem;
        color: #D8DEE9;
    }
    .no-results h3 {
        color: #ECEFF4;
        margin-bottom: 0.5rem;
    }
    /* === New Comparison View === */
    .comparison-container {
        display: flex;
        flex-direction: column;
        gap: 1.5rem;
    }
    .comparison-summary {
        background: #3B4252;
        border: 1px solid #434C5E;
        border-radius: 12px;
        padding: 1.5rem;
    }
    .comparison-summary h2 {
        margin: 0 0 1rem 0;
        color: #ECEFF4;
        font-size: 1.25rem;
    }
    .summary-cards {
        display: flex;
        gap: 1rem;
        flex-wrap: wrap;
    }
    .summary-card {
        flex: 1;
        min-width: 200px;
        background: #2E3440;
        border-radius: 8px;
        padding: 1rem;
    }
    .summary-card-header {
        display: flex;
        align-items: center;
        gap: 0.5rem;
        margin-bottom: 0.75rem;
    }
    .model-dot {
        width: 10px;
        height: 10px;
        border-radius: 50%;
    }
    .model-name {
        font-weight: 600;
        color: #ECEFF4;
        font-size: 0.9rem;
        overflow: hidden;
        text-overflow: ellipsis;
        white-space: nowrap;
    }
    .summary-card-body {
        display: flex;
        flex-direction: column;
        gap: 0.5rem;
    }
    .summary-stat {
        display: flex;
        justify-content: space-between;
        align-items: center;
    }
    .summary-stat .stat-label {
        font-size: 0.75rem;
        color: #D8DEE9;
        text-transform: uppercase;
        letter-spacing: 0.05em;
    }
    .summary-stat .stat-value {
        font-family: 'JetBrains Mono', monospace;
        color: #8FBCBB;
    }
    .summary-stat.primary .stat-value.large {
        font-size: 1.5rem;
        font-weight: 700;
        color: #A3BE8C;
    }
    .leaderboard-comparison-card {
        background: #3B4252;
        border: 1px solid #434C5E;
        border-radius: 12px;
        overflow: hidden;
    }
    .lb-card-header {
        background: #434C5E;
        padding: 0.875rem 1.25rem;
    }
    .lb-card-header h3 {
        margin: 0;
        color: #88C0D0;
        font-size: 1rem;
        font-weight: 600;
    }
    .lb-card-body {
        padding: 1rem 1.25rem;
        display: flex;
        flex-direction: column;
        gap: 0.75rem;
    }
    .metric-comparison {
        display: flex;
        flex-direction: column;
        gap: 0.375rem;
    }
    .metric-name-row {
        margin-bottom: 0.25rem;
    }
    .metric-title {
        font-size: 0.85rem;
        font-weight: 600;
        color: #ECEFF4;
    }
    .metric-title.sub {
        font-size: 0.75rem;
        font-weight: 500;
        color: #D8DEE9;
    }
    .model-score-row {
        display: flex;
        align-items: center;
        gap: 0.5rem;
        padding: 0.375rem 0;
    }
    .model-score-row.compact {
        padding: 0.25rem 0;
    }
    .model-score-row.best-score {
        background: rgba(163, 190, 140, 0.1);
        border-radius: 4px;
        padding-left: 0.5rem;
        margin-left: -0.5rem;
    }
    .model-score-row.no-data {
        opacity: 0.5;
    }
    .model-indicator {
        width: 8px;
        height: 8px;
        border-radius: 2px;
        flex-shrink: 0;
    }
    .model-indicator.small {
        width: 6px;
        height: 6px;
    }
    .score-bar-container {
        flex: 1;
        display: flex;
        align-items: center;
        gap: 0.75rem;
        height: 24px;
        background: #2E3440;
        border-radius: 4px;
        padding: 0 0.5rem;
        position: relative;
    }
    .score-bar {
        position: absolute;
        left: 0;
        top: 0;
        bottom: 0;
        border-radius: 4px;
        opacity: 0.3;
    }
    .score-bar.thin {
        opacity: 0.2;
    }
    .score-value {
        position: relative;
        font-family: 'JetBrains Mono', monospace;
        font-size: 0.9rem;
        font-weight: 600;
        color: #ECEFF4;
        z-index: 1;
    }
    .score-value.small {
        font-size: 0.8rem;
        font-weight: 500;
    }
    .score-value.dim {
        color: #4C566A;
    }
    /* === Selected Models Chips === */
    .selected-models-group label {
        display: inline-flex !important;
        align-items: center !important;
        background: #434C5E;
        border: 1px solid #4C566A;
        border-radius: 16px;
        padding: 0.35rem 0.85rem;
        font-size: 0.85rem;
        color: #ECEFF4;
        gap: 0.4rem;
        cursor: pointer;
        margin: 0.15rem 0.3rem 0.15rem 0 !important;
    }
    /* Multiplication sign used as the "remove" affordance, shown on hover */
    .selected-models-group label span::before {
        content: "×";
        font-size: 0.75rem;
        color: #EBCB8B;
        opacity: 0;
        transition: opacity 0.15s ease;
    }
    .selected-models-group label:hover span::before {
        opacity: 1;
    }
    .selected-models-group input[type="checkbox"] {
        display: none;
    }
    /* === Heat Map Table === */
    .heatmap-table-wrapper {
        overflow-x: auto;
        margin-top: 1rem;
    }
    .heatmap-table {
        width: 100%;
        border-collapse: collapse;
        font-size: 0.85rem;
    }
    .heatmap-table thead {
        position: sticky;
        top: 0;
        z-index: 10;
    }
    .heatmap-table th {
        background: #434C5E;
        padding: 0.625rem 0.75rem;
        font-weight: 600;
        font-size: 0.7rem;
        text-transform: uppercase;
        letter-spacing: 0.05em;
        color: #81A1C1;
        text-align: left;
        border-bottom: 2px solid #4C566A;
        white-space: nowrap;
    }
    .heatmap-table th.metric-header {
        min-width: 120px;
    }
    .heatmap-table th.model-header {
        text-align: center;
        max-width: 150px;
        overflow: hidden;
        text-overflow: ellipsis;
    }
    .heatmap-table td {
        padding: 0.5rem 0.75rem;
        border-bottom: 1px solid #3B4252;
    }
    .heatmap-table td.metric-name {
        font-weight: 500;
        color: #D8DEE9;
        background: #2E3440;
    }
    .heatmap-table td.score-cell {
        text-align: center;
        font-family: 'JetBrains Mono', monospace;
        font-weight: 500;
        transition: all 0.15s ease;
    }
    .heatmap-table td.score-cell.best {
        background: rgba(163, 190, 140, 0.25);
        color: #A3BE8C;
        font-weight: 700;
    }
    .heatmap-table td.score-cell.good {
        background: rgba(163, 190, 140, 0.12);
        color: #A3BE8C;
    }
    .heatmap-table td.score-cell.mid {
        background: rgba(235, 203, 139, 0.12);
        color: #EBCB8B;
    }
    .heatmap-table td.score-cell.low {
        background: rgba(208, 135, 112, 0.12);
        color: #D08770;
    }
    .heatmap-table td.score-cell.worst {
        background: rgba(191, 97, 106, 0.15);
        color: #BF616A;
    }
    .heatmap-table td.score-cell.na {
        color: #4C566A;
        font-style: italic;
    }
    .heatmap-table tr.avg-row {
        background: rgba(136, 192, 208, 0.08);
    }
    .heatmap-table tr.avg-row td.metric-name {
        font-weight: 700;
        color: #88C0D0;
        background: rgba(136, 192, 208, 0.08);
    }
    /* === Buttons === */
    button {
        border-radius: 8px !important;
        font-weight: 500 !important;
        font-size: 0.95rem !important;
        transition: all 0.15s ease !important;
    }
    button.primary {
        background: #88C0D0 !important;
        color: #2E3440 !important;
        border: none !important;
    }
    button.primary:hover:not(:disabled) {
        background: #8FBCBB !important;
    }
    button.secondary,
    button[variant="secondary"] {
        background: #434C5E !important;
        color: #ECEFF4 !important;
        border: 1px solid #4C566A !important;
    }
    button.secondary:hover:not(:disabled),
    button[variant="secondary"]:hover:not(:disabled) {
        background: #4C566A !important;
    }
    button:disabled {
        opacity: 0.35 !important;
    }
    /* === Inputs === */
    input[type="text"],
    select {
        background: #2E3440 !important;
        border: 1px solid #4C566A !important;
        border-radius: 8px !important;
        color: #ECEFF4 !important;
        font-size: 1rem !important;
    }
    input[type="text"]:focus,
    select:focus {
        border-color: #88C0D0 !important;
        box-shadow: 0 0 0 3px rgba(136, 192, 208, 0.15) !important;
        outline: none !important;
    }
    input::placeholder {
        color: #4C566A !important;
    }
    /* === Accordion === */
    .accordion {
        background: #3B4252 !important;
        border: 1px solid #434C5E !important;
        border-radius: 10px !important;
        margin-top: 1.5rem !important;
    }
    .accordion > .label-wrap {
        background: transparent !important;
        padding: 1rem 1.25rem !important;
        color: #D8DEE9 !important;
        font-size: 0.95rem !important;
    }
    .accordion > .wrap {
        padding: 0.5rem 1.25rem 1.25rem !important;
        color: #D8DEE9 !important;
        font-size: 0.95rem !important;
        line-height: 1.6 !important;
    }
    .accordion code {
        background: #434C5E !important;
        padding: 0.125rem 0.375rem !important;
        border-radius: 4px !important;
        font-family: 'JetBrains Mono', monospace !important;
        font-size: 0.8rem !important;
        color: #8FBCBB !important;
    }
    /* === Metrics section === */
    .metrics-section {
        margin-top: 1.5rem;
        padding-top: 1.5rem;
        border-top: 1px solid #434C5E;
    }
    .metrics-section h3 {
        font-size: 0.85rem;
        font-weight: 600;
        color: #D8DEE9;
        margin: 0 0 1rem 0;
        text-transform: uppercase;
        letter-spacing: 0.05em;
    }
    .metrics-grid {
        display: grid;
        grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
        gap: 0.75rem;
    }
    .metric-card {
        background: #3B4252;
        border: 1px solid #434C5E;
        border-radius: 8px;
        overflow: hidden;
    }
    .metric-card-header {
        display: flex;
        justify-content: space-between;
        align-items: center;
        padding: 0.75rem 1rem;
        cursor: pointer;
        list-style: none;
    }
    .metric-card-header::-webkit-details-marker {
        display: none;
    }
    .metric-card-name {
        font-weight: 500;
        font-size: 0.95rem;
        color: #ECEFF4;
    }
    .metric-card-direction {
        font-size: 0.8rem;
        color: #D8DEE9;
    }
    .metric-card-direction .arrow {
        color: #A3BE8C;
        font-weight: 600;
    }
    .metric-card-body {
        padding: 0.875rem 1.25rem;
        border-top: 1px solid #434C5E;
        font-size: 0.9rem;
        color: #D8DEE9;
        line-height: 1.5;
    }
    .metric-type-badge {
        font-size: 0.65rem;
        text-transform: uppercase;
        letter-spacing: 0.05em;
        padding: 0.15rem 0.4rem;
        background: rgba(180, 142, 173, 0.2);
        border: 1px solid rgba(180, 142, 173, 0.35);
        border-radius: 4px;
        color: #B48EAD;
        font-family: 'JetBrains Mono', monospace;
    }
    /* === Scrollbar === */
    ::-webkit-scrollbar {
        width: 8px;
        height: 8px;
    }
    ::-webkit-scrollbar-track {
        background: #2E3440;
    }
    ::-webkit-scrollbar-thumb {
        background: #4C566A;
        border-radius: 4px;
    }
    ::-webkit-scrollbar-thumb:hover {
        background: #5E81AC;
    }
    /* === Responsive === */
    @media (max-width: 768px) {
        .gradio-container {
            padding: 1rem !important;
        }
        .scores-grid {
            grid-template-columns: repeat(2, 1fr);
        }
    }
    /* === Overrides === */
    .gradio-container footer {
        display: none !important;
    }
    .block {
        background: #3B4252 !important;
    }
    .gradio-radio label {
        background: #434C5E !important;
        border: 1px solid #4C566A !important;
        color: #ECEFF4 !important;
        border-radius: 8px !important;
        font-size: 0.85rem !important;
    }
    .gradio-radio label.selected {
        background: #88C0D0 !important;
        border-color: #88C0D0 !important;
        color: #2E3440 !important;
    }
    """
def format_leaderboard_header(selected_leaderboard, metadata):
    """Build the HTML info banner shown for the selected leaderboard.

    Args:
        selected_leaderboard: Leaderboard name to display. When falsy, a
            "Select a leaderboard to explore" placeholder is returned.
        metadata: Mapping describing the leaderboard, or a falsy value. The
            keys read are ``"evals"`` (a mapping whose keys are rendered as
            tags) and ``"source_info"`` (a mapping with ``"organization"``
            and ``"url"``).

    Returns:
        str: An HTML fragment. Without usable ``"evals"`` only the title
        banner is produced; otherwise the banner also shows the
        organisation, one tag per eval and a link to the source URL.
    """
    # Local import: the module already uses ``html`` as a variable name in
    # sibling functions, so only the function is pulled in, and only here.
    from html import escape

    if not selected_leaderboard:
        return """
        <div style="text-align: center; padding: 2rem 1rem; color: #D8DEE9;">
            <div style="font-size: 1.1rem;">Select a leaderboard to explore</div>
        </div>
        """

    # Every interpolated value is escaped so that names containing markup
    # characters (<, >, &, quotes) are displayed literally instead of being
    # parsed as HTML.
    title = escape(str(selected_leaderboard))

    if not metadata or not metadata.get("evals"):
        return f"""
        <div class="info-banner">
            <h3>{title}</h3>
        </div>
        """

    # ``or {}`` also covers a present-but-None "source_info" entry.
    source_info = metadata.get("source_info") or {}
    org = escape(str(source_info.get("organization", "Unknown")))
    url = escape(str(source_info.get("url", "#")))
    eval_tags = "".join(
        f'<span class="eval-tag">{escape(str(name))}</span>'
        for name in metadata["evals"]
    )

    # rel="noopener noreferrer" keeps the page opened via target="_blank"
    # from getting a handle on this window.
    return f"""
        <div class="info-banner">
            <div style="display: flex; justify-content: space-between; align-items: center; gap: 1rem;">
                <div style="display: flex; align-items: center; gap: 1rem; flex-wrap: wrap;">
                    <h3 style="margin: 0;">{title}</h3>
                    <span style="color: #D8DEE9; font-size: 0.8rem;">by {org}</span>
                    <div class="eval-tags" style="margin: 0;">{eval_tags}</div>
                </div>
                <a href="{url}" target="_blank" rel="noopener noreferrer" style="
                    font-size: 0.75rem;
                    color: #88C0D0;
                    text-decoration: none;
                    padding: 0.375rem 0.75rem;
                    border: 1px solid rgba(136, 192, 208, 0.4);
                    border-radius: 6px;
                    white-space: nowrap;
                ">Source →</a>
            </div>
        </div>
        """
def format_metric_details(selected_leaderboard, metadata):
    """Build the "Metric Reference" section: one collapsible card per eval.

    Args:
        selected_leaderboard: Currently selected leaderboard; only checked
            for truthiness.
        metadata: Mapping with an ``"evals"`` entry that maps each eval name
            to an info mapping. The info keys read are ``"score_type"``,
            ``"lower_is_better"``, ``"min_score"``, ``"max_score"``,
            ``"level_names"`` and ``"description"``.

    Returns:
        str: An HTML fragment, or ``""`` when no leaderboard is selected or
        the metadata has no evals.
    """
    # Local import: sibling functions use ``html`` as a variable name.
    from html import escape

    if not selected_leaderboard or not metadata or not metadata.get("evals"):
        return ""

    parts = [
        """
        <div class="metrics-section">
            <h3>Metric Reference</h3>
            <div class="metrics-grid">
        """
    ]

    for eval_name, info in metadata["evals"].items():
        info = info or {}
        score_type = info.get("score_type")
        badge = str(score_type).upper() if score_type else "—"

        if info.get("lower_is_better"):
            arrow, direction = "↓", "Lower is better"
        else:
            arrow, direction = "↑", "Higher is better"

        details = ""
        if score_type == "continuous" and info.get("min_score") is not None:
            lower_bound = info["min_score"]
            upper_bound = info.get("max_score")
            # A metric may declare only its minimum; show what is known
            # instead of failing on the missing upper bound.
            if upper_bound is None:
                details = f"Min: {lower_bound}"
            else:
                details = f"Range: [{lower_bound} – {upper_bound}]"
        elif score_type == "levels" and info.get("level_names"):
            details = "Levels: " + ", ".join(str(level) for level in info["level_names"])

        description = info.get("description", "No description")

        # All metadata-derived text is escaped before being placed in markup.
        parts.append(
            f"""
            <details class="metric-card">
                <summary class="metric-card-header">
                    <span class="metric-card-name">{escape(str(eval_name))}</span>
                    <span class="metric-card-direction"><span class="arrow">{arrow}</span> {direction}</span>
                </summary>
                <div class="metric-card-body">
                    <div>{escape(str(description))}</div>
                    <div style="display: flex; justify-content: space-between; align-items: center; margin-top: 0.5rem;">
                        <span style="font-size: 0.75rem; color: #D8DEE9;">{escape(details)}</span>
                        <span class="metric-type-badge">{escape(badge)}</span>
                    </div>
                </div>
            </details>
            """
        )

    parts.append("</div></div>")
    return "".join(parts)
def format_model_card(model_name, model_data):
    """Build the HTML card listing a model's scores on every leaderboard.

    Args:
        model_name: Name shown in the card header.
        model_data: Mapping of leaderboard name to an entry mapping. Each
            entry is read for ``"results"`` (metric name -> score or
            ``None``); the first entry additionally supplies
            ``"developer"``, ``"params"`` and ``"architecture"`` for the
            header.

    Returns:
        str: An HTML fragment. A "No results found" message when
        ``model_data`` is empty; otherwise a header followed by one section
        per leaderboard that has results. Within a section, metrics are
        ordered by descending score with missing scores last, and the top
        scored metric is highlighted.
    """
    # Local import: this module uses ``html`` as a variable name elsewhere.
    from html import escape

    if not model_data:
        return """
        <div class="no-results">
            <h3>No results found</h3>
            <p>Try searching for a different model name</p>
        </div>
        """

    # Header details are taken from whichever leaderboard entry comes first.
    first = next(iter(model_data.values()))
    developer = first.get("developer", "Unknown")
    params = first.get("params")
    arch = first.get("architecture", "Unknown")
    params_str = f"{params}B" if params else "—"

    parts = [
        f"""
        <div class="model-card-container">
            <div class="model-card-header">
                <h2>{escape(str(model_name))}</h2>
                <div class="model-meta">
                    <span><strong>Developer:</strong> {escape(str(developer))}</span>
                    <span><strong>Parameters:</strong> {escape(params_str)}</span>
                    <span><strong>Architecture:</strong> {escape(str(arch))}</span>
                </div>
            </div>
        """
    ]

    for leaderboard_name, data in model_data.items():
        results = data.get("results") or {}
        if not results:
            continue

        scores = [value for value in results.values() if value is not None]
        avg = sum(scores) / len(scores) if scores else None
        # Compare against None explicitly: an average of exactly 0 is a real
        # value and must be displayed as "0.00", not as "no data".
        avg_str = f"{avg:.2f}" if avg is not None else "—"

        parts.append(
            f"""
            <div class="leaderboard-section">
                <div class="leaderboard-section-header">
                    <h3>{escape(str(leaderboard_name))}</h3>
                    <span class="lb-avg">Avg: <strong>{avg_str}</strong></span>
                </div>
                <div class="scores-grid">
            """
        )

        # Sort on (has_score, score) so that missing scores always come last,
        # even when real scores are negative.
        ranked = sorted(
            results.items(),
            key=lambda item: (item[1] is not None, item[1] if item[1] is not None else 0),
            reverse=True,
        )
        for position, (metric_name, score) in enumerate(ranked):
            score_display = f"{score:.2f}" if score is not None else "—"
            # Only an actual score may take the highlighted top slot.
            is_top = position == 0 and score is not None
            item_class = "score-item highlight" if is_top else "score-item"
            parts.append(
                f"""
                    <div class="{item_class}">
                        <div class="score-label">{escape(str(metric_name))}</div>
                        <div class="score-value">{score_display}</div>
                    </div>
                """
            )

        parts.append("</div></div>")

    parts.append("</div>")
    return "".join(parts)
def _mean_or_none(values):
    """Return the mean of the non-``None`` items in *values*, or ``None`` if there are none."""
    present = [value for value in values if value is not None]
    return sum(present) / len(present) if present else None


def _heat_class(score, low, high, comparable):
    """Return the heat-map CSS class for *score* relative to the *low*..*high* span.

    ``comparable`` is false when only one model has a score for the metric;
    in that case no colouring is applied and ``""`` is returned.
    """
    if not comparable:
        return ""
    if score == high:
        return "best"
    # score != high implies low < high, so the span is strictly positive.
    fraction = (score - low) / (high - low)
    if fraction >= 0.75:
        return "good"
    if fraction >= 0.5:
        return "mid"
    if fraction >= 0.25:
        return "low"
    return "worst"


def _score_cell(text, css_class=""):
    """Return one ``<td>`` of the heat-map table with an optional modifier class."""
    classes = f"score-cell {css_class}".strip()
    return f'<td class="{classes}">{text}</td>'


def format_model_comparison(selected_models, all_results):
    """Build the side-by-side comparison view for several models.

    Args:
        selected_models: Sequence of model names, in display order. Names
            missing from ``all_results`` still get a summary card and table
            column, filled with placeholders.
        all_results: Mapping of model name -> leaderboard name -> entry
            mapping. Entries are read for ``"results"`` (metric name ->
            score or ``None``) and ``"developer"``.

    Returns:
        str: An HTML fragment containing a summary card per model (with the
        mean of all its scores) and, per leaderboard, a heat-map table with
        an average row followed by one row per metric. Cells are classed
        ``best``/``good``/``mid``/``low``/``worst`` by their position
        between the lowest and highest score in the row; higher is always
        treated as better. A prompt or "no data" message is returned when
        there is nothing to compare.
    """
    # Local import: this module uses ``html`` as a variable name elsewhere.
    from html import escape

    if not selected_models or not all_results:
        return """
        <div class="no-results">
            <h3>Select models to compare</h3>
            <p>Choose multiple models from the dropdown to see a side-by-side comparison</p>
        </div>
        """

    model_data_dict = {
        name: all_results[name] for name in selected_models if name in all_results
    }
    if not model_data_dict:
        return """
        <div class="no-results">
            <h3>No data found for selected models</h3>
            <p>Try selecting different models</p>
        </div>
        """

    all_leaderboards = sorted(
        {leaderboard for data in model_data_dict.values() for leaderboard in data}
    )
    model_colors = ['#88C0D0', '#A3BE8C', '#EBCB8B', '#D08770', '#B48EAD', '#8FBCBB', '#81A1C1', '#BF616A']
    na_cell = _score_cell("—", "na")

    def results_for(model_name, leaderboard_name):
        """Return the model's results on a leaderboard, or None if it has no entry there."""
        entry = model_data_dict.get(model_name, {}).get(leaderboard_name)
        if entry is None:
            return None
        return entry.get("results") or {}

    parts = [
        """
        <div class="comparison-container">
            <div class="comparison-summary">
                <h2>Model Comparison</h2>
                <div class="summary-cards">
        """
    ]

    # --- Summary card per model -------------------------------------------
    for index, model_name in enumerate(selected_models):
        # Colours repeat once there are more models than palette entries.
        color = model_colors[index % len(model_colors)]
        entries = list(model_data_dict.get(model_name, {}).values())
        overall = _mean_or_none(
            score
            for entry in entries
            for score in (entry.get("results") or {}).values()
        )
        avg_str = f"{overall:.2f}" if overall is not None else "—"
        developer = entries[0].get("developer", "Unknown") if entries else "Unknown"

        parts.append(
            f"""
                    <div class="summary-card" style="border-left: 4px solid {color};">
                        <div class="summary-card-header">
                            <span class="model-dot" style="background: {color};"></span>
                            <span class="model-name">{escape(str(model_name))}</span>
                        </div>
                        <div class="summary-card-body">
                            <div class="summary-stat">
                                <span class="stat-label">Developer</span>
                                <span class="stat-value">{escape(str(developer))}</span>
                            </div>
                            <div class="summary-stat primary">
                                <span class="stat-label">Overall Avg</span>
                                <span class="stat-value large">{avg_str}</span>
                            </div>
                        </div>
                    </div>
            """
        )

    parts.append(
        """
                </div>
            </div>
        """
    )

    # --- One heat-map table per leaderboard --------------------------------
    for leaderboard_name in all_leaderboards:
        per_model = {name: results_for(name, leaderboard_name) for name in selected_models}
        metrics = sorted(
            {metric for results in per_model.values() if results for metric in results}
        )
        if not metrics:
            continue

        # Only models that appear on this leaderboard get an average.
        model_avgs = {
            name: _mean_or_none(results.values())
            for name, results in per_model.items()
            if results is not None
        }

        parts.append(
            f"""
            <div class="leaderboard-comparison-card">
                <div class="lb-card-header">
                    <h3>{escape(str(leaderboard_name))}</h3>
                </div>
                <div class="lb-card-body">
            """
        )
        parts.append('<div class="heatmap-table-wrapper">')
        parts.append('<table class="heatmap-table">')

        # Header row: full name in the tooltip, truncated name in the cell.
        parts.append('<thead><tr><th class="metric-header">Metric</th>')
        for model_name in selected_models:
            full_name = str(model_name)
            short_name = full_name if len(full_name) <= 20 else full_name[:18] + "…"
            parts.append(
                f'<th class="model-header" title="{escape(full_name)}">{escape(short_name)}</th>'
            )
        parts.append('</tr></thead>')
        parts.append('<tbody>')

        # Average row first; the best average is marked only when there is
        # more than one average to compare.
        parts.append('<tr class="avg-row"><td class="metric-name">Average</td>')
        known_avgs = [avg for avg in model_avgs.values() if avg is not None]
        top_avg = max(known_avgs) if known_avgs else None
        for model_name in selected_models:
            avg = model_avgs.get(model_name)
            if avg is None:
                parts.append(na_cell)
            else:
                is_best = len(known_avgs) > 1 and avg == top_avg
                parts.append(_score_cell(f"{avg:.2f}", "best" if is_best else ""))
        parts.append('</tr>')

        # One row per metric.
        for metric_name in metrics:
            parts.append(f'<tr><td class="metric-name">{escape(str(metric_name))}</td>')
            metric_scores = {
                name: results.get(metric_name)
                for name, results in per_model.items()
                if results is not None
            }
            present = [score for score in metric_scores.values() if score is not None]
            low = min(present) if present else None
            high = max(present) if present else None
            for model_name in selected_models:
                score = metric_scores.get(model_name)
                if score is None:
                    parts.append(na_cell)
                else:
                    css_class = _heat_class(score, low, high, len(present) > 1)
                    parts.append(_score_cell(f"{score:.2f}", css_class))
            parts.append('</tr>')

        parts.append('</tbody></table></div>')
        parts.append(
            """
                </div>
            </div>
            """
        )

    parts.append("</div>")
    return "".join(parts)