|
|
import gradio as gr |
|
|
import matplotlib |
|
|
matplotlib.use("Agg") |
|
|
import matplotlib.pyplot as plt |
|
|
import pandas as pd |
|
|
|
|
|
from leaderboard import compute_leaderboard |
|
|
from rank_through_time import ( |
|
|
plot_rank_for_subdataset, |
|
|
plot_value_for_subdataset, |
|
|
) |
|
|
|
|
|
# Long-format evaluation results, loaded once at import time.  The rest of
# this module reads the "metric", "subdataset", "model", "cutoff" and "value"
# columns from it.
df = pd.read_csv("mock_evaluation_results.csv")

# Sorted distinct values of each categorical column; they populate the
# dropdown choices further down.
ALL_METRICS, ALL_SUBDATASETS, ALL_MODELS = (
    sorted(df[column].unique().tolist())
    for column in ("metric", "subdataset", "model")
)
|
|
|
|
|
|
|
|
def build_table(metric, subdataset, models, data=None):
    """Pivot evaluation results into a table with one column per model.

    Args:
        metric: Value of the ``metric`` column to keep.
        subdataset: Value of the ``subdataset`` column to keep, or the
            sentinel ``"All"`` to keep every subdataset.
        models: Collection of model names to keep.  An empty (or ``None``)
            selection disables the model filter, so every model is shown.
        data: Long-format results frame with ``metric``, ``subdataset``,
            ``cutoff``, ``model`` and ``value`` columns.  Defaults to the
            module-level ``df``; passing a frame explicitly makes the
            function usable (and testable) without the global.

    Returns:
        A DataFrame whose first two columns are ``subdataset`` and
        ``cutoff`` (sorted ascending) followed by one column per model
        holding that model's ``value``.  ``pivot_table`` aggregates with its
        default (mean) if several rows share the same cell.
    """
    frame = df if data is None else data

    # Build one boolean mask instead of slicing the frame repeatedly.
    keep = frame["metric"] == metric
    if subdataset != "All":
        keep &= frame["subdataset"] == subdataset
    if models:
        keep &= frame["model"].isin(models)

    pivot = frame[keep].pivot_table(
        index=["subdataset", "cutoff"],
        columns="model",
        values="value",
    )
    # reset_index turns the (subdataset, cutoff) index back into ordinary
    # columns so the table component displays them.
    return pivot.sort_index().reset_index()
|
|
|
|
|
|
|
|
def build_plots(metric, subdataset):
    """Return the ``(rank figure, value figure)`` pair for one selection.

    Both figures come from the ``rank_through_time`` helpers, which are
    handed the module-level ``df`` together with the chosen ``metric`` and
    ``subdataset``.
    """
    return (
        plot_rank_for_subdataset(df, metric, subdataset),
        plot_value_for_subdataset(df, metric, subdataset),
    )
|
|
|
|
|
|
|
|
# Stylesheet handed to ``app.launch(css=...)``.  The first half styles the
# default (light) theme; every selector in the second half is prefixed with
# ``.dark`` and overrides it for the dark theme.
#
# The comment banners are deliberately plain ASCII: the decorative box-drawing
# characters and dashes they once used had been corrupted by an encoding
# round trip.
#
# NOTE(review): ``.dark .title-banner`` previously declared
# ``backdrop-filter: blur(4px)`` next to ``-webkit-backdrop-filter:
# blur(20px)``.  The prefixed value now matches the unprefixed one so all
# browsers render the same blur; bump both together if a stronger frost was
# intended.
CUSTOM_CSS = """\
/* ===========================================================
   LIGHT MODE - Ethereal Glass
   =========================================================== */

/* -- Page background with floating aurora ------------------- */
.gradio-container {
    background:
        radial-gradient(ellipse at 20% 0%, rgba(124, 58, 237, 0.15) 0%, transparent 50%),
        radial-gradient(ellipse at 80% 100%, rgba(59, 130, 246, 0.12) 0%, transparent 50%),
        radial-gradient(ellipse at 50% 50%, rgba(6, 182, 212, 0.08) 0%, transparent 60%),
        linear-gradient(160deg, #f5f3ff 0%, #eef2ff 40%, #f0fdfa 100%) !important;
    color: #1e1b4b !important;
}

/* -- Title banner - gradient with glow ---------------------- */
.title-banner {
    background: linear-gradient(120deg, #7c3aed, #3b82f6, #06b6d4) !important;
    padding: 20px 28px !important;
    border-radius: 20px;
    margin-bottom: 14px !important;
    box-shadow:
        0 8px 32px rgba(124, 58, 237, 0.3),
        0 0 60px rgba(59, 130, 246, 0.15),
        inset 0 1px 0 rgba(255, 255, 255, 0.2);
    border: 1px solid rgba(255, 255, 255, 0.2);
    position: relative;
    overflow: hidden;
}
.title-banner::after {
    content: "";
    position: absolute;
    top: -50%;
    left: -50%;
    width: 200%;
    height: 200%;
    background: linear-gradient(
        45deg,
        transparent 40%,
        rgba(255, 255, 255, 0.08) 50%,
        transparent 60%
    );
    animation: shimmer 6s ease-in-out infinite;
}
@keyframes shimmer {
    0%, 100% { transform: translateX(-30%) translateY(-30%) rotate(0deg); }
    50% { transform: translateX(30%) translateY(30%) rotate(5deg); }
}
.title-banner h1 {
    color: #ffffff !important;
    margin: 0 !important;
    position: relative;
    z-index: 1;
    letter-spacing: -0.02em;
}

/* -- Accent top bar - animated gradient --------------------- */
.gradio-container::before {
    content: "";
    display: block;
    height: 3px;
    background: linear-gradient(90deg, #7c3aed, #3b82f6, #06b6d4, #7c3aed);
    background-size: 200% 100%;
    animation: gradientSlide 4s linear infinite;
    margin: -16px -16px 16px -16px;
}
@keyframes gradientSlide {
    0% { background-position: 0% 50%; }
    100% { background-position: 200% 50%; }
}

/* -- Header card - frosted glass ---------------------------- */
.gradio-container > .main > .wrap > div:first-child {
    background: rgba(255, 255, 255, 0.55) !important;
    border: 1px solid rgba(124, 58, 237, 0.1);
    border-radius: 16px;
    padding: 24px 28px;
    margin-bottom: 14px;
    backdrop-filter: blur(20px) saturate(1.4);
    -webkit-backdrop-filter: blur(20px) saturate(1.4);
    box-shadow:
        0 4px 24px rgba(124, 58, 237, 0.08),
        inset 0 1px 0 rgba(255, 255, 255, 0.6);
    color: #1e1b4b !important;
}

/* -- Tab buttons - pill glass ------------------------------- */
button.tab-nav-button {
    color: #4c1d95 !important;
    background: rgba(255, 255, 255, 0.5) !important;
    border-radius: 999px !important;
    border: 1px solid rgba(124, 58, 237, 0.12) !important;
    backdrop-filter: blur(8px);
    -webkit-backdrop-filter: blur(8px);
    transition: all 0.3s ease !important;
    box-shadow: 0 2px 8px rgba(124, 58, 237, 0.06);
}
button.tab-nav-button:hover {
    background: rgba(124, 58, 237, 0.08) !important;
    box-shadow: 0 4px 16px rgba(124, 58, 237, 0.12);
    transform: translateY(-1px);
}
button.tab-nav-button.selected {
    color: #ffffff !important;
    border-color: transparent !important;
    background-image: linear-gradient(120deg, #7c3aed, #3b82f6) !important;
    box-shadow:
        0 6px 24px rgba(124, 58, 237, 0.35),
        0 0 40px rgba(59, 130, 246, 0.1);
}

/* -- Table - glass with depth ------------------------------- */
.table-wrap {
    border-radius: 16px !important;
    overflow: hidden;
    border: 1px solid rgba(124, 58, 237, 0.1) !important;
    box-shadow: 0 4px 24px rgba(124, 58, 237, 0.08);
}
.table-wrap thead th {
    background: linear-gradient(120deg, #4c1d95, #1e40af) !important;
    color: #ffffff !important;
    letter-spacing: 0.03em;
}
.table-wrap tbody tr {
    transition: all 0.2s ease;
}
.table-wrap tbody tr:hover {
    background: rgba(124, 58, 237, 0.06) !important;
}

/* -- Dropdowns & form elements ------------------------------ */
.gr-dropdown, .multiselect-dropdown {
    border-radius: 12px !important;
    border: 1px solid rgba(124, 58, 237, 0.18) !important;
    background: rgba(245, 243, 255, 0.6) !important;
    backdrop-filter: blur(12px);
    -webkit-backdrop-filter: blur(12px);
    color: #1e1b4b !important;
    box-shadow: 0 2px 12px rgba(124, 58, 237, 0.06);
}

/* -- Light mode: inputs ------------------------------------- */
.gradio-container input,
.gradio-container textarea,
.gradio-container select {
    background: rgba(245, 243, 255, 0.5) !important;
    border: 1px solid rgba(124, 58, 237, 0.15) !important;
    border-radius: 10px !important;
    color: #1e1b4b !important;
}

/* -- Light mode: all panels/blocks transparent -------------- */
.gr-group,
.gr-box,
.gr-panel,
.gr-form,
.gr-block,
.block,
.form,
.panel,
.tabitem,
.tabitem > div,
.tab-content,
.gap,
.gr-padded,
.wrap,
.dropdown-container,
.secondary-wrap,
.input-container {
    background: transparent !important;
    border: none !important;
    box-shadow: none !important;
}

/* -- Light mode: tags / chips ------------------------------- */
.token,
.token-remove,
.tag,
span.tag,
.multiselect-token,
.pill {
    background: rgba(124, 58, 237, 0.1) !important;
    border: 1px solid rgba(124, 58, 237, 0.25) !important;
    border-radius: 999px !important;
    color: #4c1d95 !important;
    backdrop-filter: blur(6px);
    -webkit-backdrop-filter: blur(6px);
}
.token:hover,
.tag:hover,
.multiselect-token:hover,
.pill:hover {
    background: rgba(124, 58, 237, 0.18) !important;
    border-color: rgba(124, 58, 237, 0.4) !important;
}

/* -- Light mode: dropdown lists when open ------------------- */
.dropdown-content,
ul[role="listbox"],
.options {
    background: rgba(255, 255, 255, 0.85) !important;
    border: 1px solid rgba(124, 58, 237, 0.15) !important;
    border-radius: 12px !important;
    backdrop-filter: blur(16px);
    -webkit-backdrop-filter: blur(16px);
    box-shadow: 0 8px 32px rgba(124, 58, 237, 0.1);
}
.dropdown-content li:hover,
ul[role="listbox"] li:hover,
.options li:hover {
    background: rgba(124, 58, 237, 0.08) !important;
}

/* -- Light mode: labels ------------------------------------- */
label,
.gr-block label,
.label-text,
.block-label {
    color: #4c1d95 !important;
}

/* -- Tab content spacing ------------------------------------ */
.tabitem {
    padding-top: 14px;
}

/* ===========================================================
   DARK MODE - Aurora Noir
   =========================================================== */

.dark .gradio-container {
    background:
        radial-gradient(ellipse at 25% 15%, rgba(124, 58, 237, 0.18) 0%, transparent 50%),
        radial-gradient(ellipse at 75% 70%, rgba(59, 130, 246, 0.14) 0%, transparent 50%),
        radial-gradient(ellipse at 50% 40%, rgba(6, 182, 212, 0.06) 0%, transparent 55%),
        linear-gradient(180deg, #110d24 0%, #0d0b1a 50%, #080614 100%) !important;
    color: #f1f5f9 !important;
    position: relative;
}

/* -- Body glare effect -------------------------------------- */
.dark .gradio-container::after {
    content: "";
    position: fixed;
    top: -50%;
    left: -50%;
    width: 200%;
    height: 200%;
    background: linear-gradient(
        35deg,
        transparent 0%,
        transparent 42%,
        rgba(124, 58, 237, 0.04) 45%,
        rgba(255, 255, 255, 0.03) 50%,
        rgba(59, 130, 246, 0.04) 55%,
        transparent 58%,
        transparent 100%
    );
    animation: bodyGlare 10s ease-in-out infinite;
    pointer-events: none;
    z-index: 0;
}
@keyframes bodyGlare {
    0%, 100% { transform: translateX(-20%) translateY(-10%) rotate(-5deg); }
    50% { transform: translateX(20%) translateY(10%) rotate(5deg); }
}

/* -- Force all text readable -------------------------------- */
.dark .gradio-container,
.dark .gradio-container *:not(.title-banner *):not(button.tab-nav-button) {
    color: #f1f5f9 !important;
}
.dark .gradio-container .prose,
.dark .gradio-container .prose * {
    color: #e2e8f0 !important;
}
.dark .gradio-container .markdown-text,
.dark .gradio-container .markdown-text * {
    color: #e2e8f0 !important;
}
.dark .gradio-container strong,
.dark .gradio-container b {
    color: #ffffff !important;
}
.dark .gradio-container h2,
.dark .gradio-container h3,
.dark .gradio-container h4 {
    color: #ffffff !important;
    text-shadow: 0 0 30px rgba(124, 58, 237, 0.3);
}
.dark .gradio-container a {
    color: #93c5fd !important;
}

/* -- All inputs - normal + focus + active ------------------- */
.dark .gradio-container input,
.dark .gradio-container textarea,
.dark .gradio-container select,
.dark .gradio-container input[type="text"],
.dark .gradio-container input[type="search"],
.dark .gradio-container input[type="number"],
.dark .gradio-container input:focus,
.dark .gradio-container input:active,
.dark .gradio-container textarea:focus,
.dark .gradio-container textarea:active,
.dark .gradio-container select:focus,
.dark .gradio-container select:active {
    color: #f1f5f9 !important;
    background: rgba(10, 6, 32, 0.8) !important;
    border: 1px solid rgba(124, 58, 237, 0.25) !important;
    border-radius: 10px !important;
    outline: none !important;
    caret-color: #a78bfa !important;
}

/* -- Title banner - frosted glass --------------------------- */
.dark .title-banner {
    background: linear-gradient(120deg, rgba(124, 58, 237, 0), rgba(59, 130, 246, 0), rgba(34, 211, 238, 0)) !important;
    padding: 20px 28px !important;
    border-radius: 20px;
    margin-bottom: 14px !important;
    box-shadow:
        0 4px 16px rgba(0, 0, 0, 0.3),
        inset 0 1px 0 rgba(255, 255, 255, 0.15),
        inset 0 -1px 0 rgba(0, 0, 0, 0.1);
    border: 1px solid rgba(255, 255, 255, 0.1);
    backdrop-filter: blur(4px);
    -webkit-backdrop-filter: blur(4px);
    position: relative;
    z-index: 1;
}
.dark .title-banner h1 {
    color: #ffffff !important;
}

/* -- Kill all block/panel backgrounds - seamless look ------- */
.dark .gr-group,
.dark .gr-box,
.dark .gr-panel,
.dark .gr-form,
.dark .gr-block,
.dark .block,
.dark .form,
.dark .panel,
.dark .gradio-container > .main,
.dark .gradio-container > .main > .wrap,
.dark .gradio-container > .main > .wrap > div,
.dark .tabitem,
.dark .tabitem > div,
.dark .tabitem > .gr-group,
.dark .tabitem > .gr-box,
.dark .tab-content,
.dark .gap,
.dark .gr-padded {
    background: transparent !important;
    border: none !important;
    box-shadow: none !important;
}

/* -- Header card - keep its glass style --------------------- */
.dark .gradio-container > .main > .wrap > div:first-child {
    background: rgba(10, 6, 32, 0.6) !important;
    border: 1px solid rgba(124, 58, 237, 0.15) !important;
    border-radius: 16px !important;
    backdrop-filter: blur(24px) saturate(1.2);
    -webkit-backdrop-filter: blur(24px) saturate(1.2);
    box-shadow:
        0 8px 32px rgba(0, 0, 0, 0.4),
        inset 0 1px 0 rgba(124, 58, 237, 0.1);
    position: relative;
    z-index: 1;
}

/* -- Dropdowns - glass -------------------------------------- */
.dark .gr-dropdown,
.dark .multiselect-dropdown {
    background: rgba(10, 6, 32, 0.5) !important;
    border: 1px solid rgba(124, 58, 237, 0.2) !important;
    border-radius: 12px !important;
    backdrop-filter: blur(16px);
    -webkit-backdrop-filter: blur(16px);
}

/* -- Dropdown containers + select wrappers ------------------ */
.dark .wrap,
.dark .dropdown-container,
.dark .secondary-wrap,
.dark .input-container {
    background: transparent !important;
    border-color: rgba(124, 58, 237, 0.15) !important;
}

/* -- Dropdown/listbox popups (scoped - no markdown lists) --- */
.dark .gradio-container [role="listbox"],
.dark .gradio-container [role="listbox"] [role="option"],
.dark .gradio-container .dropdown-content,
.dark .gradio-container .dropdown-content li {
    background: rgba(10, 6, 32, 0.95) !important;
    color: #f1f5f9 !important;
}
.dark .gradio-container [role="listbox"],
.dark .gradio-container .dropdown-content {
    border: 1px solid rgba(124, 58, 237, 0.25) !important;
    border-radius: 12px !important;
    backdrop-filter: blur(20px);
    -webkit-backdrop-filter: blur(20px);
    box-shadow: 0 12px 40px rgba(0, 0, 0, 0.6);
}

/* -- Dropdown items: hover + selected ----------------------- */
.dark .gradio-container [role="option"]:hover,
.dark .gradio-container .dropdown-content li:hover {
    background: rgba(124, 58, 237, 0.2) !important;
    color: #ffffff !important;
}
.dark .gradio-container [role="option"][aria-selected="true"],
.dark .gradio-container .dropdown-content li.selected {
    background: rgba(124, 58, 237, 0.35) !important;
    color: #ffffff !important;
}

/* -- Checkmarks and icons inside dropdowns ------------------ */
.dark .gradio-container [role="option"] svg,
.dark .gradio-container .dropdown-content li svg {
    color: #a78bfa !important;
    fill: #a78bfa !important;
}

/* -- Tags / chips (model selectors) ------------------------- */
.dark .token,
.dark .token-remove,
.dark .tag,
.dark span.tag,
.dark .multiselect-token,
.dark .pill {
    background: rgba(124, 58, 237, 0.2) !important;
    border: 1px solid rgba(124, 58, 237, 0.3) !important;
    border-radius: 999px !important;
    color: #e2e8f0 !important;
    backdrop-filter: blur(8px);
    -webkit-backdrop-filter: blur(8px);
}
.dark .token:hover,
.dark .tag:hover,
.dark .multiselect-token:hover,
.dark .pill:hover {
    background: rgba(124, 58, 237, 0.3) !important;
    border-color: rgba(124, 58, 237, 0.5) !important;
}
.dark .token-remove:hover,
.dark .remove-btn:hover {
    color: #f87171 !important;
}

/* -- Label text above inputs -------------------------------- */
.dark label,
.dark .gr-block label,
.dark .label-text,
.dark .block-label {
    color: #cbd5e1 !important;
}

/* -- Tab buttons - glass pills ------------------------------ */
.dark button.tab-nav-button {
    color: #cbd5e1 !important;
    background: rgba(10, 6, 32, 0.5) !important;
    border-radius: 999px !important;
    border: 1px solid rgba(124, 58, 237, 0.15) !important;
    backdrop-filter: blur(12px);
    -webkit-backdrop-filter: blur(12px);
    transition: all 0.3s ease !important;
    position: relative;
    z-index: 1;
}
.dark button.tab-nav-button:hover {
    background: rgba(124, 58, 237, 0.12) !important;
    box-shadow: 0 4px 20px rgba(124, 58, 237, 0.15);
    border-color: rgba(124, 58, 237, 0.3) !important;
}
.dark button.tab-nav-button.selected {
    color: #ffffff !important;
    border-color: transparent !important;
    background-image: linear-gradient(120deg, rgba(124, 58, 237, 1), rgba(59, 130, 246, 1)) !important;
    box-shadow:
        0 10px 30px rgba(37, 99, 235, 0.5),
        0 0 50px rgba(124, 58, 237, 0.15);
}

/* -- Table - aurora glass ----------------------------------- */
.dark .table-wrap {
    border-radius: 16px !important;
    overflow: hidden;
    border: 1px solid rgba(124, 58, 237, 0.2) !important;
    box-shadow:
        0 8px 40px rgba(0, 0, 0, 0.5),
        0 0 60px rgba(124, 58, 237, 0.08);
    position: relative;
    z-index: 1;
}
.dark .table-wrap thead th {
    background: linear-gradient(120deg, rgba(46, 16, 101, 0.9), rgba(30, 58, 138, 0.9)) !important;
    color: #ffffff !important;
    border-bottom: 1px solid rgba(124, 58, 237, 0.3) !important;
    letter-spacing: 0.03em;
    text-shadow: 0 0 10px rgba(124, 58, 237, 0.4);
    backdrop-filter: blur(12px);
    -webkit-backdrop-filter: blur(12px);
}
.dark .table-wrap tbody tr {
    background: rgba(0, 0, 0, 0.35) !important;
    color: #f1f5f9 !important;
    transition: all 0.25s ease;
}
.dark .table-wrap tbody tr:nth-child(even) {
    background: rgba(10, 6, 32, 0.4) !important;
}
.dark .table-wrap tbody tr:hover {
    background: linear-gradient(120deg, rgba(124, 58, 237, 0.12), rgba(59, 130, 246, 0.12)) !important;
    box-shadow: inset 0 0 30px rgba(124, 58, 237, 0.06);
}
.dark .table-wrap tbody td {
    color: #f1f5f9 !important;
    border-bottom: 1px solid rgba(124, 58, 237, 0.06) !important;
}
"""
|
|
|
|
|
# Build the Gradio UI: an intro section followed by three tabs (aggregate
# leaderboard, per-subdataset plots over time, and the full results table).
#
# NOTE(review): the dashes and tab-label emoji in the user-facing strings
# below had been corrupted by an encoding round trip.  The em dashes are
# restored from context; the three tab emoji are best-guess replacements,
# so confirm them against the intended design.
with gr.Blocks(title="Impermanent Leaderboard") as app:
    gr.Markdown("# Impermanent Leaderboard", elem_classes=["title-banner"])
    gr.Markdown(
        "A **live** time-series forecasting benchmark designed to avoid data contamination. "
        "Automated pipelines continuously fetch fresh data from GitHub — including the number of "
        "open issues, opened PRs, pushes, and stars — ensuring that models are always evaluated "
        "on data they could not have seen during training."
    )

    # Evaluation-date summary quoted in the intro text.
    cutoff_dates = sorted(df["cutoff"].unique())
    n_dates = len(cutoff_dates)
    date_min, date_max = cutoff_dates[0], cutoff_dates[-1]

    # Model families named in the intro text.  These lists are hard-coded
    # here rather than derived from the "model" column of ``df``.
    statistical_models = ["zero_model", "seasonal_naive", "auto_arima", "auto_ets", "auto_lgbm"]
    foundation_models = ["chronos", "moirai", "timesfm"]
    all_model_names = statistical_models + foundation_models

    # Pre-render the back-ticked model lists so the f-string stays readable.
    statistical_md = ", ".join(f"`{m}`" for m in statistical_models)
    foundation_md = ", ".join(f"`{m}`" for m in foundation_models)

    gr.Markdown(f"""\
## Datasets

GitHub repositories are selected across several **buckets based on their number of stars**,
yielding a mix of both intermittent (low-activity) and high-volume time series.
For each bucket, an automated pipeline fetches four signals:

- **Open issues** — number of issues opened
- **Opened PRs** — number of pull requests opened
- **Pushes** — number of push events
- **Stars** — number of new stars

Each signal is collected at both **daily** and **weekly** granularity.

## Models

The benchmark evaluates two families of forecasting methods:

- **Statistical / ML models:** {statistical_md}
- **Foundation models:** {foundation_md}

## Evaluation dates

Forecast methods are evaluated **every week** using rolling forecast evaluations.
Currently **{n_dates} evaluations** are available, from **{date_min}** to **{date_max}**.
""")

    with gr.Tab("Leaderboard 🏆"):
        lb = compute_leaderboard(df)
        gr.Dataframe(
            value=lb,
            interactive=False,
            # Wrap the column names in ** markers for the table header.
            headers=[f"**{c}**" for c in lb.columns],
        )

    with gr.Tab("Results over time 📈"):
        with gr.Row():
            time_metric_dd = gr.Dropdown(
                choices=ALL_METRICS,
                value=ALL_METRICS[0],
                label="Metric",
            )
            time_subdataset_dd = gr.Dropdown(
                choices=ALL_SUBDATASETS,
                value=ALL_SUBDATASETS[0],
                label="Subdataset",
            )

        rank_plot = gr.Plot(label="Rank over time")
        value_plot = gr.Plot(label="Metric value over time")

        def update_plots(metric, subdataset):
            """Return the (rank, value) figures for the current selection."""
            return build_plots(metric, subdataset)

        plot_inputs = [time_metric_dd, time_subdataset_dd]
        plot_outputs = [rank_plot, value_plot]

        # Draw the plots once when the page loads ...
        app.load(fn=update_plots, inputs=plot_inputs, outputs=plot_outputs)

        # ... and redraw them whenever either dropdown changes.
        for control in plot_inputs:
            control.change(fn=update_plots, inputs=plot_inputs, outputs=plot_outputs)

    with gr.Tab("All results 📋"):
        with gr.Row():
            metric_dd = gr.Dropdown(
                choices=ALL_METRICS,
                value=ALL_METRICS[0],
                label="Metric",
            )
            subdataset_dd = gr.Dropdown(
                choices=["All"] + ALL_SUBDATASETS,
                value="All",
                label="Subdataset",
            )
            models_dd = gr.Dropdown(
                choices=ALL_MODELS,
                value=ALL_MODELS,
                multiselect=True,
                label="Models",
            )

        # Initial table matches the dropdown defaults above.
        results_table = gr.Dataframe(
            value=build_table(ALL_METRICS[0], "All", ALL_MODELS),
            label="Results",
            interactive=False,
        )

        table_inputs = [metric_dd, subdataset_dd, models_dd]
        for control in table_inputs:
            control.change(fn=build_table, inputs=table_inputs, outputs=results_table)
|
|
|
|
|
if __name__ == "__main__":
    # Start the server only when this file is executed as a script; the
    # stylesheet is passed at launch time and server-side rendering is
    # switched off.
    app.launch(
        css=CUSTOM_CSS,
        ssr_mode=False,
    )
|
|
|