"""Gradio front end for the Impermanent Leaderboard.

Loads the evaluation results from ``mock_evaluation_results.csv`` and exposes
three tabs: the aggregated leaderboard, rank/metric plots over time for one
subdataset, and a filterable table with every result.
"""

import gradio as gr
import matplotlib

# The backend must be selected before pyplot is imported.
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.figure import Figure

from leaderboard import compute_leaderboard
from rank_through_time import (
    plot_rank_for_subdataset,
    plot_value_for_subdataset,
)

# Long-format results; the code below reads the columns
# "metric", "subdataset", "model", "cutoff" and "value".
df = pd.read_csv("mock_evaluation_results.csv")

ALL_METRICS = sorted(df["metric"].unique().tolist())
ALL_SUBDATASETS = sorted(df["subdataset"].unique().tolist())
ALL_MODELS = sorted(df["model"].unique().tolist())


def build_table(metric: str, subdataset: str, models: list) -> pd.DataFrame:
    """Return one row per (subdataset, cutoff) with one column per model.

    Args:
        metric: Metric name; only rows with this ``metric`` are kept.
        subdataset: Subdataset to keep, or ``"All"`` to keep every subdataset.
        models: Model names to keep. An empty (or otherwise falsy) selection
            applies no model filter, so every model is shown.

    Returns:
        A wide DataFrame whose first two columns are ``subdataset`` and
        ``cutoff`` and whose remaining columns hold ``value`` per model.
        Duplicate (subdataset, cutoff, model) rows are combined by
        ``pivot_table``'s default aggregation.
    """
    sub = df[df["metric"] == metric]
    if subdataset != "All":
        sub = sub[sub["subdataset"] == subdataset]
    if models:
        sub = sub[sub["model"].isin(models)]
    pivot = sub.pivot_table(
        index=["subdataset", "cutoff"], columns="model", values="value"
    )
    pivot = pivot.sort_index()
    # Turn the index levels back into ordinary columns so they are displayed.
    pivot = pivot.reset_index()
    return pivot


def build_plots(metric: str, subdataset: str):
    """Build the rank and metric-value figures for one metric/subdataset.

    Args:
        metric: Metric name forwarded to both plotting helpers.
        subdataset: Subdataset name forwarded to both plotting helpers.

    Returns:
        A ``(fig_rank, fig_value)`` tuple, in the order the plot components
        are wired up in the UI.
    """
    fig_rank = plot_rank_for_subdataset(df, metric, subdataset)
    fig_value = plot_value_for_subdataset(df, metric, subdataset)
    # This runs on every dropdown change. Figures created through pyplot stay
    # registered (and in memory) until they are closed, so release them here.
    # A closed Figure can still be rendered by the caller; closing a figure
    # that pyplot does not manage is a no-op. The isinstance guard leaves any
    # non-matplotlib return value untouched (the helpers are not visible here).
    for fig in (fig_rank, fig_value):
        if isinstance(fig, Figure):
            plt.close(fig)
    return fig_rank, fig_value


CUSTOM_CSS = """\
/* ═══════════════════════════════════════════════════════════
   LIGHT MODE — Ethereal Glass
   ═══════════════════════════════════════════════════════════ */

/* ── Page background with floating aurora ─────────────────── */
.gradio-container {
    background:
        radial-gradient(ellipse at 20% 0%, rgba(124, 58, 237, 0.15) 0%, transparent 50%),
        radial-gradient(ellipse at 80% 100%, rgba(59, 130, 246, 0.12) 0%, transparent 50%),
        radial-gradient(ellipse at 50% 50%, rgba(6, 182, 212, 0.08) 0%, transparent 60%),
        linear-gradient(160deg, #f5f3ff 0%, #eef2ff 40%, #f0fdfa 100%) !important;
    color: #1e1b4b !important;
}

/* ── Title banner — gradient with glow ────────────────────── */
.title-banner {
    background: linear-gradient(120deg, #7c3aed, #3b82f6, #06b6d4) !important;
    padding: 20px 28px !important;
    border-radius: 20px;
    margin-bottom: 14px !important;
    box-shadow:
        0 8px 32px rgba(124, 58, 237, 0.3),
        0 0 60px rgba(59, 130, 246, 0.15),
        inset 0 1px 0 rgba(255, 255, 255, 0.2);
    border: 1px solid rgba(255, 255, 255, 0.2);
    position: relative;
    overflow: hidden;
}
.title-banner::after {
    content: "";
    position: absolute;
    top: -50%;
    left: -50%;
    width: 200%;
    height: 200%;
    background: linear-gradient(
        45deg,
        transparent 40%,
        rgba(255, 255, 255, 0.08) 50%,
        transparent 60%
    );
    animation: shimmer 6s ease-in-out infinite;
}
@keyframes shimmer {
    0%, 100% { transform: translateX(-30%) translateY(-30%) rotate(0deg); }
    50% { transform: translateX(30%) translateY(30%) rotate(5deg); }
}
.title-banner h1 {
    color: #ffffff !important;
    margin: 0 !important;
    position: relative;
    z-index: 1;
    letter-spacing: -0.02em;
}

/* ── Accent top bar — animated gradient ───────────────────── */
.gradio-container::before {
    content: "";
    display: block;
    height: 3px;
    background: linear-gradient(90deg, #7c3aed, #3b82f6, #06b6d4, #7c3aed);
    background-size: 200% 100%;
    animation: gradientSlide 4s linear infinite;
    margin: -16px -16px 16px -16px;
}
@keyframes gradientSlide {
    0% { background-position: 0% 50%; }
    100% { background-position: 200% 50%; }
}

/* ── Header card — frosted glass ──────────────────────────── */
.gradio-container > .main > .wrap > div:first-child {
    background: rgba(255, 255, 255, 0.55) !important;
    border: 1px solid rgba(124, 58, 237, 0.1);
    border-radius: 16px;
    padding: 24px 28px;
    margin-bottom: 14px;
    backdrop-filter: blur(20px) saturate(1.4);
    -webkit-backdrop-filter: blur(20px) saturate(1.4);
    box-shadow:
        0 4px 24px rgba(124, 58, 237, 0.08),
        inset 0 1px 0 rgba(255, 255, 255, 0.6);
    color: #1e1b4b !important;
}

/* ── Tab buttons — pill glass ─────────────────────────────── */
button.tab-nav-button {
    color: #4c1d95 !important;
    background: rgba(255, 255, 255, 0.5) !important;
    border-radius: 999px !important;
    border: 1px solid rgba(124, 58, 237, 0.12) !important;
    backdrop-filter: blur(8px);
    -webkit-backdrop-filter: blur(8px);
    transition: all 0.3s ease !important;
    box-shadow: 0 2px 8px rgba(124, 58, 237, 0.06);
}
button.tab-nav-button:hover {
    background: rgba(124, 58, 237, 0.08) !important;
    box-shadow: 0 4px 16px rgba(124, 58, 237, 0.12);
    transform: translateY(-1px);
}
button.tab-nav-button.selected {
    color: #ffffff !important;
    border-color: transparent !important;
    background-image: linear-gradient(120deg, #7c3aed, #3b82f6) !important;
    box-shadow:
        0 6px 24px rgba(124, 58, 237, 0.35),
        0 0 40px rgba(59, 130, 246, 0.1);
}

/* ── Table — glass with depth ─────────────────────────────── */
.table-wrap {
    border-radius: 16px !important;
    overflow: hidden;
    border: 1px solid rgba(124, 58, 237, 0.1) !important;
    box-shadow: 0 4px 24px rgba(124, 58, 237, 0.08);
}
.table-wrap thead th {
    background: linear-gradient(120deg, #4c1d95, #1e40af) !important;
    color: #ffffff !important;
    letter-spacing: 0.03em;
}
.table-wrap tbody tr {
    transition: all 0.2s ease;
}
.table-wrap tbody tr:hover {
    background: rgba(124, 58, 237, 0.06) !important;
}

/* ── Dropdowns & form elements ────────────────────────────── */
.gr-dropdown,
.multiselect-dropdown {
    border-radius: 12px !important;
    border: 1px solid rgba(124, 58, 237, 0.18) !important;
    background: rgba(245, 243, 255, 0.6) !important;
    backdrop-filter: blur(12px);
    -webkit-backdrop-filter: blur(12px);
    color: #1e1b4b !important;
    box-shadow: 0 2px 12px rgba(124, 58, 237, 0.06);
}

/* ── Light mode: inputs ───────────────────────────────────── */
.gradio-container input,
.gradio-container textarea,
.gradio-container select {
    background: rgba(245, 243, 255, 0.5) !important;
    border: 1px solid rgba(124, 58, 237, 0.15) !important;
    border-radius: 10px !important;
    color: #1e1b4b !important;
}

/* ── Light mode: all panels/blocks transparent ────────────── */
.gr-group,
.gr-box,
.gr-panel,
.gr-form,
.gr-block,
.block,
.form,
.panel,
.tabitem,
.tabitem > div,
.tab-content,
.gap,
.gr-padded,
.wrap,
.dropdown-container,
.secondary-wrap,
.input-container {
    background: transparent !important;
    border: none !important;
    box-shadow: none !important;
}

/* ── Light mode: tags / chips ─────────────────────────────── */
.token,
.token-remove,
.tag,
span.tag,
.multiselect-token,
.pill {
    background: rgba(124, 58, 237, 0.1) !important;
    border: 1px solid rgba(124, 58, 237, 0.25) !important;
    border-radius: 999px !important;
    color: #4c1d95 !important;
    backdrop-filter: blur(6px);
    -webkit-backdrop-filter: blur(6px);
}
.token:hover,
.tag:hover,
.multiselect-token:hover,
.pill:hover {
    background: rgba(124, 58, 237, 0.18) !important;
    border-color: rgba(124, 58, 237, 0.4) !important;
}

/* ── Light mode: dropdown lists when open ─────────────────── */
.dropdown-content,
ul[role="listbox"],
.options {
    background: rgba(255, 255, 255, 0.85) !important;
    border: 1px solid rgba(124, 58, 237, 0.15) !important;
    border-radius: 12px !important;
    backdrop-filter: blur(16px);
    -webkit-backdrop-filter: blur(16px);
    box-shadow: 0 8px 32px rgba(124, 58, 237, 0.1);
}
.dropdown-content li:hover,
ul[role="listbox"] li:hover,
.options li:hover {
    background: rgba(124, 58, 237, 0.08) !important;
}

/* ── Light mode: labels ───────────────────────────────────── */
label,
.gr-block label,
.label-text,
.block-label {
    color: #4c1d95 !important;
}

/* ── Tab content spacing ──────────────────────────────────── */
.tabitem {
    padding-top: 14px;
}

/* ═══════════════════════════════════════════════════════════
   DARK MODE — Aurora Noir
   ═══════════════════════════════════════════════════════════ */

.dark .gradio-container {
    background:
        radial-gradient(ellipse at 25% 15%, rgba(124, 58, 237, 0.18) 0%, transparent 50%),
        radial-gradient(ellipse at 75% 70%, rgba(59, 130, 246, 0.14) 0%, transparent 50%),
        radial-gradient(ellipse at 50% 40%, rgba(6, 182, 212, 0.06) 0%, transparent 55%),
        linear-gradient(180deg, #110d24 0%, #0d0b1a 50%, #080614 100%) !important;
    color: #f1f5f9 !important;
    position: relative;
}

/* ── Body glare effect ────────────────────────────────────── */
.dark .gradio-container::after {
    content: "";
    position: fixed;
    top: -50%;
    left: -50%;
    width: 200%;
    height: 200%;
    background: linear-gradient(
        35deg,
        transparent 0%,
        transparent 42%,
        rgba(124, 58, 237, 0.04) 45%,
        rgba(255, 255, 255, 0.03) 50%,
        rgba(59, 130, 246, 0.04) 55%,
        transparent 58%,
        transparent 100%
    );
    animation: bodyGlare 10s ease-in-out infinite;
    pointer-events: none;
    z-index: 0;
}
@keyframes bodyGlare {
    0%, 100% { transform: translateX(-20%) translateY(-10%) rotate(-5deg); }
    50% { transform: translateX(20%) translateY(10%) rotate(5deg); }
}

/* ── Force all text readable ──────────────────────────────── */
.dark .gradio-container,
.dark .gradio-container *:not(.title-banner *):not(button.tab-nav-button) {
    color: #f1f5f9 !important;
}
.dark .gradio-container .prose,
.dark .gradio-container .prose * {
    color: #e2e8f0 !important;
}
.dark .gradio-container .markdown-text,
.dark .gradio-container .markdown-text * {
    color: #e2e8f0 !important;
}
.dark .gradio-container strong,
.dark .gradio-container b {
    color: #ffffff !important;
}
.dark .gradio-container h2,
.dark .gradio-container h3,
.dark .gradio-container h4 {
    color: #ffffff !important;
    text-shadow: 0 0 30px rgba(124, 58, 237, 0.3);
}
.dark .gradio-container a {
    color: #93c5fd !important;
}

/* ── All inputs — normal + focus + active ─────────────────── */
.dark .gradio-container input,
.dark .gradio-container textarea,
.dark .gradio-container select,
.dark .gradio-container input[type="text"],
.dark .gradio-container input[type="search"],
.dark .gradio-container input[type="number"],
.dark .gradio-container input:focus,
.dark .gradio-container input:active,
.dark .gradio-container textarea:focus,
.dark .gradio-container textarea:active,
.dark .gradio-container select:focus,
.dark .gradio-container select:active {
    color: #f1f5f9 !important;
    background: rgba(10, 6, 32, 0.8) !important;
    border: 1px solid rgba(124, 58, 237, 0.25) !important;
    border-radius: 10px !important;
    outline: none !important;
    caret-color: #a78bfa !important;
}

/* ── Title banner — frosted glass ─────────────────────────── */
.dark .title-banner {
    background: linear-gradient(120deg, rgba(124, 58, 237, 0), rgba(59, 130, 246, 0), rgba(34, 211, 238, 0)) !important;
    padding: 20px 28px !important;
    border-radius: 20px;
    margin-bottom: 14px !important;
    box-shadow:
        0 4px 16px rgba(0, 0, 0, 0.3),
        inset 0 1px 0 rgba(255, 255, 255, 0.15),
        inset 0 -1px 0 rgba(0, 0, 0, 0.1);
    border: 1px solid rgba(255, 255, 255, 0.1);
    backdrop-filter: blur(4px);
    -webkit-backdrop-filter: blur(20px);
    position: relative;
    z-index: 1;
}
.dark .title-banner h1 {
    color: #ffffff !important;
}

/* ── Kill all block/panel backgrounds — seamless look ─────── */
.dark .gr-group,
.dark .gr-box,
.dark .gr-panel,
.dark .gr-form,
.dark .gr-block,
.dark .block,
.dark .form,
.dark .panel,
.dark .gradio-container > .main,
.dark .gradio-container > .main > .wrap,
.dark .gradio-container > .main > .wrap > div,
.dark .tabitem,
.dark .tabitem > div,
.dark .tabitem > .gr-group,
.dark .tabitem > .gr-box,
.dark .tab-content,
.dark .gap,
.dark .gr-padded {
    background: transparent !important;
    border: none !important;
    box-shadow: none !important;
}

/* ── Header card — keep its glass style ───────────────────── */
.dark .gradio-container > .main > .wrap > div:first-child {
    background: rgba(10, 6, 32, 0.6) !important;
    border: 1px solid rgba(124, 58, 237, 0.15) !important;
    border-radius: 16px !important;
    backdrop-filter: blur(24px) saturate(1.2);
    -webkit-backdrop-filter: blur(24px) saturate(1.2);
    box-shadow:
        0 8px 32px rgba(0, 0, 0, 0.4),
        inset 0 1px 0 rgba(124, 58, 237, 0.1);
    position: relative;
    z-index: 1;
}

/* ── Dropdowns — glass ────────────────────────────────────── */
.dark .gr-dropdown,
.dark .multiselect-dropdown {
    background: rgba(10, 6, 32, 0.5) !important;
    border: 1px solid rgba(124, 58, 237, 0.2) !important;
    border-radius: 12px !important;
    backdrop-filter: blur(16px);
    -webkit-backdrop-filter: blur(16px);
}

/* ── Dropdown containers + select wrappers ────────────────── */
.dark .wrap,
.dark .dropdown-container,
.dark .secondary-wrap,
.dark .input-container {
    background: transparent !important;
    border-color: rgba(124, 58, 237, 0.15) !important;
}

/* ── Dropdown/listbox popups (scoped — no markdown lists) ── */
.dark .gradio-container [role="listbox"],
.dark .gradio-container [role="listbox"] [role="option"],
.dark .gradio-container .dropdown-content,
.dark .gradio-container .dropdown-content li {
    background: rgba(10, 6, 32, 0.95) !important;
    color: #f1f5f9 !important;
}
.dark .gradio-container [role="listbox"],
.dark .gradio-container .dropdown-content {
    border: 1px solid rgba(124, 58, 237, 0.25) !important;
    border-radius: 12px !important;
    backdrop-filter: blur(20px);
    -webkit-backdrop-filter: blur(20px);
    box-shadow: 0 12px 40px rgba(0, 0, 0, 0.6);
}

/* ── Dropdown items: hover + selected ─────────────────────── */
.dark .gradio-container [role="option"]:hover,
.dark .gradio-container .dropdown-content li:hover {
    background: rgba(124, 58, 237, 0.2) !important;
    color: #ffffff !important;
}
.dark .gradio-container [role="option"][aria-selected="true"],
.dark .gradio-container .dropdown-content li.selected {
    background: rgba(124, 58, 237, 0.35) !important;
    color: #ffffff !important;
}

/* ── Checkmarks and icons inside dropdowns ────────────────── */
.dark .gradio-container [role="option"] svg,
.dark .gradio-container .dropdown-content li svg {
    color: #a78bfa !important;
    fill: #a78bfa !important;
}

/* ── Tags / chips (model selectors) ───────────────────────── */
.dark .token,
.dark .token-remove,
.dark .tag,
.dark span.tag,
.dark .multiselect-token,
.dark .pill {
    background: rgba(124, 58, 237, 0.2) !important;
    border: 1px solid rgba(124, 58, 237, 0.3) !important;
    border-radius: 999px !important;
    color: #e2e8f0 !important;
    backdrop-filter: blur(8px);
    -webkit-backdrop-filter: blur(8px);
}
.dark .token:hover,
.dark .tag:hover,
.dark .multiselect-token:hover,
.dark .pill:hover {
    background: rgba(124, 58, 237, 0.3) !important;
    border-color: rgba(124, 58, 237, 0.5) !important;
}
.dark .token-remove:hover,
.dark .remove-btn:hover {
    color: #f87171 !important;
}

/* ── Label text above inputs ──────────────────────────────── */
.dark label,
.dark .gr-block label,
.dark .label-text,
.dark .block-label {
    color: #cbd5e1 !important;
}

/* ── Tab buttons — glass pills ────────────────────────────── */
.dark button.tab-nav-button {
    color: #cbd5e1 !important;
    background: rgba(10, 6, 32, 0.5) !important;
    border-radius: 999px !important;
    border: 1px solid rgba(124, 58, 237, 0.15) !important;
    backdrop-filter: blur(12px);
    -webkit-backdrop-filter: blur(12px);
    transition: all 0.3s ease !important;
    position: relative;
    z-index: 1;
}
.dark button.tab-nav-button:hover {
    background: rgba(124, 58, 237, 0.12) !important;
    box-shadow: 0 4px 20px rgba(124, 58, 237, 0.15);
    border-color: rgba(124, 58, 237, 0.3) !important;
}
.dark button.tab-nav-button.selected {
    color: #ffffff !important;
    border-color: transparent !important;
    background-image: linear-gradient(120deg, rgba(124, 58, 237, 1), rgba(59, 130, 246, 1)) !important;
    box-shadow:
        0 10px 30px rgba(37, 99, 235, 0.5),
        0 0 50px rgba(124, 58, 237, 0.15);
}

/* ── Table — aurora glass ─────────────────────────────────── */
.dark .table-wrap {
    border-radius: 16px !important;
    overflow: hidden;
    border: 1px solid rgba(124, 58, 237, 0.2) !important;
    box-shadow:
        0 8px 40px rgba(0, 0, 0, 0.5),
        0 0 60px rgba(124, 58, 237, 0.08);
    position: relative;
    z-index: 1;
}
.dark .table-wrap thead th {
    background: linear-gradient(120deg, rgba(46, 16, 101, 0.9), rgba(30, 58, 138, 0.9)) !important;
    color: #ffffff !important;
    border-bottom: 1px solid rgba(124, 58, 237, 0.3) !important;
    letter-spacing: 0.03em;
    text-shadow: 0 0 10px rgba(124, 58, 237, 0.4);
    backdrop-filter: blur(12px);
}
.dark .table-wrap tbody tr {
    background: rgba(0, 0, 0, 0.35) !important;
    color: #f1f5f9 !important;
    transition: all 0.25s ease;
}
.dark .table-wrap tbody tr:nth-child(even) {
    background: rgba(10, 6, 32, 0.4) !important;
}
.dark .table-wrap tbody tr:hover {
    background: linear-gradient(120deg, rgba(124, 58, 237, 0.12), rgba(59, 130, 246, 0.12)) !important;
    box-shadow: inset 0 0 30px rgba(124, 58, 237, 0.06);
}
.dark .table-wrap tbody td {
    color: #f1f5f9 !important;
    border-bottom: 1px solid rgba(124, 58, 237, 0.06) !important;
}
"""


with gr.Blocks(title="Impermanent Leaderboard") as app:
    gr.Markdown("# Impermanent Leaderboard", elem_classes=["title-banner"])
    gr.Markdown(
        "A **live** time-series forecasting benchmark designed to avoid data contamination. "
        "Automated pipelines continuously fetch fresh data from GitHub — including the number of "
        "open issues, opened PRs, pushes, and stars — ensuring that models are always evaluated "
        "on data they could not have seen during training."
    )

    # Summary figures interpolated into the description below.
    cutoff_dates = sorted(df["cutoff"].unique())
    n_dates = len(cutoff_dates)
    date_min, date_max = cutoff_dates[0], cutoff_dates[-1]

    statistical_models = ["zero_model", "seasonal_naive", "auto_arima", "auto_ets", "auto_lgbm"]
    foundation_models = ["chronos", "moirai", "timesfm"]
    all_model_names = statistical_models + foundation_models

    # The Markdown body is kept at column 0: leading indentation would be part
    # of the string and change how the headings and lists are rendered.
    gr.Markdown(f"""\
## Datasets

GitHub repositories are selected across several **buckets based on their number of stars**,
yielding a mix of both intermittent (low-activity) and high-volume time series.
For each bucket, an automated pipeline fetches four signals:

- **Open issues** — number of issues opened
- **Opened PRs** — number of pull requests opened
- **Pushes** — number of push events
- **Stars** — number of new stars

Each signal is collected at both **daily** and **weekly** granularity.

## Models

The benchmark evaluates two families of forecasting methods:

- **Statistical / ML models:** {", ".join(f"`{m}`" for m in statistical_models)}
- **Foundation models:** {", ".join(f"`{m}`" for m in foundation_models)}

## Evaluation dates

Forecast methods are evaluated **every week** using rolling forecast evaluations.
Currently **{n_dates} evaluations** are available, from **{date_min}** to **{date_max}**.
""")

    with gr.Tab("Leaderboard 🏆"):
        lb = compute_leaderboard(df)
        gr.Dataframe(
            value=lb,
            interactive=False,
            headers=[f"**{c}**" for c in lb.columns],
        )

    with gr.Tab("Results over time 📈"):
        with gr.Row():
            time_metric_dd = gr.Dropdown(
                choices=ALL_METRICS,
                value=ALL_METRICS[0],
                label="Metric",
            )
            time_subdataset_dd = gr.Dropdown(
                choices=ALL_SUBDATASETS,
                value=ALL_SUBDATASETS[0],
                label="Subdataset",
            )
        rank_plot = gr.Plot(label="Rank over time")
        value_plot = gr.Plot(label="Metric value over time")

        def update_plots(metric, subdataset):
            """Event handler: rebuild both plots for the current selection."""
            # Kept as a separately named wrapper so the handler name seen by
            # Gradio stays the same as before.
            fig_rank, fig_value = build_plots(metric, subdataset)
            return fig_rank, fig_value

        plot_inputs = [time_metric_dd, time_subdataset_dd]
        plot_outputs = [rank_plot, value_plot]

        # Draw the plots once on page load, then again whenever either
        # dropdown changes.
        app.load(fn=update_plots, inputs=plot_inputs, outputs=plot_outputs)
        for control in plot_inputs:
            control.change(
                fn=update_plots,
                inputs=plot_inputs,
                outputs=plot_outputs,
            )

    with gr.Tab("All results 📋"):
        with gr.Row():
            metric_dd = gr.Dropdown(
                choices=ALL_METRICS,
                value=ALL_METRICS[0],
                label="Metric",
            )
            subdataset_dd = gr.Dropdown(
                choices=["All"] + ALL_SUBDATASETS,
                value="All",
                label="Subdataset",
            )
            models_dd = gr.Dropdown(
                choices=ALL_MODELS,
                value=ALL_MODELS,
                multiselect=True,
                label="Models",
            )
        # The initial value mirrors the dropdown defaults above.
        results_table = gr.Dataframe(
            value=build_table(ALL_METRICS[0], "All", ALL_MODELS),
            label="Results",
            interactive=False,
        )

        table_inputs = [metric_dd, subdataset_dd, models_dd]
        for control in table_inputs:
            control.change(
                fn=build_table,
                inputs=table_inputs,
                outputs=results_table,
            )


if __name__ == "__main__":
    app.launch(css=CUSTOM_CSS, ssr_mode=False)