import html
from datetime import datetime
from pathlib import Path

import gradio as gr
import pandas as pd

CSV_PATH = Path("leaderboard.csv")

# Full breakdown columns (Appendix Table 6)
COLS = [
    "Model",
    "Score",
    "Completeness",
    "Grounding",
    "Success Rate",
    "Recovery Rate",
    "Flexibility",
    "Order",
    "Info Diversity",
    "Format",
    "Tradeoff",
    "Tool Calls",
    "# Turns",
    "Progress Tracking",
    "Goal Decomposition",
]

# Columns whose cell text is displayed exactly as stored (see format_cell).
PERCENT_COLS = {
    "Success Rate",
    "Recovery Rate",
    "Flexibility",
    "Order",
    "Info Diversity",
    "Format",
    "Tradeoff",
}

# Column name -> abbreviated label.
LABEL_MAP = {
    "Info Diversity": "Info Div.",
    "# Turns": "Turns",
    "Progress Tracking": "Progress",
    "Goal Decomposition": "Goal Decomp.",
}

# Light, Arena-like, high-contrast style (prevents "light bg + light text" issues)
# NOTE: only insignificant CSS whitespace (line breaks / indentation) was
# restored here; every selector, declaration and comment is unchanged.
ARENA_CSS = r"""
:root { color-scheme: light; }
html, body { background: #f6f7fb !important; }

/* Gradio theme tokens */
.gradio-container{
  max-width: 1200px !important;
  margin: 0 auto !important;
  padding: 18px 16px 24px !important;

  --body-background-fill: #f6f7fb !important;
  --body-background-fill-hover: #f6f7fb !important;
  --body-text-color: #0f172a !important;
  --body-text-color-subdued: #334155 !important;

  --block-background-fill: #ffffff !important;
  --block-background-fill-hover: #ffffff !important;
  --block-border-color: #e5e7eb !important;

  --input-background-fill: #ffffff !important;
  --input-background-fill-hover: #ffffff !important;
  --input-border-color: #e2e8f0 !important;
  --input-border-color-focus: #93c5fd !important;
  --input-text-color: #0f172a !important;
  --input-placeholder-color: #94a3b8 !important;

  --button-secondary-background-fill: #ffffff !important;
  --button-secondary-background-fill-hover: #f8fafc !important;
  --button-secondary-border-color: #e2e8f0 !important;
  --button-secondary-text-color: #0f172a !important;

  --button-primary-background-fill: #2563eb !important;
  --button-primary-background-fill-hover: #1d4ed8 !important;
  --button-primary-border-color: #2563eb !important;
  --button-primary-text-color: #ffffff !important;

  --link-text-color: #2563eb !important;
  --link-text-color-active: #1d4ed8 !important;
}

/* Arena table card */
.arena-card{
  background: #ffffff;
  border: 1px solid #e5e7eb;
  border-radius: 14px;
  box-shadow: 0 1px 2px rgba(16,24,40,0.06);
  overflow: hidden;
}
.arena-wrap{
  width: 100%;
  overflow-x: auto;
}
table.arena-table{
  width: 100%;
  min-width: 1300px; /* wide table, scrolls horizontally */
  border-collapse: separate;
  border-spacing: 0;
  font-size: 13px;
  color: #0f172a;
}

/* IMPORTANT: override any global "prose table" borders */
table.arena-table th,
table.arena-table td{
  border: none !important;
  overflow: visible !important;
  text-overflow: clip !important;
}
table.arena-table thead th{
  position: sticky;
  top: 0;
  z-index: 2;
  background: #f8fafc;
  color: #334155 !important;
  font-weight: 650;
  text-align: left;
  padding: 10px 12px;
  border-bottom: 1px solid #e2e8f0 !important;
  white-space: nowrap;
}
table.arena-table tbody td{
  padding: 10px 12px;
  border-bottom: 1px solid #eef2f7 !important;
  white-space: nowrap;
  color: #0f172a !important;
}
table.arena-table tbody tr:nth-child(even){ background: #fbfdff; }
table.arena-table tbody tr:hover{ background: #f1f5f9; }
table.arena-table th.num,
table.arena-table td.num{
  text-align: right;
  font-variant-numeric: tabular-nums;
}
table.arena-table td.model{ font-weight: 650; }
table.arena-table td.rank{ width: 52px; color: #64748b !important; }

/* optional: keep Model column visible while horizontal scrolling */
table.arena-table thead th:first-child,
table.arena-table tbody td:first-child{
  position: sticky;
  left: 0;
  z-index: 3;
  background: #f8fafc;
}
table.arena-table tbody td:first-child{ background: #ffffff; }
"""


def _to_float(x) -> float:
    """Parse numeric and percent strings into float for sorting.

    Real numbers are returned as floats. Strings are stripped, lose one
    trailing ``%`` and any ``,`` separators, and are then parsed with
    ``float()``; note that ``"87.5%"`` becomes ``87.5``, not ``0.875``.
    ``None``, NaN, blank and unparseable values all yield NaN.
    """
    if x is None:
        return float("nan")
    if isinstance(x, (int, float)) and not pd.isna(x):
        return float(x)
    s = str(x).strip()
    if not s:
        return float("nan")
    if s.endswith("%"):
        s = s[:-1].strip()
    s = s.replace(",", "")
    try:
        return float(s)
    except ValueError:
        # float() on a str only ever raises ValueError; anything else would be
        # a genuine bug and should not be silently turned into NaN.
        return float("nan")


def load_df() -> pd.DataFrame:
    """Load the leaderboard CSV restricted to ``COLS``, in ``COLS`` order.

    Returns an empty frame with the ``COLS`` columns when ``CSV_PATH`` does
    not exist or exists but has no content. Columns listed in ``COLS`` that
    are absent from the file are added and filled with empty strings; columns
    in the file that are not in ``COLS`` are dropped.
    """
    if not CSV_PATH.exists():
        return pd.DataFrame(columns=COLS)
    try:
        df = pd.read_csv(CSV_PATH)
    except pd.errors.EmptyDataError:
        # A zero-byte / blank CSV is treated like a missing one instead of
        # crashing the whole page.
        return pd.DataFrame(columns=COLS)
    for c in COLS:
        if c not in df.columns:
            df[c] = ""
    return df[COLS]


def format_cell(col: str, val) -> str:
    """Return the display text for one cell.

    ``None`` and float NaN render as an empty string. Values in
    ``PERCENT_COLS`` are shown as stored (stripped). Any other value that
    ``_to_float`` can parse is shown with two decimals; everything else is
    shown as its stripped string form.
    """
    if val is None or (isinstance(val, float) and pd.isna(val)):
        return ""
    s = str(val).strip()
    if col in PERCENT_COLS:
        return s
    f = _to_float(val)
    if pd.isna(f):
        return s
    return f"{f:.2f}"


def prepare_df(query: str, sort_by: str, descending: bool) -> pd.DataFrame:
    """Load, filter, sort and rank the leaderboard.

    Args:
        query: Case-insensitive substring to look for in the ``Model`` column.
            It is matched literally (not as a regular expression); empty,
            whitespace-only or ``None`` means "no filter".
        sort_by: Column to sort on. Ignored when it is not a column of the
            loaded frame.
        descending: Sort from highest to lowest when true.

    Returns:
        The resulting rows with a fresh 0..n-1 index and a leading ``Rank``
        column numbered from 1 in the final row order.
    """
    df = load_df()

    q = (query or "").strip().lower()
    if q:
        names = df["Model"].astype(str).str.lower()
        # regex=False: the query is user-typed text, so characters such as
        # "(", "+" or "." must match literally rather than raise re.error or
        # act as wildcards.
        df = df[names.str.contains(q, regex=False, na=False)]

    if sort_by in df.columns:
        # Sort on a temporary numeric key so values such as "87.5%" or "1,234"
        # order numerically; values that do not parse become NaN and go last.
        # kind="stable" keeps tied rows in file order, so ranks are
        # deterministic.
        df = (
            df.assign(_s=df[sort_by].map(_to_float))
            .sort_values(
                "_s",
                ascending=not descending,
                na_position="last",
                kind="stable",
            )
            .drop(columns=["_s"])
        )

    df = df.reset_index(drop=True)
    df.insert(0, "Rank", range(1, len(df) + 1))
    return df


# NOTE(review): the source is truncated from this point. The body of
# render_table() and the beginning of the Gradio UI construction are missing
# from the file and have NOT been reconstructed. The surviving fragment of the
# definition is preserved verbatim below so it can be restored from history:
#
# def render_table(df: pd.DataFrame) -> str:
#     if df.empty:
#         return "
leaderboard.csv{(' ยท Last updated: ' + ts) if ts else ''}")
with gr.Accordion("Submit / Update", open=False):
gr.Markdown(
"- Open a PR editing `leaderboard.csv`.\n"
"- Include: model name, evaluation setting/commit hash, and the metrics.\n"
)
demo.launch()