| import html |
| import json |
| import re |
| from pathlib import Path |
|
|
| import gradio as gr |
| import pandas as pd |
|
|
|
|
# Filesystem layout: the leaderboard data files live in a "data" folder that
# sits next to this module.
ROOT = Path(__file__).parent
DATA_DIR = ROOT.joinpath("data")
LEADERBOARD_JSON_PATH = DATA_DIR.joinpath("leaderboard.json")
LEADERBOARD_CSV_PATH = DATA_DIR.joinpath("leaderboard_seed.csv")

# Matches a value that consists solely of one Markdown link: "[name](url)".
MODEL_LINK_RE = re.compile(r"^\[(?P<name>.*)\]\((?P<url>.*)\)$")

# Score columns that are parsed and rendered as numbers.
NUMERIC_COLUMNS = [
    "Conciseness",
    "Correctness",
    "Completeness",
    "F1-like",
    "Informativeness",
]

# Table columns in display order: descriptive columns, then the score
# columns, then the date.
DISPLAY_COLUMNS = ["Rank", "Model", "Organization", "Input Config", *NUMERIC_COLUMNS, "Date"]
|
|
# Stylesheet passed to gr.Blocks(css=...). It hides the Gradio footer/API
# chrome and styles the header links, the filter toolbar, the scrollable
# table shell and the leaderboard table itself.
# The sort indicators on the table headers are: "↕" for a column that is not
# the active sort column, "↑" for ascending and "↓" for descending (the
# sort-asc / sort-desc classes are toggled by the table script).
CUSTOM_CSS = """
footer { display: none !important; }
.api-docs, .show-api, .built-with, [data-testid="api-info"] { display: none !important; }
.gradio-container { max-width: 100% !important; padding: 18px 24px 16px !important; }
#component-0 { max-width: 100% !important; }
.rpc-title h1 { margin-bottom: 4px !important; }
.rpc-title p { margin-top: 0 !important; color: #555; }
.rpc-links {
    display: flex;
    justify-content: center;
    align-items: center;
    gap: 8px;
    flex-wrap: wrap;
    margin: 4px 0 12px;
    color: #4b5563;
}
.rpc-links a { color: #2563eb; text-decoration: none; }
.rpc-links a:hover { text-decoration: underline; }
.leaderboard-toolbar {
    display: flex;
    align-items: center;
    justify-content: space-between;
    gap: 12px;
    margin: 8px 0 12px;
    flex-wrap: wrap;
}
.config-filter {
    display: inline-flex;
    align-items: center;
    gap: 4px;
    padding: 3px;
    border: 1px solid #e5e7eb;
    border-radius: 8px;
    background: #f8fafc;
}
.config-filter button {
    border: 0;
    background: transparent;
    color: #4b5563;
    cursor: pointer;
    font-size: 13px;
    font-weight: 600;
    padding: 6px 10px;
    border-radius: 6px;
}
.config-filter button.active {
    background: #111827;
    color: #ffffff;
}
.table-count {
    color: #6b7280;
    font-size: 13px;
}
.leaderboard-shell {
    height: calc(100vh - 245px);
    min-height: 460px;
    max-height: 780px;
    overflow: auto;
    border: 1px solid #e5e7eb;
    border-radius: 8px;
    background: white;
}
.rpc-table {
    width: 100%;
    border-collapse: separate;
    border-spacing: 0;
    font-size: 14px;
}
.rpc-table th {
    position: sticky;
    top: 0;
    z-index: 2;
    background: #f8fafc;
    color: #111827;
    font-weight: 650;
    text-align: left;
    border-bottom: 1px solid #d1d5db;
    padding: 9px 11px;
    white-space: nowrap;
    cursor: pointer;
    user-select: none;
}
.rpc-table th::after {
    content: "↕";
    color: #9ca3af;
    font-size: 11px;
    margin-left: 6px;
}
.rpc-table th.sort-asc::after { content: "↑"; color: #111827; }
.rpc-table th.sort-desc::after { content: "↓"; color: #111827; }
.rpc-table td {
    border-bottom: 1px solid #eef2f7;
    padding: 9px 11px;
    vertical-align: middle;
    white-space: nowrap;
}
.rpc-table tbody tr:hover { background: #f9fafb; }
.rpc-table .num { text-align: right; font-variant-numeric: tabular-nums; }
.rpc-table .rank { width: 64px; text-align: right; color: #4b5563; }
.rpc-table .model { min-width: 210px; font-weight: 600; }
.rpc-table .org { min-width: 180px; }
.config-badge {
    display: inline-flex;
    align-items: center;
    justify-content: center;
    min-width: 58px;
    padding: 3px 8px;
    border-radius: 999px;
    font-size: 12px;
    font-weight: 700;
    letter-spacing: 0.02em;
}
.config-text {
    background: #e0f2fe;
    color: #075985;
}
.config-visual {
    background: #fef3c7;
    color: #92400e;
}
.rpc-table a { color: #2563eb; text-decoration: none; }
.rpc-table a:hover { text-decoration: underline; }
.submit-panel { max-width: 980px; }
.submit-panel pre { border-radius: 8px; }
"""
|
|
|
|
# Inline <script> that render_leaderboard_html() appends after the table.
# It looks up #rpc-leaderboard-table and, when found:
#   * sorts the rows when a <th data-sort> header is clicked, comparing the
#     cells' data-value attributes (numeric for Rank and the score columns,
#     Date.parse for "Date", lower-cased text otherwise);
#   * shows only rows whose data-config matches the clicked .config-filter
#     button ("ALL" shows every row);
#   * renumbers the Rank cell of the visible rows and updates #table-count.
# The initial state is "Informativeness" descending with no filter.
# NOTE(review): gr.HTML may not execute inline <script> tags depending on the
# Gradio version - confirm in the deployed app that these handlers attach.
TABLE_SCRIPT = """
<script>
(function () {
const table = document.getElementById("rpc-leaderboard-table");
if (!table) return;

const tbody = table.querySelector("tbody");
const headers = table.querySelectorAll("th[data-sort]");
const filterButtons = document.querySelectorAll(".config-filter button");
const countEl = document.getElementById("table-count");
let activeConfig = "ALL";
let sortColumn = "Informativeness";
let sortDirection = "desc";

function parseValue(row, column) {
const cell = row.querySelector(`[data-col="${column}"]`);
if (!cell) return "";
const raw = cell.getAttribute("data-value") || cell.textContent || "";
if (["Rank", "Conciseness", "Correctness", "Completeness", "F1-like", "Informativeness"].includes(column)) {
const num = Number.parseFloat(raw);
return Number.isNaN(num) ? -Infinity : num;
}
if (column === "Date") {
const time = Date.parse(raw);
return Number.isNaN(time) ? 0 : time;
}
return raw.toLowerCase();
}


function apply() {
const rows = Array.from(tbody.querySelectorAll("tr"));
const sorted = rows.slice().sort((a, b) => {
const av = parseValue(a, sortColumn);
const bv = parseValue(b, sortColumn);
if (av < bv) return sortDirection === "asc" ? -1 : 1;
if (av > bv) return sortDirection === "asc" ? 1 : -1;
return 0;
});

sorted.forEach(row => tbody.appendChild(row));

let shown = 0;
Array.from(tbody.querySelectorAll("tr")).forEach(row => {
const visible = activeConfig === "ALL" || row.dataset.config === activeConfig;
row.style.display = visible ? "" : "none";
if (visible) {
shown += 1;
const rankCell = row.querySelector('[data-col="Rank"]');
if (rankCell) {
rankCell.textContent = shown;
rankCell.setAttribute("data-value", String(shown));
}
}
});

if (countEl) countEl.textContent = `${shown} entries`;
headers.forEach(header => {
header.classList.remove("sort-asc", "sort-desc");
if (header.dataset.sort === sortColumn) {
header.classList.add(sortDirection === "asc" ? "sort-asc" : "sort-desc");
}
});
}

headers.forEach(header => {
header.addEventListener("click", () => {
const column = header.dataset.sort;
if (sortColumn === column) {
sortDirection = sortDirection === "asc" ? "desc" : "asc";
} else {
sortColumn = column;
sortDirection = ["Model", "Organization", "Input Config", "Date"].includes(column) ? "asc" : "desc";
}
apply();
});
});

filterButtons.forEach(button => {
button.addEventListener("click", () => {
activeConfig = button.dataset.config;
filterButtons.forEach(item => item.classList.toggle("active", item === button));
apply();
});
});

apply();
})();
</script>
"""
|
|
# Markdown (wrapped in a .submit-panel <div>) that is passed to gr.Markdown.
# It documents the submission workflow, the required directory layout, the
# metadata.yaml and predictions.jsonl formats, and the validation rules.
SUBMISSION_GUIDE = """
<div class="submit-panel">

### How to Submit

1. Fork <a href="https://huggingface.co/datasets/zai-org/RPC-Bench" target="_blank" rel="noopener noreferrer">this repository</a>.
2. Create a new branch for your submission.
3. Add your submission folder under
`submissions/<organization>__<model>__<input_config>/`.
4. Open a Pull Request with the new submission folder.

### Submission Directory Requirements

Each submission directory must contain the metadata and predictions for one
model/input configuration pair:

```text
<organization>__<model>__<input_config>/
metadata.yaml
predictions.jsonl
generation_config.json # optional, recommended
artifacts/ # optional logs or prompt notes
```

Use URL-safe directory names. Replace spaces, slashes, and special characters
with hyphens; keep `input_config` as `TEXT` or `VISUAL`.

### `metadata.yaml`

```yaml
model_name: "My Model"
organization: "My Org"
model_url: https://... # optional work link: paper, GitHub, model card, etc.
date: "2026-06-17" # model release date, not submission date
split: test
input_config: TEXT # TEXT or VISUAL
```

### `predictions.jsonl`

Each line must be one JSON object:

```json
{
"id": "paper-id",
"part_idx": 1,
"question": "question text",
"category": "category",
"gen_answer": "model answer"
}
```

`part_idx` is the question index in the current paper's `qa_pairs` list (`1` for the first item). `category` must match the corresponding item in `test.json`.

### Validation Rules

Your submission will be validated before evaluation. To pass:

- `metadata.yaml` must include `model_name`, `organization`, `date`, `split`,
and `input_config`.
- `model_url` is optional.
- `date` is the model release date, not the submission date.
- `split` must be `test`.
- `input_config` must be `TEXT` or `VISUAL`.
- `predictions.jsonl` must contain exactly one line for every QA item in
`test.json`.
- `part_idx` is the question index in the current paper's `qa_pairs` list
(`1` for the first item).
- `id`, `part_idx`, `question`, and `category` must exactly match the benchmark
item.
- `gen_answer` must be a string.
- For `Claim_Verification`, `gen_answer` must be exactly `True` or `False`.

### Submission Process

1. Open PR: add your folder under
`submissions/<organization>__<model>__<input_config>/`.
2. Fix issues: if validation fails, update the PR with corrected files.
3. Review: once validation passes, a maintainer reviews the submission.
4. Evaluate: maintainers run the official evaluator in a controlled local
environment.
5. Import: accepted aggregate results are imported to the leaderboard.

</div>
"""
|
|
|
|
| def _parse_markdown_link(value): |
| text = str(value).strip() |
| match = MODEL_LINK_RE.match(text) |
| if match: |
| return match.group("name"), match.group("url") |
| return text, "" |
|
|
|
|
def _read_csv_leaderboard():
    """Load the seed CSV into a ranked leaderboard frame.

    Steps performed on the file at ``LEADERBOARD_CSV_PATH``:

    * a legacy ``Info`` column is renamed to ``Informativeness`` when the
      latter is absent;
    * ``Model`` cells written as ``[name](url)`` are split into a plain
      ``Model`` name and a ``url`` column;
    * the columns in ``NUMERIC_COLUMNS`` are coerced to numbers
      (unparseable values become NaN);
    * rows are sorted by ``Informativeness`` (highest first, NaN last) and a
      1-based ``Rank`` column is placed first.

    Returns:
        pandas.DataFrame: the ranked table.

    Raises:
        KeyError: if the CSV has no ``Informativeness`` (or ``Info``) column.
    """
    df = pd.read_csv(LEADERBOARD_CSV_PATH)
    if "Info" in df.columns and "Informativeness" not in df.columns:
        df = df.rename(columns={"Info": "Informativeness"})

    # Only derive the name/url pair when there is a Model column to parse;
    # assigning an empty url list to a non-empty frame would raise.
    if "Model" in df.columns:
        parsed = [_parse_markdown_link(value) for value in df["Model"]]
        df["Model"] = [name for name, _ in parsed]
        df["url"] = [url for _, url in parsed]

    for col in NUMERIC_COLUMNS:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors="coerce")

    df = df.sort_values("Informativeness", ascending=False, na_position="last").reset_index(drop=True)

    # DataFrame.insert refuses to add a column that already exists, so a CSV
    # that ships its own "Rank" column is stripped of it before re-ranking.
    if "Rank" in df.columns:
        df = df.drop(columns="Rank")
    df.insert(0, "Rank", range(1, len(df) + 1))
    return df
|
|
|
|
def _read_json_leaderboard():
    """Load the JSON leaderboard into a ranked frame.

    Every entry under ``seasons.<key>.models`` of the file at
    ``LEADERBOARD_JSON_PATH`` becomes one row. Score columns are coerced to
    numbers, matching the CSV reader, so that string or mixed-type scores
    cannot break sorting or number formatting (unparseable values become
    NaN). Rows are sorted by ``Informativeness`` (highest first, NaN last)
    and a 1-based ``Rank`` column is placed first.

    Returns:
        pandas.DataFrame: the ranked table, or an empty frame with the
        ``DISPLAY_COLUMNS`` plus ``url`` columns when the file lists no
        models.
    """
    with LEADERBOARD_JSON_PATH.open("r", encoding="utf-8") as f:
        data = json.load(f)

    rows = []
    for season in data.get("seasons", {}).values():
        for row in season.get("models", []):
            rows.append({
                "Model": row.get("name", ""),
                "url": row.get("url", ""),
                "Organization": row.get("org", ""),
                "Input Config": str(row.get("modality", "")).upper(),
                "Conciseness": row.get("conciseness", 0),
                "Correctness": row.get("correctness", 0),
                "Completeness": row.get("completeness", 0),
                # NOTE(review): falls back to "informativeness" when
                # "f1_like" is absent - confirm this is intended.
                "F1-like": row.get("f1_like", row.get("informativeness", 0)),
                "Informativeness": row.get("informativeness", row.get("info", row.get("overall", 0))),
                "Date": row.get("date", ""),
            })

    df = pd.DataFrame(rows)
    if df.empty:
        return pd.DataFrame(columns=DISPLAY_COLUMNS + ["url"])

    # Every row dict above defines all score keys, so each column exists.
    for col in NUMERIC_COLUMNS:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    df = df.sort_values("Informativeness", ascending=False, na_position="last").reset_index(drop=True)
    df.insert(0, "Rank", range(1, len(df) + 1))
    return df
|
|
|
|
def load_leaderboard_table():
    """Return the leaderboard frame, preferring the seed CSV over the JSON.

    The CSV at ``LEADERBOARD_CSV_PATH`` is used when it exists and can be
    read. If reading it fails for any reason the failure is logged (with
    traceback) and the JSON file is used instead, so a broken CSV no longer
    goes unnoticed.

    Returns:
        pandas.DataFrame: the ranked leaderboard table.

    Raises:
        Any exception raised by ``_read_json_leaderboard`` when the JSON
        fallback itself cannot be loaded.
    """
    import logging

    if LEADERBOARD_CSV_PATH.exists():
        try:
            return _read_csv_leaderboard()
        except Exception:
            # Best-effort by design: keep the fallback, but leave a trace.
            logging.getLogger(__name__).exception(
                "Could not read %s; falling back to %s",
                LEADERBOARD_CSV_PATH,
                LEADERBOARD_JSON_PATH,
            )
    return _read_json_leaderboard()
|
|
|
|
| def _format_cell(value, column): |
| if pd.isna(value): |
| return "" |
| if column in NUMERIC_COLUMNS: |
| return f"{float(value):.2f}" |
| return html.escape(str(value)) |
|
|
|
|
| def _render_input_config(value): |
| config = str(value).upper() |
| if config == "TEXT": |
| return '<span class="config-badge config-text">TEXT</span>' |
| if config == "VISUAL": |
| return '<span class="config-badge config-visual">VISUAL</span>' |
| return html.escape(config) |
|
|
|
|
def render_leaderboard_html():
    """Render the filter toolbar, leaderboard table and table script as HTML.

    The table is built from ``load_leaderboard_table()`` and limited to the
    ``DISPLAY_COLUMNS`` that the frame actually provides. Each ``<td>``
    carries ``data-col`` and ``data-value`` attributes and each ``<tr>`` a
    ``data-config`` attribute; ``TABLE_SCRIPT`` (appended at the end) reads
    them for sorting and filtering.

    Model names are turned into links only when the row's ``url`` is an
    ``http://`` or ``https://`` address, so other schemes (for example
    ``javascript:``) coming from leaderboard data never reach ``href``.
    Missing values produce an empty ``data-value`` rather than the text
    ``"nan"``.

    Returns:
        str: the HTML fragment.
    """
    df = load_leaderboard_table()
    columns = [col for col in DISPLAY_COLUMNS if col in df.columns]
    column_classes = {"Rank": "rank", "Model": "model", "Organization": "org"}

    thead = "".join(
        f'<th data-sort="{html.escape(col, quote=True)}">{html.escape(col)}</th>'
        for col in columns
    )

    body_rows = []
    for _, row in df.iterrows():
        config_value = str(row.get("Input Config", "")).upper()

        raw_url = row.get("url")
        url = "" if raw_url is None or pd.isna(raw_url) else str(raw_url).strip()
        if not url.lower().startswith(("http://", "https://")):
            url = ""  # not a web link: render the model name as plain text

        cells = []
        for col in columns:
            raw = row[col]
            classes = [column_classes[col]] if col in column_classes else []
            if col in NUMERIC_COLUMNS or col == "Rank":
                classes.append("num")
            class_attr = f' class="{" ".join(classes)}"' if classes else ""
            data_value = "" if pd.isna(raw) else html.escape(str(raw), quote=True)
            data_col = html.escape(col, quote=True)

            if col == "Model" and url:
                text = _format_cell(raw, col)
                href = html.escape(url, quote=True)
                value = f'<a href="{href}" target="_blank" rel="noopener noreferrer">{text}</a>'
            elif col == "Input Config":
                value = _render_input_config(raw)
            else:
                value = _format_cell(raw, col)
            cells.append(f'<td{class_attr} data-col="{data_col}" data-value="{data_value}">{value}</td>')

        body_rows.append(f'<tr data-config="{html.escape(config_value, quote=True)}">' + "".join(cells) + "</tr>")

    return f"""
<div class="leaderboard-toolbar">
<div class="config-filter" aria-label="Input Config filter">
<button type="button" class="active" data-config="ALL">All</button>
<button type="button" data-config="TEXT">TEXT</button>
<button type="button" data-config="VISUAL">VISUAL</button>
</div>
<div id="table-count" class="table-count">{len(df)} entries</div>
</div>
<div class="leaderboard-shell">
<table id="rpc-leaderboard-table" class="rpc-table">
<thead><tr>{thead}</tr></thead>
<tbody>{''.join(body_rows)}</tbody>
</table>
</div>
{TABLE_SCRIPT}
"""
|
|
|
|
|
|
# Page layout: title with project links, the submission guide, then the
# leaderboard table. The table HTML is rendered once, when this module is
# imported.
# NOTE(review): the header icons were restored from mis-encoded bytes. The
# bullet separators and the Hugging Face icon are unambiguous; the Project
# Page, Paper and ModelScope icons are best guesses - confirm against the
# intended design.
# NOTE(review): the HTML passed to gr.HTML ends with an inline <script>;
# depending on the Gradio version inline scripts may not be executed -
# confirm that sorting and filtering work in the deployed app.
with gr.Blocks(title="RPC-Bench Leaderboard", analytics_enabled=False, css=CUSTOM_CSS) as demo:
    gr.Markdown(
        """
        # RPC-Bench Leaderboard

        <div class="rpc-links">
        <span>🌐 <a href="https://rpc-bench.github.io/" target="_blank" rel="noopener noreferrer">Project Page</a></span>
        <span>•</span>
        <span>📄 <a href="https://arxiv.org/abs/2601.14289" target="_blank" rel="noopener noreferrer">Paper</a></span>
        <span>•</span>
        <span>🤗 <a href="https://huggingface.co/datasets/zai-org/RPC-Bench" target="_blank" rel="noopener noreferrer">Hugging Face</a></span>
        <span>•</span>
        <span>🧭 <a href="https://modelscope.cn/datasets/ZhipuAI/RPC-Bench" target="_blank" rel="noopener noreferrer">ModelScope</a></span>
        </div>
        """,
        elem_classes=["rpc-title"],
    )

    gr.Markdown(SUBMISSION_GUIDE)
    gr.Markdown("### Leaderboard")
    gr.HTML(render_leaderboard_html())
|
|
|
|
# Launch the app only when this file is executed as a script (not when it is
# imported); launch() is called with show_api=False.
if __name__ == "__main__":
    demo.launch(show_api=False)
|
|