import html import json import re from pathlib import Path import gradio as gr import pandas as pd ROOT = Path(__file__).parent DATA_DIR = ROOT / "data" LEADERBOARD_JSON_PATH = DATA_DIR / "leaderboard.json" LEADERBOARD_CSV_PATH = DATA_DIR / "leaderboard_seed.csv" MODEL_LINK_RE = re.compile(r"^\[(?P.*)\]\((?P.*)\)$") DISPLAY_COLUMNS = [ "Rank", "Model", "Organization", "Input Config", "Conciseness", "Correctness", "Completeness", "F1-like", "Informativeness", "Date", ] NUMERIC_COLUMNS = ["Conciseness", "Correctness", "Completeness", "F1-like", "Informativeness"] CUSTOM_CSS = """ footer { display: none !important; } .api-docs, .show-api, .built-with, [data-testid="api-info"] { display: none !important; } .gradio-container { max-width: 100% !important; padding: 18px 24px 16px !important; } #component-0 { max-width: 100% !important; } .rpc-title h1 { margin-bottom: 4px !important; } .rpc-title p { margin-top: 0 !important; color: #555; } .rpc-links { display: flex; justify-content: center; align-items: center; gap: 8px; flex-wrap: wrap; margin: 4px 0 12px; color: #4b5563; } .rpc-links a { color: #2563eb; text-decoration: none; } .rpc-links a:hover { text-decoration: underline; } .leaderboard-toolbar { display: flex; align-items: center; justify-content: space-between; gap: 12px; margin: 8px 0 12px; flex-wrap: wrap; } .config-filter { display: inline-flex; align-items: center; gap: 4px; padding: 3px; border: 1px solid #e5e7eb; border-radius: 8px; background: #f8fafc; } .config-filter button { border: 0; background: transparent; color: #4b5563; cursor: pointer; font-size: 13px; font-weight: 600; padding: 6px 10px; border-radius: 6px; } .config-filter button.active { background: #111827; color: #ffffff; } .table-count { color: #6b7280; font-size: 13px; } .leaderboard-shell { height: calc(100vh - 245px); min-height: 460px; max-height: 780px; overflow: auto; border: 1px solid #e5e7eb; border-radius: 8px; background: white; } .rpc-table { width: 100%; border-collapse: separate; border-spacing: 0; font-size: 14px; } .rpc-table th { position: sticky; top: 0; z-index: 2; background: #f8fafc; color: #111827; font-weight: 650; text-align: left; border-bottom: 1px solid #d1d5db; padding: 9px 11px; white-space: nowrap; cursor: pointer; user-select: none; } .rpc-table th::after { content: "↕"; color: #9ca3af; font-size: 11px; margin-left: 6px; } .rpc-table th.sort-asc::after { content: "↑"; color: #111827; } .rpc-table th.sort-desc::after { content: "↓"; color: #111827; } .rpc-table td { border-bottom: 1px solid #eef2f7; padding: 9px 11px; vertical-align: middle; white-space: nowrap; } .rpc-table tbody tr:hover { background: #f9fafb; } .rpc-table .num { text-align: right; font-variant-numeric: tabular-nums; } .rpc-table .rank { width: 64px; text-align: right; color: #4b5563; } .rpc-table .model { min-width: 210px; font-weight: 600; } .rpc-table .org { min-width: 180px; } .config-badge { display: inline-flex; align-items: center; justify-content: center; min-width: 58px; padding: 3px 8px; border-radius: 999px; font-size: 12px; font-weight: 700; letter-spacing: 0.02em; } .config-text { background: #e0f2fe; color: #075985; } .config-visual { background: #fef3c7; color: #92400e; } .rpc-table a { color: #2563eb; text-decoration: none; } .rpc-table a:hover { text-decoration: underline; } .submit-panel { max-width: 980px; } .submit-panel pre { border-radius: 8px; } """ TABLE_SCRIPT = """ """ SUBMISSION_GUIDE = """
### How to Submit 1. Fork this repository. 2. Create a new branch for your submission. 3. Add your submission folder under `submissions/____/`. 4. Open a Pull Request with the new submission folder. ### Submission Directory Requirements Each submission directory must contain the metadata and predictions for one model/input configuration pair: ```text ____/ metadata.yaml predictions.jsonl generation_config.json # optional, recommended artifacts/ # optional logs or prompt notes ``` Use URL-safe directory names. Replace spaces, slashes, and special characters with hyphens; keep `input_config` as `TEXT` or `VISUAL`. ### `metadata.yaml` ```yaml model_name: "My Model" organization: "My Org" model_url: https://... # optional work link: paper, GitHub, model card, etc. date: "2026-06-17" # model release date, not submission date split: test input_config: TEXT # TEXT or VISUAL ``` ### `predictions.jsonl` Each line must be one JSON object: ```json { "id": "paper-id", "part_idx": 1, "question": "question text", "category": "category", "gen_answer": "model answer" } ``` `part_idx` is the question index in the current paper's `qa_pairs` list (`1` for the first item). `category` must match the corresponding item in `test.json`. ### Validation Rules Your submission will be validated before evaluation. To pass: - `metadata.yaml` must include `model_name`, `organization`, `date`, `split`, and `input_config`. - `model_url` is optional. - `date` is the model release date, not the submission date. - `split` must be `test`. - `input_config` must be `TEXT` or `VISUAL`. - `predictions.jsonl` must contain exactly one line for every QA item in `test.json`. - `part_idx` is the question index in the current paper's `qa_pairs` list (`1` for the first item). - `id`, `part_idx`, `question`, and `category` must exactly match the benchmark item. - `gen_answer` must be a string. - For `Claim_Verification`, `gen_answer` must be exactly `True` or `False`. ### Submission Process 1. Open PR: add your folder under `submissions/____/`. 2. Fix issues: if validation fails, update the PR with corrected files. 3. Review: once validation passes, a maintainer reviews the submission. 4. Evaluate: maintainers run the official evaluator in a controlled local environment. 5. Import: accepted aggregate results are imported to the leaderboard.
""" def _parse_markdown_link(value): text = str(value).strip() match = MODEL_LINK_RE.match(text) if match: return match.group("name"), match.group("url") return text, "" def _read_csv_leaderboard(): df = pd.read_csv(LEADERBOARD_CSV_PATH) if "Info" in df.columns and "Informativeness" not in df.columns: df = df.rename(columns={"Info": "Informativeness"}) names = [] urls = [] for value in df.get("Model", []): name, url = _parse_markdown_link(value) names.append(name) urls.append(url) if "Model" in df.columns: df["Model"] = names df["url"] = urls for col in NUMERIC_COLUMNS: if col in df.columns: df[col] = pd.to_numeric(df[col], errors="coerce") df = df.sort_values("Informativeness", ascending=False, na_position="last").reset_index(drop=True) df.insert(0, "Rank", range(1, len(df) + 1)) return df def _read_json_leaderboard(): with LEADERBOARD_JSON_PATH.open("r", encoding="utf-8") as f: data = json.load(f) rows = [] for season in data.get("seasons", {}).values(): for row in season.get("models", []): rows.append({ "Model": row.get("name", ""), "url": row.get("url", ""), "Organization": row.get("org", ""), "Input Config": str(row.get("modality", "")).upper(), "Conciseness": row.get("conciseness", 0), "Correctness": row.get("correctness", 0), "Completeness": row.get("completeness", 0), "F1-like": row.get("f1_like", row.get("informativeness", 0)), "Informativeness": row.get("informativeness", row.get("info", row.get("overall", 0))), "Date": row.get("date", ""), }) df = pd.DataFrame(rows) if df.empty: return pd.DataFrame(columns=DISPLAY_COLUMNS + ["url"]) df = df.sort_values("Informativeness", ascending=False, na_position="last").reset_index(drop=True) df.insert(0, "Rank", range(1, len(df) + 1)) return df def load_leaderboard_table(): if LEADERBOARD_CSV_PATH.exists(): try: return _read_csv_leaderboard() except Exception: pass return _read_json_leaderboard() def _format_cell(value, column): if pd.isna(value): return "" if column in NUMERIC_COLUMNS: return f"{float(value):.2f}" return html.escape(str(value)) def _render_input_config(value): config = str(value).upper() if config == "TEXT": return 'TEXT' if config == "VISUAL": return 'VISUAL' return html.escape(config) def render_leaderboard_html(): df = load_leaderboard_table() columns = [col for col in DISPLAY_COLUMNS if col in df.columns] thead = "".join( f'{html.escape(col)}' for col in columns ) body_rows = [] for _, row in df.iterrows(): cells = [] config_value = str(row.get("Input Config", "")).upper() for col in columns: classes = [] if col == "Rank": classes.append("rank") if col == "Model": classes.append("model") if col == "Organization": classes.append("org") if col in NUMERIC_COLUMNS or col == "Rank": classes.append("num") class_attr = f' class="{" ".join(classes)}"' if classes else "" data_value = html.escape(str(row[col]), quote=True) data_col = html.escape(col, quote=True) if col == "Model" and row.get("url"): text = html.escape(str(row[col])) url = html.escape(str(row["url"]), quote=True) value = f'{text}' elif col == "Input Config": value = _render_input_config(row[col]) else: value = _format_cell(row[col], col) cells.append(f'{value}') body_rows.append(f'' + "".join(cells) + "") return f"""
{len(df)} entries
{thead}{''.join(body_rows)}
{TABLE_SCRIPT} """ with gr.Blocks(title="RPC-Bench Leaderboard", analytics_enabled=False, css=CUSTOM_CSS) as demo: gr.Markdown( """ # RPC-Bench Leaderboard """, elem_classes=["rpc-title"], ) gr.Markdown(SUBMISSION_GUIDE) gr.Markdown("### Leaderboard") gr.HTML(render_leaderboard_html()) if __name__ == "__main__": demo.launch(show_api=False)