# wangruisi1
# Update on table scheme
# fb265c6
import gradio as gr
import pandas as pd
import json
from collections import OrderedDict
from src.about import (
CITATION_BUTTON_LABEL,
CITATION_BUTTON_TEXT,
EVALUATION_QUEUE_TEXT,
INTRODUCTION_TEXT,
LLM_BENCHMARKS_TEXT,
TITLE,
)
from src.display.css_html_js import custom_css
# ============================================================
# Static leaderboard layout for VBVR-Bench
# ============================================================
# Each selectable column group mapped to the score columns it contains.
# Insertion order is the left-to-right order in which the groups are
# appended when the table columns are assembled.
COLUMN_GROUPS = OrderedDict(
    {
        "Overall": ["Overall"],
        "Overall by Category": [
            "Abst.(All)",
            "Know.(All)",
            "Perc.(All)",
            "Spat.(All)",
            "Trans.(All)",
        ],
        "In-Domain (ID)": ["Overall(In-Domain)"],
        "In-Domain by Category": [
            "Abst.(ID)",
            "Know.(ID)",
            "Perc.(ID)",
            "Spat.(ID)",
            "Trans.(ID)",
        ],
        "Out-of-Domain (OOD)": ["Overall(Out-of-Domain)"],
        "Out-of-Domain by Category": [
            "Abst.(OOD)",
            "Know.(OOD)",
            "Perc.(OOD)",
            "Spat.(OOD)",
            "Trans.(OOD)",
        ],
    }
)

# Groups shown by default (matching the LaTeX table layout): every group
# except the combined per-category breakdown.
DEFAULT_GROUPS = [
    group for group in COLUMN_GROUPS if group != "Overall by Category"
]

# Columns that stay visible whatever groups are selected.
ALWAYS_VISIBLE_COLS = ["Model", "Type"]
# ============================================================
# Column-to-color mapping (used by JS)
# ============================================================
# Header text -> CSS background colour. Each headline column gets the darker
# tint of its family; the five per-category columns of the same family share
# one lighter tint.
COLUMN_COLORS = {
    # Overall (dark amber)
    "Overall": "rgba(232, 180, 58, 0.30)",
    # Overall by Category (light amber)
    **dict.fromkeys(
        ("Abst.(All)", "Know.(All)", "Perc.(All)", "Spat.(All)", "Trans.(All)"),
        "rgba(242, 200, 90, 0.15)",
    ),
    # In-Domain Overall (dark green)
    "Overall(In-Domain)": "rgba(82, 183, 120, 0.30)",
    # In-Domain by Category (light green)
    **dict.fromkeys(
        ("Abst.(ID)", "Know.(ID)", "Perc.(ID)", "Spat.(ID)", "Trans.(ID)"),
        "rgba(110, 200, 145, 0.15)",
    ),
    # Out-of-Domain Overall (dark blue)
    "Overall(Out-of-Domain)": "rgba(95, 150, 215, 0.30)",
    # Out-of-Domain by Category (light blue)
    **dict.fromkeys(
        ("Abst.(OOD)", "Know.(OOD)", "Perc.(OOD)", "Spat.(OOD)", "Trans.(OOD)"),
        "rgba(125, 175, 228, 0.15)",
    ),
}
# ============================================================
# Static model scores data
# ============================================================
# Model links mapping: display name -> public page for that model.
# Names without an entry here are rendered as plain text.
MODEL_LINKS = {
    "VBVR-Wan2.2": "https://huggingface.co/Video-Reason/VBVR-Wan2.2",
    "Sora 2": "https://sora.chatgpt.com/",
    "Veo 3.1": "https://aistudio.google.com/models/veo-3",
    "Runway Gen-4 Turbo": "https://runwayml.com/research/introducing-runway-gen-4",
    "Wan2.2-I2V-A14B": "https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers",
    "Kling 2.6": "https://app.klingai.com/global/quickstart/klingai-video-26-audio-user-guide",
    "LTX-2": "https://huggingface.co/Lightricks/LTX-2",
    "CogVideoX1.5-5B-I2V": "https://huggingface.co/zai-org/CogVideoX1.5-5B-I2V",
    "HunyuanVideo-I2V": "https://huggingface.co/tencent/HunyuanVideo-I2V",
}


def make_model_link(model_name: str) -> str:
    """Return an HTML anchor for *model_name*, or the bare name if it has no URL.

    Args:
        model_name: Display name of the model, used as the lookup key in
            ``MODEL_LINKS`` and as the link text.

    Returns:
        ``<a href=... target="_blank" rel="noopener noreferrer">name</a>`` when
        the name has an entry in ``MODEL_LINKS``; otherwise ``model_name``
        unchanged.
    """
    # Single lookup instead of a membership test followed by indexing.
    url = MODEL_LINKS.get(model_name)
    if url is None:
        return model_name
    # The link opens a third-party site in a new tab; rel="noopener noreferrer"
    # keeps that page from getting a handle on this one via window.opener.
    return (
        f'<a href="{url}" target="_blank" rel="noopener noreferrer">'
        f"{model_name}</a>"
    )
# One row per leaderboard entry. Every row carries the display name, a
# "Type" label (emoji + category) and the 18 score columns: the overall,
# in-domain and out-of-domain headline scores plus five per-category scores
# for each of the three splits.
# The "Type" labels hold real emoji characters; keep this file saved as UTF-8.
MODELS_DATA = [
    {
        "Model": "Human",
        "Type": "👤 Reference",
        "Overall": 0.974, "Overall(In-Domain)": 0.960, "Overall(Out-of-Domain)": 0.988,
        "Abst.(All)": 0.947, "Know.(All)": 0.972, "Perc.(All)": 0.994, "Spat.(All)": 0.969, "Trans.(All)": 0.981,
        "Abst.(ID)": 0.919, "Know.(ID)": 0.956, "Perc.(ID)": 1.000, "Spat.(ID)": 0.950, "Trans.(ID)": 1.000,
        "Abst.(OOD)": 1.000, "Know.(OOD)": 1.000, "Perc.(OOD)": 0.990, "Spat.(OOD)": 1.000, "Trans.(OOD)": 0.970,
    },
    # ---- Open-source Models ----
    {
        "Model": "CogVideoX1.5-5B-I2V",
        "Type": "🟢 Open-source",
        "Overall": 0.2727, "Overall(In-Domain)": 0.2831, "Overall(Out-of-Domain)": 0.2623,
        "Abst.(All)": 0.2548, "Know.(All)": 0.2952, "Perc.(All)": 0.2525, "Spat.(All)": 0.2996, "Trans.(All)": 0.2903,
        "Abst.(ID)": 0.2408, "Know.(ID)": 0.3285, "Perc.(ID)": 0.2567, "Spat.(ID)": 0.3281, "Trans.(ID)": 0.3051,
        "Abst.(OOD)": 0.2809, "Know.(OOD)": 0.2352, "Perc.(OOD)": 0.2501, "Spat.(OOD)": 0.2539, "Trans.(OOD)": 0.2824,
    },
    {
        "Model": "HunyuanVideo-I2V",
        "Type": "🟢 Open-source",
        "Overall": 0.2726, "Overall(In-Domain)": 0.2799, "Overall(Out-of-Domain)": 0.2653,
        "Abst.(All)": 0.1956, "Know.(All)": 0.3614, "Perc.(All)": 0.2910, "Spat.(All)": 0.2698, "Trans.(All)": 0.2733,
        "Abst.(ID)": 0.2068, "Know.(ID)": 0.3573, "Perc.(ID)": 0.2933, "Spat.(ID)": 0.2802, "Trans.(ID)": 0.3160,
        "Abst.(OOD)": 0.1747, "Know.(OOD)": 0.3688, "Perc.(OOD)": 0.2897, "Spat.(OOD)": 0.2530, "Trans.(OOD)": 0.2502,
    },
    {
        "Model": "Wan2.2-I2V-A14B",
        "Type": "🟢 Open-source",
        "Overall": 0.3714, "Overall(In-Domain)": 0.4125, "Overall(Out-of-Domain)": 0.3287,
        "Abst.(All)": 0.4212, "Know.(All)": 0.3556, "Perc.(All)": 0.3710, "Spat.(All)": 0.3397, "Trans.(All)": 0.3465,
        "Abst.(ID)": 0.4301, "Know.(ID)": 0.3823, "Perc.(ID)": 0.4147, "Spat.(ID)": 0.4043, "Trans.(ID)": 0.4192,
        "Abst.(OOD)": 0.4046, "Know.(OOD)": 0.3077, "Perc.(OOD)": 0.3427, "Spat.(OOD)": 0.2364, "Trans.(OOD)": 0.3073,
    },
    {
        "Model": "LTX-2",
        "Type": "🟢 Open-source",
        "Overall": 0.3129, "Overall(In-Domain)": 0.3287, "Overall(Out-of-Domain)": 0.2971,
        "Abst.(All)": 0.2908, "Know.(All)": 0.3531, "Perc.(All)": 0.3200, "Spat.(All)": 0.2980, "Trans.(All)": 0.3093,
        "Abst.(ID)": 0.3156, "Know.(ID)": 0.3621, "Perc.(ID)": 0.3257, "Spat.(ID)": 0.3399, "Trans.(ID)": 0.3060,
        "Abst.(OOD)": 0.2444, "Know.(OOD)": 0.3369, "Perc.(OOD)": 0.3167, "Spat.(OOD)": 0.2308, "Trans.(OOD)": 0.3110,
    },
    # ---- Proprietary Models ----
    {
        "Model": "Runway Gen-4 Turbo",
        "Type": "🔵 Proprietary",
        "Overall": 0.4031, "Overall(In-Domain)": 0.3920, "Overall(Out-of-Domain)": 0.4141,
        "Abst.(All)": 0.4370, "Know.(All)": 0.4165, "Perc.(All)": 0.4223, "Spat.(All)": 0.3357, "Trans.(All)": 0.3696,
        "Abst.(ID)": 0.3956, "Know.(ID)": 0.4094, "Perc.(ID)": 0.4288, "Spat.(ID)": 0.3409, "Trans.(ID)": 0.3629,
        "Abst.(OOD)": 0.5147, "Know.(OOD)": 0.4294, "Perc.(OOD)": 0.4185, "Spat.(OOD)": 0.3274, "Trans.(OOD)": 0.3733,
    },
    {
        "Model": "Sora 2",
        "Type": "🔵 Proprietary",
        "Overall": 0.5457, "Overall(In-Domain)": 0.5691, "Overall(Out-of-Domain)": 0.5225,
        "Abst.(All)": 0.5824, "Know.(All)": 0.4749, "Perc.(All)": 0.5458, "Spat.(All)": 0.5298, "Trans.(All)": 0.5640,
        "Abst.(ID)": 0.6023, "Know.(ID)": 0.4767, "Perc.(ID)": 0.5810, "Spat.(ID)": 0.5720, "Trans.(ID)": 0.5967,
        "Abst.(OOD)": 0.5462, "Know.(OOD)": 0.4715, "Perc.(OOD)": 0.5254, "Spat.(OOD)": 0.4623, "Trans.(OOD)": 0.5465,
    },
    {
        "Model": "Kling 2.6",
        "Type": "🔵 Proprietary",
        "Overall": 0.3691, "Overall(In-Domain)": 0.4082, "Overall(Out-of-Domain)": 0.3300,
        "Abst.(All)": 0.4866, "Know.(All)": 0.2556, "Perc.(All)": 0.3095, "Spat.(All)": 0.3504, "Trans.(All)": 0.4149,
        "Abst.(ID)": 0.4647, "Know.(ID)": 0.3225, "Perc.(ID)": 0.3749, "Spat.(ID)": 0.3471, "Trans.(ID)": 0.5193,
        "Abst.(OOD)": 0.5277, "Know.(OOD)": 0.1350, "Perc.(OOD)": 0.2717, "Spat.(OOD)": 0.3556, "Trans.(OOD)": 0.3588,
    },
    {
        "Model": "Veo 3.1",
        "Type": "🔵 Proprietary",
        "Overall": 0.4800, "Overall(In-Domain)": 0.5307, "Overall(Out-of-Domain)": 0.4288,
        "Abst.(All)": 0.5991, "Know.(All)": 0.4225, "Perc.(All)": 0.4568, "Spat.(All)": 0.4430, "Trans.(All)": 0.4413,
        "Abst.(ID)": 0.6109, "Know.(ID)": 0.5032, "Perc.(ID)": 0.5196, "Spat.(ID)": 0.4443, "Trans.(ID)": 0.5103,
        "Abst.(OOD)": 0.5770, "Know.(OOD)": 0.2772, "Perc.(OOD)": 0.4204, "Spat.(OOD)": 0.4406, "Trans.(OOD)": 0.4041,
    },
    # ---- Data Scaling Strong Baseline ----
    {
        "Model": "VBVR-Wan2.2",
        "Type": "⭐ Strong Baseline",
        "Overall": 0.6848, "Overall(In-Domain)": 0.7599, "Overall(Out-of-Domain)": 0.6097,
        "Abst.(All)": 0.7394, "Know.(All)": 0.6864, "Perc.(All)": 0.6333, "Spat.(All)": 0.6960, "Trans.(All)": 0.6909,
        "Abst.(ID)": 0.7240, "Know.(ID)": 0.7500, "Perc.(ID)": 0.7817, "Spat.(ID)": 0.7446, "Trans.(ID)": 0.8327,
        "Abst.(OOD)": 0.7682, "Know.(OOD)": 0.5720, "Perc.(OOD)": 0.5474, "Spat.(OOD)": 0.6182, "Trans.(OOD)": 0.6145,
    },
]
def build_full_dataframe():
    """Assemble the complete leaderboard table from ``MODELS_DATA``.

    Columns are ordered as the always-visible columns followed by every
    column group in ``COLUMN_GROUPS`` order. Rows are sorted by "Overall",
    highest first, numeric columns are rounded to 3 decimals, and model
    names are passed through ``make_model_link``.

    Returns:
        The fully populated ``pandas.DataFrame`` with a fresh 0..n-1 index.
    """
    ordered_columns = [
        *ALWAYS_VISIBLE_COLS,
        *(column for members in COLUMN_GROUPS.values() for column in members),
    ]
    # Sorting happens on the raw scores, before rounding for display.
    table = (
        pd.DataFrame(MODELS_DATA)[ordered_columns]
        .sort_values("Overall", ascending=False)
        .reset_index(drop=True)
    )
    score_columns = table.select_dtypes(include="number").columns
    table[score_columns] = table[score_columns].round(3)
    # Turn model names into clickable links where a URL is known.
    table["Model"] = table["Model"].map(make_model_link)
    return table


# Built once at import time and reused for every column-selection change.
FULL_DF = build_full_dataframe()
def get_filtered_df(selected_groups):
    """Return the columns of ``FULL_DF`` belonging to the selected groups.

    Args:
        selected_groups: Collection of group names (keys of
            ``COLUMN_GROUPS``). An empty or falsy value is treated as
            ``["Overall"]`` so the table always has at least one score column.

    Returns:
        A DataFrame holding the always-visible columns followed by the
        columns of each selected group, in ``COLUMN_GROUPS`` order.
    """
    active_groups = selected_groups if selected_groups else ["Overall"]
    visible_columns = list(ALWAYS_VISIBLE_COLS) + [
        column
        for name, members in COLUMN_GROUPS.items()
        if name in active_groups
        for column in members
    ]
    return FULL_DF[visible_columns]
# ============================================================
# Build the JS that colors columns by reading header text.
# Passed via Gradio's js= parameter on demo.load so it runs
# reliably after the page is fully rendered.
#
# COLOR_MAP_JSON is COLUMN_COLORS serialised to JSON and interpolated into
# the script as the COLOR_MAP object literal. COLORING_JS is an f-string, so
# every literal JavaScript brace is doubled ({{ and }}) and only
# {COLOR_MAP_JSON} is substituted.
#
# The script's colorColumns() looks inside #leaderboard-table, reads each
# header cell's trimmed text, looks it up in COLOR_MAP and sets that
# background colour on the header cell and on the body cells at the same
# index. It tries a real <table> first and falls back to class-name based
# selectors. It runs once immediately, again after 300/800/1500 ms, and 50 ms
# after every child-list mutation under #leaderboard-table.
# ============================================================
COLOR_MAP_JSON = json.dumps(COLUMN_COLORS)
COLORING_JS = f"""
() => {{
const COLOR_MAP = {COLOR_MAP_JSON};
function colorColumns() {{
const container = document.querySelector('#leaderboard-table');
if (!container) return;
// Gradio Dataframe can use <table> or a virtual grid.
// Try standard <table> first.
const table = container.querySelector('table');
if (table) {{
const headers = table.querySelectorAll('thead th, thead td');
const headerTexts = [];
headers.forEach(th => headerTexts.push(th.textContent.trim()));
// Color header cells
headers.forEach((th, i) => {{
const color = COLOR_MAP[headerTexts[i]];
if (color) th.style.backgroundColor = color;
}});
// Color body cells
table.querySelectorAll('tbody tr').forEach(row => {{
const cells = row.querySelectorAll('td');
cells.forEach((td, i) => {{
const color = COLOR_MAP[headerTexts[i]];
if (color) td.style.backgroundColor = color;
}});
}});
return;
}}
// Fallback: Gradio virtual/svelte table (div-based grid)
const headerRow = container.querySelector('.header-row, .headers, [class*="header"]');
if (!headerRow) return;
const headerCells = headerRow.querySelectorAll('[class*="cell"], th, div');
const headerTexts = [];
headerCells.forEach(c => headerTexts.push(c.textContent.trim()));
headerCells.forEach((c, i) => {{
const color = COLOR_MAP[headerTexts[i]];
if (color) c.style.backgroundColor = color;
}});
const bodyRows = container.querySelectorAll('.body .row, tbody tr, [class*="row"]:not([class*="header"])');
bodyRows.forEach(row => {{
const cells = row.querySelectorAll('[class*="cell"], td, div');
cells.forEach((td, i) => {{
const color = COLOR_MAP[headerTexts[i]];
if (color) td.style.backgroundColor = color;
}});
}});
}}
// Run immediately, then with delays to catch late renders
colorColumns();
setTimeout(colorColumns, 300);
setTimeout(colorColumns, 800);
setTimeout(colorColumns, 1500);
// Also observe DOM changes to re-color when columns are toggled
const target = document.querySelector('#leaderboard-table');
if (target) {{
const obs = new MutationObserver(() => {{
setTimeout(colorColumns, 50);
}});
obs.observe(target, {{ childList: true, subtree: true }});
}}
}}
"""
# ============================================================
# Gradio Interface
# ============================================================
# Layout: title and introduction, then three tabs (leaderboard, about,
# submit), then a collapsed citation box. The emoji in the labels are real
# Unicode characters; keep this file saved as UTF-8.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 VBVR-Bench Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Row():
                column_selector = gr.CheckboxGroup(
                    choices=list(COLUMN_GROUPS.keys()),
                    value=DEFAULT_GROUPS,
                    label="Select Column Groups to Display:",
                    interactive=True,
                )

            leaderboard_table = gr.Dataframe(
                value=get_filtered_df(DEFAULT_GROUPS),
                interactive=False,
                # Must match the '#leaderboard-table' selector in COLORING_JS.
                elem_id="leaderboard-table",
                # "Model" holds HTML anchors; all other entries are declared
                # as strings.
                # NOTE(review): this list has 21 entries while the full table
                # has 20 columns (2 always-visible + 18 scores) - confirm the
                # surplus entry is harmless.
                datatype=["html"] + ["str"] * 20,
            )

            # Re-filter the table whenever the group selection changes.
            column_selector.change(
                fn=get_filtered_df,
                inputs=[column_selector],
                outputs=[leaderboard_table],
            )

        # NOTE(review): this tab reuses the leaderboard tab's elem_id
        # ("llm-benchmark-tab-table"); left unchanged because custom_css may
        # target it - confirm before giving it a unique id.
        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=1):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit", elem_id="llm-benchmark-tab-submit", id=2):
            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )

    # Use Gradio's js= parameter on load — this is the official way
    # to run JS after the page is fully rendered
    demo.load(fn=None, inputs=None, outputs=None, js=COLORING_JS)

demo.queue(default_concurrency_limit=40).launch()