import html
import json
from collections import OrderedDict

import gradio as gr
import pandas as pd

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css

# ============================================================
# Static Leaderboard Data for VBVR-Bench
# ============================================================

# Column group definitions (ordered for display).
# Maps a user-selectable group label to the DataFrame columns it reveals.
COLUMN_GROUPS = OrderedDict([
    ("Overall", ["Overall"]),
    ("Overall by Category", [
        "Abst.(All)", "Know.(All)", "Perc.(All)", "Spat.(All)", "Trans.(All)",
    ]),
    ("In-Domain (ID)", ["Overall(In-Domain)"]),
    ("In-Domain by Category", [
        "Abst.(ID)", "Know.(ID)", "Perc.(ID)", "Spat.(ID)", "Trans.(ID)",
    ]),
    ("Out-of-Domain (OOD)", ["Overall(Out-of-Domain)"]),
    ("Out-of-Domain by Category", [
        "Abst.(OOD)", "Know.(OOD)", "Perc.(OOD)", "Spat.(OOD)", "Trans.(OOD)",
    ]),
])

# Default column groups to show (matching LaTeX table layout)
DEFAULT_GROUPS = [
    "Overall",
    "In-Domain (ID)",
    "In-Domain by Category",
    "Out-of-Domain (OOD)",
    "Out-of-Domain by Category",
]

# Columns always shown regardless of group selection
ALWAYS_VISIBLE_COLS = ["Model", "Type"]

# ============================================================
# Column-to-color mapping (used by JS)
# ============================================================
# Keys must match the rendered column header text exactly: the JS looks
# colors up by the trimmed textContent of each header cell.
COLUMN_COLORS = {
    # Overall (dark amber)
    "Overall": "rgba(232, 180, 58, 0.30)",
    # Overall by Category (light amber)
    "Abst.(All)": "rgba(242, 200, 90, 0.15)",
    "Know.(All)": "rgba(242, 200, 90, 0.15)",
    "Perc.(All)": "rgba(242, 200, 90, 0.15)",
    "Spat.(All)": "rgba(242, 200, 90, 0.15)",
    "Trans.(All)": "rgba(242, 200, 90, 0.15)",
    # In-Domain Overall (dark green)
    "Overall(In-Domain)": "rgba(82, 183, 120, 0.30)",
    # In-Domain by Category (light green)
    "Abst.(ID)": "rgba(110, 200, 145, 0.15)",
    "Know.(ID)": "rgba(110, 200, 145, 0.15)",
    "Perc.(ID)": "rgba(110, 200, 145, 0.15)",
    "Spat.(ID)": "rgba(110, 200, 145, 0.15)",
    "Trans.(ID)": "rgba(110, 200, 145, 0.15)",
    # Out-of-Domain Overall (dark blue)
    "Overall(Out-of-Domain)": "rgba(95, 150, 215, 0.30)",
    # Out-of-Domain by Category (light blue)
    "Abst.(OOD)": "rgba(125, 175, 228, 0.15)",
    "Know.(OOD)": "rgba(125, 175, 228, 0.15)",
    "Perc.(OOD)": "rgba(125, 175, 228, 0.15)",
    "Spat.(OOD)": "rgba(125, 175, 228, 0.15)",
    "Trans.(OOD)": "rgba(125, 175, 228, 0.15)",
}

# ============================================================
# Static model scores data
# ============================================================

# Model links mapping (display name -> project / model page URL)
MODEL_LINKS = {
    "VBVR-Wan2.2": "https://huggingface.co/Video-Reason/VBVR-Wan2.2",
    "Sora 2": "https://sora.chatgpt.com/",
    "Veo 3.1": "https://aistudio.google.com/models/veo-3",
    "Runway Gen-4 Turbo": "https://runwayml.com/research/introducing-runway-gen-4",
    "Wan2.2-I2V-A14B": "https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers",
    "Kling 2.6": "https://app.klingai.com/global/quickstart/klingai-video-26-audio-user-guide",
    "LTX-2": "https://huggingface.co/Lightricks/LTX-2",
    "CogVideoX1.5-5B-I2V": "https://huggingface.co/zai-org/CogVideoX1.5-5B-I2V",
    "HunyuanVideo-I2V": "https://huggingface.co/tencent/HunyuanVideo-I2V",
}


def make_model_link(model_name: str) -> str:
    """Create a clickable HTML link for a model if a URL exists.

    Args:
        model_name: Display name of the model, used as the key into
            ``MODEL_LINKS`` and as the link text.

    Returns:
        An ``<a>`` element pointing at the model's URL (opening in a new
        tab) when ``model_name`` is in ``MODEL_LINKS``; otherwise
        ``model_name`` unchanged.
    """
    url = MODEL_LINKS.get(model_name)
    if url is None:
        return model_name
    # Escape both the attribute value and the link text so a name or URL
    # containing quotes, '&' or '<' cannot break the generated markup.
    return (
        f'<a href="{html.escape(url, quote=True)}" target="_blank" '
        f'rel="noopener noreferrer">{html.escape(model_name)}</a>'
    )


# One record per leaderboard row; every record carries the same 20 keys
# (Model, Type, and the 18 score columns listed in COLUMN_GROUPS).
MODELS_DATA = [
    {
        "Model": "Human",
        "Type": "👤 Reference",
        "Overall": 0.974,
        "Overall(In-Domain)": 0.960,
        "Overall(Out-of-Domain)": 0.988,
        "Abst.(All)": 0.947, "Know.(All)": 0.972, "Perc.(All)": 0.994,
        "Spat.(All)": 0.969, "Trans.(All)": 0.981,
        "Abst.(ID)": 0.919, "Know.(ID)": 0.956, "Perc.(ID)": 1.000,
        "Spat.(ID)": 0.950, "Trans.(ID)": 1.000,
        "Abst.(OOD)": 1.000, "Know.(OOD)": 1.000, "Perc.(OOD)": 0.990,
        "Spat.(OOD)": 1.000, "Trans.(OOD)": 0.970,
    },
    # ---- Open-source Models ----
    {
        "Model": "CogVideoX1.5-5B-I2V",
        "Type": "🟢 Open-source",
        "Overall": 0.2727,
        "Overall(In-Domain)": 0.2831,
        "Overall(Out-of-Domain)": 0.2623,
        "Abst.(All)": 0.2548, "Know.(All)": 0.2952, "Perc.(All)": 0.2525,
        "Spat.(All)": 0.2996, "Trans.(All)": 0.2903,
        "Abst.(ID)": 0.2408, "Know.(ID)": 0.3285, "Perc.(ID)": 0.2567,
        "Spat.(ID)": 0.3281, "Trans.(ID)": 0.3051,
        "Abst.(OOD)": 0.2809, "Know.(OOD)": 0.2352, "Perc.(OOD)": 0.2501,
        "Spat.(OOD)": 0.2539, "Trans.(OOD)": 0.2824,
    },
    {
        "Model": "HunyuanVideo-I2V",
        "Type": "🟢 Open-source",
        "Overall": 0.2726,
        "Overall(In-Domain)": 0.2799,
        "Overall(Out-of-Domain)": 0.2653,
        "Abst.(All)": 0.1956, "Know.(All)": 0.3614, "Perc.(All)": 0.2910,
        "Spat.(All)": 0.2698, "Trans.(All)": 0.2733,
        "Abst.(ID)": 0.2068, "Know.(ID)": 0.3573, "Perc.(ID)": 0.2933,
        "Spat.(ID)": 0.2802, "Trans.(ID)": 0.3160,
        "Abst.(OOD)": 0.1747, "Know.(OOD)": 0.3688, "Perc.(OOD)": 0.2897,
        "Spat.(OOD)": 0.2530, "Trans.(OOD)": 0.2502,
    },
    {
        "Model": "Wan2.2-I2V-A14B",
        "Type": "🟢 Open-source",
        "Overall": 0.3714,
        "Overall(In-Domain)": 0.4125,
        "Overall(Out-of-Domain)": 0.3287,
        "Abst.(All)": 0.4212, "Know.(All)": 0.3556, "Perc.(All)": 0.3710,
        "Spat.(All)": 0.3397, "Trans.(All)": 0.3465,
        "Abst.(ID)": 0.4301, "Know.(ID)": 0.3823, "Perc.(ID)": 0.4147,
        "Spat.(ID)": 0.4043, "Trans.(ID)": 0.4192,
        "Abst.(OOD)": 0.4046, "Know.(OOD)": 0.3077, "Perc.(OOD)": 0.3427,
        "Spat.(OOD)": 0.2364, "Trans.(OOD)": 0.3073,
    },
    {
        "Model": "LTX-2",
        "Type": "🟢 Open-source",
        "Overall": 0.3129,
        "Overall(In-Domain)": 0.3287,
        "Overall(Out-of-Domain)": 0.2971,
        "Abst.(All)": 0.2908, "Know.(All)": 0.3531, "Perc.(All)": 0.3200,
        "Spat.(All)": 0.2980, "Trans.(All)": 0.3093,
        "Abst.(ID)": 0.3156, "Know.(ID)": 0.3621, "Perc.(ID)": 0.3257,
        "Spat.(ID)": 0.3399, "Trans.(ID)": 0.3060,
        "Abst.(OOD)": 0.2444, "Know.(OOD)": 0.3369, "Perc.(OOD)": 0.3167,
        "Spat.(OOD)": 0.2308, "Trans.(OOD)": 0.3110,
    },
    # ---- Proprietary Models ----
    {
        "Model": "Runway Gen-4 Turbo",
        "Type": "🔵 Proprietary",
        "Overall": 0.4031,
        "Overall(In-Domain)": 0.3920,
        "Overall(Out-of-Domain)": 0.4141,
        "Abst.(All)": 0.4370, "Know.(All)": 0.4165, "Perc.(All)": 0.4223,
        "Spat.(All)": 0.3357, "Trans.(All)": 0.3696,
        "Abst.(ID)": 0.3956, "Know.(ID)": 0.4094, "Perc.(ID)": 0.4288,
        "Spat.(ID)": 0.3409, "Trans.(ID)": 0.3629,
        "Abst.(OOD)": 0.5147, "Know.(OOD)": 0.4294, "Perc.(OOD)": 0.4185,
        "Spat.(OOD)": 0.3274, "Trans.(OOD)": 0.3733,
    },
    {
        "Model": "Sora 2",
        "Type": "🔵 Proprietary",
        "Overall": 0.5457,
        "Overall(In-Domain)": 0.5691,
        "Overall(Out-of-Domain)": 0.5225,
        "Abst.(All)": 0.5824, "Know.(All)": 0.4749, "Perc.(All)": 0.5458,
        "Spat.(All)": 0.5298, "Trans.(All)": 0.5640,
        "Abst.(ID)": 0.6023, "Know.(ID)": 0.4767, "Perc.(ID)": 0.5810,
        "Spat.(ID)": 0.5720, "Trans.(ID)": 0.5967,
        "Abst.(OOD)": 0.5462, "Know.(OOD)": 0.4715, "Perc.(OOD)": 0.5254,
        "Spat.(OOD)": 0.4623, "Trans.(OOD)": 0.5465,
    },
    {
        "Model": "Kling 2.6",
        "Type": "🔵 Proprietary",
        "Overall": 0.3691,
        "Overall(In-Domain)": 0.4082,
        "Overall(Out-of-Domain)": 0.3300,
        "Abst.(All)": 0.4866, "Know.(All)": 0.2556, "Perc.(All)": 0.3095,
        "Spat.(All)": 0.3504, "Trans.(All)": 0.4149,
        "Abst.(ID)": 0.4647, "Know.(ID)": 0.3225, "Perc.(ID)": 0.3749,
        "Spat.(ID)": 0.3471, "Trans.(ID)": 0.5193,
        "Abst.(OOD)": 0.5277, "Know.(OOD)": 0.1350, "Perc.(OOD)": 0.2717,
        "Spat.(OOD)": 0.3556, "Trans.(OOD)": 0.3588,
    },
    {
        "Model": "Veo 3.1",
        "Type": "🔵 Proprietary",
        "Overall": 0.4800,
        "Overall(In-Domain)": 0.5307,
        "Overall(Out-of-Domain)": 0.4288,
        "Abst.(All)": 0.5991, "Know.(All)": 0.4225, "Perc.(All)": 0.4568,
        "Spat.(All)": 0.4430, "Trans.(All)": 0.4413,
        "Abst.(ID)": 0.6109, "Know.(ID)": 0.5032, "Perc.(ID)": 0.5196,
        "Spat.(ID)": 0.4443, "Trans.(ID)": 0.5103,
        "Abst.(OOD)": 0.5770, "Know.(OOD)": 0.2772, "Perc.(OOD)": 0.4204,
        "Spat.(OOD)": 0.4406, "Trans.(OOD)": 0.4041,
    },
    # ---- Data Scaling Strong Baseline ----
    {
        "Model": "VBVR-Wan2.2",
        "Type": "⭐ Strong Baseline",
        "Overall": 0.6848,
        "Overall(In-Domain)": 0.7599,
        "Overall(Out-of-Domain)": 0.6097,
        "Abst.(All)": 0.7394, "Know.(All)": 0.6864, "Perc.(All)": 0.6333,
        "Spat.(All)": 0.6960, "Trans.(All)": 0.6909,
        "Abst.(ID)": 0.7240, "Know.(ID)": 0.7500, "Perc.(ID)": 0.7817,
        "Spat.(ID)": 0.7446, "Trans.(ID)": 0.8327,
        "Abst.(OOD)": 0.7682, "Know.(OOD)": 0.5720, "Perc.(OOD)": 0.5474,
        "Spat.(OOD)": 0.6182, "Trans.(OOD)": 0.6145,
    },
]


def build_full_dataframe() -> pd.DataFrame:
    """Build the complete DataFrame with all columns, sorted by Overall descending.

    Returns:
        A DataFrame with one row per entry of ``MODELS_DATA``. Columns are
        ``ALWAYS_VISIBLE_COLS`` followed by every column of
        ``COLUMN_GROUPS`` in group order; numeric values are rounded to
        three decimals and the ``Model`` column holds the output of
        ``make_model_link``.
    """
    df = pd.DataFrame(MODELS_DATA)

    # Ensure column order: always-visible cols first, then groups in defined order
    all_cols = list(ALWAYS_VISIBLE_COLS)
    for group_cols in COLUMN_GROUPS.values():
        all_cols.extend(group_cols)
    df = df[all_cols]

    # Sort by Overall descending
    df = df.sort_values("Overall", ascending=False).reset_index(drop=True)

    # Round numeric columns to 3 decimal places for clean display
    numeric_cols = df.select_dtypes(include="number").columns
    df[numeric_cols] = df[numeric_cols].round(3)

    # Add clickable links to model names. Done last so sorting and rounding
    # operate on the raw values rather than on HTML strings.
    df["Model"] = df["Model"].apply(make_model_link)
    return df


# Built once at import time; get_filtered_df only selects columns from it.
FULL_DF = build_full_dataframe()


def get_filtered_df(selected_groups):
    """Filter DataFrame columns based on selected column groups.

    Args:
        selected_groups: Collection of group labels (keys of
            ``COLUMN_GROUPS``) to display. An empty or ``None`` selection
            falls back to ``["Overall"]``.

    Returns:
        ``FULL_DF`` restricted to ``ALWAYS_VISIBLE_COLS`` plus the columns
        of each selected group, in ``COLUMN_GROUPS`` order (not in the
        order the groups were selected).
    """
    if not selected_groups:
        selected_groups = ["Overall"]  # Always show at least Overall

    cols = list(ALWAYS_VISIBLE_COLS)
    for group_name, group_cols in COLUMN_GROUPS.items():
        if group_name in selected_groups:
            cols.extend(group_cols)
    return FULL_DF[cols]


# ============================================================
# Build the JS that colors columns by reading header text.
# Passed via Gradio's js= parameter on demo.load so it runs
# reliably after the page is fully rendered.
# ============================================================
COLOR_MAP_JSON = json.dumps(COLUMN_COLORS)

# The script first looks for a standard <table> element inside
# #leaderboard-table and, if none is found, falls back to a div-based grid
# located through class-name substring selectors. Literal JS braces are
# doubled because this is an f-string; only COLOR_MAP_JSON is interpolated.
COLORING_JS = f"""
() => {{
    const COLOR_MAP = {COLOR_MAP_JSON};

    function colorColumns() {{
        const container = document.querySelector('#leaderboard-table');
        if (!container) return;

        // Gradio Dataframe can use or a virtual grid.
        // Try standard first.
        const table = container.querySelector('table');
        if (table) {{
            const headers = table.querySelectorAll('thead th, thead td');
            const headerTexts = [];
            headers.forEach(th => headerTexts.push(th.textContent.trim()));

            // Color header cells
            headers.forEach((th, i) => {{
                const color = COLOR_MAP[headerTexts[i]];
                if (color) th.style.backgroundColor = color;
            }});

            // Color body cells
            table.querySelectorAll('tbody tr').forEach(row => {{
                const cells = row.querySelectorAll('td');
                cells.forEach((td, i) => {{
                    const color = COLOR_MAP[headerTexts[i]];
                    if (color) td.style.backgroundColor = color;
                }});
            }});
            return;
        }}

        // Fallback: Gradio virtual/svelte table (div-based grid)
        const headerRow = container.querySelector('.header-row, .headers, [class*="header"]');
        if (!headerRow) return;
        const headerCells = headerRow.querySelectorAll('[class*="cell"], th, div');
        const headerTexts = [];
        headerCells.forEach(c => headerTexts.push(c.textContent.trim()));

        headerCells.forEach((c, i) => {{
            const color = COLOR_MAP[headerTexts[i]];
            if (color) c.style.backgroundColor = color;
        }});

        const bodyRows = container.querySelectorAll('.body .row, tbody tr, [class*="row"]:not([class*="header"])');
        bodyRows.forEach(row => {{
            const cells = row.querySelectorAll('[class*="cell"], td, div');
            cells.forEach((td, i) => {{
                const color = COLOR_MAP[headerTexts[i]];
                if (color) td.style.backgroundColor = color;
            }});
        }});
    }}

    // Run immediately, then with delays to catch late renders
    colorColumns();
    setTimeout(colorColumns, 300);
    setTimeout(colorColumns, 800);
    setTimeout(colorColumns, 1500);

    // Also observe DOM changes to re-color when columns are toggled
    const target = document.querySelector('#leaderboard-table');
    if (target) {{
        const obs = new MutationObserver(() => {{
            setTimeout(colorColumns, 50);
        }});
        obs.observe(target, {{ childList: true, subtree: true }});
    }}
}}
"""

# ============================================================
# Gradio Interface
# ============================================================
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 VBVR-Bench Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Row():
                column_selector = gr.CheckboxGroup(
                    choices=list(COLUMN_GROUPS.keys()),
                    value=DEFAULT_GROUPS,
                    label="Select Column Groups to Display:",
                    interactive=True,
                )

            # NOTE(review): nesting was reconstructed from a whitespace-collapsed
            # source; confirm the table is meant to sit below the selector row
            # rather than inside it.
            leaderboard_table = gr.Dataframe(
                value=get_filtered_df(DEFAULT_GROUPS),
                interactive=False,
                elem_id="leaderboard-table",
                # First column ("Model") is rendered as HTML; every remaining
                # column is declared "str". The count is derived from FULL_DF
                # so it tracks the table instead of a hard-coded number.
                datatype=["html"] + ["str"] * (len(FULL_DF.columns) - 1),
            )

            # Re-render the table with the chosen column groups whenever the
            # checkbox selection changes.
            column_selector.change(
                fn=get_filtered_df,
                inputs=[column_selector],
                outputs=[leaderboard_table],
            )

        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=1):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit", elem_id="llm-benchmark-tab-submit", id=2):
            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )

    # Use Gradio's js= parameter on load — this is the official way
    # to run JS after the page is fully rendered
    demo.load(fn=None, inputs=None, outputs=None, js=COLORING_JS)

demo.queue(default_concurrency_limit=40).launch()