import html
import json
from collections import OrderedDict

import gradio as gr
import pandas as pd

from src.about import (
    CITATION_BUTTON_LABEL,
    CITATION_BUTTON_TEXT,
    EVALUATION_QUEUE_TEXT,
    INTRODUCTION_TEXT,
    LLM_BENCHMARKS_TEXT,
    TITLE,
)
from src.display.css_html_js import custom_css
| |
|
| |
|
| | |
| | |
| | |
| |
|
| | |
def _category_columns(scope):
    """Return the five per-category column names for *scope* ("All", "ID" or "OOD")."""
    return [
        f"{category}({scope})"
        for category in ("Abst.", "Know.", "Perc.", "Spat.", "Trans.")
    ]


# Selectable column groups, in display order: group label -> DataFrame columns.
# Insertion order is what determines the left-to-right column order of the table.
COLUMN_GROUPS = OrderedDict()
COLUMN_GROUPS["Overall"] = ["Overall"]
COLUMN_GROUPS["Overall by Category"] = _category_columns("All")
COLUMN_GROUPS["In-Domain (ID)"] = ["Overall(In-Domain)"]
COLUMN_GROUPS["In-Domain by Category"] = _category_columns("ID")
COLUMN_GROUPS["Out-of-Domain (OOD)"] = ["Overall(Out-of-Domain)"]
COLUMN_GROUPS["Out-of-Domain by Category"] = _category_columns("OOD")
| |
|
| | |
# Column groups ticked when the page first loads: every key of COLUMN_GROUPS
# except "Overall by Category". Used both as the initial checkbox value and to
# build the initial table.
DEFAULT_GROUPS = [
    "Overall",
    "In-Domain (ID)",
    "In-Domain by Category",
    "Out-of-Domain (OOD)",
    "Out-of-Domain by Category",
]
| |
|
| | |
# Identity columns placed first in every view, regardless of which groups are selected.
ALWAYS_VISIBLE_COLS = ["Model", "Type"]
| |
|
| | |
| | |
| | |
def _build_column_colors():
    """Build the column-name -> CSS background colour mapping.

    Each split contributes one headline column (alpha 0.30) followed by its
    five per-category columns (alpha 0.15), in that order.
    """
    # (headline column, headline colour, category suffix, category colour)
    palette = (
        ("Overall", "rgba(232, 180, 58, 0.30)", "All", "rgba(242, 200, 90, 0.15)"),
        ("Overall(In-Domain)", "rgba(82, 183, 120, 0.30)", "ID", "rgba(110, 200, 145, 0.15)"),
        ("Overall(Out-of-Domain)", "rgba(95, 150, 215, 0.30)", "OOD", "rgba(125, 175, 228, 0.15)"),
    )
    categories = ("Abst.", "Know.", "Perc.", "Spat.", "Trans.")

    colors = {}
    for headline, headline_color, scope, category_color in palette:
        colors[headline] = headline_color
        colors.update(
            {f"{category}({scope})": category_color for category in categories}
        )
    return colors


# Background tint per score column; serialised to JSON and consumed by the
# client-side colouring script.
COLUMN_COLORS = _build_column_colors()
| |
|
| | |
| | |
| | |
| |
|
| | |
# Model display name -> public landing page. Names missing from this table
# (e.g. the "Human" reference row) are shown as plain text.
MODEL_LINKS = {
    "VBVR-Wan2.2": "https://huggingface.co/Video-Reason/VBVR-Wan2.2",
    "Sora 2": "https://sora.chatgpt.com/",
    "Veo 3.1": "https://aistudio.google.com/models/veo-3",
    "Runway Gen-4 Turbo": "https://runwayml.com/research/introducing-runway-gen-4",
    "Wan2.2-I2V-A14B": "https://huggingface.co/Wan-AI/Wan2.2-I2V-A14B-Diffusers",
    "Kling 2.6": "https://app.klingai.com/global/quickstart/klingai-video-26-audio-user-guide",
    "LTX-2": "https://huggingface.co/Lightricks/LTX-2",
    "CogVideoX1.5-5B-I2V": "https://huggingface.co/zai-org/CogVideoX1.5-5B-I2V",
    "HunyuanVideo-I2V": "https://huggingface.co/tencent/HunyuanVideo-I2V",
}


def make_model_link(model_name):
    """Return the HTML shown in the "Model" cell for *model_name*.

    Args:
        model_name: Display name of the model.

    Returns:
        An ``<a>`` element opening the model's page in a new tab when the name
        is a key of ``MODEL_LINKS``; otherwise the HTML-escaped name. Non-string
        values are returned unchanged.
    """
    if not isinstance(model_name, str):
        return model_name

    # The result is rendered as HTML, so both the label and the URL are escaped
    # to keep characters such as "<", "&" or quotes from being read as markup.
    label = html.escape(model_name)
    url = MODEL_LINKS.get(model_name)
    if url is None:
        return label

    # rel="noopener noreferrer" stops the opened page from reaching back into
    # this one through window.opener.
    href = html.escape(url, quote=True)
    return f'<a href="{href}" target="_blank" rel="noopener noreferrer">{label}</a>'
| |
|
# Raw leaderboard rows: one dict per entry with "Model", "Type", and the 18
# score columns named in COLUMN_GROUPS.
# NOTE(review): the "Type" emoji were restored from mis-decoded UTF-8
# (they appeared as Greek-letter mojibake); confirm they match the intended labels.
MODELS_DATA = [
    # Human reference
    {
        "Model": "Human",
        "Type": "🤖 Reference",
        "Overall": 0.974, "Overall(In-Domain)": 0.960, "Overall(Out-of-Domain)": 0.988,
        "Abst.(All)": 0.947, "Know.(All)": 0.972, "Perc.(All)": 0.994, "Spat.(All)": 0.969, "Trans.(All)": 0.981,
        "Abst.(ID)": 0.919, "Know.(ID)": 0.956, "Perc.(ID)": 1.000, "Spat.(ID)": 0.950, "Trans.(ID)": 1.000,
        "Abst.(OOD)": 1.000, "Know.(OOD)": 1.000, "Perc.(OOD)": 0.990, "Spat.(OOD)": 1.000, "Trans.(OOD)": 0.970,
    },
    # Open-source models
    {
        "Model": "CogVideoX1.5-5B-I2V",
        "Type": "🟢 Open-source",
        "Overall": 0.2727, "Overall(In-Domain)": 0.2831, "Overall(Out-of-Domain)": 0.2623,
        "Abst.(All)": 0.2548, "Know.(All)": 0.2952, "Perc.(All)": 0.2525, "Spat.(All)": 0.2996, "Trans.(All)": 0.2903,
        "Abst.(ID)": 0.2408, "Know.(ID)": 0.3285, "Perc.(ID)": 0.2567, "Spat.(ID)": 0.3281, "Trans.(ID)": 0.3051,
        "Abst.(OOD)": 0.2809, "Know.(OOD)": 0.2352, "Perc.(OOD)": 0.2501, "Spat.(OOD)": 0.2539, "Trans.(OOD)": 0.2824,
    },
    {
        "Model": "HunyuanVideo-I2V",
        "Type": "🟢 Open-source",
        "Overall": 0.2726, "Overall(In-Domain)": 0.2799, "Overall(Out-of-Domain)": 0.2653,
        "Abst.(All)": 0.1956, "Know.(All)": 0.3614, "Perc.(All)": 0.2910, "Spat.(All)": 0.2698, "Trans.(All)": 0.2733,
        "Abst.(ID)": 0.2068, "Know.(ID)": 0.3573, "Perc.(ID)": 0.2933, "Spat.(ID)": 0.2802, "Trans.(ID)": 0.3160,
        "Abst.(OOD)": 0.1747, "Know.(OOD)": 0.3688, "Perc.(OOD)": 0.2897, "Spat.(OOD)": 0.2530, "Trans.(OOD)": 0.2502,
    },
    {
        "Model": "Wan2.2-I2V-A14B",
        "Type": "🟢 Open-source",
        "Overall": 0.3714, "Overall(In-Domain)": 0.4125, "Overall(Out-of-Domain)": 0.3287,
        "Abst.(All)": 0.4212, "Know.(All)": 0.3556, "Perc.(All)": 0.3710, "Spat.(All)": 0.3397, "Trans.(All)": 0.3465,
        "Abst.(ID)": 0.4301, "Know.(ID)": 0.3823, "Perc.(ID)": 0.4147, "Spat.(ID)": 0.4043, "Trans.(ID)": 0.4192,
        "Abst.(OOD)": 0.4046, "Know.(OOD)": 0.3077, "Perc.(OOD)": 0.3427, "Spat.(OOD)": 0.2364, "Trans.(OOD)": 0.3073,
    },
    {
        "Model": "LTX-2",
        "Type": "🟢 Open-source",
        "Overall": 0.3129, "Overall(In-Domain)": 0.3287, "Overall(Out-of-Domain)": 0.2971,
        "Abst.(All)": 0.2908, "Know.(All)": 0.3531, "Perc.(All)": 0.3200, "Spat.(All)": 0.2980, "Trans.(All)": 0.3093,
        "Abst.(ID)": 0.3156, "Know.(ID)": 0.3621, "Perc.(ID)": 0.3257, "Spat.(ID)": 0.3399, "Trans.(ID)": 0.3060,
        "Abst.(OOD)": 0.2444, "Know.(OOD)": 0.3369, "Perc.(OOD)": 0.3167, "Spat.(OOD)": 0.2308, "Trans.(OOD)": 0.3110,
    },
    # Proprietary models
    {
        "Model": "Runway Gen-4 Turbo",
        "Type": "🔵 Proprietary",
        "Overall": 0.4031, "Overall(In-Domain)": 0.3920, "Overall(Out-of-Domain)": 0.4141,
        "Abst.(All)": 0.4370, "Know.(All)": 0.4165, "Perc.(All)": 0.4223, "Spat.(All)": 0.3357, "Trans.(All)": 0.3696,
        "Abst.(ID)": 0.3956, "Know.(ID)": 0.4094, "Perc.(ID)": 0.4288, "Spat.(ID)": 0.3409, "Trans.(ID)": 0.3629,
        "Abst.(OOD)": 0.5147, "Know.(OOD)": 0.4294, "Perc.(OOD)": 0.4185, "Spat.(OOD)": 0.3274, "Trans.(OOD)": 0.3733,
    },
    {
        "Model": "Sora 2",
        "Type": "🔵 Proprietary",
        "Overall": 0.5457, "Overall(In-Domain)": 0.5691, "Overall(Out-of-Domain)": 0.5225,
        "Abst.(All)": 0.5824, "Know.(All)": 0.4749, "Perc.(All)": 0.5458, "Spat.(All)": 0.5298, "Trans.(All)": 0.5640,
        "Abst.(ID)": 0.6023, "Know.(ID)": 0.4767, "Perc.(ID)": 0.5810, "Spat.(ID)": 0.5720, "Trans.(ID)": 0.5967,
        "Abst.(OOD)": 0.5462, "Know.(OOD)": 0.4715, "Perc.(OOD)": 0.5254, "Spat.(OOD)": 0.4623, "Trans.(OOD)": 0.5465,
    },
    {
        "Model": "Kling 2.6",
        "Type": "🔵 Proprietary",
        "Overall": 0.3691, "Overall(In-Domain)": 0.4082, "Overall(Out-of-Domain)": 0.3300,
        "Abst.(All)": 0.4866, "Know.(All)": 0.2556, "Perc.(All)": 0.3095, "Spat.(All)": 0.3504, "Trans.(All)": 0.4149,
        "Abst.(ID)": 0.4647, "Know.(ID)": 0.3225, "Perc.(ID)": 0.3749, "Spat.(ID)": 0.3471, "Trans.(ID)": 0.5193,
        "Abst.(OOD)": 0.5277, "Know.(OOD)": 0.1350, "Perc.(OOD)": 0.2717, "Spat.(OOD)": 0.3556, "Trans.(OOD)": 0.3588,
    },
    {
        "Model": "Veo 3.1",
        "Type": "🔵 Proprietary",
        "Overall": 0.4800, "Overall(In-Domain)": 0.5307, "Overall(Out-of-Domain)": 0.4288,
        "Abst.(All)": 0.5991, "Know.(All)": 0.4225, "Perc.(All)": 0.4568, "Spat.(All)": 0.4430, "Trans.(All)": 0.4413,
        "Abst.(ID)": 0.6109, "Know.(ID)": 0.5032, "Perc.(ID)": 0.5196, "Spat.(ID)": 0.4443, "Trans.(ID)": 0.5103,
        "Abst.(OOD)": 0.5770, "Know.(OOD)": 0.2772, "Perc.(OOD)": 0.4204, "Spat.(OOD)": 0.4406, "Trans.(OOD)": 0.4041,
    },
    # Strong baseline
    {
        "Model": "VBVR-Wan2.2",
        "Type": "⭐ Strong Baseline",
        "Overall": 0.6848, "Overall(In-Domain)": 0.7599, "Overall(Out-of-Domain)": 0.6097,
        "Abst.(All)": 0.7394, "Know.(All)": 0.6864, "Perc.(All)": 0.6333, "Spat.(All)": 0.6960, "Trans.(All)": 0.6909,
        "Abst.(ID)": 0.7240, "Know.(ID)": 0.7500, "Perc.(ID)": 0.7817, "Spat.(ID)": 0.7446, "Trans.(ID)": 0.8327,
        "Abst.(OOD)": 0.7682, "Know.(OOD)": 0.5720, "Perc.(OOD)": 0.5474, "Spat.(OOD)": 0.6182, "Trans.(OOD)": 0.6145,
    },
]
| |
|
| |
|
def build_full_dataframe():
    """Assemble the leaderboard table from ``MODELS_DATA``.

    Columns are the always-visible ones followed by every group's columns in
    ``COLUMN_GROUPS`` order. Rows are ranked by "Overall" (highest first),
    numeric values are rounded to three decimals, and the "Model" column is
    passed through ``make_model_link``.

    Returns:
        A ``pandas.DataFrame`` with a fresh 0..n-1 index.
    """
    score_columns = [
        column for members in COLUMN_GROUPS.values() for column in members
    ]
    ordered_columns = [*ALWAYS_VISIBLE_COLS, *score_columns]

    table = pd.DataFrame(MODELS_DATA)[ordered_columns]
    # Rank on the unrounded scores so near-ties are ordered before rounding.
    table = table.sort_values("Overall", ascending=False).reset_index(drop=True)

    numeric = table.select_dtypes(include="number").columns
    table[numeric] = table[numeric].round(3)

    table["Model"] = table["Model"].map(make_model_link)
    return table
| |
|
| |
|
# Complete leaderboard table, built once at import time; get_filtered_df
# selects columns from it on every checkbox change.
FULL_DF = build_full_dataframe()
| |
|
| |
|
def get_filtered_df(selected_groups):
    """Return the leaderboard restricted to the chosen column groups.

    Args:
        selected_groups: Collection of ``COLUMN_GROUPS`` keys to show. An empty
            or ``None`` selection falls back to just the "Overall" group.

    Returns:
        ``FULL_DF`` narrowed to the always-visible columns plus the columns of
        each selected group, in ``COLUMN_GROUPS`` order.
    """
    chosen = selected_groups or ["Overall"]

    # Walk COLUMN_GROUPS (not the selection) so column order never depends on
    # the order in which boxes were ticked.
    visible = [
        *ALWAYS_VISIBLE_COLS,
        *(
            column
            for group, members in COLUMN_GROUPS.items()
            if group in chosen
            for column in members
        ),
    ]
    return FULL_DF[visible]
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
# COLUMN_COLORS serialised as a JSON object literal so it can be spliced
# directly into the JavaScript source below.
COLOR_MAP_JSON = json.dumps(COLUMN_COLORS)

# Client-side script passed to demo.load(js=...). It looks up each column's
# header text in COLOR_MAP and applies the matching background colour to the
# header and body cells of the element with id "leaderboard-table", first via
# a real <table> and otherwise via a class-name based fallback. It re-runs on a
# few timers and from a MutationObserver so the tint is re-applied after the
# table is re-rendered.
# This is an f-string: "{{" / "}}" are literal JS braces and {COLOR_MAP_JSON}
# is the only interpolation.
COLORING_JS = f"""
() => {{
    const COLOR_MAP = {COLOR_MAP_JSON};

    function colorColumns() {{
        const container = document.querySelector('#leaderboard-table');
        if (!container) return;

        // Gradio Dataframe can use <table> or a virtual grid.
        // Try standard <table> first.
        const table = container.querySelector('table');
        if (table) {{
            const headers = table.querySelectorAll('thead th, thead td');
            const headerTexts = [];
            headers.forEach(th => headerTexts.push(th.textContent.trim()));

            // Color header cells
            headers.forEach((th, i) => {{
                const color = COLOR_MAP[headerTexts[i]];
                if (color) th.style.backgroundColor = color;
            }});

            // Color body cells
            table.querySelectorAll('tbody tr').forEach(row => {{
                const cells = row.querySelectorAll('td');
                cells.forEach((td, i) => {{
                    const color = COLOR_MAP[headerTexts[i]];
                    if (color) td.style.backgroundColor = color;
                }});
            }});
            return;
        }}

        // Fallback: Gradio virtual/svelte table (div-based grid)
        const headerRow = container.querySelector('.header-row, .headers, [class*="header"]');
        if (!headerRow) return;
        const headerCells = headerRow.querySelectorAll('[class*="cell"], th, div');
        const headerTexts = [];
        headerCells.forEach(c => headerTexts.push(c.textContent.trim()));

        headerCells.forEach((c, i) => {{
            const color = COLOR_MAP[headerTexts[i]];
            if (color) c.style.backgroundColor = color;
        }});

        const bodyRows = container.querySelectorAll('.body .row, tbody tr, [class*="row"]:not([class*="header"])');
        bodyRows.forEach(row => {{
            const cells = row.querySelectorAll('[class*="cell"], td, div');
            cells.forEach((td, i) => {{
                const color = COLOR_MAP[headerTexts[i]];
                if (color) td.style.backgroundColor = color;
            }});
        }});
    }}

    // Run immediately, then with delays to catch late renders
    colorColumns();
    setTimeout(colorColumns, 300);
    setTimeout(colorColumns, 800);
    setTimeout(colorColumns, 1500);

    // Also observe DOM changes to re-color when columns are toggled
    const target = document.querySelector('#leaderboard-table');
    if (target) {{
        const obs = new MutationObserver(() => {{
            setTimeout(colorColumns, 50);
        }});
        obs.observe(target, {{ childList: true, subtree: true }});
    }}
}}
"""
| |
|
| |
|
| | |
| | |
| | |
# Assemble the Gradio UI: title and intro, three tabs (leaderboard, about,
# submit), and a collapsible citation box; then start the server.
# NOTE(review): the emoji in the tab/accordion labels were restored from
# mis-decoded UTF-8 (one of them had split the leaderboard tab label across two
# lines, leaving an unterminated string) — confirm they match the intended icons.
demo = gr.Blocks(css=custom_css)
with demo:
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 VBVR-Bench Leaderboard", elem_id="llm-benchmark-tab-table", id=0):
            with gr.Row():
                column_selector = gr.CheckboxGroup(
                    choices=list(COLUMN_GROUPS.keys()),
                    value=DEFAULT_GROUPS,
                    label="Select Column Groups to Display:",
                    interactive=True,
                )

            # The first column ("Model") carries HTML links; the rest are
            # rendered as plain strings.
            leaderboard_table = gr.Dataframe(
                value=get_filtered_df(DEFAULT_GROUPS),
                interactive=False,
                elem_id="leaderboard-table",
                datatype=["html"] + ["str"] * 20,
            )

            # Re-slice the table whenever the selected groups change.
            column_selector.change(
                fn=get_filtered_df,
                inputs=[column_selector],
                outputs=[leaderboard_table],
            )

        # NOTE(review): this tab reuses the leaderboard tab's elem_id
        # ("llm-benchmark-tab-table"); left as-is in case custom_css targets it.
        with gr.TabItem("📝 About", elem_id="llm-benchmark-tab-table", id=1):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

        with gr.TabItem("🚀 Submit", elem_id="llm-benchmark-tab-submit", id=2):
            gr.Markdown(EVALUATION_QUEUE_TEXT, elem_classes="markdown-text")

    with gr.Row():
        with gr.Accordion("📙 Citation", open=False):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                lines=20,
                elem_id="citation-button",
                show_copy_button=True,
            )

    # Run the column-tinting script in the browser on page load; no Python
    # callback is involved (fn=None).
    demo.load(fn=None, inputs=None, outputs=None, js=COLORING_JS)

demo.queue(default_concurrency_limit=40).launch()
| |
|