# Source: Hugging Face Space "shi-labs/physical-ai-bench-leaderboard" (status: Running).
# File size: 25,213 bytes.

import gradio as gr
import pandas as pd


# Page heading; passed to gr.HTML at the top of the main layout, so it is raw
# HTML rather than Markdown.
TITLE = """<h1 align="center" id="space-title">Physical AI Bench Leaderboard</h1>"""

# Stylesheet injected through a <style> tag in the main layout. The selectors
# match the elem_id values given to the three leaderboard tables and remove
# their height limits (height: auto, max-height: none) so the tables are not
# clipped to a fixed-height box; horizontal overflow stays scrollable.
CSS = """
#predict_leaderboard, #transfer_leaderboard, #reason_leaderboard {
    height: auto !important;
    max-height: none !important;
}
#predict_leaderboard .wrap, #transfer_leaderboard .wrap, #reason_leaderboard .wrap {
    max-height: none !important;
    height: auto !important;
}
#predict_leaderboard .tbody, #transfer_leaderboard .tbody, #reason_leaderboard .tbody {
    max-height: none !important;
    height: auto !important;
    overflow-x: auto !important;
    overflow-y: hidden !important;
}
"""


# One-paragraph Markdown description of the benchmark, rendered directly below
# the title.
INTRODUCTION_TEXT = """
**Physical AI Bench (PAI-Bench)** is a comprehensive benchmark suite for evaluating physical AI generation and understanding across diverse scenarios including autonomous vehicles, robotics, industrial spaces, and everyday ego-centric environments.
"""

# Markdown body of the "About" tab: what the three tracks (Generation,
# Conditional Generation, Understanding) measure, links to the code and
# datasets, a pointer to the evaluation scripts, and the BibTeX citation.
# NOTE(review): the LLM_ prefix looks inherited from a leaderboard template;
# the text itself is about PAI-Bench - confirm before renaming.
LLM_BENCHMARKS_TEXT = """
## How it works

This leaderboard tracks model performance across three core dimensions:

- **🎨 Generation**: Evaluates world foundation models' ability to predict future states across 1,044 diverse physical scenarios
- **🔄 Conditional Generation**: Focuses on world model generation with complex control signals, featuring 600 videos across robotic arm operations, autonomous driving, and ego-centric scenes
- **🧠 Understanding**: Evaluates understanding and reasoning about physical scenes, with 1,214 embodied reasoning scenarios focused on autonomous vehicle actions

PAI-Bench covers multiple physical AI domains including autonomous driving, robotics, industrial spaces, physics simulations, human interactions, and common sense reasoning.

### Resources
- 🌐 [GitHub Repository](https://github.com/SHI-Labs/physical-ai-bench)
- 📊 [Generation Dataset](https://huggingface.co/datasets/shi-labs/physical-ai-bench-predict)
- 📊 [Conditional Generation Dataset](https://huggingface.co/datasets/shi-labs/physical-ai-bench-transfer)
- 📊 [Understanding Dataset](https://huggingface.co/datasets/shi-labs/physical-ai-bench-reason)
- 📦 [Artifacts](https://huggingface.co/datasets/Leymore/physical-ai-bench-artifacts)

## Reproducibility

To evaluate your models on PAI-Bench, visit our [GitHub repository](https://github.com/SHI-Labs/physical-ai-bench) for evaluation scripts and detailed instructions.

## Citation

If you use Physical AI Bench in your research, please cite:

```bibtex
@misc{zhou2025paibenchcomprehensivebenchmarkphysical,
      title={PAI-Bench: A Comprehensive Benchmark For Physical AI},
      author={Fengzhe Zhou and Jiannan Huang and Jialuo Li and Deva Ramanan and Humphrey Shi},
      year={2025},
      eprint={2512.01989},
      archivePrefix={arXiv},
      primaryClass={cs.CV},
      url={https://arxiv.org/abs/2512.01989},
}

```
"""


# ============================================================================
# Model Links Utility
# ============================================================================

def create_model_link(model_name):
    """
    Convert a model name to a markdown link to Hugging Face.

    Surrounding whitespace and leading/trailing slashes are ignored when
    building the link, so "org/model/" links to "org/model" instead of
    producing an empty link label. A value that is already an http(s) URL is
    used as the link target as-is rather than being prefixed a second time.

    Args:
        model_name: Model name in format "org/model-name", a full URL, or
            just a plain name

    Returns:
        Markdown formatted link, or the original value unchanged when it is
        not a string or contains no "org/name" separator
    """
    if not isinstance(model_name, str):
        return model_name

    repo_id = model_name.strip().strip('/')
    if '/' not in repo_id:
        # Plain name (or nothing left after trimming): nothing to link to.
        return model_name

    if repo_id.startswith(('http://', 'https://')):
        hf_url = repo_id
    else:
        hf_url = f"https://huggingface.co/{repo_id}"

    display_name = repo_id.rsplit('/', 1)[-1]
    return f"[{display_name}]({hf_url})"


# ============================================================================
# Generation Tab Configuration and Utilities
# ============================================================================

# Long Generation column names -> short header shown in the table. Columns
# that are not listed here keep the name they have in the JSON file.
PREDICT_COLUMN_ABBREV = dict([
    # per-domain scores
    ('Common Sense', 'CS'),
    ('AV', 'AV'),
    ('Robot', 'RO'),
    ('Industry', 'IN'),
    ('Human', 'HU'),
    ('Physics', 'PH'),
    # per-quality-metric scores
    ('Subject Consistency', 'SC'),
    ('Background Consistency', 'BC'),
    ('Motion Smoothness', 'MS'),
    ('Aesthetic Quality', 'AQ'),
    ('Imaging Quality', 'IQ'),
    ('Overall Consistency', 'OC'),
    ('I2V Subject', 'IS'),
    ('I2V Background', 'IB'),
])

# Expected column order, spelled with the full (pre-abbreviation) names: the
# fixed leading columns followed by every abbreviated column in mapping order.
PREDICT_COLUMN_ORDER = ['Model', 'Overall', 'Domain', 'Quality', *PREDICT_COLUMN_ABBREV]

# Columns hidden by default; currently none.
PREDICT_HIDDEN_COLUMNS = list()

# Checkbox preset for the "Domain Score" button (abbreviated names, i.e. the
# names the dataframe carries after renaming).
PREDICT_DOMAIN_SCORE_DIMENSIONS = [
    'Domain',  # aggregate
    'CS',
    'AV',
    'RO',
    'IN',
    'HU',
    'PH',
]

# Checkbox preset for the "Quality Score" button (abbreviated names).
PREDICT_QUALITY_SCORE_DIMENSIONS = [
    'Quality',  # aggregate
    'SC',
    'BC',
    'MS',
    'AQ',
    'IQ',
    'OC',
    'IS',
    'IB',
]

# Selection applied by "Deselect All": the two aggregate columns stay checked.
PREDICT_DESELECTED_COLUMNS = ['Domain', 'Quality']

# Selection applied by "Select All": both aggregates, then every domain
# column, then every quality column.
PREDICT_ALL_SELECTED_COLUMNS = (
    ['Domain', 'Quality']
    + PREDICT_DOMAIN_SCORE_DIMENSIONS[1:]
    + PREDICT_QUALITY_SCORE_DIMENSIONS[1:]
)

# Columns that are always part of the table.
PREDICT_NEVER_HIDDEN_COLUMNS = ['Model', 'Overall']

# Columns shown when the tab is first rendered.
PREDICT_DEFAULT_DISPLAYED_COLUMNS = [*PREDICT_NEVER_HIDDEN_COLUMNS, *PREDICT_ALL_SELECTED_COLUMNS]

def load_predict_json(json_path):
    """
    Load the generation leaderboard JSON into a display-ready DataFrame.

    The JSON should already be pre-processed by generate_predict_leaderboard.py
    with correct column names, ordering, sorting, and separate model/url fields.

    Processing steps:
      1. When both 'model' and 'url' columns exist, 'model' becomes a markdown
         link "[name](url)" (name = text after the last '/') for rows with a
         URL, and 'url' is dropped. Rows without a URL, or whose model value
         is not a string, keep the model value unchanged.
      2. 'model' is renamed to 'Model'.
      3. Every other numeric column is formatted as a string with one decimal;
         missing values are left as they are. Boolean columns are skipped:
         pandas reports them as numeric, but formatting a flag as "1.0" would
         be misleading.
      4. Columns are renamed to their abbreviations (PREDICT_COLUMN_ABBREV).

    Args:
        json_path: Path (or other pandas-readable source) of a records-oriented
            JSON file.

    Returns:
        pandas.DataFrame ready to be shown in the Generation tab.
    """
    df = pd.read_json(json_path, orient='records')

    if 'model' in df.columns and 'url' in df.columns:
        def create_link(name, url):
            # No URL, or nothing we can safely split: show the raw value.
            if pd.isna(url) or not isinstance(name, str):
                return name
            return f"[{name.rsplit('/', 1)[-1]}]({url})"

        # Pairwise iteration instead of a row-wise apply: also well defined
        # for a frame with zero rows.
        df['model'] = [create_link(name, url) for name, url in zip(df['model'], df['url'])]
        df = df.drop(columns=['url'])

    df = df.rename(columns={'model': 'Model'})

    for col in df.columns:
        if col == 'Model':
            continue
        is_flag = pd.api.types.is_bool_dtype(df[col])
        if pd.api.types.is_numeric_dtype(df[col]) and not is_flag:
            df[col] = df[col].apply(lambda x: f"{x:.1f}" if pd.notna(x) else x)

    # Rename columns to abbreviations for display
    return df.rename(columns=PREDICT_COLUMN_ABBREV)


def get_predict_checkbox_choices(dataframe):
    """Build (label, value) pairs for the Generation dimension checkboxes.

    'Model' and 'Overall' are skipped. A column whose name is a known
    abbreviation is labelled "Full Name (ABBR)"; any other column uses its own
    name as the label. The value is always the dataframe column name, and the
    result follows the dataframe's column order.
    """
    # Invert the display mapping: abbreviation -> full name.
    full_name_of = {abbrev: full for full, abbrev in PREDICT_COLUMN_ABBREV.items()}
    always_shown = ('Model', 'Overall')

    return [
        (f"{full_name_of[name]} ({name})", name) if name in full_name_of else (name, name)
        for name in dataframe.columns
        if name not in always_shown
    ]


def select_predict_domain_score():
    """Checkbox update that selects the 'Domain Score' preset columns."""
    preset = PREDICT_DOMAIN_SCORE_DIMENSIONS
    return gr.update(value=preset)


def select_predict_quality_score():
    """Checkbox update that selects the 'Quality Score' preset columns."""
    preset = PREDICT_QUALITY_SCORE_DIMENSIONS
    return gr.update(value=preset)


def deselect_predict_all():
    """Checkbox update for 'Deselect All'.

    The selection is reset to PREDICT_DESELECTED_COLUMNS rather than to an
    empty list.
    """
    preset = PREDICT_DESELECTED_COLUMNS
    return gr.update(value=preset)


def select_predict_all():
    """Checkbox update that selects every Generation dimension."""
    preset = PREDICT_ALL_SELECTED_COLUMNS
    return gr.update(value=preset)

def on_predict_dimension_selection_change(selected_columns, full_df):
    """Rebuild the Generation table for the current checkbox selection.

    Args:
        selected_columns: Column names currently ticked in the checkbox group.
        full_df: The complete leaderboard dataframe.

    Returns:
        A gr.update carrying the column subset, one datatype per column
        ('markdown' for 'Model', 'str' otherwise) and the matching headers.
        'Model' and 'Overall' always come first; the remaining columns follow
        in selection order, ignoring duplicates and names not in full_df.
    """
    pinned = ['Model', 'Overall']

    extras = []
    for name in selected_columns:
        if name in pinned or name in extras:
            continue
        if name in full_df.columns:
            extras.append(name)

    visible = pinned + extras
    table = full_df[visible]
    kinds = ['markdown' if name == 'Model' else 'str' for name in visible]

    return gr.update(value=table, datatype=kinds, headers=visible)


def init_predict_leaderboard(dataframe):
    """Initialize the Generation leaderboard with given dataframe.

    Must be called inside an active gr.Blocks context. Builds the preset
    buttons, the dimension checkbox group and the table, then wires them so
    that every preset button first rewrites the checkbox selection and then
    refreshes the table, and every checkbox change refreshes the table.

    Args:
        dataframe: Display-ready dataframe (abbreviated column names).

    Returns:
        The gr.Dataframe component showing the leaderboard.

    Raises:
        ValueError: If dataframe is None or has no rows.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    # Default columns that actually exist in this dataframe, in default order.
    available_default_cols = [col for col in PREDICT_DEFAULT_DISPLAYED_COLUMNS if col in dataframe.columns]
    display_df = dataframe[available_default_cols]

    # 'Model' holds markdown links; everything else is pre-formatted text.
    datatypes = ['markdown' if col == 'Model' else 'str' for col in display_df.columns]

    with gr.Row():
        with gr.Column(scale=1):
            domain_score_btn = gr.Button("Domain Score", size="md")
            quality_score_btn = gr.Button("Quality Score", size="md")
            select_all_btn = gr.Button("Select All", size="md")
            deselect_btn = gr.Button("Deselect All", size="md")

        with gr.Column(scale=4):
            # Choices are labelled "Full Name (Abbrev)".
            checkbox_group = gr.CheckboxGroup(
                choices=get_predict_checkbox_choices(dataframe),
                value=[col for col in PREDICT_ALL_SELECTED_COLUMNS if col in dataframe.columns],
                label="Evaluation Dimensions",
                interactive=True,
            )

    data_component = gr.Dataframe(
        value=display_df,
        headers=list(display_df.columns),
        datatype=datatypes,
        interactive=False,
        visible=True,
        wrap=False,
        column_widths=["320px"] + ["80px"] * (len(display_df.columns) - 1),
        pinned_columns=1,
        elem_id="predict_leaderboard",
        max_height=10000,
    )

    # One State component holding the full table, shared by all listeners
    # instead of creating a separate State (and copy of the table) per listener.
    full_df_state = gr.State(dataframe)
    refresh_table = dict(
        fn=on_predict_dimension_selection_change,
        inputs=[checkbox_group, full_df_state],
        outputs=data_component,
    )

    # Preset buttons: update the checkboxes, then redraw the table.
    preset_buttons = (
        (domain_score_btn, select_predict_domain_score),
        (quality_score_btn, select_predict_quality_score),
        (deselect_btn, deselect_predict_all),
        (select_all_btn, select_predict_all),
    )
    for button, apply_preset in preset_buttons:
        button.click(
            apply_preset,
            inputs=None,
            outputs=[checkbox_group],
        ).then(**refresh_table)

    checkbox_group.change(**refresh_table)

    return data_component


# ============================================================================
# Conditional Generation Tab Configuration and Utilities
# ============================================================================

# Metric columns of the Conditional Generation tab; the arrow in each name
# marks whether higher or lower values are better.
TRANSFER_QUALITY_DIMENSIONS = [
    'Blur SSIM ↑',
    'Edge F1 ↑',
    'Depth si-RMSE ↓',
    'Mask mIoU ↑',
    'Quality Score ↑',
    'Diversity ↑',
]

# Columns that are always part of the table.
TRANSFER_NEVER_HIDDEN_COLUMNS = ['Model', 'Condition']

# Full column order: the fixed columns followed by every metric.
TRANSFER_COLUMN_ORDER = [*TRANSFER_NEVER_HIDDEN_COLUMNS, *TRANSFER_QUALITY_DIMENSIONS]

# Columns hidden by default; currently none.
TRANSFER_HIDDEN_COLUMNS = list()

# "Select All" selects every metric (same list object as the metric list).
TRANSFER_ALL_SELECTED_COLUMNS = TRANSFER_QUALITY_DIMENSIONS

# Columns shown when the tab is first rendered.
TRANSFER_DEFAULT_DISPLAYED_COLUMNS = TRANSFER_NEVER_HIDDEN_COLUMNS + TRANSFER_ALL_SELECTED_COLUMNS


def load_transfer_json(json_path):
    """Load the conditional generation leaderboard JSON into a display-ready DataFrame.

    Processing steps:
      1. When both 'model' and 'url' columns exist, 'model' becomes a markdown
         link "[name](url)" (name = text after the last '/') for rows with a
         URL, and 'url' is dropped. Rows without a URL, or whose model value
         is not a string, keep the model value unchanged.
      2. 'model' is renamed to 'Model'.
      3. Every numeric column other than 'Model' and 'Condition' is formatted
         as a string with three decimals; missing values are left as they are.
         Boolean columns are skipped: pandas reports them as numeric, but
         formatting a flag as "1.000" would be misleading.

    Args:
        json_path: Path (or other pandas-readable source) of a records-oriented
            JSON file.

    Returns:
        pandas.DataFrame ready to be shown in the Conditional Generation tab.
    """
    df = pd.read_json(json_path, orient='records')

    if 'model' in df.columns and 'url' in df.columns:
        def create_link(name, url):
            # No URL, or nothing we can safely split: show the raw value.
            if pd.isna(url) or not isinstance(name, str):
                return name
            return f"[{name.rsplit('/', 1)[-1]}]({url})"

        # Pairwise iteration instead of a row-wise apply: also well defined
        # for a frame with zero rows.
        df['model'] = [create_link(name, url) for name, url in zip(df['model'], df['url'])]
        df = df.drop(columns=['url'])

    df = df.rename(columns={'model': 'Model'})

    for col in df.columns:
        if col in ('Model', 'Condition'):
            continue
        is_flag = pd.api.types.is_bool_dtype(df[col])
        if pd.api.types.is_numeric_dtype(df[col]) and not is_flag:
            df[col] = df[col].apply(lambda x: f"{x:.3f}" if pd.notna(x) else x)

    return df


def select_transfer_all():
    """Checkbox update that selects every Conditional Generation metric."""
    everything = TRANSFER_ALL_SELECTED_COLUMNS
    return gr.update(value=everything)


def deselect_transfer_all():
    """Checkbox update that clears the selection entirely."""
    nothing = list()
    return gr.update(value=nothing)


def on_transfer_dimension_selection_change(selected_columns, full_df):
    """Rebuild the Conditional Generation table for the current checkbox selection.

    Args:
        selected_columns: Column names currently ticked in the checkbox group.
        full_df: The complete leaderboard dataframe.

    Returns:
        A gr.update carrying the column subset, one datatype per column
        ('markdown' for 'Model', 'str' otherwise) and the matching headers.
        'Model' and 'Condition' always come first; the remaining columns
        follow in selection order, ignoring duplicates and names not in
        full_df.
    """
    pinned = ['Model', 'Condition']

    extras = []
    for name in selected_columns:
        if name in pinned or name in extras:
            continue
        if name in full_df.columns:
            extras.append(name)

    visible = pinned + extras
    table = full_df[visible]
    kinds = ['markdown' if name == 'Model' else 'str' for name in visible]

    return gr.update(value=table, datatype=kinds, headers=visible)


def init_transfer_leaderboard(dataframe):
    """Initialize the Conditional Generation leaderboard with given dataframe.

    Must be called inside an active gr.Blocks context. Builds the
    select/deselect buttons, the dimension checkbox group and the table, then
    wires them so that each button first rewrites the checkbox selection and
    then refreshes the table, and every checkbox change refreshes the table.

    Args:
        dataframe: Display-ready dataframe for this tab.

    Returns:
        The gr.Dataframe component showing the leaderboard.

    Raises:
        ValueError: If dataframe is None or has no rows.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    # Default columns that actually exist in this dataframe, in default order.
    available_default_cols = [col for col in TRANSFER_DEFAULT_DISPLAYED_COLUMNS if col in dataframe.columns]
    display_df = dataframe[available_default_cols]

    # 'Model' holds markdown links; everything else is pre-formatted text.
    datatypes = ['markdown' if col == 'Model' else 'str' for col in display_df.columns]

    with gr.Row():
        with gr.Column(scale=1):
            select_all_btn = gr.Button("Select All", size="md")
            deselect_btn = gr.Button("Deselect All", size="md")

        with gr.Column(scale=4):
            # Every column except the always-visible ones can be toggled.
            dimension_choices = [col for col in dataframe.columns
                                 if col not in TRANSFER_NEVER_HIDDEN_COLUMNS]

            checkbox_group = gr.CheckboxGroup(
                choices=dimension_choices,
                value=[col for col in TRANSFER_DEFAULT_DISPLAYED_COLUMNS if col in dimension_choices],
                label="Evaluation Dimensions",
                interactive=True,
            )

    data_component = gr.Dataframe(
        value=display_df,
        headers=list(display_df.columns),
        datatype=datatypes,
        interactive=False,
        visible=True,
        wrap=False,
        column_widths=["280px", "120px"] + ["150px"] * (len(display_df.columns) - 2),
        pinned_columns=2,
        elem_id="transfer_leaderboard",
        max_height=10000,
    )

    # One State component holding the full table, shared by all listeners
    # instead of creating a separate State (and copy of the table) per listener.
    full_df_state = gr.State(dataframe)
    refresh_table = dict(
        fn=on_transfer_dimension_selection_change,
        inputs=[checkbox_group, full_df_state],
        outputs=data_component,
    )

    # Buttons: update the checkboxes, then redraw the table.
    preset_buttons = (
        (deselect_btn, deselect_transfer_all),
        (select_all_btn, select_transfer_all),
    )
    for button, apply_preset in preset_buttons:
        button.click(
            apply_preset,
            inputs=None,
            outputs=[checkbox_group],
        ).then(**refresh_table)

    checkbox_group.change(**refresh_table)

    return data_component


# ============================================================================
# Understanding Tab Configuration and Utilities
# ============================================================================

# Long Understanding column names -> short header shown in the table. Columns
# that are not listed here keep the name they have in the JSON file.
REASON_COLUMN_ABBREV = dict([
    ('Common Sense', 'CS'),
    ('Embodied Reasoning', 'ER'),
    ('BridgeData V2', 'BD'),
    ('RoboVQA', 'RV'),
    ('RoboFail', 'RF'),
    ('Agibot', 'AB'),
    ('HoloAssist', 'HA'),
])

# Desired column order, spelled with the full (pre-abbreviation) names.
REASON_COLUMN_ORDER = [
    'Model', 'Thinking', 'Overall',
    'Common Sense', 'Embodied Reasoning',
    'Space', 'Time', 'Physics',
    'BridgeData V2', 'RoboVQA', 'RoboFail', 'Agibot', 'HoloAssist', 'AV',
]

# Columns hidden by default; currently none.
REASON_HIDDEN_COLUMNS = list()

# Checkbox preset for the "Common Sense" button (abbreviated names, i.e. the
# names the dataframe carries after renaming).
REASON_COMMON_SENSE_DIMENSIONS = ['CS', 'Space', 'Time', 'Physics']

# Checkbox preset for the "Embodied Reasoning" button (abbreviated names).
REASON_EMBODIED_REASONING_DIMENSIONS = [
    'ER',
    'Space', 'Time', 'Physics',
    'BD', 'RV', 'RF', 'AB', 'HA', 'AV',
]

# Selection applied by "Deselect All": the two aggregate columns stay checked.
REASON_DESELECTED_COLUMNS = ['CS', 'ER']

# Selection applied by "Select All": both aggregates followed by every
# non-aggregate column of the embodied-reasoning preset.
REASON_ALL_SELECTED_COLUMNS = REASON_DESELECTED_COLUMNS + REASON_EMBODIED_REASONING_DIMENSIONS[1:]

# Columns that are always part of the table.
REASON_NEVER_HIDDEN_COLUMNS = ['Model', 'Thinking', 'Overall']

# Columns shown when the tab is first rendered (renamed column names).
REASON_DEFAULT_DISPLAYED_COLUMNS = [*REASON_NEVER_HIDDEN_COLUMNS, *REASON_ALL_SELECTED_COLUMNS]


def load_reason_json(json_path):
    """Load the understanding leaderboard JSON into a display-ready DataFrame.

    Processing steps:
      1. When both 'model' and 'url' columns exist, 'model' becomes a markdown
         link "[name](url)" (name = text after the last '/') for rows with a
         URL, and 'url' is dropped. Rows without a URL, or whose model value
         is not a string, keep the model value unchanged.
      2. 'model' is renamed to 'Model'.
      3. Every other numeric column is formatted as a string with one decimal;
         missing values are left as they are. Boolean columns are skipped:
         pandas reports them as numeric, so a true/false flag column would
         otherwise be rendered as "1.0"/"0.0".
         NOTE(review): whether 'Thinking' is stored as a boolean is not
         visible here - confirm against the JSON.
      4. Columns are renamed to their abbreviations (REASON_COLUMN_ABBREV).

    Args:
        json_path: Path (or other pandas-readable source) of a records-oriented
            JSON file.

    Returns:
        pandas.DataFrame ready to be shown in the Understanding tab.
    """
    df = pd.read_json(json_path, orient='records')

    if 'model' in df.columns and 'url' in df.columns:
        def create_link(name, url):
            # No URL, or nothing we can safely split: show the raw value.
            if pd.isna(url) or not isinstance(name, str):
                return name
            return f"[{name.rsplit('/', 1)[-1]}]({url})"

        # Pairwise iteration instead of a row-wise apply: also well defined
        # for a frame with zero rows.
        df['model'] = [create_link(name, url) for name, url in zip(df['model'], df['url'])]
        df = df.drop(columns=['url'])

    df = df.rename(columns={'model': 'Model'})

    for col in df.columns:
        if col == 'Model':
            continue
        is_flag = pd.api.types.is_bool_dtype(df[col])
        if pd.api.types.is_numeric_dtype(df[col]) and not is_flag:
            df[col] = df[col].apply(lambda x: f"{x:.1f}" if pd.notna(x) else x)

    # Rename columns to abbreviations for display
    return df.rename(columns=REASON_COLUMN_ABBREV)


def get_reason_checkbox_choices(dataframe):
    """Build (label, value) pairs for the Understanding dimension checkboxes.

    'Model', 'Thinking' and 'Overall' are skipped. A column whose name is a
    known abbreviation is labelled "Full Name (ABBR)"; any other column uses
    its own name as the label. The value is always the dataframe column name,
    and the result follows the dataframe's column order.
    """
    # Invert the display mapping: abbreviation -> full name.
    full_name_of = {abbrev: full for full, abbrev in REASON_COLUMN_ABBREV.items()}
    always_shown = ('Model', 'Thinking', 'Overall')

    return [
        (f"{full_name_of[name]} ({name})", name) if name in full_name_of else (name, name)
        for name in dataframe.columns
        if name not in always_shown
    ]


def select_reason_common_sense_dimensions():
    """Checkbox update that selects the 'Common Sense' preset columns."""
    preset = REASON_COMMON_SENSE_DIMENSIONS
    return gr.update(value=preset)


def select_reason_embodied_reasoning_dimensions():
    """Checkbox update that selects the 'Embodied Reasoning' preset columns."""
    preset = REASON_EMBODIED_REASONING_DIMENSIONS
    return gr.update(value=preset)


def deselect_reason_all():
    """Checkbox update for 'Deselect All'.

    The selection is reset to REASON_DESELECTED_COLUMNS rather than to an
    empty list.
    """
    preset = REASON_DESELECTED_COLUMNS
    return gr.update(value=preset)


def select_reason_all():
    """Checkbox update that selects every Understanding dimension."""
    preset = REASON_ALL_SELECTED_COLUMNS
    return gr.update(value=preset)


def on_reason_dimension_selection_change(selected_columns, full_df):
    """Rebuild the Understanding table for the current checkbox selection.

    Args:
        selected_columns: Column names currently ticked in the checkbox group.
        full_df: The complete leaderboard dataframe.

    Returns:
        A gr.update carrying the column subset, one datatype per column
        ('markdown' for 'Model', 'str' otherwise) and the matching headers.
        'Model', 'Thinking' and 'Overall' always come first; the remaining
        columns follow in selection order, ignoring duplicates and names not
        in full_df.
    """
    pinned = ['Model', 'Thinking', 'Overall']

    extras = []
    for name in selected_columns:
        if name in pinned or name in extras:
            continue
        if name in full_df.columns:
            extras.append(name)

    visible = pinned + extras
    table = full_df[visible]
    kinds = ['markdown' if name == 'Model' else 'str' for name in visible]

    return gr.update(value=table, datatype=kinds, headers=visible)


def init_reason_leaderboard(dataframe):
    """Initialize the Understanding leaderboard with given dataframe.

    Must be called inside an active gr.Blocks context. Builds the preset
    buttons, the dimension checkbox group and the table, then wires them so
    that every preset button first rewrites the checkbox selection and then
    refreshes the table, and every checkbox change refreshes the table.

    Args:
        dataframe: Display-ready dataframe (abbreviated column names).

    Returns:
        The gr.Dataframe component showing the leaderboard.

    Raises:
        ValueError: If dataframe is None or has no rows.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")

    # Default columns that actually exist in this dataframe, in default order.
    available_default_cols = [col for col in REASON_DEFAULT_DISPLAYED_COLUMNS if col in dataframe.columns]
    display_df = dataframe[available_default_cols]

    # 'Model' holds markdown links; everything else is pre-formatted text.
    datatypes = ['markdown' if col == 'Model' else 'str' for col in display_df.columns]

    with gr.Row():
        with gr.Column(scale=1):
            common_sense_btn = gr.Button("Common Sense", size="md")
            embodied_reasoning_btn = gr.Button("Embodied Reasoning", size="md")
            select_all_btn = gr.Button("Select All", size="md")
            deselect_btn = gr.Button("Deselect All", size="md")

        with gr.Column(scale=4):
            # Choices are labelled "Full Name (Abbrev)".
            checkbox_group = gr.CheckboxGroup(
                choices=get_reason_checkbox_choices(dataframe),
                value=[col for col in REASON_ALL_SELECTED_COLUMNS if col in dataframe.columns],
                label="Evaluation Dimensions",
                interactive=True,
            )

    data_component = gr.Dataframe(
        value=display_df,
        headers=list(display_df.columns),
        datatype=datatypes,
        interactive=False,
        visible=True,
        wrap=False,
        column_widths=["320px", "100px"] + ["100px"] * (len(display_df.columns) - 2),
        pinned_columns=2,
        elem_id="reason_leaderboard",
        max_height=10000,
    )

    # One State component holding the full table, shared by all listeners
    # instead of creating a separate State (and copy of the table) per listener.
    full_df_state = gr.State(dataframe)
    refresh_table = dict(
        fn=on_reason_dimension_selection_change,
        inputs=[checkbox_group, full_df_state],
        outputs=data_component,
    )

    # Preset buttons: update the checkboxes, then redraw the table.
    preset_buttons = (
        (common_sense_btn, select_reason_common_sense_dimensions),
        (embodied_reasoning_btn, select_reason_embodied_reasoning_dimensions),
        (deselect_btn, deselect_reason_all),
        (select_all_btn, select_reason_all),
    )
    for button, apply_preset in preset_buttons:
        button.click(
            apply_preset,
            inputs=None,
            outputs=[checkbox_group],
        ).then(**refresh_table)

    checkbox_group.change(**refresh_table)

    return data_component


# ============================================================================
# Main Application
# ============================================================================

# Assemble the page: injected stylesheet, title, introduction, then one tab per
# leaderboard plus an "About" tab. Each leaderboard tab loads its JSON file at
# build time and hands the dataframe to the matching init_* function.
with gr.Blocks() as demo:
    gr.HTML(f"<style>{CSS}</style>")
    gr.HTML(TITLE)
    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")

    with gr.Tabs(elem_classes="tab-buttons") as tabs:
        # Tab 0: Generation
        with gr.TabItem("🎨 Generation", elem_id="predict-tab", id=0):
            predict_df = load_predict_json("data/generation-leaderboard.json")
            predict_leaderboard = init_predict_leaderboard(predict_df)

        # Tab 1: Conditional Generation
        with gr.TabItem("🔄 Conditional Generation", elem_id="transfer-tab", id=1):
            transfer_df = load_transfer_json("data/conditional_generation-leaderboard.json")
            transfer_leaderboard = init_transfer_leaderboard(transfer_df)

        # Tab 2: Understanding
        with gr.TabItem("🧠 Understanding", elem_id="reason-tab", id=2):
            reason_df = load_reason_json("data/understanding-leaderboard.json")
            reason_leaderboard = init_reason_leaderboard(reason_df)

        # Tab 3: static description, resources and citation
        with gr.TabItem("ℹ️ About", elem_id="about-tab", id=3):
            gr.Markdown(LLM_BENCHMARKS_TEXT, elem_classes="markdown-text")

# Start the web server as soon as the module is executed.
demo.launch()