| from __future__ import annotations |
|
|
| import os |
| from pathlib import Path |
|
|
| import gradio as gr |
|
|
| from leaderboard import ( |
| DATA_PATH, |
| SORT_CHOICES, |
| filter_leaderboard, |
| load_leaderboard, |
| metric_standings, |
| model_choices, |
| render_methodology, |
| render_profile, |
| render_summary, |
| render_table, |
| ) |
| from submission import submit_score |
|
|
|
|
# Directory that contains this script; bundled files are resolved against it.
ROOT = Path(__file__).parent
# Overview figure; the "Metric Scheme" tab only shows it when the file exists.
OVERVIEW_IMAGE = ROOT.joinpath("assets", "avbench_outline.png")
# CSV template offered for download on the "Submission" tab.
SUBMISSION_TEMPLATE = ROOT.joinpath("data", "submission_template.csv")

# Loaded once at import time and shared by every UI callback below.
LEADERBOARD = load_leaderboard(DATA_PATH)
# Derived from LEADERBOARD via metric_standings; handed to render_table.
STANDINGS = metric_standings(LEADERBOARD)
|
|
|
|
def update_leaderboard(component_type: str, query: str, sort_by: str, sort_order: str):
    """Re-render the leaderboard tab for the current filter and sort controls.

    Args:
        component_type: Value of the "Component type" dropdown.
        query: Text typed into the search box.
        sort_by: Value of the "Sort item" dropdown.
        sort_order: Value of the "Sort order" radio group.

    Returns:
        A 2-tuple ``(summary, table)``: the result of ``render_summary`` for
        the full leaderboard versus the filtered view, followed by the result
        of ``render_table`` for the filtered view. Both are bound to
        ``gr.HTML`` outputs by the event wiring.
    """
    filtered = filter_leaderboard(LEADERBOARD, component_type, query, sort_by, sort_order)
    summary_markup = render_summary(LEADERBOARD, filtered)
    table_markup = render_table(filtered, STANDINGS, sort_by, sort_order)
    return summary_markup, table_markup
|
|
|
|
def update_profile(model: str):
    """Render the profile panel for the model picked in the dropdown.

    Args:
        model: Value of the "Model" dropdown on the "Model Profile" tab.

    Returns:
        Whatever ``render_profile`` produces for ``model`` against the
        module-level ``LEADERBOARD``; it is bound to a ``gr.HTML`` output.
    """
    rendered = render_profile(LEADERBOARD, model)
    return rendered
|
|
|
|
# Custom stylesheet for the app; the __main__ entry point passes it to
# demo.launch(css=...).
#
# Layout of the sheet:
#   * :root declares the --avgen-* palette variables reused by later rules.
#   * .app-header / .header-links style the HEADER markup defined below.
#   * .overview-image and .submission-copy are attached in the Blocks layout.
#   * The summary / table / profile / method / status selectors are presumably
#     emitted by the render_* helpers and the submission module — verify there
#     before renaming any class.
#   * Two breakpoints (980px and 640px) collapse the grid layouts.
CSS = """
:root {
  --avgen-bg: #f6f7f9;
  --avgen-panel: #ffffff;
  --avgen-ink: #18202b;
  --avgen-muted: #5f6b7a;
  --avgen-line: #d9dee7;
  --avgen-blue: #1d5f9f;
  --avgen-teal: #087a73;
  --avgen-orange: #b45309;
  --avgen-green: #24784f;
}

.gradio-container {
  max-width: 1280px !important;
  margin: 0 auto !important;
  background: var(--avgen-bg) !important;
  color: var(--avgen-ink) !important;
}

.app-header {
  display: grid;
  grid-template-columns: minmax(0, 1fr) auto;
  gap: 24px;
  align-items: end;
  padding: 26px 0 12px;
  border-bottom: 1px solid var(--avgen-line);
}

.app-header h1 {
  margin: 0;
  color: #111827 !important;
  font-size: clamp(28px, 4vw, 44px);
  font-weight: 850;
  line-height: 1.05;
  letter-spacing: 0;
}

.app-header p {
  margin: 10px 0 0;
  max-width: 860px;
  color: #4b5563 !important;
  font-size: 16px;
}

.header-links {
  display: flex;
  flex-wrap: wrap;
  gap: 8px;
  justify-content: flex-end;
}

.header-links a {
  color: var(--avgen-blue);
  text-decoration: none;
  border: 1px solid var(--avgen-line);
  background: #fff;
  padding: 8px 11px;
  border-radius: 8px;
  font-weight: 650;
}

.summary-grid {
  display: grid;
  grid-template-columns: repeat(5, minmax(130px, 1fr));
  gap: 10px;
  margin: 14px 0;
}

.summary-card,
.method-card {
  border: 1px solid var(--avgen-line);
  background: var(--avgen-panel);
  border-radius: 8px;
  padding: 12px;
}

.summary-card span,
.profile-total span,
.eyebrow {
  display: block;
  color: #4f5d6e !important;
  font-size: 12px;
  text-transform: uppercase;
  letter-spacing: 0;
  font-weight: 700;
}

.summary-card strong {
  display: block;
  margin-top: 4px;
  color: #111827 !important;
  font-size: 26px;
  font-weight: 850;
  line-height: 1;
}

.summary-card small {
  display: block;
  margin-top: 6px;
  color: #4b5563 !important;
  min-height: 32px;
}

.table-shell {
  overflow-x: auto;
  border: 1px solid var(--avgen-line);
  border-radius: 8px;
  background: #fff;
}

.leaderboard-table {
  width: 100%;
  border-collapse: collapse;
  min-width: 1180px;
  font-size: 14px;
}

.leaderboard-table th,
.leaderboard-table td {
  padding: 11px 10px;
  border-bottom: 1px solid #edf0f5;
  vertical-align: middle;
}

.leaderboard-table tbody td {
  color: var(--avgen-ink) !important;
}

.leaderboard-table thead th {
  position: sticky;
  top: 0;
  background: #f1f4f8;
  color: #202936 !important;
  text-align: left;
  white-space: nowrap;
  z-index: 1;
}

.leaderboard-table thead th.sorted {
  background: #e6eef8;
  color: #111827 !important;
  box-shadow: inset 0 -2px 0 var(--avgen-blue);
}

.sort-indicator {
  display: inline-block;
  margin-left: 6px;
  color: var(--avgen-blue);
  font-weight: 850;
}

.leaderboard-table tbody tr:hover {
  background: #faf7f1;
}

.rank-cell {
  width: 62px;
  color: #4f5d6e !important;
  font-weight: 700;
}

.model-cell {
  min-width: 180px;
  color: #16202d !important;
  font-weight: 750;
}

.components-cell {
  min-width: 220px;
}

.metric-cell {
  text-align: right;
  color: #253142 !important;
  font-weight: 650;
  font-variant-numeric: tabular-nums;
}

.metric-cell.best {
  color: #0f6a43 !important;
  font-weight: 800;
  background: #edf8f2;
}

.metric-cell.second {
  color: #8a4b08 !important;
  font-weight: 750;
  background: #fff7e8;
}

.component-badge,
.type-badge {
  display: inline-flex;
  align-items: center;
  max-width: 100%;
  margin: 2px 4px 2px 0;
  padding: 3px 8px;
  border-radius: 999px;
  font-size: 12px;
  font-weight: 700;
  white-space: nowrap;
  border: 1px solid transparent;
}

.component-badge.proprietary,
.type-badge.proprietary {
  color: var(--avgen-orange);
  background: #fff4e5;
  border-color: #f2d4aa;
}

.component-badge.open,
.type-badge.opensource {
  color: var(--avgen-blue);
  background: #edf5ff;
  border-color: #c7dff8;
}

.component-badge.neutral,
.type-badge.mixed {
  color: var(--avgen-teal);
  background: #eaf7f5;
  border-color: #bde0dc;
}

.profile-panel {
  display: grid;
  grid-template-columns: minmax(0, 1fr) auto;
  gap: 16px;
  border: 1px solid var(--avgen-line);
  background: #fff;
  border-radius: 8px;
  padding: 18px;
}

.profile-panel h2 {
  margin: 2px 0 8px;
  font-size: 26px;
}

.profile-total {
  min-width: 140px;
  text-align: right;
}

.profile-total strong {
  display: block;
  font-size: 42px;
  line-height: 1;
}

.profile-grid {
  grid-column: 1 / -1;
  display: grid;
  grid-template-columns: repeat(3, minmax(0, 1fr));
  gap: 10px;
}

.profile-metric {
  border: 1px solid #edf0f5;
  border-radius: 8px;
  padding: 10px;
}

.profile-metric-head {
  display: flex;
  justify-content: space-between;
  gap: 10px;
  font-variant-numeric: tabular-nums;
}

.bar-track {
  height: 8px;
  margin: 9px 0 6px;
  border-radius: 999px;
  background: #e7ebf1;
  overflow: hidden;
}

.bar-fill {
  height: 100%;
  border-radius: inherit;
  background: linear-gradient(90deg, var(--avgen-teal), var(--avgen-green));
}

.profile-metric small,
.methodology p {
  color: var(--avgen-muted);
}

.method-grid {
  display: grid;
  grid-template-columns: repeat(3, minmax(0, 1fr));
  gap: 10px;
  margin-bottom: 12px;
}

.method-card h3 {
  margin: 0 0 4px;
  font-size: 16px;
}

.method-card strong {
  display: block;
  font-size: 30px;
  color: var(--avgen-blue);
}

.empty-state {
  border: 1px solid var(--avgen-line);
  background: #fff;
  border-radius: 8px;
  padding: 22px;
  color: var(--avgen-muted);
}

.overview-image img {
  border: 1px solid var(--avgen-line);
  border-radius: 8px;
  background: #fff;
}

.submission-copy {
  color: var(--avgen-muted);
  margin: 0 0 14px;
}

.submission-copy strong {
  color: var(--avgen-ink);
}

.status-card {
  border: 1px solid var(--avgen-line);
  border-radius: 8px;
  padding: 14px;
  background: #fff;
}

.status-card strong {
  display: block;
  margin-bottom: 4px;
}

.status-card p {
  margin: 4px 0 0;
}

.status-card.success {
  border-color: #b8dbc9;
  background: #f1faf5;
}

.status-card.error {
  border-color: #efc2bd;
  background: #fff3f1;
}

@media (max-width: 980px) {
  .app-header {
    grid-template-columns: 1fr;
  }

  .header-links {
    justify-content: flex-start;
  }

  .summary-grid,
  .profile-grid,
  .method-grid {
    grid-template-columns: 1fr 1fr;
  }
}

@media (max-width: 640px) {
  .summary-grid,
  .profile-grid,
  .method-grid {
    grid-template-columns: 1fr;
  }

  .profile-panel {
    grid-template-columns: 1fr;
  }

  .profile-total {
    text-align: left;
  }
}
"""
|
|
|
|
# Static banner rendered at the top of the page through gr.HTML(HEADER):
# the benchmark title, a one-paragraph description, and three outbound links
# (GitHub, Paper, Dataset). The .app-header and .header-links classes are
# styled by the CSS constant in this file.
HEADER = """
<div class="app-header">
  <div>
    <h1>AVGen-Bench Leaderboard</h1>
    <p>
      A leaderboard for multi-granular evaluation of Text-to-Audio-Video generation,
      covering visual/audio quality, synchronization, fine-grained controllability,
      physical plausibility, and holistic semantic alignment.
    </p>
  </div>
  <div class="header-links">
    <a href="https://github.com/microsoft/AVGen-Bench" target="_blank" rel="noopener">GitHub</a>
    <a href="https://arxiv.org/abs/2604.08540" target="_blank" rel="noopener">Paper</a>
    <a href="https://huggingface.co/datasets/microsoft/AVGen-Bench" target="_blank" rel="noopener">Dataset</a>
  </div>
</div>
"""
|
|
|
|
# Build the Gradio UI: a header followed by four tabs (Leaderboard, Model
# Profile, Metric Scheme, Submission), then wire the controls to the callbacks.
with gr.Blocks(title="AVGen-Bench Leaderboard") as demo:
    gr.HTML(HEADER)

    with gr.Tab("Leaderboard"):
        # Filter / sort controls; any change re-renders the summary and table.
        with gr.Row():
            component_type = gr.Dropdown(
                choices=["All", "Proprietary", "Open-source", "Mixed"],
                value="All",
                label="Component type",
            )
            sort_by = gr.Dropdown(
                choices=SORT_CHOICES,
                value="Total",
                label="Sort item",
            )
            sort_order = gr.Radio(
                choices=["Descending", "Ascending", "Best first"],
                value="Descending",
                label="Sort order",
            )
            query = gr.Textbox(label="Search", placeholder="Model or component")

        summary = gr.HTML()
        table = gr.HTML()

    with gr.Tab("Model Profile"):
        # Resolve the choices once. Indexing [0] directly would raise
        # IndexError at import time when the leaderboard has no rows, so fall
        # back to an empty selection instead.
        profile_models = list(model_choices(LEADERBOARD))
        model = gr.Dropdown(
            choices=profile_models,
            value=profile_models[0] if profile_models else None,
            label="Model",
        )
        profile = gr.HTML()

    with gr.Tab("Metric Scheme"):
        gr.HTML(render_methodology())
        # The overview figure is optional; skip it when the asset is missing.
        if OVERVIEW_IMAGE.exists():
            gr.Image(
                value=str(OVERVIEW_IMAGE),
                label="AVGen-Bench evaluation suite",
                show_label=False,
                interactive=False,
                elem_classes=["overview-image"],
            )

    with gr.Tab("Submission"):
        gr.HTML(
            """
            <p class="submission-copy">
              Submit raw AVGen-Bench metrics for review. The app recomputes
              <strong>Total</strong> from the raw metrics and sends the entry to a
              pending-review backend. Accepted entries are still merged into the
              official leaderboard manually.
            </p>
            """
        )
        with gr.Row():
            submit_model = gr.Textbox(label="Model name", placeholder="Your Model")
            submit_component_type = gr.Dropdown(
                choices=["Proprietary", "Open-source", "Mixed"],
                value="Open-source",
                label="Component type",
            )
            submit_components = gr.Textbox(
                label="Components",
                placeholder="VideoModel (Open-source)|AudioModel (Open-source)",
            )
        with gr.Row():
            submit_contact = gr.Textbox(label="Public contact", placeholder="GitHub handle or email")
            submit_model_url = gr.Textbox(label="Model or paper URL", placeholder="https://...")
            submit_results_url = gr.Textbox(label="Evaluation artifact URL", placeholder="https://...")
        submit_notes = gr.Textbox(label="Notes", lines=3, placeholder="Optional evaluation details")

        # One numeric input per raw metric, grouped into three rows.
        with gr.Accordion("Raw metric scores", open=True):
            with gr.Row():
                submit_vis = gr.Number(label="Vis")
                submit_aud = gr.Number(label="Aud (PQ)")
                submit_av = gr.Number(label="AV")
                submit_lip = gr.Number(label="Lip")
            with gr.Row():
                submit_text = gr.Number(label="Text")
                submit_face = gr.Number(label="Face")
                submit_music = gr.Number(label="Music")
                submit_speech = gr.Number(label="Speech")
            with gr.Row():
                submit_lophy = gr.Number(label="Lo-Phy")
                submit_hiphy = gr.Number(label="Hi-Phy")
                submit_holistic = gr.Number(label="Holistic")

        submit_button = gr.Button("Submit for Review", variant="primary")
        submit_status = gr.HTML()
        submit_payload = gr.Code(label="Submission JSON", language="json")
        # Same guard as the overview image: only offer the template download
        # when the file actually ships with the app.
        if SUBMISSION_TEMPLATE.exists():
            gr.File(value=str(SUBMISSION_TEMPLATE), label="CSV template", interactive=False)

    # Populate both views once when the page loads.
    # NOTE(review): with an empty leaderboard update_profile receives None;
    # this assumes render_profile copes with an unknown model — confirm.
    demo.load(
        fn=update_leaderboard,
        inputs=[component_type, query, sort_by, sort_order],
        outputs=[summary, table],
    )
    demo.load(fn=update_profile, inputs=[model], outputs=[profile])

    # Every filter / sort control triggers the same re-render with all four
    # current values.
    for control in [component_type, query, sort_by, sort_order]:
        control.change(
            fn=update_leaderboard,
            inputs=[component_type, query, sort_by, sort_order],
            outputs=[summary, table],
        )

    model.change(fn=update_profile, inputs=[model], outputs=[profile])
    # The input order below is the positional argument order submit_score
    # is called with.
    submit_button.click(
        fn=submit_score,
        inputs=[
            submit_model,
            submit_components,
            submit_component_type,
            submit_contact,
            submit_model_url,
            submit_results_url,
            submit_notes,
            submit_vis,
            submit_aud,
            submit_av,
            submit_lip,
            submit_text,
            submit_face,
            submit_music,
            submit_speech,
            submit_lophy,
            submit_hiphy,
            submit_holistic,
        ],
        outputs=[submit_status, submit_payload],
        api_name="submit_score",
    )
|
|
|
|
if __name__ == "__main__":
    # Bind address: GRADIO_SERVER_NAME if set, otherwise all interfaces.
    server_name = os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0")
    # PORT takes precedence over GRADIO_SERVER_PORT; when neither is set (or
    # both are empty) no server_port is passed to launch().
    port_text = os.environ.get("PORT") or os.environ.get("GRADIO_SERVER_PORT")
    port_kwargs = {"server_port": int(port_text)} if port_text else {}
    demo.launch(css=CSS, server_name=server_name, **port_kwargs)
|
|