from __future__ import annotations import os from pathlib import Path import gradio as gr from leaderboard import ( DATA_PATH, SORT_CHOICES, filter_leaderboard, load_leaderboard, metric_standings, model_choices, render_methodology, render_profile, render_summary, render_table, ) from submission import submit_score ROOT = Path(__file__).parent OVERVIEW_IMAGE = ROOT / "assets" / "avbench_outline.png" SUBMISSION_TEMPLATE = ROOT / "data" / "submission_template.csv" LEADERBOARD = load_leaderboard(DATA_PATH) STANDINGS = metric_standings(LEADERBOARD) def update_leaderboard(component_type: str, query: str, sort_by: str, sort_order: str): view = filter_leaderboard(LEADERBOARD, component_type, query, sort_by, sort_order) return render_summary(LEADERBOARD, view), render_table(view, STANDINGS, sort_by, sort_order) def update_profile(model: str): return render_profile(LEADERBOARD, model) CSS = """ :root { --avgen-bg: #f6f7f9; --avgen-panel: #ffffff; --avgen-ink: #18202b; --avgen-muted: #5f6b7a; --avgen-line: #d9dee7; --avgen-blue: #1d5f9f; --avgen-teal: #087a73; --avgen-orange: #b45309; --avgen-green: #24784f; } .gradio-container { max-width: 1280px !important; margin: 0 auto !important; background: var(--avgen-bg) !important; color: var(--avgen-ink) !important; } .app-header { display: grid; grid-template-columns: minmax(0, 1fr) auto; gap: 24px; align-items: end; padding: 26px 0 12px; border-bottom: 1px solid var(--avgen-line); } .app-header h1 { margin: 0; color: #111827 !important; font-size: clamp(28px, 4vw, 44px); font-weight: 850; line-height: 1.05; letter-spacing: 0; } .app-header p { margin: 10px 0 0; max-width: 860px; color: #4b5563 !important; font-size: 16px; } .header-links { display: flex; flex-wrap: wrap; gap: 8px; justify-content: flex-end; } .header-links a { color: var(--avgen-blue); text-decoration: none; border: 1px solid var(--avgen-line); background: #fff; padding: 8px 11px; border-radius: 8px; font-weight: 650; } .summary-grid { display: grid; grid-template-columns: repeat(5, minmax(130px, 1fr)); gap: 10px; margin: 14px 0; } .summary-card, .method-card { border: 1px solid var(--avgen-line); background: var(--avgen-panel); border-radius: 8px; padding: 12px; } .summary-card span, .profile-total span, .eyebrow { display: block; color: #4f5d6e !important; font-size: 12px; text-transform: uppercase; letter-spacing: 0; font-weight: 700; } .summary-card strong { display: block; margin-top: 4px; color: #111827 !important; font-size: 26px; font-weight: 850; line-height: 1; } .summary-card small { display: block; margin-top: 6px; color: #4b5563 !important; min-height: 32px; } .table-shell { overflow-x: auto; border: 1px solid var(--avgen-line); border-radius: 8px; background: #fff; } .leaderboard-table { width: 100%; border-collapse: collapse; min-width: 1180px; font-size: 14px; } .leaderboard-table th, .leaderboard-table td { padding: 11px 10px; border-bottom: 1px solid #edf0f5; vertical-align: middle; } .leaderboard-table tbody td { color: var(--avgen-ink) !important; } .leaderboard-table thead th { position: sticky; top: 0; background: #f1f4f8; color: #202936 !important; text-align: left; white-space: nowrap; z-index: 1; } .leaderboard-table thead th.sorted { background: #e6eef8; color: #111827 !important; box-shadow: inset 0 -2px 0 var(--avgen-blue); } .sort-indicator { display: inline-block; margin-left: 6px; color: var(--avgen-blue); font-weight: 850; } .leaderboard-table tbody tr:hover { background: #faf7f1; } .rank-cell { width: 62px; color: #4f5d6e !important; font-weight: 700; } .model-cell { min-width: 180px; color: #16202d !important; font-weight: 750; } .components-cell { min-width: 220px; } .metric-cell { text-align: right; color: #253142 !important; font-weight: 650; font-variant-numeric: tabular-nums; } .metric-cell.best { color: #0f6a43 !important; font-weight: 800; background: #edf8f2; } .metric-cell.second { color: #8a4b08 !important; font-weight: 750; background: #fff7e8; } .component-badge, .type-badge { display: inline-flex; align-items: center; max-width: 100%; margin: 2px 4px 2px 0; padding: 3px 8px; border-radius: 999px; font-size: 12px; font-weight: 700; white-space: nowrap; border: 1px solid transparent; } .component-badge.proprietary, .type-badge.proprietary { color: var(--avgen-orange); background: #fff4e5; border-color: #f2d4aa; } .component-badge.open, .type-badge.opensource { color: var(--avgen-blue); background: #edf5ff; border-color: #c7dff8; } .component-badge.neutral, .type-badge.mixed { color: var(--avgen-teal); background: #eaf7f5; border-color: #bde0dc; } .profile-panel { display: grid; grid-template-columns: minmax(0, 1fr) auto; gap: 16px; border: 1px solid var(--avgen-line); background: #fff; border-radius: 8px; padding: 18px; } .profile-panel h2 { margin: 2px 0 8px; font-size: 26px; } .profile-total { min-width: 140px; text-align: right; } .profile-total strong { display: block; font-size: 42px; line-height: 1; } .profile-grid { grid-column: 1 / -1; display: grid; grid-template-columns: repeat(3, minmax(0, 1fr)); gap: 10px; } .profile-metric { border: 1px solid #edf0f5; border-radius: 8px; padding: 10px; } .profile-metric-head { display: flex; justify-content: space-between; gap: 10px; font-variant-numeric: tabular-nums; } .bar-track { height: 8px; margin: 9px 0 6px; border-radius: 999px; background: #e7ebf1; overflow: hidden; } .bar-fill { height: 100%; border-radius: inherit; background: linear-gradient(90deg, var(--avgen-teal), var(--avgen-green)); } .profile-metric small, .methodology p { color: var(--avgen-muted); } .method-grid { display: grid; grid-template-columns: repeat(3, minmax(0, 1fr)); gap: 10px; margin-bottom: 12px; } .method-card h3 { margin: 0 0 4px; font-size: 16px; } .method-card strong { display: block; font-size: 30px; color: var(--avgen-blue); } .empty-state { border: 1px solid var(--avgen-line); background: #fff; border-radius: 8px; padding: 22px; color: var(--avgen-muted); } .overview-image img { border: 1px solid var(--avgen-line); border-radius: 8px; background: #fff; } .submission-copy { color: var(--avgen-muted); margin: 0 0 14px; } .submission-copy strong { color: var(--avgen-ink); } .status-card { border: 1px solid var(--avgen-line); border-radius: 8px; padding: 14px; background: #fff; } .status-card strong { display: block; margin-bottom: 4px; } .status-card p { margin: 4px 0 0; } .status-card.success { border-color: #b8dbc9; background: #f1faf5; } .status-card.error { border-color: #efc2bd; background: #fff3f1; } @media (max-width: 980px) { .app-header { grid-template-columns: 1fr; } .header-links { justify-content: flex-start; } .summary-grid, .profile-grid, .method-grid { grid-template-columns: 1fr 1fr; } } @media (max-width: 640px) { .summary-grid, .profile-grid, .method-grid { grid-template-columns: 1fr; } .profile-panel { grid-template-columns: 1fr; } .profile-total { text-align: left; } } """ HEADER = """
A leaderboard for multi-granular evaluation of Text-to-Audio-Video generation, covering visual/audio quality, synchronization, fine-grained controllability, physical plausibility, and holistic semantic alignment.
Submit raw AVGen-Bench metrics for review. The app recomputes Total from the raw metrics and sends the entry to a pending-review backend. Accepted entries are still merged into the official leaderboard manually.
""" ) with gr.Row(): submit_model = gr.Textbox(label="Model name", placeholder="Your Model") submit_component_type = gr.Dropdown( choices=["Proprietary", "Open-source", "Mixed"], value="Open-source", label="Component type", ) submit_components = gr.Textbox( label="Components", placeholder="VideoModel (Open-source)|AudioModel (Open-source)", ) with gr.Row(): submit_contact = gr.Textbox(label="Public contact", placeholder="GitHub handle or email") submit_model_url = gr.Textbox(label="Model or paper URL", placeholder="https://...") submit_results_url = gr.Textbox(label="Evaluation artifact URL", placeholder="https://...") submit_notes = gr.Textbox(label="Notes", lines=3, placeholder="Optional evaluation details") with gr.Accordion("Raw metric scores", open=True): with gr.Row(): submit_vis = gr.Number(label="Vis") submit_aud = gr.Number(label="Aud (PQ)") submit_av = gr.Number(label="AV") submit_lip = gr.Number(label="Lip") with gr.Row(): submit_text = gr.Number(label="Text") submit_face = gr.Number(label="Face") submit_music = gr.Number(label="Music") submit_speech = gr.Number(label="Speech") with gr.Row(): submit_lophy = gr.Number(label="Lo-Phy") submit_hiphy = gr.Number(label="Hi-Phy") submit_holistic = gr.Number(label="Holistic") submit_button = gr.Button("Submit for Review", variant="primary") submit_status = gr.HTML() submit_payload = gr.Code(label="Submission JSON", language="json") gr.File(value=str(SUBMISSION_TEMPLATE), label="CSV template", interactive=False) demo.load( fn=update_leaderboard, inputs=[component_type, query, sort_by, sort_order], outputs=[summary, table], ) demo.load(fn=update_profile, inputs=[model], outputs=[profile]) for control in [component_type, query, sort_by, sort_order]: control.change( fn=update_leaderboard, inputs=[component_type, query, sort_by, sort_order], outputs=[summary, table], ) model.change(fn=update_profile, inputs=[model], outputs=[profile]) submit_button.click( fn=submit_score, inputs=[ submit_model, submit_components, submit_component_type, submit_contact, submit_model_url, submit_results_url, submit_notes, submit_vis, submit_aud, submit_av, submit_lip, submit_text, submit_face, submit_music, submit_speech, submit_lophy, submit_hiphy, submit_holistic, ], outputs=[submit_status, submit_payload], api_name="submit_score", ) if __name__ == "__main__": launch_kwargs = { "css": CSS, "server_name": os.environ.get("GRADIO_SERVER_NAME", "0.0.0.0"), } port = os.environ.get("PORT") or os.environ.get("GRADIO_SERVER_PORT") if port: launch_kwargs["server_port"] = int(port) demo.launch(**launch_kwargs)