import gradio as gr
import pandas as pd

from src.leaderboard.read_evals import get_leaderboard_df, get_tasks, get_raw_data
from src.display.visualization import create_radar_chart, create_group_bar_chart, create_aup_curve_chart
from src.display.css_html_js import custom_css, sort_table_js, get_foundation_class

CITATION_HTML = """
<div style="max-width: 800px; margin: 30px auto 0 auto; padding: 20px; background: #f8f7ff; border-radius: 12px; border-left: 4px solid #5a3d8a;">
<p style="margin: 0 0 12px 0; color: #5a3d8a; font-weight: 600;">🌟 If you find this Leaderboard useful for your research, please star <a href="https://github.com/hao-ai-lab/d3llm" target="_blank" style="color: #5a3d8a;">our GitHub repo</a> and cite our work:</p>
<pre style="background: #fff; padding: 15px; border-radius: 8px; overflow-x: auto; font-size: 12px; margin: 0; color: #333; white-space: pre-wrap; word-wrap: break-word;">@article{preprint'25:d3llm,
  author  = {Yu-Yang Qian and Junda Su and Lanxiang Hu and Peiyuan Zhang and Zhijie Deng and Peng Zhao and Hao Zhang},
  title   = {d3LLM: Ultra-Fast Diffusion LLM using Pseudo-Trajectory Distillation},
  journal = {ArXiv preprint},
  volume  = {to appear},
  note    = {\\url{https://github.com/hao-ai-lab/d3LLM} [Accessed: 2025-12-11]},
  year    = {2025}
}</pre>
</div>
"""
def create_leaderboard_html(df, tasks):
    """Generate the HTML table for the detailed leaderboard."""
    rows_html = ""
    for rank, (_, row) in enumerate(df.iterrows(), 1):
        medal = f'<span class="top-medal">{["🥇", "🥈", "🥉"][rank - 1]}</span>' if rank <= 3 else str(rank)
        # Method name, linked to its project page when a link is available
        method = row['Method']
        link = row.get('Link', '')
        method_html = f'<a href="{link}" target="_blank">{method}</a>' if link else method
        # Type badge (autoregressive vs. diffusion)
        type_val = row.get('Type', '?')
        type_class = 'ar' if type_val == 'AR' else 'dllm'
        # Foundation badge
        foundation = row.get('Foundation', '?')
        foundation_class = get_foundation_class(foundation)
        # Build one cell per task: AUP on top, TPF and accuracy underneath
        task_cells = ""
        for task in tasks:
            aup = row.get(f'{task}_AUP')
            tpf = row.get(f'{task}_TPF')
            acc = row.get(f'{task}_Acc')
            if pd.notna(aup):
                task_cells += f'''<td>
                    <span class="aup-score">{aup:.1f}</span>
                    <span class="sub-metrics">TPF:{tpf:.2f} Acc:{acc:.1f}</span>
                </td>'''
            else:
                task_cells += '<td><span class="aup-score">-</span></td>'
        # Average AUP across all tasks
        avg_aup = row.get('Avg_AUP', 0)
        rows_html += f'''<tr>
            <td class="rank-cell"><span class="rank-medal">{medal}</span></td>
            <td class="method-cell">{method_html}</td>
            <td class="type-cell"><span class="type-badge {type_class}">{type_val}</span></td>
            <td class="foundation-cell"><span class="foundation-badge {foundation_class}">{foundation}</span></td>
            {task_cells}
            <td class="avg-cell"><span class="aup-score">{avg_aup:.1f}</span></td>
        </tr>'''
    task_headers = ''.join(f'<th>{t}</th>' for t in tasks)
    return f'''
    {sort_table_js}
    <div class="table-wrapper">
        <table class="leaderboard-table">
            <thead><tr>
                <th>Rank</th><th>Method</th><th>Type</th><th>Foundation Model</th>
                {task_headers}
                <th>Avg AUP</th>
            </tr></thead>
            <tbody>{rows_html}</tbody>
        </table>
    </div>
    '''
def update_charts(top_n):
    """Refresh all three charts when the top-N slider changes."""
    df, tasks, raw_data = get_leaderboard_df(), get_tasks(), get_raw_data()
    return (create_radar_chart(df, tasks, top_n),
            create_group_bar_chart(df, tasks, top_n),
            create_aup_curve_chart(raw_data, tasks, df, top_n))
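
# Illustrative sketch only (not called by the app): how a submission YAML in the
# format shown on the "Submit Result" tab below could be parsed. The real loaders
# are get_leaderboard_df/get_raw_data in src.leaderboard.read_evals; this helper's
# name and return structure are assumptions for illustration.
def load_submission_yaml(path):
    """Return (meta, results): per-method metadata and per-task (rho, accuracy) pairs."""
    import yaml  # requires PyYAML; imported locally so the sketch stays self-contained
    with open(path) as f:
        data = yaml.safe_load(f)
    meta = data.pop("_meta", {})  # type / foundation / link for each method
    results = {
        task: {method: [tuple(pair) for pair in pairs] for method, pairs in methods.items()}
        for task, methods in data.items()
    }
    return meta, results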
# Load data
df, tasks, raw_data = get_leaderboard_df(), get_tasks(), get_raw_data()
default_top_n = min(15, len(df))

with gr.Blocks(css=custom_css, title="dLLM Leaderboard", fill_height=False) as demo:
    gr.HTML('''
    <div class="welcome-banner">
        <h2>🫧 Welcome to the dLLM Leaderboard! 🚀</h2>
        <p>Benchmarking Diffusion Large Language Models (dLLMs) with <i><a href="https://hao-ai-lab.github.io/blogs/text-diffusion/" target="_blank" style="color: inherit; text-decoration: underline;">AUP (Accuracy Under Parallelism)</a></i>, a metric that accounts for both accuracy and decoding parallelism.</p>
    </div>
    ''')
    with gr.Tabs():
        with gr.TabItem("🏆 Leaderboard"):
            with gr.Row():
                top_n_slider = gr.Slider(minimum=3, maximum=len(df), value=default_top_n, step=1,
                                         label="Number of Top Methods to Display")
            with gr.Row():
                radar_plot = gr.Plot(value=create_radar_chart(df, tasks, default_top_n))
            with gr.Row():
                bar_plot = gr.Plot(value=create_group_bar_chart(df, tasks, default_top_n))
            with gr.Row():
                curve_plot = gr.Plot(value=create_aup_curve_chart(raw_data, tasks, df, default_top_n))
            top_n_slider.change(fn=update_charts, inputs=[top_n_slider], outputs=[radar_plot, bar_plot, curve_plot])
            gr.Markdown("### 📊 Detailed Leaderboard")
            gr.HTML(create_leaderboard_html(df, tasks))
            gr.HTML(CITATION_HTML)
| with gr.TabItem("π€ Submit Result"): | |
| gr.HTML(""" | |
| <div class="content-wrapper"> | |
| <div style="max-width: 800px; margin: 0 auto; padding: 20px; box-sizing: border-box;"> | |
| <h2>Submit Your Results</h2> | |
| <p>We welcome contributions to the dLLM Leaderboard! To submit your method's results:</p> | |
| <h3>Step 1: Evaluate Your Method</h3> | |
| <p>Follow the evaluation protocol in the <a href="https://github.com/hao-ai-lab/d3LLM" target="_blank">d3LLM repository</a>. | |
| Refer to the <code>eval_scripts</code> folder for benchmark evaluation scripts, and <code>AUP_leaderboard</code> folder for AUP calculation utilities.</p> | |
| <h3>Step 2: Prepare Your Evaluation Results</h3> | |
| <p>Add your results to the appropriate YAML file following this format:</p> | |
| <pre style="background: #f5f5f5; padding: 15px; border-radius: 8px; overflow-x: auto; white-space: pre-wrap; word-wrap: break-word;">_meta: | |
| YourMethod: | |
| type: dLLM # or AR | |
| foundation: YourFoundation | |
| link: https://link/to/your/method | |
| TaskName: | |
| YourMethod: | |
| - [rho_1, accuracy_1] # (parallelism, accuracy) pairs | |
| - [rho_2, accuracy_2]</pre> | |
| <h3>Step 3: Submit a Pull Request</h3> | |
| <ol> | |
| <li>Fork the repository</li> | |
| <li>Add your results to the YAML files</li> | |
| <li>Submit a PR with your method name, description, and evaluation details</li> | |
| </ol> | |
| <p><strong>Questions?</strong> Open an issue on <a href="https://github.com/hao-ai-lab/d3LLM/issues" target="_blank">GitHub</a>.</p> | |
| </div> | |
| </div> | |
| """ + CITATION_HTML) | |
| with gr.TabItem("βΉοΈ About"): | |
| gr.HTML(""" | |
| <div class="content-wrapper"> | |
| <div style="max-width: 800px; margin: 0 auto; padding: 20px; box-sizing: border-box;"> | |
| <h2>About dLLM Leaderboard</h2> | |
| <p>This leaderboard evaluates <strong>Diffusion Large Language Models (dLLMs)</strong> using the <strong>AUP (Accuracy Under Parallelism)</strong> metric.</p> | |
| <h3>Metrics</h3> | |
| <ul> | |
| <li><strong>AUP</strong>: Primary metric - measures efficiency-accuracy trade-off (higher is better)</li> | |
| <li><strong>TPF</strong>: Tokens Per Forward - parallelism level achieved</li> | |
| <li><strong>Acc</strong>: Accuracy at maximum parallelism</li> | |
| </ul> | |
| <h3>Benchmarks</h3> | |
| <p>GSM8K-CoT, MATH, HumanEval, MBPP, Long-GSM8K</p> | |
| <h3>References</h3> | |
| <p> | |
| GitHub Code Repo: <a href="https://github.com/hao-ai-lab/d3LLM" target="_blank">https://github.com/hao-ai-lab/d3LLM</a><br> | |
| Blog: <a href="https://hao-ai-lab.github.io/blogs/text-diffusion/" target="_blank">https://hao-ai-lab.github.io/blogs/text-diffusion/</a> | |
| </p> | |
| </div> | |
| </div> | |
| """ + CITATION_HTML) | |
| demo.launch() | |