# d3LLM dLLM Leaderboard -- Gradio app entry point (initial commit d473371)
import html

import gradio as gr
import pandas as pd

from src.display.css_html_js import custom_css, sort_table_js, get_foundation_class
from src.display.visualization import create_radar_chart, create_group_bar_chart, create_aup_curve_chart
from src.leaderboard.read_evals import get_leaderboard_df, get_tasks, get_raw_data
# BibTeX citation card appended below the leaderboard and at the bottom of the
# Submit/About tabs.  NOTE: the <pre> body is whitespace-sensitive (it is
# rendered verbatim as BibTeX), so keep its content exactly as-is.
CITATION_HTML = """
<div style="max-width: 800px; margin: 30px auto 0 auto; padding: 20px; background: #f8f7ff; border-radius: 12px; border-left: 4px solid #5a3d8a;">
<p style="margin: 0 0 12px 0; color: #5a3d8a; font-weight: 600;">πŸ“ If you find this Leaderboard useful for your research, please star <a href="https://github.com/hao-ai-lab/d3llm" target="_blank" style="color: #5a3d8a;">our GitHub repo</a> and cite our work:</p>
<pre style="background: #fff; padding: 15px; border-radius: 8px; overflow-x: auto; font-size: 12px; margin: 0; color: #333; white-space: pre-wrap; word-wrap: break-word;">@article{preprint'25:d3llm,
author = {Yu-Yang Qian and Junda Su and Lanxiang Hu and Peiyuan Zhang and Zhijie Deng and Peng Zhao and Hao Zhang},
title = {d3LLM: Ultra-Fast Diffusion LLM using Pseudo-Trajectory Distillation},
journal = {ArXiv preprint},
volume = {to appear},
note = {\\url{https://github.com/hao-ai-lab/d3LLM} [Accessed: 2025-12-11]},
year = {2025}
}</pre>
</div>
"""
# Medal glyphs for the top-3 ranks; ranks beyond this show the plain number.
_MEDALS = ("πŸ₯‡", "πŸ₯ˆ", "πŸ₯‰")


def _format_task_cell(row, task):
    """Render one <td> with the AUP score (plus TPF/Acc sub-metrics) for *task*.

    Returns a placeholder "-" cell when the task's AUP is missing/NaN.
    """
    aup = row.get(f'{task}_AUP')
    if pd.notna(aup):
        tpf = row.get(f'{task}_TPF')
        acc = row.get(f'{task}_Acc')
        # Only emit the sub-metrics line when both values are present;
        # formatting NaN would otherwise render a literal "TPF:nan".
        sub = (f'<span class="sub-metrics">TPF:{tpf:.2f} Acc:{acc:.1f}</span>'
               if pd.notna(tpf) and pd.notna(acc) else '')
        return f'''<td>
<span class="aup-score">{aup:.1f}</span>
{sub}
</td>'''
    return '<td><span class="aup-score">-</span></td>'


def _render_row(rank, row, tasks):
    """Render one <tr> for *row* at 1-based leaderboard position *rank*."""
    medal = (f'<span class="top-medal">{_MEDALS[rank - 1]}</span>'
             if rank <= len(_MEDALS) else str(rank))
    # Method name and link come from user-contributed YAML -- escape them so a
    # stray '<' or quote cannot break the markup / href attribute.
    method = html.escape(str(row['Method']))
    link = row.get('Link', '')
    method_html = (f'<a href="{html.escape(str(link))}" target="_blank">{method}</a>'
                   if link else method)
    type_val = row.get('Type', '?')
    type_class = 'ar' if type_val == 'AR' else 'dllm'
    foundation = row.get('Foundation', '?')
    foundation_class = get_foundation_class(foundation)
    task_cells = ''.join(_format_task_cell(row, task) for task in tasks)
    # Series.get's default only covers a *missing* key; a present-but-NaN
    # average would still format as "nan", so guard explicitly.
    avg_aup = row.get('Avg_AUP', 0)
    if pd.isna(avg_aup):
        avg_aup = 0
    return f'''<tr>
<td class="rank-cell"><span class="rank-medal">{medal}</span></td>
<td class="method-cell">{method_html}</td>
<td class="type-cell"><span class="type-badge {type_class}">{html.escape(str(type_val))}</span></td>
<td class="foundation-cell"><span class="foundation-badge {foundation_class}">{html.escape(str(foundation))}</span></td>
{task_cells}
<td class="avg-cell"><span class="aup-score">{avg_aup:.1f}</span></td>
</tr>'''


def create_leaderboard_html(df, tasks):
    """Generate the sortable HTML table of detailed results.

    Args:
        df: Leaderboard DataFrame, one row per method, already sorted by rank.
            Expected columns: 'Method', optionally 'Link', 'Type',
            'Foundation', 'Avg_AUP', and per-task '<task>_AUP/_TPF/_Acc'.
        tasks: Ordered task names used for the per-task columns.

    Returns:
        HTML string: the sort-enabling <script> followed by the table markup.
    """
    rows_html = ''.join(
        _render_row(rank, row, tasks)
        for rank, (_, row) in enumerate(df.iterrows(), 1)
    )
    task_headers = ''.join(f'<th>{html.escape(str(t))}</th>' for t in tasks)
    return f'''
{sort_table_js}
<div class="table-wrapper">
<table class="leaderboard-table">
<thead><tr>
<th>Rank</th><th>Method</th><th>Type</th><th>Foundation Model</th>
{task_headers}
<th>Avg AUP</th>
</tr></thead>
<tbody>{rows_html}</tbody>
</table>
</div>
'''
def update_charts(top_n):
    """Rebuild all three plots for the top *top_n* methods.

    Re-reads the leaderboard data so a slider change always reflects the
    latest evaluation results.  Returns (radar, grouped-bar, AUP-curve)
    figures in the order the Plot components are wired.
    """
    leaderboard = get_leaderboard_df()
    task_list = get_tasks()
    raw = get_raw_data()
    radar = create_radar_chart(leaderboard, task_list, top_n)
    bars = create_group_bar_chart(leaderboard, task_list, top_n)
    curves = create_aup_curve_chart(raw, task_list, leaderboard, top_n)
    return radar, bars, curves
# ---------------------------------------------------------------------------
# App layout: load the leaderboard data once at import, then build the UI.
# ---------------------------------------------------------------------------
df, tasks, raw_data = get_leaderboard_df(), get_tasks(), get_raw_data()
default_top_n = min(15, len(df))  # show at most 15 methods in the charts initially

with gr.Blocks(css=custom_css, title="dLLM Leaderboard", fill_height=False) as demo:
    # Header banner shown above all tabs.
    gr.HTML('''
<div class="welcome-banner">
<h2>🫧 Welcome to dLLM Leaderboard! πŸ†</h2>
<p>Benchmarking various Diffusion Large Language Models (dLLMs) with <i><a href="https://hao-ai-lab.github.io/blogs/text-diffusion/" target="_blank" style="color: inherit; text-decoration: underline;">AUP (Accuracy Under Parallelism)</a></i>, considering both accuracy and parallelism.</p>
</div>
''')
    with gr.Tabs():
        with gr.TabItem("πŸ“Š Leaderboard"):
            with gr.Row():
                top_n_slider = gr.Slider(minimum=3, maximum=len(df), value=default_top_n, step=1,
                                         label="Number of Top Methods to Display")
            with gr.Row():
                radar_plot = gr.Plot(value=create_radar_chart(df, tasks, default_top_n))
            with gr.Row():
                bar_plot = gr.Plot(value=create_group_bar_chart(df, tasks, default_top_n))
            with gr.Row():
                curve_plot = gr.Plot(value=create_aup_curve_chart(raw_data, tasks, df, default_top_n))
            # Redraw all three charts whenever the slider value changes.
            top_n_slider.change(fn=update_charts, inputs=[top_n_slider], outputs=[radar_plot, bar_plot, curve_plot])
            gr.Markdown("### πŸ† Detailed Leaderboard")
            gr.HTML(create_leaderboard_html(df, tasks))
            gr.HTML(CITATION_HTML)
        with gr.TabItem("πŸ“€ Submit Result"):
            # Static submission instructions; the <pre> YAML template is
            # whitespace-sensitive, so the string is kept verbatim.
            gr.HTML("""
<div class="content-wrapper">
<div style="max-width: 800px; margin: 0 auto; padding: 20px; box-sizing: border-box;">
<h2>Submit Your Results</h2>
<p>We welcome contributions to the dLLM Leaderboard! To submit your method's results:</p>
<h3>Step 1: Evaluate Your Method</h3>
<p>Follow the evaluation protocol in the <a href="https://github.com/hao-ai-lab/d3LLM" target="_blank">d3LLM repository</a>.
Refer to the <code>eval_scripts</code> folder for benchmark evaluation scripts, and <code>AUP_leaderboard</code> folder for AUP calculation utilities.</p>
<h3>Step 2: Prepare Your Evaluation Results</h3>
<p>Add your results to the appropriate YAML file following this format:</p>
<pre style="background: #f5f5f5; padding: 15px; border-radius: 8px; overflow-x: auto; white-space: pre-wrap; word-wrap: break-word;">_meta:
YourMethod:
type: dLLM # or AR
foundation: YourFoundation
link: https://link/to/your/method
TaskName:
YourMethod:
- [rho_1, accuracy_1] # (parallelism, accuracy) pairs
- [rho_2, accuracy_2]</pre>
<h3>Step 3: Submit a Pull Request</h3>
<ol>
<li>Fork the repository</li>
<li>Add your results to the YAML files</li>
<li>Submit a PR with your method name, description, and evaluation details</li>
</ol>
<p><strong>Questions?</strong> Open an issue on <a href="https://github.com/hao-ai-lab/d3LLM/issues" target="_blank">GitHub</a>.</p>
</div>
</div>
""" + CITATION_HTML)
        with gr.TabItem("ℹ️ About"):
            gr.HTML("""
<div class="content-wrapper">
<div style="max-width: 800px; margin: 0 auto; padding: 20px; box-sizing: border-box;">
<h2>About dLLM Leaderboard</h2>
<p>This leaderboard evaluates <strong>Diffusion Large Language Models (dLLMs)</strong> using the <strong>AUP (Accuracy Under Parallelism)</strong> metric.</p>
<h3>Metrics</h3>
<ul>
<li><strong>AUP</strong>: Primary metric - measures efficiency-accuracy trade-off (higher is better)</li>
<li><strong>TPF</strong>: Tokens Per Forward - parallelism level achieved</li>
<li><strong>Acc</strong>: Accuracy at maximum parallelism</li>
</ul>
<h3>Benchmarks</h3>
<p>GSM8K-CoT, MATH, HumanEval, MBPP, Long-GSM8K</p>
<h3>References</h3>
<p>
GitHub Code Repo: <a href="https://github.com/hao-ai-lab/d3LLM" target="_blank">https://github.com/hao-ai-lab/d3LLM</a><br>
Blog: <a href="https://hao-ai-lab.github.io/blogs/text-diffusion/" target="_blank">https://hao-ai-lab.github.io/blogs/text-diffusion/</a>
</p>
</div>
</div>
""" + CITATION_HTML)

# Launch only when executed as a script (HF Spaces runs this module directly,
# so behavior there is unchanged); importing the module no longer starts a server.
if __name__ == "__main__":
    demo.launch()