import gradio as gr
import pandas as pd
from src.leaderboard.read_evals import get_leaderboard_df, get_tasks, get_raw_data
from src.display.visualization import create_radar_chart, create_group_bar_chart, create_aup_curve_chart
from src.display.css_html_js import custom_css, sort_table_js, get_foundation_class
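# Gradio app for the dLLM Leaderboard: loads evaluation results, renders the radar /
# grouped-bar / AUP-curve charts, and builds a sortable HTML results table.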
CITATION_HTML = """
<div style="max-width: 800px; margin: 30px auto 0 auto; padding: 20px; background: #f8f7ff; border-radius: 12px; border-left: 4px solid #5a3d8a;">
<p style="margin: 0 0 12px 0; color: #5a3d8a; font-weight: 600;">π If you find this Leaderboard useful for your research, please star <a href="https://github.com/hao-ai-lab/d3llm" target="_blank" style="color: #5a3d8a;">our GitHub repo</a> and cite our work:</p>
<pre style="background: #fff; padding: 15px; border-radius: 8px; overflow-x: auto; font-size: 12px; margin: 0; color: #333; white-space: pre-wrap; word-wrap: break-word;">@article{preprint'25:d3llm,
author = {Yu-Yang Qian and Junda Su and Lanxiang Hu and Peiyuan Zhang and Zhijie Deng and Peng Zhao and Hao Zhang},
title = {d3LLM: Ultra-Fast Diffusion LLM using Pseudo-Trajectory Distillation},
journal = {ArXiv preprint},
volume = {to appear},
note = {\\url{https://github.com/hao-ai-lab/d3LLM} [Accessed: 2025-12-11]},
year = {2025}
}</pre>
</div>
"""
def create_leaderboard_html(df, tasks):
"""Generate HTML table for detailed results."""
rows_html = ""
for rank, (_, row) in enumerate(df.iterrows(), 1):
        medal = f'<span class="top-medal">{["🥇", "🥈", "🥉"][rank-1]}</span>' if rank <= 3 else str(rank)
# Method with link
method = row['Method']
link = row.get('Link', '')
method_html = f'<a href="{link}" target="_blank">{method}</a>' if link else method
# Type badge
type_val = row.get('Type', '?')
type_display = 'dLLM' if type_val == 'dLLM' else type_val
type_class = 'ar' if type_val == 'AR' else 'dllm'
# Foundation badge
foundation = row.get('Foundation', '?')
foundation_class = get_foundation_class(foundation)
# Build cells for each task
task_cells = ""
for task in tasks:
aup = row.get(f'{task}_AUP')
tpf = row.get(f'{task}_TPF')
acc = row.get(f'{task}_Acc')
if pd.notna(aup):
task_cells += f'''<td>
<span class="aup-score">{aup:.1f}</span>
<span class="sub-metrics">TPF:{tpf:.2f} Acc:{acc:.1f}</span>
</td>'''
else:
task_cells += '<td><span class="aup-score">-</span></td>'
# Avg AUP
avg_aup = row.get('Avg_AUP', 0)
rows_html += f'''<tr>
<td class="rank-cell"><span class="rank-medal">{medal}</span></td>
<td class="method-cell">{method_html}</td>
<td class="type-cell"><span class="type-badge {type_class}">{type_display}</span></td>
<td class="foundation-cell"><span class="foundation-badge {foundation_class}">{foundation}</span></td>
{task_cells}
<td class="avg-cell"><span class="aup-score">{avg_aup:.1f}</span></td>
</tr>'''
task_headers = ''.join(f'<th>{t}</th>' for t in tasks)
return f'''
{sort_table_js}
<div class="table-wrapper">
<table class="leaderboard-table">
<thead><tr>
<th>Rank</th><th>Method</th><th>Type</th><th>Foundation Model</th>
{task_headers}
<th>Avg AUP</th>
</tr></thead>
<tbody>{rows_html}</tbody>
</table>
</div>
'''
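# Illustrative sketch (not used by the app): the DataFrame consumed by
# create_leaderboard_html() is expected to carry 'Method', 'Link', 'Type',
# 'Foundation', per-task '<Task>_AUP' / '<Task>_TPF' / '<Task>_Acc' columns,
# and an 'Avg_AUP' column; the real frame comes from get_leaderboard_df().
# The method name, link, and numbers below are made up.
def _example_table_html():
    toy_df = pd.DataFrame([{
        'Method': 'ToyMethod', 'Link': 'https://example.com', 'Type': 'dLLM',
        'Foundation': 'ToyFoundation', 'GSM8K_AUP': 42.0, 'GSM8K_TPF': 3.50,
        'GSM8K_Acc': 71.2, 'Avg_AUP': 42.0,
    }])
    return create_leaderboard_html(toy_df, ['GSM8K'])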
def update_charts(top_n):
    """Refresh the radar, grouped-bar, and AUP-curve charts for the top-N methods."""
    df, tasks, raw_data = get_leaderboard_df(), get_tasks(), get_raw_data()
    return create_radar_chart(df, tasks, top_n), create_group_bar_chart(df, tasks, top_n), create_aup_curve_chart(raw_data, tasks, df, top_n)
# Load data
df, tasks, raw_data = get_leaderboard_df(), get_tasks(), get_raw_data()
default_top_n = min(15, len(df))
with gr.Blocks(css=custom_css, title="dLLM Leaderboard", fill_height=False) as demo:
gr.HTML('''
<div class="welcome-banner">
<h2>🫧 Welcome to the dLLM Leaderboard! 🎉</h2>
<p>Benchmarking Diffusion Large Language Models (dLLMs) with <i><a href="https://hao-ai-lab.github.io/blogs/text-diffusion/" target="_blank" style="color: inherit; text-decoration: underline;">AUP (Accuracy Under Parallelism)</a></i>, a metric that accounts for both accuracy and parallelism.</p>
</div>
''')
with gr.Tabs():
with gr.TabItem("π Leaderboard"):
with gr.Row():
top_n_slider = gr.Slider(minimum=3, maximum=len(df), value=default_top_n, step=1,
label="Number of Top Methods to Display")
with gr.Row():
radar_plot = gr.Plot(value=create_radar_chart(df, tasks, default_top_n))
with gr.Row():
bar_plot = gr.Plot(value=create_group_bar_chart(df, tasks, default_top_n))
with gr.Row():
curve_plot = gr.Plot(value=create_aup_curve_chart(raw_data, tasks, df, default_top_n))
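            # Re-draw all three charts whenever the Top-N slider value changes.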
top_n_slider.change(fn=update_charts, inputs=[top_n_slider], outputs=[radar_plot, bar_plot, curve_plot])
gr.Markdown("### π Detailed Leaderboard")
gr.HTML(create_leaderboard_html(df, tasks))
gr.HTML(CITATION_HTML)
with gr.TabItem("π€ Submit Result"):
gr.HTML("""
<div class="content-wrapper">
<div style="max-width: 800px; margin: 0 auto; padding: 20px; box-sizing: border-box;">
<h2>Submit Your Results</h2>
<p>We welcome contributions to the dLLM Leaderboard! To submit your method's results:</p>
<h3>Step 1: Evaluate Your Method</h3>
<p>Follow the evaluation protocol in the <a href="https://github.com/hao-ai-lab/d3LLM" target="_blank">d3LLM repository</a>.
Refer to the <code>eval_scripts</code> folder for benchmark evaluation scripts, and the <code>AUP_leaderboard</code> folder for AUP calculation utilities.</p>
<h3>Step 2: Prepare Your Evaluation Results</h3>
<p>Add your results to the appropriate YAML file following this format:</p>
<pre style="background: #f5f5f5; padding: 15px; border-radius: 8px; overflow-x: auto; white-space: pre-wrap; word-wrap: break-word;">_meta:
YourMethod:
type: dLLM # or AR
foundation: YourFoundation
link: https://link/to/your/method
TaskName:
YourMethod:
- [rho_1, accuracy_1] # (parallelism, accuracy) pairs
- [rho_2, accuracy_2]</pre>
<h3>Step 3: Submit a Pull Request</h3>
<ol>
<li>Fork the repository</li>
<li>Add your results to the YAML files</li>
<li>Submit a PR with your method name, description, and evaluation details</li>
</ol>
<p><strong>Questions?</strong> Open an issue on <a href="https://github.com/hao-ai-lab/d3LLM/issues" target="_blank">GitHub</a>.</p>
</div>
</div>
""" + CITATION_HTML)
with gr.TabItem("βΉοΈ About"):
gr.HTML("""
<div class="content-wrapper">
<div style="max-width: 800px; margin: 0 auto; padding: 20px; box-sizing: border-box;">
<h2>About dLLM Leaderboard</h2>
<p>This leaderboard evaluates <strong>Diffusion Large Language Models (dLLMs)</strong> using the <strong>AUP (Accuracy Under Parallelism)</strong> metric.</p>
<h3>Metrics</h3>
<ul>
<li><strong>AUP</strong>: Primary metric - measures efficiency-accuracy trade-off (higher is better)</li>
<li><strong>TPF</strong>: Tokens Per Forward - parallelism level achieved</li>
<li><strong>Acc</strong>: Accuracy at maximum parallelism</li>
</ul>
<h3>Benchmarks</h3>
<p>GSM8K-CoT, MATH, HumanEval, MBPP, Long-GSM8K</p>
<h3>References</h3>
<p>
GitHub Code Repo: <a href="https://github.com/hao-ai-lab/d3LLM" target="_blank">https://github.com/hao-ai-lab/d3LLM</a><br>
Blog: <a href="https://hao-ai-lab.github.io/blogs/text-diffusion/" target="_blank">https://hao-ai-lab.github.io/blogs/text-diffusion/</a>
</p>
</div>
</div>
""" + CITATION_HTML)
demo.launch()