import gradio as gr
import pandas as pd

from src.leaderboard.read_evals import get_leaderboard_df, get_tasks, get_raw_data
from src.display.visualization import create_radar_chart, create_group_bar_chart, create_aup_curve_chart
from src.display.css_html_js import custom_css, sort_table_js, get_foundation_class

CITATION_HTML = """
<div style="max-width: 800px; margin: 30px auto 0 auto; padding: 20px; background: #f8f7ff; border-radius: 12px; border-left: 4px solid #5a3d8a;">
<p style="margin: 0 0 12px 0; color: #5a3d8a; font-weight: 600;">🌟 If you find this Leaderboard useful for your research, please star <a href="https://github.com/hao-ai-lab/d3llm" target="_blank" style="color: #5a3d8a;">our GitHub repo</a> and cite our work:</p>
<pre style="background: #fff; padding: 15px; border-radius: 8px; overflow-x: auto; font-size: 12px; margin: 0; color: #333; white-space: pre-wrap; word-wrap: break-word;">@article{preprint'25:d3llm,
  author  = {Yu-Yang Qian and Junda Su and Lanxiang Hu and Peiyuan Zhang and Zhijie Deng and Peng Zhao and Hao Zhang},
  title   = {d3LLM: Ultra-Fast Diffusion LLM using Pseudo-Trajectory Distillation},
  journal = {ArXiv preprint},
  volume  = {to appear},
  note    = {\\url{https://github.com/hao-ai-lab/d3LLM} [Accessed: 2025-12-11]},
  year    = {2025}
}</pre>
</div>
"""
def create_leaderboard_html(df, tasks):
    """Generate the HTML table for the detailed leaderboard."""
    rows_html = ""
    for rank, (_, row) in enumerate(df.iterrows(), 1):
        medal = f'<span class="top-medal">{["🥇", "🥈", "🥉"][rank - 1]}</span>' if rank <= 3 else str(rank)
        # Method name, linked to its project page when a link is available
        method = row['Method']
        link = row.get('Link', '')
        method_html = f'<a href="{link}" target="_blank">{method}</a>' if link else method
        # Type badge (autoregressive vs. diffusion)
        type_val = row.get('Type', '?')
        type_class = 'ar' if type_val == 'AR' else 'dllm'
        # Foundation badge
        foundation = row.get('Foundation', '?')
        foundation_class = get_foundation_class(foundation)
        # Build one cell per task: AUP on top, TPF and accuracy underneath
        task_cells = ""
        for task in tasks:
            aup = row.get(f'{task}_AUP')
            tpf = row.get(f'{task}_TPF')
            acc = row.get(f'{task}_Acc')
            if pd.notna(aup):
                task_cells += f'''<td>
                    <span class="aup-score">{aup:.1f}</span>
                    <span class="sub-metrics">TPF:{tpf:.2f} Acc:{acc:.1f}</span>
                </td>'''
            else:
                task_cells += '<td><span class="aup-score">-</span></td>'
        # Average AUP across all tasks
        avg_aup = row.get('Avg_AUP', 0)
        rows_html += f'''<tr>
            <td class="rank-cell"><span class="rank-medal">{medal}</span></td>
            <td class="method-cell">{method_html}</td>
            <td class="type-cell"><span class="type-badge {type_class}">{type_val}</span></td>
            <td class="foundation-cell"><span class="foundation-badge {foundation_class}">{foundation}</span></td>
            {task_cells}
            <td class="avg-cell"><span class="aup-score">{avg_aup:.1f}</span></td>
        </tr>'''
    task_headers = ''.join(f'<th>{t}</th>' for t in tasks)
    return f'''
    {sort_table_js}
    <div class="table-wrapper">
        <table class="leaderboard-table">
            <thead><tr>
                <th>Rank</th><th>Method</th><th>Type</th><th>Foundation Model</th>
                {task_headers}
                <th>Avg AUP</th>
            </tr></thead>
            <tbody>{rows_html}</tbody>
        </table>
    </div>
    '''
def update_charts(top_n):
    """Refresh all three charts when the top-N slider changes."""
    df, tasks, raw_data = get_leaderboard_df(), get_tasks(), get_raw_data()
    return (create_radar_chart(df, tasks, top_n),
            create_group_bar_chart(df, tasks, top_n),
            create_aup_curve_chart(raw_data, tasks, df, top_n))
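
# Illustrative sketch only (not called by the app): how a submission YAML in the
# format shown on the "Submit Result" tab below could be parsed. The real loaders
# are get_leaderboard_df/get_raw_data in src.leaderboard.read_evals; this helper's
# name and return structure are assumptions for illustration.
def load_submission_yaml(path):
    """Return (meta, results): per-method metadata and per-task (rho, accuracy) pairs."""
    import yaml  # requires PyYAML; imported locally so the sketch stays self-contained
    with open(path) as f:
        data = yaml.safe_load(f)
    meta = data.pop("_meta", {})  # type / foundation / link for each method
    results = {
        task: {method: [tuple(pair) for pair in pairs] for method, pairs in methods.items()}
        for task, methods in data.items()
    }
    return meta, results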
# Load data
df, tasks, raw_data = get_leaderboard_df(), get_tasks(), get_raw_data()
default_top_n = min(15, len(df))

with gr.Blocks(css=custom_css, title="dLLM Leaderboard", fill_height=False) as demo:
    gr.HTML('''
    <div class="welcome-banner">
        <h2>🫧 Welcome to the dLLM Leaderboard! 🚀</h2>
        <p>Benchmarking Diffusion Large Language Models (dLLMs) with <i><a href="https://hao-ai-lab.github.io/blogs/text-diffusion/" target="_blank" style="color: inherit; text-decoration: underline;">AUP (Accuracy Under Parallelism)</a></i>, a metric that accounts for both accuracy and decoding parallelism.</p>
    </div>
    ''')
    with gr.Tabs():
        with gr.TabItem("🏆 Leaderboard"):
            with gr.Row():
                top_n_slider = gr.Slider(minimum=3, maximum=len(df), value=default_top_n, step=1,
                                         label="Number of Top Methods to Display")
            with gr.Row():
                radar_plot = gr.Plot(value=create_radar_chart(df, tasks, default_top_n))
            with gr.Row():
                bar_plot = gr.Plot(value=create_group_bar_chart(df, tasks, default_top_n))
            with gr.Row():
                curve_plot = gr.Plot(value=create_aup_curve_chart(raw_data, tasks, df, default_top_n))
            top_n_slider.change(fn=update_charts, inputs=[top_n_slider], outputs=[radar_plot, bar_plot, curve_plot])
            gr.Markdown("### 📊 Detailed Leaderboard")
            gr.HTML(create_leaderboard_html(df, tasks))
            gr.HTML(CITATION_HTML)
| with gr.TabItem("π€ Submit Result"): | |
| gr.HTML(""" | |
| <div class="content-wrapper"> | |
| <div style="max-width: 800px; margin: 0 auto; padding: 20px; box-sizing: border-box;"> | |
| <h2>Submit Your Results</h2> | |
| <p>We welcome contributions to the dLLM Leaderboard! To submit your method's results:</p> | |
| <h3>Step 1: Evaluate Your Method</h3> | |
| <p>Follow the evaluation protocol in the <a href="https://github.com/hao-ai-lab/d3LLM" target="_blank">d3LLM repository</a>. | |
| Refer to the <code>eval_scripts</code> folder for benchmark evaluation scripts, and <code>AUP_leaderboard</code> folder for AUP calculation utilities.</p> | |
| <h3>Step 2: Prepare Your Evaluation Results</h3> | |
| <p>Add your results to the appropriate YAML file following this format:</p> | |
| <pre style="background: #f5f5f5; padding: 15px; border-radius: 8px; overflow-x: auto; white-space: pre-wrap; word-wrap: break-word;">_meta: | |
| YourMethod: | |
| type: dLLM # or AR | |
| foundation: YourFoundation | |
| link: https://link/to/your/method | |
| TaskName: | |
| YourMethod: | |
| - [rho_1, accuracy_1] # (parallelism, accuracy) pairs | |
| - [rho_2, accuracy_2]</pre> | |
| <h3>Step 3: Submit a Pull Request</h3> | |
| <ol> | |
| <li>Fork the repository</li> | |
| <li>Add your results to the YAML files</li> | |
| <li>Submit a PR with your method name, description, and evaluation details</li> | |
| </ol> | |
| <p><strong>Questions?</strong> Open an issue on <a href="https://github.com/hao-ai-lab/d3LLM/issues" target="_blank">GitHub</a>.</p> | |
| </div> | |
| </div> | |
| """ + CITATION_HTML) | |
| with gr.TabItem("βΉοΈ About"): | |
| gr.HTML(""" | |
| <div class="content-wrapper"> | |
| <div style="max-width: 800px; margin: 0 auto; padding: 20px; box-sizing: border-box;"> | |
| <h2>About dLLM Leaderboard</h2> | |
| <p>This leaderboard evaluates <strong>Diffusion Large Language Models (dLLMs)</strong> using the <strong>AUP (Accuracy Under Parallelism)</strong> metric.</p> | |
| <h3>Metrics</h3> | |
| <ul> | |
| <li><strong>AUP</strong>: Primary metric - measures efficiency-accuracy trade-off (higher is better)</li> | |
| <li><strong>TPF</strong>: Tokens Per Forward - parallelism level achieved</li> | |
| <li><strong>Acc</strong>: Accuracy at maximum parallelism</li> | |
| </ul> | |
| <h3>Benchmarks</h3> | |
| <p>GSM8K-CoT, MATH, HumanEval, MBPP, Long-GSM8K</p> | |
| <h3>References</h3> | |
| <p> | |
| GitHub Code Repo: <a href="https://github.com/hao-ai-lab/d3LLM" target="_blank">https://github.com/hao-ai-lab/d3LLM</a><br> | |
| Blog: <a href="https://hao-ai-lab.github.io/blogs/text-diffusion/" target="_blank">https://hao-ai-lab.github.io/blogs/text-diffusion/</a> | |
| </p> | |
| </div> | |
| </div> | |
| """ + CITATION_HTML) | |
| demo.launch() | |