# Header residue from the Hugging Face file viewer (not part of the program):
# Mike0021's picture
# Hide inactive Gradio status overlays
# cc457e9 verified
# Raw
# History Blame Contribute Delete
# 44.6 kB
import os
os.environ.setdefault("HF_HOME", "/tmp/huggingface")
os.environ.setdefault("HF_MODULES_CACHE", "/tmp/hf_modules")
os.environ.setdefault("MPLCONFIGDIR", "/tmp/matplotlib")
os.environ.setdefault("GRADIO_SSR_MODE", "false")
import html
import random
from statistics import mean
import gradio as gr
import plotly.graph_objects as go
# Benchmark tasks grouped by evaluation domain. Insertion order matters: it
# fixes both the domain lookup order and the flat task order below.
DOMAINS = {
    "Math": ["GSM8K", "MATH-500", "AIME25"],
    "Code": ["MBPP", "HumanEval", "LiveCodeBench"],
    "Chat": ["MT-Bench", "Alpaca", "Arena-Hard v2"],
}

# Flat list of every task, in domain order (Math, then Code, then Chat).
TASKS = [task for domain_tasks in DOMAINS.values() for task in domain_tasks]
# Target models that have an entry in the accepted-length table.
TARGETS = [
    "Qwen3-4B",
    "Qwen3-8B",
    "Qwen3-14B",
    "Gemma4-12B",
]

# Draft families, in the order used for table columns and chart categories.
METHODS = [
    "DSpark",
    "DFlash",
    "EAGLE-3",
]

# Hex color per series name, used for chart markers and card accents.
COLORS = {
    "DSpark": "#14b8a6",
    "DFlash": "#f97316",
    "EAGLE-3": "#8b5cf6",
    "Baseline": "#94a3b8",
}
# Column order of every row in the accepted-length table below.
_TAU_TASKS = (
    "GSM8K",
    "MATH-500",
    "AIME25",
    "MBPP",
    "HumanEval",
    "LiveCodeBench",
    "MT-Bench",
    "Alpaca",
    "Arena-Hard v2",
)


def _tau_row(*values):
    """Pair one row of accepted lengths with the task names, in column order."""
    if len(values) != len(_TAU_TASKS):
        raise ValueError("expected one accepted-length value per task")
    return dict(zip(_TAU_TASKS, values))


# Accepted length (tau) indexed as ACCEPTANCE[target][method][task].
# Columns: GSM8K, MATH-500, AIME25, MBPP, HumanEval, LiveCodeBench,
#          MT-Bench, Alpaca, Arena-Hard v2
ACCEPTANCE = {
    "Qwen3-4B": {
        "EAGLE-3": _tau_row(5.14, 4.62, 3.92, 3.69, 4.16, 3.77, 2.39, 2.26, 2.55),
        "DFlash": _tau_row(5.40, 4.85, 4.15, 4.40, 4.74, 4.18, 3.07, 2.96, 2.83),
        "DSpark": _tau_row(6.11, 5.70, 4.89, 5.13, 5.38, 4.86, 3.64, 3.54, 3.29),
    },
    "Qwen3-8B": {
        "EAGLE-3": _tau_row(5.30, 4.77, 3.91, 3.96, 4.33, 4.17, 2.66, 2.54, 2.54),
        "DFlash": _tau_row(5.33, 4.91, 4.07, 4.36, 4.64, 4.39, 3.11, 2.98, 2.81),
        "DSpark": _tau_row(6.17, 5.78, 5.01, 5.16, 5.52, 5.17, 3.72, 3.58, 3.21),
    },
    "Qwen3-14B": {
        "EAGLE-3": _tau_row(5.24, 4.60, 3.71, 3.81, 4.14, 4.01, 2.62, 2.47, 2.48),
        "DFlash": _tau_row(5.41, 4.84, 3.98, 4.44, 4.59, 4.33, 3.10, 2.94, 2.72),
        "DSpark": _tau_row(6.21, 5.74, 4.94, 5.26, 5.43, 5.02, 3.70, 3.58, 3.13),
    },
    "Gemma4-12B": {
        "EAGLE-3": _tau_row(5.87, 5.46, 4.83, 4.72, 5.37, 4.16, 3.19, 3.06, 2.72),
        "DFlash": _tau_row(5.45, 5.04, 4.22, 4.39, 4.95, 3.70, 2.98, 2.84, 2.59),
        "DSpark": _tau_row(6.05, 5.78, 5.12, 5.11, 5.64, 4.51, 3.49, 3.35, 2.92),
    },
}
# Inventory of draft checkpoints, one tuple per checkpoint. Field order (as
# unpacked by model_rows): family, target, repo id, parameter count in
# billions, architecture class name, horizon, layers, confidence, seq.
# NOTE(review): the DFlash rows list a "...DSparkModel" architecture name;
# confirm against the checkpoint metadata that this is intended.
MODELS = [
("DSpark", "Qwen3-4B", "deepseek-ai/dspark_qwen3_4b_block7", 1.393, "Qwen3DSparkModel", "block7", "5", "yes", "Markov rank 256"),
("DSpark", "Qwen3-8B", "deepseek-ai/dspark_qwen3_8b_block7", 2.371, "Qwen3DSparkModel", "block7", "5", "yes", "Markov rank 256"),
("DSpark", "Qwen3-14B", "deepseek-ai/dspark_qwen3_14b_block7", 3.416, "Qwen3DSparkModel", "block7", "5", "yes", "Markov rank 256"),
("DSpark", "Gemma4-12B", "deepseek-ai/dspark_gemma4_12b_block7", 3.430, "Gemma4DSparkModel", "block7", "5", "yes", "Markov rank 256"),
("DFlash", "Qwen3-4B", "deepseek-ai/dflash_qwen3_4b_block7", 1.315, "Qwen3DSparkModel", "block7", "5", "no", "parallel block"),
("DFlash", "Qwen3-8B", "deepseek-ai/dflash_qwen3_8b_block7", 2.293, "Qwen3DSparkModel", "block7", "5", "no", "parallel block"),
("DFlash", "Qwen3-14B", "deepseek-ai/dflash_qwen3_14b_block7", 3.338, "Qwen3DSparkModel", "block7", "5", "no", "parallel block"),
("DFlash", "Gemma4-12B", "deepseek-ai/dflash_gemma4_12b_block7", 3.296, "Gemma4DSparkModel", "block7", "5", "no", "parallel block"),
("EAGLE-3", "Qwen3-4B", "deepseek-ai/eagle3_qwen3_4b_ttt7", 0.927, "Qwen3Eagle3Model", "ttt7", "1", "no", "training-time test"),
("EAGLE-3", "Qwen3-8B", "deepseek-ai/eagle3_qwen3_8b_ttt7", 1.547, "Qwen3Eagle3Model", "ttt7", "1", "no", "training-time test"),
("EAGLE-3", "Qwen3-14B", "deepseek-ai/eagle3_qwen3_14b_ttt7", 2.054, "Qwen3Eagle3Model", "ttt7", "1", "no", "training-time test"),
("EAGLE-3", "Gemma4-12B", "deepseek-ai/eagle3_gemma4_12b_ttt7", 2.362, "Gemma4Eagle3Model", "ttt7", "1", "no", "training-time test"),
]
# Descriptive copy for each draft family, rendered by architecture_panel().
# Every entry carries the same four keys: "tag" (short category label),
# "summary", "strength", and "tradeoff". Values are HTML-escaped at render time.
FAMILY_COPY = {
"DSpark": {
"tag": "semi-autoregressive",
"summary": "Parallel DFlash-style backbone plus a lightweight Markov head and confidence scheduler.",
"strength": "Best accepted length in the released table and designed for load-aware serving.",
"tradeoff": "More machinery than a pure block drafter.",
},
"DFlash": {
"tag": "parallel block diffusion",
"summary": "Predicts a full block in one pass with target-feature conditioning and KV injection.",
"strength": "Very low drafting latency and strong first-token accuracy.",
"tradeoff": "Suffix tokens decay because positions are predicted independently.",
},
"EAGLE-3": {
"tag": "autoregressive feature drafter",
"summary": "Uses training-time test and fused target features to improve classic EAGLE drafting.",
"strength": "Strong lossless speculative baseline with stable sequential dependency modeling.",
"tradeoff": "Drafting cost scales with lookahead length.",
},
}
# One space-separated phrase per benchmark task; split into word pools below.
_LEXICON_PHRASES = {
    "GSM8K": "therefore the total is because each group contributes remaining answer equals final",
    "MATH-500": "let x satisfy equation substitute simplify bound hence root value proof",
    "AIME25": "triangle integer modulo sequence polynomial area count radius answer",
    "MBPP": "def return list index loop condition append result function test",
    "HumanEval": "class function assert edge case input output sorted recursive",
    "LiveCodeBench": "stdin parse graph dp binary search modulo constraints optimize",
    "MT-Bench": "I would compare the tradeoff and explain the practical implication",
    "Alpaca": "Here is a concise response with steps context and caveats",
    "Arena-Hard v2": "The best answer balances reasoning specificity and directness",
}

# Word pool per task; the token simulator samples its display tokens from these.
LEXICON = {task: phrase.split() for task, phrase in _LEXICON_PHRASES.items()}
def pct_gain(new, old):
    """Return the percentage change of ``new`` relative to ``old``.

    A falsy ``old`` (zero or ``None``) yields ``0.0`` rather than dividing
    by zero.
    """
    return (new / old - 1.0) * 100.0 if old else 0.0
def domain_for_task(task):
    """Return the first DOMAINS key whose task list contains ``task``.

    Falls back to "Mixed" when no domain lists the task.
    """
    return next(
        (name for name, members in DOMAINS.items() if task in members),
        "Mixed",
    )
def model_rows():
    """Return MODELS as table rows (lists), one per checkpoint.

    Every field is passed through unchanged except the parameter count,
    which is rendered as a string with three decimals and a "B" suffix.
    """
    return [
        [family, target, repo, f"{params:.3f}B", arch, horizon, layers, confidence, seq]
        for family, target, repo, params, arch, horizon, layers, confidence, seq in MODELS
    ]
def benchmark_rows(target):
    """Return one table row per task for ``target``.

    Each row is ``[task, domain, tau...]`` with one two-decimal accepted-length
    string per entry of METHODS, in METHODS order.
    """
    return [
        [
            task,
            domain_for_task(task),
            *(f"{ACCEPTANCE[target][family][task]:.2f}" for family in METHODS),
        ]
        for task in TASKS
    ]
def method_tau(target, method, task):
    """Look up the accepted length (tau) for one target / method / task cell.

    Raises KeyError if any of the three keys is missing from ACCEPTANCE.
    """
    by_method = ACCEPTANCE[target]
    by_task = by_method[method]
    return by_task[task]
def simulated_tps(tau, method, baseline_tps, load):
    """Estimate tokens/sec for a draft family from its accepted length.

    The baseline rate is scaled by ``tau``, divided by ``1 + overhead`` for the
    family, and then reduced by the family's waste factor in proportion to the
    load pressure. ``load`` is mapped from the 1..100 range onto 0..1 and
    clamped. Raises KeyError for a method without cost constants.
    """
    load_pressure = max(0.0, min(1.0, (load - 1.0) / 99.0))
    # (overhead, waste) per draft family.
    cost_table = {
        "DSpark": (0.11, 0.06),
        "DFlash": (0.10, 0.24),
        "EAGLE-3": (0.19, 0.16),
    }
    overhead, waste = cost_table[method]
    unloaded_rate = baseline_tps * tau / (1.0 + overhead)
    return unloaded_rate * (1.0 - load_pressure * waste)
def apply_dark_plot_layout(fig):
    """Apply the app's dark theme to ``fig`` and return the same figure.

    Updates the layout, both axes, and the colorbar of any heatmap traces.
    """

    def axis_theme():
        # Built fresh on every call so the x and y updates never share objects.
        return {
            "gridcolor": "rgba(148, 163, 184, 0.18)",
            "zerolinecolor": "rgba(148, 163, 184, 0.22)",
            "linecolor": "rgba(148, 163, 184, 0.34)",
            "tickcolor": "rgba(148, 163, 184, 0.34)",
            "title_font": {"color": "#dbeafe"},
            "tickfont": {"color": "#cbd5e1"},
        }

    layout_theme = {
        "template": "plotly_dark",
        "paper_bgcolor": "#0b1220",
        "plot_bgcolor": "#0f172a",
        "font": {"color": "#e5efff", "family": "Inter, ui-sans-serif, system-ui, sans-serif"},
        "title_font": {"color": "#f8fafc", "size": 18},
        "legend": {"font": {"color": "#dbeafe"}},
        "hoverlabel": {
            "bgcolor": "#111827",
            "bordercolor": "#475569",
            "font": {"color": "#f8fafc"},
        },
    }
    fig.update_layout(**layout_theme)
    fig.update_xaxes(**axis_theme())
    fig.update_yaxes(**axis_theme())
    # Only heatmap traces carry a colorbar to restyle.
    fig.update_traces(
        selector={"type": "heatmap"},
        colorbar={
            "tickfont": {"color": "#dbeafe"},
            "title_font": {"color": "#f8fafc"},
        },
    )
    return fig
def metric_cards(target, task, method, baseline_tps, load):
    """Render the four headline metric cards as an HTML string.

    Cards: DSpark accepted length (with its gain over the stronger of DFlash
    and EAGLE-3), the simulated rate of the selected ``method``, the share of
    target calls avoided (``1 - 1/tau`` of the selected method), and the
    benchmark domain with all three accepted lengths.
    """
    tau_dspark, tau_dflash, tau_eagle = (
        method_tau(target, family, task) for family in ("DSpark", "DFlash", "EAGLE-3")
    )
    strongest_baseline = max(tau_dflash, tau_eagle)
    tau_selected = method_tau(target, method, task)
    avoided_pct = (1.0 - 1.0 / tau_selected) * 100.0
    rate = simulated_tps(tau_selected, method, baseline_tps, load)
    profile = domain_for_task(task)
    dspark_gain = pct_gain(tau_dspark, strongest_baseline)
    return f"""
<div class="metric-grid">
<div class="metric-card accent-dspark">
<span>DSpark accepted length</span>
<strong>{tau_dspark:.2f}</strong>
<small>{dspark_gain:+.1f}% vs strongest baseline on {task}</small>
</div>
<div class="metric-card accent-orange">
<span>{method} simulated rate</span>
<strong>{rate:.1f}</strong>
<small>tokens/sec from a {baseline_tps:.1f} baseline input</small>
</div>
<div class="metric-card accent-violet">
<span>Target calls avoided</span>
<strong>{avoided_pct:.1f}%</strong>
<small>estimated from accepted length tau={tau_selected:.2f}</small>
</div>
<div class="metric-card accent-blue">
<span>Benchmark profile</span>
<strong>{profile}</strong>
<small>EAGLE-3 {tau_eagle:.2f} / DFlash {tau_dflash:.2f} / DSpark {tau_dspark:.2f}</small>
</div>
</div>
"""
def acceptance_bar(target, task):
    """Build a themed bar chart of accepted length per draft family.

    One bar per entry of METHODS for the given ``target`` and ``task``.
    """
    taus = [method_tau(target, family, task) for family in METHODS]
    bar_colors = [COLORS[family] for family in METHODS]
    bar_labels = [f"{tau:.2f}" for tau in taus]

    fig = go.Figure()
    fig.add_bar(
        x=METHODS,
        y=taus,
        text=bar_labels,
        marker_color=bar_colors,
        textposition="outside",
        textfont=dict(color="#f8fafc", size=13),
        hovertemplate="%{x}<br>Accepted length: %{y:.2f}<extra></extra>",
    )

    # The y range ends one unit above the tallest bar (presumably headroom for
    # the labels drawn outside the bars).
    y_axis = dict(gridcolor="rgba(148,163,184,0.18)", range=[0, max(taus) + 1.0])
    fig.update_layout(
        title=f"Accepted length per verification round on {target} / {task}",
        xaxis_title="Draft family",
        yaxis_title="Accepted length, including target bonus token",
        yaxis=y_axis,
        height=360,
        margin=dict(l=35, r=20, t=55, b=35),
    )
    return apply_dark_plot_layout(fig)
def acceptance_heatmap(target):
    """Build a themed heatmap of accepted length for ``target``.

    Rows are the entries of METHODS and columns the entries of TASKS; every
    cell is annotated with its two-decimal value.
    """
    matrix = []
    for family in METHODS:
        matrix.append([method_tau(target, family, task) for task in TASKS])
    cell_labels = [[f"{tau:.2f}" for tau in row] for row in matrix]
    palette = [
        [0.0, "#0b1220"],
        [0.35, "#1e3a8a"],
        [0.68, "#0f766e"],
        [1.0, "#9a3412"],
    ]
    heat = go.Heatmap(
        x=TASKS,
        y=METHODS,
        z=matrix,
        text=cell_labels,
        texttemplate="%{text}",
        colorscale=palette,
        colorbar=dict(title="tau"),
        hovertemplate="%{y}<br>%{x}: %{z:.2f}<extra></extra>",
    )
    fig = go.Figure(data=heat)
    fig.update_layout(
        title=f"DeepSpec Table 1 matrix for {target}",
        xaxis=dict(tickangle=-30),
        height=405,
        margin=dict(l=75, r=25, t=55, b=70),
    )
    return apply_dark_plot_layout(fig)
def production_plot():
    """Build the themed two-series line chart of reported throughput uplift.

    Each series plots throughput uplift (%) against an interactivity SLA
    anchor (tok/s/user) for one model, with the uplift printed at each point.
    """
    # (series name, SLA anchors, uplift percentages, point labels, line color)
    series = (
        ("V4-Flash", [80, 120], [51, 661], ["+51%", "+661%"], "#14b8a6"),
        ("V4-Pro", [35, 50], [52, 406], ["+52%", "+406%"], "#f97316"),
    )
    hover_suffix = " SLA %{x} tok/s/user<br>Throughput uplift %{y}%<extra></extra>"

    fig = go.Figure()
    for label, sla_anchors, uplifts, point_labels, color in series:
        trace = go.Scatter(
            name=label,
            x=sla_anchors,
            y=uplifts,
            text=point_labels,
            mode="lines+markers+text",
            textposition="top center",
            textfont=dict(color="#f8fafc"),
            line=dict(color=color, width=3),
            marker=dict(size=12),
            hovertemplate=label + hover_suffix,
        )
        fig.add_trace(trace)

    fig.update_layout(
        title="Production DSpark frontier reported for DeepSeek-V4",
        xaxis_title="Interactivity SLA anchor, tok/s/user",
        yaxis_title="Aggregate throughput uplift vs MTP-1",
        yaxis=dict(gridcolor="rgba(148,163,184,0.18)"),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
        height=380,
        margin=dict(l=45, r=25, t=55, b=45),
    )
    return apply_dark_plot_layout(fig)
def inventory_plot():
    """Build the themed chart of draft-module size per target model.

    One line-and-marker trace per entry of METHODS, drawn from the MODELS rows
    of that family: x is the target, y the parameter count in billions, and
    the hover text the repository id.
    """
    fig = go.Figure()
    for family in METHODS:
        # Row layout: (family, target, repo, params, ...).
        members = [row for row in MODELS if row[0] == family]
        family_color = COLORS[family]
        trace = go.Scatter(
            name=family,
            x=[row[1] for row in members],
            y=[row[3] for row in members],
            text=[row[2] for row in members],
            mode="markers+lines",
            marker=dict(size=14, color=family_color),
            line=dict(color=family_color, width=2),
            hovertemplate="%{text}<br>Draft params %{y:.3f}B<extra></extra>",
        )
        fig.add_trace(trace)

    fig.update_layout(
        title="Released draft-module parameter scale",
        xaxis_title="Target model family",
        yaxis_title="Draft module parameters, billions",
        yaxis=dict(gridcolor="rgba(148,163,184,0.18)"),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
        height=380,
        margin=dict(l=45, r=25, t=55, b=45),
    )
    return apply_dark_plot_layout(fig)
def architecture_panel():
    """Render the architecture comparison as an HTML string.

    Emits one card per entry of METHODS (copy taken from FAMILY_COPY and
    HTML-escaped, accent color from COLORS) followed by a static four-stage
    pipeline strip.
    """

    def render_card(family):
        # Escape each copy field once, then drop it into the card template.
        copy = FAMILY_COPY[family]
        tag = html.escape(copy["tag"])
        summary = html.escape(copy["summary"])
        strength = html.escape(copy["strength"])
        tradeoff = html.escape(copy["tradeoff"])
        return f"""
<div class="arch-card" style="--accent:{COLORS[family]}">
<div class="arch-top">
<span>{tag}</span>
<strong>{family}</strong>
</div>
<p>{summary}</p>
<div class="arch-detail"><b>Strength</b>{strength}</div>
<div class="arch-detail"><b>Tradeoff</b>{tradeoff}</div>
</div>
"""

    card_markup = "".join(render_card(family) for family in METHODS)
    return f"""
<div class="arch-grid">{card_markup}</div>
<div class="pipeline">
<div><b>Target</b><span>prefill + bonus token</span></div>
<i></i>
<div><b>Draft</b><span>block proposal</span></div>
<i></i>
<div><b>Schedule</b><span>confidence prefix</span></div>
<i></i>
<div><b>Verify</b><span>lossless target check</span></div>
</div>
"""
def source_panel():
return """
<div class="source-panel">
<b>Research basis</b>
<span>The 12 checkpoint pages have no individual model cards; the DeepSpec GitHub README identifies them as the released checkpoints used for Table 1 in the DSpark paper. The app uses that table for accepted-length metrics, the public HF API for checkpoint metadata, and the DSpark/DFlash/EAGLE-3 papers for architecture notes.</span>
<a href="https://github.com/deepseek-ai/DeepSpec" target="_blank">DeepSpec repo</a>
<a href="https://github.com/deepseek-ai/DeepSpec/blob/main/DSpark_paper.pdf" target="_blank">DSpark paper</a>
<a href="https://arxiv.org/abs/2602.06036" target="_blank">DFlash paper</a>
<a href="https://arxiv.org/abs/2503.01840" target="_blank">EAGLE-3 paper</a>
<a href="https://huggingface.co/collections/deepseek-ai/deepspec-6a410e3f1831ca8ca801b88b" target="_blank">DeepSpec collection</a>
</div>
"""
def weighted_acceptance_count(rng, tau, method, scheduled_len, load):
    """Draw how many draft tokens are accepted in one verification round.

    The draw is ``tau - 1`` (floored at 0) plus a uniform jitter in
    [-0.75, 0.75] and a load-dependent shift: negative for DFlash above load
    65 and for EAGLE-3 above load 80, positive for every other method above
    load 80. The rounded result is clamped to ``0..scheduled_len``.
    Consumes exactly one ``rng.uniform`` call.
    """
    expected_drafts = max(0.0, tau - 1.0)
    noise = rng.uniform(-0.75, 0.75)

    if method == "DFlash":
        load_shift = -max(0.0, (load - 65.0) / 140.0)
    elif method == "EAGLE-3":
        load_shift = -max(0.0, (load - 80.0) / 220.0)
    else:
        load_shift = max(0.0, (load - 80.0) / 260.0)

    drawn = int(round(expected_drafts + (noise + load_shift)))
    if drawn < 0:
        return 0
    return min(scheduled_len, drawn)
def scheduled_length(method, tau, load):
    """Return how many draft positions are submitted for verification.

    DFlash always schedules 7. DSpark schedules ``round(tau + 1.5)`` clamped
    to 2..7, then removes up to two positions as load pressure rises, never
    going below 2. Every other method schedules ``round(tau + 0.5)`` clamped
    to 3..7.
    """
    # Map load from the 1..100 range onto 0..1, clamped.
    load_pressure = max(0.0, min(1.0, (load - 1.0) / 99.0))

    if method == "DFlash":
        return 7

    if method == "DSpark":
        confident = int(round(tau + 1.5))
        confident = max(2, min(7, confident))
        trimmed = int(round(confident - load_pressure * 2.0))
        return max(2, trimmed)

    lookahead = int(round(tau + 0.5))
    return max(3, min(7, lookahead))
def simulate_tokens(target, task, method, output_tokens, load, seed, prompt):
    """Render a deterministic round-by-round speculation trace as HTML.

    The RNG is seeded from all of target, task, method, seed, and prompt, so
    equal inputs give equal output. Display tokens are sampled from the task's
    LEXICON words plus prompt words of 3 to 17 characters. Rounds run until
    ``output_tokens`` have been emitted or 16 rounds are reached; each round
    shows 7 draft tokens and one target bonus token.
    """
    rng = random.Random(f"{target}|{task}|{method}|{seed}|{prompt}")

    vocab = list(LEXICON[task])
    if prompt.strip():
        for raw_word in prompt.split():
            word = raw_word.strip(".,:;!?()[]{}<>").lower()
            if 2 < len(word) < 18:
                vocab.append(word)

    tau = method_tau(target, method, task)

    def classify(position, accepted, first_reject, sched):
        # Check order matters: accepted prefix, first rejected slot, slots
        # past the scheduled length, then the remaining scheduled suffix.
        if position < accepted:
            return "tok accepted", "accepted"
        if position == first_reject:
            return "tok rejected", "rejected"
        if position >= sched:
            return "tok dropped", "not verified"
        return "tok tail", "discarded suffix"

    emitted = 0
    round_markup = []
    for cycle in range(1, 17):
        if emitted >= output_tokens:
            break
        sched = scheduled_length(method, tau, load)
        accepted = weighted_acceptance_count(rng, tau, method, sched, load)
        # No rejected slot exists when the whole scheduled prefix was accepted.
        first_reject = accepted if accepted < sched else None

        spans = []
        for position in range(7):
            token = html.escape(rng.choice(vocab))
            cls, label = classify(position, accepted, first_reject, sched)
            spans.append(f"<span class='{cls}' title='{label}'>{token}</span>")
        bonus = html.escape(rng.choice(vocab))
        spans.append(f"<span class='tok bonus' title='target bonus token'>{bonus}</span>")

        emitted += accepted + 1
        strip = "".join(spans)
        round_markup.append(
            f"""
<div class="cycle-row">
<div class="cycle-id">round {cycle}</div>
<div class="token-strip">{strip}</div>
<div class="cycle-stat">{accepted}+1 emitted</div>
</div>
"""
        )

    all_rounds = "".join(round_markup)
    return f"""
<div class="sim-head">
<div><b>{method}</b><span>{target} / {task} / load {load:.0f}%</span></div>
<div class="legend"><span class="dot accepted"></span>accepted <span class="dot rejected"></span>first reject <span class="dot dropped"></span>pruned <span class="dot bonus"></span>target bonus</div>
</div>
<div class="simulator">{all_rounds}</div>
"""
def production_cards():
    """Return the static HTML grid of four reported production metric cards."""
    cards_markup = """
<div class="metric-grid compact">
<div class="metric-card accent-dspark"><span>V4-Flash moderate SLA</span><strong>+51%</strong><small>aggregate throughput at 80 tok/s/user</small></div>
<div class="metric-card accent-dspark"><span>V4-Flash matched capacity</span><strong>+60-85%</strong><small>faster per-user generation</small></div>
<div class="metric-card accent-orange"><span>V4-Pro moderate SLA</span><strong>+52%</strong><small>aggregate throughput at 35 tok/s/user</small></div>
<div class="metric-card accent-orange"><span>V4-Pro matched capacity</span><strong>+57-78%</strong><small>faster per-user generation</small></div>
</div>
"""
    return cards_markup
def render_all(target, task, method, output_tokens, baseline_tps, load, seed, prompt):
    """Build every dashboard output for one set of control values.

    Returns a 10-tuple in this fixed order: metric cards HTML, simulator HTML,
    acceptance bar figure, acceptance heatmap figure, benchmark table rows,
    inventory figure, architecture HTML, production cards HTML, production
    figure, and source panel HTML.

    ``output_tokens`` is truncated to an int. ``seed`` may be ``None`` (a
    numeric input that has been cleared delivers ``None``); it then falls back
    to 0 instead of raising, mirroring how a missing ``prompt`` falls back to
    the empty string.
    """
    seed_value = int(seed or 0)
    prompt_text = prompt or ""
    return (
        metric_cards(target, task, method, baseline_tps, load),
        simulate_tokens(target, task, method, int(output_tokens), load, seed_value, prompt_text),
        acceptance_bar(target, task),
        acceptance_heatmap(target),
        benchmark_rows(target),
        inventory_plot(),
        architecture_panel(),
        production_cards(),
        production_plot(),
        source_panel(),
    )
# Stylesheet for the app, kept as one string. The first part overrides Gradio's
# own theme variables and component styles under `.gradio-container` to force a
# dark palette; the second part styles the classes emitted by the HTML helpers
# in this file (hero, metric cards, simulator rows and tokens, architecture
# cards, pipeline strip, source panel), plus a narrow-screen breakpoint.
# NOTE(review): where this string is handed to Gradio is not visible in this
# part of the file.
CSS = """
:root {
--bg: #070b13;
--panel: rgba(15, 23, 42, 0.86);
--line: rgba(148, 163, 184, 0.18);
--text: #e5efff;
--muted: #b8c4d6;
}
.gradio-container {
color-scheme: dark;
--body-background-fill: #070b13;
--body-text-color: #e5efff;
--body-text-color-subdued: #e5efff;
--block-background-fill: rgba(15, 23, 42, 0.92);
--block-border-color: rgba(148, 163, 184, 0.24);
--block-label-text-color: #f8fafc;
--border-color-primary: rgba(148, 163, 184, 0.28);
--input-background-fill: #0b1220;
--input-border-color: rgba(148, 163, 184, 0.36);
--input-text-color: #f8fafc;
--input-text-color-focus: #ffffff;
--input-placeholder-color: #dbeafe;
--button-primary-background-fill: #0f766e;
--button-primary-background-fill-hover: #0d9488;
--button-primary-text-color: #f8fafc;
--link-text-color: #67e8f9;
--neutral-50: #f8fafc;
--neutral-100: #f1f5f9;
--neutral-200: #e2e8f0;
--neutral-300: #cbd5e1;
--neutral-400: #dbeafe;
--neutral-500: #e5efff;
--table-background-fill: #0b1220;
--table-even-background-fill: #0f172a;
--table-odd-background-fill: #0b1220;
--table-border-color: rgba(148, 163, 184, 0.24);
--table-text-color: #e5efff;
--table-row-focus: #1e293b;
background:
radial-gradient(circle at 12% 0%, rgba(20, 184, 166, 0.18), transparent 28%),
linear-gradient(135deg, #070b13 0%, #0f172a 52%, #111827 100%);
color: var(--text);
}
.gradio-container label,
.gradio-container .label,
.gradio-container .prose,
.gradio-container .prose * {
color: #dbeafe !important;
opacity: 1 !important;
}
.gradio-container h1,
.gradio-container h2,
.gradio-container h3,
.gradio-container h4,
.gradio-container h5,
.gradio-container h6,
.gradio-container legend,
.gradio-container summary,
.gradio-container [data-testid="block-info"],
.gradio-container [class*="label"],
.gradio-container [class*="Label"],
.gradio-container [data-testid*="label"] {
color: #f8fafc !important;
opacity: 1 !important;
font-weight: 600 !important;
}
.gradio-container small,
.gradio-container .secondary,
.gradio-container [class*="secondary"],
.gradio-container [class*="Description"],
.gradio-container [class*="description"] {
color: #cbd5e1 !important;
opacity: 1 !important;
}
.gradio-container .min_value,
.gradio-container .max_value,
.gradio-container [class*="min_value"],
.gradio-container [class*="max_value"] {
color: #dbeafe !important;
opacity: 1 !important;
}
.gradio-container input,
.gradio-container textarea,
.gradio-container select {
background: #0b1220 !important;
color: #f8fafc !important;
border-color: rgba(148, 163, 184, 0.36) !important;
}
.gradio-container input:disabled,
.gradio-container textarea:disabled,
.gradio-container select:disabled,
.gradio-container button:disabled,
.gradio-container [disabled],
.gradio-container [aria-disabled="true"] {
opacity: 1 !important;
background: #0b1220 !important;
color: #f8fafc !important;
-webkit-text-fill-color: #f8fafc !important;
border-color: rgba(148, 163, 184, 0.34) !important;
}
.gradio-container .disabled,
.gradio-container .disabled *,
.gradio-container [class*="disabled"],
.gradio-container [class*="disabled"] * {
opacity: 1 !important;
}
.gradio-container .disabled input,
.gradio-container .disabled textarea,
.gradio-container .disabled select,
.gradio-container [class*="disabled"] input,
.gradio-container [class*="disabled"] textarea,
.gradio-container [class*="disabled"] select {
color: #f8fafc !important;
-webkit-text-fill-color: #f8fafc !important;
}
.gradio-container .form,
.gradio-container .form *,
.gradio-container fieldset,
.gradio-container fieldset *,
.gradio-container .block.padded,
.gradio-container .block.padded *,
.gradio-container [data-testid="input"],
.gradio-container [data-testid="input"] *,
.gradio-container [data-testid*="slider"],
.gradio-container [data-testid*="slider"] *,
.gradio-container [data-testid*="textbox"],
.gradio-container [data-testid*="textbox"] *,
.gradio-container [data-testid*="dropdown"],
.gradio-container [data-testid*="dropdown"] *,
.gradio-container [data-testid*="radio"],
.gradio-container [data-testid*="radio"] * {
opacity: 1 !important;
filter: none !important;
}
.gradio-container .form label,
.gradio-container .form label *,
.gradio-container .form legend,
.gradio-container .form span,
.gradio-container .form p,
.gradio-container .form input,
.gradio-container .form textarea,
.gradio-container .form select,
.gradio-container fieldset label,
.gradio-container fieldset label *,
.gradio-container fieldset legend,
.gradio-container fieldset span,
.gradio-container fieldset p,
.gradio-container [data-testid*="label"],
.gradio-container [data-testid*="label"] *,
.gradio-container [data-testid*="radio"] label,
.gradio-container [data-testid*="radio"] label *,
.gradio-container [data-testid*="radio"] span,
.gradio-container [data-testid*="slider"] label,
.gradio-container [data-testid*="slider"] label *,
.gradio-container [data-testid*="slider"] input,
.gradio-container [data-testid*="slider"] span,
.gradio-container [data-testid*="textbox"] label,
.gradio-container [data-testid*="textbox"] label *,
.gradio-container [data-testid*="textbox"] textarea,
.gradio-container [data-testid*="dropdown"] label,
.gradio-container [data-testid*="dropdown"] label *,
.gradio-container [data-testid*="dropdown"] input,
.gradio-container [data-testid*="number"] input,
.gradio-container [data-testid*="number"] span {
color: #f8fafc !important;
-webkit-text-fill-color: #f8fafc !important;
opacity: 1 !important;
filter: none !important;
text-shadow: 0 0 0 #f8fafc !important;
}
.gradio-container .form input::placeholder,
.gradio-container .form textarea::placeholder,
.gradio-container [data-testid*="textbox"] textarea::placeholder,
.gradio-container [data-testid*="dropdown"] input::placeholder {
color: #dbeafe !important;
-webkit-text-fill-color: #dbeafe !important;
opacity: 1 !important;
}
.gradio-container input::placeholder,
.gradio-container textarea::placeholder {
color: #dbeafe !important;
-webkit-text-fill-color: #dbeafe !important;
opacity: 1 !important;
}
.gradio-container input:focus,
.gradio-container textarea:focus,
.gradio-container select:focus {
border-color: rgba(20, 184, 166, 0.76) !important;
box-shadow: 0 0 0 2px rgba(20, 184, 166, 0.18) !important;
}
.gradio-container button {
border-color: rgba(148, 163, 184, 0.28) !important;
}
.gradio-container button[role="tab"],
.gradio-container .tab-nav button {
background: #0b1220 !important;
color: #cbd5e1 !important;
border-color: rgba(148, 163, 184, 0.2) !important;
}
.gradio-container button[aria-selected="true"],
.gradio-container button[role="tab"][aria-selected="true"] {
background: rgba(20, 184, 166, 0.18) !important;
color: #f8fafc !important;
border-color: rgba(20, 184, 166, 0.48) !important;
}
.gradio-container label:has(input[type="radio"]),
.gradio-container label:has(input[type="checkbox"]) {
background: #0b1220 !important;
color: #f8fafc !important;
border: 1px solid rgba(148, 163, 184, 0.38) !important;
border-radius: 8px !important;
opacity: 1 !important;
}
.gradio-container label:has(input[type="radio"]) *,
.gradio-container label:has(input[type="checkbox"]) * {
color: #f8fafc !important;
opacity: 1 !important;
}
.gradio-container label:has(input[type="radio"]:checked),
.gradio-container label:has(input[type="checkbox"]:checked) {
background: rgba(20, 184, 166, 0.22) !important;
border-color: rgba(20, 184, 166, 0.72) !important;
box-shadow: inset 0 0 0 1px rgba(45, 212, 191, 0.28) !important;
}
.gradio-container input[type="radio"],
.gradio-container input[type="checkbox"] {
accent-color: #14b8a6 !important;
}
.gradio-container input[type="radio"] + span,
.gradio-container input[type="checkbox"] + span {
color: #f8fafc !important;
opacity: 1 !important;
}
.gradio-container [role="radiogroup"] label,
.gradio-container [role="radiogroup"] [role="radio"] {
background: #0b1220 !important;
color: #f8fafc !important;
border: 1px solid rgba(148, 163, 184, 0.38) !important;
border-radius: 8px !important;
opacity: 1 !important;
}
.gradio-container [role="radiogroup"] label *,
.gradio-container [role="radiogroup"] [role="radio"] *,
.gradio-container [role="radiogroup"] span {
color: #f8fafc !important;
opacity: 1 !important;
}
.gradio-container [role="radiogroup"] label:has(input:checked),
.gradio-container [role="radiogroup"] [role="radio"][aria-checked="true"] {
background: rgba(20, 184, 166, 0.22) !important;
border-color: rgba(20, 184, 166, 0.72) !important;
}
.gradio-container .wrap,
.gradio-container .block,
.gradio-container .panel,
.gradio-container .form {
background: rgba(15, 23, 42, 0.72) !important;
border-color: rgba(148, 163, 184, 0.22) !important;
color: #e5efff !important;
}
.gradio-container .dataframe,
.gradio-container .table-wrap,
.gradio-container table {
background: #0b1220 !important;
color: #e5efff !important;
border-color: rgba(148, 163, 184, 0.24) !important;
}
.gradio-container .table-container,
.gradio-container svelte-virtual-table-viewport,
.gradio-container button.disable_click,
.gradio-container button[aria-label*="Drop CSV"],
.gradio-container button[aria-label*="TSV"] {
background: #0b1220 !important;
color: #e5efff !important;
border-color: rgba(148, 163, 184, 0.24) !important;
}
.gradio-container table.table,
.gradio-container table.table thead,
.gradio-container table.table tbody,
.gradio-container table.table tfoot {
background: #0b1220 !important;
color: #e5efff !important;
}
.gradio-container th {
background: #111827 !important;
color: #f8fafc !important;
border-color: rgba(148, 163, 184, 0.24) !important;
}
.gradio-container table.table thead,
.gradio-container table.table thead tr,
.gradio-container table.table th,
.gradio-container table.table th *,
.gradio-container table.table .header-content,
.gradio-container table.table .header-button,
.gradio-container table.table .cell-wrap {
background: #111827 !important;
color: #f8fafc !important;
border-color: rgba(148, 163, 184, 0.24) !important;
opacity: 1 !important;
}
.gradio-container td {
background: #0b1220 !important;
color: #e2e8f0 !important;
border-color: rgba(148, 163, 184, 0.18) !important;
}
.gradio-container tr:nth-child(even) td {
background: #0f172a !important;
}
.gradio-container table.table tbody tr,
.gradio-container table.table tbody td,
.gradio-container table.table tbody .cell-wrap {
background: #0b1220 !important;
color: #e2e8f0 !important;
}
.gradio-container table.table tbody tr.row-odd,
.gradio-container table.table tbody tr.row-odd td,
.gradio-container table.table tbody tr.row-odd .cell-wrap {
background: #0f172a !important;
}
.gradio-container table.table span.text,
.gradio-container table.table button {
color: #e5efff !important;
-webkit-text-fill-color: #e5efff !important;
opacity: 1 !important;
}
.gradio-container .plot-container,
.gradio-container .js-plotly-plot {
background: #0b1220 !important;
border-radius: 8px;
}
.gradio-container [aria-label="Empty value"],
.gradio-container .empty,
.gradio-container .unpadded_box,
.gradio-container .large.unpadded_box {
background: #0b1220 !important;
color: #93c5fd !important;
border-color: rgba(148, 163, 184, 0.24) !important;
}
.gradio-container [aria-label="Empty value"] *,
.gradio-container .empty *,
.gradio-container .unpadded_box * {
color: #93c5fd !important;
opacity: 1 !important;
}
.gradio-container [data-testid="status-tracker"],
.gradio-container [data-testid="status-tracker"].wrap,
.gradio-container .wrap.default.full,
.gradio-container .progress-text,
.gradio-container .meta-text {
background: rgba(11, 18, 32, 0.92) !important;
color: #dbeafe !important;
opacity: 1 !important;
}
.gradio-container [data-testid="status-tracker"].hide,
.gradio-container .wrap.default.full.hide {
background: transparent !important;
border: 0 !important;
color: transparent !important;
opacity: 0 !important;
visibility: hidden !important;
pointer-events: none !important;
}
.gradio-container [data-testid="status-tracker"].hide *,
.gradio-container .wrap.default.full.hide * {
background: transparent !important;
color: transparent !important;
opacity: 0 !important;
visibility: hidden !important;
}
.gradio-container .eta-bar {
background: rgba(20, 184, 166, 0.18) !important;
opacity: 1 !important;
}
.gradio-container [data-testid="block-label"],
.gradio-container label[data-testid="block-label"] {
background: #0b1220 !important;
color: #f8fafc !important;
border-color: rgba(148, 163, 184, 0.28) !important;
opacity: 1 !important;
}
.gradio-container [data-testid="block-label"] *,
.gradio-container label[data-testid="block-label"] * {
color: #f8fafc !important;
opacity: 1 !important;
}
.gradio-container footer,
.gradio-container footer *,
.gradio-container footer button,
.gradio-container footer a {
background: #0b1220 !important;
color: #e5efff !important;
-webkit-text-fill-color: #e5efff !important;
opacity: 1 !important;
}
.gradio-container footer img {
background: transparent !important;
}
.main-shell {
border: 1px solid var(--line);
border-radius: 8px;
padding: 24px;
background: linear-gradient(145deg, rgba(15, 23, 42, 0.94), rgba(17, 24, 39, 0.78));
box-shadow: 0 24px 80px rgba(0, 0, 0, 0.28);
}
.hero-title {
display: grid;
grid-template-columns: 1.25fr 0.75fr;
gap: 18px;
align-items: stretch;
}
.hero-title h1 {
margin: 0;
font-size: clamp(2.1rem, 4vw, 4.5rem);
line-height: 0.92;
letter-spacing: 0;
}
.hero-title p {
color: var(--muted);
max-width: 760px;
font-size: 1rem;
}
.hero-stats {
display: grid;
grid-template-columns: repeat(2, minmax(0, 1fr));
gap: 10px;
}
.hero-stat {
border: 1px solid var(--line);
border-radius: 8px;
padding: 14px;
background: rgba(2, 6, 23, 0.36);
}
.hero-stat b {
display: block;
font-size: 1.55rem;
color: #ffffff;
}
.hero-stat span {
color: var(--muted);
font-size: 0.82rem;
}
.metric-grid {
display: grid;
grid-template-columns: repeat(4, minmax(0, 1fr));
gap: 12px;
}
.metric-grid.compact {
margin-bottom: 14px;
}
.metric-card {
border: 1px solid var(--line);
border-radius: 8px;
padding: 14px;
min-height: 118px;
background: rgba(2, 6, 23, 0.66);
position: relative;
overflow: hidden;
}
.metric-card:before {
content: "";
position: absolute;
inset: 0 auto 0 0;
width: 4px;
background: var(--accent, #38bdf8);
}
.metric-card span, .metric-card small {
display: block;
color: var(--muted);
}
.metric-card strong {
display: block;
margin: 6px 0;
font-size: 2rem;
color: #ffffff;
}
.accent-dspark { --accent: #14b8a6; }
.accent-orange { --accent: #f97316; }
.accent-violet { --accent: #8b5cf6; }
.accent-blue { --accent: #38bdf8; }
.sim-head {
display: flex;
align-items: center;
justify-content: space-between;
gap: 12px;
margin: 8px 0 12px;
}
.sim-head span {
display: block;
color: var(--muted);
}
.legend {
color: var(--muted);
font-size: 0.86rem;
}
.dot {
width: 10px;
height: 10px;
border-radius: 50%;
display: inline-block;
margin: 0 5px 0 12px;
}
.simulator {
display: grid;
gap: 8px;
}
.cycle-row {
display: grid;
grid-template-columns: 76px 1fr 96px;
gap: 10px;
align-items: center;
border: 1px solid var(--line);
border-radius: 8px;
padding: 10px;
background: rgba(15, 23, 42, 0.58);
}
.cycle-id, .cycle-stat {
color: var(--muted);
font-size: 0.82rem;
}
.token-strip {
display: flex;
flex-wrap: wrap;
gap: 6px;
}
.tok {
border: 1px solid transparent;
border-radius: 6px;
padding: 5px 8px;
font-size: 0.85rem;
line-height: 1.1;
}
.accepted, .tok.accepted { background: rgba(20, 184, 166, 0.18); color: #99f6e4; border-color: rgba(20, 184, 166, 0.36); }
.rejected, .tok.rejected { background: rgba(244, 63, 94, 0.18); color: #fecdd3; border-color: rgba(244, 63, 94, 0.38); }
.dropped, .tok.dropped { background: rgba(100, 116, 139, 0.18); color: #cbd5e1; border-color: rgba(148, 163, 184, 0.22); text-decoration: line-through; }
.tok.tail { background: rgba(249, 115, 22, 0.14); color: #fed7aa; border-color: rgba(249, 115, 22, 0.26); }
.bonus, .tok.bonus { background: rgba(56, 189, 248, 0.16); color: #bae6fd; border-color: rgba(56, 189, 248, 0.34); }
.arch-grid {
display: grid;
grid-template-columns: repeat(3, minmax(0, 1fr));
gap: 12px;
margin-bottom: 14px;
}
.arch-card {
border: 1px solid var(--line);
border-radius: 8px;
padding: 16px;
background: rgba(2, 6, 23, 0.42);
box-shadow: inset 0 3px 0 var(--accent);
}
.arch-top span {
color: var(--accent);
text-transform: uppercase;
font-size: 0.76rem;
}
.arch-top strong {
display: block;
color: #fff;
font-size: 1.3rem;
}
.arch-card p, .arch-detail {
color: var(--muted);
}
.arch-detail {
margin-top: 10px;
}
.arch-detail b {
display: block;
color: #e5efff;
}
.pipeline {
display: grid;
grid-template-columns: 1fr 24px 1fr 24px 1fr 24px 1fr;
gap: 8px;
align-items: center;
border: 1px solid var(--line);
border-radius: 8px;
padding: 14px;
background: rgba(15, 23, 42, 0.54);
}
.pipeline div {
min-height: 72px;
border-radius: 8px;
border: 1px solid rgba(148, 163, 184, 0.18);
padding: 12px;
background: rgba(2, 6, 23, 0.42);
}
.pipeline b, .pipeline span {
display: block;
}
.pipeline span {
color: var(--muted);
}
.pipeline i {
height: 2px;
background: linear-gradient(90deg, #14b8a6, #f97316);
}
.source-panel {
display: flex;
gap: 10px;
flex-wrap: wrap;
align-items: center;
border: 1px solid var(--line);
border-radius: 8px;
padding: 12px;
background: rgba(2, 6, 23, 0.34);
color: var(--muted);
}
.source-panel b {
color: #fff;
}
.source-panel span {
flex: 1 1 520px;
}
.source-panel a {
color: #67e8f9;
text-decoration: none;
border: 1px solid rgba(103, 232, 249, 0.22);
border-radius: 6px;
padding: 4px 8px;
}
@media (max-width: 900px) {
.hero-title, .metric-grid, .arch-grid, .pipeline {
grid-template-columns: 1fr;
}
.pipeline i {
height: 18px;
width: 2px;
margin-left: 12px;
}
.cycle-row {
grid-template-columns: 1fr;
}
}
"""
# Static hero banner shown above the controls. The markup is kept verbatim so
# the rendered page is unchanged.
_HERO_HTML = """
<div class="main-shell">
<div class="hero-title">
<div>
<h1>DeepSpec Decoding Lab</h1>
<p>Explore DeepSeek's 12 released draft modules across DSpark, DFlash, and EAGLE-3 with paper-backed accepted-length metrics, architecture comparisons, and a deterministic speculative-decoding simulator.</p>
</div>
<div class="hero-stats">
<div class="hero-stat"><b>12</b><span>released draft checkpoints</span></div>
<div class="hero-stat"><b>3</b><span>speculative-decoding families</span></div>
<div class="hero-stat"><b>9</b><span>benchmark tasks from Table 1</span></div>
<div class="hero-stat"><b>60-85%</b><span>reported V4-Flash per-user speed lift</span></div>
</div>
</div>
</div>
"""

with gr.Blocks() as demo:
    gr.HTML(_HERO_HTML)

    with gr.Row():
        # Narrow left column: every control that is passed to render_all.
        with gr.Column(scale=1, min_width=280):
            target = gr.Dropdown(
                choices=TARGETS,
                value="Qwen3-4B",
                label="Target family",
            )
            task = gr.Dropdown(
                choices=TASKS,
                value="HumanEval",
                label="Benchmark profile",
            )
            method = gr.Radio(
                choices=METHODS,
                value="DSpark",
                label="Primary draft family",
            )
            output_tokens = gr.Slider(
                minimum=24,
                maximum=128,
                value=64,
                step=8,
                label="Simulation output budget",
            )
            baseline_tps = gr.Slider(
                minimum=5,
                maximum=160,
                value=40,
                step=5,
                label="Autoregressive baseline tok/s",
            )
            load = gr.Slider(
                minimum=1,
                maximum=100,
                value=70,
                step=1,
                label="Serving load pressure",
            )
            seed = gr.Number(value=7, label="Deterministic seed", precision=0)
            prompt = gr.Textbox(
                value="Write a compact function, then explain why it is correct.",
                label="Prompt flavor",
                lines=3,
            )
            run = gr.Button("Run Speculation", variant="primary")

        # Wide right column: summary cards, the simulator view and the tabs.
        # NOTE(review): the tabs are assumed to be nested inside this column;
        # confirm against the intended layout.
        with gr.Column(scale=3):
            cards = gr.HTML()
            sim = gr.HTML()

            with gr.Tabs():
                with gr.Tab("Benchmark Matrix"):
                    bar = gr.Plot()
                    heatmap = gr.Plot()
                    table = gr.Dataframe(
                        headers=["Task", "Domain", "DSpark", "DFlash", "EAGLE-3"],
                        datatype=["str"] * 5,
                        interactive=False,
                        wrap=True,
                    )

                with gr.Tab("Checkpoint Inventory"):
                    inv_plot = gr.Plot()
                    # Filled once from model_rows(); this table is not one of
                    # the outputs refreshed by the event listeners below.
                    inv_table = gr.Dataframe(
                        value=model_rows(),
                        headers=[
                            "Family",
                            "Target",
                            "Repo",
                            "Params",
                            "Architecture",
                            "Horizon",
                            "Layers",
                            "Confidence",
                            "Sequential signal",
                        ],
                        datatype=["str"] * 9,
                        interactive=False,
                        wrap=True,
                    )

                with gr.Tab("Architectures"):
                    arch = gr.HTML()

                with gr.Tab("Production Frontier"):
                    prod_cards = gr.HTML()
                    prod_plot = gr.Plot()

                with gr.Tab("Sources"):
                    sources = gr.HTML()

    # Component lists handed to render_all. Their order presumably mirrors
    # render_all's parameters and return values, so it is left untouched.
    outputs = [cards, sim, bar, heatmap, table, inv_plot, arch, prod_cards, prod_plot, sources]
    inputs = [target, task, method, output_tokens, baseline_tps, load, seed, prompt]

    # Keyword arguments shared by every listener registered without an API
    # name and with private API visibility.
    hidden_refresh = dict(
        inputs=inputs,
        outputs=outputs,
        api_name=None,
        api_visibility="private",
    )

    # Render once when the page loads.
    demo.load(render_all, **hidden_refresh)

    # Every input except the trailing prompt textbox re-renders on change;
    # the prompt re-renders on submit instead.
    for control in inputs[:-1]:
        control.change(render_all, **hidden_refresh)
    prompt.submit(render_all, **hidden_refresh)

    # The button click is the only listener registered under an API name.
    run.click(render_all, inputs=inputs, outputs=outputs, api_name="simulate")

# Enable queuing with a default concurrency limit of 8.
demo.queue(default_concurrency_limit=8)
if __name__ == "__main__":
    # Start the app only when this file is executed as a script. The custom
    # CSS string and the Base theme are passed to launch().
    demo.launch(
        css=CSS,
        theme=gr.themes.Base(),
    )