import os

# These cache/config locations are defaulted before gradio and plotly are
# imported below; setdefault keeps any value already present in the environment.
os.environ.setdefault("HF_HOME", "/tmp/huggingface")
os.environ.setdefault("HF_MODULES_CACHE", "/tmp/hf_modules")
os.environ.setdefault("MPLCONFIGDIR", "/tmp/matplotlib")
os.environ.setdefault("GRADIO_SSR_MODE", "false")

import html
import random
from statistics import mean

import gradio as gr
import plotly.graph_objects as go

# Benchmark task names; this order drives the heatmap columns and the rows of
# the benchmark table.
TASKS = [
    "GSM8K",
    "MATH-500",
    "AIME25",
    "MBPP",
    "HumanEval",
    "LiveCodeBench",
    "MT-Bench",
    "Alpaca",
    "Arena-Hard v2",
]

# Domain -> tasks grouping, read by domain_for_task().
DOMAINS = {
    "Math": ["GSM8K", "MATH-500", "AIME25"],
    "Code": ["MBPP", "HumanEval", "LiveCodeBench"],
    "Chat": ["MT-Bench", "Alpaca", "Arena-Hard v2"],
}

# Choices offered by the "Target family" dropdown; also the first-level keys of
# ACCEPTANCE.
TARGETS = ["Qwen3-4B", "Qwen3-8B", "Qwen3-14B", "Gemma4-12B"]

# Draft families; this order drives bar order, heatmap rows and table columns.
METHODS = ["DSpark", "DFlash", "EAGLE-3"]

# Plot colours keyed by family name.
COLORS = {
    "DSpark": "#14b8a6",
    "DFlash": "#f97316",
    "EAGLE-3": "#8b5cf6",
    "Baseline": "#94a3b8",
}

# Accepted-length values, indexed as ACCEPTANCE[target][method][task]
# (see method_tau()).
ACCEPTANCE = {
    "Qwen3-4B": {
        "EAGLE-3": {
            "GSM8K": 5.14, "MATH-500": 4.62, "AIME25": 3.92,
            "MBPP": 3.69, "HumanEval": 4.16, "LiveCodeBench": 3.77,
            "MT-Bench": 2.39, "Alpaca": 2.26, "Arena-Hard v2": 2.55,
        },
        "DFlash": {
            "GSM8K": 5.40, "MATH-500": 4.85, "AIME25": 4.15,
            "MBPP": 4.40, "HumanEval": 4.74, "LiveCodeBench": 4.18,
            "MT-Bench": 3.07, "Alpaca": 2.96, "Arena-Hard v2": 2.83,
        },
        "DSpark": {
            "GSM8K": 6.11, "MATH-500": 5.70, "AIME25": 4.89,
            "MBPP": 5.13, "HumanEval": 5.38, "LiveCodeBench": 4.86,
            "MT-Bench": 3.64, "Alpaca": 3.54, "Arena-Hard v2": 3.29,
        },
    },
    "Qwen3-8B": {
        "EAGLE-3": {
            "GSM8K": 5.30, "MATH-500": 4.77, "AIME25": 3.91,
            "MBPP": 3.96, "HumanEval": 4.33, "LiveCodeBench": 4.17,
            "MT-Bench": 2.66, "Alpaca": 2.54, "Arena-Hard v2": 2.54,
        },
        "DFlash": {
            "GSM8K": 5.33, "MATH-500": 4.91, "AIME25": 4.07,
            "MBPP": 4.36, "HumanEval": 4.64, "LiveCodeBench": 4.39,
            "MT-Bench": 3.11, "Alpaca": 2.98, "Arena-Hard v2": 2.81,
        },
        "DSpark": {
            "GSM8K": 6.17, "MATH-500": 5.78, "AIME25": 5.01,
            "MBPP": 5.16, "HumanEval": 5.52, "LiveCodeBench": 5.17,
            "MT-Bench": 3.72, "Alpaca": 3.58, "Arena-Hard v2": 3.21,
        },
    },
    "Qwen3-14B": {
        "EAGLE-3": {
            "GSM8K": 5.24, "MATH-500": 4.60, "AIME25": 3.71,
            "MBPP": 3.81, "HumanEval": 4.14, "LiveCodeBench": 4.01,
            "MT-Bench": 2.62, "Alpaca": 2.47, "Arena-Hard v2": 2.48,
        },
        "DFlash": {
            "GSM8K": 5.41, "MATH-500": 4.84, "AIME25": 3.98,
            "MBPP": 4.44, "HumanEval": 4.59, "LiveCodeBench": 4.33,
            "MT-Bench": 3.10, "Alpaca": 2.94, "Arena-Hard v2": 2.72,
        },
        "DSpark": {
            "GSM8K": 6.21, "MATH-500": 5.74, "AIME25": 4.94,
            "MBPP": 5.26, "HumanEval": 5.43, "LiveCodeBench": 5.02,
            "MT-Bench": 3.70, "Alpaca": 3.58, "Arena-Hard v2": 3.13,
        },
    },
    "Gemma4-12B": {
        "EAGLE-3": {
            "GSM8K": 5.87, "MATH-500": 5.46, "AIME25": 4.83,
            "MBPP": 4.72, "HumanEval": 5.37, "LiveCodeBench": 4.16,
            "MT-Bench": 3.19, "Alpaca": 3.06, "Arena-Hard v2": 2.72,
        },
        "DFlash": {
            "GSM8K": 5.45, "MATH-500": 5.04, "AIME25": 4.22,
            "MBPP": 4.39, "HumanEval": 4.95, "LiveCodeBench": 3.70,
            "MT-Bench": 2.98, "Alpaca": 2.84, "Arena-Hard v2": 2.59,
        },
        "DSpark": {
            "GSM8K": 6.05, "MATH-500": 5.78, "AIME25": 5.12,
            "MBPP": 5.11, "HumanEval": 5.64, "LiveCodeBench": 4.51,
            "MT-Bench": 3.49, "Alpaca": 3.35, "Arena-Hard v2": 2.92,
        },
    },
}

# One tuple per checkpoint. Field order, as unpacked by model_rows():
# (family, target, repo, params, arch, horizon, layers, confidence, seq).
# params is rendered with a "B" suffix and plotted on an axis labelled in
# billions.
MODELS = [
    ("DSpark", "Qwen3-4B", "deepseek-ai/dspark_qwen3_4b_block7", 1.393, "Qwen3DSparkModel", "block7", "5", "yes", "Markov rank 256"),
    ("DSpark", "Qwen3-8B", "deepseek-ai/dspark_qwen3_8b_block7", 2.371, "Qwen3DSparkModel", "block7", "5", "yes", "Markov rank 256"),
    ("DSpark", "Qwen3-14B", "deepseek-ai/dspark_qwen3_14b_block7", 3.416, "Qwen3DSparkModel", "block7", "5", "yes", "Markov rank 256"),
    ("DSpark", "Gemma4-12B", "deepseek-ai/dspark_gemma4_12b_block7", 3.430, "Gemma4DSparkModel", "block7", "5", "yes", "Markov rank 256"),
    ("DFlash", "Qwen3-4B", "deepseek-ai/dflash_qwen3_4b_block7", 1.315, "Qwen3DSparkModel", "block7", "5", "no", "parallel block"),
    ("DFlash", "Qwen3-8B", "deepseek-ai/dflash_qwen3_8b_block7", 2.293, "Qwen3DSparkModel", "block7", "5", "no", "parallel block"),
    ("DFlash", "Qwen3-14B", "deepseek-ai/dflash_qwen3_14b_block7", 3.338, "Qwen3DSparkModel", "block7", "5", "no", "parallel block"),
    ("DFlash", "Gemma4-12B", "deepseek-ai/dflash_gemma4_12b_block7", 3.296, "Gemma4DSparkModel", "block7", "5", "no", "parallel block"),
    ("EAGLE-3", "Qwen3-4B", "deepseek-ai/eagle3_qwen3_4b_ttt7", 0.927, "Qwen3Eagle3Model", "ttt7", "1", "no", "training-time test"),
    ("EAGLE-3", "Qwen3-8B", "deepseek-ai/eagle3_qwen3_8b_ttt7", 1.547, "Qwen3Eagle3Model", "ttt7", "1", "no", "training-time test"),
    ("EAGLE-3", "Qwen3-14B", "deepseek-ai/eagle3_qwen3_14b_ttt7", 2.054, "Qwen3Eagle3Model", "ttt7", "1", "no", "training-time test"),
    ("EAGLE-3", "Gemma4-12B", "deepseek-ai/eagle3_gemma4_12b_ttt7", 2.362, "Gemma4Eagle3Model", "ttt7", "1", "no", "training-time test"),
]

# Descriptive copy per draft family, rendered by architecture_panel().
FAMILY_COPY = {
    "DSpark": {
        "tag": "semi-autoregressive",
        "summary": "Parallel DFlash-style backbone plus a lightweight Markov head and confidence scheduler.",
        "strength": "Best accepted length in the released table and designed for load-aware serving.",
        "tradeoff": "More machinery than a pure block drafter.",
    },
    "DFlash": {
        "tag": "parallel block diffusion",
        "summary": "Predicts a full block in one pass with target-feature conditioning and KV injection.",
        "strength": "Very low drafting latency and strong first-token accuracy.",
        "tradeoff": "Suffix tokens decay because positions are predicted independently.",
    },
    "EAGLE-3": {
        "tag": "autoregressive feature drafter",
        "summary": "Uses training-time test and fused target features to improve classic EAGLE drafting.",
        "strength": "Strong lossless speculative baseline with stable sequential dependency modeling.",
        "tradeoff": "Drafting cost scales with lookahead length.",
    },
}

# Per-task word pools; simulate_tokens() samples its display tokens from these.
LEXICON = {
    "GSM8K": "therefore the total is because each group contributes remaining answer equals final".split(),
    "MATH-500": "let x satisfy equation substitute simplify bound hence root value proof".split(),
    "AIME25": "triangle integer modulo sequence polynomial area count radius answer".split(),
    "MBPP": "def return list index loop condition append result function test".split(),
    "HumanEval": "class function assert edge case input output sorted recursive".split(),
    "LiveCodeBench": "stdin parse graph dp binary search modulo constraints optimize".split(),
    "MT-Bench": "I would compare the tradeoff and explain the practical implication".split(),
    "Alpaca": "Here is a concise response with steps context and caveats".split(),
    "Arena-Hard v2": "The best answer balances reasoning specificity and directness".split(),
}
def pct_gain(new, old):
    """Return the relative change from ``old`` to ``new`` as a percentage.

    A falsy ``old`` (``0``, ``0.0`` or ``None``) short-circuits to ``0.0`` so
    the division is never attempted.
    """
    if old:
        return (new / old - 1.0) * 100.0
    return 0.0


def domain_for_task(task):
    """Return the first ``DOMAINS`` key whose task list contains ``task``.

    Falls back to ``"Mixed"`` when no domain lists the task.
    """
    matches = (name for name, members in DOMAINS.items() if task in members)
    return next(matches, "Mixed")


def model_rows():
    """Return one table row (a list of nine cells) per entry of ``MODELS``.

    Cells keep the tuple order; only the parameter count is reformatted, to a
    three-decimal string with a ``"B"`` suffix.
    """
    return [
        [family, target, repo, f"{params:.3f}B", arch, horizon, layers, confidence, seq]
        for family, target, repo, params, arch, horizon, layers, confidence, seq in MODELS
    ]


def benchmark_rows(target):
    """Return one row per task: ``[task, domain, <tau per METHODS entry>...]``.

    Tau values are formatted as two-decimal strings, in ``METHODS`` order.
    Raises ``KeyError`` if ``target`` is not a key of ``ACCEPTANCE``.
    """
    return [
        [
            task,
            domain_for_task(task),
            *(f"{ACCEPTANCE[target][name][task]:.2f}" for name in METHODS),
        ]
        for task in TASKS
    ]


def method_tau(target, method, task):
    """Look up the accepted length stored for ``target`` / ``method`` / ``task``.

    Raises ``KeyError`` when any of the three keys is absent from ``ACCEPTANCE``.
    """
    by_method = ACCEPTANCE[target]
    return by_method[method][task]


def simulated_tps(tau, method, baseline_tps, load):
    """Estimate a tokens/sec figure for ``method`` from its accepted length.

    ``load`` is mapped linearly from ``[1, 100]`` onto a pressure in
    ``[0, 1]`` (values outside that range are clamped). The result is
    ``baseline_tps * tau``, divided by ``1 + overhead`` and scaled down by
    ``pressure * waste``, where both factors are fixed per family.

    Raises ``KeyError`` for a ``method`` other than the three known families.
    """
    overhead_by_method = {"DSpark": 0.11, "DFlash": 0.10, "EAGLE-3": 0.19}
    waste_by_method = {"DSpark": 0.06, "DFlash": 0.24, "EAGLE-3": 0.16}

    pressure = (load - 1.0) / 99.0
    pressure = max(0.0, min(1.0, pressure))

    overhead = overhead_by_method[method]
    waste = waste_by_method[method]
    return baseline_tps * tau / (1.0 + overhead) * (1.0 - pressure * waste)


def apply_dark_plot_layout(fig):
    """Apply the app's dark theme to ``fig`` in place and return the same object.

    Updates the layout, both axis families, and the colorbar of traces selected
    with ``type="heatmap"``.
    """
    # Both axis families receive exactly the same styling.
    axis_style = dict(
        gridcolor="rgba(148, 163, 184, 0.18)",
        zerolinecolor="rgba(148, 163, 184, 0.22)",
        linecolor="rgba(148, 163, 184, 0.34)",
        tickcolor="rgba(148, 163, 184, 0.34)",
        title_font=dict(color="#dbeafe"),
        tickfont=dict(color="#cbd5e1"),
    )
    hover_style = dict(
        bgcolor="#111827",
        bordercolor="#475569",
        font=dict(color="#f8fafc"),
    )
    colorbar_style = dict(
        tickfont=dict(color="#dbeafe"),
        title_font=dict(color="#f8fafc"),
    )

    fig.update_layout(
        template="plotly_dark",
        paper_bgcolor="#0b1220",
        plot_bgcolor="#0f172a",
        font=dict(color="#e5efff", family="Inter, ui-sans-serif, system-ui, sans-serif"),
        title_font=dict(color="#f8fafc", size=18),
        legend=dict(font=dict(color="#dbeafe")),
        hoverlabel=hover_style,
    )
    fig.update_xaxes(**axis_style)
    fig.update_yaxes(**axis_style)
    fig.update_traces(colorbar=colorbar_style, selector=dict(type="heatmap"))
    return fig
def metric_cards(target, task, method, baseline_tps, load):
    """Build the four-line summary text for the current selection.

    The lines report, in order: DSpark's accepted length and its gain over the
    better of DFlash/EAGLE-3, the simulated rate for ``method``, the share of
    target calls avoided (``1 - 1/tau`` for ``method``), and the task's domain
    with all three accepted lengths.

    Returns a string that starts and ends with a newline.
    """
    # NOTE(review): the stylesheet in this file defines .metric-grid/.metric-card
    # rules, but the template below carries no element markup — confirm whether
    # tags were lost from this string.
    tau_dspark = method_tau(target, "DSpark", task)
    tau_dflash = method_tau(target, "DFlash", task)
    tau_eagle = method_tau(target, "EAGLE-3", task)
    tau_selected = method_tau(target, method, task)

    strongest_other = max(tau_dflash, tau_eagle)
    gain = pct_gain(tau_dspark, strongest_other)
    avoided_pct = (1.0 - 1.0 / tau_selected) * 100.0
    rate = simulated_tps(tau_selected, method, baseline_tps, load)
    profile = domain_for_task(task)

    return f"""
DSpark accepted length {tau_dspark:.2f} {gain:+.1f}% vs strongest baseline on {task}
{method} simulated rate {rate:.1f} tokens/sec from a {baseline_tps:.1f} baseline input
Target calls avoided {avoided_pct:.1f}% estimated from accepted length tau={tau_selected:.2f}
Benchmark profile {profile} EAGLE-3 {tau_eagle:.2f} / DFlash {tau_dflash:.2f} / DSpark {tau_dspark:.2f}
"""
def acceptance_bar(target, task):
    """Return a themed bar chart of accepted length per draft family.

    One bar is drawn per entry of ``METHODS`` for the given ``target`` and
    ``task``, coloured from ``COLORS`` and labelled with the two-decimal value.
    The y-axis runs from 0 to one above the tallest bar so the outside labels
    are not clipped.

    Raises ``KeyError`` if ``target`` or ``task`` is missing from ``ACCEPTANCE``.
    """
    taus = [method_tau(target, name, task) for name in METHODS]

    fig = go.Figure()
    fig.add_bar(
        x=METHODS,
        y=taus,
        marker_color=[COLORS[name] for name in METHODS],
        text=[f"{tau:.2f}" for tau in taus],
        textfont=dict(color="#f8fafc", size=13),
        textposition="outside",
        # Plotly hover labels break lines on <br>; a raw newline inside the
        # literal is not rendered as one (and is not a valid string literal).
        hovertemplate="%{x}<br>Accepted length: %{y:.2f}",
    )
    fig.update_layout(
        title=f"Accepted length per verification round on {target} / {task}",
        yaxis_title="Accepted length, including target bonus token",
        xaxis_title="Draft family",
        height=360,
        margin=dict(l=35, r=20, t=55, b=35),
        yaxis=dict(gridcolor="rgba(148,163,184,0.18)", range=[0, max(taus) + 1.0]),
    )
    return apply_dark_plot_layout(fig)
def acceptance_heatmap(target):
    """Return a themed heatmap of accepted length for one target.

    Rows follow ``METHODS`` and columns follow ``TASKS``; every cell is
    annotated with its two-decimal value.

    Raises ``KeyError`` if ``target`` is missing from ``ACCEPTANCE``.
    """
    grid = [[method_tau(target, name, task) for task in TASKS] for name in METHODS]
    cell_labels = [[f"{tau:.2f}" for tau in row] for row in grid]

    heatmap = go.Heatmap(
        z=grid,
        x=TASKS,
        y=METHODS,
        colorscale=[
            [0.0, "#0b1220"],
            [0.35, "#1e3a8a"],
            [0.68, "#0f766e"],
            [1.0, "#9a3412"],
        ],
        text=cell_labels,
        texttemplate="%{text}",
        # Plotly hover labels break lines on <br>; a raw newline inside the
        # literal is not rendered as one (and is not a valid string literal).
        hovertemplate="%{y}<br>%{x}: %{z:.2f}",
        colorbar=dict(title="tau"),
    )
    fig = go.Figure(data=heatmap)
    fig.update_layout(
        title=f"DeepSpec Table 1 matrix for {target}",
        height=405,
        margin=dict(l=75, r=25, t=55, b=70),
        xaxis=dict(tickangle=-30),
    )
    return apply_dark_plot_layout(fig)
def production_plot():
    """Return the themed line chart of the two hard-coded production series.

    Each series has two points (x = SLA anchor in tok/s/user, y = throughput
    uplift in percent); point labels are the uplift rendered as ``"+N%"``.
    """
    # (trace name, SLA anchors, uplift percentages, line colour)
    series = (
        ("V4-Flash", [80, 120], [51, 661], "#14b8a6"),
        ("V4-Pro", [35, 50], [52, 406], "#f97316"),
    )

    fig = go.Figure()
    for name, sla_anchors, uplifts, line_color in series:
        fig.add_trace(
            go.Scatter(
                x=sla_anchors,
                y=uplifts,
                mode="lines+markers+text",
                name=name,
                text=[f"+{uplift}%" for uplift in uplifts],
                textposition="top center",
                line=dict(color=line_color, width=3),
                marker=dict(size=12),
                textfont=dict(color="#f8fafc"),
                # Doubled braces keep Plotly's %{x}/%{y} placeholders literal;
                # <br> is the line break Plotly renders in hover labels.
                hovertemplate=f"{name} SLA %{{x}} tok/s/user<br>Throughput uplift %{{y}}%",
            )
        )
    fig.update_layout(
        title="Production DSpark frontier reported for DeepSeek-V4",
        xaxis_title="Interactivity SLA anchor, tok/s/user",
        yaxis_title="Aggregate throughput uplift vs MTP-1",
        height=380,
        margin=dict(l=45, r=25, t=55, b=45),
        yaxis=dict(gridcolor="rgba(148,163,184,0.18)"),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    )
    return apply_dark_plot_layout(fig)
def inventory_plot():
    """Return a themed chart of draft-module parameter counts per target.

    One line-and-marker trace is added per entry of ``METHODS``; x is the
    target name, y the ``params`` field of ``MODELS``, and the hover text is
    the repo id.
    """
    fig = go.Figure()
    for name in METHODS:
        # One pass over MODELS per family; the first four tuple fields are
        # (family, target, repo, params).
        members = [
            (target, params, repo)
            for family, target, repo, params, *_ in MODELS
            if family == name
        ]
        fig.add_trace(
            go.Scatter(
                x=[target for target, _, _ in members],
                y=[params for _, params, _ in members],
                mode="markers+lines",
                name=name,
                marker=dict(size=14, color=COLORS[name]),
                line=dict(color=COLORS[name], width=2),
                text=[repo for _, _, repo in members],
                # Plotly hover labels break lines on <br>; a raw newline inside
                # the literal is not rendered as one.
                hovertemplate="%{text}<br>Draft params %{y:.3f}B",
            )
        )
    fig.update_layout(
        title="Released draft-module parameter scale",
        yaxis_title="Draft module parameters, billions",
        xaxis_title="Target model family",
        height=380,
        margin=dict(l=45, r=25, t=55, b=45),
        yaxis=dict(gridcolor="rgba(148,163,184,0.18)"),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    )
    return apply_dark_plot_layout(fig)
def architecture_panel():
    """Return the text for the "Architectures" tab.

    One card per entry of ``METHODS`` is rendered from ``FAMILY_COPY`` (every
    copy field is passed through ``html.escape``), followed by four fixed
    pipeline-stage lines.
    """
    # NOTE(review): the stylesheet in this file defines .arch-card/.pipeline
    # rules, but these templates carry no element markup — confirm whether tags
    # were lost from the strings.
    escape = html.escape

    def render_card(method):
        copy = FAMILY_COPY[method]
        return f"""
{escape(copy["tag"])} {method}

{escape(copy["summary"])}

Strength{escape(copy["strength"])}
Tradeoff{escape(copy["tradeoff"])}
"""

    card_block = "".join(render_card(method) for method in METHODS)
    return f"""
{card_block}
Targetprefill + bonus token
Draftblock proposal
Scheduleconfidence prefix
Verifylossless target check
"""
def source_panel():
    """Return the fixed "Research basis" text shown in the "Sources" tab.

    The result is a single paragraph wrapped in a leading and a trailing
    newline.
    """
    # NOTE(review): the trailing "DeepSpec repo ... DeepSpec collection" run
    # reads like link labels without targets — confirm whether anchors were lost.
    paragraph = (
        "Research basis The 12 checkpoint pages have no individual model cards; "
        "the DeepSpec GitHub README identifies them as the released checkpoints "
        "used for Table 1 in the DSpark paper. The app uses that table for "
        "accepted-length metrics, the public HF API for checkpoint metadata, and "
        "the DSpark/DFlash/EAGLE-3 papers for architecture notes. "
        "DeepSpec repo DSpark paper DFlash paper EAGLE-3 paper DeepSpec collection"
    )
    return "\n" + paragraph + "\n"
def weighted_acceptance_count(rng, tau, method, scheduled_len, load):
    """Draw how many drafted tokens are accepted in one round.

    The draw is ``tau - 1`` (floored at 0) plus a uniform jitter in
    ``[-0.75, 0.75]`` and a load-dependent shift: downward for DFlash above
    load 65 and for EAGLE-3 above load 80, upward for any other method above
    load 80. The sum is rounded and clamped to ``[0, scheduled_len]``.

    ``rng`` must provide ``uniform(a, b)``; exactly one draw is consumed.
    """
    expected_drafts = max(0.0, tau - 1.0)
    noise = rng.uniform(-0.75, 0.75)

    if method == "DFlash":
        noise -= max(0.0, (load - 65.0) / 140.0)
    elif method == "EAGLE-3":
        noise -= max(0.0, (load - 80.0) / 220.0)
    else:
        noise += max(0.0, (load - 80.0) / 260.0)

    drawn = int(round(expected_drafts + noise))
    return max(0, min(scheduled_len, drawn))


def scheduled_length(method, tau, load):
    """Return how many drafted positions are scheduled for verification.

    * DSpark: ``round(tau + 1.5)`` clamped to ``[2, 7]``, reduced by up to two
      positions as load pressure rises, never below 2.
    * DFlash: always 7.
    * anything else: ``round(tau + 0.5)`` clamped to ``[3, 7]``.
    """
    pressure = max(0.0, min(1.0, (load - 1.0) / 99.0))

    if method == "DSpark":
        confident_prefix = max(2, min(7, int(round(tau + 1.5))))
        trimmed = int(round(confident_prefix - pressure * 2.0))
        return max(2, trimmed)
    if method == "DFlash":
        return 7
    lookahead = int(round(tau + 0.5))
    return max(3, min(7, lookahead))


def simulate_tokens(target, task, method, output_tokens, load, seed, prompt):
    """Render a seeded speculative-decoding trace as text.

    Rounds are simulated until ``output_tokens`` tokens have been emitted or
    16 rounds have run. Each round draws seven draft tokens plus one bonus
    token from the task lexicon (extended with cleaned prompt words of 3 to 17
    characters) and emits ``accepted + 1`` tokens. The random stream is seeded
    from all of ``target``, ``task``, ``method``, ``seed`` and ``prompt``, so
    equal inputs reproduce the same trace.

    Raises ``KeyError`` if ``task`` is not in ``LEXICON`` or the
    target/method/task triple is not in ``ACCEPTANCE``.
    """
    rng = random.Random(f"{target}|{task}|{method}|{seed}|{prompt}")

    vocab = list(LEXICON[task])
    if prompt.strip():
        cleaned = [word.strip(".,:;!?()[]{}<>").lower() for word in prompt.split()]
        vocab.extend([word for word in cleaned if 2 < len(word) < 18])

    tau = method_tau(target, method, task)
    emitted = 0
    rows = []

    for cycle in range(1, 17):
        if emitted >= output_tokens:
            break

        sched = scheduled_length(method, tau, load)
        accepted = weighted_acceptance_count(rng, tau, method, sched, load)
        # The first rejected slot exists only when part of the scheduled prefix
        # was refused.
        rejected = accepted if accepted < sched else None

        token_spans = []
        for idx in range(7):
            token = html.escape(rng.choice(vocab))
            if idx < accepted:
                cls, label = "tok accepted", "accepted"
            elif idx == rejected:
                cls, label = "tok rejected", "rejected"
            elif idx >= sched:
                cls, label = "tok dropped", "not verified"
            else:
                cls, label = "tok tail", "discarded suffix"
            # NOTE(review): cls and label are computed but never interpolated,
            # although the stylesheet defines matching .tok.* rules — confirm
            # whether wrapper markup was lost from this string.
            token_spans.append(f"{token}")

        bonus = html.escape(rng.choice(vocab))
        token_spans.append(f"{bonus}")
        emitted += accepted + 1

        rows.append(
            f"""
round {cycle}
{''.join(token_spans)}
{accepted}+1 emitted
"""
        )

    return f"""
{method}{target} / {task} / load {load:.0f}%
accepted first reject pruned target bonus
{''.join(rows)}
"""
def production_cards():
    """Return the four fixed production-frontier summary lines.

    The lines are newline-separated and the result starts and ends with a
    newline.
    """
    # NOTE(review): label, figure and caption run together without separators
    # (e.g. "SLA+51%aggregate") — confirm whether element markup was lost.
    lines = (
        "V4-Flash moderate SLA+51%aggregate throughput at 80 tok/s/user",
        "V4-Flash matched capacity+60-85%faster per-user generation",
        "V4-Pro moderate SLA+52%aggregate throughput at 35 tok/s/user",
        "V4-Pro matched capacity+57-78%faster per-user generation",
    )
    return "\n" + "\n".join(lines) + "\n"
def render_all(target, task, method, output_tokens, baseline_tps, load, seed, prompt):
    """Recompute every output of the app for the current control values.

    Returns a 10-tuple in the order of the ``outputs`` list wired up in the UI
    section: metric cards, simulator trace, bar chart, heatmap, benchmark rows,
    inventory plot, architecture panel, production cards, production plot and
    source panel.

    ``output_tokens`` is coerced to ``int``. ``seed`` is coerced to ``int``
    too, except that ``None`` (what a cleared number field delivers) falls back
    to ``0`` instead of raising ``TypeError``. A falsy ``prompt`` is treated as
    the empty string.
    """
    seed_value = 0 if seed is None else int(seed)
    return (
        metric_cards(target, task, method, baseline_tps, load),
        simulate_tokens(target, task, method, int(output_tokens), load, seed_value, prompt or ""),
        acceptance_bar(target, task),
        acceptance_heatmap(target),
        benchmark_rows(target),
        inventory_plot(),
        architecture_panel(),
        production_cards(),
        production_plot(),
        source_panel(),
    )


# Stylesheet text for the app, reproduced verbatim. It is not referenced in the
# part of the file visible here; presumably it is handed to Gradio further
# down — confirm.
CSS = """ :root { --bg: #070b13; --panel: rgba(15, 23, 42, 0.86); --line: rgba(148, 163, 184, 0.18); --text: #e5efff; --muted: #b8c4d6; } .gradio-container { color-scheme: dark; --body-background-fill: #070b13; --body-text-color: #e5efff; --body-text-color-subdued: #e5efff; --block-background-fill: rgba(15, 23, 42, 0.92); --block-border-color: rgba(148, 163, 184, 0.24); --block-label-text-color: #f8fafc; --border-color-primary: rgba(148, 163, 184, 0.28); --input-background-fill: #0b1220; --input-border-color: rgba(148, 163, 184, 0.36); --input-text-color: #f8fafc; --input-text-color-focus: #ffffff; --input-placeholder-color: #dbeafe; --button-primary-background-fill: #0f766e; --button-primary-background-fill-hover: #0d9488; --button-primary-text-color: #f8fafc; --link-text-color: #67e8f9; --neutral-50: #f8fafc; --neutral-100: #f1f5f9; --neutral-200: #e2e8f0; --neutral-300: #cbd5e1; --neutral-400: #dbeafe; --neutral-500: #e5efff; --table-background-fill: #0b1220; --table-even-background-fill: #0f172a; --table-odd-background-fill: #0b1220; --table-border-color: rgba(148, 163, 184, 0.24); --table-text-color: #e5efff; --table-row-focus: #1e293b; background: radial-gradient(circle at 12% 0%, rgba(20, 184, 166, 0.18), transparent 28%), linear-gradient(135deg, #070b13 0%, #0f172a 52%, #111827 100%); color: var(--text); } .gradio-container label, .gradio-container .label, .gradio-container .prose, .gradio-container .prose * { color: #dbeafe !important; opacity: 1 !important; } .gradio-container h1, .gradio-container h2, .gradio-container h3, 
.gradio-container h4, .gradio-container h5, .gradio-container h6, .gradio-container legend, .gradio-container summary, .gradio-container [data-testid="block-info"], .gradio-container [class*="label"], .gradio-container [class*="Label"], .gradio-container [data-testid*="label"] { color: #f8fafc !important; opacity: 1 !important; font-weight: 600 !important; } .gradio-container small, .gradio-container .secondary, .gradio-container [class*="secondary"], .gradio-container [class*="Description"], .gradio-container [class*="description"] { color: #cbd5e1 !important; opacity: 1 !important; } .gradio-container .min_value, .gradio-container .max_value, .gradio-container [class*="min_value"], .gradio-container [class*="max_value"] { color: #dbeafe !important; opacity: 1 !important; } .gradio-container input, .gradio-container textarea, .gradio-container select { background: #0b1220 !important; color: #f8fafc !important; border-color: rgba(148, 163, 184, 0.36) !important; } .gradio-container input:disabled, .gradio-container textarea:disabled, .gradio-container select:disabled, .gradio-container button:disabled, .gradio-container [disabled], .gradio-container [aria-disabled="true"] { opacity: 1 !important; background: #0b1220 !important; color: #f8fafc !important; -webkit-text-fill-color: #f8fafc !important; border-color: rgba(148, 163, 184, 0.34) !important; } .gradio-container .disabled, .gradio-container .disabled *, .gradio-container [class*="disabled"], .gradio-container [class*="disabled"] * { opacity: 1 !important; } .gradio-container .disabled input, .gradio-container .disabled textarea, .gradio-container .disabled select, .gradio-container [class*="disabled"] input, .gradio-container [class*="disabled"] textarea, .gradio-container [class*="disabled"] select { color: #f8fafc !important; -webkit-text-fill-color: #f8fafc !important; } .gradio-container .form, .gradio-container .form *, .gradio-container fieldset, .gradio-container fieldset *, .gradio-container 
.block.padded, .gradio-container .block.padded *, .gradio-container [data-testid="input"], .gradio-container [data-testid="input"] *, .gradio-container [data-testid*="slider"], .gradio-container [data-testid*="slider"] *, .gradio-container [data-testid*="textbox"], .gradio-container [data-testid*="textbox"] *, .gradio-container [data-testid*="dropdown"], .gradio-container [data-testid*="dropdown"] *, .gradio-container [data-testid*="radio"], .gradio-container [data-testid*="radio"] * { opacity: 1 !important; filter: none !important; } .gradio-container .form label, .gradio-container .form label *, .gradio-container .form legend, .gradio-container .form span, .gradio-container .form p, .gradio-container .form input, .gradio-container .form textarea, .gradio-container .form select, .gradio-container fieldset label, .gradio-container fieldset label *, .gradio-container fieldset legend, .gradio-container fieldset span, .gradio-container fieldset p, .gradio-container [data-testid*="label"], .gradio-container [data-testid*="label"] *, .gradio-container [data-testid*="radio"] label, .gradio-container [data-testid*="radio"] label *, .gradio-container [data-testid*="radio"] span, .gradio-container [data-testid*="slider"] label, .gradio-container [data-testid*="slider"] label *, .gradio-container [data-testid*="slider"] input, .gradio-container [data-testid*="slider"] span, .gradio-container [data-testid*="textbox"] label, .gradio-container [data-testid*="textbox"] label *, .gradio-container [data-testid*="textbox"] textarea, .gradio-container [data-testid*="dropdown"] label, .gradio-container [data-testid*="dropdown"] label *, .gradio-container [data-testid*="dropdown"] input, .gradio-container [data-testid*="number"] input, .gradio-container [data-testid*="number"] span { color: #f8fafc !important; -webkit-text-fill-color: #f8fafc !important; opacity: 1 !important; filter: none !important; text-shadow: 0 0 0 #f8fafc !important; } .gradio-container .form input::placeholder, 
.gradio-container .form textarea::placeholder, .gradio-container [data-testid*="textbox"] textarea::placeholder, .gradio-container [data-testid*="dropdown"] input::placeholder { color: #dbeafe !important; -webkit-text-fill-color: #dbeafe !important; opacity: 1 !important; } .gradio-container input::placeholder, .gradio-container textarea::placeholder { color: #dbeafe !important; -webkit-text-fill-color: #dbeafe !important; opacity: 1 !important; } .gradio-container input:focus, .gradio-container textarea:focus, .gradio-container select:focus { border-color: rgba(20, 184, 166, 0.76) !important; box-shadow: 0 0 0 2px rgba(20, 184, 166, 0.18) !important; } .gradio-container button { border-color: rgba(148, 163, 184, 0.28) !important; } .gradio-container button[role="tab"], .gradio-container .tab-nav button { background: #0b1220 !important; color: #cbd5e1 !important; border-color: rgba(148, 163, 184, 0.2) !important; } .gradio-container button[aria-selected="true"], .gradio-container button[role="tab"][aria-selected="true"] { background: rgba(20, 184, 166, 0.18) !important; color: #f8fafc !important; border-color: rgba(20, 184, 166, 0.48) !important; } .gradio-container label:has(input[type="radio"]), .gradio-container label:has(input[type="checkbox"]) { background: #0b1220 !important; color: #f8fafc !important; border: 1px solid rgba(148, 163, 184, 0.38) !important; border-radius: 8px !important; opacity: 1 !important; } .gradio-container label:has(input[type="radio"]) *, .gradio-container label:has(input[type="checkbox"]) * { color: #f8fafc !important; opacity: 1 !important; } .gradio-container label:has(input[type="radio"]:checked), .gradio-container label:has(input[type="checkbox"]:checked) { background: rgba(20, 184, 166, 0.22) !important; border-color: rgba(20, 184, 166, 0.72) !important; box-shadow: inset 0 0 0 1px rgba(45, 212, 191, 0.28) !important; } .gradio-container input[type="radio"], .gradio-container input[type="checkbox"] { accent-color: #14b8a6 
!important; } .gradio-container input[type="radio"] + span, .gradio-container input[type="checkbox"] + span { color: #f8fafc !important; opacity: 1 !important; } .gradio-container [role="radiogroup"] label, .gradio-container [role="radiogroup"] [role="radio"] { background: #0b1220 !important; color: #f8fafc !important; border: 1px solid rgba(148, 163, 184, 0.38) !important; border-radius: 8px !important; opacity: 1 !important; } .gradio-container [role="radiogroup"] label *, .gradio-container [role="radiogroup"] [role="radio"] *, .gradio-container [role="radiogroup"] span { color: #f8fafc !important; opacity: 1 !important; } .gradio-container [role="radiogroup"] label:has(input:checked), .gradio-container [role="radiogroup"] [role="radio"][aria-checked="true"] { background: rgba(20, 184, 166, 0.22) !important; border-color: rgba(20, 184, 166, 0.72) !important; } .gradio-container .wrap, .gradio-container .block, .gradio-container .panel, .gradio-container .form { background: rgba(15, 23, 42, 0.72) !important; border-color: rgba(148, 163, 184, 0.22) !important; color: #e5efff !important; } .gradio-container .dataframe, .gradio-container .table-wrap, .gradio-container table { background: #0b1220 !important; color: #e5efff !important; border-color: rgba(148, 163, 184, 0.24) !important; } .gradio-container .table-container, .gradio-container svelte-virtual-table-viewport, .gradio-container button.disable_click, .gradio-container button[aria-label*="Drop CSV"], .gradio-container button[aria-label*="TSV"] { background: #0b1220 !important; color: #e5efff !important; border-color: rgba(148, 163, 184, 0.24) !important; } .gradio-container table.table, .gradio-container table.table thead, .gradio-container table.table tbody, .gradio-container table.table tfoot { background: #0b1220 !important; color: #e5efff !important; } .gradio-container th { background: #111827 !important; color: #f8fafc !important; border-color: rgba(148, 163, 184, 0.24) !important; } .gradio-container 
table.table thead, .gradio-container table.table thead tr, .gradio-container table.table th, .gradio-container table.table th *, .gradio-container table.table .header-content, .gradio-container table.table .header-button, .gradio-container table.table .cell-wrap { background: #111827 !important; color: #f8fafc !important; border-color: rgba(148, 163, 184, 0.24) !important; opacity: 1 !important; } .gradio-container td { background: #0b1220 !important; color: #e2e8f0 !important; border-color: rgba(148, 163, 184, 0.18) !important; } .gradio-container tr:nth-child(even) td { background: #0f172a !important; } .gradio-container table.table tbody tr, .gradio-container table.table tbody td, .gradio-container table.table tbody .cell-wrap { background: #0b1220 !important; color: #e2e8f0 !important; } .gradio-container table.table tbody tr.row-odd, .gradio-container table.table tbody tr.row-odd td, .gradio-container table.table tbody tr.row-odd .cell-wrap { background: #0f172a !important; } .gradio-container table.table span.text, .gradio-container table.table button { color: #e5efff !important; -webkit-text-fill-color: #e5efff !important; opacity: 1 !important; } .gradio-container .plot-container, .gradio-container .js-plotly-plot { background: #0b1220 !important; border-radius: 8px; } .gradio-container [aria-label="Empty value"], .gradio-container .empty, .gradio-container .unpadded_box, .gradio-container .large.unpadded_box { background: #0b1220 !important; color: #93c5fd !important; border-color: rgba(148, 163, 184, 0.24) !important; } .gradio-container [aria-label="Empty value"] *, .gradio-container .empty *, .gradio-container .unpadded_box * { color: #93c5fd !important; opacity: 1 !important; } .gradio-container [data-testid="status-tracker"], .gradio-container [data-testid="status-tracker"].wrap, .gradio-container .wrap.default.full, .gradio-container .progress-text, .gradio-container .meta-text { background: rgba(11, 18, 32, 0.92) !important; color: #dbeafe 
!important; opacity: 1 !important; } .gradio-container [data-testid="status-tracker"].hide, .gradio-container .wrap.default.full.hide { background: transparent !important; border: 0 !important; color: transparent !important; opacity: 0 !important; visibility: hidden !important; pointer-events: none !important; } .gradio-container [data-testid="status-tracker"].hide *, .gradio-container .wrap.default.full.hide * { background: transparent !important; color: transparent !important; opacity: 0 !important; visibility: hidden !important; } .gradio-container .eta-bar { background: rgba(20, 184, 166, 0.18) !important; opacity: 1 !important; } .gradio-container [data-testid="block-label"], .gradio-container label[data-testid="block-label"] { background: #0b1220 !important; color: #f8fafc !important; border-color: rgba(148, 163, 184, 0.28) !important; opacity: 1 !important; } .gradio-container [data-testid="block-label"] *, .gradio-container label[data-testid="block-label"] * { color: #f8fafc !important; opacity: 1 !important; } .gradio-container footer, .gradio-container footer *, .gradio-container footer button, .gradio-container footer a { background: #0b1220 !important; color: #e5efff !important; -webkit-text-fill-color: #e5efff !important; opacity: 1 !important; } .gradio-container footer img { background: transparent !important; } .main-shell { border: 1px solid var(--line); border-radius: 8px; padding: 24px; background: linear-gradient(145deg, rgba(15, 23, 42, 0.94), rgba(17, 24, 39, 0.78)); box-shadow: 0 24px 80px rgba(0, 0, 0, 0.28); } .hero-title { display: grid; grid-template-columns: 1.25fr 0.75fr; gap: 18px; align-items: stretch; } .hero-title h1 { margin: 0; font-size: clamp(2.1rem, 4vw, 4.5rem); line-height: 0.92; letter-spacing: 0; } .hero-title p { color: var(--muted); max-width: 760px; font-size: 1rem; } .hero-stats { display: grid; grid-template-columns: repeat(2, minmax(0, 1fr)); gap: 10px; } .hero-stat { border: 1px solid var(--line); border-radius: 8px; 
padding: 14px; background: rgba(2, 6, 23, 0.36); } .hero-stat b { display: block; font-size: 1.55rem; color: #ffffff; } .hero-stat span { color: var(--muted); font-size: 0.82rem; } .metric-grid { display: grid; grid-template-columns: repeat(4, minmax(0, 1fr)); gap: 12px; } .metric-grid.compact { margin-bottom: 14px; } .metric-card { border: 1px solid var(--line); border-radius: 8px; padding: 14px; min-height: 118px; background: rgba(2, 6, 23, 0.66); position: relative; overflow: hidden; } .metric-card:before { content: ""; position: absolute; inset: 0 auto 0 0; width: 4px; background: var(--accent, #38bdf8); } .metric-card span, .metric-card small { display: block; color: var(--muted); } .metric-card strong { display: block; margin: 6px 0; font-size: 2rem; color: #ffffff; } .accent-dspark { --accent: #14b8a6; } .accent-orange { --accent: #f97316; } .accent-violet { --accent: #8b5cf6; } .accent-blue { --accent: #38bdf8; } .sim-head { display: flex; align-items: center; justify-content: space-between; gap: 12px; margin: 8px 0 12px; } .sim-head span { display: block; color: var(--muted); } .legend { color: var(--muted); font-size: 0.86rem; } .dot { width: 10px; height: 10px; border-radius: 50%; display: inline-block; margin: 0 5px 0 12px; } .simulator { display: grid; gap: 8px; } .cycle-row { display: grid; grid-template-columns: 76px 1fr 96px; gap: 10px; align-items: center; border: 1px solid var(--line); border-radius: 8px; padding: 10px; background: rgba(15, 23, 42, 0.58); } .cycle-id, .cycle-stat { color: var(--muted); font-size: 0.82rem; } .token-strip { display: flex; flex-wrap: wrap; gap: 6px; } .tok { border: 1px solid transparent; border-radius: 6px; padding: 5px 8px; font-size: 0.85rem; line-height: 1.1; } .accepted, .tok.accepted { background: rgba(20, 184, 166, 0.18); color: #99f6e4; border-color: rgba(20, 184, 166, 0.36); } .rejected, .tok.rejected { background: rgba(244, 63, 94, 0.18); color: #fecdd3; border-color: rgba(244, 63, 94, 0.38); } .dropped, 
.tok.dropped { background: rgba(100, 116, 139, 0.18); color: #cbd5e1; border-color: rgba(148, 163, 184, 0.22); text-decoration: line-through; } .tok.tail { background: rgba(249, 115, 22, 0.14); color: #fed7aa; border-color: rgba(249, 115, 22, 0.26); } .bonus, .tok.bonus { background: rgba(56, 189, 248, 0.16); color: #bae6fd; border-color: rgba(56, 189, 248, 0.34); } .arch-grid { display: grid; grid-template-columns: repeat(3, minmax(0, 1fr)); gap: 12px; margin-bottom: 14px; } .arch-card { border: 1px solid var(--line); border-radius: 8px; padding: 16px; background: rgba(2, 6, 23, 0.42); box-shadow: inset 0 3px 0 var(--accent); } .arch-top span { color: var(--accent); text-transform: uppercase; font-size: 0.76rem; } .arch-top strong { display: block; color: #fff; font-size: 1.3rem; } .arch-card p, .arch-detail { color: var(--muted); } .arch-detail { margin-top: 10px; } .arch-detail b { display: block; color: #e5efff; } .pipeline { display: grid; grid-template-columns: 1fr 24px 1fr 24px 1fr 24px 1fr; gap: 8px; align-items: center; border: 1px solid var(--line); border-radius: 8px; padding: 14px; background: rgba(15, 23, 42, 0.54); } .pipeline div { min-height: 72px; border-radius: 8px; border: 1px solid rgba(148, 163, 184, 0.18); padding: 12px; background: rgba(2, 6, 23, 0.42); } .pipeline b, .pipeline span { display: block; } .pipeline span { color: var(--muted); } .pipeline i { height: 2px; background: linear-gradient(90deg, #14b8a6, #f97316); } .source-panel { display: flex; gap: 10px; flex-wrap: wrap; align-items: center; border: 1px solid var(--line); border-radius: 8px; padding: 12px; background: rgba(2, 6, 23, 0.34); color: var(--muted); } .source-panel b { color: #fff; } .source-panel span { flex: 1 1 520px; } .source-panel a { color: #67e8f9; text-decoration: none; border: 1px solid rgba(103, 232, 249, 0.22); border-radius: 6px; padding: 4px 8px; } @media (max-width: 900px) { .hero-title, .metric-grid, .arch-grid, .pipeline { grid-template-columns: 1fr; } 
.pipeline i { height: 18px; width: 2px; margin-left: 12px; } .cycle-row { grid-template-columns: 1fr; } } """
.pipeline i { height: 18px; width: 2px; margin-left: 12px; } .cycle-row { grid-template-columns: 1fr; } } """ with gr.Blocks() as demo: gr.HTML( """

DeepSpec Decoding Lab

Explore DeepSeek's 12 released draft modules across DSpark, DFlash, and EAGLE-3 with paper-backed accepted-length metrics, architecture comparisons, and a deterministic speculative-decoding simulator.

12released draft checkpoints
3speculative-decoding families
9benchmark tasks from Table 1
60-85%reported V4-Flash per-user speed lift
""" ) with gr.Row(): with gr.Column(scale=1, min_width=280): target = gr.Dropdown(TARGETS, value="Qwen3-4B", label="Target family") task = gr.Dropdown(TASKS, value="HumanEval", label="Benchmark profile") method = gr.Radio(METHODS, value="DSpark", label="Primary draft family") output_tokens = gr.Slider(24, 128, value=64, step=8, label="Simulation output budget") baseline_tps = gr.Slider(5, 160, value=40, step=5, label="Autoregressive baseline tok/s") load = gr.Slider(1, 100, value=70, step=1, label="Serving load pressure") seed = gr.Number(value=7, label="Deterministic seed", precision=0) prompt = gr.Textbox( value="Write a compact function, then explain why it is correct.", label="Prompt flavor", lines=3, ) run = gr.Button("Run Speculation", variant="primary") with gr.Column(scale=3): cards = gr.HTML() sim = gr.HTML() with gr.Tabs(): with gr.Tab("Benchmark Matrix"): bar = gr.Plot() heatmap = gr.Plot() table = gr.Dataframe( headers=["Task", "Domain", "DSpark", "DFlash", "EAGLE-3"], datatype=["str", "str", "str", "str", "str"], interactive=False, wrap=True, ) with gr.Tab("Checkpoint Inventory"): inv_plot = gr.Plot() inv_table = gr.Dataframe( value=model_rows(), headers=["Family", "Target", "Repo", "Params", "Architecture", "Horizon", "Layers", "Confidence", "Sequential signal"], datatype=["str"] * 9, interactive=False, wrap=True, ) with gr.Tab("Architectures"): arch = gr.HTML() with gr.Tab("Production Frontier"): prod_cards = gr.HTML() prod_plot = gr.Plot() with gr.Tab("Sources"): sources = gr.HTML() outputs = [cards, sim, bar, heatmap, table, inv_plot, arch, prod_cards, prod_plot, sources] inputs = [target, task, method, output_tokens, baseline_tps, load, seed, prompt] demo.load(render_all, inputs=inputs, outputs=outputs, api_name=None, api_visibility="private") for control in [target, task, method, output_tokens, baseline_tps, load, seed]: control.change(render_all, inputs=inputs, outputs=outputs, api_name=None, api_visibility="private") prompt.submit(render_all, 
inputs=inputs, outputs=outputs, api_name=None, api_visibility="private") run.click(render_all, inputs=inputs, outputs=outputs, api_name="simulate") demo.queue(default_concurrency_limit=8) if __name__ == "__main__": demo.launch(css=CSS, theme=gr.themes.Base())