Spaces:
Running
Running
| import os | |
# Fallback environment settings. setdefault() only fills in a value when the
# variable is not already defined, so externally supplied settings still win.
# NOTE(review): presumably the /tmp paths are used because the hosting
# container's default cache locations are not writable — confirm against the
# deployment environment.
os.environ.setdefault("HF_HOME", "/tmp/huggingface")
os.environ.setdefault("HF_MODULES_CACHE", "/tmp/hf_modules")
os.environ.setdefault("MPLCONFIGDIR", "/tmp/matplotlib")
os.environ.setdefault("GRADIO_SSR_MODE", "false")
| import html | |
| import random | |
| from statistics import mean | |
| import gradio as gr | |
| import plotly.graph_objects as go | |
# Benchmark names in display order. The inner dictionaries of ACCEPTANCE and
# the LEXICON table are keyed by exactly these strings.
TASKS = [
    "GSM8K",
    "MATH-500",
    "AIME25",
    "MBPP",
    "HumanEval",
    "LiveCodeBench",
    "MT-Bench",
    "Alpaca",
    "Arena-Hard v2",
]
# Grouping of the benchmarks above into three domains (used by domain_for_task).
DOMAINS = {
    "Math": ["GSM8K", "MATH-500", "AIME25"],
    "Code": ["MBPP", "HumanEval", "LiveCodeBench"],
    "Chat": ["MT-Bench", "Alpaca", "Arena-Hard v2"],
}
# Target models (outer keys of ACCEPTANCE) and draft families (second-level keys).
TARGETS = ["Qwen3-4B", "Qwen3-8B", "Qwen3-14B", "Gemma4-12B"]
METHODS = ["DSpark", "DFlash", "EAGLE-3"]
# Hex colour per draft family, used for plot traces and card accents.
# NOTE(review): "Baseline" is not referenced by any code visible in this chunk.
COLORS = {
    "DSpark": "#14b8a6",
    "DFlash": "#f97316",
    "EAGLE-3": "#8b5cf6",
    "Baseline": "#94a3b8",
}
# Accepted length ("tau") lookup: ACCEPTANCE[target][method][task] -> float.
# The charts label these values "Accepted length, including target bonus
# token"; source_panel() attributes them to Table 1 of the DSpark paper.
ACCEPTANCE = {
    "Qwen3-4B": {
        "EAGLE-3": {
            "GSM8K": 5.14,
            "MATH-500": 4.62,
            "AIME25": 3.92,
            "MBPP": 3.69,
            "HumanEval": 4.16,
            "LiveCodeBench": 3.77,
            "MT-Bench": 2.39,
            "Alpaca": 2.26,
            "Arena-Hard v2": 2.55,
        },
        "DFlash": {
            "GSM8K": 5.40,
            "MATH-500": 4.85,
            "AIME25": 4.15,
            "MBPP": 4.40,
            "HumanEval": 4.74,
            "LiveCodeBench": 4.18,
            "MT-Bench": 3.07,
            "Alpaca": 2.96,
            "Arena-Hard v2": 2.83,
        },
        "DSpark": {
            "GSM8K": 6.11,
            "MATH-500": 5.70,
            "AIME25": 4.89,
            "MBPP": 5.13,
            "HumanEval": 5.38,
            "LiveCodeBench": 4.86,
            "MT-Bench": 3.64,
            "Alpaca": 3.54,
            "Arena-Hard v2": 3.29,
        },
    },
    "Qwen3-8B": {
        "EAGLE-3": {
            "GSM8K": 5.30,
            "MATH-500": 4.77,
            "AIME25": 3.91,
            "MBPP": 3.96,
            "HumanEval": 4.33,
            "LiveCodeBench": 4.17,
            "MT-Bench": 2.66,
            "Alpaca": 2.54,
            "Arena-Hard v2": 2.54,
        },
        "DFlash": {
            "GSM8K": 5.33,
            "MATH-500": 4.91,
            "AIME25": 4.07,
            "MBPP": 4.36,
            "HumanEval": 4.64,
            "LiveCodeBench": 4.39,
            "MT-Bench": 3.11,
            "Alpaca": 2.98,
            "Arena-Hard v2": 2.81,
        },
        "DSpark": {
            "GSM8K": 6.17,
            "MATH-500": 5.78,
            "AIME25": 5.01,
            "MBPP": 5.16,
            "HumanEval": 5.52,
            "LiveCodeBench": 5.17,
            "MT-Bench": 3.72,
            "Alpaca": 3.58,
            "Arena-Hard v2": 3.21,
        },
    },
    "Qwen3-14B": {
        "EAGLE-3": {
            "GSM8K": 5.24,
            "MATH-500": 4.60,
            "AIME25": 3.71,
            "MBPP": 3.81,
            "HumanEval": 4.14,
            "LiveCodeBench": 4.01,
            "MT-Bench": 2.62,
            "Alpaca": 2.47,
            "Arena-Hard v2": 2.48,
        },
        "DFlash": {
            "GSM8K": 5.41,
            "MATH-500": 4.84,
            "AIME25": 3.98,
            "MBPP": 4.44,
            "HumanEval": 4.59,
            "LiveCodeBench": 4.33,
            "MT-Bench": 3.10,
            "Alpaca": 2.94,
            "Arena-Hard v2": 2.72,
        },
        "DSpark": {
            "GSM8K": 6.21,
            "MATH-500": 5.74,
            "AIME25": 4.94,
            "MBPP": 5.26,
            "HumanEval": 5.43,
            "LiveCodeBench": 5.02,
            "MT-Bench": 3.70,
            "Alpaca": 3.58,
            "Arena-Hard v2": 3.13,
        },
    },
    "Gemma4-12B": {
        "EAGLE-3": {
            "GSM8K": 5.87,
            "MATH-500": 5.46,
            "AIME25": 4.83,
            "MBPP": 4.72,
            "HumanEval": 5.37,
            "LiveCodeBench": 4.16,
            "MT-Bench": 3.19,
            "Alpaca": 3.06,
            "Arena-Hard v2": 2.72,
        },
        "DFlash": {
            "GSM8K": 5.45,
            "MATH-500": 5.04,
            "AIME25": 4.22,
            "MBPP": 4.39,
            "HumanEval": 4.95,
            "LiveCodeBench": 3.70,
            "MT-Bench": 2.98,
            "Alpaca": 2.84,
            "Arena-Hard v2": 2.59,
        },
        "DSpark": {
            "GSM8K": 6.05,
            "MATH-500": 5.78,
            "AIME25": 5.12,
            "MBPP": 5.11,
            "HumanEval": 5.64,
            "LiveCodeBench": 4.51,
            "MT-Bench": 3.49,
            "Alpaca": 3.35,
            "Arena-Hard v2": 2.92,
        },
    },
}
# Checkpoint inventory, one tuple per released draft model. Field order, as
# unpacked by model_rows():
#   (family, target, repo, params, arch, horizon, layers, confidence, seq)
# `params` is rendered with a "B" suffix and plotted on an axis labelled
# "Draft module parameters, billions".
# NOTE(review): the DFlash rows list the *DSparkModel architecture names —
# confirm this matches the published checkpoint configs.
MODELS = [
    ("DSpark", "Qwen3-4B", "deepseek-ai/dspark_qwen3_4b_block7", 1.393, "Qwen3DSparkModel", "block7", "5", "yes", "Markov rank 256"),
    ("DSpark", "Qwen3-8B", "deepseek-ai/dspark_qwen3_8b_block7", 2.371, "Qwen3DSparkModel", "block7", "5", "yes", "Markov rank 256"),
    ("DSpark", "Qwen3-14B", "deepseek-ai/dspark_qwen3_14b_block7", 3.416, "Qwen3DSparkModel", "block7", "5", "yes", "Markov rank 256"),
    ("DSpark", "Gemma4-12B", "deepseek-ai/dspark_gemma4_12b_block7", 3.430, "Gemma4DSparkModel", "block7", "5", "yes", "Markov rank 256"),
    ("DFlash", "Qwen3-4B", "deepseek-ai/dflash_qwen3_4b_block7", 1.315, "Qwen3DSparkModel", "block7", "5", "no", "parallel block"),
    ("DFlash", "Qwen3-8B", "deepseek-ai/dflash_qwen3_8b_block7", 2.293, "Qwen3DSparkModel", "block7", "5", "no", "parallel block"),
    ("DFlash", "Qwen3-14B", "deepseek-ai/dflash_qwen3_14b_block7", 3.338, "Qwen3DSparkModel", "block7", "5", "no", "parallel block"),
    ("DFlash", "Gemma4-12B", "deepseek-ai/dflash_gemma4_12b_block7", 3.296, "Gemma4DSparkModel", "block7", "5", "no", "parallel block"),
    ("EAGLE-3", "Qwen3-4B", "deepseek-ai/eagle3_qwen3_4b_ttt7", 0.927, "Qwen3Eagle3Model", "ttt7", "1", "no", "training-time test"),
    ("EAGLE-3", "Qwen3-8B", "deepseek-ai/eagle3_qwen3_8b_ttt7", 1.547, "Qwen3Eagle3Model", "ttt7", "1", "no", "training-time test"),
    ("EAGLE-3", "Qwen3-14B", "deepseek-ai/eagle3_qwen3_14b_ttt7", 2.054, "Qwen3Eagle3Model", "ttt7", "1", "no", "training-time test"),
    ("EAGLE-3", "Gemma4-12B", "deepseek-ai/eagle3_gemma4_12b_ttt7", 2.362, "Gemma4Eagle3Model", "ttt7", "1", "no", "training-time test"),
]
# Display copy for each draft family, rendered by architecture_panel().
# Every entry carries the same four keys: tag, summary, strength, tradeoff.
FAMILY_COPY = {
    "DSpark": {
        "tag": "semi-autoregressive",
        "summary": "Parallel DFlash-style backbone plus a lightweight Markov head and confidence scheduler.",
        "strength": "Best accepted length in the released table and designed for load-aware serving.",
        "tradeoff": "More machinery than a pure block drafter.",
    },
    "DFlash": {
        "tag": "parallel block diffusion",
        "summary": "Predicts a full block in one pass with target-feature conditioning and KV injection.",
        "strength": "Very low drafting latency and strong first-token accuracy.",
        "tradeoff": "Suffix tokens decay because positions are predicted independently.",
    },
    "EAGLE-3": {
        "tag": "autoregressive feature drafter",
        "summary": "Uses training-time test and fused target features to improve classic EAGLE drafting.",
        "strength": "Strong lossless speculative baseline with stable sequential dependency modeling.",
        "tradeoff": "Drafting cost scales with lookahead length.",
    },
}
# Per-benchmark word lists. simulate_tokens() samples from these to fill the
# token strip with placeholder words; they are display filler, not model output.
LEXICON = {
    "GSM8K": "therefore the total is because each group contributes remaining answer equals final".split(),
    "MATH-500": "let x satisfy equation substitute simplify bound hence root value proof".split(),
    "AIME25": "triangle integer modulo sequence polynomial area count radius answer".split(),
    "MBPP": "def return list index loop condition append result function test".split(),
    "HumanEval": "class function assert edge case input output sorted recursive".split(),
    "LiveCodeBench": "stdin parse graph dp binary search modulo constraints optimize".split(),
    "MT-Bench": "I would compare the tradeoff and explain the practical implication".split(),
    "Alpaca": "Here is a concise response with steps context and caveats".split(),
    "Arena-Hard v2": "The best answer balances reasoning specificity and directness".split(),
}
def pct_gain(new, old):
    """Return the percentage change of ``new`` relative to ``old``.

    A falsy ``old`` (for example ``0`` or ``None``) yields ``0.0`` so the
    caller never hits a division by zero.
    """
    if old:
        ratio = new / old
        return (ratio - 1.0) * 100.0
    return 0.0
def domain_for_task(task):
    """Return the DOMAINS key whose task list contains ``task``.

    Falls back to ``"Mixed"`` when the task is not listed under any domain.
    """
    matches = (name for name, members in DOMAINS.items() if task in members)
    return next(matches, "Mixed")
def model_rows():
    """Return MODELS as a list of table rows.

    Each row mirrors the source tuple, except that the parameter count is
    formatted to three decimals with a ``B`` suffix.
    """
    return [
        [family, target, repo, f"{params:.3f}B", arch, horizon, layers, confidence, seq]
        for family, target, repo, params, arch, horizon, layers, confidence, seq in MODELS
    ]
def benchmark_rows(target):
    """Return one table row per benchmark for ``target``.

    Row layout: task name, its domain, then the accepted length of every
    family in METHODS order, each formatted to two decimals.
    """
    return [
        [
            task,
            domain_for_task(task),
            *(f"{ACCEPTANCE[target][family][task]:.2f}" for family in METHODS),
        ]
        for task in TASKS
    ]
def method_tau(target, method, task):
    """Look up the accepted length for a (target, method, task) triple.

    Raises ``KeyError`` if any of the three keys is missing from ACCEPTANCE.
    """
    per_task = ACCEPTANCE[target][method]
    return per_task[task]
def simulated_tps(tau, method, baseline_tps, load):
    """Estimate tokens/sec for a draft family with a simple closed-form model.

    The baseline rate is multiplied by ``tau``, divided by ``1 + overhead``,
    and then reduced by ``load_pressure * waste``. ``load`` is mapped linearly
    from the range 1..100 onto 0..1 and clamped to that interval.

    Raises ``KeyError`` for a method other than DSpark, DFlash or EAGLE-3.
    """
    # (overhead, waste) per family.
    profiles = {
        "DSpark": (0.11, 0.06),
        "DFlash": (0.10, 0.24),
        "EAGLE-3": (0.19, 0.16),
    }
    load_pressure = max(0.0, min(1.0, (load - 1.0) / 99.0))
    overhead, waste = profiles[method]
    unloaded_rate = baseline_tps * tau / (1.0 + overhead)
    return unloaded_rate * (1.0 - load_pressure * waste)
def apply_dark_plot_layout(fig):
    """Apply the app's dark theme to ``fig`` in place and return it.

    Sets the figure-level layout colours, gives both axes the same grid, line
    and font styling, and recolours the colorbar of any heatmap trace.
    """
    fig.update_layout(
        template="plotly_dark",
        paper_bgcolor="#0b1220",
        plot_bgcolor="#0f172a",
        font=dict(color="#e5efff", family="Inter, ui-sans-serif, system-ui, sans-serif"),
        title_font=dict(color="#f8fafc", size=18),
        legend=dict(font=dict(color="#dbeafe")),
        hoverlabel=dict(
            bgcolor="#111827",
            bordercolor="#475569",
            font=dict(color="#f8fafc"),
        ),
    )
    # The x and y axes share identical styling, so apply it through one loop.
    for style_axis in (fig.update_xaxes, fig.update_yaxes):
        style_axis(
            gridcolor="rgba(148, 163, 184, 0.18)",
            zerolinecolor="rgba(148, 163, 184, 0.22)",
            linecolor="rgba(148, 163, 184, 0.34)",
            tickcolor="rgba(148, 163, 184, 0.34)",
            title_font=dict(color="#dbeafe"),
            tickfont=dict(color="#cbd5e1"),
        )
    # Only heatmap traces are touched by this selector.
    colorbar_style = dict(
        tickfont=dict(color="#dbeafe"),
        title_font=dict(color="#f8fafc"),
    )
    fig.update_traces(colorbar=colorbar_style, selector=dict(type="heatmap"))
    return fig
def metric_cards(target, task, method, baseline_tps, load):
    """Build the HTML for the four headline metric cards.

    Cards, in order: DSpark accepted length with its gain over the better of
    DFlash/EAGLE-3, the simulated rate for the selected ``method``, the share
    of target calls avoided (``(1 - 1/tau) * 100``), and the task's domain
    together with all three accepted lengths.
    """
    tau_by_family = {
        family: method_tau(target, family, task)
        for family in ("DSpark", "DFlash", "EAGLE-3")
    }
    spark_tau = tau_by_family["DSpark"]
    flash_tau = tau_by_family["DFlash"]
    eagle_tau = tau_by_family["EAGLE-3"]
    strongest_rival = max(flash_tau, eagle_tau)
    chosen_tau = method_tau(target, method, task)
    avoided_pct = (1.0 - 1.0 / chosen_tau) * 100.0
    chosen_rate = simulated_tps(chosen_tau, method, baseline_tps, load)
    profile = domain_for_task(task)
    uplift = pct_gain(spark_tau, strongest_rival)
    return f"""
<div class="metric-grid">
<div class="metric-card accent-dspark">
<span>DSpark accepted length</span>
<strong>{spark_tau:.2f}</strong>
<small>{uplift:+.1f}% vs strongest baseline on {task}</small>
</div>
<div class="metric-card accent-orange">
<span>{method} simulated rate</span>
<strong>{chosen_rate:.1f}</strong>
<small>tokens/sec from a {baseline_tps:.1f} baseline input</small>
</div>
<div class="metric-card accent-violet">
<span>Target calls avoided</span>
<strong>{avoided_pct:.1f}%</strong>
<small>estimated from accepted length tau={chosen_tau:.2f}</small>
</div>
<div class="metric-card accent-blue">
<span>Benchmark profile</span>
<strong>{profile}</strong>
<small>EAGLE-3 {eagle_tau:.2f} / DFlash {flash_tau:.2f} / DSpark {spark_tau:.2f}</small>
</div>
</div>
"""
def acceptance_bar(target, task):
    """Return a dark-themed bar chart of accepted length per draft family.

    One bar per entry in METHODS for the given target and task, labelled with
    the two-decimal value. The y-axis runs from 0 to one above the tallest bar.
    """
    taus = [method_tau(target, family, task) for family in METHODS]
    bars = go.Bar(
        x=METHODS,
        y=taus,
        marker_color=[COLORS[family] for family in METHODS],
        text=[f"{tau:.2f}" for tau in taus],
        textfont=dict(color="#f8fafc", size=13),
        textposition="outside",
        hovertemplate="%{x}<br>Accepted length: %{y:.2f}<extra></extra>",
    )
    fig = go.Figure()
    fig.add_trace(bars)
    y_ceiling = max(taus) + 1.0  # headroom for the "outside" text labels
    fig.update_layout(
        title=f"Accepted length per verification round on {target} / {task}",
        yaxis_title="Accepted length, including target bonus token",
        xaxis_title="Draft family",
        height=360,
        margin=dict(l=35, r=20, t=55, b=35),
        yaxis=dict(gridcolor="rgba(148,163,184,0.18)", range=[0, y_ceiling]),
    )
    return apply_dark_plot_layout(fig)
def acceptance_heatmap(target):
    """Return a dark-themed heatmap of accepted length for ``target``.

    Rows follow METHODS, columns follow TASKS, and every cell is annotated
    with its two-decimal value.
    """
    matrix = []
    for family in METHODS:
        matrix.append([method_tau(target, family, task) for task in TASKS])
    cell_labels = [[f"{value:.2f}" for value in row] for row in matrix]
    palette = [
        [0.0, "#0b1220"],
        [0.35, "#1e3a8a"],
        [0.68, "#0f766e"],
        [1.0, "#9a3412"],
    ]
    heat = go.Heatmap(
        z=matrix,
        x=TASKS,
        y=METHODS,
        colorscale=palette,
        text=cell_labels,
        texttemplate="%{text}",
        hovertemplate="%{y}<br>%{x}: %{z:.2f}<extra></extra>",
        colorbar=dict(title="tau"),
    )
    fig = go.Figure(data=heat)
    fig.update_layout(
        title=f"DeepSpec Table 1 matrix for {target}",
        height=405,
        margin=dict(l=75, r=25, t=55, b=70),
        xaxis=dict(tickangle=-30),
    )
    return apply_dark_plot_layout(fig)
def production_plot():
    """Return a dark-themed line chart of the two hard-coded uplift series.

    Each series (V4-Flash, V4-Pro) has two points: an SLA anchor in
    tok/s/user on the x-axis and a throughput uplift percentage on the y-axis.
    """
    # (trace name, SLA anchors, uplift percentages, line colour)
    series = (
        ("V4-Flash", [80, 120], [51, 661], "#14b8a6"),
        ("V4-Pro", [35, 50], [52, 406], "#f97316"),
    )
    hover_suffix = " SLA %{x} tok/s/user<br>Throughput uplift %{y}%<extra></extra>"
    fig = go.Figure()
    for label, sla_points, uplift_points, colour in series:
        fig.add_trace(
            go.Scatter(
                x=sla_points,
                y=uplift_points,
                mode="lines+markers+text",
                name=label,
                text=[f"+{pct}%" for pct in uplift_points],
                textposition="top center",
                line=dict(color=colour, width=3),
                marker=dict(size=12),
                textfont=dict(color="#f8fafc"),
                hovertemplate=label + hover_suffix,
            )
        )
    fig.update_layout(
        title="Production DSpark frontier reported for DeepSeek-V4",
        xaxis_title="Interactivity SLA anchor, tok/s/user",
        yaxis_title="Aggregate throughput uplift vs MTP-1",
        height=380,
        margin=dict(l=45, r=25, t=55, b=45),
        yaxis=dict(gridcolor="rgba(148,163,184,0.18)"),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    )
    return apply_dark_plot_layout(fig)
def inventory_plot():
    """Return a dark-themed chart of draft parameter counts per target.

    One line-and-marker trace per family in METHODS; the hover text shows the
    checkpoint repo id and the parameter count.
    """
    fig = go.Figure()
    for family in METHODS:
        # MODELS tuples start with (family, target, repo, params, ...).
        entries = [row for row in MODELS if row[0] == family]
        colour = COLORS[family]
        fig.add_trace(
            go.Scatter(
                x=[row[1] for row in entries],
                y=[row[3] for row in entries],
                mode="markers+lines",
                name=family,
                marker=dict(size=14, color=colour),
                line=dict(color=colour, width=2),
                text=[row[2] for row in entries],
                hovertemplate="%{text}<br>Draft params %{y:.3f}B<extra></extra>",
            )
        )
    fig.update_layout(
        title="Released draft-module parameter scale",
        yaxis_title="Draft module parameters, billions",
        xaxis_title="Target model family",
        height=380,
        margin=dict(l=45, r=25, t=55, b=45),
        yaxis=dict(gridcolor="rgba(148,163,184,0.18)"),
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="left", x=0),
    )
    return apply_dark_plot_layout(fig)
def architecture_panel():
    """Return the HTML for the family cards and the four-stage pipeline strip.

    One card is produced per entry in METHODS from FAMILY_COPY; the copy
    fields are HTML-escaped before being inserted into the markup.
    """

    def render_card(family):
        # Escape the copy up front so the template only interpolates plain names.
        copy = FAMILY_COPY[family]
        accent = COLORS[family]
        tag = html.escape(copy["tag"])
        summary = html.escape(copy["summary"])
        strength = html.escape(copy["strength"])
        tradeoff = html.escape(copy["tradeoff"])
        return f"""
<div class="arch-card" style="--accent:{accent}">
<div class="arch-top">
<span>{tag}</span>
<strong>{family}</strong>
</div>
<p>{summary}</p>
<div class="arch-detail"><b>Strength</b>{strength}</div>
<div class="arch-detail"><b>Tradeoff</b>{tradeoff}</div>
</div>
"""

    card_markup = "".join(render_card(family) for family in METHODS)
    return f"""
<div class="arch-grid">{card_markup}</div>
<div class="pipeline">
<div><b>Target</b><span>prefill + bonus token</span></div>
<i></i>
<div><b>Draft</b><span>block proposal</span></div>
<i></i>
<div><b>Schedule</b><span>confidence prefix</span></div>
<i></i>
<div><b>Verify</b><span>lossless target check</span></div>
</div>
"""
def source_panel():
    """Return the static "Research basis" HTML block with its five source links."""
    markup = """
<div class="source-panel">
<b>Research basis</b>
<span>The 12 checkpoint pages have no individual model cards; the DeepSpec GitHub README identifies them as the released checkpoints used for Table 1 in the DSpark paper. The app uses that table for accepted-length metrics, the public HF API for checkpoint metadata, and the DSpark/DFlash/EAGLE-3 papers for architecture notes.</span>
<a href="https://github.com/deepseek-ai/DeepSpec" target="_blank">DeepSpec repo</a>
<a href="https://github.com/deepseek-ai/DeepSpec/blob/main/DSpark_paper.pdf" target="_blank">DSpark paper</a>
<a href="https://arxiv.org/abs/2602.06036" target="_blank">DFlash paper</a>
<a href="https://arxiv.org/abs/2503.01840" target="_blank">EAGLE-3 paper</a>
<a href="https://huggingface.co/collections/deepseek-ai/deepspec-6a410e3f1831ca8ca801b88b" target="_blank">DeepSpec collection</a>
</div>
"""
    return markup
def weighted_acceptance_count(rng, tau, method, scheduled_len, load):
    """Draw how many draft tokens are accepted in one simulated round.

    The mean is ``tau - 1`` (floored at 0), perturbed by one uniform draw in
    [-0.75, 0.75] from ``rng`` and by a load-dependent bias: negative for
    DFlash (above load 65) and EAGLE-3 (above load 80), positive for any
    other method (above load 80). The rounded result is clamped to
    ``0..scheduled_len``.
    """
    draft_mean = max(0.0, tau - 1.0)
    noise = rng.uniform(-0.75, 0.75)
    if method == "DFlash":
        bias = -max(0.0, (load - 65.0) / 140.0)
    elif method == "EAGLE-3":
        bias = -max(0.0, (load - 80.0) / 220.0)
    else:
        bias = max(0.0, (load - 80.0) / 260.0)
    # round() on a float already returns an int.
    accepted = round(draft_mean + (noise + bias))
    capped = min(scheduled_len, accepted)
    return max(0, capped)
def scheduled_length(method, tau, load):
    """Return how many draft tokens are sent for verification in one round.

    DFlash always schedules 7. DSpark schedules ``round(tau + 1.5)`` clamped
    to 2..7, then subtracts up to two tokens as load rises (never below 2).
    Any other method schedules ``round(tau + 0.5)`` clamped to 3..7.
    """
    load_pressure = max(0.0, min(1.0, (load - 1.0) / 99.0))
    if method == "DFlash":
        return 7
    if method != "DSpark":
        lookahead = int(round(tau + 0.5))
        return max(3, min(7, lookahead))
    confident = max(2, min(7, int(round(tau + 1.5))))
    trimmed = int(round(confident - load_pressure * 2.0))
    return max(2, trimmed)
def simulate_tokens(target, task, method, output_tokens, load, seed, prompt):
    """Render an HTML walk-through of simulated draft/verify rounds.

    Each round shows seven draft slots plus one target bonus token. Slots are
    classed as accepted, rejected (the first non-accepted slot), tail (after
    the rejection but still inside the scheduled window) or dropped (beyond
    the scheduled window). Rounds repeat until ``output_tokens`` tokens have
    been emitted or 16 rounds have been rendered, whichever comes first.

    The displayed words are random picks from LEXICON[task] plus words taken
    from ``prompt``; they are HTML-escaped before being inserted.

    Returns an HTML string with a header/legend followed by one row per round.
    Raises ``KeyError`` if ``task`` is not in LEXICON or the
    (target, method, task) triple is not in ACCEPTANCE.
    """
    # Seeding with every input makes the output reproducible for identical
    # arguments. The order of rng draws below therefore determines the output.
    rng = random.Random(f"{target}|{task}|{method}|{seed}|{prompt}")
    vocab = list(LEXICON[task])  # copy so the shared table is never mutated
    if prompt.strip():
        # Add punctuation-stripped, lower-cased prompt words of 3..17 characters.
        prompt_words = [w.strip(".,:;!?()[]{}<>").lower() for w in prompt.split()]
        vocab.extend([w for w in prompt_words if 2 < len(w) < 18])
    tau = method_tau(target, method, task)
    emitted = 0
    cycle = 1
    rows = []
    while emitted < output_tokens and cycle <= 16:
        sched = scheduled_length(method, tau, load)
        accepted = weighted_acceptance_count(rng, tau, method, sched, load)
        # Index of the first rejected slot, or None when the whole scheduled
        # window was accepted.
        rejected = None if accepted >= sched else accepted
        token_spans = []
        for idx in range(7):
            token = html.escape(rng.choice(vocab))
            if idx < accepted:
                cls = "tok accepted"
                label = "accepted"
            elif idx == rejected:
                cls = "tok rejected"
                label = "rejected"
            elif idx >= sched:
                cls = "tok dropped"
                label = "not verified"
            else:
                cls = "tok tail"
                label = "discarded suffix"
            token_spans.append(f"<span class='{cls}' title='{label}'>{token}</span>")
        bonus = html.escape(rng.choice(vocab))
        token_spans.append(f"<span class='tok bonus' title='target bonus token'>{bonus}</span>")
        # Every round emits the accepted draft tokens plus the bonus token.
        emitted += accepted + 1
        rows.append(
            f"""
<div class="cycle-row">
<div class="cycle-id">round {cycle}</div>
<div class="token-strip">{''.join(token_spans)}</div>
<div class="cycle-stat">{accepted}+1 emitted</div>
</div>
"""
        )
        cycle += 1
    return f"""
<div class="sim-head">
<div><b>{method}</b><span>{target} / {task} / load {load:.0f}%</span></div>
<div class="legend"><span class="dot accepted"></span>accepted <span class="dot rejected"></span>first reject <span class="dot dropped"></span>pruned <span class="dot bonus"></span>target bonus</div>
</div>
<div class="simulator">{''.join(rows)}</div>
"""
def production_cards():
    """Return the static HTML grid of four production headline cards."""
    markup = """
<div class="metric-grid compact">
<div class="metric-card accent-dspark"><span>V4-Flash moderate SLA</span><strong>+51%</strong><small>aggregate throughput at 80 tok/s/user</small></div>
<div class="metric-card accent-dspark"><span>V4-Flash matched capacity</span><strong>+60-85%</strong><small>faster per-user generation</small></div>
<div class="metric-card accent-orange"><span>V4-Pro moderate SLA</span><strong>+52%</strong><small>aggregate throughput at 35 tok/s/user</small></div>
<div class="metric-card accent-orange"><span>V4-Pro matched capacity</span><strong>+57-78%</strong><small>faster per-user generation</small></div>
</div>
"""
    return markup
def render_all(target, task, method, output_tokens, baseline_tps, load, seed, prompt):
    """Compute every dashboard output for the current control values.

    Returns a 10-tuple in this order: metric cards HTML, token simulator
    HTML, acceptance bar chart, acceptance heatmap, benchmark table rows,
    inventory chart, architecture panel HTML, production cards HTML,
    production chart, source panel HTML.

    ``output_tokens`` and ``seed`` are coerced to ``int``; a falsy ``prompt``
    is treated as an empty string.
    """
    cards_html = metric_cards(target, task, method, baseline_tps, load)
    simulator_html = simulate_tokens(
        target,
        task,
        method,
        int(output_tokens),
        load,
        int(seed),
        prompt or "",
    )
    bar_fig = acceptance_bar(target, task)
    heatmap_fig = acceptance_heatmap(target)
    table_rows = benchmark_rows(target)
    inventory_fig = inventory_plot()
    architecture_html = architecture_panel()
    production_html = production_cards()
    production_fig = production_plot()
    sources_html = source_panel()
    return (
        cards_html,
        simulator_html,
        bar_fig,
        heatmap_fig,
        table_rows,
        inventory_fig,
        architecture_html,
        production_html,
        production_fig,
        sources_html,
    )
| CSS = """ | |
| :root { | |
| --bg: #070b13; | |
| --panel: rgba(15, 23, 42, 0.86); | |
| --line: rgba(148, 163, 184, 0.18); | |
| --text: #e5efff; | |
| --muted: #b8c4d6; | |
| } | |
| .gradio-container { | |
| color-scheme: dark; | |
| --body-background-fill: #070b13; | |
| --body-text-color: #e5efff; | |
| --body-text-color-subdued: #e5efff; | |
| --block-background-fill: rgba(15, 23, 42, 0.92); | |
| --block-border-color: rgba(148, 163, 184, 0.24); | |
| --block-label-text-color: #f8fafc; | |
| --border-color-primary: rgba(148, 163, 184, 0.28); | |
| --input-background-fill: #0b1220; | |
| --input-border-color: rgba(148, 163, 184, 0.36); | |
| --input-text-color: #f8fafc; | |
| --input-text-color-focus: #ffffff; | |
| --input-placeholder-color: #dbeafe; | |
| --button-primary-background-fill: #0f766e; | |
| --button-primary-background-fill-hover: #0d9488; | |
| --button-primary-text-color: #f8fafc; | |
| --link-text-color: #67e8f9; | |
| --neutral-50: #f8fafc; | |
| --neutral-100: #f1f5f9; | |
| --neutral-200: #e2e8f0; | |
| --neutral-300: #cbd5e1; | |
| --neutral-400: #dbeafe; | |
| --neutral-500: #e5efff; | |
| --table-background-fill: #0b1220; | |
| --table-even-background-fill: #0f172a; | |
| --table-odd-background-fill: #0b1220; | |
| --table-border-color: rgba(148, 163, 184, 0.24); | |
| --table-text-color: #e5efff; | |
| --table-row-focus: #1e293b; | |
| background: | |
| radial-gradient(circle at 12% 0%, rgba(20, 184, 166, 0.18), transparent 28%), | |
| linear-gradient(135deg, #070b13 0%, #0f172a 52%, #111827 100%); | |
| color: var(--text); | |
| } | |
| .gradio-container label, | |
| .gradio-container .label, | |
| .gradio-container .prose, | |
| .gradio-container .prose * { | |
| color: #dbeafe !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container h1, | |
| .gradio-container h2, | |
| .gradio-container h3, | |
| .gradio-container h4, | |
| .gradio-container h5, | |
| .gradio-container h6, | |
| .gradio-container legend, | |
| .gradio-container summary, | |
| .gradio-container [data-testid="block-info"], | |
| .gradio-container [class*="label"], | |
| .gradio-container [class*="Label"], | |
| .gradio-container [data-testid*="label"] { | |
| color: #f8fafc !important; | |
| opacity: 1 !important; | |
| font-weight: 600 !important; | |
| } | |
| .gradio-container small, | |
| .gradio-container .secondary, | |
| .gradio-container [class*="secondary"], | |
| .gradio-container [class*="Description"], | |
| .gradio-container [class*="description"] { | |
| color: #cbd5e1 !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container .min_value, | |
| .gradio-container .max_value, | |
| .gradio-container [class*="min_value"], | |
| .gradio-container [class*="max_value"] { | |
| color: #dbeafe !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container input, | |
| .gradio-container textarea, | |
| .gradio-container select { | |
| background: #0b1220 !important; | |
| color: #f8fafc !important; | |
| border-color: rgba(148, 163, 184, 0.36) !important; | |
| } | |
| .gradio-container input:disabled, | |
| .gradio-container textarea:disabled, | |
| .gradio-container select:disabled, | |
| .gradio-container button:disabled, | |
| .gradio-container [disabled], | |
| .gradio-container [aria-disabled="true"] { | |
| opacity: 1 !important; | |
| background: #0b1220 !important; | |
| color: #f8fafc !important; | |
| -webkit-text-fill-color: #f8fafc !important; | |
| border-color: rgba(148, 163, 184, 0.34) !important; | |
| } | |
| .gradio-container .disabled, | |
| .gradio-container .disabled *, | |
| .gradio-container [class*="disabled"], | |
| .gradio-container [class*="disabled"] * { | |
| opacity: 1 !important; | |
| } | |
| .gradio-container .disabled input, | |
| .gradio-container .disabled textarea, | |
| .gradio-container .disabled select, | |
| .gradio-container [class*="disabled"] input, | |
| .gradio-container [class*="disabled"] textarea, | |
| .gradio-container [class*="disabled"] select { | |
| color: #f8fafc !important; | |
| -webkit-text-fill-color: #f8fafc !important; | |
| } | |
| .gradio-container .form, | |
| .gradio-container .form *, | |
| .gradio-container fieldset, | |
| .gradio-container fieldset *, | |
| .gradio-container .block.padded, | |
| .gradio-container .block.padded *, | |
| .gradio-container [data-testid="input"], | |
| .gradio-container [data-testid="input"] *, | |
| .gradio-container [data-testid*="slider"], | |
| .gradio-container [data-testid*="slider"] *, | |
| .gradio-container [data-testid*="textbox"], | |
| .gradio-container [data-testid*="textbox"] *, | |
| .gradio-container [data-testid*="dropdown"], | |
| .gradio-container [data-testid*="dropdown"] *, | |
| .gradio-container [data-testid*="radio"], | |
| .gradio-container [data-testid*="radio"] * { | |
| opacity: 1 !important; | |
| filter: none !important; | |
| } | |
| .gradio-container .form label, | |
| .gradio-container .form label *, | |
| .gradio-container .form legend, | |
| .gradio-container .form span, | |
| .gradio-container .form p, | |
| .gradio-container .form input, | |
| .gradio-container .form textarea, | |
| .gradio-container .form select, | |
| .gradio-container fieldset label, | |
| .gradio-container fieldset label *, | |
| .gradio-container fieldset legend, | |
| .gradio-container fieldset span, | |
| .gradio-container fieldset p, | |
| .gradio-container [data-testid*="label"], | |
| .gradio-container [data-testid*="label"] *, | |
| .gradio-container [data-testid*="radio"] label, | |
| .gradio-container [data-testid*="radio"] label *, | |
| .gradio-container [data-testid*="radio"] span, | |
| .gradio-container [data-testid*="slider"] label, | |
| .gradio-container [data-testid*="slider"] label *, | |
| .gradio-container [data-testid*="slider"] input, | |
| .gradio-container [data-testid*="slider"] span, | |
| .gradio-container [data-testid*="textbox"] label, | |
| .gradio-container [data-testid*="textbox"] label *, | |
| .gradio-container [data-testid*="textbox"] textarea, | |
| .gradio-container [data-testid*="dropdown"] label, | |
| .gradio-container [data-testid*="dropdown"] label *, | |
| .gradio-container [data-testid*="dropdown"] input, | |
| .gradio-container [data-testid*="number"] input, | |
| .gradio-container [data-testid*="number"] span { | |
| color: #f8fafc !important; | |
| -webkit-text-fill-color: #f8fafc !important; | |
| opacity: 1 !important; | |
| filter: none !important; | |
| text-shadow: 0 0 0 #f8fafc !important; | |
| } | |
| .gradio-container .form input::placeholder, | |
| .gradio-container .form textarea::placeholder, | |
| .gradio-container [data-testid*="textbox"] textarea::placeholder, | |
| .gradio-container [data-testid*="dropdown"] input::placeholder { | |
| color: #dbeafe !important; | |
| -webkit-text-fill-color: #dbeafe !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container input::placeholder, | |
| .gradio-container textarea::placeholder { | |
| color: #dbeafe !important; | |
| -webkit-text-fill-color: #dbeafe !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container input:focus, | |
| .gradio-container textarea:focus, | |
| .gradio-container select:focus { | |
| border-color: rgba(20, 184, 166, 0.76) !important; | |
| box-shadow: 0 0 0 2px rgba(20, 184, 166, 0.18) !important; | |
| } | |
| .gradio-container button { | |
| border-color: rgba(148, 163, 184, 0.28) !important; | |
| } | |
| .gradio-container button[role="tab"], | |
| .gradio-container .tab-nav button { | |
| background: #0b1220 !important; | |
| color: #cbd5e1 !important; | |
| border-color: rgba(148, 163, 184, 0.2) !important; | |
| } | |
| .gradio-container button[aria-selected="true"], | |
| .gradio-container button[role="tab"][aria-selected="true"] { | |
| background: rgba(20, 184, 166, 0.18) !important; | |
| color: #f8fafc !important; | |
| border-color: rgba(20, 184, 166, 0.48) !important; | |
| } | |
| .gradio-container label:has(input[type="radio"]), | |
| .gradio-container label:has(input[type="checkbox"]) { | |
| background: #0b1220 !important; | |
| color: #f8fafc !important; | |
| border: 1px solid rgba(148, 163, 184, 0.38) !important; | |
| border-radius: 8px !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container label:has(input[type="radio"]) *, | |
| .gradio-container label:has(input[type="checkbox"]) * { | |
| color: #f8fafc !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container label:has(input[type="radio"]:checked), | |
| .gradio-container label:has(input[type="checkbox"]:checked) { | |
| background: rgba(20, 184, 166, 0.22) !important; | |
| border-color: rgba(20, 184, 166, 0.72) !important; | |
| box-shadow: inset 0 0 0 1px rgba(45, 212, 191, 0.28) !important; | |
| } | |
| .gradio-container input[type="radio"], | |
| .gradio-container input[type="checkbox"] { | |
| accent-color: #14b8a6 !important; | |
| } | |
| .gradio-container input[type="radio"] + span, | |
| .gradio-container input[type="checkbox"] + span { | |
| color: #f8fafc !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container [role="radiogroup"] label, | |
| .gradio-container [role="radiogroup"] [role="radio"] { | |
| background: #0b1220 !important; | |
| color: #f8fafc !important; | |
| border: 1px solid rgba(148, 163, 184, 0.38) !important; | |
| border-radius: 8px !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container [role="radiogroup"] label *, | |
| .gradio-container [role="radiogroup"] [role="radio"] *, | |
| .gradio-container [role="radiogroup"] span { | |
| color: #f8fafc !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container [role="radiogroup"] label:has(input:checked), | |
| .gradio-container [role="radiogroup"] [role="radio"][aria-checked="true"] { | |
| background: rgba(20, 184, 166, 0.22) !important; | |
| border-color: rgba(20, 184, 166, 0.72) !important; | |
| } | |
| .gradio-container .wrap, | |
| .gradio-container .block, | |
| .gradio-container .panel, | |
| .gradio-container .form { | |
| background: rgba(15, 23, 42, 0.72) !important; | |
| border-color: rgba(148, 163, 184, 0.22) !important; | |
| color: #e5efff !important; | |
| } | |
| .gradio-container .dataframe, | |
| .gradio-container .table-wrap, | |
| .gradio-container table { | |
| background: #0b1220 !important; | |
| color: #e5efff !important; | |
| border-color: rgba(148, 163, 184, 0.24) !important; | |
| } | |
| .gradio-container .table-container, | |
| .gradio-container svelte-virtual-table-viewport, | |
| .gradio-container button.disable_click, | |
| .gradio-container button[aria-label*="Drop CSV"], | |
| .gradio-container button[aria-label*="TSV"] { | |
| background: #0b1220 !important; | |
| color: #e5efff !important; | |
| border-color: rgba(148, 163, 184, 0.24) !important; | |
| } | |
| .gradio-container table.table, | |
| .gradio-container table.table thead, | |
| .gradio-container table.table tbody, | |
| .gradio-container table.table tfoot { | |
| background: #0b1220 !important; | |
| color: #e5efff !important; | |
| } | |
| .gradio-container th { | |
| background: #111827 !important; | |
| color: #f8fafc !important; | |
| border-color: rgba(148, 163, 184, 0.24) !important; | |
| } | |
| .gradio-container table.table thead, | |
| .gradio-container table.table thead tr, | |
| .gradio-container table.table th, | |
| .gradio-container table.table th *, | |
| .gradio-container table.table .header-content, | |
| .gradio-container table.table .header-button, | |
| .gradio-container table.table .cell-wrap { | |
| background: #111827 !important; | |
| color: #f8fafc !important; | |
| border-color: rgba(148, 163, 184, 0.24) !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container td { | |
| background: #0b1220 !important; | |
| color: #e2e8f0 !important; | |
| border-color: rgba(148, 163, 184, 0.18) !important; | |
| } | |
| .gradio-container tr:nth-child(even) td { | |
| background: #0f172a !important; | |
| } | |
| .gradio-container table.table tbody tr, | |
| .gradio-container table.table tbody td, | |
| .gradio-container table.table tbody .cell-wrap { | |
| background: #0b1220 !important; | |
| color: #e2e8f0 !important; | |
| } | |
| .gradio-container table.table tbody tr.row-odd, | |
| .gradio-container table.table tbody tr.row-odd td, | |
| .gradio-container table.table tbody tr.row-odd .cell-wrap { | |
| background: #0f172a !important; | |
| } | |
| .gradio-container table.table span.text, | |
| .gradio-container table.table button { | |
| color: #e5efff !important; | |
| -webkit-text-fill-color: #e5efff !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container .plot-container, | |
| .gradio-container .js-plotly-plot { | |
| background: #0b1220 !important; | |
| border-radius: 8px; | |
| } | |
| .gradio-container [aria-label="Empty value"], | |
| .gradio-container .empty, | |
| .gradio-container .unpadded_box, | |
| .gradio-container .large.unpadded_box { | |
| background: #0b1220 !important; | |
| color: #93c5fd !important; | |
| border-color: rgba(148, 163, 184, 0.24) !important; | |
| } | |
| .gradio-container [aria-label="Empty value"] *, | |
| .gradio-container .empty *, | |
| .gradio-container .unpadded_box * { | |
| color: #93c5fd !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container [data-testid="status-tracker"], | |
| .gradio-container [data-testid="status-tracker"].wrap, | |
| .gradio-container .wrap.default.full, | |
| .gradio-container .progress-text, | |
| .gradio-container .meta-text { | |
| background: rgba(11, 18, 32, 0.92) !important; | |
| color: #dbeafe !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container [data-testid="status-tracker"].hide, | |
| .gradio-container .wrap.default.full.hide { | |
| background: transparent !important; | |
| border: 0 !important; | |
| color: transparent !important; | |
| opacity: 0 !important; | |
| visibility: hidden !important; | |
| pointer-events: none !important; | |
| } | |
| .gradio-container [data-testid="status-tracker"].hide *, | |
| .gradio-container .wrap.default.full.hide * { | |
| background: transparent !important; | |
| color: transparent !important; | |
| opacity: 0 !important; | |
| visibility: hidden !important; | |
| } | |
| .gradio-container .eta-bar { | |
| background: rgba(20, 184, 166, 0.18) !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container [data-testid="block-label"], | |
| .gradio-container label[data-testid="block-label"] { | |
| background: #0b1220 !important; | |
| color: #f8fafc !important; | |
| border-color: rgba(148, 163, 184, 0.28) !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container [data-testid="block-label"] *, | |
| .gradio-container label[data-testid="block-label"] * { | |
| color: #f8fafc !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container footer, | |
| .gradio-container footer *, | |
| .gradio-container footer button, | |
| .gradio-container footer a { | |
| background: #0b1220 !important; | |
| color: #e5efff !important; | |
| -webkit-text-fill-color: #e5efff !important; | |
| opacity: 1 !important; | |
| } | |
| .gradio-container footer img { | |
| background: transparent !important; | |
| } | |
| .main-shell { | |
| border: 1px solid var(--line); | |
| border-radius: 8px; | |
| padding: 24px; | |
| background: linear-gradient(145deg, rgba(15, 23, 42, 0.94), rgba(17, 24, 39, 0.78)); | |
| box-shadow: 0 24px 80px rgba(0, 0, 0, 0.28); | |
| } | |
| .hero-title { | |
| display: grid; | |
| grid-template-columns: 1.25fr 0.75fr; | |
| gap: 18px; | |
| align-items: stretch; | |
| } | |
| .hero-title h1 { | |
| margin: 0; | |
| font-size: clamp(2.1rem, 4vw, 4.5rem); | |
| line-height: 0.92; | |
| letter-spacing: 0; | |
| } | |
| .hero-title p { | |
| color: var(--muted); | |
| max-width: 760px; | |
| font-size: 1rem; | |
| } | |
| .hero-stats { | |
| display: grid; | |
| grid-template-columns: repeat(2, minmax(0, 1fr)); | |
| gap: 10px; | |
| } | |
| .hero-stat { | |
| border: 1px solid var(--line); | |
| border-radius: 8px; | |
| padding: 14px; | |
| background: rgba(2, 6, 23, 0.36); | |
| } | |
| .hero-stat b { | |
| display: block; | |
| font-size: 1.55rem; | |
| color: #ffffff; | |
| } | |
| .hero-stat span { | |
| color: var(--muted); | |
| font-size: 0.82rem; | |
| } | |
| .metric-grid { | |
| display: grid; | |
| grid-template-columns: repeat(4, minmax(0, 1fr)); | |
| gap: 12px; | |
| } | |
| .metric-grid.compact { | |
| margin-bottom: 14px; | |
| } | |
| .metric-card { | |
| border: 1px solid var(--line); | |
| border-radius: 8px; | |
| padding: 14px; | |
| min-height: 118px; | |
| background: rgba(2, 6, 23, 0.66); | |
| position: relative; | |
| overflow: hidden; | |
| } | |
| .metric-card:before { | |
| content: ""; | |
| position: absolute; | |
| inset: 0 auto 0 0; | |
| width: 4px; | |
| background: var(--accent, #38bdf8); | |
| } | |
| .metric-card span, .metric-card small { | |
| display: block; | |
| color: var(--muted); | |
| } | |
| .metric-card strong { | |
| display: block; | |
| margin: 6px 0; | |
| font-size: 2rem; | |
| color: #ffffff; | |
| } | |
| .accent-dspark { --accent: #14b8a6; } | |
| .accent-orange { --accent: #f97316; } | |
| .accent-violet { --accent: #8b5cf6; } | |
| .accent-blue { --accent: #38bdf8; } | |
| .sim-head { | |
| display: flex; | |
| align-items: center; | |
| justify-content: space-between; | |
| gap: 12px; | |
| margin: 8px 0 12px; | |
| } | |
| .sim-head span { | |
| display: block; | |
| color: var(--muted); | |
| } | |
| .legend { | |
| color: var(--muted); | |
| font-size: 0.86rem; | |
| } | |
| .dot { | |
| width: 10px; | |
| height: 10px; | |
| border-radius: 50%; | |
| display: inline-block; | |
| margin: 0 5px 0 12px; | |
| } | |
| .simulator { | |
| display: grid; | |
| gap: 8px; | |
| } | |
| .cycle-row { | |
| display: grid; | |
| grid-template-columns: 76px 1fr 96px; | |
| gap: 10px; | |
| align-items: center; | |
| border: 1px solid var(--line); | |
| border-radius: 8px; | |
| padding: 10px; | |
| background: rgba(15, 23, 42, 0.58); | |
| } | |
| .cycle-id, .cycle-stat { | |
| color: var(--muted); | |
| font-size: 0.82rem; | |
| } | |
| .token-strip { | |
| display: flex; | |
| flex-wrap: wrap; | |
| gap: 6px; | |
| } | |
| .tok { | |
| border: 1px solid transparent; | |
| border-radius: 6px; | |
| padding: 5px 8px; | |
| font-size: 0.85rem; | |
| line-height: 1.1; | |
| } | |
| .accepted, .tok.accepted { background: rgba(20, 184, 166, 0.18); color: #99f6e4; border-color: rgba(20, 184, 166, 0.36); } | |
| .rejected, .tok.rejected { background: rgba(244, 63, 94, 0.18); color: #fecdd3; border-color: rgba(244, 63, 94, 0.38); } | |
| .dropped, .tok.dropped { background: rgba(100, 116, 139, 0.18); color: #cbd5e1; border-color: rgba(148, 163, 184, 0.22); text-decoration: line-through; } | |
| .tok.tail { background: rgba(249, 115, 22, 0.14); color: #fed7aa; border-color: rgba(249, 115, 22, 0.26); } | |
| .bonus, .tok.bonus { background: rgba(56, 189, 248, 0.16); color: #bae6fd; border-color: rgba(56, 189, 248, 0.34); } | |
| .arch-grid { | |
| display: grid; | |
| grid-template-columns: repeat(3, minmax(0, 1fr)); | |
| gap: 12px; | |
| margin-bottom: 14px; | |
| } | |
| .arch-card { | |
| border: 1px solid var(--line); | |
| border-radius: 8px; | |
| padding: 16px; | |
| background: rgba(2, 6, 23, 0.42); | |
| box-shadow: inset 0 3px 0 var(--accent); | |
| } | |
| .arch-top span { | |
| color: var(--accent); | |
| text-transform: uppercase; | |
| font-size: 0.76rem; | |
| } | |
| .arch-top strong { | |
| display: block; | |
| color: #fff; | |
| font-size: 1.3rem; | |
| } | |
| .arch-card p, .arch-detail { | |
| color: var(--muted); | |
| } | |
| .arch-detail { | |
| margin-top: 10px; | |
| } | |
| .arch-detail b { | |
| display: block; | |
| color: #e5efff; | |
| } | |
| .pipeline { | |
| display: grid; | |
| grid-template-columns: 1fr 24px 1fr 24px 1fr 24px 1fr; | |
| gap: 8px; | |
| align-items: center; | |
| border: 1px solid var(--line); | |
| border-radius: 8px; | |
| padding: 14px; | |
| background: rgba(15, 23, 42, 0.54); | |
| } | |
| .pipeline div { | |
| min-height: 72px; | |
| border-radius: 8px; | |
| border: 1px solid rgba(148, 163, 184, 0.18); | |
| padding: 12px; | |
| background: rgba(2, 6, 23, 0.42); | |
| } | |
| .pipeline b, .pipeline span { | |
| display: block; | |
| } | |
| .pipeline span { | |
| color: var(--muted); | |
| } | |
| .pipeline i { | |
| height: 2px; | |
| background: linear-gradient(90deg, #14b8a6, #f97316); | |
| } | |
| .source-panel { | |
| display: flex; | |
| gap: 10px; | |
| flex-wrap: wrap; | |
| align-items: center; | |
| border: 1px solid var(--line); | |
| border-radius: 8px; | |
| padding: 12px; | |
| background: rgba(2, 6, 23, 0.34); | |
| color: var(--muted); | |
| } | |
| .source-panel b { | |
| color: #fff; | |
| } | |
| .source-panel span { | |
| flex: 1 1 520px; | |
| } | |
| .source-panel a { | |
| color: #67e8f9; | |
| text-decoration: none; | |
| border: 1px solid rgba(103, 232, 249, 0.22); | |
| border-radius: 6px; | |
| padding: 4px 8px; | |
| } | |
| @media (max-width: 900px) { | |
| .hero-title, .metric-grid, .arch-grid, .pipeline { | |
| grid-template-columns: 1fr; | |
| } | |
| .pipeline i { | |
| height: 18px; | |
| width: 2px; | |
| margin-left: 12px; | |
| } | |
| .cycle-row { | |
| grid-template-columns: 1fr; | |
| } | |
| } | |
| """ | |
| # Assemble the Gradio UI: a static hero banner, a set of input controls, | |
| # HTML/plot/table output components, and the event wiring that calls render_all. | |
| # NOTE(review): indentation is not preserved in this copy, so the exact layout | |
| # nesting (e.g. whether the Tabs sit inside the second Column) cannot be confirmed here. | |
| with gr.Blocks() as demo: | |
| # Static hero banner; its classes (main-shell, hero-title, hero-stats, hero-stat) | |
| # are styled by the stylesheet string defined above. | |
| gr.HTML( | |
| """ | |
| <div class="main-shell"> | |
| <div class="hero-title"> | |
| <div> | |
| <h1>DeepSpec Decoding Lab</h1> | |
| <p>Explore DeepSeek's 12 released draft modules across DSpark, DFlash, and EAGLE-3 with paper-backed accepted-length metrics, architecture comparisons, and a deterministic speculative-decoding simulator.</p> | |
| </div> | |
| <div class="hero-stats"> | |
| <div class="hero-stat"><b>12</b><span>released draft checkpoints</span></div> | |
| <div class="hero-stat"><b>3</b><span>speculative-decoding families</span></div> | |
| <div class="hero-stat"><b>9</b><span>benchmark tasks from Table 1</span></div> | |
| <div class="hero-stat"><b>60-85%</b><span>reported V4-Flash per-user speed lift</span></div> | |
| </div> | |
| </div> | |
| </div> | |
| """ | |
| ) | |
| # First column (scale=1) holds the input controls; the scale=3 column holds the | |
| # `cards` and `sim` HTML panes that render_all fills. | |
| with gr.Row(): | |
| with gr.Column(scale=1, min_width=280): | |
| # Choices come from the module-level TARGETS, TASKS and METHODS lists. | |
| target = gr.Dropdown(TARGETS, value="Qwen3-4B", label="Target family") | |
| task = gr.Dropdown(TASKS, value="HumanEval", label="Benchmark profile") | |
| method = gr.Radio(METHODS, value="DSpark", label="Primary draft family") | |
| output_tokens = gr.Slider(24, 128, value=64, step=8, label="Simulation output budget") | |
| baseline_tps = gr.Slider(5, 160, value=40, step=5, label="Autoregressive baseline tok/s") | |
| load = gr.Slider(1, 100, value=70, step=1, label="Serving load pressure") | |
| # precision=0 rounds the seed to an integer. | |
| seed = gr.Number(value=7, label="Deterministic seed", precision=0) | |
| prompt = gr.Textbox( | |
| value="Write a compact function, then explain why it is correct.", | |
| label="Prompt flavor", | |
| lines=3, | |
| ) | |
| run = gr.Button("Run Speculation", variant="primary") | |
| with gr.Column(scale=3): | |
| cards = gr.HTML() | |
| sim = gr.HTML() | |
| # Result tabs. Every component created here except inv_table appears in `outputs` below. | |
| with gr.Tabs(): | |
| with gr.Tab("Benchmark Matrix"): | |
| bar = gr.Plot() | |
| heatmap = gr.Plot() | |
| table = gr.Dataframe( | |
| headers=["Task", "Domain", "DSpark", "DFlash", "EAGLE-3"], | |
| datatype=["str", "str", "str", "str", "str"], | |
| interactive=False, | |
| wrap=True, | |
| ) | |
| with gr.Tab("Checkpoint Inventory"): | |
| inv_plot = gr.Plot() | |
| # Filled once from model_rows() when the UI is built; it is not listed in | |
| # `outputs`, so the event handlers below never update it. | |
| inv_table = gr.Dataframe( | |
| value=model_rows(), | |
| headers=["Family", "Target", "Repo", "Params", "Architecture", "Horizon", "Layers", "Confidence", "Sequential signal"], | |
| datatype=["str"] * 9, | |
| interactive=False, | |
| wrap=True, | |
| ) | |
| with gr.Tab("Architectures"): | |
| arch = gr.HTML() | |
| with gr.Tab("Production Frontier"): | |
| prod_cards = gr.HTML() | |
| prod_plot = gr.Plot() | |
| with gr.Tab("Sources"): | |
| sources = gr.HTML() | |
| # The same input/output lists are shared by every handler. | |
| # NOTE(review): presumably render_all returns ten values in exactly this order; verify against render_all. | |
| outputs = [cards, sim, bar, heatmap, table, inv_plot, arch, prod_cards, prod_plot, sources] | |
| inputs = [target, task, method, output_tokens, baseline_tps, load, seed, prompt] | |
| # Initial render when the app loads in the browser. | |
| demo.load(render_all, inputs=inputs, outputs=outputs, api_name=None, api_visibility="private") | |
| # Re-render whenever one of these controls changes. `prompt` is not in this list | |
| # (it is still passed as an input); it triggers a render only on submit. | |
| for control in [target, task, method, output_tokens, baseline_tps, load, seed]: | |
| control.change(render_all, inputs=inputs, outputs=outputs, api_name=None, api_visibility="private") | |
| prompt.submit(render_all, inputs=inputs, outputs=outputs, api_name=None, api_visibility="private") | |
| # The button click is the only handler given a named API endpoint ("simulate"); | |
| # the handlers above pass api_name=None and api_visibility="private". | |
| run.click(render_all, inputs=inputs, outputs=outputs, api_name="simulate") | |
| # Enable the request queue with a default per-event concurrency limit of 8. | |
| demo.queue(default_concurrency_limit=8) | |
| # Launch only when run as a script; the stylesheet string and base theme are supplied at launch time. | |
| if __name__ == "__main__": | |
| demo.launch(css=CSS, theme=gr.themes.Base()) | |