File size: 1,274 Bytes
0a55ff6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
{
  "label": "dflash",
  "model": "poolside/Laguna-XS.2",
  "speculator": "poolside/Laguna-XS.2-speculator.dflash",
  "num_speculative_tokens": 7,
  "method": "dflash",
  "n": 14,
  "tokens_per_s_mean": 54.1741150379158,
  "ttft_s_mean": 2.5821559940065657,
  "acceptance_length_tau": null,
  "tau_note": "tau read from vLLM /metrics pinned at EXACTLY gamma+1 (=8.0) on BOTH the trivial and the mixed-difficulty runs, and the per-prompt /metrics deltas did not resolve a distribution (counter refresh granularity). We therefore treat the /metrics tau as UNRELIABLE and make NO acceptance-length claim. The load-bearing, directly-measured results are the wall-clock speedup and the byte-parity.",
  "source": "HF Job 6a19d8b73a4b8cae6044dfdf (h200), 2026-05-29; vLLM 0.22.0, --enforce-eager, --max-model-len 4096, greedy (temperature=0), --speculative-config method=dflash gamma=7",
  "prompt_set": "same 14 distinct mixed-difficulty prompts as baseline (trivial -> hard)",
  "corroborating_run": "An earlier 20-prompt trivial-only run (job 6a19d2105c8d10ffa1107774) gave DFlash 48.09 tok/s = 2.47x; this mixed-difficulty run gives 54.17 tok/s = 2.76x. Lossless in both.",
  "note": "DFlash completions are byte-identical to baseline (greedy) — see results/parity.json."
}