"""Timing harness comparing an inline softmax/argmax path with the canonical path."""

import time

from .backend import xp, set_seed, randn
from .softmax import softmax_canonical
from .sampling import sample_canonical


def timed(fn, repeats=15, warmup=5):
    """Return the mean wall-clock seconds per call of ``fn``.

    Args:
        fn: Zero-argument callable to measure.
        repeats: Number of timed calls; must be at least 1.
        warmup: Number of untimed calls made before measuring. Zero or a
            negative value skips the warm-up entirely.

    Returns:
        The ``time.perf_counter`` interval spanning all timed calls, divided
        by ``repeats``.

    Raises:
        ValueError: If ``repeats`` is less than 1.
    """
    # Reject bad counts before doing any work: 0 would divide by zero after
    # the warm-up has already run, and a negative count would time an empty
    # loop and return a meaningless negative average.
    if repeats < 1:
        raise ValueError(f"repeats must be >= 1, got {repeats!r}")

    for _ in range(warmup):
        fn()

    start = time.perf_counter()
    for _ in range(repeats):
        fn()
    elapsed = time.perf_counter() - start
    return elapsed / repeats


def bench_suite(
    seed=42,
    ns=(1, 8, 32),
    vs=(1024, 8192, 32768),
    dtype="float32",
    *,
    repeats=15,
    warmup=5,
):
    """Time the inline and canonical paths for every ``(n, v)`` combination.

    For each ``n`` in ``ns`` and ``v`` in ``vs`` an input is drawn with
    ``randn((n, v), dtype=dtype)`` and two closures are timed with ``timed``:

    * the "std" path: an inline max-subtracted softmax over axis 1 followed
      by an argmax, converting the first row's index to ``int``;
    * the "canonical" path: ``softmax_canonical`` and ``sample_canonical``
      applied to each of the ``n`` rows in a Python loop.

    Args:
        seed: Passed to ``set_seed`` once up front and to every
            ``sample_canonical`` call.
        ns: Iterable of first-dimension sizes (outer loop).
        vs: Iterable of second-dimension sizes (inner loop).
        dtype: Forwarded to ``randn``.
        repeats: Timed calls per measurement, forwarded to ``timed``.
        warmup: Untimed calls per measurement, forwarded to ``timed``.

    Returns:
        A list with one dict per ``(n, v)`` pair, in iteration order, with
        keys ``"n"``, ``"v"``, ``"t_std_ms"``, ``"t_can_ms"`` (mean
        milliseconds per call) and ``"overhead_pct"`` (canonical relative to
        std, in percent).

    Raises:
        ValueError: If ``repeats`` is less than 1 (raised by ``timed``).
    """
    set_seed(seed)
    results = []
    for n in ns:
        for v in vs:
            x = randn((n, v), dtype=dtype)

            # x and n are bound as default arguments so each closure keeps
            # the values of the iteration that created it.
            def f_std(x=x):
                p = xp.exp(x - x.max(axis=1, keepdims=True))
                p = p / p.sum(axis=1, keepdims=True)
                # emulate typical greedy/multinomial path
                _ = int(p.argmax(axis=1)[0])

            def f_can(x=x, n=n):
                # canonical per row; focus on sampling overhead
                for i in range(n):
                    p = softmax_canonical(x[i])
                    _ = sample_canonical(p, seed=seed, token_idx=i)

            t_std = timed(f_std, repeats=repeats, warmup=warmup)
            t_can = timed(f_can, repeats=repeats, warmup=warmup)
            results.append(
                {
                    "n": int(n),
                    "v": int(v),
                    "t_std_ms": 1000.0 * t_std,
                    "t_can_ms": 1000.0 * t_can,
                    # The 1e-9 floor keeps the ratio finite when t_std
                    # measures as zero.
                    "overhead_pct": 100.0 * (t_can - t_std) / max(t_std, 1e-9),
                }
            )
    return results