Lean Laguna: lossless DFlash speculative decoding on Laguna XS.2 (harness, environment, results)

0a55ff6 about 5 hours ago

1.5 kB

	#!/usr/bin/env bash
	# parity_local.sh — full local dry-run of the benchmark + parity harness on the Mac.
	#
	# Steps:
	#   1. Start two stub servers: baseline on :8000 and "dflash" (--spec) on :8001.
	#   2. Poll until both ports accept connections and both stub processes are alive.
	#   3. Run bench/measure.py against each server (writes results/*.json).
	#   4. Run the greedy parity check across both servers.
	#   5. Validate the result files, then tear the stubs down (EXIT trap).
	#
	# No CUDA / vLLM / Laguna required. Expects a virtualenv at .venv/ in the
	# repository root (the parent of this script's directory).
	set -euo pipefail
	cd "$(dirname "$0")/.."
	PY=.venv/bin/python

	# PIDs of the background stubs; initialised empty so the EXIT trap is safe
	# under `set -u` even if the script dies before a stub has been started.
	A=''
	B=''

	# Best-effort teardown: a stub may already have exited, so a failing kill is
	# deliberately ignored. The script's own exit status is left untouched.
	cleanup() {
	  local pid
	  for pid in "$A" "$B"; do
	    if [[ -n "$pid" ]]; then
	      kill "$pid" 2>/dev/null || true
	    fi
	  done
	}
	# Install the trap before launching anything so no exit path leaks a stub.
	trap cleanup EXIT

	"$PY" scripts/stub_server.py --port 8000 & A=$!
	"$PY" scripts/stub_server.py --port 8001 --spec & B=$!

	# Wait for both ports to accept connections (no shell sleep — poll in python).
	# `<<-` strips leading tabs, so the heredoc body and its terminator may be
	# tab-indented; the Python code itself is indented with spaces.
	"$PY" - <<-'PY'
	import socket, sys, time

	for port in (8000, 8001):
	    # Up to 100 attempts, 50 ms apart (~5 s per port).
	    for _ in range(100):
	        with socket.socket() as s:
	            if s.connect_ex(("127.0.0.1", port)) == 0:
	                break
	        time.sleep(0.05)
	    else:
	        # Inner loop ran out of attempts without a successful connect.
	        sys.exit(f"stub on {port} never came up")
	print("[parity_local] both stubs ready")
	PY

	# An open port only proves *something* is listening. If a port was already
	# taken, our stub will have died on bind and we would be benchmarking a
	# foreign server — so confirm both stub processes are still running.
	for pid in "$A" "$B"; do
	  if ! kill -0 "$pid" 2>/dev/null; then
	    echo "[parity_local] stub (pid $pid) exited during startup" >&2
	    exit 1
	  fi
	done

	mkdir -p results
	"$PY" bench/measure.py --base-url http://localhost:8001 --model laguna --label dflash --n 5 --out results/dflash.json
	"$PY" bench/measure.py --base-url http://localhost:8000 --model laguna --label baseline --n 5 --out results/baseline.json
	"$PY" evals/humaneval_subset.py --parity --base-url http://localhost:8000 --base-url-b http://localhost:8001 --model laguna --n 3
	"$PY" scripts/check_results.py results/dflash.json results/baseline.json
	echo "[parity_local] OK — results/ written, parity checked"