lean-laguna / scripts /parity_local.sh
art87able's picture
Lean Laguna: lossless DFlash speculative decoding on Laguna XS.2 (harness, environment, results)
0a55ff6
#!/usr/bin/env bash
# parity_local.sh — full local dry-run of the benchmark + parity harness on the Mac.
#
# Starts two stub servers (baseline :8000, "dflash" :8001), waits until both are
# ready, runs measure.py against each (writing results/*.json) and the greedy
# parity check across both, then tears the stubs down. No CUDA / vLLM / Laguna.
#
# Usage: scripts/parity_local.sh   (no arguments)
# Requires: a virtualenv at .venv/ in the repository root; ports 8000 and 8001 free.
set -euo pipefail

# All paths below are relative to the repository root (the parent of scripts/).
cd "$(dirname "$0")/.."

PY=.venv/bin/python
if [[ ! -x "$PY" ]]; then
  printf '[parity_local] %s not found or not executable (create the venv first)\n' "$PY" >&2
  exit 1
fi

# Refuse to start if something is already listening on either port: the
# readiness poll below only proves that *a* server answers, so a stale listener
# would otherwise be benchmarked in place of the stub that failed to bind.
"$PY" - <<'PY'
import socket, sys
for port in (8000, 8001):
    with socket.socket() as s:
        if s.connect_ex(("127.0.0.1", port)) == 0:
            sys.exit(f"[parity_local] port {port} is already in use")
PY

baseline_pid=''
dflash_pid=''

# Stop whichever stubs were started and wait for them, so the ports are
# released by the time the script returns. Registered before the first launch
# so no exit path can leak a background server.
cleanup() {
  local pid
  for pid in "$baseline_pid" "$dflash_pid"; do
    # The trap can fire before both stubs have been launched.
    [[ -n "$pid" ]] || continue
    # Best effort: the stub may already have exited on its own.
    kill "$pid" 2>/dev/null || true
    wait "$pid" 2>/dev/null || true
  done
}
trap cleanup EXIT

"$PY" scripts/stub_server.py --port 8000 &
baseline_pid=$!
"$PY" scripts/stub_server.py --port 8001 --spec &
dflash_pid=$!

# Wait for both ports to accept connections (no shell sleep — poll in python).
# Each port gets up to 100 attempts, 0.05 s apart, before the run is aborted.
"$PY" - <<'PY'
import socket, time, sys
for port in (8000, 8001):
    for _ in range(100):
        with socket.socket() as s:
            if s.connect_ex(("127.0.0.1", port)) == 0:
                break
        time.sleep(0.05)
    else:
        sys.exit(f"stub on {port} never came up")
print("[parity_local] both stubs ready")
PY

mkdir -p results

# Benchmark each endpoint, then run the parity check across both and validate
# the written result files. set -e aborts the run at the first failing step.
"$PY" bench/measure.py --base-url http://localhost:8001 --model laguna \
  --label dflash --n 5 --out results/dflash.json
"$PY" bench/measure.py --base-url http://localhost:8000 --model laguna \
  --label baseline --n 5 --out results/baseline.json
"$PY" evals/humaneval_subset.py --parity --base-url http://localhost:8000 \
  --base-url-b http://localhost:8001 --model laguna --n 3
"$PY" scripts/check_results.py results/dflash.json results/baseline.json

echo "[parity_local] OK — results/ written, parity checked"