#!/usr/bin/env bash
#
# Assemble a local GemmaCut reproduction bundle under $HOST_ROOT.
#
# Steps:
#   1. Clone (if missing) the vLLM fork and check out the pinned commit.
#   2. Download the helper scripts, README and sidecar from the hub repo
#      into a private staging directory.
#   3. Verify the sidecar's SHA-256 while it is still in staging, then copy
#      everything into $HOST_ROOT/gemmacut.
#   4. Print ready-to-paste smoke and serve commands.
#
# Environment overrides (all optional):
#   REPO_ID             Hub repo passed to `hf download`.
#   HOST_ROOT           Root of the bundle (default: $PWD/gemmacut-repro).
#   VLLM_REPO           Git URL of the vLLM fork.
#   VLLM_BRANCH         Branch to clone/fetch.
#   VLLM_COMMIT         Commit to check out after fetching.
#   SIDECAR_SHA256      Expected SHA-256 (lowercase hex) of the sidecar.
#   HF_HUB_DISABLE_XET  Exported for the `hf` CLI; defaults to 1.
#   TMPDIR              Parent directory for the staging directory. A fresh
#                       subdirectory is created inside it; TMPDIR itself is
#                       never deleted.
#
# Requires: git, the `hf` CLI, and either sha256sum or shasum.

set -euo pipefail

REPO_ID="${REPO_ID:-satya007/gemmacut-spectral}"
HOST_ROOT="${HOST_ROOT:-$PWD/gemmacut-repro}"
VLLM_REPO="${VLLM_REPO:-https://github.com/bluecopa/vllm-spectral.git}"
VLLM_BRANCH="${VLLM_BRANCH:-spectral-codebook-docker}"
VLLM_COMMIT="${VLLM_COMMIT:-008dd7f87fb9de185e536ad30b4d524024ed9b9f}"
SIDECAR_SHA256="${SIDECAR_SHA256:-e47a36c13467cbedf720e7f782b976df3dcda2d989c727113a8315008661a3e4}"

# NOTE(review): presumably turns off the Xet transfer backend in the hub
# client; confirm against the huggingface_hub environment-variable docs.
export HF_HUB_DISABLE_XET="${HF_HUB_DISABLE_XET:-1}"

VLLM_DIR="$HOST_ROOT/vllm-spectral"
GEMMACUT_DIR="$HOST_ROOT/gemmacut"

# File name of the sidecar, stored under artifacts/ in the hub repo.
SIDECAR_NAME="spectral_sidecar_chat_v2.pt"

# Helper files stored under scripts/ in the hub repo; each is copied to
# $GEMMACUT_DIR under the same name.
HELPER_FILES=(
  bench_tokens_sec_phase2_eagle.sh
  serve_phase2_eagle.sh
  test_triton_codebook_match.py
  measure_kv_cache_compression.py
)

# Staging directory for downloads. Created by main(); must stay global so
# the EXIT trap can still see it.
TMP_DIR=""

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# EXIT trap: remove the staging directory if one was created.
cleanup() {
  if [[ -n "${TMP_DIR:-}" ]]; then
    rm -rf -- "$TMP_DIR"
  fi
}

# Abort early with a clear message when a required tool is missing.
# Arguments: $1 - command name
require_cmd() {
  command -v "$1" >/dev/null 2>&1 || die "Required command not found: $1"
}

#######################################
# Download files from $REPO_ID into $TMP_DIR, retrying on failure.
# Makes up to 3 attempts, sleeping 5s then 10s between them.
# Globals:   REPO_ID, TMP_DIR (read)
# Arguments: $1  - label used in the failure message
#            $2+ - repo-relative file paths to download
# Returns:   0 on success; exits the script after the final failure
#######################################
fetch_from_hub() {
  local label=$1
  shift
  local -r max_attempts=3
  local attempt

  for ((attempt = 1; attempt <= max_attempts; attempt++)); do
    if hf download "$REPO_ID" "$@" \
      --local-dir "$TMP_DIR" \
      --max-workers 1; then
      return 0
    fi

    if ((attempt == max_attempts)); then
      die "Failed to download $label after $attempt attempts"
    fi

    sleep $((attempt * 5))
  done
}

#######################################
# Print the SHA-256 hex digest of a file.
# Prefers sha256sum and falls back to shasum when it is not installed.
# Arguments: $1 - path of the file to hash
# Outputs:   digest on stdout
# Returns:   non-zero when no checksum tool is available or hashing fails
#######################################
sha256_of() {
  local path=$1

  if command -v sha256sum >/dev/null 2>&1; then
    sha256sum "$path" | awk '{print $1}'
  elif command -v shasum >/dev/null 2>&1; then
    shasum -a 256 "$path" | awk '{print $1}'
  else
    die "Neither sha256sum nor shasum is available; cannot verify sidecar"
  fi
}

# Print the summary banner with the smoke and serve commands.
print_summary() {
  cat <<EOF
Repro bundle ready.

HOST_ROOT=$HOST_ROOT
vLLM=$VLLM_DIR @ $VLLM_COMMIT
GemmaCut helper dir=$GEMMACUT_DIR

Smoke command:
cd "$GEMMACUT_DIR" && \\
HOST_ROOT="$HOST_ROOT" \\
SPECTRAL_CUDA_GRAPH=1 \\
RUN_SMOKE=1 \\
SMOKE_ONLY=1 \\
NUM_SPEC_TOKENS=3 \\
./bench_tokens_sec_phase2_eagle.sh

Serve command:
cd "$GEMMACUT_DIR" && \\
HOST_ROOT="$HOST_ROOT" \\
HF_TOKEN="\${HF_TOKEN:-}" \\
./serve_phase2_eagle.sh
EOF
}

main() {
  trap cleanup EXIT

  require_cmd git
  require_cmd hf

  mkdir -p "$HOST_ROOT" "$GEMMACUT_DIR/results_it"

  # Clone only when there is no checkout yet; otherwise reuse it.
  if [[ ! -d "$VLLM_DIR/.git" ]]; then
    git clone --branch "$VLLM_BRANCH" "$VLLM_REPO" "$VLLM_DIR"
  fi

  # Fetch the branch first so the pinned commit is available locally.
  git -C "$VLLM_DIR" fetch origin "$VLLM_BRANCH"
  git -C "$VLLM_DIR" checkout "$VLLM_COMMIT"

  # Stage downloads in a fresh private directory. TMPDIR is only the
  # parent: deleting "$TMPDIR" itself would wipe the user's temp directory.
  local stage_parent="${TMPDIR:-$HOST_ROOT}"
  mkdir -p "$stage_parent"
  TMP_DIR="$(mktemp -d "${stage_parent%/}/hf-download.XXXXXX")"

  local -a hub_paths=()
  local name
  for name in "${HELPER_FILES[@]}"; do
    hub_paths+=("scripts/$name")
  done

  fetch_from_hub "helper files" "${hub_paths[@]}" README.md
  fetch_from_hub "sidecar" "artifacts/$SIDECAR_NAME"

  # Verify the sidecar before anything is installed, so a corrupt or
  # unexpected file never lands in results_it/.
  local staged_sidecar="$TMP_DIR/artifacts/$SIDECAR_NAME"
  local actual_sha
  actual_sha="$(sha256_of "$staged_sidecar")"
  if [[ "$actual_sha" != "$SIDECAR_SHA256" ]]; then
    die "Sidecar SHA256 mismatch: expected $SIDECAR_SHA256, got $actual_sha"
  fi

  cp "$staged_sidecar" "$GEMMACUT_DIR/results_it/$SIDECAR_NAME"
  for name in "${HELPER_FILES[@]}"; do
    cp "$TMP_DIR/scripts/$name" "$GEMMACUT_DIR/$name"
  done
  cp "$TMP_DIR/README.md" "$GEMMACUT_DIR/HF_REPRO_README.md"
  chmod +x "$GEMMACUT_DIR/bench_tokens_sec_phase2_eagle.sh" \
    "$GEMMACUT_DIR/serve_phase2_eagle.sh"

  print_summary
}

main "$@"