Fix grouped KV commit hash in repro bundle
Browse files
README.md
CHANGED
|
@@ -35,7 +35,7 @@ The actual vLLM implementation lives here:
|
|
| 35 |
```text
|
| 36 |
https://github.com/bluecopa/vllm-spectral.git
|
| 37 |
branch: spectral-codebook-docker
|
| 38 |
-
commit:
|
| 39 |
```
|
| 40 |
|
| 41 |
## Requirements
|
|
@@ -82,7 +82,7 @@ chmod +x /tmp/gemmacut-spectral-bootstrap/scripts/setup_repro_from_hf.sh
|
|
| 82 |
The setup script:
|
| 83 |
|
| 84 |
- clones the tested vLLM branch over HTTPS,
|
| 85 |
-
- checks out `
|
| 86 |
- downloads this repo's sidecar and helper scripts,
|
| 87 |
- verifies the sidecar SHA256,
|
| 88 |
- writes everything under `$HOST_ROOT`.
|
|
@@ -165,7 +165,7 @@ DISABLE_HYBRID_KV_CACHE_MANAGER=0
|
|
| 165 |
kv_cache_dtype=fp8_e4m3
|
| 166 |
```
|
| 167 |
|
| 168 |
-
`DISABLE_HYBRID_KV_CACHE_MANAGER=0` uses the default vLLM hybrid KV cache manager. Commit `
|
| 169 |
|
| 170 |
Set `HF_HUB_OFFLINE=1` only after the base model and drafter are already cached under `$HOST_ROOT/.cache/huggingface`.
|
| 171 |
|
|
|
|
| 35 |
```text
|
| 36 |
https://github.com/bluecopa/vllm-spectral.git
|
| 37 |
branch: spectral-codebook-docker
|
| 38 |
+
commit: 008dd7f87fb9de185e536ad30b4d524024ed9b9f
|
| 39 |
```
|
| 40 |
|
| 41 |
## Requirements
|
|
|
|
| 82 |
The setup script:
|
| 83 |
|
| 84 |
- clones the tested vLLM branch over HTTPS,
|
| 85 |
+
- checks out `008dd7f87fb9de185e536ad30b4d524024ed9b9f`,
|
| 86 |
- downloads this repo's sidecar and helper scripts,
|
| 87 |
- verifies the sidecar SHA256,
|
| 88 |
- writes everything under `$HOST_ROOT`.
|
|
|
|
| 165 |
kv_cache_dtype=fp8_e4m3
|
| 166 |
```
|
| 167 |
|
| 168 |
+
`DISABLE_HYBRID_KV_CACHE_MANAGER=0` uses the default vLLM hybrid KV cache manager. Commit `008dd7f87fb9de185e536ad30b4d524024ed9b9f` updates that path to account for Spectral's non-uniform per-layer page sizes, using group-local block pools. Set `DISABLE_HYBRID_KV_CACHE_MANAGER=1` only as a fallback or when bisecting.
|
| 169 |
|
| 170 |
Set `HF_HUB_OFFLINE=1` only after the base model and drafter are already cached under `$HOST_ROOT/.cache/huggingface`.
|
| 171 |
|
manifest.json
CHANGED
|
@@ -11,7 +11,7 @@
|
|
| 11 |
},
|
| 12 |
"vllm_repo": "https://github.com/bluecopa/vllm-spectral.git",
|
| 13 |
"vllm_branch": "spectral-codebook-docker",
|
| 14 |
-
"vllm_commit": "
|
| 15 |
"previous_github_branch_commit": "85430a3a1d4d9769f75c5e4b73bbbf73bd460caa",
|
| 16 |
"base_model": "Intel/gemma-4-31B-it-int4-AutoRound",
|
| 17 |
"eagle3_drafter": "RedHatAI/gemma-4-31B-it-speculator.eagle3",
|
|
|
|
| 11 |
},
|
| 12 |
"vllm_repo": "https://github.com/bluecopa/vllm-spectral.git",
|
| 13 |
"vllm_branch": "spectral-codebook-docker",
|
| 14 |
+
"vllm_commit": "008dd7f87fb9de185e536ad30b4d524024ed9b9f",
|
| 15 |
"previous_github_branch_commit": "85430a3a1d4d9769f75c5e4b73bbbf73bd460caa",
|
| 16 |
"base_model": "Intel/gemma-4-31B-it-int4-AutoRound",
|
| 17 |
"eagle3_drafter": "RedHatAI/gemma-4-31B-it-speculator.eagle3",
|
results/candidate_grouped_kv_4k_niah_single_1_500_20260413_073312/result.json
CHANGED
|
@@ -12,7 +12,7 @@
|
|
| 12 |
"max_seq_length": 4096,
|
| 13 |
"max_tokens": 128,
|
| 14 |
"model": "gemmacut-spectral",
|
| 15 |
-
"vllm_commit": "
|
| 16 |
"disable_hybrid_kv_cache_manager": 0,
|
| 17 |
"spectral_cuda_graph": 1,
|
| 18 |
"data_path": "/teamspace/studios/this_studio/gemmacut-h100-pilot/ruler_pilot/candidate_grouped_kv_data_4k_500/niah_single_1/validation.jsonl",
|
|
|
|
| 12 |
"max_seq_length": 4096,
|
| 13 |
"max_tokens": 128,
|
| 14 |
"model": "gemmacut-spectral",
|
| 15 |
+
"vllm_commit": "008dd7f87fb9de185e536ad30b4d524024ed9b9f",
|
| 16 |
"disable_hybrid_kv_cache_manager": 0,
|
| 17 |
"spectral_cuda_graph": 1,
|
| 18 |
"data_path": "/teamspace/studios/this_studio/gemmacut-h100-pilot/ruler_pilot/candidate_grouped_kv_data_4k_500/niah_single_1/validation.jsonl",
|
scripts/setup_repro_from_hf.sh
CHANGED
|
@@ -7,7 +7,7 @@ REPO_ID="${REPO_ID:-satya007/gemmacut-spectral}"
|
|
| 7 |
HOST_ROOT="${HOST_ROOT:-$PWD/gemmacut-repro}"
|
| 8 |
VLLM_REPO="${VLLM_REPO:-https://github.com/bluecopa/vllm-spectral.git}"
|
| 9 |
VLLM_BRANCH="${VLLM_BRANCH:-spectral-codebook-docker}"
|
| 10 |
-
VLLM_COMMIT="${VLLM_COMMIT:-
|
| 11 |
SIDECAR_SHA256="${SIDECAR_SHA256:-e47a36c13467cbedf720e7f782b976df3dcda2d989c727113a8315008661a3e4}"
|
| 12 |
|
| 13 |
export HF_HUB_DISABLE_XET="${HF_HUB_DISABLE_XET:-1}"
|
|
|
|
| 7 |
HOST_ROOT="${HOST_ROOT:-$PWD/gemmacut-repro}"
|
| 8 |
VLLM_REPO="${VLLM_REPO:-https://github.com/bluecopa/vllm-spectral.git}"
|
| 9 |
VLLM_BRANCH="${VLLM_BRANCH:-spectral-codebook-docker}"
|
| 10 |
+
VLLM_COMMIT="${VLLM_COMMIT:-008dd7f87fb9de185e536ad30b4d524024ed9b9f}"
|
| 11 |
SIDECAR_SHA256="${SIDECAR_SHA256:-e47a36c13467cbedf720e7f782b976df3dcda2d989c727113a8315008661a3e4}"
|
| 12 |
|
| 13 |
export HF_HUB_DISABLE_XET="${HF_HUB_DISABLE_XET:-1}"
|