#!/bin/bash
#
# Full pipeline for Llama-3-8B (per-head KV cache compression).
#
# Runs these stages strictly in order, aborting on the first failure:
#   1. download the model into ./llama-model
#   2. baseline
#   3. calibration (announced as taking about 20 minutes)
#   4. quantized inference
#   5. full benchmark
#   6. long-context benchmark
#   7. graph generation
#
# Every path used here (scripts/, ./llama-model) is relative, so start the
# script from the directory that contains scripts/.
#
# Requires: hf and python3 on PATH.

set -euo pipefail

readonly MODEL_REPO="meta-llama/Meta-Llama-3-8B-Instruct"
readonly MODEL_DIR="./llama-model"
# Identifier handed to each per-model script under scripts/.
readonly MODEL_KEY="llama-3-8b"

# Print an error message to stderr and abort with status 1.
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}

# Print the progress line for one stage: "Step <number>: <title>".
step() {
  printf 'Step %s: %s\n' "$1" "$2"
}

# Fail fast, before the model download starts, if a required tool or the
# scripts/ directory is missing.
preflight() {
  local tool
  for tool in hf python3; do
    command -v "$tool" >/dev/null || die "required command not found: $tool"
  done
  [[ -d scripts ]] \
    || die "scripts/ not found; run this from the directory that contains it"
}

main() {
  preflight

  printf '%s\n' "=== Per-Head KV Cache Compression — Llama-3-8B ==="

  step 1 "Download model"
  hf download "$MODEL_REPO" --local-dir "$MODEL_DIR"

  step 2 "Baseline"
  python3 scripts/baseline.py "$MODEL_KEY"

  step 3 "Calibrate (20 min)"
  python3 scripts/calibrate.py "$MODEL_KEY"

  step 4 "Run quantized inference"
  python3 scripts/integrate.py "$MODEL_KEY"

  step 5 "Full benchmark"
  python3 scripts/benchmark.py "$MODEL_KEY"

  step 6 "Long context benchmark"
  python3 scripts/benchmark_long_context.py "$MODEL_KEY"

  step 7 "Generate graphs"
  # The two visualisation scripts are invoked without a model argument.
  python3 scripts/visualize_results.py
  python3 scripts/visualize_long_context.py

  printf '%s\n' "=== Done! Check results/ and figures/ ==="
}

main "$@"