#!/bin/bash
#
# Full pipeline for Llama-3-8B.
#
# Runs, in order: model download, baseline, calibration, quantized
# inference, the full benchmark, the long-context benchmark, and graph
# generation. Each step is announced with an echo before it starts.
#
# Usage: run from the repository root; every script is referenced by the
# relative path scripts/<name>.py and the model is downloaded into
# ./llama-model relative to the current directory.
#
# Requires: the `hf` CLI and `python3` on PATH.
# NOTE(review): the gated meta-llama repo presumably needs an authenticated
# Hugging Face session — confirm before running on a fresh machine.

# Abort on the first failing step, on use of an unset variable, and on a
# failure in any pipeline stage, so a later step never runs on the output
# of a broken earlier one.
set -euo pipefail

# Model tag passed as the first argument to the per-model scripts.
readonly MODEL_NAME="llama-3-8b"
# Hugging Face repository id to download.
readonly HF_REPO="meta-llama/Meta-Llama-3-8B-Instruct"
# Local directory that receives the downloaded model files.
readonly MODEL_DIR="./llama-model"

echo "=== Per-Head KV Cache Compression — Llama-3-8B ==="

echo "Step 1: Download model"
hf download "${HF_REPO}" --local-dir "${MODEL_DIR}"

echo "Step 2: Baseline"
python3 scripts/baseline.py "${MODEL_NAME}"

echo "Step 3: Calibrate (20 min)"
python3 scripts/calibrate.py "${MODEL_NAME}"

echo "Step 4: Run quantized inference"
python3 scripts/integrate.py "${MODEL_NAME}"

echo "Step 5: Full benchmark"
python3 scripts/benchmark.py "${MODEL_NAME}"

echo "Step 6: Long context benchmark"
python3 scripts/benchmark_long_context.py "${MODEL_NAME}"

echo "Step 7: Generate graphs"
# The two visualization scripts take no model argument.
python3 scripts/visualize_results.py
python3 scripts/visualize_long_context.py

echo "=== Done! Check results/ and figures/ ==="