#!/usr/bin/env bash
set -euo pipefail
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
runner="$repo_root/script/run_llmpruner_llama.sh"
base_model="${BASE_MODEL:-meta-llama/Llama-3.1-8B}"
device="${DEVICE:-cpu}"
eval_device="${EVAL_DEVICE:-cuda}"
num_examples="${NUM_EXAMPLES:-10}"
# Filesystem-friendly tag derived from the model name, e.g. "llama_3_1_8b".
# ('\n' is kept out of the complement set so the trailing newline is not turned into '_'.)
model_tag="$(basename "$base_model" | tr '[:upper:]' '[:lower:]' | tr -c 'a-z0-9\n' '_')"
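# Example invocation (a sketch; the script filename below is hypothetical, and any
# of the variables above can be overridden from the environment):
#   BASE_MODEL=meta-llama/Llama-2-7b-hf DEVICE=cuda NUM_EXAMPLES=20 \
#     bash script/run_llmpruner_llama_sweep.sh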
# run_case LABEL RATIO [extra runner args]: launch one LLM-Pruner run with the
# given pruning ratio and PRUNE_CKPT_PATH set to "<model_tag>_<LABEL>".
run_case() {
  local label="$1"
  local ratio="$2"
  shift 2
  echo "[LLM-Pruner] ${label}: PRUNING_RATIO=${ratio}"
  BASE_MODEL="$base_model" \
  PRUNE_CKPT_PATH="${model_tag}_${label}" \
  PRUNING_RATIO="$ratio" \
  DEVICE="$device" \
  EVAL_DEVICE="$eval_device" \
    bash "$runner" --num_examples "$num_examples" "$@"
}
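# Optional sanity helper (a sketch; it assumes the "dropNeq" labels below mean
# "match the cut of dropping N of the 32 transformer blocks"). It only prints
# the raw block fraction for comparison with the hand-tuned ratios and is not
# called anywhere in this script.
equivalent_block_fraction() {
  local dropped="$1" total_layers="${2:-32}"
  # e.g. equivalent_block_fraction 6  ->  0.1875
  awk -v d="$dropped" -v t="$total_layers" 'BEGIN { printf "%.4f\n", d / t }'
}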
# Block-only pruning ratios equivalent to dropping layers from a 32-layer model
# (dropNeq = drop N layers). The active values below are the Llama-3 8B settings;
# per-model alternatives are listed after them.
run_case "drop6eq" "0.23"
run_case "drop11eq" "0.45"
run_case "drop16eq" "0.70" # Ratio = 54.3965%
# Alternative ratios for Llama-2 7B:
# run_case "drop6eq" "0.23"
# run_case "drop11eq" "0.42"
# run_case "drop16eq" "0.62"
# Alternative ratios for Llama-3 8B (same as the active values above):
# run_case "drop6eq" "0.23"
# run_case "drop11eq" "0.45"
# run_case "drop16eq" "0.70"
# "0.23" 86.1997%
# "0.48" 68.1928%
# "0.51" # 68.1928%
# "0.70" # 56.6762%
# llama 7b depth
# 18.02%
# 33.04%
# 48.05%
# llama 8b depth
# 16.30%, 86.1997%
# 29.88%, 72.2934%
# 43.46%, 56.6762% |