# temp_ss / script / run_llmpruner_whole.sh
# (HF upload-page residue, kept as comments so the script parses:)
# LJYAI's picture
# upload script
# 3738140 verified
#!/usr/bin/env bash
# Sweep LLM-Pruner over several pruning ratios for one base model.
# Env overrides: BASE_MODEL, DEVICE, EVAL_DEVICE, NUM_EXAMPLES.
set -euo pipefail

# Absolute repo root (parent of the directory containing this script).
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
runner="$repo_root/script/run_llmpruner_llama.sh"

base_model="${BASE_MODEL:-meta-llama/Llama-3.1-8B}"
device="${DEVICE:-cpu}"
eval_device="${EVAL_DEVICE:-cuda}"
num_examples="${NUM_EXAMPLES:-10}"

# Filesystem-safe tag from the model name, e.g. "Llama-3.1-8B" -> "llama_3_1_8b".
# Bug fix: keep '\n' in the preserved set so the trailing newline emitted by
# basename is not translated into a trailing '_'; -s squeezes runs of '_' to one.
model_tag="$(basename "$base_model" | tr '[:upper:]' '[:lower:]' | tr -cs 'a-z0-9\n' '_')"
run_case() {
  # Run one pruning pass at the given ratio; remaining args go to the runner.
  #   $1 - label embedded in the checkpoint directory name
  #   $2 - pruning ratio exported as PRUNING_RATIO
  local label ratio
  label="$1"
  ratio="$2"
  shift 2
  printf '[LLM-Pruner] %s: PRUNING_RATIO=%s\n' "$label" "$ratio"
  BASE_MODEL="$base_model" \
    PRUNE_CKPT_PATH="${model_tag}_${label}" \
    PRUNING_RATIO="$ratio" \
    DEVICE="$device" \
    EVAL_DEVICE="$eval_device" \
    bash "$runner" --num_examples "$num_examples" "$@"
}
# Equivalent block-only pruning scales for dropping layers from a 32-layer Llama-2 7B.
# NOTE(review): BASE_MODEL above defaults to Llama-3.1-8B and these ratios match the
# "for llama3 8b" table below — the "Llama-2 7B" wording here may be stale; confirm.
run_case "drop6eq" "0.23"
run_case "drop11eq" "0.45"
run_case "drop16eq" "0.70" # Ratio = 54.3965%  (NOTE(review): tables below list 56.6762% for 0.70 — verify which is current)
# for llama2 7b
# run_case "drop6eq" "0.23"
# run_case "drop11eq" "0.42"
# run_case "drop16eq" "0.62"
# for llama3 8b
# run_case "drop6eq" "0.23"
# run_case "drop11eq" "0.45"
# run_case "drop16eq" "0.70"
# "0.23" 86.1997%
# "0.48" 68.1928%
# "0.51" # 68.1928%
# "0.70" # 56.6762%
# llama 7b depth
# 18.02%
# 33.04%
# 48.05%
# llama 8b depth
# 16.30%, 86.1997%
# 29.88%, 72.2934%
# 43.46%, 56.6762%