#!/usr/bin/env bash
# Sweep of LLM-Pruner runs at pruning ratios chosen to match dropping whole layers.
set -euo pipefail

repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
runner="$repo_root/script/run_llmpruner_llama.sh"

base_model="${BASE_MODEL:-meta-llama/Llama-3.1-8B}"
device="${DEVICE:-cpu}"
eval_device="${EVAL_DEVICE:-cuda}"
num_examples="${NUM_EXAMPLES:-10}"

# Lowercase the model name and map anything outside [a-z0-9] to "_".
# "\n" is kept in the tr set so the trailing newline is not turned into an extra "_".
model_tag="$(basename "$base_model" | tr '[:upper:]' '[:lower:]' | tr -c 'a-z0-9\n' '_')"

# run_case LABEL RATIO [extra runner args...]
# Any arguments after the ratio are forwarded unchanged to run_llmpruner_llama.sh.
run_case() {
  local label="$1"
  local ratio="$2"
  shift 2
  echo "[LLM-Pruner] ${label}: PRUNING_RATIO=${ratio}"
  BASE_MODEL="$base_model" \
  PRUNE_CKPT_PATH="${model_tag}_${label}" \
  PRUNING_RATIO="$ratio" \
  DEVICE="$device" \
  EVAL_DEVICE="$eval_device" \
    bash "$runner" --num_examples "$num_examples" "$@"
}

# Equivalent block-only pruning ratios for dropping 6/11/16 layers from the
# 32-layer Llama 3.1 8B (the default base model). Llama-2 7B equivalents are
# listed in the comments below.
run_case "drop6eq" "0.23"
run_case "drop11eq" "0.45"
run_case "drop16eq" "0.70"

# Ratio = 54.3965%

# For Llama-2 7B:
# run_case "drop6eq" "0.23"
# run_case "drop11eq" "0.42"
# run_case "drop16eq" "0.62"

# For Llama-3 8B:
# run_case "drop6eq" "0.23"
# run_case "drop11eq" "0.45"
# run_case "drop16eq" "0.70"

# "0.23"  86.1997%
# "0.48"  68.1928%
# "0.51"  68.1928%
# "0.70"  56.6762%

# Llama 7B depth:
# 18.02%
# 33.04%
# 48.05%

# Llama 8B depth:
# 16.30%, 86.1997%
# 29.88%, 72.2934%
# 43.46%, 56.6762%
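
# Usage sketch. The wrapper's filename is assumed here as
# script/run_llmpruner_sweep.sh; substitute the actual name of this file.
# All defaults above can be overridden through the environment, e.g.:
#
#   BASE_MODEL=meta-llama/Llama-2-7b-hf DEVICE=cuda EVAL_DEVICE=cuda \
#   NUM_EXAMPLES=100 bash script/run_llmpruner_sweep.sh
#
# Any positional arguments given to run_case after the ratio (none in the sweep
# above) are passed straight through to run_llmpruner_llama.sh.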