File size: 1,309 Bytes
3738140
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env bash
# Launch LLM-Pruner runs at pruning ratios chosen to approximate depth
# (layer-drop) pruning of a Llama model.
# Env overrides: BASE_MODEL, DEVICE, EVAL_DEVICE, NUM_EXAMPLES.
set -euo pipefail

repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
runner="$repo_root/script/run_llmpruner_llama.sh"

base_model="${BASE_MODEL:-meta-llama/Llama-3.1-8B}"
device="${DEVICE:-cpu}"
eval_device="${EVAL_DEVICE:-cuda}"
num_examples="${NUM_EXAMPLES:-10}"
# Sanitized tag used in checkpoint paths, e.g. "llama_3_1_8b".
# Keep '\n' out of the complemented set: basename emits a trailing newline,
# and translating it would leave a spurious trailing underscore in the tag
# (and a double underscore in "${model_tag}_${label}" checkpoint paths).
model_tag="$(basename "$base_model" | tr '[:upper:]' '[:lower:]' | tr -c 'a-z0-9\n' '_')"

#######################################
# Run the LLM-Pruner runner script once with a labelled configuration.
# Globals (read): base_model, model_tag, device, eval_device, num_examples,
#                 runner
# Arguments: $1 - label appended to the checkpoint path
#            $2 - pruning ratio passed via PRUNING_RATIO
#            remaining arguments are forwarded to the runner script
#######################################
run_case() {
  local case_label="$1"
  local case_ratio="$2"
  shift 2

  printf '[LLM-Pruner] %s: PRUNING_RATIO=%s\n' "$case_label" "$case_ratio"
  BASE_MODEL="$base_model" \
  PRUNE_CKPT_PATH="${model_tag}_${case_label}" \
  PRUNING_RATIO="$case_ratio" \
  DEVICE="$device" \
  EVAL_DEVICE="$eval_device" \
  bash "$runner" --num_examples "$num_examples" "$@"
}

# Block-only pruning ratios equivalent to dropping 6/11/16 layers from a
# 32-layer Llama model (the "drop16eq" ratio corresponds to ~54.4% retained).
for case_spec in "drop6eq:0.23" "drop11eq:0.45" "drop16eq:0.70"; do
  run_case "${case_spec%%:*}" "${case_spec##*:}"
done



# for llama2 7b
# run_case "drop6eq" "0.23"
# run_case "drop11eq" "0.42"
# run_case "drop16eq" "0.62"

# for llama3 8b
# run_case "drop6eq" "0.23"
# run_case "drop11eq" "0.45" 
# run_case "drop16eq" "0.70"





# "0.23" 86.1997%
# "0.48" 68.1928%
# "0.51" # 68.1928%
# "0.70" # 56.6762%


# llama 7b depth
# 18.02%
# 33.04%
# 48.05%

# llama 8b depth 
# 16.30%, 86.1997%
# 29.88%, 72.2934%
# 43.46%, 56.6762%