# temp_ss / script / run_llmpruner_whole.sh
# (HF upload-page residue, kept as comments so the script parses:)
# LJYAI's picture
# upload script
# 3738140 verified
#!/usr/bin/env bash
# Sweep LLM-Pruner over several pruning ratios for one base model.
# Env overrides: BASE_MODEL, DEVICE, EVAL_DEVICE, NUM_EXAMPLES.
set -euo pipefail

# Absolute repo root (parent of the directory containing this script).
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
runner="$repo_root/script/run_llmpruner_llama.sh"

base_model="${BASE_MODEL:-meta-llama/Llama-3.1-8B}"
device="${DEVICE:-cpu}"
eval_device="${EVAL_DEVICE:-cuda}"
num_examples="${NUM_EXAMPLES:-10}"

# Filesystem-safe tag from the model name, e.g. "Llama-3.1-8B" -> "llama_3_1_8b".
# Bug fix: keep '\n' in the preserved set so the trailing newline emitted by
# basename is not translated into a trailing '_'; -s squeezes runs of '_' to one.
model_tag="$(basename "$base_model" | tr '[:upper:]' '[:lower:]' | tr -cs 'a-z0-9\n' '_')"
run_case() {
  # Run one pruning pass at the given ratio; remaining args go to the runner.
  #   $1 - label embedded in the checkpoint directory name
  #   $2 - pruning ratio exported as PRUNING_RATIO
  local label ratio
  label="$1"
  ratio="$2"
  shift 2
  printf '[LLM-Pruner] %s: PRUNING_RATIO=%s\n' "$label" "$ratio"
  BASE_MODEL="$base_model" \
    PRUNE_CKPT_PATH="${model_tag}_${label}" \
    PRUNING_RATIO="$ratio" \
    DEVICE="$device" \
    EVAL_DEVICE="$eval_device" \
    bash "$runner" --num_examples "$num_examples" "$@"
}
# Equivalent block-only pruning scales for dropping layers from a 32-layer Llama-2 7B.
# NOTE(review): BASE_MODEL above defaults to Llama-3.1-8B and these ratios match the
# "for llama3 8b" table below — the "Llama-2 7B" wording here may be stale; confirm.
run_case "drop6eq" "0.23"
run_case "drop11eq" "0.45"
run_case "drop16eq" "0.70" # Ratio = 54.3965%  (NOTE(review): tables below list 56.6762% for 0.70 — verify which is current)
# for llama2 7b
# run_case "drop6eq" "0.23"
# run_case "drop11eq" "0.42"
# run_case "drop16eq" "0.62"
# for llama3 8b
# run_case "drop6eq" "0.23"
# run_case "drop11eq" "0.45"
# run_case "drop16eq" "0.70"
# "0.23" 86.1997%
# "0.48" 68.1928%
# "0.51" # 68.1928%
# "0.70" # 56.6762%
# llama 7b depth
# 18.02%
# 33.04%
# 48.05%
# llama 8b depth
# 16.30%, 86.1997%
# 29.88%, 72.2934%
# 43.46%, 56.6762%