#!/usr/bin/env bash
#
# Batch Super Weight detection driver.
# For every (model, dropped-module, drop-count) combination it swaps the
# model's config.json for the matching variant from $configs_dir and runs
# src/compress.py (stage=prune) under `accelerate launch`.
#
# Expected layout: config JSONs in $configs_dir, model weights under
# eval/Meta-Llama-3-8B, and src/compress.py relative to the working dir.

# Error on unset variables and on failures inside pipelines. `-e` is left
# out on purpose: one failed launch should not abort the whole batch.
set -uo pipefail

# Distributed-launch settings.
readonly port="23308"        # accelerate --main_process_port
readonly GPUs="0,1,2,3"      # passed as CUDA_VISIBLE_DEVICES per launch

# Sweep axes: model tags, module kinds to drop, and how many to drop.
model_names=("llama3-8b")
drop_modules=("attn")
drop_nums=("17" "18" "19" "20" "21" "22" "23" "24")

# Directory holding the per-combination config.json variants.
readonly configs_dir="/workspace/1016_qif/LLM-Drop_superweights/eval/configs"

# Calibration-data settings.
readonly dataset="c4_val"
readonly prune_data_type="pt"
readonly n_calibration_samples=256
readonly seq_len=2048

# Super-weight detection settings (analysis only: nothing is pruned,
# prune_super_weight_n stays 0).
readonly prune_method="super_weight"
readonly super_weight_method="analysis"
readonly super_weight_threshold=3.0
readonly prune_super_weight_n=0

# HF model-loading flags forwarded to compress.py.
readonly trust_remote_code=True
readonly attn_implementation=eager

echo "Starting Super Weight batch detection..."
|
# NOTE(review): model_dir is hard-coded to Meta-Llama-3-8B and does not vary
# with ${model_name} — only valid while model_names=("llama3-8b"). Hoisted
# out of the loops since it is invariant.
model_dir="/workspace/1016_qif/LLM-Drop_superweights/eval/Meta-Llama-3-8B"

for model_name in "${model_names[@]}"; do
for drop_module in "${drop_modules[@]}"; do
for drop_num in "${drop_nums[@]}"; do
    # Config variant for this combination, e.g. llama3-8b_drop17_attn.json.
    # (Original line had inverted quoting: literals quoted, expansions not.)
    cfg_path="${configs_dir}/${model_name}_drop${drop_num}_${drop_module}.json"

    if [ ! -f "$cfg_path" ]; then
        echo "[WARN] Config not found: $cfg_path. Skipping." >&2
        continue
    fi
    if [ ! -d "$model_dir" ]; then
        echo "[ERROR] Model directory not found: $model_dir. Expected weights here. Skipping." >&2
        continue
    fi

    echo "=========================================="
    echo "Model: ${model_name} | Module: ${drop_module} | Drop: ${drop_num}"
    echo "Applying config: ${cfg_path} -> ${model_dir}/config.json"
    echo "=========================================="

    # Overwrite the model's config with this combination's variant.
    cp -f "$cfg_path" "${model_dir}/config.json"

    # Per-combination output locations.
    tag="${model_name}-${prune_method}-${super_weight_method}-${drop_module}-drop${drop_num}"
    output_dir="../results_prune/${tag}"
    prune_model_save_path="${output_dir}/checkpoint"
    super_weight_cache_file="../results_prune/cache/${tag}-${dataset}-${n_calibration_samples}samples.pt"

    # Create the output dir AND the cache dir (the cache file is written
    # there); skip the combination instead of masking a mkdir failure.
    if ! mkdir -p "${output_dir}" "../results_prune/cache"; then
        echo "[ERROR] Cannot create output directories for ${tag}. Skipping." >&2
        continue
    fi

    # Run the prune stage. Check the exit status so a failed launch is
    # reported instead of falling through to the "[DONE]" message.
    if ! CUDA_VISIBLE_DEVICES="$GPUs" accelerate launch --main_process_port "$port" \
        src/compress.py \
        --stage prune \
        --model_name_or_path "${model_dir}" \
        --dataset "${dataset}" \
        --dataset_dir ./src/llmtuner/data \
        --split "train" \
        --only_update_config False \
        --prune_data_type "${prune_data_type}" \
        --cutoff_len "${seq_len}" \
        --output_dir "${output_dir}" \
        --logging_steps 10 \
        --bf16 \
        --n_calibration_samples "${n_calibration_samples}" \
        --prune_method "${prune_method}" \
        --super_weight_method "${super_weight_method}" \
        --super_weight_threshold "${super_weight_threshold}" \
        --super_weight_cache_file "${super_weight_cache_file}" \
        --prune_super_weight_n "${prune_super_weight_n}" \
        --prune_model_save_path "${prune_model_save_path}" \
        --trust_remote_code "${trust_remote_code}" \
        --attn_implementation "${attn_implementation}"; then
        echo "[ERROR] accelerate launch failed for ${tag}. Skipping." >&2
        continue
    fi

    echo "[DONE] Saved to: ${output_dir}"
    echo "[INFO] Super Weight JSON: ${prune_model_save_path}/super_weights.json"
    echo

    # Brief pause between runs (presumably to let GPU memory settle —
    # TODO confirm this is still needed).
    sleep 5
done
done
done

echo "All Super Weight batch detections completed."
|
|
|
|
|
|