#!/usr/bin/env bash
# Sequential attention dropping guided by Super Weight activations.
# The run produces super_weight_attn_drop_trace.json containing the drop order
# and the measured activation deltas for each layer.
set -euo pipefail
port="21304"
GPUs="0,1,2,3"
dataset="c4_val"
prune_data_type="pt"
n_calibration_samples=256
seq_len=2048
prune_method="layer_drop"
layer_drop_method="super_weight_guided"
target_layer="attn"
drop_n=0 # unused but kept for compatibility
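# Threshold used by the super-weight-guided ordering; its exact interpretation
# (e.g. an activation-magnitude cutoff) is defined in src/compress.py.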
super_weight_threshold=3.0
model_name=llama-3-base
model_name_or_path=meta-llama/Llama-3.1-8B
folder_name="${model_name}-${prune_method}_${target_layer}-${layer_drop_method}"
output_dir=../results_prune/${folder_name}
prune_model_save_path=${output_dir}/checkpoint
trace_file=${prune_model_save_path}/super_weight_attn_drop_trace.json
echo "[SuperWeightDrop] output_dir=${output_dir}"
echo "[SuperWeightDrop] model=${model_name_or_path}"
CUDA_VISIBLE_DEVICES=$GPUs accelerate launch --main_process_port $port \
    src/compress.py \
    --stage prune \
    --model_name_or_path ${model_name_or_path} \
    --dataset ${dataset} \
    --dataset_dir ./src/llmtuner/data \
    --split "train" \
    --only_update_config False \
    --prune_data_type ${prune_data_type} \
    --cutoff_len ${seq_len} \
    --output_dir ${output_dir} \
    --logging_steps 10 \
    --bf16 \
    --n_calibration_samples ${n_calibration_samples} \
    --prune_method ${prune_method} \
    --layer_drop_method ${layer_drop_method} \
    --target_layer ${target_layer} \
    --drop_n ${drop_n} \
    --super_weight_threshold ${super_weight_threshold} \
    --prune_model_save_path ${prune_model_save_path}
echo "[SuperWeightDrop] Guided dropping complete. Trace saved to ${trace_file}"
# Optional: convert the checkpoint into a fully materialized dropped model.
# Set only_update_config to True to update only the config files.
layer_drop_method="post_dropping"
only_update_config=False
python src/compress.py \
    --stage prune \
    --model_name_or_path ${model_name_or_path} \
    --dataset ${dataset} \
    --dataset_dir ./src/llmtuner/data \
    --split "train" \
    --only_update_config $only_update_config \
    --layer_drop_norm True \
    --target_layer ${target_layer} \
    --prune_data_type ${prune_data_type} \
    --cutoff_len ${seq_len} \
    --output_dir ${output_dir} \
    --logging_steps 10 \
    --bf16 \
    --n_calibration_samples ${n_calibration_samples} \
    --prune_method ${prune_method} \
    --layer_drop_method ${layer_drop_method} \
    --drop_n ${drop_n} \
    --prune_model_save_path ${prune_model_save_path}
echo "[SuperWeightDrop] Post-dropped model/config saved to ${prune_model_save_path}"