#!/usr/bin/env bash
#
# Super Weight analysis / pruning launcher.
#
# Runs src/compress.py (prune stage) on a Llama-2-7b checkpoint via
# `accelerate launch`, using C4 validation samples for calibration.
# Results land under ../results_prune/<model>-<method>-<sw_method>/.
#
# Requires: accelerate on PATH, src/compress.py and ./src/llmtuner/data
# relative to the working directory, and GPUs 0-3 visible on this host.

set -euo pipefail

# Distributed-launch settings.
port="21304"        # main process port for accelerate rendezvous
GPUs="0,1,2,3"      # GPUs exposed to the job via CUDA_VISIBLE_DEVICES

# Calibration data settings.
dataset="c4_val"
prune_data_type="pt"
n_calibration_samples=256
seq_len=2048

# Pruning configuration.
prune_method="super_weight"
super_weight_method="analysis"    # "analysis" only locates super weights
super_weight_threshold=3.0        # activation-magnitude threshold
prune_super_weight_n=0            # 0 => analyze, do not remove weights

# Model under test.
model_name="llama-2-7b"
model_name_or_path="meta-llama/Llama-2-7b-hf"

# Derived paths.
folder_name="${model_name}-${prune_method}-${super_weight_method}"
super_weight_cache_file="../results_prune/cache/${model_name}-${prune_method}-${dataset}-${n_calibration_samples}samples.pt"

echo "${folder_name}"

output_dir="../results_prune/${folder_name}"
prune_model_save_path="${output_dir}/checkpoint"

# Create output locations up front so the job doesn't fail late on a
# missing directory.
mkdir -p -- "${output_dir}" "$(dirname -- "${super_weight_cache_file}")"

CUDA_VISIBLE_DEVICES="${GPUs}" accelerate launch --main_process_port "${port}" \
  src/compress.py \
  --stage prune \
  --model_name_or_path "${model_name_or_path}" \
  --dataset "${dataset}" \
  --dataset_dir ./src/llmtuner/data \
  --split "train" \
  --only_update_config False \
  --prune_data_type "${prune_data_type}" \
  --cutoff_len "${seq_len}" \
  --output_dir "${output_dir}" \
  --logging_steps 10 \
  --bf16 \
  --n_calibration_samples "${n_calibration_samples}" \
  --prune_method "${prune_method}" \
  --super_weight_method "${super_weight_method}" \
  --super_weight_threshold "${super_weight_threshold}" \
  --super_weight_cache_file "${super_weight_cache_file}" \
  --prune_super_weight_n "${prune_super_weight_n}" \
  --prune_model_save_path "${prune_model_save_path}"

# Reached only if accelerate exited 0 (set -e aborts otherwise).
echo "Super Weight analysis completed!"
echo "Results saved to: ${output_dir}"
echo "Super Weight information saved to: ${prune_model_save_path}/super_weights.json"