#!/usr/bin/env bash
# Launch layer-drop pruning of meta-llama/Llama-3.1-8B using the
# "super_activation" layer-selection method, distributed over 4 GPUs
# via HuggingFace accelerate. Results and the pruned checkpoint land
# under ./results_prune/super_activation_run.
set -euo pipefail

# Abort early if the project checkout is missing rather than running
# accelerate from the wrong working directory.
cd /workspace/1016_qif/LLM-Drop_superweights_change || exit 1

# export TOKENIZERS_PARALLELISM=true

CUDA_VISIBLE_DEVICES=0,1,2,3 accelerate launch --main_process_port 21304 \
  src/compress.py \
  --stage prune \
  --model_name_or_path meta-llama/Llama-3.1-8B \
  --dataset c4_val \
  --dataset_dir ./src/llmtuner/data \
  --split train \
  --prune_data_type pt \
  --cutoff_len 1024 \
  --layer_drop_norm True \
  --target_layer attn \
  --output_dir ./results_prune/super_activation_run \
  --logging_steps 10 \
  --bf16 \
  --n_calibration_samples 32 \
  --prune_method layer_drop \
  --layer_drop_method super_activation \
  --similarity_cache_file ./results_prune/cache/super_activation_cache.pt \
  --prune_model_save_path ./results_prune/super_activation_run/checkpoint