#!/usr/bin/env bash
#
# Prune-only runner for LLM-Pruner on Qwen-family checkpoints.
#
# Any command-line arguments given to this script are appended verbatim to
# the pruning script's argument list, after the arguments built below.
#
# Environment overrides (default in parentheses):
#   CUDA_VISIBLE_DEVICES         (0)
#   BASE_MODEL                   (Qwen/Qwen3-1.7B)
#   PRUNE_CKPT_PATH              (qwen3_1_7b_prune)
#   PRUNING_RATIO                (0.25)
#   BLOCK_MLP_LAYER_START        (4)
#   BLOCK_MLP_LAYER_END          (24)
#   BLOCK_ATTENTION_LAYER_START  (4)
#   BLOCK_ATTENTION_LAYER_END    (24)
#   BLOCK_ATTENTION_ROOTS        (q_proj,k_proj)
#   BLOCK_MLP_ROOTS              (gate_proj,up_proj)
#   PRUNER_TYPE                  (taylor)
#   TAYLOR_MODE                  (param_first)
#   DEVICE                       (cuda)
#   EVAL_DEVICE                  (cuda)
#   PRUNE_SCRIPT                 (llama3.py; resolved relative to the workdir)
#   OUTDIR                       (<workdir>/prune_log/<PRUNE_CKPT_PATH>)
set -euo pipefail

export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"

# The repository root is the parent of the directory holding this script.
repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
workdir="$repo_root/compare_model/LLM-Pruner"

base_model="${BASE_MODEL:-Qwen/Qwen3-1.7B}"
prune_ckpt_path="${PRUNE_CKPT_PATH:-qwen3_1_7b_prune}"
pruning_ratio="${PRUNING_RATIO:-0.25}"
block_mlp_layer_start="${BLOCK_MLP_LAYER_START:-4}"
block_mlp_layer_end="${BLOCK_MLP_LAYER_END:-24}"
block_attention_layer_start="${BLOCK_ATTENTION_LAYER_START:-4}"
block_attention_layer_end="${BLOCK_ATTENTION_LAYER_END:-24}"
block_attention_roots="${BLOCK_ATTENTION_ROOTS:-q_proj,k_proj}"
block_mlp_roots="${BLOCK_MLP_ROOTS:-gate_proj,up_proj}"
pruner_type="${PRUNER_TYPE:-taylor}"
taylor_mode="${TAYLOR_MODE:-param_first}"
device="${DEVICE:-cuda}"
eval_device="${EVAL_DEVICE:-cuda}"
script_name="${PRUNE_SCRIPT:-llama3.py}"
output_dir="${OUTDIR:-$workdir/prune_log/$prune_ckpt_path}"

# Built as an array so values containing spaces survive as single arguments.
python_args=(
  --base_model "$base_model"
  --pruning_ratio "$pruning_ratio"
  --block_wise
  --block_attention_roots "$block_attention_roots"
  --block_mlp_roots "$block_mlp_roots"
  --block_mlp_layer_start "$block_mlp_layer_start"
  --block_mlp_layer_end "$block_mlp_layer_end"
  --block_attention_layer_start "$block_attention_layer_start"
  --block_attention_layer_end "$block_attention_layer_end"
  --pruner_type "$pruner_type"
  --taylor "$taylor_mode"
  --test_after_train
  --device "$device"
  --eval_device "$eval_device"
  --save_ckpt_log_name "$prune_ckpt_path"
  --save_model
)
# Caller-supplied arguments go last.
python_args+=("$@")

mkdir -p "$output_dir"

# Record the commit for provenance on a best-effort basis. `rev-parse --verify
# HEAD` fails both outside a repository and in a repository without commits;
# testing it directly keeps `set -e` from aborting the run in either case.
git_commit="unknown"
if head_commit="$(git -C "$repo_root" rev-parse --verify HEAD 2>/dev/null)"; then
  git_commit="$head_commit"
fi

# Save a shell-quoted copy of the command next to the run outputs. The script
# is logged with its path under the workdir; the actual invocation below uses
# the bare name after changing into the workdir.
{
  printf '%s\n' "git_commit=$git_commit"
  printf '%s\n' "command:"
  printf '%q ' python "$workdir/$script_name" "${python_args[@]}"
  printf '\n'
} > "$output_dir/run_args.txt"

cd "$workdir"

# Prepend the workdir and repo root to PYTHONPATH, keeping any existing value.
PYTHONPATH="$workdir:$repo_root${PYTHONPATH:+:$PYTHONPATH}" \
  python "$script_name" "${python_args[@]}"