LJYAI committed on Apr 23

Commit

3738140

verified ·

1 Parent(s): 2c44909

upload script

Browse files

Files changed (18) hide show

script/loratune.sh +38 -0
script/run_abprune.sh +171 -0
script/run_abprune_inst.sh +197 -0
script/run_abprune_small.sh +162 -0
script/run_abprune_smoke.sh +134 -0
script/run_eval_ppl.sh +53 -0
script/run_eval_zeroshot.sh +91 -0
script/run_laco_llama.sh +69 -0
script/run_laco_qwen.sh +69 -0
script/run_llmpruner_llama.sh +66 -0
script/run_llmpruner_qwen.sh +61 -0
script/run_llmpruner_whole.sh +62 -0
script/run_llmstreamline_llama.sh +58 -0
script/run_llmstreamline_qwen.sh +53 -0
script/run_replaceme_llama.sh +57 -0
script/run_replaceme_qwen.sh +26 -0
script/run_uidl_llama.sh +12 -0
script/run_uidl_qwen.sh +12 -0

script/loratune.sh ADDED Viewed

	@@ -0,0 +1,38 @@

+#!/usr/bin/env bash
+set -euo pipefail
+export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-2}"
+ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+if [[ $# -lt 2 ]]; then
+  cat <<'USAGE'
+Usage:
+  script/loratune.sh <base_model> <output_dir> [extra lora args...]
+Example:
+  script/loratune.sh /path/to/base_model /path/to/output_dir --epochs 2 --batch_size 32
+USAGE
+  exit 1
+fi
+BASE_MODEL="$1"
+OUTPUT_DIR="$2"
+shift 2
+python "$ROOT/src/loratune.py" \
+  --base_model "$BASE_MODEL" \
+  --output_dir "$OUTPUT_DIR" \
+  --device cuda \
+  --dtype "${DTYPE:-bfloat16}" \
+  --instruction_dataset "${INSTRUCTION_DATASET:-tatsu-lab/alpaca}" \
+  --instruction_split "${INSTRUCTION_SPLIT:-train}" \
+  --max_samples "${MAX_SAMPLES:-0}" \
+  --seq_len "${SEQ_LEN:-1024}" \
+  --batch_size "${BATCH_SIZE:-64}" \
+  --micro_batch_size "${MICRO_BATCH_SIZE:-8}" \
+  --epochs "${EPOCHS:-1.0}" \
+  --learning_rate "${LEARNING_RATE:-1e-4}" \
+  --log_steps "${LOG_STEPS:-100}" \
+  --lora_rank "${LORA_RANK:-8}" \
+  "$@"

script/run_abprune.sh ADDED Viewed

	@@ -0,0 +1,171 @@

+#!/usr/bin/env bash
+set -euo pipefail
+export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+if [[ $# -lt 1 ]]; then
+  cat <<'USAGE'
+Usage:
+  script/run_abprune.sh <model> [output_dir] [extra fuse_layers args...]
+Examples:
+  script/run_abprune.sh Qwen/Qwen3-1.7B
+  script/run_abprune.sh /path/to/model /path/to/output --num_progressive 8
+USAGE
+  exit 1
+fi
+# all meta-llama/Llama-2-7b-hf, meta-llama/Llama-3.1-8B
+model="$1"
+shift
+dataset="${DATASET:-slimpajama}"
+dataset_config="${DATASET_CONFIG:-none}"
+num_progressive="${NUM_PROGRESSIVE:-16}"
+seq_len="${SEQ_LEN:-1024}"
+target_tokens="${TARGET_TOKENS:-500000}"
+calib_sequences="${CALIB_SEQUENCES:-128}"
+distill_batch_size="${DISTILL_BATCH_SIZE:-1}"
+eval_batch_size="${EVAL_BATCH_SIZE:-1}"
+eval_num_samples="${EVAL_NUM_SAMPLES:-200}"
+distill_seq_len="${DISTILL_SEQ_LEN:-1024}"
+lora_epochs="${LORA_EPOCHS:-0}"
+distill_epochs="${DISTILL_EPOCHS:-1.0}"
+distill_kl_weight="${DISTILL_KL_WEIGHT:-0.02}"
+distill_kl_temp="${DISTILL_KL_TEMP:-4.0}"
+distill_hidden_mse_weight="${DISTILL_HIDDEN_MSE_WEIGHT:-1.0}"
+distill_attn_mse_weight="${DISTILL_ATTN_MSE_WEIGHT:-0.25}"
+distill_mlp_mse_weight="${DISTILL_MLP_MSE_WEIGHT:-1.0}"
+reparam_eta="${REPARAM_ETA:-0}"
+reparam_gamma="${REPARAM_GAMMA:-0}"
+reparam_attn_reg_scale="${REPARAM_ATTN_REG_SCALE:-1.0}"
+reparam_mlp_reg_scale="${REPARAM_MLP_REG_SCALE:-1.0}"
+reparam_param_subset="${REPARAM_PARAM_SUBSET:-mlp}"
+dtype="${DTYPE:-bfloat16}"
+batch_size="${BATCH_SIZE:-2}"
+use_pertensor_fisher="${USE_PERTENSOR_FISHER:-0}"
+save_full_model_cycles="${SAVE_FULL_MODEL_CYCLES:-6,11}"
+comm_skip_post_reselect="${COMM_SKIP_POST_RESELECT:-1}"
+head_permute="${HEAD_PERMUTE:-0}"
+head_permute_select="${HEAD_PERMUTE_SELECT:-$head_permute}"
+head_permute_merge="${HEAD_PERMUTE_MERGE:-$head_permute}"
+fisher_args=(--fisher_mode param)
+if [[ "$use_pertensor_fisher" == "1" ]]; then
+  fisher_args=(--fisher_mode tensor)
+fi
+output_dir_suffix="progressive_common_${num_progressive}_nopost_only_last"
+if [[ "$use_pertensor_fisher" == "1" ]]; then
+  output_dir_suffix="${output_dir_suffix}_pertensor"
+fi
+model_slug="$(echo "$model" | tr '/:@' '___' | tr -cs '[:alnum:]_.-' '_' | sed 's/^_\\+//; s/_\\+$//')"
+output_dir_default="$repo_root/results/${model_slug}_${output_dir_suffix}"
+output_dir=""
+if [[ $# -gt 0 && "${1:0:2}" != "--" ]]; then
+  output_dir="$1"
+  shift
+elif [[ -n "${OUTDIR:-}" ]]; then
+  output_dir="${OUTDIR}"
+else
+  output_dir="${output_dir_default}"
+fi
+if [[ -n "${RUN_NAME:-}" ]]; then
+  output_dir="${output_dir}_${RUN_NAME}"
+fi
+# Argument vector for fuse_layers.py. Element order matches the recorded
+# command line; caller-supplied extras are appended last.
+python_args=(
+  --model "$model"
+  --dataset "$dataset"
+  --dataset_config "$dataset_config"
+  --target_tokens "$target_tokens"
+  --num_samples "$calib_sequences"
+  --seq_len "$seq_len"
+  --batch_size "$batch_size"
+  --distill_batch_size "$distill_batch_size"
+  --distill_seq_len "$distill_seq_len"
+  --distill_epochs "$distill_epochs"
+  --eval_batch_size "$eval_batch_size"
+  --eval_seq_len "$seq_len"
+  --eval_num_samples "$eval_num_samples"
+  --distill_kl_weight "$distill_kl_weight"
+  --distill_kl_temp "$distill_kl_temp"
+  --distill_hidden_mse_weight "$distill_hidden_mse_weight"
+  --distill_attn_mse_weight "$distill_attn_mse_weight"
+  --distill_mlp_mse_weight "$distill_mlp_mse_weight"
+  --reparam_eta "$reparam_eta"
+  --reparam_gamma "$reparam_gamma"
+  --reparam_attn_reg_scale "$reparam_attn_reg_scale"
+  --reparam_mlp_reg_scale "$reparam_mlp_reg_scale"
+  --reparam_param_subset "$reparam_param_subset"
+  --distill_weight_decay 0.0
+  --distill_max_grad_norm 1.0
+  --distill_grad_accum_steps 1
+  --distill_eval_every 2000
+  --lora_eval_every 2000
+  --lora_epochs "$lora_epochs"
+  "${fisher_args[@]}"
+)
+# Only pass --save_full_model_cycles when a non-empty value is configured.
+[[ -z "$save_full_model_cycles" ]] || python_args+=(--save_full_model_cycles "$save_full_model_cycles")
+python_args+=(
+  --distill_method reparam
+  --redistrib_teacher_source previous_cycle
+  --comm_enabled
+  --comm_mu_auto
+  --layer auto
+  --exclude_pairs 0,1,-1
+  --num_progressive "$num_progressive"
+  --output_dir "$output_dir"
+  --dtype "$dtype"
+)
+# Boolean switches: each flag is added only for the exact value tested.
+[[ "$comm_skip_post_reselect" != "1" ]] || python_args+=(--comm_skip_post_reselect)
+[[ "$head_permute_select" != "0" ]] || python_args+=(--no_head_permute_select)
+[[ "$head_permute_merge" != "0" ]] || python_args+=(--no_head_permute_merge)
+python_args+=("$@")
+# Record the invocation in <output_dir>/run_args.txt, then launch.
+mkdir -p "$output_dir"
+run_args_file="$output_dir/run_args.txt"
+git_commit="unknown"
+if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+  git_commit=$(git -C "$repo_root" rev-parse HEAD)
+fi
+start_epoch=$(date +%s)
+start_time=$(date --iso-8601=seconds)
+launch_cmd=(python "$repo_root/src/fuse_layers.py" "${python_args[@]}")
+{
+  printf '%s\n' \
+    "git_commit=$git_commit" \
+    "start_time=$start_time" \
+    "HEAD_PERMUTE=$head_permute" \
+    "HEAD_PERMUTE_SELECT=$head_permute_select" \
+    "HEAD_PERMUTE_MERGE=$head_permute_merge" \
+    "command:"
+  # %q shell-quotes each word of the recorded command.
+  printf '%q ' "${launch_cmd[@]}"
+  printf '\n'
+} > "$run_args_file"
+# EXIT handler: appends end time, elapsed seconds and exit status to the
+# run record. $? is captured first, before any other command overwrites it.
+write_run_summary() {
+  local exit_code=$?
+  local finished_epoch finished_at
+  finished_epoch=$(date +%s)
+  finished_at=$(date --iso-8601=seconds)
+  printf '%s\n' \
+    "end_time=$finished_at" \
+    "elapsed_seconds=$((finished_epoch - start_epoch))" \
+    "exit_code=$exit_code" >> "$run_args_file"
+}
+trap write_run_summary EXIT
+"${launch_cmd[@]}"

script/run_abprune_inst.sh ADDED Viewed

	@@ -0,0 +1,197 @@

+#!/usr/bin/env bash
+set -euo pipefail
+export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+if [[ $# -lt 1 ]]; then
+  cat <<'USAGE'
+Usage:
+  script/run_abprune_inst.sh <model> [output_dir] [extra fuse_layers args...]
+Examples:
+  script/run_abprune_inst.sh Qwen/Qwen3-1.7B
+  script/run_abprune_inst.sh /path/to/model /path/to/output --num_progressive 8
+USAGE
+  exit 1
+fi
+# all meta-llama/Llama-2-7b-hf, meta-llama/Llama-3.1-8B, facebook/opt-6.7b
+model="$1"
+shift
+dataset="${DATASET:-slimpajama}"
+dataset_config="${DATASET_CONFIG:-none}"
+num_progressive="${NUM_PROGRESSIVE:-16}"
+seq_len="${SEQ_LEN:-1024}"
+target_tokens="${TARGET_TOKENS:-500000}"
+calib_sequences="${CALIB_SEQUENCES:-128}"
+distill_batch_size="${DISTILL_BATCH_SIZE:-1}"
+eval_batch_size="${EVAL_BATCH_SIZE:-1}"
+eval_num_samples="${EVAL_NUM_SAMPLES:-200}"
+distill_seq_len="${DISTILL_SEQ_LEN:-1024}"
+lora_epochs="${LORA_EPOCHS:-0}"
+calibration_source="${CALIBRATION_SOURCE:-lm}"
+instruction_dataset="${INSTRUCTION_DATASET:-}"
+instruction_config="${INSTRUCTION_CONFIG:-none}"
+instruction_split="${INSTRUCTION_SPLIT:-train}"
+instruction_format="${INSTRUCTION_FORMAT:-auto}"
+instruction_field_instruction="${INSTRUCTION_FIELD_INSTRUCTION:-instruction}"
+instruction_field_input="${INSTRUCTION_FIELD_INPUT:-input}"
+instruction_field_output="${INSTRUCTION_FIELD_OUTPUT:-output}"
+distillation_source="${DISTILLATION_SOURCE:-$calibration_source}"
+distill_inst_samples="${DISTILL_INST_SAMPLES:-500}"
+distill_epochs="${DISTILL_EPOCHS:-1.0}"
+distill_kl_weight="${DISTILL_KL_WEIGHT:-0.02}"
+distill_kl_temp="${DISTILL_KL_TEMP:-4.0}"
+distill_hidden_mse_weight="${DISTILL_HIDDEN_MSE_WEIGHT:-1.0}"
+distill_attn_mse_weight="${DISTILL_ATTN_MSE_WEIGHT:-0.25}"
+distill_mlp_mse_weight="${DISTILL_MLP_MSE_WEIGHT:-1.0}"
+reparam_eta="${REPARAM_ETA:-0}"
+reparam_gamma="${REPARAM_GAMMA:-0}"
+reparam_attn_reg_scale="${REPARAM_ATTN_REG_SCALE:-1.0}"
+reparam_mlp_reg_scale="${REPARAM_MLP_REG_SCALE:-1.0}"
+reparam_param_subset="${REPARAM_PARAM_SUBSET:-mlp}"
+dtype="${DTYPE:-bfloat16}"
+batch_size="${BATCH_SIZE:-2}"
+use_pertensor_fisher="${USE_PERTENSOR_FISHER:-0}"
+save_full_model_cycles="${SAVE_FULL_MODEL_CYCLES:-6,11}"
+comm_skip_post_reselect="${COMM_SKIP_POST_RESELECT:-1}"
+head_permute="${HEAD_PERMUTE:-1}"
+head_permute_select="${HEAD_PERMUTE_SELECT:-$head_permute}"
+head_permute_merge="${HEAD_PERMUTE_MERGE:-$head_permute}"
+fisher_args=(--fisher_mode param)
+if [[ "$use_pertensor_fisher" == "1" ]]; then
+  fisher_args=(--fisher_mode tensor)
+fi
+output_dir_suffix="progressive_common_${num_progressive}_nopost_only_last"
+if [[ "$use_pertensor_fisher" == "1" ]]; then
+  output_dir_suffix="${output_dir_suffix}_pertensor"
+fi
+model_slug="$(echo "$model" | tr '/:@' '___' | tr -cs '[:alnum:]_.-' '_' | sed 's/^_\\+//; s/_\\+$//')"
+output_dir_default="$repo_root/results/${model_slug}_${output_dir_suffix}"
+output_dir=""
+if [[ $# -gt 0 && "${1:0:2}" != "--" ]]; then
+  output_dir="$1"
+  shift
+elif [[ -n "${OUTDIR:-}" ]]; then
+  output_dir="${OUTDIR}"
+else
+  output_dir="${output_dir_default}"
+fi
+if [[ -n "${RUN_NAME:-}" ]]; then
+  output_dir="${output_dir}_${RUN_NAME}"
+fi
+# Argument vector for fuse_layers.py. Element order matches the recorded
+# command line; caller-supplied extras are appended last.
+python_args=(
+  --model "$model"
+  --dataset "$dataset"
+  --dataset_config "$dataset_config"
+  --target_tokens "$target_tokens"
+  --num_samples "$calib_sequences"
+  --seq_len "$seq_len"
+  --batch_size "$batch_size"
+  --calibration_source "$calibration_source"
+  --distillation_source "$distillation_source"
+  --distill_batch_size "$distill_batch_size"
+  --distill_inst_samples "$distill_inst_samples"
+  --distill_seq_len "$distill_seq_len"
+  --distill_epochs "$distill_epochs"
+  --eval_batch_size "$eval_batch_size"
+  --eval_seq_len "$seq_len"
+  --eval_num_samples "$eval_num_samples"
+  --distill_kl_weight "$distill_kl_weight"
+  --distill_kl_temp "$distill_kl_temp"
+  --distill_hidden_mse_weight "$distill_hidden_mse_weight"
+  --distill_attn_mse_weight "$distill_attn_mse_weight"
+  --distill_mlp_mse_weight "$distill_mlp_mse_weight"
+  --reparam_eta "$reparam_eta"
+  --reparam_gamma "$reparam_gamma"
+  --reparam_attn_reg_scale "$reparam_attn_reg_scale"
+  --reparam_mlp_reg_scale "$reparam_mlp_reg_scale"
+  --reparam_param_subset "$reparam_param_subset"
+  --distill_weight_decay 0.0
+  --distill_max_grad_norm 1.0
+  --distill_grad_accum_steps 1
+  --distill_eval_every 2000
+  --lora_eval_every 2000
+  --lora_epochs "$lora_epochs"
+)
+# The --instruction_* group is passed only when an instruction dataset is set.
+if [[ -n "$instruction_dataset" ]]; then
+  python_args+=(
+    --instruction_dataset "$instruction_dataset"
+    --instruction_config "$instruction_config"
+    --instruction_split "$instruction_split"
+    --instruction_format "$instruction_format"
+    --instruction_field_instruction "$instruction_field_instruction"
+    --instruction_field_input "$instruction_field_input"
+    --instruction_field_output "$instruction_field_output"
+  )
+fi
+python_args+=("${fisher_args[@]}")
+# Only pass --save_full_model_cycles when a non-empty value is configured.
+[[ -z "$save_full_model_cycles" ]] || python_args+=(--save_full_model_cycles "$save_full_model_cycles")
+python_args+=(
+  --distill_method reparam
+  --redistrib_teacher_source previous_cycle
+  --comm_enabled
+  --comm_mu_auto
+  --layer auto
+  --exclude_pairs 0,1,-1
+  --num_progressive "$num_progressive"
+  --output_dir "$output_dir"
+  --dtype "$dtype"
+)
+# Boolean switches: each flag is added only for the exact value tested.
+[[ "$comm_skip_post_reselect" != "1" ]] || python_args+=(--comm_skip_post_reselect)
+[[ "$head_permute_select" != "0" ]] || python_args+=(--no_head_permute_select)
+[[ "$head_permute_merge" != "0" ]] || python_args+=(--no_head_permute_merge)
+python_args+=("$@")
+# Record the invocation in <output_dir>/run_args.txt, then launch.
+mkdir -p "$output_dir"
+run_args_file="$output_dir/run_args.txt"
+git_commit="unknown"
+if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+  git_commit=$(git -C "$repo_root" rev-parse HEAD)
+fi
+start_epoch=$(date +%s)
+start_time=$(date --iso-8601=seconds)
+launch_cmd=(python "$repo_root/src_inst/fuse_layers.py" "${python_args[@]}")
+{
+  printf '%s\n' \
+    "git_commit=$git_commit" \
+    "start_time=$start_time" \
+    "HEAD_PERMUTE=$head_permute" \
+    "HEAD_PERMUTE_SELECT=$head_permute_select" \
+    "HEAD_PERMUTE_MERGE=$head_permute_merge" \
+    "command:"
+  # %q shell-quotes each word of the recorded command.
+  printf '%q ' "${launch_cmd[@]}"
+  printf '\n'
+} > "$run_args_file"
+# EXIT handler: appends end time, elapsed seconds and exit status to the
+# run record. $? is captured first, before any other command overwrites it.
+write_run_summary() {
+  local exit_code=$?
+  local finished_epoch finished_at
+  finished_epoch=$(date +%s)
+  finished_at=$(date --iso-8601=seconds)
+  printf '%s\n' \
+    "end_time=$finished_at" \
+    "elapsed_seconds=$((finished_epoch - start_epoch))" \
+    "exit_code=$exit_code" >> "$run_args_file"
+}
+trap write_run_summary EXIT
+# src_inst and the repo root are put ahead of any existing PYTHONPATH.
+PYTHONPATH="$repo_root/src_inst:$repo_root${PYTHONPATH:+:$PYTHONPATH}" \
+"${launch_cmd[@]}"

script/run_abprune_small.sh ADDED Viewed

	@@ -0,0 +1,162 @@

+#!/usr/bin/env bash
+set -euo pipefail
+export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-3}"
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+if [[ $# -lt 1 ]]; then
+  cat <<'USAGE'
+Usage:
+  script/run_abprune.sh <model> [output_dir] [extra fuse_layers args...]
+Examples:
+  script/run_abprune.sh Qwen/Qwen3-1.7B
+  script/run_abprune.sh /path/to/model /path/to/output --num_progressive 8
+USAGE
+  exit 1
+fi
+model="$1"
+shift
+# whole settings
+dataset="${DATASET:-slimpajama}"
+dataset_config="${DATASET_CONFIG:-none}"
+num_progressive="${NUM_PROGRESSIVE:-14}"
+dtype="${DTYPE:-bfloat16}"
+use_pertensor_fisher="${USE_PERTENSOR_FISHER:-0}"
+save_full_model_cycles="${SAVE_FULL_MODEL_CYCLES:-6,11}"
+head_permute="${HEAD_PERMUTE:-1}"
+head_permute_select="${HEAD_PERMUTE_SELECT:-$head_permute}"
+head_permute_merge="${HEAD_PERMUTE_MERGE:-$head_permute}"
+# calibration dataset
+calib_sequences="${CALIB_SEQUENCES:-128}"
+seq_len="${SEQ_LEN:-512}"
+# distillation dataset
+distill_seq_len="${DISTILL_SEQ_LEN:-512}"
+target_tokens="${TARGET_TOKENS:-500000}"
+distill_batch_size="${DISTILL_BATCH_SIZE:-1}"
+# distillation evaluation
+batch_size="${BATCH_SIZE:-1}"
+eval_batch_size="${EVAL_BATCH_SIZE:-1}"
+eval_num_samples="${EVAL_NUM_SAMPLES:-200}"
+lora_epochs="${LORA_EPOCHS:-0}"
+distill_epochs="${DISTILL_EPOCHS:-1.0}"
+distill_kl_weight="${DISTILL_KL_WEIGHT:-0.01}"
+distill_kl_temp="${DISTILL_KL_TEMP:-4.0}"
+fisher_args=(--fisher_mode param)
+if [[ "$use_pertensor_fisher" == "1" ]]; then
+  fisher_args=(--fisher_mode tensor)
+fi
+output_dir_suffix="progressive_common_${num_progressive}_nopost_only_last"
+if [[ "$use_pertensor_fisher" == "1" ]]; then
+  output_dir_suffix="${output_dir_suffix}_pertensor"
+fi
+model_slug="$(echo "$model" | tr '/:@' '___' | tr -cs '[:alnum:]_.-' '_' | sed 's/^_\\+//; s/_\\+$//')"
+output_dir_default="$repo_root/results/${model_slug}_${output_dir_suffix}"
+output_dir=""
+if [[ $# -gt 0 && "${1:0:2}" != "--" ]]; then
+  output_dir="$1"
+  shift
+elif [[ -n "${OUTDIR:-}" ]]; then
+  output_dir="${OUTDIR}"
+else
+  output_dir="${output_dir_default}"
+fi
+if [[ -n "${RUN_NAME:-}" ]]; then
+  output_dir="${output_dir}_${RUN_NAME}"
+fi
+# Argument vector for fuse_layers.py. Element order matches the recorded
+# command line; caller-supplied extras are appended last.
+python_args=(
+  --model "$model"
+  --dataset "$dataset"
+  --dataset_config "$dataset_config"
+  --target_tokens "$target_tokens"
+  --num_samples "$calib_sequences"
+  --seq_len "$seq_len"
+  --batch_size "$batch_size"
+  --distill_batch_size "$distill_batch_size"
+  --distill_seq_len "$distill_seq_len"
+  --distill_epochs "$distill_epochs"
+  --eval_batch_size "$eval_batch_size"
+  --eval_seq_len "$seq_len"
+  --eval_num_samples "$eval_num_samples"
+  --distill_kl_weight "$distill_kl_weight"
+  --distill_kl_temp "$distill_kl_temp"
+  --distill_weight_decay 0.0
+  --distill_max_grad_norm 1.0
+  --distill_grad_accum_steps 1
+  --distill_eval_every 2000
+  --lora_eval_every 2000
+  --lora_epochs "$lora_epochs"
+  --auto_metric dwce
+  "${fisher_args[@]}"
+)
+# Disabled option kept from the original: --auto_cosine_topk 5
+# Only pass --save_full_model_cycles when a non-empty value is configured.
+[[ -z "$save_full_model_cycles" ]] || python_args+=(--save_full_model_cycles "$save_full_model_cycles")
+python_args+=(
+  --distill_method reparam
+  --redistrib_teacher_source previous_cycle
+  --comm_enabled
+  --comm_mu_auto
+  --layer auto
+  --exclude_pairs 0,1,-1
+  --num_progressive "$num_progressive"
+  --output_dir "$output_dir"
+  --dtype "$dtype"
+)
+# Boolean switches: each flag is added only for the exact value "0".
+[[ "$head_permute_select" != "0" ]] || python_args+=(--no_head_permute_select)
+[[ "$head_permute_merge" != "0" ]] || python_args+=(--no_head_permute_merge)
+python_args+=("$@")
+# Record the invocation in <output_dir>/run_args.txt, then launch.
+mkdir -p "$output_dir"
+run_args_file="$output_dir/run_args.txt"
+git_commit="unknown"
+if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+  git_commit=$(git -C "$repo_root" rev-parse HEAD)
+fi
+start_epoch=$(date +%s)
+start_time=$(date --iso-8601=seconds)
+launch_cmd=(python "$repo_root/src/fuse_layers.py" "${python_args[@]}")
+{
+  printf '%s\n' \
+    "git_commit=$git_commit" \
+    "start_time=$start_time" \
+    "HEAD_PERMUTE=$head_permute" \
+    "HEAD_PERMUTE_SELECT=$head_permute_select" \
+    "HEAD_PERMUTE_MERGE=$head_permute_merge" \
+    "command:"
+  # %q shell-quotes each word of the recorded command.
+  printf '%q ' "${launch_cmd[@]}"
+  printf '\n'
+} > "$run_args_file"
+# EXIT handler: appends end time, elapsed seconds and exit status to the
+# run record. $? is captured first, before any other command overwrites it.
+write_run_summary() {
+  local exit_code=$?
+  local finished_epoch finished_at
+  finished_epoch=$(date +%s)
+  finished_at=$(date --iso-8601=seconds)
+  printf '%s\n' \
+    "end_time=$finished_at" \
+    "elapsed_seconds=$((finished_epoch - start_epoch))" \
+    "exit_code=$exit_code" >> "$run_args_file"
+}
+trap write_run_summary EXIT
+"${launch_cmd[@]}"

script/run_abprune_smoke.sh ADDED Viewed

	@@ -0,0 +1,134 @@

+#!/usr/bin/env bash
+set -euo pipefail
+export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-1}"
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+if [[ $# -lt 1 ]]; then
+  cat <<'USAGE'
+Usage:
+  script/run_abprune_smoke.sh <model> [output_dir] [extra fuse_layers args...]
+Examples:
+  script/run_abprune_smoke.sh Qwen/Qwen3-1.7B
+  script/run_abprune_smoke.sh /path/to/model /path/to/output --num_progressive 3
+USAGE
+  exit 1
+fi
+model="$1"
+shift
+dataset="${DATASET:-slimpajama}"
+dataset_config="${DATASET_CONFIG:-none}"
+num_progressive="${NUM_PROGRESSIVE:-4}"
+seq_len="${SEQ_LEN:-128}"
+target_tokens="${TARGET_TOKENS:-8192}"
+calib_sequences="${CALIB_SEQUENCES:-8}"
+distill_batch_size="${DISTILL_BATCH_SIZE:-1}"
+eval_batch_size="${EVAL_BATCH_SIZE:-1}"
+eval_num_samples="${EVAL_NUM_SAMPLES:-8}"
+distill_seq_len="${DISTILL_SEQ_LEN:-128}"
+lora_epochs="${LORA_EPOCHS:-0}"
+distill_epochs="${DISTILL_EPOCHS:-0.1}"
+distill_kl_weight="${DISTILL_KL_WEIGHT:-0.01}"
+distill_kl_temp="${DISTILL_KL_TEMP:-4.0}"
+dtype="${DTYPE:-bfloat16}"
+batch_size="${BATCH_SIZE:-1}"
+use_pertensor_fisher="${USE_PERTENSOR_FISHER:-0}"
+save_full_model_cycles="${SAVE_FULL_MODEL_CYCLES:-3}"
+fisher_args=(--fisher_mode param)
+if [[ "$use_pertensor_fisher" == "1" ]]; then
+  fisher_args=(--fisher_mode tensor)
+fi
+output_dir_suffix="progressive_common_smoke"
+if [[ "$use_pertensor_fisher" == "1" ]]; then
+  output_dir_suffix="${output_dir_suffix}_pertensor"
+fi
+model_slug="$(echo "$model" | tr '/:@' '___' | tr -cs '[:alnum:]_.-' '_' | sed 's/^_\\+//; s/_\\+$//')"
+output_dir_default="$repo_root/results/${model_slug}_${output_dir_suffix}"
+output_dir=""
+if [[ $# -gt 0 && "${1:0:2}" != "--" ]]; then
+  output_dir="$1"
+  shift
+elif [[ -n "${OUTDIR:-}" ]]; then
+  output_dir="${OUTDIR}"
+else
+  output_dir="${output_dir_default}"
+fi
+if [[ -n "${RUN_NAME:-}" ]]; then
+  output_dir="${output_dir}_${RUN_NAME}"
+fi
+# Argument vector for fuse_layers.py. Element order matches the recorded
+# command line; caller-supplied extras are appended last.
+python_args=(
+  --model "$model"
+  --dataset "$dataset"
+  --dataset_config "$dataset_config"
+  --target_tokens "$target_tokens"
+  --num_samples "$calib_sequences"
+  --seq_len "$seq_len"
+  --batch_size "$batch_size"
+  --distill_batch_size "$distill_batch_size"
+  --distill_seq_len "$distill_seq_len"
+  --distill_epochs "$distill_epochs"
+  --eval_batch_size "$eval_batch_size"
+  --eval_seq_len "$seq_len"
+  --eval_num_samples "$eval_num_samples"
+  --distill_kl_weight "$distill_kl_weight"
+  --distill_kl_temp "$distill_kl_temp"
+  --distill_weight_decay 0.0
+  --distill_max_grad_norm 1.0
+  --distill_grad_accum_steps 1
+  --distill_eval_every 0
+  --lora_eval_every 0
+  --lora_epochs "$lora_epochs"
+  "${fisher_args[@]}"
+)
+# Only pass --save_full_model_cycles when a non-empty value is configured.
+[[ -z "$save_full_model_cycles" ]] || python_args+=(--save_full_model_cycles "$save_full_model_cycles")
+python_args+=(
+  --distill_method reparam
+  --redistrib_teacher_source previous_cycle
+  --comm_enabled
+  --comm_mu_auto
+  --layer auto
+  --exclude_pairs -1
+  --num_progressive "$num_progressive"
+  --output_dir "$output_dir"
+  --dtype "$dtype"
+)
+python_args+=("$@")
+mkdir -p "$output_dir"
+run_args_file="$output_dir/run_args.txt"
+start_epoch=$(date +%s)
+start_time=$(date --iso-8601=seconds)
+{
+  echo "start_time=$start_time"
+  echo "command:"
+  printf '%q ' python "$repo_root/src/fuse_layers.py" "${python_args[@]}"
+  echo
+} > "$run_args_file"
+write_run_summary() {
+  local exit_code=$?
+  local end_epoch end_time elapsed_seconds
+  end_epoch=$(date +%s)
+  end_time=$(date --iso-8601=seconds)
+  elapsed_seconds=$((end_epoch - start_epoch))
+  {
+    echo "end_time=$end_time"
+    echo "elapsed_seconds=$elapsed_seconds"
+    echo "exit_code=$exit_code"
+  } >> "$run_args_file"
+}
+trap write_run_summary EXIT
+python "$repo_root/src/fuse_layers.py" "${python_args[@]}"

script/run_eval_ppl.sh ADDED Viewed

	@@ -0,0 +1,53 @@

+#!/usr/bin/env bash
+set -euo pipefail
+export CUDA_VISIBLE_DEVICES=0
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+if [[ $# -lt 1 ]]; then
+  cat <<'USAGE'
+Usage:
+  script/run_eval_ppl.sh <model_path> [output_dir] [extra eval_ppl args...]
+Examples:
+  script/run_eval_ppl.sh /path/to/model
+  script/run_eval_ppl.sh /path/to/model /tmp/ppl_out --dataset wikitext2 --max_seq_len 1024 --batch_size 8
+USAGE
+  exit 1
+fi
+MODEL_PATH="$1"
+shift
+OUTPUT_DIR=""
+if [[ $# -gt 0 && "${1:0:2}" != "--" ]]; then
+  OUTPUT_DIR="$1"
+  shift
+fi
+CMD=(
+  python "$ROOT_DIR/src/eval_ppl.py"
+  --base_model "$MODEL_PATH"
+)
+if [[ -n "$OUTPUT_DIR" ]]; then
+  CMD+=(--output_dir "$OUTPUT_DIR")
+fi
+# Default to WikiText-2 only unless user explicitly sets --dataset.
+HAS_DATASET_FLAG=0
+for arg in "$@"; do
+  if [[ "$arg" == "--dataset" ]]; then
+    HAS_DATASET_FLAG=1
+    break
+  fi
+done
+if [[ "$HAS_DATASET_FLAG" -eq 0 ]]; then
+  CMD+=(--dataset wikitext2)
+fi
+CMD+=("$@")
+echo "Running: ${CMD[*]}"
+exec "${CMD[@]}"

script/run_eval_zeroshot.sh ADDED Viewed

	@@ -0,0 +1,91 @@

+#!/usr/bin/env bash
+set -euo pipefail
+export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-2}"
+ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+if [[ $# -lt 2 ]]; then
+  cat <<'USAGE'
+Usage:
+  script/run_eval_zeroshot.sh <model_path> <output_dir> [--mmlu] [extra lm_eval args...]
+Examples:
+  script/run_eval_zeroshot.sh /path/to/model /path/to/output
+  script/run_eval_zeroshot.sh /path/to/model /path/to/output --mmlu
+  script/run_eval_zeroshot.sh /path/to/model /path/to/output --tasks arc_easy,arc_challenge,hellaswag
+USAGE
+  exit 1
+fi
+MODEL_PATH="$1"
+OUTPUT_DIR="$2"
+shift 2
+TASKS="${TASKS:-arc_easy,arc_challenge,hellaswag,piqa,winogrande,openbookqa,boolq}"
+DEVICE="${DEVICE:-cuda}"
+BATCH_SIZE="${BATCH_SIZE:-auto}"
+NUM_FEWSHOT="${NUM_FEWSHOT:-0}"
+OUTPUT_FILE="${OUTPUT_FILE:-zeroshot_results.json}"
+INCLUDE_MMLU=0
+PASSTHROUGH_ARGS=()
+for arg in "$@"; do
+  if [[ "$arg" == "--mmlu" ]]; then
+    INCLUDE_MMLU=1
+    continue
+  fi
+  PASSTHROUGH_ARGS+=("$arg")
+done
+if [[ "$INCLUDE_MMLU" -eq 1 && ",$TASKS," != *",mmlu,"* ]]; then
+  TASKS="${TASKS},mmlu"
+fi
+mkdir -p "$OUTPUT_DIR"
+RUN_ARGS_FILE="$OUTPUT_DIR/run_zeroshot_args.txt"
+RESOLVED_MODEL_PATH="$MODEL_PATH"
+git_commit="unknown"
+if git -C "$ROOT_DIR" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+  git_commit=$(git -C "$ROOT_DIR" rev-parse HEAD)
+fi
+start_epoch=$(date +%s)
+start_time=$(date --iso-8601=seconds)
+LM_EVAL_CMD=(
+  lm_eval
+  --model hf
+  --model_args "pretrained=$RESOLVED_MODEL_PATH"
+  --tasks "$TASKS"
+  --num_fewshot "$NUM_FEWSHOT"
+  --device "$DEVICE"
+  --batch_size 32
+  --output_path "$OUTPUT_DIR/$OUTPUT_FILE"
+)
+LM_EVAL_CMD+=("${PASSTHROUGH_ARGS[@]}")
+{
+  echo "git_commit=$git_commit"
+  echo "start_time=$start_time"
+  echo "resolved_model_path=$RESOLVED_MODEL_PATH"
+  echo "command:"
+  printf '%q ' "${LM_EVAL_CMD[@]}"
+  echo
+} > "$RUN_ARGS_FILE"
+write_run_summary() {
+  local exit_code=$?
+  local end_epoch end_time elapsed_seconds
+  end_epoch=$(date +%s)
+  end_time=$(date --iso-8601=seconds)
+  elapsed_seconds=$((end_epoch - start_epoch))
+  {
+    echo "end_time=$end_time"
+    echo "elapsed_seconds=$elapsed_seconds"
+    echo "exit_code=$exit_code"
+  } >> "$RUN_ARGS_FILE"
+}
+trap write_run_summary EXIT
+echo "Running: ${LM_EVAL_CMD[*]}"
+exec "${LM_EVAL_CMD[@]}"

script/run_laco_llama.sh ADDED Viewed

	@@ -0,0 +1,69 @@

+#!/usr/bin/env bash
+set -euo pipefail
+export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+workdir="$repo_root/compare_model/LaCo"
+model_path="${MODEL_PATH:-meta-llama/Llama-2-7b-hf}"
+target_layers="${TARGET_LAYERS:-16}"
+merge_layers="${MERGE_LAYERS:-2}"
+interval="${INTERVAL:-1}"
+lowest_layer="${LOWEST_LAYER:-0}"
+threshold="${THRESHOLD:-0.45}"
+dtype="${DTYPE:-bfloat16}"
+device="${DEVICE:-cuda}"
+max_prompt_length="${MAX_PROMPT_LENGTH:-128}"
+output_dir="${OUTDIR:-$repo_root/results/laco_llama_target_${target_layers}}"
+python_args=(
+  --model_path "$model_path"
+  --output_dir "$output_dir"
+  --target_layers "$target_layers"
+  --merge_layers "$merge_layers"
+  --interval "$interval"
+  --lowest_layer "$lowest_layer"
+  --threshold "$threshold"
+  --dtype "$dtype"
+  --device "$device"
+  --max_prompt_length "$max_prompt_length"
+)
+if [[ "${TRUST_REMOTE_CODE:-0}" == "1" ]]; then
+  python_args+=(--trust_remote_code)
+fi
+if [[ "${FORCE_TARGET:-1}" == "1" ]]; then
+  python_args+=(--force_target)
+else
+  python_args+=(--no_force_target)
+fi
+if [[ -n "${PROMPT_FILE:-}" ]]; then
+  python_args+=(--prompt_file "$PROMPT_FILE")
+fi
+if [[ -n "${SAVE_LAYERS:-}" ]]; then
+  # shellcheck disable=SC2206
+  save_layers=(${SAVE_LAYERS})
+  python_args+=(--save_layers "${save_layers[@]}")
+fi
+python_args+=("$@")
+mkdir -p "$output_dir"
+git_commit="unknown"
+if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+  git_commit=$(git -C "$repo_root" rev-parse HEAD)
+fi
+{
+  echo "git_commit=$git_commit"
+  echo "command:"
+  printf '%q ' python "$repo_root/compare_model/LaCo/laco_llama.py" "${python_args[@]}"
+  echo
+} > "$output_dir/run_args.txt"
+cd "$workdir"
+PYTHONPATH="$repo_root${PYTHONPATH:+:$PYTHONPATH}" \
+python laco_llama.py "${python_args[@]}"

script/run_laco_qwen.sh ADDED Viewed

	@@ -0,0 +1,69 @@

+#!/usr/bin/env bash
+set -euo pipefail
+export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+workdir="$repo_root/compare_model/LaCo"
+model_path="${MODEL_PATH:-Qwen/Qwen3-1.7B}"
+target_layers="${TARGET_LAYERS:-14}"
+merge_layers="${MERGE_LAYERS:-2}"
+interval="${INTERVAL:-1}"
+lowest_layer="${LOWEST_LAYER:-0}"
+threshold="${THRESHOLD:-0.45}"
+dtype="${DTYPE:-bfloat16}"
+device="${DEVICE:-cuda}"
+max_prompt_length="${MAX_PROMPT_LENGTH:-128}"
+output_dir="${OUTDIR:-$repo_root/results/laco_qwen_target_${target_layers}}"
+python_args=(
+  --model_path "$model_path"
+  --output_dir "$output_dir"
+  --target_layers "$target_layers"
+  --merge_layers "$merge_layers"
+  --interval "$interval"
+  --lowest_layer "$lowest_layer"
+  --threshold "$threshold"
+  --dtype "$dtype"
+  --device "$device"
+  --max_prompt_length "$max_prompt_length"
+)
+if [[ "${TRUST_REMOTE_CODE:-1}" == "1" ]]; then
+  python_args+=(--trust_remote_code)
+fi
+if [[ "${FORCE_TARGET:-1}" == "1" ]]; then
+  python_args+=(--force_target)
+else
+  python_args+=(--no_force_target)
+fi
+if [[ -n "${PROMPT_FILE:-}" ]]; then
+  python_args+=(--prompt_file "$PROMPT_FILE")
+fi
+if [[ -n "${SAVE_LAYERS:-}" ]]; then
+  # shellcheck disable=SC2206
+  save_layers=(${SAVE_LAYERS})
+  python_args+=(--save_layers "${save_layers[@]}")
+fi
+python_args+=("$@")
+mkdir -p "$output_dir"
+git_commit="unknown"
+if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+  git_commit=$(git -C "$repo_root" rev-parse HEAD)
+fi
+{
+  echo "git_commit=$git_commit"
+  echo "command:"
+  printf '%q ' python "$repo_root/compare_model/LaCo/laco_qwen.py" "${python_args[@]}"
+  echo
+} > "$output_dir/run_args.txt"
+cd "$workdir"
+PYTHONPATH="$repo_root${PYTHONPATH:+:$PYTHONPATH}" \
+python laco_qwen.py "${python_args[@]}"

script/run_llmpruner_llama.sh ADDED Viewed

	@@ -0,0 +1,66 @@

+#!/usr/bin/env bash
+set -euo pipefail
+# meta-llama/Llama-2-7b-hf, meta-llama/Llama-3.1-8B
+# Prune-only runner for LLM-Pruner on Llama-family checkpoints.
+export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-2}"
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+workdir="$repo_root/compare_model/LLM-Pruner"
+base_model="${BASE_MODEL:-meta-llama/Llama-2-7b-hf}"
+prune_ckpt_path="${PRUNE_CKPT_PATH:-llama2_7b_prune}"
+pruning_ratio="${PRUNING_RATIO:-0.25}"
+block_mlp_layer_start="${BLOCK_MLP_LAYER_START:-4}"
+block_mlp_layer_end="${BLOCK_MLP_LAYER_END:-30}"
+block_attention_layer_start="${BLOCK_ATTENTION_LAYER_START:-4}"
+block_attention_layer_end="${BLOCK_ATTENTION_LAYER_END:-30}"
+pruner_type="${PRUNER_TYPE:-taylor}"
+taylor_mode="${TAYLOR_MODE:-param_first}"
+device="${DEVICE:-cpu}"
+eval_device="${EVAL_DEVICE:-cuda}"
+default_script="hf_prune.py"
+skip_eval_flag="--skip_post_eval"
+if [[ "$base_model" == *"Llama-3"* ]] || [[ "$base_model" == *"Llama-3."* ]] || [[ "$base_model" == *"llama-3"* ]]; then
+  default_script="llama3.py"
+  skip_eval_flag="--skip_eval_after_prune"
+fi
+script_name="${PRUNE_SCRIPT:-$default_script}"
+output_dir="${OUTDIR:-$workdir/prune_log/$prune_ckpt_path}"
+python_args=(
+  --base_model "$base_model"
+  --pruning_ratio "$pruning_ratio"
+  --block_wise
+  --block_mlp_layer_start "$block_mlp_layer_start"
+  --block_mlp_layer_end "$block_mlp_layer_end"
+  --block_attention_layer_start "$block_attention_layer_start"
+  --block_attention_layer_end "$block_attention_layer_end"
+  --pruner_type "$pruner_type"
+  --taylor "$taylor_mode"
+  --device "$device"
+  --eval_device "$eval_device"
+  --save_ckpt_log_name "$prune_ckpt_path"
+  --save_model
+  "$skip_eval_flag"
+)
+python_args+=("$@")
+mkdir -p "$output_dir"
+git_commit="unknown"
+if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+  git_commit=$(git -C "$repo_root" rev-parse HEAD)
+fi
+{
+  echo "git_commit=$git_commit"
+  echo "command:"
+  printf '%q ' python "$repo_root/compare_model/LLM-Pruner/$script_name" "${python_args[@]}"
+  echo
+} > "$output_dir/run_args.txt"
+cd "$workdir"
+PYTHONPATH="$workdir:$repo_root${PYTHONPATH:+:$PYTHONPATH}" \
+python "$script_name" "${python_args[@]}"

script/run_llmpruner_qwen.sh ADDED Viewed

	@@ -0,0 +1,61 @@

+#!/usr/bin/env bash
+set -euo pipefail
+# Prune-only runner for LLM-Pruner on Qwen-family checkpoints.
+export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+workdir="$repo_root/compare_model/LLM-Pruner"
+base_model="${BASE_MODEL:-Qwen/Qwen3-1.7B}"
+prune_ckpt_path="${PRUNE_CKPT_PATH:-qwen3_1_7b_prune}"
+pruning_ratio="${PRUNING_RATIO:-0.25}"
+block_mlp_layer_start="${BLOCK_MLP_LAYER_START:-4}"
+block_mlp_layer_end="${BLOCK_MLP_LAYER_END:-24}"
+block_attention_layer_start="${BLOCK_ATTENTION_LAYER_START:-4}"
+block_attention_layer_end="${BLOCK_ATTENTION_LAYER_END:-24}"
+block_attention_roots="${BLOCK_ATTENTION_ROOTS:-q_proj,k_proj}"
+block_mlp_roots="${BLOCK_MLP_ROOTS:-gate_proj,up_proj}"
+pruner_type="${PRUNER_TYPE:-taylor}"
+taylor_mode="${TAYLOR_MODE:-param_first}"
+device="${DEVICE:-cuda}"
+eval_device="${EVAL_DEVICE:-cuda}"
+script_name="${PRUNE_SCRIPT:-llama3.py}"
+output_dir="${OUTDIR:-$workdir/prune_log/$prune_ckpt_path}"
+python_args=(
+  --base_model "$base_model"
+  --pruning_ratio "$pruning_ratio"
+  --block_wise
+  --block_attention_roots "$block_attention_roots"
+  --block_mlp_roots "$block_mlp_roots"
+  --block_mlp_layer_start "$block_mlp_layer_start"
+  --block_mlp_layer_end "$block_mlp_layer_end"
+  --block_attention_layer_start "$block_attention_layer_start"
+  --block_attention_layer_end "$block_attention_layer_end"
+  --pruner_type "$pruner_type"
+  --taylor "$taylor_mode"
+  --test_after_train
+  --device "$device"
+  --eval_device "$eval_device"
+  --save_ckpt_log_name "$prune_ckpt_path"
+  --save_model
+)
+python_args+=("$@")
+mkdir -p "$output_dir"
+git_commit="unknown"
+if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+  git_commit=$(git -C "$repo_root" rev-parse HEAD)
+fi
+{
+  echo "git_commit=$git_commit"
+  echo "command:"
+  printf '%q ' python "$repo_root/compare_model/LLM-Pruner/$script_name" "${python_args[@]}"
+  echo
+} > "$output_dir/run_args.txt"
+cd "$workdir"
+PYTHONPATH="$workdir:$repo_root${PYTHONPATH:+:$PYTHONPATH}" \
+python "$script_name" "${python_args[@]}"

script/run_llmpruner_whole.sh ADDED Viewed

	@@ -0,0 +1,62 @@

+#!/usr/bin/env bash
+set -euo pipefail
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+runner="$repo_root/script/run_llmpruner_llama.sh"
+base_model="${BASE_MODEL:-meta-llama/Llama-3.1-8B}"
+device="${DEVICE:-cpu}"
+eval_device="${EVAL_DEVICE:-cuda}"
+num_examples="${NUM_EXAMPLES:-10}"
+model_tag="$(basename "$base_model" | tr '[:upper:]' '[:lower:]' | tr -c 'a-z0-9' '_')"
+run_case() {
+  local label="$1"
+  local ratio="$2"
+  shift 2
+  echo "[LLM-Pruner] ${label}: PRUNING_RATIO=${ratio}"
+  BASE_MODEL="$base_model" \
+  PRUNE_CKPT_PATH="${model_tag}_${label}" \
+  PRUNING_RATIO="$ratio" \
+  DEVICE="$device" \
+  EVAL_DEVICE="$eval_device" \
+  bash "$runner" --num_examples "$num_examples" "$@"
+}
+# Equivalent block-only pruning scales for dropping layers from a 32-layer Llama-2 7B.
+run_case "drop6eq" "0.23"
+run_case "drop11eq" "0.45"
+run_case "drop16eq" "0.70" # Ratio = 54.3965%
+# for llama2 7b
+# run_case "drop6eq" "0.23"
+# run_case "drop11eq" "0.42"
+# run_case "drop16eq" "0.62"
+# for llama3 8b
+# run_case "drop6eq" "0.23"
+# run_case "drop11eq" "0.45"
+# run_case "drop16eq" "0.70"
+# "0.23" 86.1997%
+# "0.48" 68.1928%
+# "0.51" # 68.1928%
+# "0.70" # 56.6762%
+# llama 7b depth
+# 18.02%
+# 33.04%
+# 48.05%
+# llama 8b depth
+# 16.30%, 86.1997%
+# 29.88%, 72.2934%
+# 43.46%, 56.6762%

script/run_llmstreamline_llama.sh ADDED Viewed

	@@ -0,0 +1,58 @@

+#!/usr/bin/env bash
+set -euo pipefail
+export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"
+# meta-llama/Llama-3.2-3B
+# meta-llama/Llama-2-7b-hf
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+workdir="$repo_root/compare_model/LLM-Streamline"
+pruned_blocks="${COMMON_PRUNED_BLOCKS:-14}"
+layer_intervals="${LAYER_INTERVALS:-$pruned_blocks}"
+output_dir="${OUTDIR:-$repo_root/results/llama2_7b_streamline_mse_common_${layer_intervals}}"
+model_name="${MODEL_NAME:-meta-llama/Llama-3.2-3B}"
+cosine_num_data="${COSINE_NUM_DATA:-300}"
+train_num_data="${TRAIN_NUM_DATA:-5000}"
+epochs="${EPOCHS:-15}"
+batch_size="${BATCH_SIZE:-8}"
+train_batch_size="${TRAIN_BATCH_SIZE:-$batch_size}"
+grad_accum="${GRAD_ACCUM:-16}"
+lr="${LR:-1e-5}"
+min_lr="${MIN_LR:-5e-5}"
+wd="${WD:-1e-3}"
+dtype="${DTYPE:-bfloat16}"
+python_args=(
+  --model_name "$model_name"
+  --output_dir "$output_dir"
+  --layer_intervals "$layer_intervals"
+  --cosine_num_data "$cosine_num_data"
+  --train_num_data "$train_num_data"
+  --epoches "$epochs"
+  --batch_size "$batch_size"
+  --train_batch_size "$train_batch_size"
+  --dtype "$dtype"
+  --gradient_accumulation_step "$grad_accum"
+  --lr "$lr"
+  --min_lr "$min_lr"
+  --wd "$wd"
+)
+python_args+=("$@")
+mkdir -p "$output_dir"
+git_commit="unknown"
+if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+  git_commit=$(git -C "$repo_root" rev-parse HEAD)
+fi
+{
+  echo "git_commit=$git_commit"
+  echo "command:"
+  printf '%q ' python "$repo_root/compare_model/LLM-Streamline/mseloss_entry.py" "${python_args[@]}"
+  echo
+} > "$output_dir/run_args.txt"
+cd "$workdir"
+PYTHONPATH="$workdir:$repo_root${PYTHONPATH:+:$PYTHONPATH}" \
+python mseloss_entry.py "${python_args[@]}"

script/run_llmstreamline_qwen.sh ADDED Viewed

	@@ -0,0 +1,53 @@

+#!/usr/bin/env bash
+set -euo pipefail
+export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-3}"
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+workdir="$repo_root/compare_model/LLM-Streamline"
+pruned_blocks="${COMMON_PRUNED_BLOCKS:-14}"
+layer_intervals="${LAYER_INTERVALS:-$pruned_blocks}"
+output_dir="${OUTDIR:-$repo_root/results/qwen3_1_7b_streamline_mse_common_${layer_intervals}}"
+model_name="${MODEL_NAME:-Qwen/Qwen3-1.7B}"
+cosine_num_data="${COSINE_NUM_DATA:-300}"
+train_num_data="${TRAIN_NUM_DATA:-5000}"
+epochs="${EPOCHS:-15}"
+batch_size="${BATCH_SIZE:-8}"
+train_batch_size="${TRAIN_BATCH_SIZE:-$batch_size}"
+grad_accum="${GRAD_ACCUM:-16}"
+lr="${LR:-1e-5}"
+min_lr="${MIN_LR:-5e-5}"
+wd="${WD:-1e-3}"
+python_args=(
+  --model_name "$model_name"
+  --output_dir "$output_dir"
+  --layer_intervals "$layer_intervals"
+  --cosine_num_data "$cosine_num_data"
+  --train_num_data "$train_num_data"
+  --epoches "$epochs"
+  --batch_size "$batch_size"
+  --train_batch_size "$train_batch_size"
+  --gradient_accumulation_step "$grad_accum"
+  --lr "$lr"
+  --min_lr "$min_lr"
+  --wd "$wd"
+)
+python_args+=("$@")
+mkdir -p "$output_dir"
+git_commit="unknown"
+if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+  git_commit=$(git -C "$repo_root" rev-parse HEAD)
+fi
+{
+  echo "git_commit=$git_commit"
+  echo "command:"
+  printf '%q ' python "$repo_root/compare_model/LLM-Streamline/qwen_mseloss_entry.py" "${python_args[@]}"
+  echo
+} > "$output_dir/run_args.txt"
+cd "$workdir"
+PYTHONPATH="$workdir:$repo_root${PYTHONPATH:+:$PYTHONPATH}" \
+python qwen_mseloss_entry.py "${python_args[@]}"

script/run_replaceme_llama.sh ADDED Viewed

	@@ -0,0 +1,57 @@

+#!/usr/bin/env bash
+set -euo pipefail
+export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-1}"
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+workdir="$repo_root/compare_model/ReplaceMe"
+default_config="$workdir/examples/llama2_7b_replaceme_lstsq_skip16_common.yaml"
+config_path="${CONFIG_PATH:-$default_config}"
+target_tokens="${COMMON_TARGET_TOKENS_OVERRIDE:-}"
+passthrough_args=()
+while [[ $# -gt 0 ]]; do
+  case "$1" in
+    --target_tokens)
+      if [[ $# -lt 2 ]]; then
+        echo "error: --target_tokens requires a value" >&2
+        exit 1
+      fi
+      target_tokens="$2"
+      shift 2
+      ;;
+    --target_tokens=*)
+      target_tokens="${1#*=}"
+      shift
+      ;;
+    *)
+      passthrough_args+=("$1")
+      shift
+      ;;
+  esac
+done
+mkdir -p "$repo_root/results/llama_7b_replaceme_common_16"
+git_commit="unknown"
+if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+  git_commit=$(git -C "$repo_root" rev-parse HEAD)
+fi
+{
+  echo "git_commit=$git_commit"
+  echo "config_path=$config_path"
+  echo "COMMON_TARGET_TOKENS_OVERRIDE=${target_tokens:-4500000}"
+  echo "command:"
+  printf '%q ' python "$repo_root/compare_model/ReplaceMe/run_replaceme.py" --config "$config_path"
+  if [[ -n "$target_tokens" ]]; then
+    printf '%q ' "# COMMON_TARGET_TOKENS_OVERRIDE=$target_tokens"
+  fi
+  if [[ ${#passthrough_args[@]} -gt 0 ]]; then
+    printf '%q ' "${passthrough_args[@]}"
+  fi
+  echo
+} > "$repo_root/results/llama_7b_replaceme_common_16/run_args.txt"
+cd "$workdir"
+COMMON_TARGET_TOKENS_OVERRIDE="${target_tokens:-${COMMON_TARGET_TOKENS_OVERRIDE:-}}" \
+PYTHONPATH="$workdir:$repo_root${PYTHONPATH:+:$PYTHONPATH}" \
+python run_replaceme.py --config "$config_path" "${passthrough_args[@]}"

script/run_replaceme_qwen.sh ADDED Viewed

	@@ -0,0 +1,26 @@

+#!/usr/bin/env bash
+set -euo pipefail
+export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-3}"
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+workdir="$repo_root/compare_model/ReplaceMe"
+default_config="$workdir/examples/qwen3_1.7b_replaceme_lstsq_skip14_common.yaml"
+config_path="${CONFIG_PATH:-$default_config}"
+mkdir -p "$repo_root/results/llama_7b_replaceme_common_14"
+git_commit="unknown"
+if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
+  git_commit=$(git -C "$repo_root" rev-parse HEAD)
+fi
+{
+  echo "git_commit=$git_commit"
+  echo "config_path=$config_path"
+  echo "command:"
+  printf '%q ' python "$repo_root/compare_model/ReplaceMe/run_replaceme_qwen.py" --config "$config_path"
+  echo
+} > "$repo_root/results/llama_7b_replaceme_common_14/run_args.txt"
+cd "$workdir"
+PYTHONPATH="$workdir:$repo_root${PYTHONPATH:+:$PYTHONPATH}" \
+python run_replaceme_qwen.py --config "$config_path" "$@"

script/run_uidl_llama.sh ADDED Viewed

	@@ -0,0 +1,12 @@

+#!/usr/bin/env bash
+set -euo pipefail
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"
+export MODEL="${MODEL:-meta-llama/Llama-2-7b-hf}"
+export OUTPUT_ROOT="${OUTPUT_ROOT:-$repo_root/results/uidl_llama}"
+export SIM_DATASET="${SIM_DATASET:-slimpajama}"
+export SIM_DATASET_CONFIG="${SIM_DATASET_CONFIG:-none}"
+"$repo_root/compare_model/UIDL/run_uidl_prune.sh" "$@"

script/run_uidl_qwen.sh ADDED Viewed

	@@ -0,0 +1,12 @@

+#!/usr/bin/env bash
+set -euo pipefail
+repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
+export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"
+export MODEL="${MODEL:-Qwen/Qwen3-1.7B}"
+export OUTPUT_ROOT="${OUTPUT_ROOT:-$repo_root/results/uidl_qwen}"
+export SIM_DATASET="${SIM_DATASET:-slimpajama}"
+export SIM_DATASET_CONFIG="${SIM_DATASET_CONFIG:-none}"
+"$repo_root/compare_model/UIDL/run_uidl_prune.sh" "$@"