LJYAI committed on
Commit
3738140
·
verified ·
1 Parent(s): 2c44909

upload script

Browse files
script/loratune.sh ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Launch src/loratune.py for a LoRA fine-tuning run.
#
# Positional arguments:
#   $1  base model id or path
#   $2  output directory
#   $3+ forwarded verbatim to loratune.py after the defaults below
#
# Environment overrides (default in parentheses):
#   CUDA_VISIBLE_DEVICES (2), DTYPE (bfloat16),
#   INSTRUCTION_DATASET (tatsu-lab/alpaca), INSTRUCTION_SPLIT (train),
#   MAX_SAMPLES (0), SEQ_LEN (1024), BATCH_SIZE (64), MICRO_BATCH_SIZE (8),
#   EPOCHS (1.0), LEARNING_RATE (1e-4), LOG_STEPS (100), LORA_RANK (8)
set -euo pipefail

export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-2}"

# Repository root is the parent of the directory holding this script.
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

# Both positional arguments are mandatory.
if (( $# < 2 )); then
  cat <<'USAGE'
Usage:
script/loratune.sh <base_model> <output_dir> [extra lora args...]

Example:
script/loratune.sh /path/to/base_model /path/to/output_dir --epochs 2 --batch_size 32
USAGE
  exit 1
fi

BASE_MODEL="$1"
OUTPUT_DIR="$2"
shift 2

# Assemble the full argv first; caller-supplied extras go last.
train_cmd=(
  python "$ROOT/src/loratune.py"
  --base_model "$BASE_MODEL"
  --output_dir "$OUTPUT_DIR"
  --device cuda
  --dtype "${DTYPE:-bfloat16}"
  --instruction_dataset "${INSTRUCTION_DATASET:-tatsu-lab/alpaca}"
  --instruction_split "${INSTRUCTION_SPLIT:-train}"
  --max_samples "${MAX_SAMPLES:-0}"
  --seq_len "${SEQ_LEN:-1024}"
  --batch_size "${BATCH_SIZE:-64}"
  --micro_batch_size "${MICRO_BATCH_SIZE:-8}"
  --epochs "${EPOCHS:-1.0}"
  --learning_rate "${LEARNING_RATE:-1e-4}"
  --log_steps "${LOG_STEPS:-100}"
  --lora_rank "${LORA_RANK:-8}"
  "$@"
)

"${train_cmd[@]}"
script/run_abprune.sh ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"
5
+
6
+ repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
7
+
8
+ if [[ $# -lt 1 ]]; then
9
+ cat <<'USAGE'
10
+ Usage:
11
+ script/run_abprune.sh <model> [output_dir] [extra fuse_layers args...]
12
+
13
+ Examples:
14
+ script/run_abprune.sh Qwen/Qwen3-1.7B
15
+ script/run_abprune.sh /path/to/model /path/to/output --num_progressive 8
16
+ USAGE
17
+ exit 1
18
+ fi
19
+ # all meta-llama/Llama-2-7b-hf, meta-llama/Llama-3.1-8B
20
+ model="$1"
21
+ shift
22
+
23
+ dataset="${DATASET:-slimpajama}"
24
+ dataset_config="${DATASET_CONFIG:-none}"
25
+ num_progressive="${NUM_PROGRESSIVE:-16}"
26
+ seq_len="${SEQ_LEN:-1024}"
27
+ target_tokens="${TARGET_TOKENS:-500000}"
28
+ calib_sequences="${CALIB_SEQUENCES:-128}"
29
+ distill_batch_size="${DISTILL_BATCH_SIZE:-1}"
30
+ eval_batch_size="${EVAL_BATCH_SIZE:-1}"
31
+ eval_num_samples="${EVAL_NUM_SAMPLES:-200}"
32
+ distill_seq_len="${DISTILL_SEQ_LEN:-1024}"
33
+ lora_epochs="${LORA_EPOCHS:-0}"
34
+ distill_epochs="${DISTILL_EPOCHS:-1.0}"
35
+ distill_kl_weight="${DISTILL_KL_WEIGHT:-0.02}"
36
+ distill_kl_temp="${DISTILL_KL_TEMP:-4.0}"
37
+ distill_hidden_mse_weight="${DISTILL_HIDDEN_MSE_WEIGHT:-1.0}"
38
+ distill_attn_mse_weight="${DISTILL_ATTN_MSE_WEIGHT:-0.25}"
39
+ distill_mlp_mse_weight="${DISTILL_MLP_MSE_WEIGHT:-1.0}"
40
+ reparam_eta="${REPARAM_ETA:-0}"
41
+ reparam_gamma="${REPARAM_GAMMA:-0}"
42
+ reparam_attn_reg_scale="${REPARAM_ATTN_REG_SCALE:-1.0}"
43
+ reparam_mlp_reg_scale="${REPARAM_MLP_REG_SCALE:-1.0}"
44
+ reparam_param_subset="${REPARAM_PARAM_SUBSET:-mlp}"
45
+ dtype="${DTYPE:-bfloat16}"
46
+ batch_size="${BATCH_SIZE:-2}"
47
+ use_pertensor_fisher="${USE_PERTENSOR_FISHER:-0}"
48
+ save_full_model_cycles="${SAVE_FULL_MODEL_CYCLES:-6,11}"
49
+ comm_skip_post_reselect="${COMM_SKIP_POST_RESELECT:-1}"
50
+ head_permute="${HEAD_PERMUTE:-0}"
51
+ head_permute_select="${HEAD_PERMUTE_SELECT:-$head_permute}"
52
+ head_permute_merge="${HEAD_PERMUTE_MERGE:-$head_permute}"
53
+
54
+ fisher_args=(--fisher_mode param)
55
+ if [[ "$use_pertensor_fisher" == "1" ]]; then
56
+ fisher_args=(--fisher_mode tensor)
57
+ fi
58
+
59
+ output_dir_suffix="progressive_common_${num_progressive}_nopost_only_last"
60
+ if [[ "$use_pertensor_fisher" == "1" ]]; then
61
+ output_dir_suffix="${output_dir_suffix}_pertensor"
62
+ fi
63
+
64
# Reduce a model id or path to a filesystem-safe slug: '/', ':' and '@' and any
# other character outside '[:alnum:]_.-' become '_', runs of '_' collapse to
# one, and leading/trailing '_' are trimmed.
# printf (not echo) avoids a trailing newline being translated into a stray
# '_', and sed -E with '_+' makes the trim actually match; the previous BRE
# '_\\+' only matched '_' followed by a literal backslash and '+'.
# Note: default result directories therefore no longer carry the extra '_'
# that earlier runs produced before the suffix.
# Arguments: $1 - model id or path
# Outputs:   slug on stdout (no trailing newline)
slugify_model() {
  printf '%s' "$1" \
    | tr '/:@' '___' \
    | tr -cs '[:alnum:]_.-' '_' \
    | sed -E 's/^_+//; s/_+$//'
}

model_slug="$(slugify_model "$model")"
output_dir_default="$repo_root/results/${model_slug}_${output_dir_suffix}"

# Output directory precedence: first remaining positional argument (anything
# that does not start with "--"), then $OUTDIR, then the default above.
output_dir=""
if [[ $# -gt 0 && "${1:0:2}" != "--" ]]; then
  output_dir="$1"
  shift
elif [[ -n "${OUTDIR:-}" ]]; then
  output_dir="${OUTDIR}"
else
  output_dir="${output_dir_default}"
fi
# Optional run tag appended to whichever directory was selected.
if [[ -n "${RUN_NAME:-}" ]]; then
  output_dir="${output_dir}_${RUN_NAME}"
fi
78
+
79
+ python_args=(
80
+ --model "$model" \
81
+ --dataset "$dataset" \
82
+ --dataset_config "$dataset_config" \
83
+ --target_tokens "$target_tokens" \
84
+ --num_samples "$calib_sequences" \
85
+ --seq_len "$seq_len" \
86
+ --batch_size "$batch_size" \
87
+ --distill_batch_size "$distill_batch_size" \
88
+ --distill_seq_len "$distill_seq_len" \
89
+ --distill_epochs "$distill_epochs" \
90
+ --eval_batch_size "$eval_batch_size" \
91
+ --eval_seq_len "$seq_len" \
92
+ --eval_num_samples "$eval_num_samples" \
93
+ --distill_kl_weight "$distill_kl_weight" \
94
+ --distill_kl_temp "$distill_kl_temp" \
95
+ --distill_hidden_mse_weight "$distill_hidden_mse_weight" \
96
+ --distill_attn_mse_weight "$distill_attn_mse_weight" \
97
+ --distill_mlp_mse_weight "$distill_mlp_mse_weight" \
98
+ --reparam_eta "$reparam_eta" \
99
+ --reparam_gamma "$reparam_gamma" \
100
+ --reparam_attn_reg_scale "$reparam_attn_reg_scale" \
101
+ --reparam_mlp_reg_scale "$reparam_mlp_reg_scale" \
102
+ --reparam_param_subset "$reparam_param_subset" \
103
+ --distill_weight_decay 0.0 \
104
+ --distill_max_grad_norm 1.0 \
105
+ --distill_grad_accum_steps 1 \
106
+ --distill_eval_every 2000 \
107
+ --lora_eval_every 2000 \
108
+ --lora_epochs "$lora_epochs" \
109
+ )
110
+
111
+ python_args+=("${fisher_args[@]}")
112
+ if [[ -n "$save_full_model_cycles" ]]; then
113
+ python_args+=(--save_full_model_cycles "$save_full_model_cycles")
114
+ fi
115
+
116
+ python_args+=(
117
+ --distill_method reparam \
118
+ --redistrib_teacher_source previous_cycle \
119
+ --comm_enabled \
120
+ --comm_mu_auto \
121
+ --layer auto \
122
+ --exclude_pairs 0,1,-1 \
123
+ --num_progressive "$num_progressive" \
124
+ --output_dir "$output_dir" \
125
+ --dtype "$dtype" \
126
+ )
127
+ if [[ "$comm_skip_post_reselect" == "1" ]]; then
128
+ python_args+=(--comm_skip_post_reselect)
129
+ fi
130
+ if [[ "$head_permute_select" == "0" ]]; then
131
+ python_args+=(--no_head_permute_select)
132
+ fi
133
+ if [[ "$head_permute_merge" == "0" ]]; then
134
+ python_args+=(--no_head_permute_merge)
135
+ fi
136
+ python_args+=("$@")
137
+
138
+ mkdir -p "$output_dir"
139
+ run_args_file="$output_dir/run_args.txt"
140
+ git_commit="unknown"
141
+ if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
142
+ git_commit=$(git -C "$repo_root" rev-parse HEAD)
143
+ fi
144
+ start_epoch=$(date +%s)
145
+ start_time=$(date --iso-8601=seconds)
146
+ {
147
+ echo "git_commit=$git_commit"
148
+ echo "start_time=$start_time"
149
+ echo "HEAD_PERMUTE=$head_permute"
150
+ echo "HEAD_PERMUTE_SELECT=$head_permute_select"
151
+ echo "HEAD_PERMUTE_MERGE=$head_permute_merge"
152
+ echo "command:"
153
+ printf '%q ' python "$repo_root/src/fuse_layers.py" "${python_args[@]}"
154
+ echo
155
+ } > "$run_args_file"
156
+
157
# EXIT-trap handler: append the run's end time, wall-clock duration and exit
# status to the run-args file.
# Globals:   start_epoch (read), run_args_file (appended to)
# Arguments: none; reads $? on entry, i.e. the status the script is exiting with
write_run_summary() {
  # Capture $? before any other command overwrites it.
  local status=$?
  local finished_epoch finished_at
  finished_epoch=$(date +%s)
  finished_at=$(date --iso-8601=seconds)
  printf '%s\n' \
    "end_time=$finished_at" \
    "elapsed_seconds=$((finished_epoch - start_epoch))" \
    "exit_code=$status" >> "$run_args_file"
}
169
+ trap write_run_summary EXIT
170
+
171
+ python "$repo_root/src/fuse_layers.py" "${python_args[@]}"
script/run_abprune_inst.sh ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"
5
+
6
+ repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
7
+
8
+ if [[ $# -lt 1 ]]; then
9
+ cat <<'USAGE'
10
+ Usage:
11
+ script/run_abprune_inst.sh <model> [output_dir] [extra fuse_layers args...]
12
+
13
+ Examples:
14
+ script/run_abprune_inst.sh Qwen/Qwen3-1.7B
15
+ script/run_abprune_inst.sh /path/to/model /path/to/output --num_progressive 8
16
+ USAGE
17
+ exit 1
18
+ fi
19
+ # all meta-llama/Llama-2-7b-hf, meta-llama/Llama-3.1-8B, facebook/opt-6.7b
20
+ model="$1"
21
+ shift
22
+
23
+ dataset="${DATASET:-slimpajama}"
24
+ dataset_config="${DATASET_CONFIG:-none}"
25
+ num_progressive="${NUM_PROGRESSIVE:-16}"
26
+ seq_len="${SEQ_LEN:-1024}"
27
+ target_tokens="${TARGET_TOKENS:-500000}"
28
+ calib_sequences="${CALIB_SEQUENCES:-128}"
29
+ distill_batch_size="${DISTILL_BATCH_SIZE:-1}"
30
+ eval_batch_size="${EVAL_BATCH_SIZE:-1}"
31
+ eval_num_samples="${EVAL_NUM_SAMPLES:-200}"
32
+ distill_seq_len="${DISTILL_SEQ_LEN:-1024}"
33
+ lora_epochs="${LORA_EPOCHS:-0}"
34
+ calibration_source="${CALIBRATION_SOURCE:-lm}"
35
+ instruction_dataset="${INSTRUCTION_DATASET:-}"
36
+ instruction_config="${INSTRUCTION_CONFIG:-none}"
37
+ instruction_split="${INSTRUCTION_SPLIT:-train}"
38
+ instruction_format="${INSTRUCTION_FORMAT:-auto}"
39
+ instruction_field_instruction="${INSTRUCTION_FIELD_INSTRUCTION:-instruction}"
40
+ instruction_field_input="${INSTRUCTION_FIELD_INPUT:-input}"
41
+ instruction_field_output="${INSTRUCTION_FIELD_OUTPUT:-output}"
42
+ distillation_source="${DISTILLATION_SOURCE:-$calibration_source}"
43
+ distill_inst_samples="${DISTILL_INST_SAMPLES:-500}"
44
+ distill_epochs="${DISTILL_EPOCHS:-1.0}"
45
+ distill_kl_weight="${DISTILL_KL_WEIGHT:-0.02}"
46
+ distill_kl_temp="${DISTILL_KL_TEMP:-4.0}"
47
+ distill_hidden_mse_weight="${DISTILL_HIDDEN_MSE_WEIGHT:-1.0}"
48
+ distill_attn_mse_weight="${DISTILL_ATTN_MSE_WEIGHT:-0.25}"
49
+ distill_mlp_mse_weight="${DISTILL_MLP_MSE_WEIGHT:-1.0}"
50
+ reparam_eta="${REPARAM_ETA:-0}"
51
+ reparam_gamma="${REPARAM_GAMMA:-0}"
52
+ reparam_attn_reg_scale="${REPARAM_ATTN_REG_SCALE:-1.0}"
53
+ reparam_mlp_reg_scale="${REPARAM_MLP_REG_SCALE:-1.0}"
54
+ reparam_param_subset="${REPARAM_PARAM_SUBSET:-mlp}"
55
+ dtype="${DTYPE:-bfloat16}"
56
+ batch_size="${BATCH_SIZE:-2}"
57
+ use_pertensor_fisher="${USE_PERTENSOR_FISHER:-0}"
58
+ save_full_model_cycles="${SAVE_FULL_MODEL_CYCLES:-6,11}"
59
+ comm_skip_post_reselect="${COMM_SKIP_POST_RESELECT:-1}"
60
+ head_permute="${HEAD_PERMUTE:-1}"
61
+ head_permute_select="${HEAD_PERMUTE_SELECT:-$head_permute}"
62
+ head_permute_merge="${HEAD_PERMUTE_MERGE:-$head_permute}"
63
+
64
+ fisher_args=(--fisher_mode param)
65
+ if [[ "$use_pertensor_fisher" == "1" ]]; then
66
+ fisher_args=(--fisher_mode tensor)
67
+ fi
68
+
69
+ output_dir_suffix="progressive_common_${num_progressive}_nopost_only_last"
70
+ if [[ "$use_pertensor_fisher" == "1" ]]; then
71
+ output_dir_suffix="${output_dir_suffix}_pertensor"
72
+ fi
73
+
74
# Reduce a model id or path to a filesystem-safe slug: '/', ':' and '@' and any
# other character outside '[:alnum:]_.-' become '_', runs of '_' collapse to
# one, and leading/trailing '_' are trimmed.
# printf (not echo) avoids a trailing newline being translated into a stray
# '_', and sed -E with '_+' makes the trim actually match; the previous BRE
# '_\\+' only matched '_' followed by a literal backslash and '+'.
# Note: default result directories therefore no longer carry the extra '_'
# that earlier runs produced before the suffix.
# Arguments: $1 - model id or path
# Outputs:   slug on stdout (no trailing newline)
slugify_model() {
  printf '%s' "$1" \
    | tr '/:@' '___' \
    | tr -cs '[:alnum:]_.-' '_' \
    | sed -E 's/^_+//; s/_+$//'
}

model_slug="$(slugify_model "$model")"
output_dir_default="$repo_root/results/${model_slug}_${output_dir_suffix}"

# Output directory precedence: first remaining positional argument (anything
# that does not start with "--"), then $OUTDIR, then the default above.
output_dir=""
if [[ $# -gt 0 && "${1:0:2}" != "--" ]]; then
  output_dir="$1"
  shift
elif [[ -n "${OUTDIR:-}" ]]; then
  output_dir="${OUTDIR}"
else
  output_dir="${output_dir_default}"
fi
# Optional run tag appended to whichever directory was selected.
if [[ -n "${RUN_NAME:-}" ]]; then
  output_dir="${output_dir}_${RUN_NAME}"
fi
88
+
89
+ python_args=(
90
+ --model "$model" \
91
+ --dataset "$dataset" \
92
+ --dataset_config "$dataset_config" \
93
+ --target_tokens "$target_tokens" \
94
+ --num_samples "$calib_sequences" \
95
+ --seq_len "$seq_len" \
96
+ --batch_size "$batch_size" \
97
+ --calibration_source "$calibration_source" \
98
+ --distillation_source "$distillation_source" \
99
+ --distill_batch_size "$distill_batch_size" \
100
+ --distill_inst_samples "$distill_inst_samples" \
101
+ --distill_seq_len "$distill_seq_len" \
102
+ --distill_epochs "$distill_epochs" \
103
+ --eval_batch_size "$eval_batch_size" \
104
+ --eval_seq_len "$seq_len" \
105
+ --eval_num_samples "$eval_num_samples" \
106
+ --distill_kl_weight "$distill_kl_weight" \
107
+ --distill_kl_temp "$distill_kl_temp" \
108
+ --distill_hidden_mse_weight "$distill_hidden_mse_weight" \
109
+ --distill_attn_mse_weight "$distill_attn_mse_weight" \
110
+ --distill_mlp_mse_weight "$distill_mlp_mse_weight" \
111
+ --reparam_eta "$reparam_eta" \
112
+ --reparam_gamma "$reparam_gamma" \
113
+ --reparam_attn_reg_scale "$reparam_attn_reg_scale" \
114
+ --reparam_mlp_reg_scale "$reparam_mlp_reg_scale" \
115
+ --reparam_param_subset "$reparam_param_subset" \
116
+ --distill_weight_decay 0.0 \
117
+ --distill_max_grad_norm 1.0 \
118
+ --distill_grad_accum_steps 1 \
119
+ --distill_eval_every 2000 \
120
+ --lora_eval_every 2000 \
121
+ --lora_epochs "$lora_epochs" \
122
+ )
123
+
124
+ if [[ -n "$instruction_dataset" ]]; then
125
+ python_args+=(
126
+ --instruction_dataset "$instruction_dataset" \
127
+ --instruction_config "$instruction_config" \
128
+ --instruction_split "$instruction_split" \
129
+ --instruction_format "$instruction_format" \
130
+ --instruction_field_instruction "$instruction_field_instruction" \
131
+ --instruction_field_input "$instruction_field_input" \
132
+ --instruction_field_output "$instruction_field_output" \
133
+ )
134
+ fi
135
+
136
+ python_args+=("${fisher_args[@]}")
137
+ if [[ -n "$save_full_model_cycles" ]]; then
138
+ python_args+=(--save_full_model_cycles "$save_full_model_cycles")
139
+ fi
140
+
141
+ python_args+=(
142
+ --distill_method reparam \
143
+ --redistrib_teacher_source previous_cycle \
144
+ --comm_enabled \
145
+ --comm_mu_auto \
146
+ --layer auto \
147
+ --exclude_pairs 0,1,-1 \
148
+ --num_progressive "$num_progressive" \
149
+ --output_dir "$output_dir" \
150
+ --dtype "$dtype" \
151
+ )
152
+ if [[ "$comm_skip_post_reselect" == "1" ]]; then
153
+ python_args+=(--comm_skip_post_reselect)
154
+ fi
155
+ if [[ "$head_permute_select" == "0" ]]; then
156
+ python_args+=(--no_head_permute_select)
157
+ fi
158
+ if [[ "$head_permute_merge" == "0" ]]; then
159
+ python_args+=(--no_head_permute_merge)
160
+ fi
161
+ python_args+=("$@")
162
+
163
+ mkdir -p "$output_dir"
164
+ run_args_file="$output_dir/run_args.txt"
165
+ git_commit="unknown"
166
+ if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
167
+ git_commit=$(git -C "$repo_root" rev-parse HEAD)
168
+ fi
169
+ start_epoch=$(date +%s)
170
+ start_time=$(date --iso-8601=seconds)
171
+ {
172
+ echo "git_commit=$git_commit"
173
+ echo "start_time=$start_time"
174
+ echo "HEAD_PERMUTE=$head_permute"
175
+ echo "HEAD_PERMUTE_SELECT=$head_permute_select"
176
+ echo "HEAD_PERMUTE_MERGE=$head_permute_merge"
177
+ echo "command:"
178
+ printf '%q ' python "$repo_root/src_inst/fuse_layers.py" "${python_args[@]}"
179
+ echo
180
+ } > "$run_args_file"
181
+
182
# EXIT-trap handler: append the run's end time, wall-clock duration and exit
# status to the run-args file.
# Globals:   start_epoch (read), run_args_file (appended to)
# Arguments: none; reads $? on entry, i.e. the status the script is exiting with
write_run_summary() {
  # Capture $? before any other command overwrites it.
  local status=$?
  local finished_epoch finished_at
  finished_epoch=$(date +%s)
  finished_at=$(date --iso-8601=seconds)
  printf '%s\n' \
    "end_time=$finished_at" \
    "elapsed_seconds=$((finished_epoch - start_epoch))" \
    "exit_code=$status" >> "$run_args_file"
}
194
+ trap write_run_summary EXIT
195
+
196
+ PYTHONPATH="$repo_root/src_inst:$repo_root${PYTHONPATH:+:$PYTHONPATH}" \
197
+ python "$repo_root/src_inst/fuse_layers.py" "${python_args[@]}"
script/run_abprune_small.sh ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-3}"
5
+
6
+ repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
7
+
8
# A model id or path is required. The usage text names this script
# (run_abprune_small.sh); it previously pointed at run_abprune.sh.
if [[ $# -lt 1 ]]; then
  cat <<'USAGE'
Usage:
script/run_abprune_small.sh <model> [output_dir] [extra fuse_layers args...]

Examples:
script/run_abprune_small.sh Qwen/Qwen3-1.7B
script/run_abprune_small.sh /path/to/model /path/to/output --num_progressive 8
USAGE
  exit 1
fi
19
+
20
+ model="$1"
21
+ shift
22
+
23
+ # whole settings
24
+ dataset="${DATASET:-slimpajama}"
25
+ dataset_config="${DATASET_CONFIG:-none}"
26
+ num_progressive="${NUM_PROGRESSIVE:-14}"
27
+ dtype="${DTYPE:-bfloat16}"
28
+ use_pertensor_fisher="${USE_PERTENSOR_FISHER:-0}"
29
+ save_full_model_cycles="${SAVE_FULL_MODEL_CYCLES:-6,11}"
30
+ head_permute="${HEAD_PERMUTE:-1}"
31
+ head_permute_select="${HEAD_PERMUTE_SELECT:-$head_permute}"
32
+ head_permute_merge="${HEAD_PERMUTE_MERGE:-$head_permute}"
33
+
34
+ # calibration dataset
35
+ calib_sequences="${CALIB_SEQUENCES:-128}"
36
+ seq_len="${SEQ_LEN:-512}"
37
+
38
+ # distillation dataset
39
+ distill_seq_len="${DISTILL_SEQ_LEN:-512}"
40
+ target_tokens="${TARGET_TOKENS:-500000}"
41
+ distill_batch_size="${DISTILL_BATCH_SIZE:-1}"
42
+
43
+ # distillation evaluation
44
+ batch_size="${BATCH_SIZE:-1}"
45
+ eval_batch_size="${EVAL_BATCH_SIZE:-1}"
46
+ eval_num_samples="${EVAL_NUM_SAMPLES:-200}"
47
+ lora_epochs="${LORA_EPOCHS:-0}"
48
+ distill_epochs="${DISTILL_EPOCHS:-1.0}"
49
+ distill_kl_weight="${DISTILL_KL_WEIGHT:-0.01}"
50
+ distill_kl_temp="${DISTILL_KL_TEMP:-4.0}"
51
+
52
+
53
+ fisher_args=(--fisher_mode param)
54
+ if [[ "$use_pertensor_fisher" == "1" ]]; then
55
+ fisher_args=(--fisher_mode tensor)
56
+ fi
57
+
58
+ output_dir_suffix="progressive_common_${num_progressive}_nopost_only_last"
59
+ if [[ "$use_pertensor_fisher" == "1" ]]; then
60
+ output_dir_suffix="${output_dir_suffix}_pertensor"
61
+ fi
62
+
63
# Reduce a model id or path to a filesystem-safe slug: '/', ':' and '@' and any
# other character outside '[:alnum:]_.-' become '_', runs of '_' collapse to
# one, and leading/trailing '_' are trimmed.
# printf (not echo) avoids a trailing newline being translated into a stray
# '_', and sed -E with '_+' makes the trim actually match; the previous BRE
# '_\\+' only matched '_' followed by a literal backslash and '+'.
# Note: default result directories therefore no longer carry the extra '_'
# that earlier runs produced before the suffix.
# Arguments: $1 - model id or path
# Outputs:   slug on stdout (no trailing newline)
slugify_model() {
  printf '%s' "$1" \
    | tr '/:@' '___' \
    | tr -cs '[:alnum:]_.-' '_' \
    | sed -E 's/^_+//; s/_+$//'
}

model_slug="$(slugify_model "$model")"
output_dir_default="$repo_root/results/${model_slug}_${output_dir_suffix}"

# Output directory precedence: first remaining positional argument (anything
# that does not start with "--"), then $OUTDIR, then the default above.
output_dir=""
if [[ $# -gt 0 && "${1:0:2}" != "--" ]]; then
  output_dir="$1"
  shift
elif [[ -n "${OUTDIR:-}" ]]; then
  output_dir="${OUTDIR}"
else
  output_dir="${output_dir_default}"
fi
# Optional run tag appended to whichever directory was selected.
if [[ -n "${RUN_NAME:-}" ]]; then
  output_dir="${output_dir}_${RUN_NAME}"
fi
77
+
78
+ python_args=(
79
+ --model "$model" \
80
+ --dataset "$dataset" \
81
+ --dataset_config "$dataset_config" \
82
+ --target_tokens "$target_tokens" \
83
+ --num_samples "$calib_sequences" \
84
+ --seq_len "$seq_len" \
85
+ --batch_size "$batch_size" \
86
+ --distill_batch_size "$distill_batch_size" \
87
+ --distill_seq_len "$distill_seq_len" \
88
+ --distill_epochs "$distill_epochs" \
89
+ --eval_batch_size "$eval_batch_size" \
90
+ --eval_seq_len "$seq_len" \
91
+ --eval_num_samples "$eval_num_samples" \
92
+ --distill_kl_weight "$distill_kl_weight" \
93
+ --distill_kl_temp "$distill_kl_temp" \
94
+ --distill_weight_decay 0.0 \
95
+ --distill_max_grad_norm 1.0 \
96
+ --distill_grad_accum_steps 1 \
97
+ --distill_eval_every 2000 \
98
+ --lora_eval_every 2000 \
99
+ --lora_epochs "$lora_epochs" \
100
+ --auto_metric dwce \
101
+ )
102
+
103
+ # --auto_cosine_topk 5
104
+
105
+ python_args+=("${fisher_args[@]}")
106
+ if [[ -n "$save_full_model_cycles" ]]; then
107
+ python_args+=(--save_full_model_cycles "$save_full_model_cycles")
108
+ fi
109
+
110
+ python_args+=(
111
+ --distill_method reparam \
112
+ --redistrib_teacher_source previous_cycle \
113
+ --comm_enabled \
114
+ --comm_mu_auto \
115
+ --layer auto \
116
+ --exclude_pairs 0,1,-1 \
117
+ --num_progressive "$num_progressive" \
118
+ --output_dir "$output_dir" \
119
+ --dtype "$dtype" \
120
+ )
121
+ if [[ "$head_permute_select" == "0" ]]; then
122
+ python_args+=(--no_head_permute_select)
123
+ fi
124
+ if [[ "$head_permute_merge" == "0" ]]; then
125
+ python_args+=(--no_head_permute_merge)
126
+ fi
127
+ python_args+=("$@")
128
+
129
+ mkdir -p "$output_dir"
130
+ run_args_file="$output_dir/run_args.txt"
131
+ git_commit="unknown"
132
+ if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
133
+ git_commit=$(git -C "$repo_root" rev-parse HEAD)
134
+ fi
135
+ start_epoch=$(date +%s)
136
+ start_time=$(date --iso-8601=seconds)
137
+ {
138
+ echo "git_commit=$git_commit"
139
+ echo "start_time=$start_time"
140
+ echo "HEAD_PERMUTE=$head_permute"
141
+ echo "HEAD_PERMUTE_SELECT=$head_permute_select"
142
+ echo "HEAD_PERMUTE_MERGE=$head_permute_merge"
143
+ echo "command:"
144
+ printf '%q ' python "$repo_root/src/fuse_layers.py" "${python_args[@]}"
145
+ echo
146
+ } > "$run_args_file"
147
+
148
# EXIT-trap handler: append the run's end time, wall-clock duration and exit
# status to the run-args file.
# Globals:   start_epoch (read), run_args_file (appended to)
# Arguments: none; reads $? on entry, i.e. the status the script is exiting with
write_run_summary() {
  # Capture $? before any other command overwrites it.
  local status=$?
  local finished_epoch finished_at
  finished_epoch=$(date +%s)
  finished_at=$(date --iso-8601=seconds)
  printf '%s\n' \
    "end_time=$finished_at" \
    "elapsed_seconds=$((finished_epoch - start_epoch))" \
    "exit_code=$status" >> "$run_args_file"
}
160
+ trap write_run_summary EXIT
161
+
162
+ python "$repo_root/src/fuse_layers.py" "${python_args[@]}"
script/run_abprune_smoke.sh ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
+ export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-1}"
5
+
6
+ repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
7
+
8
+ if [[ $# -lt 1 ]]; then
9
+ cat <<'USAGE'
10
+ Usage:
11
+ script/run_abprune_smoke.sh <model> [output_dir] [extra fuse_layers args...]
12
+
13
+ Examples:
14
+ script/run_abprune_smoke.sh Qwen/Qwen3-1.7B
15
+ script/run_abprune_smoke.sh /path/to/model /path/to/output --num_progressive 3
16
+ USAGE
17
+ exit 1
18
+ fi
19
+
20
+ model="$1"
21
+ shift
22
+
23
+ dataset="${DATASET:-slimpajama}"
24
+ dataset_config="${DATASET_CONFIG:-none}"
25
+ num_progressive="${NUM_PROGRESSIVE:-4}"
26
+ seq_len="${SEQ_LEN:-128}"
27
+ target_tokens="${TARGET_TOKENS:-8192}"
28
+ calib_sequences="${CALIB_SEQUENCES:-8}"
29
+ distill_batch_size="${DISTILL_BATCH_SIZE:-1}"
30
+ eval_batch_size="${EVAL_BATCH_SIZE:-1}"
31
+ eval_num_samples="${EVAL_NUM_SAMPLES:-8}"
32
+ distill_seq_len="${DISTILL_SEQ_LEN:-128}"
33
+ lora_epochs="${LORA_EPOCHS:-0}"
34
+ distill_epochs="${DISTILL_EPOCHS:-0.1}"
35
+ distill_kl_weight="${DISTILL_KL_WEIGHT:-0.01}"
36
+ distill_kl_temp="${DISTILL_KL_TEMP:-4.0}"
37
+ dtype="${DTYPE:-bfloat16}"
38
+ batch_size="${BATCH_SIZE:-1}"
39
+ use_pertensor_fisher="${USE_PERTENSOR_FISHER:-0}"
40
+ save_full_model_cycles="${SAVE_FULL_MODEL_CYCLES:-3}"
41
+
42
+ fisher_args=(--fisher_mode param)
43
+ if [[ "$use_pertensor_fisher" == "1" ]]; then
44
+ fisher_args=(--fisher_mode tensor)
45
+ fi
46
+
47
+ output_dir_suffix="progressive_common_smoke"
48
+ if [[ "$use_pertensor_fisher" == "1" ]]; then
49
+ output_dir_suffix="${output_dir_suffix}_pertensor"
50
+ fi
51
+
52
# Reduce a model id or path to a filesystem-safe slug: '/', ':' and '@' and any
# other character outside '[:alnum:]_.-' become '_', runs of '_' collapse to
# one, and leading/trailing '_' are trimmed.
# printf (not echo) avoids a trailing newline being translated into a stray
# '_', and sed -E with '_+' makes the trim actually match; the previous BRE
# '_\\+' only matched '_' followed by a literal backslash and '+'.
# Note: default result directories therefore no longer carry the extra '_'
# that earlier runs produced before the suffix.
# Arguments: $1 - model id or path
# Outputs:   slug on stdout (no trailing newline)
slugify_model() {
  printf '%s' "$1" \
    | tr '/:@' '___' \
    | tr -cs '[:alnum:]_.-' '_' \
    | sed -E 's/^_+//; s/_+$//'
}

model_slug="$(slugify_model "$model")"
output_dir_default="$repo_root/results/${model_slug}_${output_dir_suffix}"

# Output directory precedence: first remaining positional argument (anything
# that does not start with "--"), then $OUTDIR, then the default above.
output_dir=""
if [[ $# -gt 0 && "${1:0:2}" != "--" ]]; then
  output_dir="$1"
  shift
elif [[ -n "${OUTDIR:-}" ]]; then
  output_dir="${OUTDIR}"
else
  output_dir="${output_dir_default}"
fi
# Optional run tag appended to whichever directory was selected.
if [[ -n "${RUN_NAME:-}" ]]; then
  output_dir="${output_dir}_${RUN_NAME}"
fi
66
+
67
+ python_args=(
68
+ --model "$model" \
69
+ --dataset "$dataset" \
70
+ --dataset_config "$dataset_config" \
71
+ --target_tokens "$target_tokens" \
72
+ --num_samples "$calib_sequences" \
73
+ --seq_len "$seq_len" \
74
+ --batch_size "$batch_size" \
75
+ --distill_batch_size "$distill_batch_size" \
76
+ --distill_seq_len "$distill_seq_len" \
77
+ --distill_epochs "$distill_epochs" \
78
+ --eval_batch_size "$eval_batch_size" \
79
+ --eval_seq_len "$seq_len" \
80
+ --eval_num_samples "$eval_num_samples" \
81
+ --distill_kl_weight "$distill_kl_weight" \
82
+ --distill_kl_temp "$distill_kl_temp" \
83
+ --distill_weight_decay 0.0 \
84
+ --distill_max_grad_norm 1.0 \
85
+ --distill_grad_accum_steps 1 \
86
+ --distill_eval_every 0 \
87
+ --lora_eval_every 0 \
88
+ --lora_epochs "$lora_epochs" \
89
+ )
90
+
91
+ python_args+=("${fisher_args[@]}")
92
+ if [[ -n "$save_full_model_cycles" ]]; then
93
+ python_args+=(--save_full_model_cycles "$save_full_model_cycles")
94
+ fi
95
+
96
+ python_args+=(
97
+ --distill_method reparam \
98
+ --redistrib_teacher_source previous_cycle \
99
+ --comm_enabled \
100
+ --comm_mu_auto \
101
+ --layer auto \
102
+ --exclude_pairs -1 \
103
+ --num_progressive "$num_progressive" \
104
+ --output_dir "$output_dir" \
105
+ --dtype "$dtype" \
106
+ )
107
+ python_args+=("$@")
108
+
109
+ mkdir -p "$output_dir"
110
+ run_args_file="$output_dir/run_args.txt"
111
+ start_epoch=$(date +%s)
112
+ start_time=$(date --iso-8601=seconds)
113
+ {
114
+ echo "start_time=$start_time"
115
+ echo "command:"
116
+ printf '%q ' python "$repo_root/src/fuse_layers.py" "${python_args[@]}"
117
+ echo
118
+ } > "$run_args_file"
119
+
120
# EXIT-trap handler: append the run's end time, wall-clock duration and exit
# status to the run-args file.
# Globals:   start_epoch (read), run_args_file (appended to)
# Arguments: none; reads $? on entry, i.e. the status the script is exiting with
write_run_summary() {
  # Capture $? before any other command overwrites it.
  local status=$?
  local finished_epoch finished_at
  finished_epoch=$(date +%s)
  finished_at=$(date --iso-8601=seconds)
  printf '%s\n' \
    "end_time=$finished_at" \
    "elapsed_seconds=$((finished_epoch - start_epoch))" \
    "exit_code=$status" >> "$run_args_file"
}
132
+ trap write_run_summary EXIT
133
+
134
+ python "$repo_root/src/fuse_layers.py" "${python_args[@]}"
script/run_eval_ppl.sh ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ set -euo pipefail
3
+
4
# Default to GPU 0 but keep a value the caller already exported, matching the
# other launcher scripts; the unconditional assignment used to overwrite it.
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"
5
+
6
+ ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
7
+
8
+ if [[ $# -lt 1 ]]; then
9
+ cat <<'USAGE'
10
+ Usage:
11
+ script/run_eval_ppl.sh <model_path> [output_dir] [extra eval_ppl args...]
12
+
13
+ Examples:
14
+ script/run_eval_ppl.sh /path/to/model
15
+ script/run_eval_ppl.sh /path/to/model /tmp/ppl_out --dataset wikitext2 --max_seq_len 1024 --batch_size 8
16
+ USAGE
17
+ exit 1
18
+ fi
19
+
20
+ MODEL_PATH="$1"
21
+ shift
22
+
23
+ OUTPUT_DIR=""
24
+ if [[ $# -gt 0 && "${1:0:2}" != "--" ]]; then
25
+ OUTPUT_DIR="$1"
26
+ shift
27
+ fi
28
+
29
+ CMD=(
30
+ python "$ROOT_DIR/src/eval_ppl.py"
31
+ --base_model "$MODEL_PATH"
32
+ )
33
+
34
+ if [[ -n "$OUTPUT_DIR" ]]; then
35
+ CMD+=(--output_dir "$OUTPUT_DIR")
36
+ fi
37
+
38
# Default to WikiText-2 only unless the user explicitly selects a dataset.

# Report whether the given arguments contain a dataset selection, in either
# the `--dataset NAME` or the `--dataset=NAME` spelling.
# NOTE(review): assumes eval_ppl.py accepts the `--dataset=NAME` form — confirm.
# Arguments: the extra arguments that will be forwarded to eval_ppl.py
# Returns:   0 if a dataset flag is present, 1 otherwise
has_dataset_flag() {
  local arg
  for arg in "$@"; do
    case "$arg" in
      --dataset | --dataset=*) return 0 ;;
    esac
  done
  return 1
}

HAS_DATASET_FLAG=0
if has_dataset_flag "$@"; then
  HAS_DATASET_FLAG=1
fi
if [[ "$HAS_DATASET_FLAG" -eq 0 ]]; then
  CMD+=(--dataset wikitext2)
fi
49
+
50
+ CMD+=("$@")
51
+
52
+ echo "Running: ${CMD[*]}"
53
+ exec "${CMD[@]}"
script/run_eval_zeroshot.sh ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Run `lm_eval` zero-shot tasks against a model and record the exact command,
# git commit and timing in <output_dir>/run_zeroshot_args.txt.
#
# Positional arguments:
#   $1  model path or id (passed as pretrained=<...>)
#   $2  output directory (created if missing)
#   $3+ `--mmlu` adds the mmlu task; everything else is forwarded to lm_eval
#
# Environment overrides (default in parentheses):
#   CUDA_VISIBLE_DEVICES (2), TASKS, DEVICE (cuda), BATCH_SIZE (32),
#   NUM_FEWSHOT (0), OUTPUT_FILE (zeroshot_results.json)
set -euo pipefail

export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-2}"

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

if [[ $# -lt 2 ]]; then
  cat <<'USAGE'
Usage:
script/run_eval_zeroshot.sh <model_path> <output_dir> [--mmlu] [extra lm_eval args...]

Examples:
script/run_eval_zeroshot.sh /path/to/model /path/to/output
script/run_eval_zeroshot.sh /path/to/model /path/to/output --mmlu
script/run_eval_zeroshot.sh /path/to/model /path/to/output --tasks arc_easy,arc_challenge,hellaswag
USAGE
  exit 1
fi

MODEL_PATH="$1"
OUTPUT_DIR="$2"
shift 2

TASKS="${TASKS:-arc_easy,arc_challenge,hellaswag,piqa,winogrande,openbookqa,boolq}"
DEVICE="${DEVICE:-cuda}"
# BATCH_SIZE used to default to "auto" but was never forwarded: the command
# hard-coded `--batch_size 32`. The effective default stays 32 and the
# environment override is now honoured.
BATCH_SIZE="${BATCH_SIZE:-32}"
NUM_FEWSHOT="${NUM_FEWSHOT:-0}"
OUTPUT_FILE="${OUTPUT_FILE:-zeroshot_results.json}"

# Split the remaining arguments: `--mmlu` is consumed here, the rest is
# forwarded to lm_eval untouched.
INCLUDE_MMLU=0
PASSTHROUGH_ARGS=()
for arg in "$@"; do
  if [[ "$arg" == "--mmlu" ]]; then
    INCLUDE_MMLU=1
    continue
  fi
  PASSTHROUGH_ARGS+=("$arg")
done
# Append mmlu only if it is not already in the comma-separated task list.
if [[ "$INCLUDE_MMLU" -eq 1 && ",$TASKS," != *",mmlu,"* ]]; then
  TASKS="${TASKS},mmlu"
fi

mkdir -p "$OUTPUT_DIR"
RUN_ARGS_FILE="$OUTPUT_DIR/run_zeroshot_args.txt"
RESOLVED_MODEL_PATH="$MODEL_PATH"

git_commit="unknown"
if git -C "$ROOT_DIR" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
  git_commit=$(git -C "$ROOT_DIR" rev-parse HEAD)
fi
start_epoch=$(date +%s)
start_time=$(date --iso-8601=seconds)

LM_EVAL_CMD=(
  lm_eval
  --model hf
  --model_args "pretrained=$RESOLVED_MODEL_PATH"
  --tasks "$TASKS"
  --num_fewshot "$NUM_FEWSHOT"
  --device "$DEVICE"
  --batch_size "$BATCH_SIZE"
  --output_path "$OUTPUT_DIR/$OUTPUT_FILE"
)
# The `${arr[@]+...}` form keeps `set -u` from aborting on an empty array,
# which bash releases before 4.4 treat as an unbound variable.
LM_EVAL_CMD+=(${PASSTHROUGH_ARGS[@]+"${PASSTHROUGH_ARGS[@]}"})

{
  echo "git_commit=$git_commit"
  echo "start_time=$start_time"
  echo "resolved_model_path=$RESOLVED_MODEL_PATH"
  echo "command:"
  printf '%q ' "${LM_EVAL_CMD[@]}"
  echo
} > "$RUN_ARGS_FILE"

# EXIT-trap handler: append end time, duration and exit status to the
# run-args file.
# Globals: start_epoch (read), RUN_ARGS_FILE (appended to)
write_run_summary() {
  # Capture $? before any other command overwrites it.
  local exit_code=$?
  local end_epoch end_time elapsed_seconds
  end_epoch=$(date +%s)
  end_time=$(date --iso-8601=seconds)
  elapsed_seconds=$((end_epoch - start_epoch))
  {
    echo "end_time=$end_time"
    echo "elapsed_seconds=$elapsed_seconds"
    echo "exit_code=$exit_code"
  } >> "$RUN_ARGS_FILE"
}
trap write_run_summary EXIT

echo "Running: ${LM_EVAL_CMD[*]}"
# Run as a child process rather than via `exec`: exec replaces this shell, so
# the EXIT trap above would never fire and the summary would never be written.
# With `set -e`, a failing lm_eval still makes the script exit with its status.
"${LM_EVAL_CMD[@]}"
script/run_laco_llama.sh ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
# Run compare_model/LaCo/laco_llama.py from its own directory and record the
# git commit and exact command in <output_dir>/run_args.txt.
#
# All positional arguments are forwarded to laco_llama.py after the defaults.
#
# Environment overrides (default in parentheses):
#   CUDA_VISIBLE_DEVICES (0), MODEL_PATH (meta-llama/Llama-2-7b-hf),
#   TARGET_LAYERS (16), MERGE_LAYERS (2), INTERVAL (1), LOWEST_LAYER (0),
#   THRESHOLD (0.45), DTYPE (bfloat16), DEVICE (cuda), MAX_PROMPT_LENGTH (128),
#   OUTDIR (<repo>/results/laco_llama_target_<TARGET_LAYERS>),
#   TRUST_REMOTE_CODE (0), FORCE_TARGET (1), PROMPT_FILE (unset),
#   SAVE_LAYERS (unset; whitespace-separated list)
set -euo pipefail

export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"

repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
workdir="$repo_root/compare_model/LaCo"

model_path="${MODEL_PATH:-meta-llama/Llama-2-7b-hf}"
target_layers="${TARGET_LAYERS:-16}"
merge_layers="${MERGE_LAYERS:-2}"
interval="${INTERVAL:-1}"
lowest_layer="${LOWEST_LAYER:-0}"
threshold="${THRESHOLD:-0.45}"
dtype="${DTYPE:-bfloat16}"
device="${DEVICE:-cuda}"
max_prompt_length="${MAX_PROMPT_LENGTH:-128}"
output_dir="${OUTDIR:-$repo_root/results/laco_llama_target_${target_layers}}"

# Python is started from $workdir (see the `cd` near the end), so a relative
# OUTDIR would be created here but resolved against $workdir by Python.
# Create it now and pin it to an absolute path so run_args.txt and the Python
# outputs land in the same directory.
mkdir -p "$output_dir"
output_dir="$(cd "$output_dir" && pwd)"

python_args=(
  --model_path "$model_path"
  --output_dir "$output_dir"
  --target_layers "$target_layers"
  --merge_layers "$merge_layers"
  --interval "$interval"
  --lowest_layer "$lowest_layer"
  --threshold "$threshold"
  --dtype "$dtype"
  --device "$device"
  --max_prompt_length "$max_prompt_length"
)

if [[ "${TRUST_REMOTE_CODE:-0}" == "1" ]]; then
  python_args+=(--trust_remote_code)
fi

if [[ "${FORCE_TARGET:-1}" == "1" ]]; then
  python_args+=(--force_target)
else
  python_args+=(--no_force_target)
fi

if [[ -n "${PROMPT_FILE:-}" ]]; then
  # Same reasoning as for OUTDIR: anchor a relative path to the caller's
  # directory before the working directory changes.
  prompt_file="$PROMPT_FILE"
  if [[ "$prompt_file" != /* ]]; then
    prompt_file="$PWD/$prompt_file"
  fi
  python_args+=(--prompt_file "$prompt_file")
fi

if [[ -n "${SAVE_LAYERS:-}" ]]; then
  # Word splitting is intentional: SAVE_LAYERS is a whitespace-separated list.
  # shellcheck disable=SC2206
  save_layers=(${SAVE_LAYERS})
  python_args+=(--save_layers "${save_layers[@]}")
fi

# Caller-supplied extras go last.
python_args+=("$@")

git_commit="unknown"
if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
  git_commit=$(git -C "$repo_root" rev-parse HEAD)
fi
{
  echo "git_commit=$git_commit"
  echo "command:"
  printf '%q ' python "$repo_root/compare_model/LaCo/laco_llama.py" "${python_args[@]}"
  echo
} > "$output_dir/run_args.txt"

cd "$workdir"
PYTHONPATH="$repo_root${PYTHONPATH:+:$PYTHONPATH}" \
  python laco_llama.py "${python_args[@]}"
script/run_laco_qwen.sh ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# LaCo runner for Qwen checkpoints.
#
# Every setting is read from an environment variable with a default (see the
# expansions below). Command-line arguments are appended verbatim to the
# laco_qwen.py invocation. Before launching, the repository commit and the
# exact command are written to <output dir>/run_args.txt.
set -euo pipefail

: "${CUDA_VISIBLE_DEVICES:=0}"
export CUDA_VISIBLE_DEVICES

script_dir="$(dirname "${BASH_SOURCE[0]}")"
project_root="$(cd "$script_dir/.." && pwd)"
laco_dir="$project_root/compare_model/LaCo"

# The default output directory name embeds the target layer count.
n_target="${TARGET_LAYERS:-14}"
out_dir="${OUTDIR:-$project_root/results/laco_qwen_target_${n_target}}"

cli=(
  --model_path "${MODEL_PATH:-Qwen/Qwen3-1.7B}"
  --output_dir "$out_dir"
  --target_layers "$n_target"
  --merge_layers "${MERGE_LAYERS:-2}"
  --interval "${INTERVAL:-1}"
  --lowest_layer "${LOWEST_LAYER:-0}"
  --threshold "${THRESHOLD:-0.45}"
  --dtype "${DTYPE:-bfloat16}"
  --device "${DEVICE:-cuda}"
  --max_prompt_length "${MAX_PROMPT_LENGTH:-128}"
)

# --trust_remote_code is on unless TRUST_REMOTE_CODE is set to something
# other than "1".
case "${TRUST_REMOTE_CODE:-1}" in
  1) cli+=(--trust_remote_code) ;;
esac

# Exactly one of the two force-target switches is always passed.
case "${FORCE_TARGET:-1}" in
  1) cli+=(--force_target) ;;
  *) cli+=(--no_force_target) ;;
esac

[[ -z "${PROMPT_FILE:-}" ]] || cli+=(--prompt_file "$PROMPT_FILE")

if [[ -n "${SAVE_LAYERS:-}" ]]; then
  # SAVE_LAYERS is deliberately word-split into separate values.
  # shellcheck disable=SC2206
  layer_ids=(${SAVE_LAYERS})
  cli+=(--save_layers "${layer_ids[@]}")
fi

# Caller-supplied arguments go last.
cli+=("$@")

mkdir -p "$out_dir"

if git -C "$project_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
  commit_id=$(git -C "$project_root" rev-parse HEAD)
else
  commit_id="unknown"
fi

{
  printf 'git_commit=%s\n' "$commit_id"
  printf 'command:\n'
  printf '%q ' python "$laco_dir/laco_qwen.py" "${cli[@]}"
  printf '\n'
} > "$out_dir/run_args.txt"

# Run from the LaCo directory with the repository root on PYTHONPATH.
cd "$laco_dir"
PYTHONPATH="$project_root${PYTHONPATH:+:$PYTHONPATH}" \
  python laco_qwen.py "${cli[@]}"
script/run_llmpruner_llama.sh ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Prune-only runner for LLM-Pruner on Llama-family checkpoints.
# Model ids noted by the author:
#   meta-llama/Llama-2-7b-hf, meta-llama/Llama-3.1-8B
#
# Settings come from environment variables with defaults (see the expansions
# below); command-line arguments are appended to the Python invocation.
# The commit and exact command are logged to <output dir>/run_args.txt.
set -euo pipefail

: "${CUDA_VISIBLE_DEVICES:=2}"
export CUDA_VISIBLE_DEVICES

project_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
pruner_dir="$project_root/compare_model/LLM-Pruner"

model_id="${BASE_MODEL:-meta-llama/Llama-2-7b-hf}"
ckpt_name="${PRUNE_CKPT_PATH:-llama2_7b_prune}"

# Model ids containing "Llama-3" or "llama-3" use llama3.py and its
# skip-evaluation flag; everything else uses hf_prune.py. The flag is chosen
# from the model id even when PRUNE_SCRIPT overrides the entry script.
case "$model_id" in
  *Llama-3* | *llama-3*)
    entry_default="llama3.py"
    no_eval_flag="--skip_eval_after_prune"
    ;;
  *)
    entry_default="hf_prune.py"
    no_eval_flag="--skip_post_eval"
    ;;
esac
entry_script="${PRUNE_SCRIPT:-$entry_default}"

log_dir="${OUTDIR:-$pruner_dir/prune_log/$ckpt_name}"

cli=(
  --base_model "$model_id"
  --pruning_ratio "${PRUNING_RATIO:-0.25}"
  --block_wise
  --block_mlp_layer_start "${BLOCK_MLP_LAYER_START:-4}"
  --block_mlp_layer_end "${BLOCK_MLP_LAYER_END:-30}"
  --block_attention_layer_start "${BLOCK_ATTENTION_LAYER_START:-4}"
  --block_attention_layer_end "${BLOCK_ATTENTION_LAYER_END:-30}"
  --pruner_type "${PRUNER_TYPE:-taylor}"
  --taylor "${TAYLOR_MODE:-param_first}"
  --device "${DEVICE:-cpu}"
  --eval_device "${EVAL_DEVICE:-cuda}"
  --save_ckpt_log_name "$ckpt_name"
  --save_model
  "$no_eval_flag"
  "$@"
)

mkdir -p "$log_dir"

if git -C "$project_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
  commit_id=$(git -C "$project_root" rev-parse HEAD)
else
  commit_id="unknown"
fi

{
  printf 'git_commit=%s\n' "$commit_id"
  printf 'command:\n'
  printf '%q ' python "$pruner_dir/$entry_script" "${cli[@]}"
  printf '\n'
} > "$log_dir/run_args.txt"

# Run from the LLM-Pruner directory; it and the repo root go on PYTHONPATH.
cd "$pruner_dir"
PYTHONPATH="$pruner_dir:$project_root${PYTHONPATH:+:$PYTHONPATH}" \
  python "$entry_script" "${cli[@]}"
script/run_llmpruner_qwen.sh ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Prune-only runner for LLM-Pruner on Qwen-family checkpoints.
#
# Settings come from environment variables with defaults (see the expansions
# below); command-line arguments are appended to the Python invocation.
# The commit and exact command are logged to <output dir>/run_args.txt.
set -euo pipefail

: "${CUDA_VISIBLE_DEVICES:=0}"
export CUDA_VISIBLE_DEVICES

project_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
pruner_dir="$project_root/compare_model/LLM-Pruner"

ckpt_name="${PRUNE_CKPT_PATH:-qwen3_1_7b_prune}"
# The Qwen runner defaults to the llama3.py entry script.
entry_script="${PRUNE_SCRIPT:-llama3.py}"
log_dir="${OUTDIR:-$pruner_dir/prune_log/$ckpt_name}"

cli=(
  --base_model "${BASE_MODEL:-Qwen/Qwen3-1.7B}"
  --pruning_ratio "${PRUNING_RATIO:-0.25}"
  --block_wise
  --block_attention_roots "${BLOCK_ATTENTION_ROOTS:-q_proj,k_proj}"
  --block_mlp_roots "${BLOCK_MLP_ROOTS:-gate_proj,up_proj}"
  --block_mlp_layer_start "${BLOCK_MLP_LAYER_START:-4}"
  --block_mlp_layer_end "${BLOCK_MLP_LAYER_END:-24}"
  --block_attention_layer_start "${BLOCK_ATTENTION_LAYER_START:-4}"
  --block_attention_layer_end "${BLOCK_ATTENTION_LAYER_END:-24}"
  --pruner_type "${PRUNER_TYPE:-taylor}"
  --taylor "${TAYLOR_MODE:-param_first}"
  --test_after_train
  --device "${DEVICE:-cuda}"
  --eval_device "${EVAL_DEVICE:-cuda}"
  --save_ckpt_log_name "$ckpt_name"
  --save_model
  "$@"
)

mkdir -p "$log_dir"

if git -C "$project_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
  commit_id=$(git -C "$project_root" rev-parse HEAD)
else
  commit_id="unknown"
fi

{
  printf 'git_commit=%s\n' "$commit_id"
  printf 'command:\n'
  printf '%q ' python "$pruner_dir/$entry_script" "${cli[@]}"
  printf '\n'
} > "$log_dir/run_args.txt"

# Run from the LLM-Pruner directory; it and the repo root go on PYTHONPATH.
cd "$pruner_dir"
PYTHONPATH="$pruner_dir:$project_root${PYTHONPATH:+:$PYTHONPATH}" \
  python "$entry_script" "${cli[@]}"
script/run_llmpruner_whole.sh ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# Sweep driver: runs script/run_llmpruner_llama.sh once per pruning ratio
# listed below, naming each checkpoint "<model_tag>_<label>".
#
# Environment (all optional):
#   BASE_MODEL    model id or path            (default: meta-llama/Llama-3.1-8B)
#   DEVICE        forwarded to the runner     (default: cpu)
#   EVAL_DEVICE   forwarded to the runner     (default: cuda)
#   NUM_EXAMPLES  passed as --num_examples    (default: 10)
set -euo pipefail

repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
runner="$repo_root/script/run_llmpruner_llama.sh"

base_model="${BASE_MODEL:-meta-llama/Llama-3.1-8B}"
device="${DEVICE:-cpu}"
eval_device="${EVAL_DEVICE:-cuda}"
num_examples="${NUM_EXAMPLES:-10}"

# Lower-cased last path component with every character outside [a-z0-9]
# replaced by "_". The basename is passed through printf '%s' so that its
# trailing newline is not fed to tr; otherwise the newline is also turned
# into "_" and the tag ends with a stray underscore
# (e.g. "llama_3_1_8b_" -> checkpoint "llama_3_1_8b__drop6eq").
model_tag="$(printf '%s' "$(basename "$base_model")" \
  | tr '[:upper:]' '[:lower:]' \
  | tr -c 'a-z0-9' '_')"

#######################################
# Run the Llama LLM-Pruner runner for one pruning ratio.
# Globals:   base_model, model_tag, device, eval_device, num_examples,
#            runner (all read)
# Arguments: $1 - label appended to the checkpoint name
#            $2 - pruning ratio
#            remaining arguments are forwarded to the runner
# Returns:   exit status of the runner
#######################################
run_case() {
  local label="$1"
  local ratio="$2"
  shift 2

  echo "[LLM-Pruner] ${label}: PRUNING_RATIO=${ratio}"
  BASE_MODEL="$base_model" \
  PRUNE_CKPT_PATH="${model_tag}_${label}" \
  PRUNING_RATIO="$ratio" \
  DEVICE="$device" \
  EVAL_DEVICE="$eval_device" \
    bash "$runner" --num_examples "$num_examples" "$@"
}

# Active sweep. These are the "llama3 8b" ratios from the notes below, which
# matches the default BASE_MODEL. The author describes the ratios as
# block-only pruning scales equivalent to dropping layers from a 32-layer
# model; the labels suggest 6, 11 and 16 layers.
run_case "drop6eq" "0.23"
run_case "drop11eq" "0.45"
# Author's note for 0.70: "Ratio = 54.3965%". The table further down records
# 56.6762% for 0.70 -- TODO confirm which figure is current.
run_case "drop16eq" "0.70"

# ---------------------------------------------------------------------------
# Author's notes (kept verbatim in content; not executed).
#
# for llama2 7b
# run_case "drop6eq" "0.23"
# run_case "drop11eq" "0.42"
# run_case "drop16eq" "0.62"
#
# for llama3 8b
# run_case "drop6eq" "0.23"
# run_case "drop11eq" "0.45"
# run_case "drop16eq" "0.70"
#
# "0.23" 86.1997%
# "0.48" 68.1928%
# "0.51" # 68.1928%
# "0.70" # 56.6762%
#
# llama 7b depth
# 18.02%
# 33.04%
# 48.05%
#
# llama 8b depth
# 16.30%, 86.1997%
# 29.88%, 72.2934%
# 43.46%, 56.6762%
script/run_llmstreamline_llama.sh ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# LLM-Streamline (MSE-loss entry point) runner for Llama checkpoints.
# Model ids noted by the author:
#   meta-llama/Llama-3.2-3B
#   meta-llama/Llama-2-7b-hf
#
# Settings come from environment variables with defaults (see the expansions
# below); command-line arguments are appended to the mseloss_entry.py call.
# The commit and exact command are logged to <output dir>/run_args.txt.
set -euo pipefail

export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0}"

repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
workdir="$repo_root/compare_model/LLM-Streamline"

model_name="${MODEL_NAME:-meta-llama/Llama-3.2-3B}"
pruned_blocks="${COMMON_PRUNED_BLOCKS:-14}"
layer_intervals="${LAYER_INTERVALS:-$pruned_blocks}"

# Derive the default output directory from the model actually being run.
# The directory used to be hard-coded to "llama2_7b_..." although the default
# model is Llama-3.2-3B, which mislabelled the results. The tag is the
# lower-cased last path component of the model name with every character
# outside [a-z0-9] replaced by "_" (e.g. "llama_3_2_3b"). Set OUTDIR to
# choose any other location.
model_base="${model_name%/}"
model_base="${model_base##*/}"
model_tag="$(printf '%s' "$model_base" \
  | tr '[:upper:]' '[:lower:]' \
  | tr -c 'a-z0-9' '_')"
output_dir="${OUTDIR:-$repo_root/results/${model_tag}_streamline_mse_common_${layer_intervals}}"

cosine_num_data="${COSINE_NUM_DATA:-300}"
train_num_data="${TRAIN_NUM_DATA:-5000}"
epochs="${EPOCHS:-15}"
batch_size="${BATCH_SIZE:-8}"
train_batch_size="${TRAIN_BATCH_SIZE:-$batch_size}"
grad_accum="${GRAD_ACCUM:-16}"
lr="${LR:-1e-5}"
# NOTE(review): the default MIN_LR (5e-5) is larger than the default LR
# (1e-5) -- confirm this is intended by the training entry point.
min_lr="${MIN_LR:-5e-5}"
wd="${WD:-1e-3}"
dtype="${DTYPE:-bfloat16}"

python_args=(
  --model_name "$model_name"
  --output_dir "$output_dir"
  --layer_intervals "$layer_intervals"
  --cosine_num_data "$cosine_num_data"
  --train_num_data "$train_num_data"
  # The flag is spelled "--epoches" here; kept exactly as written.
  --epoches "$epochs"
  --batch_size "$batch_size"
  --train_batch_size "$train_batch_size"
  --dtype "$dtype"
  --gradient_accumulation_step "$grad_accum"
  --lr "$lr"
  --min_lr "$min_lr"
  --wd "$wd"
)
python_args+=("$@")

mkdir -p "$output_dir"
git_commit="unknown"
if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
  git_commit=$(git -C "$repo_root" rev-parse HEAD)
fi
{
  echo "git_commit=$git_commit"
  echo "command:"
  printf '%q ' python "$repo_root/compare_model/LLM-Streamline/mseloss_entry.py" "${python_args[@]}"
  echo
} > "$output_dir/run_args.txt"

# Run from the LLM-Streamline directory; it and the repo root go on
# PYTHONPATH.
cd "$workdir"
PYTHONPATH="$workdir:$repo_root${PYTHONPATH:+:$PYTHONPATH}" \
  python mseloss_entry.py "${python_args[@]}"
script/run_llmstreamline_qwen.sh ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# LLM-Streamline (MSE-loss entry point) runner for Qwen checkpoints.
#
# Settings come from environment variables with defaults (see the expansions
# below); command-line arguments are appended to the qwen_mseloss_entry.py
# call. The commit and exact command are logged to <output dir>/run_args.txt.
set -euo pipefail

: "${CUDA_VISIBLE_DEVICES:=3}"
export CUDA_VISIBLE_DEVICES

project_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
streamline_dir="$project_root/compare_model/LLM-Streamline"

# LAYER_INTERVALS falls back to COMMON_PRUNED_BLOCKS, which falls back to 14.
n_blocks="${COMMON_PRUNED_BLOCKS:-14}"
intervals="${LAYER_INTERVALS:-$n_blocks}"
out_dir="${OUTDIR:-$project_root/results/qwen3_1_7b_streamline_mse_common_${intervals}}"

# TRAIN_BATCH_SIZE falls back to the (possibly overridden) BATCH_SIZE.
bs="${BATCH_SIZE:-8}"

cli=(
  --model_name "${MODEL_NAME:-Qwen/Qwen3-1.7B}"
  --output_dir "$out_dir"
  --layer_intervals "$intervals"
  --cosine_num_data "${COSINE_NUM_DATA:-300}"
  --train_num_data "${TRAIN_NUM_DATA:-5000}"
  --epoches "${EPOCHS:-15}"
  --batch_size "$bs"
  --train_batch_size "${TRAIN_BATCH_SIZE:-$bs}"
  --gradient_accumulation_step "${GRAD_ACCUM:-16}"
  --lr "${LR:-1e-5}"
  # NOTE(review): default MIN_LR (5e-5) exceeds default LR (1e-5) -- confirm.
  --min_lr "${MIN_LR:-5e-5}"
  --wd "${WD:-1e-3}"
  "$@"
)

mkdir -p "$out_dir"

if git -C "$project_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
  commit_id=$(git -C "$project_root" rev-parse HEAD)
else
  commit_id="unknown"
fi

{
  printf 'git_commit=%s\n' "$commit_id"
  printf 'command:\n'
  printf '%q ' python "$streamline_dir/qwen_mseloss_entry.py" "${cli[@]}"
  printf '\n'
} > "$out_dir/run_args.txt"

# Run from the LLM-Streamline directory; it and the repo root go on
# PYTHONPATH.
cd "$streamline_dir"
PYTHONPATH="$streamline_dir:$project_root${PYTHONPATH:+:$PYTHONPATH}" \
  python qwen_mseloss_entry.py "${cli[@]}"
script/run_replaceme_llama.sh ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# ReplaceMe runner for Llama checkpoints.
#
# Usage:
#   script/run_replaceme_llama.sh [--target_tokens N] [extra args...]
#
# --target_tokens N (or --target_tokens=N) is consumed here and handed to
# run_replaceme.py through the COMMON_TARGET_TOKENS_OVERRIDE environment
# variable; all other arguments are forwarded unchanged.
#
# Environment (all optional):
#   CUDA_VISIBLE_DEVICES           default: 1
#   CONFIG_PATH                    default: the llama2_7b skip16 example yaml
#   COMMON_TARGET_TOKENS_OVERRIDE  used when --target_tokens is not given
#   OUTDIR                         directory for run_args.txt
#                                  (default: results/llama_7b_replaceme_common_16)
set -euo pipefail

export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-1}"

repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
workdir="$repo_root/compare_model/ReplaceMe"
default_config="$workdir/examples/llama2_7b_replaceme_lstsq_skip16_common.yaml"
config_path="${CONFIG_PATH:-$default_config}"
# Same OUTDIR convention as the sibling runner scripts; the default keeps the
# previously hard-coded location.
output_dir="${OUTDIR:-$repo_root/results/llama_7b_replaceme_common_16}"
target_tokens="${COMMON_TARGET_TOKENS_OVERRIDE:-}"
passthrough_args=()

# Pull --target_tokens out of the argument list; keep everything else in
# order for the Python script.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --target_tokens)
      if [[ $# -lt 2 ]]; then
        echo "error: --target_tokens requires a value" >&2
        exit 1
      fi
      target_tokens="$2"
      shift 2
      ;;
    --target_tokens=*)
      target_tokens="${1#*=}"
      shift
      ;;
    *)
      passthrough_args+=("$1")
      shift
      ;;
  esac
done

mkdir -p "$output_dir"
git_commit="unknown"
if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
  git_commit=$(git -C "$repo_root" rev-parse HEAD)
fi
{
  echo "git_commit=$git_commit"
  echo "config_path=$config_path"
  # 4500000 is logged when no override is set; presumably that is the
  # Python-side default -- TODO confirm.
  echo "COMMON_TARGET_TOKENS_OVERRIDE=${target_tokens:-4500000}"
  echo "command:"
  # Record the override as an environment prefix so the logged line can be
  # pasted back into a shell. It was previously emitted as an escaped
  # "# ..." word in the middle of the command.
  if [[ -n "$target_tokens" ]]; then
    printf 'COMMON_TARGET_TOKENS_OVERRIDE=%q ' "$target_tokens"
  fi
  printf '%q ' python "$repo_root/compare_model/ReplaceMe/run_replaceme.py" --config "$config_path"
  if [[ ${#passthrough_args[@]} -gt 0 ]]; then
    printf '%q ' "${passthrough_args[@]}"
  fi
  echo
} > "$output_dir/run_args.txt"

cd "$workdir"
# The ${arr[@]+"${arr[@]}"} form expands to nothing for an empty array.
# A plain "${arr[@]}" aborts with "unbound variable" under `set -u` on
# bash older than 4.4 when no extra arguments were given.
COMMON_TARGET_TOKENS_OVERRIDE="${target_tokens:-${COMMON_TARGET_TOKENS_OVERRIDE:-}}" \
PYTHONPATH="$workdir:$repo_root${PYTHONPATH:+:$PYTHONPATH}" \
  python run_replaceme.py --config "$config_path" \
  ${passthrough_args[@]+"${passthrough_args[@]}"}
script/run_replaceme_qwen.sh ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# ReplaceMe runner for Qwen checkpoints.
#
# Usage:
#   script/run_replaceme_qwen.sh [extra args forwarded to run_replaceme_qwen.py]
#
# Environment (all optional):
#   CUDA_VISIBLE_DEVICES  default: 3
#   CONFIG_PATH           default: the qwen3_1.7b skip14 example yaml
#   OUTDIR                directory for run_args.txt
#                         (default: results/qwen3_1_7b_replaceme_common_14)
set -euo pipefail

export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-3}"

repo_root="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
workdir="$repo_root/compare_model/ReplaceMe"
default_config="$workdir/examples/qwen3_1.7b_replaceme_lstsq_skip14_common.yaml"
config_path="${CONFIG_PATH:-$default_config}"
# The run log used to be written under "llama_7b_replaceme_common_14", a
# copy-paste from the Llama runner that filed Qwen runs under a Llama name.
# OUTDIR follows the same convention as the sibling runner scripts.
output_dir="${OUTDIR:-$repo_root/results/qwen3_1_7b_replaceme_common_14}"

mkdir -p "$output_dir"
git_commit="unknown"
if git -C "$repo_root" rev-parse --is-inside-work-tree >/dev/null 2>&1; then
  git_commit=$(git -C "$repo_root" rev-parse HEAD)
fi
{
  echo "git_commit=$git_commit"
  echo "config_path=$config_path"
  echo "command:"
  # Include the forwarded arguments so the logged command matches what is
  # actually executed below.
  printf '%q ' python "$repo_root/compare_model/ReplaceMe/run_replaceme_qwen.py" --config "$config_path" "$@"
  echo
} > "$output_dir/run_args.txt"

# Run from the ReplaceMe directory; it and the repo root go on PYTHONPATH.
cd "$workdir"
PYTHONPATH="$workdir:$repo_root${PYTHONPATH:+:$PYTHONPATH}" \
  python run_replaceme_qwen.py --config "$config_path" "$@"
script/run_uidl_llama.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# UIDL pruning wrapper for Llama: fills in defaults for the exported
# variables below, then runs compare_model/UIDL/run_uidl_prune.sh with the
# caller's arguments. Values already present in the environment win.
set -euo pipefail

here="$(dirname "${BASH_SOURCE[0]}")"
project_root="$(cd "$here/.." && pwd)"

# Assign each default only when the variable is unset or empty.
: "${CUDA_VISIBLE_DEVICES:=0}"
: "${MODEL:=meta-llama/Llama-2-7b-hf}"
: "${OUTPUT_ROOT:=$project_root/results/uidl_llama}"
: "${SIM_DATASET:=slimpajama}"
: "${SIM_DATASET_CONFIG:=none}"
export CUDA_VISIBLE_DEVICES MODEL OUTPUT_ROOT SIM_DATASET SIM_DATASET_CONFIG

"$project_root/compare_model/UIDL/run_uidl_prune.sh" "$@"
script/run_uidl_qwen.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/usr/bin/env bash
# UIDL pruning wrapper for Qwen: fills in defaults for the exported
# variables below, then runs compare_model/UIDL/run_uidl_prune.sh with the
# caller's arguments. Values already present in the environment win.
set -euo pipefail

here="$(dirname "${BASH_SOURCE[0]}")"
project_root="$(cd "$here/.." && pwd)"

# Assign each default only when the variable is unset or empty.
: "${CUDA_VISIBLE_DEVICES:=0}"
: "${MODEL:=Qwen/Qwen3-1.7B}"
: "${OUTPUT_ROOT:=$project_root/results/uidl_qwen}"
: "${SIM_DATASET:=slimpajama}"
: "${SIM_DATASET_CONFIG:=none}"
export CUDA_VISIBLE_DEVICES MODEL OUTPUT_ROOT SIM_DATASET SIM_DATASET_CONFIG

"$project_root/compare_model/UIDL/run_uidl_prune.sh" "$@"