Cialtion committed on
Commit
aa7584f
·
verified ·
1 Parent(s): 2eddab0

Delete batch_quantize_w4a16.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. batch_quantize_w4a16.py +0 -175
batch_quantize_w4a16.py DELETED
@@ -1,175 +0,0 @@
1
- import os
2
- import torch
3
- from llmcompressor import oneshot
4
- from llmcompressor.modifiers.awq import AWQModifier, AWQMapping
5
- from datasets import Dataset
6
-
7
- # ============================================================
8
- # 配置区(批量量化,仅需修改此处全局配置,无需逐个调整模型)
9
- # ============================================================
10
# Directory that is scanned for model folders. Each quantized model is written
# to a sibling folder inside this same directory.
ROOT_MODEL_DIR = "./"
# Appended to a model folder's name to form the name of its output folder.
QUANT_SUFFIX = "_awq_w4a16"
12
-
13
- # 校准数据
14
# Calibration samples: one chat-style text per list entry. The list is wrapped
# into a single-column ("text") dataset and its length is used as the number
# of calibration samples.
# NOTE(review): this text was recovered from a diff view that strips leading
# whitespace, so any indentation the outline lines originally had inside the
# string could not be restored - confirm against the original file if it matters.
CALIB_DATA = [
    """You are a helpful assistant.
User: 帮我写一份关于全球气候变化的报告大纲。
Assistant: 当然,这是一个关于全球气候变化报告的大纲建议:
I. 引言
A. 什么是全球气候变化
B. 报告的目的和范围
II. 气候变化的原因
A. 自然原因(太阳活动、火山喷发)
B. 人为原因(温室气体排放、土地利用变化)
"""
]
26
-
27
- # AWQ 映射表(适配Qwen系列模型)
28
# Mapping table passed to AWQModifier via its ``mappings`` argument.
# Each AWQMapping pairs one module-name regex with a list of module-name
# regexes ("re:" prefix marks a regular expression).
# NOTE(review): in llmcompressor these are presumably the "smooth layer" and
# its "balance layers" - confirm against the installed llmcompressor version.
LLAMA_MAPPINGS = [
    # Attention input norm -> Q/K/V projections.
    AWQMapping(
        "re:.*input_layernorm",
        ["re:.*q_proj", "re:.*k_proj", "re:.*v_proj"],
    ),
    # V projection -> attention output projection.
    AWQMapping("re:.*v_proj", ["re:.*o_proj"]),
    # Post-attention norm -> MLP gate/up projections.
    AWQMapping(
        "re:.*post_attention_layernorm",
        ["re:.*gate_proj", "re:.*up_proj"],
    ),
    # MLP up projection -> MLP down projection.
    AWQMapping(
        "re:.*up_proj",
        ["re:.*down_proj"],
    ),
]
43
-
44
- # ============================================================
45
- # 工具函数:获取所有待量化的sft_qwenxxx模型目录
46
- # ============================================================
47
def get_target_model_dirs(root_dir=None):
    """Return the names of the model directories that still need quantizing.

    A directory qualifies when its name starts with ``sft_qwen`` and does not
    contain ``_awq_`` (directories containing ``_awq_`` are treated as already
    quantized and are only reported as skipped). Plain files are ignored.

    :param root_dir: Directory to scan. Defaults to ``ROOT_MODEL_DIR`` when
        omitted, which is what existing callers rely on.
    :return: Directory names (not full paths) in sorted order; an empty list
        when nothing qualifies.
    """
    if root_dir is None:
        root_dir = ROOT_MODEL_DIR

    target_dirs = []
    skipped_dirs = []

    # os.listdir returns entries in arbitrary order; sorting keeps the batch
    # order (and therefore the log output) reproducible between runs.
    for item in sorted(os.listdir(root_dir)):
        item_path = os.path.join(root_dir, item)

        # Only directories named sft_qwen* are candidates.
        if not (os.path.isdir(item_path) and item.startswith("sft_qwen")):
            continue

        if "_awq_" in item:
            skipped_dirs.append(item)
            print(f"[跳过已量化模型] {item}")
        else:
            target_dirs.append(item)
            print(f"[发现待量化模型] {item}")

    if skipped_dirs:
        print(f"\n⏭️ 跳过 {len(skipped_dirs)} 个已量化模型")

    if not target_dirs:
        print("⚠️ 未发现任何待量化的sft_qwen模型目录")
    else:
        print(f"\n✅ 共发现 {len(target_dirs)} 个待量化模型\n")

    return target_dirs
77
-
78
- # ============================================================
79
- # 核心函数:单个模型量化
80
- # ============================================================
81
def quantize_single_model(model_name):
    """Quantize one model directory with AWQ (W4A16) and save the result.

    The model is read from ``ROOT_MODEL_DIR/<model_name>`` and the quantized
    output is written to ``ROOT_MODEL_DIR/<model_name><QUANT_SUFFIX>``.

    :param model_name: Name of the model directory (for example
        ``sft_qwen3_4b``).
    :return: True when ``oneshot`` finished without raising; False when no
        CUDA device is available, the calibration dataset could not be built,
        or quantization raised an exception (the traceback is printed).
    """
    model_path = os.path.join(ROOT_MODEL_DIR, model_name)
    quant_path = os.path.join(ROOT_MODEL_DIR, f"{model_name}{QUANT_SUFFIX}")

    print("\n" + "=" * 100)
    print(f"开始量化模型: {model_name}")
    print(f"模型输入路径: {model_path}")
    print(f"量化输出路径: {quant_path}")
    print("=" * 100 + "\n")

    if not torch.cuda.is_available():
        print("❌ 错误:此过程需要GPU支持,无法继续量化")
        return False

    try:
        calib_dataset = Dataset.from_dict({"text": CALIB_DATA})
    except Exception as e:
        print(f"❌ 构建校准数据集失败,错误:{e}")
        return False

    # Build a fresh AWQModifier for every model so no modifier instance is
    # shared between consecutive quantization runs.
    recipe = [
        AWQModifier(
            scheme="W4A16_ASYM",
            mappings=LLAMA_MAPPINGS,
            ignore=["lm_head"],
            targets=["Linear"],
        ),
    ]

    try:
        print("[步骤 1/2] 正在执行AWQ W4A16 oneshot量化...")
        print(" 此过程会进行权重缩放和低比特量化,耗时较长,请耐心等待...")

        # tokenizer_kwargs is intentionally not passed: the original author
        # removed it as unsupported by oneshot.
        oneshot(
            model=model_path,
            dataset=calib_dataset,
            recipe=recipe,
            output_dir=quant_path,
            num_calibration_samples=len(CALIB_DATA),
            max_seq_length=4096,
        )

        print("\n[步骤 2/2] oneshot量化完成!")

    except Exception as e:
        # Top-level boundary for one model: report and let the batch continue.
        print(f"\n❌ 量化模型 {model_name} 过程中发生错误")
        print(f"错误详情: {e}")
        import traceback
        traceback.print_exc()
        return False
    finally:
        # Runs on success and on failure, before the next model is loaded.
        if torch.cuda.is_available():
            # Let outstanding GPU work finish first, then hand the cached
            # blocks back to the driver.
            torch.cuda.synchronize()
            torch.cuda.empty_cache()

    print("\n" + "=" * 80)
    print(f"🎉 模型 {model_name} 量化成功!")
    print(f"4-bit AWQ模型已保存到: {quant_path}")
    print("=" * 80 + "\n")
    return True
147
-
148
- # ============================================================
149
- # 主函数:批量执行所有模型量化
150
- # ============================================================
151
def run_batch_quantization():
    """Quantize every pending model directory and print a summary.

    Collects the pending directories with ``get_target_model_dirs``, runs
    ``quantize_single_model`` on each one in turn, and prints how many
    succeeded and how many failed. Returns None; returns early, without a
    summary, when no pending model is found.
    """
    print("🚀 启动Qwen系列模型批量W4A16量化任务")
    print(f"工作目录: {os.path.abspath(ROOT_MODEL_DIR)}\n")

    target_models = get_target_model_dirs()
    if not target_models:
        return

    total = len(target_models)
    success_count = 0
    for idx, model_name in enumerate(target_models, 1):
        print(f"\n========== 批量量化 {idx}/{total} ==========")
        # A failed model is counted and the batch moves on to the next one.
        if quantize_single_model(model_name):
            success_count += 1
    fail_count = total - success_count

    print("\n" + "=" * 100)
    print("📊 批量量化任务全部结束")
    print(f"✅ 成功量化:{success_count} 个模型")
    print(f"❌ 量化失败:{fail_count} 个模型")
    print("=" * 100)
174
# Run the batch job only when executed as a script, not when imported.
if __name__ == "__main__":
    run_batch_quantization()