radna committed on
Commit
bdd08f1
·
verified ·
1 Parent(s): c762bf4

Create awq_quant.py

Browse files
Files changed (1) hide show
  1. awq_quant.py +48 -0
awq_quant.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Quantize a causal LM with AWQ (4-bit weights, GEMM kernels) and save it.

Usage:
    python awq_quant.py --model_path <hf-id-or-local-dir> --quant_path <out-dir>

Loads the source model with AutoAWQ, runs AWQ calibration/quantization with
the settings in QUANT_CONFIG, and writes the quantized weights plus tokenizer
files to --quant_path.
"""

import argparse

from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

# AWQ quantization settings: 4-bit weights, per-group (128) scales with
# zero-points, GEMM kernel layout for inference.
QUANT_CONFIG = {
    "zero_point": True,
    "q_group_size": 128,
    "w_bit": 4,
    "version": "GEMM",
}


def parse_args() -> argparse.Namespace:
    """Parse the model source path and quantized-output path from the CLI."""
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--model_path", type=str, default="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
    )
    parser.add_argument("--quant_path", type=str, default="r1-14b-awq-max-ptb")
    return parser.parse_args()


def main() -> None:
    """Load, quantize, and save the model named on the command line."""
    args = parse_args()

    # Load the full-precision model and its tokenizer.
    # trust_remote_code is required for models shipping custom tokenizer code.
    model = AutoAWQForCausalLM.from_pretrained(args.model_path)
    tokenizer = AutoTokenizer.from_pretrained(
        args.model_path,
        trust_remote_code=True,
    )

    # Quantize with AutoAWQ's default calibration dataset. Alternative
    # calibration settings that were tried are kept here for reference:
    model.quantize(
        tokenizer,
        quant_config=QUANT_CONFIG,
        # calib_data="neuralmagic/LLM_compression_calibration",
        # calib_data="ptb",
        # max_calib_samples=128,
        # max_calib_seq_len=12288,
        # n_parallel_calib_samples=128,
    )

    # Persist quantized weights and tokenizer side by side so the output
    # directory is directly loadable.
    model.save_quantized(args.quant_path)
    tokenizer.save_pretrained(args.quant_path)

    print(f'Model is quantized and saved at "{args.quant_path}"')


if __name__ == "__main__":
    main()