radna committed on
Commit
254e136
·
verified ·
1 Parent(s): bdd08f1

Delete awq_max.py

Browse files
Files changed (1) hide show
  1. awq_max.py +0 -48
awq_max.py DELETED
@@ -1,48 +0,0 @@
1
- from awq import AutoAWQForCausalLM
2
- from transformers import AutoTokenizer
3
-
4
- # take in a model path and quantization args
5
- import argparse
6
-
7
- parser = argparse.ArgumentParser()
8
- parser.add_argument(
9
- "--model_path", type=str, default="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
10
- )
11
- parser.add_argument("--quant_path", type=str, default="r1-14b-awq-max-ptb")
12
- args = parser.parse_args()
13
-
14
- model_path = args.model_path
15
- quant_path = args.quant_path
16
- quant_config = {
17
- "zero_point": True,
18
- "q_group_size": 128,
19
- "w_bit": 4,
20
- "version": "GEMM",
21
- }
22
-
23
-
24
- # Load model
25
- model = AutoAWQForCausalLM.from_pretrained(model_path)
26
- tokenizer = AutoTokenizer.from_pretrained(
27
- model_path,
28
- trust_remote_code=True,
29
- )
30
-
31
-
32
- # Quantize
33
- model.quantize(
34
- tokenizer,
35
- quant_config=quant_config,
36
- # calib_data="neuralmagic/LLM_compression_calibration",
37
- # calib_data=get_long_dataset(),
38
- # calib_data="ptb",
39
- # max_calib_samples=128,
40
- max_calib_seq_len=12288,
41
- # n_parallel_calib_samples=128,
42
- )
43
-
44
- # Save quantized model
45
- model.save_quantized(quant_path)
46
- tokenizer.save_pretrained(quant_path)
47
-
48
- print(f'Model is quantized and saved at "{quant_path}"')