import argparse

from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

parser = argparse.ArgumentParser()
parser.add_argument(
    "--model_path", type=str, default="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
)
parser.add_argument("--quant_path", type=str, default="r1-14b-awq-max-ptb")
args = parser.parse_args()
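
# Example invocation (a sketch; the script filename here is hypothetical):
#   python quantize_awq.py \
#       --model_path deepseek-ai/DeepSeek-R1-Distill-Qwen-14B \
#       --quant_path r1-14b-awq-max-ptb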

model_path = args.model_path
quant_path = args.quant_path

# AWQ quantization settings: 4-bit weights, quantization group size 128,
# with zero points (asymmetric quantization).
quant_config = {
    "zero_point": True,
    "q_group_size": 128,
    "w_bit": 4,
    "version": "GEMM",
}
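
# Note on "version": AutoAWQ ships more than one 4-bit kernel layout;
# "GEMM" is the common general-purpose choice, while "GEMV" is sometimes
# preferred for batch-size-1 decoding (a rough guideline, not a benchmark).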

# Load the full-precision model and its tokenizer.
model = AutoAWQForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    trust_remote_code=True,
)
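
# Note: loading a 14B checkpoint is memory-heavy. A sketch of a more
# memory-friendly load, assuming a recent AutoAWQ release (extra kwargs
# are forwarded to transformers' from_pretrained):
#
#   model = AutoAWQForCausalLM.from_pretrained(
#       model_path, low_cpu_mem_usage=True, use_cache=False
#   )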

# Run AWQ calibration and quantization.
model.quantize(
    tokenizer,
    quant_config=quant_config,
)
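
# Calibration can be tuned via optional arguments to quantize(); a sketch,
# assuming a recent AutoAWQ release (argument names vary across versions):
#
#   model.quantize(
#       tokenizer,
#       quant_config=quant_config,
#       calib_data="pileval",     # calibration dataset (AutoAWQ's default)
#       max_calib_samples=128,    # number of calibration samples
#       max_calib_seq_len=512,    # max tokens per calibration sample
#   )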

# Persist the quantized weights alongside the tokenizer so the output
# directory can be loaded directly.
model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)

print(f'Model is quantized and saved at "{quant_path}"')
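
# To verify the result, the saved folder can be reloaded for inference;
# a sketch using AutoAWQ's documented loader (the prompt is illustrative):
#
#   model = AutoAWQForCausalLM.from_quantized(quant_path, fuse_layers=True)
#   tokenizer = AutoTokenizer.from_pretrained(quant_path, trust_remote_code=True)
#   tokens = tokenizer("Hello, world", return_tensors="pt").input_ids.cuda()
#   print(tokenizer.decode(model.generate(tokens, max_new_tokens=32)[0]))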