File size: 1,151 Bytes
bdd08f1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 |
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer
# take in a model path and quantization args
import argparse
# Command-line interface.
# The calibration flags all default to None: when a flag is omitted it is not
# forwarded, so model.quantize() falls back to the library's own default and
# an invocation without these flags behaves exactly as before.
parser = argparse.ArgumentParser(
    description="Quantize a causal language model with AWQ and save it."
)
parser.add_argument(
    "--model_path",
    type=str,
    default="deepseek-ai/DeepSeek-R1-Distill-Qwen-14B",
    help="model identifier or path passed to from_pretrained()",
)
parser.add_argument(
    "--quant_path",
    type=str,
    default="r1-14b-awq-max-ptb",
    help="output directory for the quantized model and tokenizer",
)
parser.add_argument(
    "--calib_data",
    type=str,
    default=None,
    help='calibration dataset name forwarded to quantize(), e.g. "ptb"',
)
parser.add_argument(
    "--max_calib_samples",
    type=int,
    default=None,
    help="forwarded to quantize() as max_calib_samples",
)
parser.add_argument(
    "--max_calib_seq_len",
    type=int,
    default=None,
    help="forwarded to quantize() as max_calib_seq_len",
)
parser.add_argument(
    "--n_parallel_calib_samples",
    type=int,
    default=None,
    help="forwarded to quantize() as n_parallel_calib_samples",
)
args = parser.parse_args()

model_path = args.model_path
quant_path = args.quant_path

# Quantization settings handed to model.quantize() as quant_config.
quant_config = {
    "zero_point": True,
    "q_group_size": 128,
    "w_bit": 4,
    "version": "GEMM",
}

# Only the calibration options the user actually supplied are forwarded.
calib_kwargs = {
    name: value
    for name, value in (
        ("calib_data", args.calib_data),
        ("max_calib_samples", args.max_calib_samples),
        ("max_calib_seq_len", args.max_calib_seq_len),
        ("n_parallel_calib_samples", args.n_parallel_calib_samples),
    )
    if value is not None
}

# Load model
model = AutoAWQForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(
    model_path,
    trust_remote_code=True,
)

# Quantize
model.quantize(
    tokenizer,
    quant_config=quant_config,
    **calib_kwargs,
)

# Save quantized model; the tokenizer is written to the same directory.
model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)
print(f'Model is quantized and saved at "{quant_path}"')
|