#!/usr/bin/env python
"""Quantize a merged checkpoint with AutoAWQ and save it next to the source.

Run after: pip install autoawq

The script loads the model and tokenizer from ``MERGED_DIR``, quantizes the
model with ``QUANT_CONFIG``, and writes the quantized model plus the
tokenizer files to ``AWQ_DIR``.
"""

from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer
import torch

# Input (merged checkpoint) and output (quantized export) directories.
# Defined once so the load, save, and log steps cannot drift apart.
MERGED_DIR = "./outputs_phi3/phi3_4k__fft__instruct__masked/merged"
AWQ_DIR = "./outputs_phi3/phi3_4k__fft__instruct__masked/awq"

# Settings passed verbatim to ``model.quantize`` as ``quant_config``.
QUANT_CONFIG = {
    "zero_point": True,
    "q_group_size": 128,
    "w_bit": 4,
    "version": "GEMM",
}


def main() -> None:
    """Load the merged model, quantize it, and save model and tokenizer."""
    model = AutoAWQForCausalLM.from_pretrained(
        MERGED_DIR,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    tok = AutoTokenizer.from_pretrained(MERGED_DIR)

    # The tokenizer is handed to quantize() alongside the config; no custom
    # calibration data is supplied here, so the library's defaults apply.
    model.quantize(tok, quant_config=QUANT_CONFIG)

    # Save the tokenizer into the same directory as the quantized weights so
    # the export can be loaded from a single path.
    model.save_quantized(AWQ_DIR)
    tok.save_pretrained(AWQ_DIR)

    print(f"AWQ export complete → {AWQ_DIR}")


if __name__ == "__main__":
    main()