File size: 493 Bytes
cb2428f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
# Pipeline: merge LoRA adapters -> GPTQ 4-bit quantization -> inference.
# Every command is pinned to GPU 0 via CUDA_VISIBLE_DEVICES.
#
# Each step reads the directory written by the previous one, so stop at the
# first failure rather than running later steps on missing or stale output.
set -e

# Intermediate artifacts, named once so a step's output path and the next
# step's input path cannot drift apart.
merged_dir=output/swift_test_bert_merged
gptq_dir=output/swift_test_bert_gptq_int4

# merge-lora: export the swift/test_bert adapters with --merge_lora enabled,
# writing the result to $merged_dir.
CUDA_VISIBLE_DEVICES=0 swift export \
    --adapters swift/test_bert \
    --output_dir "$merged_dir" \
    --merge_lora true

# gptq quantize: export the merged model as a 4-bit GPTQ model into $gptq_dir.
# NOTE(review): --load_data_args presumably reuses the dataset arguments saved
# alongside the model (e.g. for calibration data), and --max_length presumably
# caps the sequence length of those samples -- confirm against the ms-swift
# command-line reference.
CUDA_VISIBLE_DEVICES=0 swift export \
    --model "$merged_dir" \
    --load_data_args true \
    --output_dir "$gptq_dir" \
    --quant_bits 4 \
    --quant_method gptq \
    --max_length 512

# infer: run inference against the quantized model produced above.
CUDA_VISIBLE_DEVICES=0 swift infer \
    --model "$gptq_dir"
|