# Three-step pipeline driven by the `swift` CLI:
#   1. merge the LoRA adapter `swift/test_bert` into a standalone model,
#   2. quantize the merged model to 4 bits with GPTQ,
#   3. run inference against the quantized model.
# Each step consumes the --output_dir written by the previous one, so the
# commands must run in this order. CUDA_VISIBLE_DEVICES=0 is set per command,
# exposing only GPU 0 to that invocation.
#
# Every trailing backslash must be the LAST character on its line; a backslash
# followed by a space escapes the space instead of continuing the command.

# merge-lora
CUDA_VISIBLE_DEVICES=0 swift export \
  --adapters swift/test_bert \
  --output_dir output/swift_test_bert_merged \
  --merge_lora true

# gptq quantize
# NOTE(review): --load_data_args presumably reuses the dataset settings saved
# alongside the model (e.g. for calibration data), and --max_length presumably
# caps the token length of those samples -- confirm against the swift docs.
CUDA_VISIBLE_DEVICES=0 swift export \
  --model output/swift_test_bert_merged \
  --load_data_args true \
  --output_dir output/swift_test_bert_gptq_int4 \
  --quant_bits 4 \
  --quant_method gptq \
  --max_length 512

# infer
CUDA_VISIBLE_DEVICES=0 swift infer \
  --model output/swift_test_bert_gptq_int4