| import os |
| import argparse |
| |
| |
| |
| |
# Command-line interface. Parsed once at import time so the resulting
# ``args`` namespace is available module-wide (the conversion routine
# below reads it).
parser = argparse.ArgumentParser()
for flag, options in (
    ("--model_path",
     dict(type=str, required=True,
          help="Path to the fine-tuned model/adapter to convert.")),
    ("--save_path",
     dict(type=str, required=True,
          help="Path to save the converted BF16 model.")),
    ("--msl",
     dict(type=int, default=8192,
          help="Maximum sequence length for the model.")),
    ("--cuda_device",
     dict(type=str, default="2",
          help="CUDA device index to use.")),
):
    parser.add_argument(flag, **options)
args = parser.parse_args()
|
|
| |
# NOTE: the GPU-selection environment variables are deliberately set
# BEFORE torch/unsloth are imported below — presumably so the CUDA
# runtime sees the restricted device list when it initializes. Keep
# this ordering intact.
os.environ.update(
    CUDA_DEVICE_ORDER="PCI_BUS_ID",
    CUDA_VISIBLE_DEVICES=args.cuda_device,
)

import torch
from unsloth import FastLanguageModel
|
|
| |
| |
| |
| |
| |
|
|
| |
| |
|
|
def convert_and_save(model_path=None, save_path=None, max_seq_length=None):
    """Merge a fine-tuned model/adapter and save it as a BF16 checkpoint.

    Every parameter falls back to the parsed command-line arguments, so
    the zero-argument call used by the script entry point keeps working,
    while the function can now also be imported and driven
    programmatically with explicit paths.

    Args:
        model_path: Path of the fine-tuned model/adapter to load.
            Defaults to ``args.model_path``.
        save_path: Destination directory for the merged BF16 model.
            Defaults to ``args.save_path``.
        max_seq_length: Maximum sequence length to configure the model
            with. Defaults to ``args.msl``.
    """
    model_path = args.model_path if model_path is None else model_path
    save_path = args.save_path if save_path is None else save_path
    max_seq_length = args.msl if max_seq_length is None else max_seq_length

    print(f"Loading model from: {model_path}")

    # Load in full bfloat16 precision (not 4-bit) so the merged weights
    # can be exported without quantization loss.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_path,
        max_seq_length=max_seq_length,
        dtype=torch.bfloat16,
        load_in_4bit=False,
    )

    print(f"Saving merged BF16 model to: {save_path}")

    # "merged_16bit" merges the adapter into the base weights and writes
    # a standard 16-bit checkpoint (usable by vLLM / plain transformers).
    model.save_pretrained_merged(
        save_path,
        tokenizer,
        save_method="merged_16bit",
    )

    print("Conversion complete. You can now use this path for vLLM or standard inference.")
|
|
if __name__ == "__main__":
    # Run the conversion only when executed as a script, not on import.
    convert_and_save()