# agent-os-training-guide / scripts/export_gguf.py
# (uploaded by devsomosahub via huggingface_hub, commit 1f834c5 verified)
# NOTE(review): the four lines above were raw Hugging Face page residue,
# not Python — commented out so the module parses.
"""Merge LoRA adapter and export to GGUF for llama.cpp."""
import sys
from pathlib import Path

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
ADAPTER_DIR = "./adapter-model"
MERGED_DIR = "./merged-model"


def merge_and_save(model_id: str = MODEL_ID,
                   adapter_dir: str = ADAPTER_DIR,
                   merged_dir: str = MERGED_DIR) -> None:
    """Merge a LoRA adapter into its base model and save the result.

    Loads the base model on CPU in float16, applies the PEFT adapter from
    *adapter_dir*, folds the adapter deltas into the base weights, and
    writes the merged model plus tokenizer to *merged_dir* so it can be
    converted to GGUF for llama.cpp.

    Args:
        model_id: Hugging Face model id of the base model.
        adapter_dir: Directory containing the trained LoRA adapter.
        merged_dir: Output directory for the merged model and tokenizer.
    """
    if not Path(adapter_dir).is_dir():
        # Fail early with a clear message instead of a deep peft traceback
        # when the adapter directory is missing.
        sys.exit(f"Adapter directory not found: {adapter_dir}")

    print("Loading base model...")
    tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
    # CPU + fp16 keeps memory modest; merging weights does not need a GPU.
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=torch.float16, device_map="cpu",
        trust_remote_code=True,
    )

    print("Loading adapter...")
    model = PeftModel.from_pretrained(model, adapter_dir)

    print("Merging...")
    # merge_and_unload() folds the LoRA deltas into the base weights and
    # returns a plain transformers model (peft wrapper removed).
    model = model.merge_and_unload()

    print(f"Saving merged model to {merged_dir}...")
    model.save_pretrained(merged_dir)
    tokenizer.save_pretrained(merged_dir)

    # NOTE(review): the original hint suggested `python -m llama_cpp.convert`,
    # but llama-cpp-python ships no such converter module — GGUF conversion
    # is done with llama.cpp's convert_hf_to_gguf.py script.
    print(f"""
Done! Now convert to GGUF with llama.cpp's convert script:
python /opt/llama.cpp/convert_hf_to_gguf.py {merged_dir} --outfile adapter-q8.gguf --outtype q8_0
""")


if __name__ == "__main__":
    merge_and_save()