Upload scripts/export_gguf.py with huggingface_hub
scripts/export_gguf.py  ADDED  +34 -0
@@ -0,0 +1,34 @@
+"""Merge LoRA adapter and export to GGUF for llama.cpp."""
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
+from peft import PeftModel
+
+MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
+ADAPTER_DIR = "./adapter-model"
+MERGED_DIR = "./merged-model"
+
+print("Loading base model...")
+tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(
+    MODEL_ID, torch_dtype=torch.float16, device_map="cpu", trust_remote_code=True
+)
+
+print("Loading adapter...")
+model = PeftModel.from_pretrained(model, ADAPTER_DIR)
+
+print("Merging...")
+model = model.merge_and_unload()
+
+print(f"Saving merged model to {MERGED_DIR}...")
+model.save_pretrained(MERGED_DIR)
+tokenizer.save_pretrained(MERGED_DIR)
+
+print(f"""
+Done! Now convert to GGUF with llama.cpp's convert script:
+
+pip install -r llama.cpp/requirements.txt
+python llama.cpp/convert_hf_to_gguf.py {MERGED_DIR} --outfile adapter-q8.gguf --outtype q8_0
+
+Or, with llama.cpp checked out under /opt:
+python /opt/llama.cpp/convert_hf_to_gguf.py {MERGED_DIR} --outfile adapter-q8.gguf --outtype q8_0
+""")
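
After converting, a quick smoke test confirms the GGUF loads and generates. A minimal sketch using llama-cpp-python, assuming the adapter-q8.gguf filename from the script's final message (the prompt, n_ctx value, and token limit are illustrative assumptions, not part of this commit):

from llama_cpp import Llama

# Load the freshly converted file; the path is the example name
# printed by export_gguf.py (an assumption about where you saved it).
llm = Llama(model_path="adapter-q8.gguf", n_ctx=2048)

out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    max_tokens=32,
)
print(out["choices"][0]["message"]["content"])

If a smaller file is needed, llama.cpp's llama-quantize binary can requantize the q8_0 export, e.g. llama-quantize adapter-q8.gguf adapter-q4.gguf Q4_K_M.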