devsomosahub committed on
Commit
1f834c5
·
verified ·
1 Parent(s): dac0aaf

Upload scripts/export_gguf.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. scripts/export_gguf.py +34 -0
scripts/export_gguf.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""Merge a LoRA adapter into its base model and export it for GGUF conversion.

Loads the base model on CPU in float16, applies the PEFT adapter, merges the
adapter weights into the base weights, and saves the merged checkpoint plus
tokenizer to MERGED_DIR. Printed instructions show how to convert the result
to GGUF with llama.cpp tooling.
"""
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

# Base model on the Hugging Face Hub.
MODEL_ID = "Qwen/Qwen2.5-1.5B-Instruct"
# Local directory containing the trained LoRA adapter.
ADAPTER_DIR = "./adapter-model"
# Output directory for the merged full-weight model.
MERGED_DIR = "./merged-model"


def main() -> None:
    """Merge the adapter into the base model and save the result.

    Side effects: downloads MODEL_ID if not cached, reads ADAPTER_DIR,
    and writes the merged model + tokenizer files into MERGED_DIR.
    """
    print("Loading base model...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
    # CPU + fp16: no GPU required for a merge-and-save; fp16 halves memory.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID, torch_dtype=torch.float16, device_map="cpu", trust_remote_code=True
    )

    print("Loading adapter...")
    model = PeftModel.from_pretrained(model, ADAPTER_DIR)

    print("Merging...")
    # Folds the LoRA deltas into the base weights and drops the PEFT wrapper,
    # leaving a plain transformers model that GGUF converters can read.
    model = model.merge_and_unload()

    print(f"Saving merged model to {MERGED_DIR}...")
    model.save_pretrained(MERGED_DIR)
    tokenizer.save_pretrained(MERGED_DIR)

    print(f"""
Done! Now convert to GGUF:

pip install llama-cpp-python
python -m llama_cpp.convert {MERGED_DIR} --outfile adapter-q8.gguf --outtype q8_0

Or use llama.cpp's convert script:
python /opt/llama.cpp/convert_hf_to_gguf.py {MERGED_DIR} --outfile adapter-q8.gguf --outtype q8_0
""")


# Guard so importing this module does not trigger the multi-GB download/merge.
if __name__ == "__main__":
    main()