nathens committed on
Commit
1fbdc40
·
verified ·
1 Parent(s): 8b28065

Upload convert_to_gguf.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. convert_to_gguf.py +123 -0
convert_to_gguf.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# /// script
# dependencies = ["transformers", "peft", "huggingface_hub", "torch"]
# ///

"""
Convert a fine-tuned LoRA model to GGUF format with quantization.

Merges the LoRA adapter into its base model, converts the merged model to
GGUF via llama.cpp, quantizes it, and uploads the artifacts to the Hub.

Configuration (environment variables, with defaults):
    ADAPTER_MODEL: Hub repo id of the LoRA adapter to merge.
    BASE_MODEL:    Hub repo id of the base model the adapter was trained on.
    OUTPUT_REPO:   Hub repo id the GGUF files are uploaded to.
    QUANTIZATION:  llama.cpp quantization type (e.g. Q4_K_M, Q5_K_M, Q8_0).
"""

import os
import subprocess
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

# Configuration from environment variables or defaults
ADAPTER_MODEL = os.getenv("ADAPTER_MODEL", "nathens/qwen-codeforces-sft")
BASE_MODEL = os.getenv("BASE_MODEL", "Qwen/Qwen2.5-0.5B")
OUTPUT_REPO = os.getenv("OUTPUT_REPO", "nathens/my-model-gguf")
QUANTIZATION = os.getenv("QUANTIZATION", "Q4_K_M")

print(f"🔧 Converting model to GGUF")
print(f"   Base model: {BASE_MODEL}")
print(f"   Adapter: {ADAPTER_MODEL}")
print(f"   Output: {OUTPUT_REPO}")
print(f"   Quantization: {QUANTIZATION}")

# Step 1: Load base model and tokenizer
print("\n📦 Loading base model and tokenizer...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

# Step 2: Load and merge LoRA adapter
print(f"🔀 Loading and merging LoRA adapter from {ADAPTER_MODEL}...")
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
print("⚙️ Merging adapter weights into base model...")
# merge_and_unload() folds the LoRA deltas into the base weights and returns
# a plain transformers model that llama.cpp's converter can read.
merged_model = model.merge_and_unload()

# Step 3: Save merged model
print("💾 Saving merged model...")
merged_dir = "./merged_model"
merged_model.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)
print(f"✅ Merged model saved to {merged_dir}")

# Step 4: Install llama.cpp for conversion
print("\n📥 Installing llama.cpp for GGUF conversion...")
subprocess.run(["apt-get", "update", "-qq"], check=True)
subprocess.run(["apt-get", "install", "-y", "-qq", "git", "build-essential"], check=True)
subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"], check=True)
# BUG FIX: the original used shell=True together with a list argument. On
# POSIX that executes only the first element ("make") and passes the rest to
# the shell, not to make -- and "$(nproc)" is never expanded inside a list
# anyway. Resolve the CPU count in Python and run without a shell.
jobs = str(os.cpu_count() or 1)
subprocess.run(["make", "-C", "llama.cpp", "-j", jobs], check=True)

# Step 5: Convert to GGUF format (FP16 first; quantization follows)
print("\n🔄 Converting to GGUF format...")
subprocess.run([
    "python3", "llama.cpp/convert_hf_to_gguf.py",
    merged_dir,
    "--outfile", "./model-f16.gguf",
    "--outtype", "f16"
], check=True)
print("✅ Converted to FP16 GGUF")

# Step 6: Quantize to the requested format
print(f"\n⚡ Quantizing to {QUANTIZATION}...")
subprocess.run([
    "./llama.cpp/llama-quantize",
    "./model-f16.gguf",
    f"./model-{QUANTIZATION}.gguf",
    QUANTIZATION
], check=True)
print(f"✅ Quantized to {QUANTIZATION}")

# Step 7: Upload to Hub
print(f"\n📤 Uploading to {OUTPUT_REPO}...")
from huggingface_hub import HfApi
api = HfApi()

# Create repo if it doesn't exist (exist_ok makes this idempotent; the
# try/except is kept as a best-effort guard, e.g. against permission errors)
try:
    api.create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)
except Exception as e:
    print(f"Note: {e}")

# Upload the quantized GGUF file
api.upload_file(
    path_or_fileobj=f"./model-{QUANTIZATION}.gguf",
    path_in_repo=f"model-{QUANTIZATION}.gguf",
    repo_id=OUTPUT_REPO,
    repo_type="model"
)

# Also upload the original FP16 version
api.upload_file(
    path_or_fileobj="./model-f16.gguf",
    path_in_repo="model-f16.gguf",
    repo_id=OUTPUT_REPO,
    repo_type="model"
)

# Upload tokenizer files. Some of these are optional depending on the
# tokenizer type, so check existence explicitly instead of swallowing every
# exception (the original `except: pass` would also have hidden real upload
# failures such as auth errors).
for file in ["tokenizer.json", "tokenizer_config.json", "vocab.json", "merges.txt", "special_tokens_map.json"]:
    path = f"{merged_dir}/{file}"
    if not os.path.exists(path):
        continue  # optional file not produced by this tokenizer
    api.upload_file(
        path_or_fileobj=path,
        path_in_repo=file,
        repo_id=OUTPUT_REPO,
        repo_type="model"
    )

print(f"\n✅ Conversion complete!")
print(f"📁 GGUF model available at: https://huggingface.co/{OUTPUT_REPO}")
print(f"\n💡 To use with Ollama:")
print(f"   1. Download: huggingface-cli download {OUTPUT_REPO} model-{QUANTIZATION}.gguf")
print(f"   2. Create Modelfile with the downloaded GGUF")
print(f"   3. Run: ollama create my-model -f Modelfile")
print(f"   4. Use: ollama run my-model")