albertlieadrian committed on
Commit
ed25460
Β·
verified Β·
1 Parent(s): ed82e98

Upload convert_to_gguf.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. convert_to_gguf.py +286 -0
convert_to_gguf.py ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # requires-python = ">=3.10"
3
+ # dependencies = [
4
+ # "transformers>=4.36.0",
5
+ # "peft>=0.7.0",
6
+ # "torch>=2.0.0",
7
+ # "accelerate>=0.24.0",
8
+ # "huggingface_hub>=0.20.0",
9
+ # "sentencepiece>=0.1.99",
10
+ # "protobuf>=3.20.0",
11
+ # "numpy",
12
+ # "gguf",
13
+ # ]
14
+ # ///
15
+
16
+ """
17
+ GGUF Conversion - Q4_K_M Only
18
+
19
+ Converts fine-tuned model to GGUF with Q4_K_M quantization.
20
+ """
21
+
22
+ import os
23
+ import sys
24
+ import torch
25
+ from transformers import AutoModelForCausalLM, AutoTokenizer
26
+ from peft import PeftModel
27
+ from huggingface_hub import HfApi
28
+ import subprocess
29
+
30
+
31
def run_command(cmd, description):
    """Run *cmd* as a subprocess, narrating progress on stdout.

    Prints *description*, executes the command with output captured, and
    echoes a truncated stdout preview on success or a truncated stderr
    excerpt on failure.

    Returns True when the command exits 0, False on a non-zero exit or
    when the executable cannot be found.
    """
    print(f" {description}...")
    try:
        proc = subprocess.run(
            cmd,
            check=True,
            capture_output=True,
            text=True,
        )
    except FileNotFoundError:
        # Executable missing from PATH entirely.
        print(f" ❌ Command not found: {cmd[0]}")
        return False
    except subprocess.CalledProcessError as e:
        # Command ran but exited non-zero; surface a stderr excerpt.
        print(f" ❌ Command failed: {' '.join(cmd)}")
        if e.stderr:
            print(f" STDERR: {e.stderr[:500]}")
        return False
    if proc.stdout:
        print(f" {proc.stdout[:200]}")
    return True
52
+
53
+
54
# Banner.
print("πŸ”„ GGUF Conversion - Q4_K_M")
print("=" * 60)

# Configuration from environment variables, with project defaults.
_env = os.environ.get
ADAPTER_MODEL = _env("ADAPTER_MODEL", "albertlieadrian/qwen3-0.6b-codeforces-sft")
BASE_MODEL = _env("BASE_MODEL", "Qwen/Qwen3-0.6B")
OUTPUT_REPO = _env("OUTPUT_REPO", "albertlieadrian/qwen3-0.6b-codeforces-sft-gguf")
HF_USERNAME = _env("HF_USERNAME", "albertlieadrian")

print(f"\nπŸ“¦ Configuration:")
print(f" Base model: {BASE_MODEL}")
print(f" Adapter model: {ADAPTER_MODEL}")
print(f" Output repo: {OUTPUT_REPO}")
67
+
68
# Step 1: Load the base model, apply the LoRA adapter, and merge them into a
# single standalone model (llama.cpp's converter cannot consume PEFT adapters).
print("\nπŸ”§ Step 1: Loading base model and LoRA adapter...")

try:
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        # FIX: use `torch_dtype`, not `dtype`.  `torch_dtype` is the kwarg
        # name supported across the pinned transformers range (>=4.36.0);
        # `dtype` was only introduced as an alias in much later releases and
        # is silently ignored by older ones, which would load the model in
        # fp32 and roughly double peak memory during conversion.
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    print(" βœ… Base model loaded")
except Exception as e:
    print(f" ❌ Failed to load base model: {e}")
    sys.exit(1)

try:
    print(" Loading LoRA adapter...")
    model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
    print(" βœ… Adapter loaded")

    print(" Merging adapter with base model...")
    # Folds the LoRA deltas into the base weights and drops the PEFT wrapper.
    merged_model = model.merge_and_unload()
    print(" βœ… Models merged!")
except Exception as e:
    print(f" ❌ Failed to merge models: {e}")
    sys.exit(1)

try:
    # Tokenizer comes from the adapter repo so any added/special tokens from
    # fine-tuning are preserved.
    tokenizer = AutoTokenizer.from_pretrained(ADAPTER_MODEL, trust_remote_code=True)
    print(" βœ… Tokenizer loaded")
except Exception as e:
    print(f" ❌ Failed to load tokenizer: {e}")
    sys.exit(1)
101
+
102
# Step 2: Persist the merged model (weights + tokenizer) to disk so the
# llama.cpp converter can read it as a regular HF checkpoint directory.
print("\nπŸ’Ύ Step 2: Saving merged model...")
merged_dir = "/tmp/merged_model"
try:
    merged_model.save_pretrained(merged_dir, safe_serialization=True)
    tokenizer.save_pretrained(merged_dir)
except Exception as e:
    print(f" ❌ Failed to save merged model: {e}")
    sys.exit(1)
print(f" βœ… Merged model saved to {merged_dir}")
112
+
113
# Step 3: Fetch llama.cpp, which provides the HF->GGUF conversion script and
# the quantization tool, and install its Python requirements.
print("\nπŸ“₯ Step 3: Setting up llama.cpp...")

# ROBUSTNESS: `git clone` refuses to clone into an existing directory, which
# made every re-run of this script abort here.  Skip the clone when the
# checkout is already present.
if os.path.isdir("/tmp/llama.cpp"):
    print(" llama.cpp already present, skipping clone")
elif not run_command(
    ["git", "clone", "--depth", "1", "https://github.com/ggerganov/llama.cpp.git", "/tmp/llama.cpp"],
    "Cloning llama.cpp"
):
    sys.exit(1)

print(" Installing Python dependencies...")
# FIX: invoke pip as `sys.executable -m pip` so packages are installed into
# the interpreter actually running this script, not whatever `pip` happens to
# be first on PATH (they can differ under uv/venv setups).
run_command(
    [sys.executable, "-m", "pip", "install", "-r", "/tmp/llama.cpp/requirements.txt"],
    "Installing requirements",
)
run_command(
    [sys.executable, "-m", "pip", "install", "sentencepiece", "protobuf"],
    "Installing tokenizer deps",
)
125
+
126
# Step 4: Convert the merged HF checkpoint to a full-precision (FP16) GGUF —
# the required intermediate before quantization.
print("\nπŸ”„ Step 4: Converting to GGUF format (FP16)...")
gguf_output_dir = "/tmp/gguf_output"
os.makedirs(gguf_output_dir, exist_ok=True)

convert_script = "/tmp/llama.cpp/convert_hf_to_gguf.py"
# Repo name without the username prefix, used for output filenames.
model_name = ADAPTER_MODEL.rsplit('/', 1)[-1]
gguf_file = f"{gguf_output_dir}/{model_name}-f16.gguf"

converted = run_command(
    [sys.executable, convert_script, merged_dir, "--outfile", gguf_file, "--outtype", "f16"],
    "Converting to FP16",
)
if not converted:
    print(" ❌ Conversion failed!")
    sys.exit(1)

print(f" βœ… FP16 GGUF created: {gguf_file}")
143
+
144
# Step 5: Build llama.cpp's quantizer and shrink the FP16 GGUF to Q4_K_M.
print("\nβš™οΈ Step 5: Quantizing to Q4_K_M...")

# CPU-only build is sufficient: quantization does not need CUDA.
print(" Building quantize tool with CMake...")
build_dir = "/tmp/llama.cpp/build"
os.makedirs(build_dir, exist_ok=True)

configured = run_command(
    ["cmake", "-B", build_dir, "-S", "/tmp/llama.cpp", "-DGGML_CUDA=OFF"],
    "Configuring with CMake",
)
if not configured:
    sys.exit(1)

built = run_command(
    ["cmake", "--build", build_dir, "--target", "llama-quantize", "-j", "4"],
    "Building llama-quantize",
)
if not built:
    sys.exit(1)

print(" βœ… Quantize tool built")

quantize_bin = f"{build_dir}/bin/llama-quantize"
quant_file = f"{gguf_output_dir}/{model_name}-q4_k_m.gguf"

print(f" Creating Q4_K_M quantization...")
if not run_command([quantize_bin, gguf_file, quant_file, "Q4_K_M"], "Quantizing to Q4_K_M"):
    print(" ❌ Quantization failed!")
    sys.exit(1)

# Report the quantized artifact's size; also reused later in the README.
size_mb = os.path.getsize(quant_file) / (1024 * 1024)
print(f" βœ… Q4_K_M: {size_mb:.1f} MB")
175
+
176
# Step 6: Publish the quantized GGUF to the Hugging Face Hub.
print("\n☁️ Step 6: Uploading to Hugging Face Hub...")
api = HfApi()

print(f" Creating repository: {OUTPUT_REPO}")
try:
    # exist_ok makes repo creation idempotent across re-runs.
    api.create_repo(repo_id=OUTPUT_REPO, repo_type="model", exist_ok=True)
    print(" βœ… Repository ready")
except Exception as e:
    print(f" ℹ️ Repository may already exist: {e}")

# Upload Q4_K_M
print(" Uploading Q4_K_M GGUF...")
try:
    api.upload_file(
        path_or_fileobj=quant_file,
        path_in_repo=f"{model_name}-q4_k_m.gguf",
        repo_id=OUTPUT_REPO,
    )
except Exception as e:
    # The upload is the whole point of the script — abort on failure.
    print(f" ❌ Upload failed: {e}")
    sys.exit(1)
print(" βœ… Q4_K_M uploaded")
199
+
200
# Build a model card (README.md) for the GGUF repo.  The YAML front matter
# tags the repo so the Hub indexes it under gguf / llama.cpp / quantized.
# NOTE: this is a runtime string — its exact content is what gets published.
print("\nπŸ“ Creating README...")
readme_content = f"""---
base_model: {BASE_MODEL}
tags:
- gguf
- llama.cpp
- quantized
- trl
- sft
---

# {model_name}-gguf

This is a GGUF conversion of [{ADAPTER_MODEL}](https://huggingface.co/{ADAPTER_MODEL}), which is a LoRA fine-tuned version of [{BASE_MODEL}](https://huggingface.co/{BASE_MODEL}).

## Model Details

- **Base Model:** {BASE_MODEL}
- **Fine-tuned Model:** {ADAPTER_MODEL}
- **Training:** Supervised Fine-Tuning (SFT) with TRL
- **Format:** GGUF (for llama.cpp, Ollama, LM Studio, etc.)

## Quantization

| File | Quant | Size | Description |
|------|-------|------|-------------|
| {model_name}-q4_k_m.gguf | Q4_K_M | ~{size_mb:.0f}MB | 4-bit medium (recommended) |

## Usage

### With llama.cpp

```bash
huggingface-cli download {OUTPUT_REPO} {model_name}-q4_k_m.gguf
./llama-cli -m {model_name}-q4_k_m.gguf -p "Your prompt"
```

### With Ollama

1. Create a `Modelfile`:
```
FROM ./{model_name}-q4_k_m.gguf
```

2. Create and run:
```bash
ollama create my-model -f Modelfile
ollama run my-model
```

### With LM Studio

1. Download the `.gguf` file
2. Import into LM Studio
3. Start chatting!

## License

Inherits the license from the base model: {BASE_MODEL}

---

*Converted to GGUF format using llama.cpp*
"""

# Upload the card alongside the GGUF.  Failure here is deliberately
# non-fatal: the model file itself is already published at this point.
try:
    api.upload_file(
        path_or_fileobj=readme_content.encode(),
        path_in_repo="README.md",
        repo_id=OUTPUT_REPO,
    )
    print(" βœ… README uploaded")
except Exception as e:
    print(f" ❌ README upload failed: {e}")
275
+
276
+ print("\n" + "=" * 60)
277
+ print("βœ… GGUF Conversion Complete!")
278
+ print(f"πŸ“¦ Repository: https://huggingface.co/{OUTPUT_REPO}")
279
+ print(f"\nπŸ“₯ Download with:")
280
+ print(f" huggingface-cli download {OUTPUT_REPO} {model_name}-q4_k_m.gguf")
281
+ print(f"\nπŸš€ Use with Ollama:")
282
+ print(f" 1. Download the GGUF file")
283
+ print(f" 2. Create Modelfile: FROM ./{model_name}-q4_k_m.gguf")
284
+ print(" 3. ollama create my-model -f Modelfile")
285
+ print(" 4. ollama run my-model")
286
+ print("=" * 60)