kingjux committed
Commit 1ffa62b · verified · 1 Parent(s): 09d99d2

Upload convert_to_gguf.py with huggingface_hub
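
For reference, the upload named in the commit message can be reproduced with `huggingface_hub` itself. A minimal sketch (the target repo id is an assumption based on this page; a token must be configured via `huggingface-cli login` or the `HF_TOKEN` environment variable):

```python
from huggingface_hub import HfApi

api = HfApi()  # picks up the cached login token or HF_TOKEN
api.upload_file(
    path_or_fileobj="convert_to_gguf.py",        # local script to push
    path_in_repo="convert_to_gguf.py",           # destination path in the repo
    repo_id="kingjux/ffmpeg-command-generator",  # assumed target repo for this commit
    repo_type="model",
)
```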

Files changed (1)
  1. convert_to_gguf.py +173 -0
convert_to_gguf.py ADDED
@@ -0,0 +1,173 @@
# /// script
# dependencies = ["torch", "transformers", "peft", "huggingface_hub", "sentencepiece", "protobuf", "gguf"]
# ///
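# Note: the block above is PEP 723 inline script metadata, so a PEP 723-aware runner
# (e.g. `uv run convert_to_gguf.py`) can install these dependencies automatically.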

import os
import subprocess
import shutil
from pathlib import Path
from huggingface_hub import HfApi, snapshot_download, create_repo
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Config
ADAPTER_REPO = "kingjux/ffmpeg-command-generator"
OUTPUT_REPO = "kingjux/ffmpeg-command-generator-gguf"
BASE_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"
QUANTIZATIONS = ["Q4_K_M", "Q8_0"]  # Good balance of size/quality

print("=" * 50)
print("GGUF Conversion for LM Studio")
print("=" * 50)

# Step 1: Load and merge LoRA with base model
print("\n[1/4] Loading adapter and merging with base model...")
model = AutoPeftModelForCausalLM.from_pretrained(
    ADAPTER_REPO,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, trust_remote_code=True)

# Merge LoRA weights into base model
print("Merging LoRA weights...")
merged_model = model.merge_and_unload()

# Save merged model
merged_path = Path("/tmp/merged_model")
merged_path.mkdir(exist_ok=True)
print(f"Saving merged model to {merged_path}...")
merged_model.save_pretrained(merged_path)
tokenizer.save_pretrained(merged_path)
print("Merged model saved!")

# Step 2: Clone llama.cpp for conversion
print("\n[2/4] Setting up llama.cpp converter...")
llama_cpp_path = Path("/tmp/llama.cpp")
if not llama_cpp_path.exists():
    subprocess.run([
        "git", "clone", "--depth", "1",
        "https://github.com/ggerganov/llama.cpp.git",
        str(llama_cpp_path)
    ], check=True)

# Install conversion requirements
subprocess.run([
    "pip", "install", "-r",
    str(llama_cpp_path / "requirements" / "requirements-convert_hf_to_gguf.txt")
], check=True, capture_output=True)

# Step 3: Convert to GGUF
print("\n[3/4] Converting to GGUF format...")
gguf_output_dir = Path("/tmp/gguf_output")
gguf_output_dir.mkdir(exist_ok=True)

# Convert to F16 GGUF first
f16_path = gguf_output_dir / "ffmpeg-command-generator-f16.gguf"
subprocess.run([
    "python", str(llama_cpp_path / "convert_hf_to_gguf.py"),
    str(merged_path),
    "--outfile", str(f16_path),
    "--outtype", "f16"
], check=True)
print(f"Created: {f16_path}")

# Build llama.cpp for quantization
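# Note: recent llama.cpp checkouts build with CMake rather than make; if the make target
# below is unavailable, something like `cmake -B build && cmake --build build --target llama-quantize`
# should produce the binary (typically under build/bin/), and the quantize path below
# would need adjusting accordingly.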
print("\nBuilding llama.cpp for quantization...")
subprocess.run(["make", "-C", str(llama_cpp_path), "llama-quantize"], check=True, capture_output=True)

# Quantize to different formats
quantized_files = []
for quant in QUANTIZATIONS:
    quant_path = gguf_output_dir / f"ffmpeg-command-generator-{quant.lower()}.gguf"
    print(f"Quantizing to {quant}...")
    subprocess.run([
        str(llama_cpp_path / "llama-quantize"),
        str(f16_path),
        str(quant_path),
        quant
    ], check=True)
    quantized_files.append(quant_path)
    print(f"Created: {quant_path}")

# Step 4: Upload to Hub
print("\n[4/4] Uploading to Hugging Face Hub...")
api = HfApi()

# Create repo
create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)

# Create model card
model_card = """---
license: apache-2.0
base_model: Qwen/Qwen2.5-0.5B-Instruct
tags:
- gguf
- ffmpeg
- command-generation
- lm-studio
- ollama
---

# FFMPEG Command Generator (GGUF)

A fine-tuned model that generates FFMPEG commands from natural language descriptions with chain-of-thought reasoning.

## Usage

### LM Studio
```bash
lms import kingjux/ffmpeg-command-generator-gguf
```

### Ollama
```bash
ollama run hf.co/kingjux/ffmpeg-command-generator-gguf
```

## Example

**Input:** "Convert video.mp4 to webm format"

**Output:**
```
<think>
Task: Convert MP4 to WebM
- WebM container uses VP9 video codec and Opus audio
- Use -c:v libvpx-vp9 for video encoding
- Use -c:a libopus for audio encoding
</think>

ffmpeg -i video.mp4 -c:v libvpx-vp9 -c:a libopus output.webm
```

## Files

- `ffmpeg-command-generator-q4_k_m.gguf` - 4-bit quantized (smallest, fastest)
- `ffmpeg-command-generator-q8_0.gguf` - 8-bit quantized (better quality)

## Training

Fine-tuned from Qwen2.5-0.5B-Instruct on 30 FFMPEG command examples with CoT reasoning.
"""

# Save and upload model card
card_path = gguf_output_dir / "README.md"
card_path.write_text(model_card)

# Upload all files
for file in [card_path] + quantized_files:
    print(f"Uploading {file.name}...")
    api.upload_file(
        path_or_fileobj=str(file),
        path_in_repo=file.name,
        repo_id=OUTPUT_REPO,
        repo_type="model"
    )

print("\n" + "=" * 50)
print("DONE!")
print(f"Model available at: https://huggingface.co/{OUTPUT_REPO}")
print("\nTo use in LM Studio:")
print(f"  lms import {OUTPUT_REPO}")
print("=" * 50)