# /// script
# dependencies = ["torch", "transformers", "peft", "huggingface_hub", "sentencepiece", "protobuf", "gguf"]
# ///
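# Run with `uv run <this-script>.py`; the inline metadata above lets uv resolve the
# dependencies. The upload step assumes you are logged in to Hugging Face
# (e.g. via `huggingface-cli login` or an HF_TOKEN environment variable).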

import subprocess
import sys
from pathlib import Path
from huggingface_hub import HfApi, create_repo
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Config
ADAPTER_REPO = "kingjux/ffmpeg-command-generator"
OUTPUT_REPO = "kingjux/ffmpeg-command-generator-gguf"
BASE_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"
QUANTIZATIONS = ["Q4_K_M", "Q8_0"]  # Good balance of size/quality
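# Q4_K_M is the small ~4-bit variant; Q8_0 is the near-lossless 8-bit variant.
# Add more entries here if you want additional formats.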

print("=" * 50)
print("GGUF Conversion for LM Studio")
print("=" * 50)

# Step 1: Load and merge LoRA with base model
print("\n[1/4] Loading adapter and merging with base model...")
model = AutoPeftModelForCausalLM.from_pretrained(
    ADAPTER_REPO,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, trust_remote_code=True)

# Merge LoRA weights into base model
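# merge_and_unload() folds the LoRA delta weights into the base model and returns
# a plain transformers model, which is what the GGUF converter expects.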
print("Merging LoRA weights...")
merged_model = model.merge_and_unload()

# Save merged model
merged_path = Path("/tmp/merged_model")
merged_path.mkdir(exist_ok=True)
print(f"Saving merged model to {merged_path}...")
merged_model.save_pretrained(merged_path)
tokenizer.save_pretrained(merged_path)
print("Merged model saved!")

# Step 2: Clone llama.cpp for conversion
print("\n[2/4] Setting up llama.cpp converter...")
llama_cpp_path = Path("/tmp/llama.cpp")
if not llama_cpp_path.exists():
    subprocess.run([
        "git", "clone", "--depth", "1",
        "https://github.com/ggerganov/llama.cpp.git",
        str(llama_cpp_path)
    ], check=True)

# Install conversion requirements
subprocess.run([
    sys.executable, "-m", "pip", "install", "-r",
    str(llama_cpp_path / "requirements" / "requirements-convert_hf_to_gguf.txt")
], check=True, capture_output=True)

# Step 3: Convert to GGUF
print("\n[3/4] Converting to GGUF format...")
gguf_output_dir = Path("/tmp/gguf_output")
gguf_output_dir.mkdir(exist_ok=True)

# Convert to F16 GGUF first
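# The converter writes a full-precision (F16) GGUF here; llama-quantize (built
# below) then re-encodes that file into the smaller quantized formats.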
f16_path = gguf_output_dir / "ffmpeg-command-generator-f16.gguf"
subprocess.run([
    sys.executable, str(llama_cpp_path / "convert_hf_to_gguf.py"),
    str(merged_path),
    "--outfile", str(f16_path),
    "--outtype", "f16"
], check=True)
print(f"Created: {f16_path}")

# Build llama.cpp for quantization
print("\nBuilding llama.cpp for quantization...")
subprocess.run(["make", "-C", str(llama_cpp_path), "llama-quantize"], check=True, capture_output=True)

# Quantize to different formats
quantized_files = []
for quant in QUANTIZATIONS:
    quant_path = gguf_output_dir / f"ffmpeg-command-generator-{quant.lower()}.gguf"
    print(f"Quantizing to {quant}...")
    subprocess.run([
        str(llama_cpp_path / "llama-quantize"),
        str(f16_path),
        str(quant_path),
        quant
    ], check=True)
    quantized_files.append(quant_path)
    print(f"Created: {quant_path}")

# Step 4: Upload to Hub
print("\n[4/4] Uploading to Hugging Face Hub...")
api = HfApi()

# Create repo
create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)

# Create model card
model_card = """---
license: apache-2.0
base_model: Qwen/Qwen2.5-0.5B-Instruct
tags:
  - gguf
  - ffmpeg
  - command-generation
  - lm-studio
  - ollama
---

# FFMPEG Command Generator (GGUF)

A fine-tuned model that generates FFMPEG commands from natural language descriptions with chain-of-thought reasoning.

## Usage

### LM Studio
```bash
lms import kingjux/ffmpeg-command-generator-gguf
```

### Ollama
```bash
ollama run hf.co/kingjux/ffmpeg-command-generator-gguf
```
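
### Python (llama-cpp-python)

A minimal sketch using the `llama-cpp-python` bindings (not a dependency of this repo); adjust `filename` to whichever quantization you download:

```python
from llama_cpp import Llama

# Download the chosen GGUF from the Hub and load it
llm = Llama.from_pretrained(
    repo_id="kingjux/ffmpeg-command-generator-gguf",
    filename="ffmpeg-command-generator-q4_k_m.gguf",
)

# The model replies with <think>...</think> reasoning followed by the ffmpeg command
out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Convert video.mp4 to webm format"}],
)
print(out["choices"][0]["message"]["content"])
```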

## Example

**Input:** "Convert video.mp4 to webm format"

**Output:**
```
<think>
Task: Convert MP4 to WebM
- WebM container uses VP9 video codec and Opus audio
- Use -c:v libvpx-vp9 for video encoding
- Use -c:a libopus for audio encoding
</think>

ffmpeg -i video.mp4 -c:v libvpx-vp9 -c:a libopus output.webm
```

## Files

- `ffmpeg-command-generator-q4_k_m.gguf` - 4-bit quantized (smallest, fastest)
- `ffmpeg-command-generator-q8_0.gguf` - 8-bit quantized (better quality)

## Training

Fine-tuned from Qwen2.5-0.5B-Instruct on 30 FFMPEG command examples with CoT reasoning.
"""

# Save and upload model card
card_path = gguf_output_dir / "README.md"
card_path.write_text(model_card)

# Upload all files
for file in [card_path] + quantized_files:
    print(f"Uploading {file.name}...")
    api.upload_file(
        path_or_fileobj=str(file),
        path_in_repo=file.name,
        repo_id=OUTPUT_REPO,
        repo_type="model"
    )

print("\n" + "=" * 50)
print("DONE!")
print(f"Model available at: https://huggingface.co/{OUTPUT_REPO}")
print("\nTo use in LM Studio:")
print(f"  lms import {OUTPUT_REPO}")
print("=" * 50)