ffmpeg-command-generator / convert_to_gguf_simple.py
# /// script
# dependencies = ["torch", "transformers", "peft", "huggingface_hub", "sentencepiece", "protobuf", "gguf"]
# ///
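# The block above is PEP 723 inline script metadata, so any PEP 723-aware
# runner can resolve the dependencies without a separate requirements file,
# e.g. (usage note, not part of the original script):
#   uv run convert_to_gguf_simple.py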
import subprocess
import sys
from pathlib import Path
from huggingface_hub import HfApi, create_repo
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
# Config
ADAPTER_REPO = "kingjux/ffmpeg-command-generator"
OUTPUT_REPO = "kingjux/ffmpeg-command-generator-gguf"
print("=" * 50)
print("GGUF Conversion for LM Studio")
print("=" * 50)
# Step 1: Load and merge LoRA with base model
print("\n[1/3] Loading adapter and merging with base model...")
model = AutoPeftModelForCausalLM.from_pretrained(
    ADAPTER_REPO,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, trust_remote_code=True)
print("Merging LoRA weights...")
merged_model = model.merge_and_unload()
merged_path = Path("/tmp/merged_model")
merged_path.mkdir(exist_ok=True)
print(f"Saving merged model to {merged_path}...")
merged_model.save_pretrained(merged_path)
tokenizer.save_pretrained(merged_path)
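# Optional smoke test of the merged model before conversion (a minimal sketch,
# not part of the original flow; it assumes the tokenizer ships a chat
# template, which Qwen2.5-Instruct checkpoints do). Remove to skip.
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Convert video.mp4 to webm format"}],
    tokenize=False,
    add_generation_prompt=True,
)
inputs = tokenizer(prompt, return_tensors="pt").to(merged_model.device)
sample = merged_model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(sample[0], skip_special_tokens=True))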
# Step 2: Convert to GGUF using llama.cpp Python converter
print("\n[2/3] Converting to GGUF...")
llama_cpp_path = Path("/tmp/llama.cpp")
if not llama_cpp_path.exists():
    subprocess.run([
        "git", "clone", "--depth", "1",
        "https://github.com/ggerganov/llama.cpp.git",
        str(llama_cpp_path),
    ], check=True)
# Install requirements
subprocess.run([
    sys.executable, "-m", "pip", "install", "-q", "-r",
    str(llama_cpp_path / "requirements" / "requirements-convert_hf_to_gguf.txt"),
], check=True)
gguf_output_dir = Path("/tmp/gguf_output")
gguf_output_dir.mkdir(exist_ok=True)
# Convert to F16 GGUF (no quantization needed - LM Studio handles it)
f16_path = gguf_output_dir / "ffmpeg-command-generator-f16.gguf"
subprocess.run([
    sys.executable, str(llama_cpp_path / "convert_hf_to_gguf.py"),
    str(merged_path),
    "--outfile", str(f16_path),
    "--outtype", "f16",
], check=True)
print(f"Created: {f16_path}")
# Step 3: Upload to Hub
print("\n[3/3] Uploading to Hugging Face Hub...")
api = HfApi()
create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)
# Model card
model_card = """---
license: apache-2.0
base_model: Qwen/Qwen2.5-0.5B-Instruct
tags:
- gguf
- ffmpeg
- command-generation
- lm-studio
- ollama
---
# FFmpeg Command Generator (GGUF)
A Qwen2.5-0.5B-Instruct fine-tune that generates FFmpeg commands from natural-language requests, with chain-of-thought reasoning.
## Quick Start
### LM Studio
```bash
lms import kingjux/ffmpeg-command-generator-gguf
```
### Ollama
```bash
ollama run hf.co/kingjux/ffmpeg-command-generator-gguf
```
## Example
**Input:** "Convert video.mp4 to webm format"
**Output:**
```
<think>
Task: Convert MP4 to WebM
- WebM uses VP9 video + Opus audio
- Use -c:v libvpx-vp9 for video
- Use -c:a libopus for audio
</think>
ffmpeg -i video.mp4 -c:v libvpx-vp9 -c:a libopus output.webm
```
## Training
- Base: Qwen2.5-0.5B-Instruct
- Method: LoRA fine-tuning (r=16, alpha=32)
- Dataset: 30 FFmpeg command examples with CoT reasoning
- Trained on Hugging Face Jobs (T4 GPU)
"""
card_path = gguf_output_dir / "README.md"
card_path.write_text(model_card)
# Upload
for file in [card_path, f16_path]:
    print(f"Uploading {file.name}...")
    api.upload_file(
        path_or_fileobj=str(file),
        path_in_repo=file.name,
        repo_id=OUTPUT_REPO,
        repo_type="model",
    )
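# Equivalent single call (a sketch, commented out): push everything in the
# output directory at once instead of file by file.
# api.upload_folder(folder_path=str(gguf_output_dir), repo_id=OUTPUT_REPO,
#                   repo_type="model")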
print("\n" + "=" * 50)
print("DONE!")
print(f"Model: https://huggingface.co/{OUTPUT_REPO}")
print(f"\nLM Studio: lms import {OUTPUT_REPO}")
print("=" * 50)