# /// script
# dependencies = ["torch", "transformers", "peft", "huggingface_hub", "sentencepiece", "protobuf", "gguf"]
# ///
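# The block above is PEP 723 inline script metadata, so the script can be run
# directly by a tool that understands it, e.g. (assuming `uv` is available;
# the filename is illustrative):
#   uv run convert_to_gguf.py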
import subprocess
from pathlib import Path
from huggingface_hub import HfApi, create_repo
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
# Config
ADAPTER_REPO = "kingjux/ffmpeg-command-generator"
OUTPUT_REPO = "kingjux/ffmpeg-command-generator-gguf"
print("=" * 50)
print("GGUF Conversion for LM Studio")
print("=" * 50)
# Step 1: Load and merge LoRA with base model
print("\n[1/3] Loading adapter and merging with base model...")
model = AutoPeftModelForCausalLM.from_pretrained(
    ADAPTER_REPO,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, trust_remote_code=True)
print("Merging LoRA weights...")
merged_model = model.merge_and_unload()
merged_path = Path("/tmp/merged_model")
merged_path.mkdir(exist_ok=True)
print(f"Saving merged model to {merged_path}...")
merged_model.save_pretrained(merged_path)
tokenizer.save_pretrained(merged_path)
# Step 2: Convert to GGUF using llama.cpp Python converter
print("\n[2/3] Converting to GGUF...")
llama_cpp_path = Path("/tmp/llama.cpp")
if not llama_cpp_path.exists():
    subprocess.run([
        "git", "clone", "--depth", "1",
        "https://github.com/ggerganov/llama.cpp.git",
        str(llama_cpp_path)
    ], check=True)
# Install requirements
subprocess.run([
    "pip", "install", "-q", "-r",
    str(llama_cpp_path / "requirements" / "requirements-convert_hf_to_gguf.txt")
], check=True)
gguf_output_dir = Path("/tmp/gguf_output")
gguf_output_dir.mkdir(exist_ok=True)
# Convert to F16 GGUF (no quantization needed - LM Studio handles it)
f16_path = gguf_output_dir / "ffmpeg-command-generator-f16.gguf"
subprocess.run([
    "python", str(llama_cpp_path / "convert_hf_to_gguf.py"),
    str(merged_path),
    "--outfile", str(f16_path),
    "--outtype", "f16"
], check=True)
print(f"Created: {f16_path}")
# Step 3: Upload to Hub
print("\n[3/3] Uploading to Hugging Face Hub...")
api = HfApi()
create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)
# Model card
model_card = """---
license: apache-2.0
base_model: Qwen/Qwen2.5-0.5B-Instruct
tags:
- gguf
- ffmpeg
- command-generation
- lm-studio
- ollama
---
# FFmpeg Command Generator (GGUF)

Fine-tuned Qwen2.5-0.5B that generates FFmpeg commands from natural language with chain-of-thought reasoning.
## Quick Start
### LM Studio
```bash
lms import kingjux/ffmpeg-command-generator-gguf
```
### Ollama
```bash
ollama run hf.co/kingjux/ffmpeg-command-generator-gguf
```
## Example
**Input:** "Convert video.mp4 to webm format"

**Output:**
```
<think>
Task: Convert MP4 to WebM
- WebM uses VP9 video + Opus audio
- Use -c:v libvpx-vp9 for video
- Use -c:a libopus for audio
</think>
ffmpeg -i video.mp4 -c:v libvpx-vp9 -c:a libopus output.webm
```
## Training
- Base: Qwen2.5-0.5B-Instruct
- Method: LoRA fine-tuning (r=16, alpha=32)
- Dataset: 30 FFmpeg command examples with CoT reasoning
- Trained on HuggingFace Jobs (T4 GPU)
"""
card_path = gguf_output_dir / "README.md"
card_path.write_text(model_card)
# Upload
for file in [card_path, f16_path]:
    print(f"Uploading {file.name}...")
    api.upload_file(
        path_or_fileobj=str(file),
        path_in_repo=file.name,
        repo_id=OUTPUT_REPO,
        repo_type="model"
    )
print("\n" + "=" * 50)
print("DONE!")
print(f"Model: https://huggingface.co/{OUTPUT_REPO}")
print(f"\nLM Studio: lms import {OUTPUT_REPO}")
print("=" * 50)