# /// script
# dependencies = ["torch", "transformers", "peft", "huggingface_hub", "sentencepiece", "protobuf", "gguf"]
# ///
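# The "# /// script" block above is PEP 723 inline metadata, so the script can be
# run directly with a PEP 723-aware runner such as `uv run`.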
import subprocess
import sys
from pathlib import Path
from huggingface_hub import HfApi, snapshot_download, create_repo
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer
# Config
ADAPTER_REPO = "kingjux/ffmpeg-command-generator"
OUTPUT_REPO = "kingjux/ffmpeg-command-generator-gguf"
BASE_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"
QUANTIZATIONS = ["Q4_K_M", "Q8_0"] # Good balance of size/quality
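# Q4_K_M is a 4-bit k-quant ("medium") for the smallest files; Q8_0 is 8-bit and close to lossless.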
print("=" * 50)
print("GGUF Conversion for LM Studio")
print("=" * 50)
# Step 1: Load and merge LoRA with base model
print("\n[1/4] Loading adapter and merging with base model...")
model = AutoPeftModelForCausalLM.from_pretrained(
    ADAPTER_REPO,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, trust_remote_code=True)
# Merge LoRA weights into base model
print("Merging LoRA weights...")
merged_model = model.merge_and_unload()
# Save merged model
merged_path = Path("/tmp/merged_model")
merged_path.mkdir(exist_ok=True)
print(f"Saving merged model to {merged_path}...")
merged_model.save_pretrained(merged_path)
tokenizer.save_pretrained(merged_path)
print("Merged model saved!")
# Step 2: Clone llama.cpp for conversion
print("\n[2/4] Setting up llama.cpp converter...")
llama_cpp_path = Path("/tmp/llama.cpp")
if not llama_cpp_path.exists():
    subprocess.run([
        "git", "clone", "--depth", "1",
        "https://github.com/ggerganov/llama.cpp.git",
        str(llama_cpp_path)
    ], check=True)
# Install the converter's Python requirements into this script's environment
subprocess.run([
    sys.executable, "-m", "pip", "install", "-r",
    str(llama_cpp_path / "requirements" / "requirements-convert_hf_to_gguf.txt")
], check=True, capture_output=True)
# Step 3: Convert to GGUF
print("\n[3/4] Converting to GGUF format...")
gguf_output_dir = Path("/tmp/gguf_output")
gguf_output_dir.mkdir(exist_ok=True)
# Convert to F16 GGUF first
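# (the unquantized F16 file is only an intermediate; the quantized variants below are built from it)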
f16_path = gguf_output_dir / "ffmpeg-command-generator-f16.gguf"
subprocess.run([
    sys.executable, str(llama_cpp_path / "convert_hf_to_gguf.py"),
    str(merged_path),
    "--outfile", str(f16_path),
    "--outtype", "f16"
], check=True)
print(f"Created: {f16_path}")
# Build llama.cpp for quantization
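# NOTE: recent llama.cpp revisions have deprecated the Makefile build in favor of CMake. If the
# `make` target below fails, try `cmake -B build && cmake --build build --target llama-quantize`
# and point the quantize step at llama.cpp/build/bin/llama-quantize instead.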
print("\nBuilding llama.cpp for quantization...")
subprocess.run(["make", "-C", str(llama_cpp_path), "llama-quantize"], check=True, capture_output=True)
# Quantize to different formats
quantized_files = []
for quant in QUANTIZATIONS:
    quant_path = gguf_output_dir / f"ffmpeg-command-generator-{quant.lower()}.gguf"
    print(f"Quantizing to {quant}...")
    subprocess.run([
        str(llama_cpp_path / "llama-quantize"),
        str(f16_path),
        str(quant_path),
        quant
    ], check=True)
    quantized_files.append(quant_path)
    print(f"Created: {quant_path}")
# Step 4: Upload to Hub
print("\n[4/4] Uploading to Hugging Face Hub...")
api = HfApi()
# Create repo
create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)
# Create model card
model_card = """---
license: apache-2.0
base_model: Qwen/Qwen2.5-0.5B-Instruct
tags:
- gguf
- ffmpeg
- command-generation
- lm-studio
- ollama
---
# FFmpeg Command Generator (GGUF)
A fine-tuned model that generates FFmpeg commands from natural-language descriptions, with chain-of-thought reasoning.
## Usage
### LM Studio
```bash
lms import kingjux/ffmpeg-command-generator-gguf
```
### Ollama
```bash
ollama run hf.co/kingjux/ffmpeg-command-generator-gguf
```
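### Python (llama-cpp-python)
The GGUF files can also be loaded directly with the `llama-cpp-python` package, for example:
```python
from llama_cpp import Llama

llm = Llama(model_path="ffmpeg-command-generator-q4_k_m.gguf")
out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Convert video.mp4 to webm format"}]
)
print(out["choices"][0]["message"]["content"])
```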
## Example
**Input:** "Convert video.mp4 to webm format"
**Output:**
```
<think>
Task: Convert MP4 to WebM
- WebM container uses VP9 video codec and Opus audio
- Use -c:v libvpx-vp9 for video encoding
- Use -c:a libopus for audio encoding
</think>
ffmpeg -i video.mp4 -c:v libvpx-vp9 -c:a libopus output.webm
```
## Files
- `ffmpeg-command-generator-q4_k_m.gguf` - 4-bit quantized (smallest, fastest)
- `ffmpeg-command-generator-q8_0.gguf` - 8-bit quantized (better quality)
## Training
Fine-tuned from Qwen2.5-0.5B-Instruct on 30 FFmpeg command examples with chain-of-thought reasoning.
"""
# Save and upload model card
card_path = gguf_output_dir / "README.md"
card_path.write_text(model_card)
# Upload all files
for file in [card_path] + quantized_files:
    print(f"Uploading {file.name}...")
    api.upload_file(
        path_or_fileobj=str(file),
        path_in_repo=file.name,
        repo_id=OUTPUT_REPO,
        repo_type="model"
    )
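# Optional sanity check: confirm the files actually landed in the repo
print("\nFiles now in the repo:")
for name in api.list_repo_files(OUTPUT_REPO, repo_type="model"):
    print(f"  {name}")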
print("\n" + "=" * 50)
print("DONE!")
print(f"Model available at: https://huggingface.co/{OUTPUT_REPO}")
print("\nTo use in LM Studio:")
print(f" lms import {OUTPUT_REPO}")
print("=" * 50)