Upload convert_to_gguf.py with huggingface_hub

convert_to_gguf.py (ADDED, +173 -0)
@@ -0,0 +1,173 @@
# /// script
# dependencies = ["torch", "transformers", "peft", "huggingface_hub", "sentencepiece", "protobuf", "gguf"]
# ///
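# The header above is PEP 723 inline script metadata, so the script can be run
# directly with a tool that reads it, e.g. `uv run convert_to_gguf.py`; plain
# `python convert_to_gguf.py` also works once the listed dependencies are
# installed.
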
import subprocess
import sys
from pathlib import Path

from huggingface_hub import HfApi, create_repo
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Config
ADAPTER_REPO = "kingjux/ffmpeg-command-generator"
OUTPUT_REPO = "kingjux/ffmpeg-command-generator-gguf"
BASE_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"  # base the adapter was trained from
QUANTIZATIONS = ["Q4_K_M", "Q8_0"]  # good balance of size vs. quality
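# Q4_K_M is a 4-bit K-quant (the usual smallest-usable pick); Q8_0 is 8-bit
# and close to F16 quality. Both are quantization type names accepted by
# llama-quantize.
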
print("=" * 50)
print("GGUF Conversion for LM Studio")
print("=" * 50)

# Step 1: Load the adapter and merge it with the base model
print("\n[1/4] Loading adapter and merging with base model...")
model = AutoPeftModelForCausalLM.from_pretrained(
    ADAPTER_REPO,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, trust_remote_code=True)

# Fold the LoRA weights into the base model so the result is a plain
# transformers checkpoint that convert_hf_to_gguf.py can read
print("Merging LoRA weights...")
merged_model = model.merge_and_unload()

# Save merged model
merged_path = Path("/tmp/merged_model")
merged_path.mkdir(exist_ok=True)
print(f"Saving merged model to {merged_path}...")
merged_model.save_pretrained(merged_path)
tokenizer.save_pretrained(merged_path)
print("Merged model saved!")

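# Optional smoke test (not in the original script; assumes the adapter was
# trained on the tokenizer's chat template): run one prompt through the
# merged model so a bad merge surfaces here rather than after quantization.
# prompt = tokenizer.apply_chat_template(
#     [{"role": "user", "content": "Convert video.mp4 to webm format"}],
#     tokenize=False, add_generation_prompt=True,
# )
# inputs = tokenizer(prompt, return_tensors="pt").to(merged_model.device)
# out = merged_model.generate(**inputs, max_new_tokens=64)
# print(tokenizer.decode(out[0], skip_special_tokens=True))
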
# Step 2: Clone llama.cpp for conversion
print("\n[2/4] Setting up llama.cpp converter...")
llama_cpp_path = Path("/tmp/llama.cpp")
if not llama_cpp_path.exists():
    subprocess.run([
        "git", "clone", "--depth", "1",
        "https://github.com/ggerganov/llama.cpp.git",
        str(llama_cpp_path),
    ], check=True)

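# Note: a shallow clone tracks llama.cpp master, where script and binary names
# occasionally change; pinning a release with `--branch <tag> --depth 1` would
# make this step reproducible.
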
# Install conversion requirements into the interpreter running this script
# (sys.executable -m pip avoids picking up an unrelated `pip` from PATH)
subprocess.run([
    sys.executable, "-m", "pip", "install", "-r",
    str(llama_cpp_path / "requirements" / "requirements-convert_hf_to_gguf.txt"),
], check=True, capture_output=True)

# Step 3: Convert to GGUF
print("\n[3/4] Converting to GGUF format...")
gguf_output_dir = Path("/tmp/gguf_output")
gguf_output_dir.mkdir(exist_ok=True)

# Convert to an F16 GGUF first; the quantized variants are derived from it
f16_path = gguf_output_dir / "ffmpeg-command-generator-f16.gguf"
subprocess.run([
    sys.executable, str(llama_cpp_path / "convert_hf_to_gguf.py"),
    str(merged_path),
    "--outfile", str(f16_path),
    "--outtype", "f16",
], check=True)
print(f"Created: {f16_path}")

# Build the llama-quantize tool. Current llama.cpp builds with CMake (the old
# `make llama-quantize` Makefile target has been removed upstream); binaries
# land in build/bin/.
print("\nBuilding llama.cpp for quantization...")
build_dir = llama_cpp_path / "build"
subprocess.run(
    ["cmake", "-B", str(build_dir), "-S", str(llama_cpp_path)],
    check=True, capture_output=True,
)
subprocess.run(
    ["cmake", "--build", str(build_dir), "--config", "Release",
     "--target", "llama-quantize"],
    check=True, capture_output=True,
)
quantize_bin = build_dir / "bin" / "llama-quantize"

# Quantize the F16 file to each requested format
quantized_files = []
for quant in QUANTIZATIONS:
    quant_path = gguf_output_dir / f"ffmpeg-command-generator-{quant.lower()}.gguf"
    print(f"Quantizing to {quant}...")
    subprocess.run([
        str(quantize_bin),
        str(f16_path),
        str(quant_path),
        quant,
    ], check=True)
    quantized_files.append(quant_path)
    print(f"Created: {quant_path}")

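# Size report (sanity check added here, not in the original script): the
# quantized files should come out well under the F16 baseline.
for f in [f16_path] + quantized_files:
    print(f"  {f.name}: {f.stat().st_size / 1e6:.1f} MB")
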
# Step 4: Upload to Hub
print("\n[4/4] Uploading to Hugging Face Hub...")
api = HfApi()  # uses the token from `huggingface-cli login` or HF_TOKEN

# Create the target repo (no-op if it already exists)
create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)

# Create the model card; an f-string keeps it in sync with the config above
model_card = f"""---
license: apache-2.0
base_model: {BASE_MODEL}
tags:
- gguf
- ffmpeg
- command-generation
- lm-studio
- ollama
---

# FFMPEG Command Generator (GGUF)

A fine-tuned model that generates FFmpeg commands from natural-language descriptions, with chain-of-thought reasoning.

## Usage

### LM Studio
```bash
lms import {OUTPUT_REPO}
```

### Ollama
```bash
ollama run hf.co/{OUTPUT_REPO}
```

## Example

**Input:** "Convert video.mp4 to webm format"

**Output:**
```
<think>
Task: Convert MP4 to WebM
- The WebM container uses the VP9 video codec and Opus audio
- Use -c:v libvpx-vp9 for video encoding
- Use -c:a libopus for audio encoding
</think>

ffmpeg -i video.mp4 -c:v libvpx-vp9 -c:a libopus output.webm
```

## Files

- `ffmpeg-command-generator-q4_k_m.gguf` - 4-bit quantized (smallest, fastest)
- `ffmpeg-command-generator-q8_0.gguf` - 8-bit quantized (better quality)

## Training

Fine-tuned from {BASE_MODEL} on 30 FFmpeg command examples with chain-of-thought reasoning.
"""

# Save and upload the model card
card_path = gguf_output_dir / "README.md"
card_path.write_text(model_card)

# Upload the card and the quantized files (the intermediate F16 file stays
# local and is not pushed)
for file in [card_path] + quantized_files:
    print(f"Uploading {file.name}...")
    api.upload_file(
        path_or_fileobj=str(file),
        path_in_repo=file.name,
        repo_id=OUTPUT_REPO,
        repo_type="model",
    )

print("\n" + "=" * 50)
print("DONE!")
print(f"Model available at: https://huggingface.co/{OUTPUT_REPO}")
print("\nTo use in LM Studio:")
print(f" lms import {OUTPUT_REPO}")
print("=" * 50)
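A minimal post-upload check (a sketch, not part of the committed script): listing the files in the output repo confirms the README and both quantized GGUFs landed. `list_repo_files` comes from `huggingface_hub`, which the script already depends on.

```python
from huggingface_hub import list_repo_files

# Expect README.md plus one .gguf file per entry in QUANTIZATIONS
print(list_repo_files("kingjux/ffmpeg-command-generator-gguf"))
```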