# /// script
# dependencies = ["torch", "transformers", "peft", "huggingface_hub", "sentencepiece", "protobuf", "gguf"]
# ///
import os
import subprocess
import shutil
from pathlib import Path

from huggingface_hub import HfApi, snapshot_download, create_repo
from peft import AutoPeftModelForCausalLM
from transformers import AutoTokenizer

# Config
ADAPTER_REPO = "kingjux/ffmpeg-command-generator"
OUTPUT_REPO = "kingjux/ffmpeg-command-generator-gguf"
BASE_MODEL = "Qwen/Qwen2.5-0.5B-Instruct"
QUANTIZATIONS = ["Q4_K_M", "Q8_0"]  # Good balance of size/quality

print("=" * 50)
print("GGUF Conversion for LM Studio")
print("=" * 50)

# Step 1: Load and merge LoRA with base model
print("\n[1/4] Loading adapter and merging with base model...")
model = AutoPeftModelForCausalLM.from_pretrained(
    ADAPTER_REPO,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(ADAPTER_REPO, trust_remote_code=True)

# Merge LoRA weights into base model
print("Merging LoRA weights...")
merged_model = model.merge_and_unload()

# Save merged model
merged_path = Path("/tmp/merged_model")
merged_path.mkdir(exist_ok=True)
print(f"Saving merged model to {merged_path}...")
merged_model.save_pretrained(merged_path)
tokenizer.save_pretrained(merged_path)
print("Merged model saved!")

# Step 2: Clone llama.cpp for conversion
print("\n[2/4] Setting up llama.cpp converter...")
llama_cpp_path = Path("/tmp/llama.cpp")
if not llama_cpp_path.exists():
    subprocess.run([
        "git", "clone", "--depth", "1",
        "https://github.com/ggerganov/llama.cpp.git",
        str(llama_cpp_path)
    ], check=True)

# Install conversion requirements
subprocess.run([
    "pip", "install", "-r",
    str(llama_cpp_path / "requirements" / "requirements-convert_hf_to_gguf.txt")
], check=True, capture_output=True)

# Step 3: Convert to GGUF
print("\n[3/4] Converting to GGUF format...")
gguf_output_dir = Path("/tmp/gguf_output")
gguf_output_dir.mkdir(exist_ok=True)

# Convert to F16 GGUF first
f16_path = gguf_output_dir / "ffmpeg-command-generator-f16.gguf"
subprocess.run([
    "python", str(llama_cpp_path / "convert_hf_to_gguf.py"),
    str(merged_path),
    "--outfile", str(f16_path),
    "--outtype", "f16"
], check=True)
print(f"Created: {f16_path}")

# Build llama.cpp for quantization
print("\nBuilding llama.cpp for quantization...")
subprocess.run(
    ["make", "-C", str(llama_cpp_path), "llama-quantize"],
    check=True, capture_output=True
)

# Quantize to different formats
quantized_files = []
for quant in QUANTIZATIONS:
    quant_path = gguf_output_dir / f"ffmpeg-command-generator-{quant.lower()}.gguf"
    print(f"Quantizing to {quant}...")
    subprocess.run([
        str(llama_cpp_path / "llama-quantize"),
        str(f16_path),
        str(quant_path),
        quant
    ], check=True)
    quantized_files.append(quant_path)
    print(f"Created: {quant_path}")

# Step 4: Upload to Hub
print("\n[4/4] Uploading to Hugging Face Hub...")
api = HfApi()

# Create repo
create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)

# Create model card
model_card = """---
license: apache-2.0
base_model: Qwen/Qwen2.5-0.5B-Instruct
tags:
- gguf
- ffmpeg
- command-generation
- lm-studio
- ollama
---

# FFMPEG Command Generator (GGUF)

A fine-tuned model that generates FFMPEG commands from natural language descriptions with chain-of-thought reasoning.

## Usage

### LM Studio

```bash
lms import kingjux/ffmpeg-command-generator-gguf
```

### Ollama

```bash
ollama run hf.co/kingjux/ffmpeg-command-generator-gguf
```

## Example

**Input:** "Convert video.mp4 to webm format"

**Output:**
```
Task: Convert MP4 to WebM
- WebM container uses VP9 video codec and Opus audio
- Use -c:v libvpx-vp9 for video encoding
- Use -c:a libopus for audio encoding

ffmpeg -i video.mp4 -c:v libvpx-vp9 -c:a libopus output.webm
```

## Files

- `ffmpeg-command-generator-q4_k_m.gguf` - 4-bit quantized (smallest, fastest)
- `ffmpeg-command-generator-q8_0.gguf` - 8-bit quantized (better quality)

## Training

Fine-tuned from Qwen2.5-0.5B-Instruct on 30 FFMPEG command examples with CoT reasoning.
"""

# Save and upload model card
card_path = gguf_output_dir / "README.md"
card_path.write_text(model_card)

# Upload all files
for file in [card_path] + quantized_files:
    print(f"Uploading {file.name}...")
    api.upload_file(
        path_or_fileobj=str(file),
        path_in_repo=file.name,
        repo_id=OUTPUT_REPO,
        repo_type="model"
    )

print("\n" + "=" * 50)
print("DONE!")
print(f"Model available at: https://huggingface.co/{OUTPUT_REPO}")
print("\nTo use in LM Studio:")
print(f" lms import {OUTPUT_REPO}")
print("=" * 50)
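# Usage note (assumption: this file is saved as convert_to_gguf.py; adjust the
# name to match your checkout). The "# /// script" block at the top is PEP 723
# inline metadata, so a runner that understands it (e.g. uv) can resolve the
# listed dependencies automatically:
#
#   uv run convert_to_gguf.py
#
# Otherwise, install the dependencies manually and run with plain Python:
#
#   pip install torch transformers peft huggingface_hub sentencepiece protobuf gguf
#   python convert_to_gguf.py
#
# Optional sanity check of a quantized artifact before upload (assumes
# llama-cpp-python is installed separately; not part of this script's deps):
#
#   from llama_cpp import Llama
#   llm = Llama(model_path="/tmp/gguf_output/ffmpeg-command-generator-q4_k_m.gguf")
#   print(llm("Convert video.mp4 to webm format", max_tokens=128)["choices"][0]["text"])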