nathens committed on
Commit
1fbdc40
·
verified ·
1 Parent(s): 8b28065

Upload convert_to_gguf.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. convert_to_gguf.py +123 -0
convert_to_gguf.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# /// script
# dependencies = ["transformers", "peft", "huggingface_hub", "torch"]
# ///

"""
Convert a fine-tuned LoRA model to GGUF format with quantization.

Merges the LoRA adapter into its base model, converts the merged model to
GGUF via llama.cpp, quantizes it, and uploads the artifacts to the Hub.

Configuration (environment variables, with defaults):
    ADAPTER_MODEL: Hub repo id of the LoRA adapter to merge.
    BASE_MODEL:    Hub repo id of the base model the adapter was trained on.
    OUTPUT_REPO:   Hub repo id the GGUF files are uploaded to.
    QUANTIZATION:  llama.cpp quantization type (e.g. Q4_K_M, Q5_K_M, Q8_0).
"""

import os
import subprocess
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel
import torch

# Configuration from environment variables or defaults
ADAPTER_MODEL = os.getenv("ADAPTER_MODEL", "nathens/qwen-codeforces-sft")
BASE_MODEL = os.getenv("BASE_MODEL", "Qwen/Qwen2.5-0.5B")
OUTPUT_REPO = os.getenv("OUTPUT_REPO", "nathens/my-model-gguf")
QUANTIZATION = os.getenv("QUANTIZATION", "Q4_K_M")

print(f"🔧 Converting model to GGUF")
print(f"   Base model: {BASE_MODEL}")
print(f"   Adapter: {ADAPTER_MODEL}")
print(f"   Output: {OUTPUT_REPO}")
print(f"   Quantization: {QUANTIZATION}")

# Step 1: Load base model and tokenizer
print("\n📦 Loading base model and tokenizer...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

# Step 2: Load and merge LoRA adapter
print(f"🔀 Loading and merging LoRA adapter from {ADAPTER_MODEL}...")
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
print("⚙️ Merging adapter weights into base model...")
# merge_and_unload() folds the LoRA deltas into the base weights and returns
# a plain transformers model that llama.cpp's converter can read.
merged_model = model.merge_and_unload()

# Step 3: Save merged model
print("💾 Saving merged model...")
merged_dir = "./merged_model"
merged_model.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)
print(f"✅ Merged model saved to {merged_dir}")

# Step 4: Install llama.cpp for conversion
print("\n📥 Installing llama.cpp for GGUF conversion...")
subprocess.run(["apt-get", "update", "-qq"], check=True)
subprocess.run(["apt-get", "install", "-y", "-qq", "git", "build-essential"], check=True)
subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"], check=True)
# BUG FIX: the original used shell=True together with a list argument. On
# POSIX that executes only the first element ("make") and passes the rest to
# the shell, not to make -- and "$(nproc)" is never expanded inside a list
# anyway. Resolve the CPU count in Python and run without a shell.
jobs = str(os.cpu_count() or 1)
subprocess.run(["make", "-C", "llama.cpp", "-j", jobs], check=True)

# Step 5: Convert to GGUF format (FP16 first; quantization follows)
print("\n🔄 Converting to GGUF format...")
subprocess.run([
    "python3", "llama.cpp/convert_hf_to_gguf.py",
    merged_dir,
    "--outfile", "./model-f16.gguf",
    "--outtype", "f16"
], check=True)
print("✅ Converted to FP16 GGUF")

# Step 6: Quantize to the requested format
print(f"\n⚡ Quantizing to {QUANTIZATION}...")
subprocess.run([
    "./llama.cpp/llama-quantize",
    "./model-f16.gguf",
    f"./model-{QUANTIZATION}.gguf",
    QUANTIZATION
], check=True)
print(f"✅ Quantized to {QUANTIZATION}")

# Step 7: Upload to Hub
print(f"\n📤 Uploading to {OUTPUT_REPO}...")
from huggingface_hub import HfApi
api = HfApi()

# Create repo if it doesn't exist (exist_ok makes this idempotent; the
# try/except is kept as a best-effort guard, e.g. against permission errors)
try:
    api.create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)
except Exception as e:
    print(f"Note: {e}")

# Upload the quantized GGUF file
api.upload_file(
    path_or_fileobj=f"./model-{QUANTIZATION}.gguf",
    path_in_repo=f"model-{QUANTIZATION}.gguf",
    repo_id=OUTPUT_REPO,
    repo_type="model"
)

# Also upload the original FP16 version
api.upload_file(
    path_or_fileobj="./model-f16.gguf",
    path_in_repo="model-f16.gguf",
    repo_id=OUTPUT_REPO,
    repo_type="model"
)

# Upload tokenizer files. Some of these are optional depending on the
# tokenizer type, so check existence explicitly instead of swallowing every
# exception (the original `except: pass` would also have hidden real upload
# failures such as auth errors).
for file in ["tokenizer.json", "tokenizer_config.json", "vocab.json", "merges.txt", "special_tokens_map.json"]:
    path = f"{merged_dir}/{file}"
    if not os.path.exists(path):
        continue  # optional file not produced by this tokenizer
    api.upload_file(
        path_or_fileobj=path,
        path_in_repo=file,
        repo_id=OUTPUT_REPO,
        repo_type="model"
    )

print(f"\n✅ Conversion complete!")
print(f"📁 GGUF model available at: https://huggingface.co/{OUTPUT_REPO}")
print(f"\n💡 To use with Ollama:")
print(f"   1. Download: huggingface-cli download {OUTPUT_REPO} model-{QUANTIZATION}.gguf")
print(f"   2. Create Modelfile with the downloaded GGUF")
print(f"   3. Run: ollama create my-model -f Modelfile")
print(f"   4. Use: ollama run my-model")