Upload convert_to_gguf_simple.py with huggingface_hub

convert_to_gguf_simple.py (ADDED, +126 -0)
# /// script
# dependencies = [
#     "transformers",
#     "peft",
#     "huggingface_hub",
#     "torch",
#     "accelerate",  # required by device_map="auto" below
# ]
# ///

"""Convert a fine-tuned LoRA model to GGUF format with Q4_K_M quantization."""

import os
import subprocess

import torch
from huggingface_hub import HfApi
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hardcoded configuration
ADAPTER_MODEL = "nathens/qwen-codeforces-sft"
BASE_MODEL = "Qwen/Qwen2.5-0.5B"
OUTPUT_REPO = "nathens/my-model-gguf"
QUANTIZATION = "Q4_K_M"

print("🔧 Converting model to GGUF")
print(f" Base model: {BASE_MODEL}")
print(f" Adapter: {ADAPTER_MODEL}")
print(f" Output: {OUTPUT_REPO}")
print(f" Quantization: {QUANTIZATION}")

# Step 1: Load base model and tokenizer
print("\n📦 Loading base model and tokenizer...")
base_model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    dtype=torch.float16,
    device_map="auto",
    trust_remote_code=True,
)
tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

# Step 2: Load and merge LoRA adapter
print(f"🔀 Loading and merging LoRA adapter from {ADAPTER_MODEL}...")
model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
print("⚙️ Merging adapter weights into base model...")
merged_model = model.merge_and_unload()

# Step 3: Save merged model
print("💾 Saving merged model...")
merged_dir = "./merged_model"
merged_model.save_pretrained(merged_dir)
tokenizer.save_pretrained(merged_dir)
print(f"✅ Merged model saved to {merged_dir}")

# Step 4: Install and build llama.cpp for conversion
print("\n📥 Installing llama.cpp for GGUF conversion...")
subprocess.run(["apt-get", "update", "-qq"], check=True)
subprocess.run(["apt-get", "install", "-y", "-qq", "git", "build-essential", "cmake"], check=True)
subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"], check=True)

# The conversion script has its own Python dependencies (gguf, numpy, ...);
# install them for the python3 interpreter that will run it below.
subprocess.run(
    ["python3", "-m", "pip", "install", "-q", "-r", "llama.cpp/requirements.txt"],
    check=True,
)

# Build llama.cpp with CMake
nproc_result = subprocess.run(["nproc"], capture_output=True, text=True, check=True)
nproc = nproc_result.stdout.strip()
print(f"Building llama.cpp with {nproc} cores using CMake...")

os.makedirs("llama.cpp/build", exist_ok=True)
subprocess.run(["cmake", "-B", "llama.cpp/build", "-S", "llama.cpp"], check=True)
subprocess.run(["cmake", "--build", "llama.cpp/build", "--config", "Release", "-j", nproc], check=True)

# Step 5: Convert to GGUF format (FP16 first, quantize afterwards)
print("\n🔄 Converting to GGUF format...")
subprocess.run([
    "python3", "llama.cpp/convert_hf_to_gguf.py",
    merged_dir,
    "--outfile", "./model-f16.gguf",
    "--outtype", "f16",
], check=True)
print("✅ Converted to FP16 GGUF")

# Step 6: Quantize to Q4_K_M
print(f"\n⚡ Quantizing to {QUANTIZATION}...")
subprocess.run([
    "./llama.cpp/build/bin/llama-quantize",
    "./model-f16.gguf",
    f"./model-{QUANTIZATION}.gguf",
    QUANTIZATION,
], check=True)
print(f"✅ Quantized to {QUANTIZATION}")

# Step 7: Upload to Hub
print(f"\n📤 Uploading to {OUTPUT_REPO}...")
api = HfApi()

# Create repo (exist_ok makes this a no-op if it already exists)
try:
    api.create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)
except Exception as e:
    print(f"Note: {e}")

# Upload GGUF files
api.upload_file(
    path_or_fileobj=f"./model-{QUANTIZATION}.gguf",
    path_in_repo=f"model-{QUANTIZATION}.gguf",
    repo_id=OUTPUT_REPO,
    repo_type="model",
)

api.upload_file(
    path_or_fileobj="./model-f16.gguf",
    path_in_repo="model-f16.gguf",
    repo_id=OUTPUT_REPO,
    repo_type="model",
)

# Upload tokenizer files (skip any the tokenizer did not produce)
for file in ["tokenizer.json", "tokenizer_config.json"]:
    try:
        api.upload_file(
            path_or_fileobj=f"{merged_dir}/{file}",
            path_in_repo=file,
            repo_id=OUTPUT_REPO,
            repo_type="model",
        )
    except Exception:
        pass

print("\n✅ Conversion complete!")
print(f"📁 GGUF model available at: https://huggingface.co/{OUTPUT_REPO}")
print("\n💡 To use with Ollama:")
print(f" huggingface-cli download {OUTPUT_REPO} model-{QUANTIZATION}.gguf")
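Because the file declares its dependencies via inline script metadata (PEP 723), uv can run it directly without a pre-built environment. A minimal usage sketch, assuming a Linux machine with root access for the apt-get steps; the local model name qwen-codeforces in the Ollama commands is an arbitrary choice, not something the script produces:

    # Run the converter; uv reads the inline metadata and installs the dependencies.
    uv run convert_to_gguf_simple.py

    # Pull the quantized file and register it with Ollama.
    huggingface-cli download nathens/my-model-gguf model-Q4_K_M.gguf --local-dir .
    printf 'FROM ./model-Q4_K_M.gguf\n' > Modelfile
    ollama create qwen-codeforces -f Modelfile
    ollama run qwen-codeforces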