nathens committed on
Commit
811a1b2
·
verified ·
1 Parent(s): 59c9a78

Upload convert_to_gguf_simple.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. convert_to_gguf_simple.py +126 -0
convert_to_gguf_simple.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# /// script
# dependencies = ["transformers", "peft", "huggingface_hub", "torch"]
# ///

"""
Convert fine-tuned LoRA model to GGUF format with Q4_K_M quantization.

Pipeline: merge the LoRA adapter into the base model, build llama.cpp,
convert the merged model to FP16 GGUF, quantize it, and upload the
artifacts to the Hugging Face Hub.
"""

import os
import subprocess

import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hardcoded configuration
ADAPTER_MODEL = "nathens/qwen-codeforces-sft"
BASE_MODEL = "Qwen/Qwen2.5-0.5B"
OUTPUT_REPO = "nathens/my-model-gguf"
QUANTIZATION = "Q4_K_M"


def _merge_adapter() -> str:
    """Load the base model, merge the LoRA adapter into it, and save the result.

    Returns:
        Path of the directory holding the merged model and tokenizer.
    """
    print("\n📦 Loading base model and tokenizer...")
    # NOTE(review): `dtype=` requires a recent transformers release; older
    # versions spell this parameter `torch_dtype=` — confirm the pinned version.
    base_model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL, trust_remote_code=True)

    print(f"🔀 Loading and merging LoRA adapter from {ADAPTER_MODEL}...")
    model = PeftModel.from_pretrained(base_model, ADAPTER_MODEL)
    print("⚙️ Merging adapter weights into base model...")
    # merge_and_unload() folds the adapter deltas into the base weights and
    # returns a plain (non-PEFT) model that llama.cpp's converter can read.
    merged_model = model.merge_and_unload()

    print("💾 Saving merged model...")
    merged_dir = "./merged_model"
    merged_model.save_pretrained(merged_dir)
    tokenizer.save_pretrained(merged_dir)
    print(f"✅ Merged model saved to {merged_dir}")
    return merged_dir


def _build_llama_cpp() -> str:
    """Install build dependencies, clone llama.cpp if needed, and build it.

    Returns:
        The core count used for the parallel build (as a string for cmake -j).
    """
    print("\n📥 Installing llama.cpp for GGUF conversion...")
    subprocess.run(["apt-get", "update", "-qq"], check=True)
    subprocess.run(["apt-get", "install", "-y", "-qq", "git", "build-essential", "cmake"], check=True)
    # Fix: guard the clone so a re-run of the script does not crash —
    # `git clone` refuses to clone into an existing directory.
    if not os.path.isdir("llama.cpp"):
        subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"], check=True)

    nproc_result = subprocess.run(["nproc"], capture_output=True, text=True, check=True)
    nproc = nproc_result.stdout.strip()
    print(f"Building llama.cpp with {nproc} cores using CMake...")

    os.makedirs("llama.cpp/build", exist_ok=True)
    subprocess.run(["cmake", "-B", "llama.cpp/build", "-S", "llama.cpp"], check=True)
    subprocess.run(["cmake", "--build", "llama.cpp/build", "--config", "Release", "-j", nproc], check=True)
    return nproc


def _convert_and_quantize(merged_dir: str) -> None:
    """Convert the merged HF checkpoint to FP16 GGUF, then quantize it."""
    print("\n🔄 Converting to GGUF format...")
    subprocess.run([
        "python3", "llama.cpp/convert_hf_to_gguf.py",
        merged_dir,
        "--outfile", "./model-f16.gguf",
        "--outtype", "f16",
    ], check=True)
    print("✅ Converted to FP16 GGUF")

    print(f"\n⚡ Quantizing to {QUANTIZATION}...")
    subprocess.run([
        "./llama.cpp/build/bin/llama-quantize",
        "./model-f16.gguf",
        f"./model-{QUANTIZATION}.gguf",
        QUANTIZATION,
    ], check=True)
    print(f"✅ Quantized to {QUANTIZATION}")


def _upload(merged_dir: str) -> None:
    """Upload both GGUF files plus the tokenizer files to OUTPUT_REPO."""
    # Imported lazily (as in the original) so the heavy pipeline steps run
    # even if the hub client is only needed at the end.
    from huggingface_hub import HfApi

    print(f"\n📤 Uploading to {OUTPUT_REPO}...")
    api = HfApi()

    # Create repo; exist_ok makes this idempotent, but surface other problems.
    try:
        api.create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)
    except Exception as e:
        print(f"Note: {e}")

    # Upload GGUF files (quantized first, then the FP16 original).
    for local_path, repo_path in (
        (f"./model-{QUANTIZATION}.gguf", f"model-{QUANTIZATION}.gguf"),
        ("./model-f16.gguf", "model-f16.gguf"),
    ):
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=repo_path,
            repo_id=OUTPUT_REPO,
            repo_type="model",
        )

    # Tokenizer files are best-effort, but log failures instead of silently
    # swallowing them (the original bare `except: pass` hid real errors).
    for file in ("tokenizer.json", "tokenizer_config.json"):
        try:
            api.upload_file(
                path_or_fileobj=f"{merged_dir}/{file}",
                path_in_repo=file,
                repo_id=OUTPUT_REPO,
                repo_type="model",
            )
        except Exception as e:
            print(f"Note: could not upload {file}: {e}")


def main() -> None:
    """Run the full merge → build → convert → quantize → upload pipeline."""
    print("🔧 Converting model to GGUF")
    print(f" Base model: {BASE_MODEL}")
    print(f" Adapter: {ADAPTER_MODEL}")
    print(f" Output: {OUTPUT_REPO}")
    print(f" Quantization: {QUANTIZATION}")

    merged_dir = _merge_adapter()
    _build_llama_cpp()
    _convert_and_quantize(merged_dir)
    _upload(merged_dir)

    print("\n✅ Conversion complete!")
    print(f"📁 GGUF model available at: https://huggingface.co/{OUTPUT_REPO}")
    print("\n💡 To use with Ollama:")
    print(f" huggingface-cli download {OUTPUT_REPO} model-{QUANTIZATION}.gguf")


if __name__ == "__main__":
    main()