AmiDwivedi commited on
Commit
e32ff7f
·
verified ·
1 Parent(s): 5874129

Upload convert_to_gguf.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. convert_to_gguf.py +199 -0
convert_to_gguf.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # /// script
2
+ # dependencies = ["torch", "transformers", "peft", "accelerate", "llama-cpp-python", "huggingface_hub", "gguf"]
3
+ # ///
4
+
5
+ """
6
+ Convert Underwood LoRA adapter to GGUF format
7
+ Merges adapter with base model and converts to quantized GGUF
8
+ """
9
+
10
import os
import shutil
import subprocess
import sys
from pathlib import Path

import torch
from huggingface_hub import HfApi, create_repo, upload_file
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
18
+
19
# Configuration
ADAPTER_REPO = "AmiDwivedi/underwood-lr1e4"  # LoRA adapter repo on the Hub to merge in
BASE_MODEL = "google/gemma-3-4b-it"  # base model the adapter was trained against
OUTPUT_REPO = "AmiDwivedi/underwood-gguf"  # destination Hub repo for the GGUF artifact
QUANTIZATION = "Q4_K_M" # Good balance of size and quality
24
+
25
def _merge_adapter() -> Path:
    """Load the base model, merge the LoRA adapter into it, and save the
    merged fp16 checkpoint (model + tokenizer) to /tmp/underwood-merged.

    Returns:
        Path to the directory containing the merged safetensors checkpoint.
    """
    print("\n[1/4] Loading base model...")
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

    print("\n[2/4] Loading and merging LoRA adapter...")
    model = PeftModel.from_pretrained(model, ADAPTER_REPO)
    # Bake the adapter weights into the base model so llama.cpp sees a
    # plain HF checkpoint with no PEFT wrapper.
    model = model.merge_and_unload()

    merged_path = Path("/tmp/underwood-merged")
    merged_path.mkdir(parents=True, exist_ok=True)
    print(f"Saving merged model to {merged_path}...")
    model.save_pretrained(merged_path, safe_serialization=True)
    tokenizer.save_pretrained(merged_path)
    return merged_path


def _ensure_llama_cpp() -> Path:
    """Shallow-clone llama.cpp (if not already present) and install its
    conversion-script requirements into the current environment.

    Returns:
        Path to the llama.cpp checkout.
    """
    llama_cpp_path = Path("/tmp/llama.cpp")
    if not llama_cpp_path.exists():
        print("Cloning llama.cpp...")
        subprocess.run([
            "git", "clone", "--depth", "1",
            "https://github.com/ggerganov/llama.cpp.git",
            str(llama_cpp_path)
        ], check=True)

    # Use this interpreter's pip (not whatever `pip` is on PATH) so the
    # conversion dependencies land in the environment we are running in.
    subprocess.run([
        sys.executable, "-m", "pip", "install", "-q", "-r",
        str(llama_cpp_path / "requirements.txt")
    ], check=True)
    return llama_cpp_path


def _convert_to_gguf(llama_cpp_path: Path, merged_path: Path) -> Path:
    """Convert the merged HF checkpoint to an f16 GGUF file.

    Tries the conversion with an explicit `--outtype f16` first; on failure
    retries without the flag (older convert scripts reject it).

    Raises:
        RuntimeError: if both conversion attempts fail.

    Returns:
        Path to the produced f16 GGUF file.
    """
    print("\n[3/4] Converting to GGUF format...")
    gguf_path = Path("/tmp/underwood.gguf")
    convert_script = llama_cpp_path / "convert_hf_to_gguf.py"

    print("Running conversion...")
    result = subprocess.run([
        sys.executable, str(convert_script),
        str(merged_path),
        "--outfile", str(gguf_path),
        "--outtype", "f16",
    ], capture_output=True, text=True)

    if result.returncode != 0:
        print(f"Conversion error: {result.stderr}")
        print("Trying alternative conversion method...")
        result = subprocess.run([
            sys.executable, str(convert_script),
            str(merged_path),
            "--outfile", str(gguf_path),
        ], capture_output=True, text=True)
        # Bug fix: the original ignored a second failure and went on to
        # quantize/upload a file that was never produced.
        if result.returncode != 0:
            raise RuntimeError(f"GGUF conversion failed: {result.stderr}")

    print(result.stdout)
    return gguf_path


def _quantize(llama_cpp_path: Path, gguf_path: Path) -> Path:
    """Quantize the f16 GGUF to QUANTIZATION, best-effort.

    Builds llama-quantize with cmake if it is missing. If the tool still
    cannot be found (e.g. no cmake/toolchain), falls back to the f16 file
    rather than failing — the build steps are deliberately unchecked.

    Returns:
        Path to the quantized GGUF, or the original f16 file on fallback.
    """
    quantized_path = Path(f"/tmp/underwood-{QUANTIZATION}.gguf")
    print(f"Quantizing to {QUANTIZATION}...")

    quantize_bin = llama_cpp_path / "build" / "bin" / "llama-quantize"
    if not quantize_bin.exists():
        build_dir = llama_cpp_path / "build"
        build_dir.mkdir(exist_ok=True)
        # Best-effort build: a failure here just means we upload f16 below.
        subprocess.run(["cmake", ".."], cwd=build_dir)
        subprocess.run(["cmake", "--build", ".", "--target", "llama-quantize", "-j"], cwd=build_dir)

    if quantize_bin.exists():
        subprocess.run([
            str(quantize_bin),
            str(gguf_path),
            str(quantized_path),
            QUANTIZATION
        ], check=True)
        return quantized_path

    print("Quantize tool not available, uploading f16 version")
    return gguf_path


def _build_readme(gguf_name: str) -> str:
    """Return the model-card README text for the output repo."""
    return f"""# Underwood Strategic Advisor - GGUF

Fine-tuned Gemma 3 4B model for Frank Underwood-style strategic advice.

## Model Details
- **Base model:** google/gemma-3-4b-it
- **Training:** QLoRA fine-tuning on synthetic conversations
- **Quantization:** {QUANTIZATION}

## Usage with Ollama

```bash
# Download the GGUF file
wget https://huggingface.co/{OUTPUT_REPO}/resolve/main/{gguf_name}

# Create Modelfile
cat > Modelfile << 'EOF'
FROM ./{gguf_name}

SYSTEM "You are a strategic advisor who speaks like Frank Underwood — direct, theatrical, calculating, with a taste for dark humor. You weave in wisdom from Sun Tzu and Machiavelli when relevant. Before giving advice, you ask clarifying questions to understand the power dynamics, key players, and stakes involved."

PARAMETER temperature 0.7
PARAMETER top_p 0.9
EOF

# Create and run
ollama create underwood -f Modelfile
ollama run underwood
```

## Training Data
- House of Cards transcripts
- Sun Tzu's Art of War
- Machiavelli's works (The Prince, Art of War, Discourses)
"""


def _upload(final_gguf: Path) -> None:
    """Create the output repo (idempotent) and upload the GGUF + README."""
    print("\n[4/4] Uploading to HuggingFace Hub...")

    # Repo creation is best-effort: exist_ok covers the common case, and any
    # other failure is surfaced but does not abort the upload attempt.
    try:
        create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)
    except Exception as e:
        print(f"Repo creation: {e}")

    print(f"Uploading {final_gguf.name}...")
    upload_file(
        path_or_fileobj=str(final_gguf),
        path_in_repo=final_gguf.name,
        repo_id=OUTPUT_REPO,
        repo_type="model"
    )

    readme_path = Path("/tmp/README.md")
    readme_path.write_text(_build_readme(final_gguf.name))
    upload_file(
        path_or_fileobj=str(readme_path),
        path_in_repo="README.md",
        repo_id=OUTPUT_REPO,
        repo_type="model"
    )


def main():
    """Run the full pipeline: merge the Underwood LoRA adapter into its base
    model, convert to GGUF with llama.cpp, quantize (best effort), and upload
    the resulting artifact plus a README to the Hub.
    """
    print("=" * 60)
    print("Underwood GGUF Conversion")
    print("=" * 60)
    print(f"Adapter: {ADAPTER_REPO}")
    print(f"Base model: {BASE_MODEL}")
    print(f"Output: {OUTPUT_REPO}")
    print(f"Quantization: {QUANTIZATION}")

    merged_path = _merge_adapter()
    llama_cpp_path = _ensure_llama_cpp()
    gguf_path = _convert_to_gguf(llama_cpp_path, merged_path)
    final_gguf = _quantize(llama_cpp_path, gguf_path)
    _upload(final_gguf)

    print("\n" + "=" * 60)
    print("CONVERSION COMPLETE!")
    print("=" * 60)
    print(f"\nGGUF uploaded to: https://huggingface.co/{OUTPUT_REPO}")
    print(f"File: {final_gguf.name}")
    print(f"Size: {final_gguf.stat().st_size / (1024**3):.2f} GB")
197
+
198
# Script entry point: run the merge -> convert -> quantize -> upload pipeline.
if __name__ == "__main__":
    main()