Upload convert_to_gguf.py with huggingface_hub
Browse files- convert_to_gguf.py +199 -0
convert_to_gguf.py
ADDED
|
@@ -0,0 +1,199 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# /// script
|
| 2 |
+
# dependencies = ["torch", "transformers", "peft", "accelerate", "llama-cpp-python", "huggingface_hub", "gguf"]
|
| 3 |
+
# ///
|
| 4 |
+
|
| 5 |
+
"""
|
| 6 |
+
Convert Underwood LoRA adapter to GGUF format
|
| 7 |
+
Merges adapter with base model and converts to quantized GGUF
|
| 8 |
+
"""
|
| 9 |
+
|
import os
import shutil
import subprocess
import sys
from pathlib import Path

import torch
from huggingface_hub import HfApi, create_repo, upload_file
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer
| 18 |
+
|
# Configuration
ADAPTER_REPO = "AmiDwivedi/underwood-lr1e4"  # LoRA adapter repo on the Hub
BASE_MODEL = "google/gemma-3-4b-it"          # base model the adapter was trained against
OUTPUT_REPO = "AmiDwivedi/underwood-gguf"    # destination repo for the converted GGUF
QUANTIZATION = "Q4_K_M"  # Good balance of size and quality
def _merge_and_save() -> Path:
    """Load the base model, merge the LoRA adapter into it, and save fp16 weights.

    Returns:
        Directory containing the merged model and tokenizer.
    """
    print("\n[1/4] Loading base model...")
    model = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL,
        torch_dtype=torch.float16,
        device_map="auto",
        trust_remote_code=True,
    )
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)

    print("\n[2/4] Loading and merging LoRA adapter...")
    model = PeftModel.from_pretrained(model, ADAPTER_REPO)
    # Fold the LoRA deltas into the base weights so the result is a plain HF model.
    model = model.merge_and_unload()

    merged_path = Path("/tmp/underwood-merged")
    merged_path.mkdir(parents=True, exist_ok=True)
    print(f"Saving merged model to {merged_path}...")
    model.save_pretrained(merged_path, safe_serialization=True)
    tokenizer.save_pretrained(merged_path)
    return merged_path


def _convert_to_gguf(merged_path: Path) -> "tuple[Path, Path]":
    """Convert the merged HF model to an f16 GGUF via llama.cpp.

    Clones llama.cpp (shallow) on first use and installs its conversion
    requirements into the *current* interpreter's environment.

    Returns:
        (path to the f16 GGUF file, path to the llama.cpp checkout).

    Raises:
        RuntimeError: if both conversion attempts fail.
    """
    print("\n[3/4] Converting to GGUF format...")

    llama_cpp_path = Path("/tmp/llama.cpp")
    if not llama_cpp_path.exists():
        print("Cloning llama.cpp...")
        subprocess.run([
            "git", "clone", "--depth", "1",
            "https://github.com/ggerganov/llama.cpp.git",
            str(llama_cpp_path),
        ], check=True)

    # Use this interpreter's pip so dependencies land in the active environment
    # (a bare "pip" may belong to a different Python).
    subprocess.run([
        sys.executable, "-m", "pip", "install", "-q", "-r",
        str(llama_cpp_path / "requirements.txt"),
    ], check=True)

    gguf_path = Path("/tmp/underwood.gguf")
    convert_script = llama_cpp_path / "convert_hf_to_gguf.py"

    print("Running conversion...")
    result = subprocess.run(
        [sys.executable, str(convert_script), str(merged_path),
         "--outfile", str(gguf_path), "--outtype", "f16"],
        capture_output=True, text=True,
    )

    if result.returncode != 0:
        print(f"Conversion error: {result.stderr}")
        # Retry without forcing --outtype; some architectures reject it.
        print("Trying alternative conversion method...")
        result = subprocess.run(
            [sys.executable, str(convert_script), str(merged_path),
             "--outfile", str(gguf_path)],
            capture_output=True, text=True,
        )
        if result.returncode != 0:
            # Fail fast rather than trying to quantize/upload a missing file
            # (the original silently continued after a double failure).
            raise RuntimeError(f"GGUF conversion failed:\n{result.stderr}")

    print(result.stdout)
    return gguf_path, llama_cpp_path


def _quantize(gguf_path: Path, llama_cpp_path: Path) -> Path:
    """Quantize the f16 GGUF to QUANTIZATION, building llama-quantize if needed.

    Falls back to returning the f16 file when the quantize tool cannot be built
    (the build steps are deliberately best-effort).
    """
    quantized_path = Path(f"/tmp/underwood-{QUANTIZATION}.gguf")
    print(f"Quantizing to {QUANTIZATION}...")

    quantize_bin = llama_cpp_path / "build" / "bin" / "llama-quantize"
    if not quantize_bin.exists():
        # Best-effort build: failures are tolerated, we just fall back to f16.
        build_dir = llama_cpp_path / "build"
        build_dir.mkdir(exist_ok=True)
        subprocess.run(["cmake", ".."], cwd=build_dir)
        subprocess.run(
            ["cmake", "--build", ".", "--target", "llama-quantize", "-j"],
            cwd=build_dir,
        )

    if quantize_bin.exists():
        subprocess.run(
            [str(quantize_bin), str(gguf_path), str(quantized_path), QUANTIZATION],
            check=True,
        )
        return quantized_path

    print("Quantize tool not available, uploading f16 version")
    return gguf_path


def _upload(final_gguf: Path) -> None:
    """Create the output repo (if needed) and upload the GGUF plus a README."""
    print("\n[4/4] Uploading to HuggingFace Hub...")

    try:
        create_repo(OUTPUT_REPO, repo_type="model", exist_ok=True)
    except Exception as e:
        # Non-fatal: the repo may already exist or the token may lack rights;
        # the subsequent upload will surface a real auth problem.
        print(f"Repo creation: {e}")

    print(f"Uploading {final_gguf.name}...")
    upload_file(
        path_or_fileobj=str(final_gguf),
        path_in_repo=final_gguf.name,
        repo_id=OUTPUT_REPO,
        repo_type="model"
    )

    readme = f"""# Underwood Strategic Advisor - GGUF

Fine-tuned Gemma 3 4B model for Frank Underwood-style strategic advice.

## Model Details
- **Base model:** google/gemma-3-4b-it
- **Training:** QLoRA fine-tuning on synthetic conversations
- **Quantization:** {QUANTIZATION}

## Usage with Ollama

```bash
# Download the GGUF file
wget https://huggingface.co/{OUTPUT_REPO}/resolve/main/{final_gguf.name}

# Create Modelfile
cat > Modelfile << 'EOF'
FROM ./{final_gguf.name}

SYSTEM "You are a strategic advisor who speaks like Frank Underwood — direct, theatrical, calculating, with a taste for dark humor. You weave in wisdom from Sun Tzu and Machiavelli when relevant. Before giving advice, you ask clarifying questions to understand the power dynamics, key players, and stakes involved."

PARAMETER temperature 0.7
PARAMETER top_p 0.9
EOF

# Create and run
ollama create underwood -f Modelfile
ollama run underwood
```

## Training Data
- House of Cards transcripts
- Sun Tzu's Art of War
- Machiavelli's works (The Prince, Art of War, Discourses)
"""

    readme_path = Path("/tmp/README.md")
    readme_path.write_text(readme)
    upload_file(
        path_or_fileobj=str(readme_path),
        path_in_repo="README.md",
        repo_id=OUTPUT_REPO,
        repo_type="model"
    )


def main():
    """Merge the LoRA adapter into its base model, convert to GGUF, quantize, upload."""
    print("=" * 60)
    print("Underwood GGUF Conversion")
    print("=" * 60)
    print(f"Adapter: {ADAPTER_REPO}")
    print(f"Base model: {BASE_MODEL}")
    print(f"Output: {OUTPUT_REPO}")
    print(f"Quantization: {QUANTIZATION}")

    merged_path = _merge_and_save()
    gguf_path, llama_cpp_path = _convert_to_gguf(merged_path)
    final_gguf = _quantize(gguf_path, llama_cpp_path)
    _upload(final_gguf)

    print("\n" + "=" * 60)
    print("CONVERSION COMPLETE!")
    print("=" * 60)
    print(f"\nGGUF uploaded to: https://huggingface.co/{OUTPUT_REPO}")
    print(f"File: {final_gguf.name}")
    print(f"Size: {final_gguf.stat().st_size / (1024**3):.2f} GB")


if __name__ == "__main__":
    main()
|