#!/usr/bin/env python3
"""
Merge Codette LoRA Adapter with Base Model β v2
Merges HuggingFace PEFT adapter into base model using llama.cpp's export tool.
Run this AFTER training completes and the adapter is on HuggingFace.
Two paths:
A) HuggingFace format β merged safetensors (for further conversion)
B) GGUF base + GGUF LoRA β merged GGUF (if you have GGUF versions of both)
Usage:
$env:HF_TOKEN = "your_token"
python merge_lora_adapter.py
"""
import os
import sys
import subprocess
from pathlib import Path

# ── Config ──────────────────────────────────────────────────────────────────
HF_TOKEN = os.environ.get("HF_TOKEN", "")

# Path A: Merge HuggingFace adapter (use this after training completes)
BASE_MODEL_HF = "meta-llama/Llama-3.2-1B-Instruct"
ADAPTER_REPO = "Raiff1982/codette-llama-adapter"
MERGED_HF_DIR = Path("J:/TheAI/models/codette-v2-merged")

# Path B: Merge GGUF LoRA into GGUF base (use if you have GGUF-format LoRA)
BASE_GGUF = Path("J:/TheAI/models/codette-v2-gguf/codette-v2.gguf")
LORA_GGUF = Path("J:/TheAI/models/codette-rc-xi-lora.bin")
OUTPUT_GGUF = Path("J:/TheAI/models/codette-v2-merged.gguf")

# llama.cpp's LoRA-export binary (built with CMake; see Path B error message)
LLAMA_TOOL = Path("J:/TheAI/llama.cpp/build/bin/Release/llama-export-lora.exe")
# ── Validate token ──────────────────────────────────────────────────────────
# Fail fast before any model download is attempted.
if not HF_TOKEN:
    print("[!] HF_TOKEN not set. Run:")
    print('    $env:HF_TOKEN = "your_token_here"')
    sys.exit(1)

print("=" * 80)
print("MERGE CODETTE v2 LORA ADAPTER WITH BASE MODEL")
print("=" * 80)
print()
print("Select merge path:")
print("  A) HuggingFace format (PEFT adapter + HF base -> merged safetensors)")
print("  B) GGUF format (GGUF base + GGUF LoRA -> merged GGUF)")
print()
# Empty input falls through to the default path "A".
choice = input("Enter A or B [default: A]: ").strip().upper() or "A"
# ── Path A: HuggingFace PEFT merge ──────────────────────────────────────────
if choice == "A":
    print()
    print("=" * 60)
    print("PATH A: HuggingFace PEFT Merge")
    print("=" * 60)

    # Heavy ML imports are deferred so Path B still works without torch/peft.
    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer
    from peft import PeftModel

    MERGED_HF_DIR.mkdir(parents=True, exist_ok=True)

    print(f"[*] Loading tokenizer: {BASE_MODEL_HF}")
    tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL_HF, token=HF_TOKEN)
    if tokenizer.pad_token is None:
        # Llama tokenizers ship without a pad token; reuse EOS.
        tokenizer.pad_token = tokenizer.eos_token

    print(f"[*] Loading base model: {BASE_MODEL_HF}")
    base = AutoModelForCausalLM.from_pretrained(
        BASE_MODEL_HF,
        torch_dtype=torch.float16,
        low_cpu_mem_usage=True,
        token=HF_TOKEN,
    )

    print(f"[*] Loading LoRA adapter: {ADAPTER_REPO}")
    model = PeftModel.from_pretrained(base, ADAPTER_REPO, token=HF_TOKEN)

    print("[*] Merging and unloading LoRA weights...")
    model = model.merge_and_unload()
    model = model.to(torch.float16)  # keep merged weights in fp16

    print(f"[*] Saving merged model to {MERGED_HF_DIR}")
    model.save_pretrained(MERGED_HF_DIR, safe_serialization=True)
    tokenizer.save_pretrained(MERGED_HF_DIR)

    # Report on-disk size of everything under the output directory.
    size_gb = sum(f.stat().st_size for f in MERGED_HF_DIR.rglob("*") if f.is_file()) / (1024**3)
    print(f"[✓] Merged model saved — {size_gb:.2f} GB")
    print()
    print("[*] Next step — convert to GGUF:")
    print(f"    python J:/TheAI/llama.cpp/convert_hf_to_gguf.py {MERGED_HF_DIR} --outfile J:/TheAI/models/codette-v2-gguf/codette-v2.gguf --outtype q8_0")
    print()
    print("[*] Or run make_codette_gguf.py which does all steps automatically.")

# ── Path B: GGUF LoRA merge ─────────────────────────────────────────────────
elif choice == "B":
    print()
    print("=" * 60)
    print("PATH B: GGUF LoRA Merge")
    print("=" * 60)

    print("[*] Checking required files...")
    if not BASE_GGUF.exists():
        print(f"[!] Base GGUF not found: {BASE_GGUF}")
        print("[!] Run make_codette_gguf.py first to create the base GGUF.")
        sys.exit(1)
    print(f"[✓] Base GGUF: {BASE_GGUF.stat().st_size / (1024**3):.2f} GB")

    if not LORA_GGUF.exists():
        print(f"[!] LoRA GGUF not found: {LORA_GGUF}")
        print("[!] Note: HuggingFace PEFT adapters are not GGUF format.")
        print("[!] Use Path A to merge the HuggingFace adapter, then convert the result.")
        sys.exit(1)
    print(f"[✓] LoRA GGUF: {LORA_GGUF.stat().st_size / (1024**2):.2f} MB")

    if not LLAMA_TOOL.exists():
        print(f"[!] Merge tool not found: {LLAMA_TOOL}")
        print("[!] Build llama.cpp first:")
        print("    cd J:/TheAI/llama.cpp")
        print("    cmake -B build && cmake --build build --config Release")
        sys.exit(1)
    print(f"[✓] Merge tool found")

    OUTPUT_GGUF.parent.mkdir(parents=True, exist_ok=True)
    print()
    print(f"[*] Merging {BASE_GGUF.name} + {LORA_GGUF.name}")
    print(f"[*] Output: {OUTPUT_GGUF}")
    print()

    # Argument list (no shell) keeps Windows paths with spaces safe.
    cmd = [
        str(LLAMA_TOOL),
        "--model", str(BASE_GGUF),
        "--lora", str(LORA_GGUF),
        "--output", str(OUTPUT_GGUF),
    ]
    result = subprocess.run(cmd, cwd="J:/TheAI")

    # Double-check the output file: the tool can exit 0 without writing it.
    if result.returncode == 0 and OUTPUT_GGUF.exists():
        size_gb = OUTPUT_GGUF.stat().st_size / (1024**3)
        print(f"[✓] Merge complete: {OUTPUT_GGUF} ({size_gb:.2f} GB)")
        print()
        print("[*] Create Ollama model:")
        print(f"    ollama create codette-v2 -f J:/TheAI/models/codette-v2-gguf/Modelfile")
        print()
        print("[*] Or load directly in llama.cpp:")
        print(f"    llama-cli.exe -m {OUTPUT_GGUF} -p 'Your prompt here'")
    else:
        print("[!] Merge failed or output not created.")
        print()
        print("[*] Alternative — load LoRA separately at inference time:")
        print(f"    llama-cli.exe -m {BASE_GGUF} --lora {LORA_GGUF}")

else:
    print(f"[!] Unknown choice: {choice}")
    sys.exit(1)
|