| |
| """Convert PEFT LoRA safetensors to llama.cpp GGUF LoRA format. |
| |
| Lightweight converter — no torch/transformers dependency. |
| Only needs: gguf and numpy, plus the standard library (json, struct); the safetensors file is parsed by hand. |
| |
| Matches the exact format produced by llama.cpp's convert_lora_to_gguf.py. |
| """ |
|
|
| import json |
| import struct |
| import sys |
| from pathlib import Path |
| import numpy as np |
|
|
| |
| from gguf import GGUFWriter, GGMLQuantizationType |
|
|
|
|
| |
| |
| |
# Attention projection module name (as it appears in PEFT tensor names)
# -> the matching component of the GGUF tensor name.
PROJ_MAP = dict(
    q_proj="attn_q",
    k_proj="attn_k",
    v_proj="attn_v",
    o_proj="attn_output",
)
|
|
|
|
def bf16_to_f16(data_bytes: bytes) -> np.ndarray:
    """Convert raw little-endian bfloat16 bytes to a float16 numpy array.

    bf16: sign(1) + exp(8) + mantissa(7)
    f16:  sign(1) + exp(5) + mantissa(10)

    A bfloat16 value is the upper 16 bits of the float32 with the same
    value, so each 16-bit word is widened to 32 bits, shifted into the high
    half and reinterpreted as float32. numpy then performs the
    float32 -> float16 conversion, which avoids hand-rolled exponent
    re-biasing and its precision edge cases.

    Args:
        data_bytes: Packed bfloat16 values, 2 bytes each, little-endian.

    Returns:
        1-D float16 array with ``len(data_bytes) // 2`` elements.

    Raises:
        ValueError: If ``len(data_bytes)`` is not a multiple of 2
            (raised by ``np.frombuffer``).
    """
    # Explicit "<u2" instead of np.uint16: the payload is little-endian on
    # disk, so decoding must not depend on the host byte order.
    words = np.frombuffer(data_bytes, dtype="<u2")

    # astype() yields a fresh native-order uint32 array; the in-place shift
    # keeps that dtype and moves each bf16 pattern into the float32 high half.
    widened = words.astype(np.uint32)
    widened <<= 16

    return widened.view(np.float32).astype(np.float16)
|
|
|
|
def read_safetensors(path: Path) -> dict[str, np.ndarray]:
    """Read every tensor of a safetensors file as a float16 numpy array.

    The file is parsed by hand: an 8-byte little-endian header length, a
    JSON header giving each tensor's dtype, shape and byte offsets (relative
    to the first byte after the header), followed by the raw tensor bytes.

    Args:
        path: Location of the safetensors file.

    Returns:
        Mapping of tensor name to a float16 array reshaped to the shape
        declared in the header. BF16 and F32 tensors are converted to
        float16. The ``__metadata__`` header entry is skipped.

    Raises:
        ValueError: If a tensor's dtype is not BF16, F16 or F32, if the file
            ends before a tensor's declared byte range, or if the byte count
            does not fit the declared shape.
        struct.error: If the file is shorter than the 8-byte length prefix.
    """
    with open(path, "rb") as f:
        header_size = struct.unpack("<Q", f.read(8))[0]
        header = json.loads(f.read(header_size))

        # Tensor offsets in the header are relative to the end of the header.
        data_start = 8 + header_size
        tensors = {}

        for name, info in header.items():
            if name == "__metadata__":
                continue
            dtype = info["dtype"]
            start, end = info["data_offsets"]

            f.seek(data_start + start)
            raw = f.read(end - start)
            if len(raw) != end - start:
                # Fail with a clear message instead of a confusing reshape
                # error further down.
                raise ValueError(
                    f"Truncated safetensors file: tensor {name!r} needs "
                    f"{end - start} bytes, only {len(raw)} available"
                )

            # Explicit little-endian dtypes: decoding must not depend on the
            # host byte order. astype(..., copy=False) is a no-op when the
            # array is already native float16.
            if dtype == "BF16":
                arr = bf16_to_f16(raw)
            elif dtype == "F16":
                arr = np.frombuffer(raw, dtype="<f2").astype(np.float16, copy=False)
            elif dtype == "F32":
                arr = np.frombuffer(raw, dtype="<f4").astype(np.float16)
            else:
                raise ValueError(f"Unsupported dtype: {dtype}")

            tensors[name] = arr.reshape(info["shape"])

    return tensors
|
|
|
|
| def peft_name_to_gguf(peft_name: str) -> str | None: |
| """Map PEFT tensor name to GGUF tensor name. |
| |
| Input: base_model.model.model.layers.0.self_attn.q_proj.lora_A.weight |
| Output: blk.0.attn_q.weight.lora_a |
| """ |
| parts = peft_name.split(".") |
| |
| try: |
| layer_idx = parts[4] |
| proj = parts[6] |
| lora_part = parts[7] |
| except IndexError: |
| return None |
|
|
| gguf_proj = PROJ_MAP.get(proj) |
| if gguf_proj is None: |
| return None |
|
|
| ab = lora_part.lower() |
| return f"blk.{layer_idx}.{gguf_proj}.weight.{ab}" |
|
|
|
|
def convert(adapter_dir: Path, output_path: Path, adapter_name: str) -> None:
    """Convert a PEFT LoRA adapter directory to a GGUF LoRA file.

    Reads ``adapter_config.json`` and ``adapter_model.safetensors`` from
    ``adapter_dir``, writes adapter metadata (with the base model recorded as
    Llama 3.1 8B Instruct) and every tensor that ``peft_name_to_gguf`` can
    map, stored as F16. Tensors without a mapping are reported and skipped.

    Args:
        adapter_dir: Directory holding the PEFT adapter files.
        output_path: Destination GGUF file.
        adapter_name: Value stored under ``general.name``.

    Raises:
        FileNotFoundError: If either input file is missing from
            ``adapter_dir``.
    """
    config_path = adapter_dir / "adapter_config.json"
    safetensors_path = adapter_dir / "adapter_model.safetensors"

    if not config_path.exists():
        raise FileNotFoundError(f"No adapter_config.json in {adapter_dir}")
    if not safetensors_path.exists():
        raise FileNotFoundError(f"No adapter_model.safetensors in {adapter_dir}")

    # Explicit encoding: do not depend on the platform's locale default.
    with open(config_path, encoding="utf-8") as f:
        config = json.load(f)

    lora_alpha = config.get("lora_alpha", 32)
    lora_rank = config.get("r", 16)  # only reported; not written to the file
    print(f" Config: rank={lora_rank}, alpha={lora_alpha}")

    print(" Reading safetensors...")
    tensors = read_safetensors(safetensors_path)
    print(f" Loaded {len(tensors)} tensors")

    # Remember whether the destination was already there, so that a failed
    # run only removes a file this call created.
    preexisting = output_path.exists()
    writer = GGUFWriter(str(output_path), arch="llama")
    finished = False
    try:
        writer.add_string("general.type", "adapter")
        writer.add_string("adapter.type", "lora")
        writer.add_string("general.name", adapter_name)
        writer.add_uint32("general.base_model.count", 1)
        writer.add_string("general.base_model.0.name", "Llama 3.1 8B Instruct")
        writer.add_string("general.base_model.0.organization", "Meta Llama")
        writer.add_string("general.base_model.0.repo_url",
                          "https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct")
        writer.add_array("general.tags", [
            "base_model:adapter:meta-llama/Llama-3.1-8B-Instruct",
            "lora", "sft", "transformers", "trl", "text-generation",
        ])
        writer.add_float32("adapter.lora.alpha", float(lora_alpha))
        writer.add_uint32("general.quantization_version", 2)

        converted = 0
        # Sorted so the tensor order in the output is deterministic.
        for peft_name, data in sorted(tensors.items()):
            gguf_name = peft_name_to_gguf(peft_name)
            if gguf_name is None:
                print(f" SKIP: {peft_name}")
                continue

            writer.add_tensor(gguf_name, data, raw_dtype=GGMLQuantizationType.F16)
            converted += 1

        print(f" Converted {converted} tensors")

        writer.write_header_to_file()
        writer.write_kv_data_to_file()
        writer.write_tensors_to_file()
        finished = True
    finally:
        # Always release the file handle. A half-written output must not
        # survive, because a later run would mistake it for a finished
        # conversion and skip this adapter.
        writer.close()
        if not finished and not preexisting:
            output_path.unlink(missing_ok=True)

    size_mb = output_path.stat().st_size / 1024 / 1024
    print(f" Output: {output_path} ({size_mb:.1f} MB)")
|
|
|
|
def main(adapters_dir: Path | None = None) -> int:
    """Convert every known adapter under ``<adapters_dir>/hf_download`` to GGUF.

    For each adapter name in the built-in list, the source directory is
    ``<adapters_dir>/hf_download/<name>`` and the destination is
    ``<adapters_dir>/<name>-lora-f16.gguf``. Adapters with no
    ``adapter_model.safetensors`` and adapters whose GGUF already exists are
    skipped. A failing conversion is reported and does not stop the rest.

    Args:
        adapters_dir: Root adapters directory. Defaults to
            ``J:/codette-training-lab/adapters`` when omitted.

    Returns:
        Number of adapters whose conversion raised an exception; 0 when all
        conversions succeeded or there was nothing to convert.
    """
    if adapters_dir is None:
        adapters_dir = Path("J:/codette-training-lab/adapters")
    hf_dir = adapters_dir / "hf_download"

    to_convert = []
    for name in ["empathy", "philosophy", "quantum",
                 "consciousness", "multi_perspective", "systems_architecture"]:
        src = hf_dir / name
        dst = adapters_dir / f"{name}-lora-f16.gguf"
        # The file can only exist if its parent directory does, so this one
        # check covers both.
        if not (src / "adapter_model.safetensors").exists():
            print(f"SKIP {name}: no safetensors found")
        elif dst.exists():
            print(f"SKIP {name}: GGUF already exists")
        else:
            to_convert.append((name, src, dst))

    if not to_convert:
        print("Nothing to convert!")
        return 0

    failures = 0
    for name, src, dst in to_convert:
        print(f"\nConverting {name}...")
        # Broad catch is deliberate at this top-level boundary: one bad
        # adapter must not abort the batch. Failures are counted so the
        # caller can tell the run did not fully succeed.
        try:
            convert(src, dst, name)
        except Exception as e:
            failures += 1
            print(f"FAIL: {name}: {e}")
        else:
            print(f"OK: {name}")

    return failures


if __name__ == "__main__":
    # Non-zero exit status when at least one conversion failed.
    sys.exit(1 if main() else 0)
|
|