"""
GGUF conversion script for WorldDisasterLM.

Converts the merged Hugging Face model to GGUF format for CPU inference
and mobile deployment using llama.cpp.

Quantization sizes (approximate for 8B model)
-----------------------------------------------
Q4_K_M  →  ~4.8 GB  (recommended for most use cases)
Q5_K_M  →  ~5.6 GB  (better quality)
Q8_0    →  ~8.5 GB  (highest quality, slower)
f16     →  ~15 GB   (unquantized, half precision)

Usage
-----
# Full automated flow (requires llama.cpp cloned alongside this repo)
python scripts/convert_gguf.py \\
    --model-path checkpoints/worlddisasterlm-merged \\
    --llama-cpp-path ../llama.cpp \\
    --quant Q4_K_M

# Manual steps are printed if llama.cpp is not found
"""

from __future__ import annotations

import argparse
import logging
import shutil
import subprocess
import sys
from pathlib import Path

logger = logging.getLogger(__name__)


def parse_args() -> argparse.Namespace:
    """Parse the command-line options for the conversion script.

    Returns:
        Namespace with ``model_path``, ``output_dir``, ``llama_cpp_path``
        and ``quant`` attributes.
    """
    parser = argparse.ArgumentParser(description="Convert WorldDisasterLM to GGUF")
    parser.add_argument(
        "--model-path",
        default="checkpoints/worlddisasterlm-merged",
        help="Path to merged HF model",
    )
    parser.add_argument("--output-dir", default="artifacts", help="Output directory for GGUF files")
    parser.add_argument("--llama-cpp-path", default="../llama.cpp", help="Path to llama.cpp repo")
    parser.add_argument(
        "--quant",
        default="Q4_K_M",
        choices=["Q4_K_M", "Q5_K_M", "Q8_0", "f16"],
        help="Quantization type",
    )
    return parser.parse_args()


def _is_f16(quant: str) -> bool:
    """Return True when *quant* names the intermediate f16 format itself."""
    return quant.lower() == "f16"


def _find_quantize_binary(llama_dir: Path) -> Path | None:
    """Return the first existing llama-quantize binary under *llama_dir*, or None."""
    candidates = (
        llama_dir / "build" / "bin" / "llama-quantize",
        # Location checked for Windows builds (Release configuration sub-folder).
        llama_dir / "build" / "bin" / "Release" / "llama-quantize.exe",
    )
    for candidate in candidates:
        if candidate.exists():
            return candidate
    return None


def print_manual_steps(model_path: str, output_dir: str, quant: str) -> None:
    """Print the shell commands needed to perform the conversion by hand.

    Args:
        model_path: Path to the merged Hugging Face model directory.
        output_dir: Directory the GGUF files should be written to.
        quant: Target quantization type (e.g. ``Q4_K_M`` or ``f16``).

    When *quant* is ``f16`` the quantize step is omitted: the f16 file
    produced by the conversion step is already the final artifact, and
    quantizing it would mean using one file as both input and output.
    """
    f16_file = f"{output_dir}/worlddisasterlm_f16.gguf"
    final_name = f"worlddisasterlm_{quant.lower()}.gguf"
    final_file = f"{output_dir}/{final_name}"

    print("\n" + "=" * 70)
    print("MANUAL GGUF CONVERSION STEPS")
    print("=" * 70)
    print("\nStep 1: Clone llama.cpp and build")
    print(" git clone https://github.com/ggerganov/llama.cpp")
    print(" cd llama.cpp")
    print(" cmake -B build -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS")
    print(" cmake --build build --config Release")
    print()
    print("Step 2: Install Python dependencies")
    print(" pip install -r llama.cpp/requirements.txt")
    print()
    print("Step 3: Convert HF model to GGUF (f16)")
    print(f" python llama.cpp/convert_hf_to_gguf.py {model_path} \\")
    print(f" --outtype f16 --outfile {f16_file}")
    print()

    upload_step = 4
    if not _is_f16(quant):
        print(f"Step 4: Quantize to {quant}")
        print(" ./llama.cpp/build/bin/llama-quantize \\")
        print(f" {f16_file} \\")
        print(f" {final_file} \\")
        print(f" {quant}")
        print()
        upload_step = 5

    print(f"Step {upload_step}: Upload GGUF to Hugging Face")
    print(" huggingface-cli upload YourUsername/WorldDisasterLM-GGUF \\")
    print(f" {final_file} \\")
    print(f" {final_name}")
    print("=" * 70 + "\n")


def run_conversion(model_path: str, llama_cpp_path: str, output_dir: str, quant: str) -> None:
    """Convert the HF model to an f16 GGUF and optionally quantize it.

    Args:
        model_path: Path to the merged Hugging Face model directory.
        llama_cpp_path: Path to a llama.cpp checkout.
        output_dir: Directory for the generated GGUF files (created if missing).
        quant: Target quantization type; ``f16`` skips the quantize step.

    Raises:
        SystemExit: With status 1 if ``convert_hf_to_gguf.py`` is missing
            from the llama.cpp checkout.
        subprocess.CalledProcessError: If the convert or quantize command
            exits with a non-zero status.
    """
    llama_dir = Path(llama_cpp_path).resolve()
    model_dir = Path(model_path).resolve()
    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    convert_script = llama_dir / "convert_hf_to_gguf.py"
    if not convert_script.exists():
        logger.error("convert_hf_to_gguf.py not found in %s", llama_dir)
        print_manual_steps(model_path, output_dir, quant)
        sys.exit(1)

    f16_gguf = out_dir / "worlddisasterlm_f16.gguf"

    # Convert to f16 GGUF
    logger.info("Converting HF model to f16 GGUF …")
    subprocess.run(
        [
            sys.executable,
            str(convert_script),
            str(model_dir),
            "--outtype",
            "f16",
            "--outfile",
            str(f16_gguf),
        ],
        check=True,
    )

    # The f16 file is already the requested artifact. Quantizing would pass
    # the same path as both input and output to llama-quantize, so stop here.
    if _is_f16(quant):
        logger.info("GGUF model saved to %s", f16_gguf)
        logger.info("Upload with: huggingface-cli upload %s", f16_gguf)
        return

    quant_gguf = out_dir / f"worlddisasterlm_{quant.lower()}.gguf"

    # Find quantize binary
    q_bin = _find_quantize_binary(llama_dir)
    if q_bin is None:
        logger.warning("llama-quantize binary not found. f16 GGUF saved at %s", f16_gguf)
        print_manual_steps(model_path, output_dir, quant)
        return

    # Quantize
    logger.info("Quantizing to %s …", quant)
    subprocess.run([str(q_bin), str(f16_gguf), str(quant_gguf), quant], check=True)
    logger.info("GGUF model saved to %s", quant_gguf)
    logger.info("Upload with: huggingface-cli upload %s", quant_gguf)


def main() -> None:
    """Script entry point: configure logging, parse arguments and run the conversion.

    If the llama.cpp directory does not exist, the manual steps are printed
    and the function returns without converting anything.
    """
    logging.basicConfig(level=logging.INFO, format="%(asctime)s | %(levelname)s | %(message)s")
    args = parse_args()

    llama_dir = Path(args.llama_cpp_path)
    if not llama_dir.exists():
        logger.warning(
            "llama.cpp directory not found at %s — printing manual steps.",
            args.llama_cpp_path,
        )
        print_manual_steps(args.model_path, args.output_dir, args.quant)
        return

    run_conversion(
        model_path=args.model_path,
        llama_cpp_path=args.llama_cpp_path,
        output_dir=args.output_dir,
        quant=args.quant,
    )


if __name__ == "__main__":
    main()