"""
GGUF conversion script for WorldDisasterLM.

Converts the merged Hugging Face model to GGUF format for CPU inference
and mobile deployment using llama.cpp.

Quantization sizes (approximate for 8B model)
-----------------------------------------------
  Q4_K_M  → ~4.8 GB  (recommended for most use cases)
  Q5_K_M  → ~5.6 GB  (better quality)
  Q8_0    → ~8.5 GB  (highest quality, slower)
  f16     → ~15 GB   (unquantized, 16-bit floats)

Usage
-----
  # Full automated flow (requires llama.cpp cloned alongside this repo)
  python scripts/convert_gguf.py \\
      --model-path checkpoints/worlddisasterlm-merged \\
      --llama-cpp-path ../llama.cpp \\
      --quant Q4_K_M

  # Manual steps are printed if llama.cpp is not found
"""

from __future__ import annotations

import argparse
import logging
import shutil
import subprocess
import sys
from pathlib import Path

# Module-level logger; level and format are configured in main() via logging.basicConfig.
logger = logging.getLogger(__name__)


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the GGUF conversion script.

    Args:
        argv: Argument strings to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is what the script entry point relies
            on; passing an explicit list allows programmatic use.

    Returns:
        Namespace with ``model_path``, ``output_dir``, ``llama_cpp_path`` and
        ``quant`` attributes.
    """
    parser = argparse.ArgumentParser(description="Convert WorldDisasterLM to GGUF")
    parser.add_argument("--model-path", default="checkpoints/worlddisasterlm-merged", help="Path to merged HF model")
    parser.add_argument("--output-dir", default="artifacts", help="Output directory for GGUF files")
    parser.add_argument("--llama-cpp-path", default="../llama.cpp", help="Path to llama.cpp repo")
    parser.add_argument(
        "--quant",
        default="Q4_K_M",
        choices=["Q4_K_M", "Q5_K_M", "Q8_0", "f16"],
        help="Quantization type",
    )
    return parser.parse_args(argv)


def print_manual_steps(model_path: str, output_dir: str, quant: str) -> None:
    """Print shell commands for performing the GGUF conversion by hand.

    The commands from Step 2 onwards are written relative to the directory
    that contains the ``llama.cpp`` checkout, so Step 1 ends with ``cd ..``.

    Args:
        model_path: Path to the merged Hugging Face model, echoed into Step 3.
        output_dir: Directory the GGUF files are written to.
        quant: Target quantization type. For ``f16`` the quantize step is
            omitted, because the f16 file from Step 3 is already the result
            and quantizing it would use the same file as input and output.
    """
    rule = "=" * 70
    f16_file = f"{output_dir}/worlddisasterlm_f16.gguf"
    final_name = f"worlddisasterlm_{quant.lower()}.gguf"
    final_file = f"{output_dir}/{final_name}"

    print("\n" + rule)
    print("MANUAL GGUF CONVERSION STEPS")
    print(rule)
    print("\nStep 1: Clone llama.cpp and build")
    print("  git clone https://github.com/ggerganov/llama.cpp")
    print("  cd llama.cpp")
    # llama.cpp renamed its CMake backend options from LLAMA_* to GGML_*.
    print("  cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS")
    print("  cmake --build build --config Release")
    # The remaining steps reference llama.cpp/... from the parent directory.
    print("  cd ..")
    print()
    print("Step 2: Install Python dependencies")
    print("  pip install -r llama.cpp/requirements.txt")
    print()
    print("Step 3: Convert HF model to GGUF (f16)")
    print(f"  python llama.cpp/convert_hf_to_gguf.py {model_path} \\")
    print(f"         --outtype f16 --outfile {f16_file}")
    print()
    if final_file == f16_file:
        print("Step 4: Quantize (not needed: f16 is the file written in Step 3)")
    else:
        print(f"Step 4: Quantize to {quant}")
        print("  ./llama.cpp/build/bin/llama-quantize \\")
        print(f"      {f16_file} \\")
        print(f"      {final_file} \\")
        print(f"      {quant}")
    print()
    print("Step 5: Upload GGUF to Hugging Face")
    print("  huggingface-cli upload YourUsername/WorldDisasterLM-GGUF \\")
    print(f"      {final_file} \\")
    print(f"      {final_name}")
    print(rule + "\n")


def run_conversion(model_path: str, llama_cpp_path: str, output_dir: str, quant: str) -> None:
    """Convert a merged HF model to GGUF and, if requested, quantize it.

    The model is first exported to an f16 GGUF with llama.cpp's
    ``convert_hf_to_gguf.py``. Unless ``quant`` is ``f16``, that file is then
    passed to the ``llama-quantize`` binary from the llama.cpp build tree.

    Args:
        model_path: Directory containing the merged Hugging Face model.
        llama_cpp_path: Root of a llama.cpp checkout.
        output_dir: Directory for the GGUF files; created if missing.
        quant: Target quantization type (e.g. ``Q4_K_M``). ``f16`` skips the
            quantize step because the f16 export is already the result.

    Raises:
        SystemExit: With code 1 if the convert script or the model directory
            cannot be found.
        subprocess.CalledProcessError: If the convert script or the quantize
            binary exits with a non-zero status.
    """
    llama_dir = Path(llama_cpp_path).resolve()
    model_dir = Path(model_path).resolve()

    # Validate inputs before creating directories or starting long-running work.
    convert_script = llama_dir / "convert_hf_to_gguf.py"
    if not convert_script.exists():
        logger.error("convert_hf_to_gguf.py not found in %s", llama_dir)
        print_manual_steps(model_path, output_dir, quant)
        sys.exit(1)

    if not model_dir.is_dir():
        logger.error("Model directory not found: %s", model_dir)
        sys.exit(1)

    out_dir = Path(output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    f16_gguf = out_dir / "worlddisasterlm_f16.gguf"
    quant_gguf = out_dir / f"worlddisasterlm_{quant.lower()}.gguf"

    # Convert to f16 GGUF
    logger.info("Converting HF model to f16 GGUF …")
    subprocess.run(
        [sys.executable, str(convert_script), str(model_dir), "--outtype", "f16", "--outfile", str(f16_gguf)],
        check=True,
    )

    # For f16 the quantized file name equals the f16 file name; running
    # llama-quantize would read and write the same file, so stop here.
    if quant_gguf == f16_gguf:
        logger.info("GGUF model saved to %s", f16_gguf)
        logger.info("Upload with: huggingface-cli upload <repo_id> %s", f16_gguf)
        return

    # Find quantize binary (Unix layout first, then common Windows layouts)
    bin_dir = llama_dir / "build" / "bin"
    candidates = (
        bin_dir / "llama-quantize",
        bin_dir / "llama-quantize.exe",
        bin_dir / "Release" / "llama-quantize.exe",
    )
    q_bin = next((candidate for candidate in candidates if candidate.is_file()), None)
    if q_bin is None:
        logger.warning("llama-quantize binary not found. f16 GGUF saved at %s", f16_gguf)
        print_manual_steps(model_path, output_dir, quant)
        return

    # Quantize
    logger.info("Quantizing to %s …", quant)
    subprocess.run([str(q_bin), str(f16_gguf), str(quant_gguf), quant], check=True)
    logger.info("GGUF model saved to %s", quant_gguf)
    logger.info("Upload with: huggingface-cli upload <repo_id> %s", quant_gguf)


def main() -> None:
    """Script entry point: set up logging, parse options, then convert.

    If the llama.cpp directory does not exist, a warning is logged and the
    manual conversion steps are printed instead of running the conversion.
    """
    logging.basicConfig(format="%(asctime)s | %(levelname)s | %(message)s", level=logging.INFO)
    cli = parse_args()

    if Path(cli.llama_cpp_path).exists():
        run_conversion(
            model_path=cli.model_path,
            llama_cpp_path=cli.llama_cpp_path,
            output_dir=cli.output_dir,
            quant=cli.quant,
        )
        return

    # No checkout available: fall back to printing the instructions.
    logger.warning("llama.cpp directory not found at %s — printing manual steps.", cli.llama_cpp_path)
    print_manual_steps(cli.model_path, cli.output_dir, cli.quant)


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()