#!/usr/bin/env python3
"""
Fetch Pre-exported mT5 ONNX Model from Hugging Face Hub
=========================================================

Downloads the pre-exported ONNX model instead of converting from PyTorch,
saving ~5 minutes per CI run.

The model is hosted at: https://huggingface.co/dev11-13/mT5-XLSum-ONNX
Downloaded to: models/mt5_onnx/

This script is used by:
- GitHub Actions CI (Hindi pipeline) to avoid expensive PyTorch->ONNX export
- Local development when you don't want to install full export dependencies

Usage:
    python backend/models/fetch_mt5.py
"""

from pathlib import Path

from huggingface_hub import snapshot_download

HF_REPO_ID = "dev11-13/mT5-XLSum-ONNX"
# Repo layout: backend/models/fetch_mt5.py -> <repo root>/models/mt5_onnx
OUTPUT_DIR = Path(__file__).parent.parent.parent / "models" / "mt5_onnx"


def main() -> None:
    """Download the pre-exported ONNX snapshot into OUTPUT_DIR (idempotent).

    Skips the download entirely when the encoder ONNX file is already
    present, verifies the snapshot afterwards, and prints a per-file size
    report.

    Raises:
        RuntimeError: if the download finishes but the expected encoder
            ONNX file is missing (wrong/renamed repo contents).
    """
    # encoder_model.onnx doubles as the "already downloaded" sentinel.
    onnx_sentinel = OUTPUT_DIR / "encoder_model.onnx"
    if onnx_sentinel.exists():
        print(f"✅ ONNX model already present ({onnx_sentinel}). Skipping download.")
        return

    print(f"⬇️ Downloading pre-exported ONNX model from {HF_REPO_ID} ...")
    # NOTE: `local_dir_use_symlinks` was dropped — it is deprecated and
    # ignored by modern huggingface_hub; `local_dir` now always writes
    # real files (no symlinks into the shared cache).
    snapshot_download(
        repo_id=HF_REPO_ID,
        local_dir=OUTPUT_DIR,
    )

    # Verify download
    if not onnx_sentinel.exists():
        raise RuntimeError(
            f"Download completed but {onnx_sentinel} not found. "
            f"Check the HF repository: https://huggingface.co/{HF_REPO_ID}"
        )

    # snapshot_download(local_dir=...) also creates a `.cache/huggingface`
    # bookkeeping subdirectory inside OUTPUT_DIR — report regular files only.
    files = sorted(f for f in OUTPUT_DIR.iterdir() if f.is_file())
    print(f"\n✅ Downloaded {len(files)} files to {OUTPUT_DIR}:")
    for f in files:
        size_mb = f.stat().st_size / (1024 * 1024)
        print(f"  {f.name:40s} {size_mb:>8.2f} MB")

    print("\nModel ready for inference.")


if __name__ == "__main__":
    main()