"""Pre-pull Qwen2.5-VL weights into the local HF cache.

Resumable: HF snapshot_download dedupes already-downloaded files.
Both models go into $HF_HOME (project default: /mnt/local-fast/opd_zt/hf_cache;
main() reports ~/.cache/huggingface when HF_HOME is unset).
"""
from __future__ import annotations

import os
import sys
import time

from huggingface_hub import snapshot_download

# Download queue: (repo_id, workers) pairs, pulled in list order by main().
# `workers` is forwarded to snapshot_download as max_workers.
MODELS = [
    ("Qwen/Qwen2.5-VL-7B-Instruct", 8),    # student: smaller, so it goes first as a quick cache sanity check
    ("Qwen/Qwen2.5-VL-72B-Instruct", 16),  # teacher: large, so it goes last and never blocks the student
]

# Glob patterns handed to snapshot_download as ignore_patterns.
# pytorch_model.bin-style files are skipped because safetensors are present
# and a single weight format is all we need; the other patterns exclude
# further formats/directories this script does not want in the cache.
IGNORE = [
    "*.bin",
    "*.msgpack",
    "*.h5",
    "*.onnx",
    "*.gguf",
    "original/*",
]


def log(msg: str) -> None:
    """Print *msg* to stdout prefixed with a local-time ``[YYYY-MM-DD HH:MM:SS]`` stamp.

    Output is flushed immediately so progress lines show up promptly even
    when stdout is redirected to a file or pipe.
    """
    stamp = time.strftime("%Y-%m-%d %H:%M:%S")
    line = f"[{stamp}] {msg}"
    print(line, flush=True)


def pull(repo_id: str, workers: int) -> None:
    """Download one model repository snapshot and log how long it took.

    Args:
        repo_id: Hub repository id, e.g. ``"Qwen/Qwen2.5-VL-7B-Instruct"``.
        workers: Forwarded to ``snapshot_download`` as ``max_workers``.

    Files matching the module-level ``IGNORE`` patterns are not downloaded.
    Any exception raised by ``snapshot_download`` propagates to the caller.
    """
    log(f"START  {repo_id}  (workers={workers}, ignore={IGNORE})")
    # Monotonic clock: a multi-hour download must not report a bogus (or
    # negative) duration because the wall clock was adjusted mid-transfer.
    t0 = time.monotonic()
    path = snapshot_download(
        repo_id=repo_id,
        repo_type="model",
        max_workers=workers,
        ignore_patterns=IGNORE,
    )
    dt = time.monotonic() - t0
    log(f"DONE   {repo_id}  in {dt/60:.1f} min  -> {path}")


def main() -> None:
    """Pull every entry of ``MODELS`` in order, logging start and completion.

    The first log line reports the ``HF_HOME`` environment variable, or
    ``~/.cache/huggingface`` when it is unset. An exception from any pull
    propagates and stops the remaining downloads.
    """
    hf_home = os.environ.get("HF_HOME", "~/.cache/huggingface")
    log(f"start.  HF_HOME={hf_home}")

    for model_id, n_workers in MODELS:
        pull(model_id, n_workers)

    log("ALL DONE")


if __name__ == "__main__":
    # Script entry point: run the downloads; on any ordinary exception dump
    # the traceback and terminate with exit status 1.
    try:
        main()
    except Exception:
        import traceback

        traceback.print_exc()
        raise SystemExit(1)