"""Pre-pull Qwen2.5-VL weights into the local HF cache. Resumable: HF snapshot_download dedupes already-downloaded files. Both models go into $HF_HOME (default: /mnt/local-fast/opd_zt/hf_cache). """ from __future__ import annotations import os import sys import time from huggingface_hub import snapshot_download MODELS = [ # Student first — smaller, gives us a quick win and confirms cache works. ("Qwen/Qwen2.5-VL-7B-Instruct", 8), # Teacher — large, do last so it doesn't block the student. ("Qwen/Qwen2.5-VL-72B-Instruct", 16), ] # Skip pytorch_model.bin files since safetensors are present and we only need one format. IGNORE = ["*.bin", "*.msgpack", "*.h5", "*.onnx", "*.gguf", "original/*"] def log(msg: str) -> None: print(f"[{time.strftime('%Y-%m-%d %H:%M:%S')}] {msg}", flush=True) def pull(repo_id: str, workers: int) -> None: log(f"START {repo_id} (workers={workers}, ignore={IGNORE})") t0 = time.time() path = snapshot_download( repo_id=repo_id, repo_type="model", max_workers=workers, ignore_patterns=IGNORE, ) dt = time.time() - t0 log(f"DONE {repo_id} in {dt/60:.1f} min -> {path}") def main() -> None: log(f"start. HF_HOME={os.environ.get('HF_HOME', '~/.cache/huggingface')}") for repo_id, workers in MODELS: pull(repo_id, workers) log("ALL DONE") if __name__ == "__main__": try: main() except Exception: import traceback traceback.print_exc() sys.exit(1)