File size: 1,533 Bytes
"""Pre-pull Qwen2.5-VL weights into the local HF cache.

Resumable: HF snapshot_download dedupes already-downloaded files.
Both models go into $HF_HOME (default: /mnt/local-fast/opd_zt/hf_cache).
"""
from __future__ import annotations
import os
import sys
import time
from huggingface_hub import snapshot_download
# Download plan, processed top to bottom by main(). Each entry is a
# (repo_id, workers) pair; workers is forwarded to snapshot_download as
# max_workers by pull().
MODELS = [
    # Student: the smaller model goes first for a quick win that also
    # confirms the cache is working.
    ("Qwen/Qwen2.5-VL-7B-Instruct", 8),
    # Teacher: the large model goes last so it never blocks the student.
    ("Qwen/Qwen2.5-VL-72B-Instruct", 16),
]
# Glob patterns handed to snapshot_download as ignore_patterns. The
# pytorch_model.bin files are skipped because safetensors are present and
# only one weight format is needed; the remaining patterns are excluded
# from the download as well.
IGNORE = [
    "*.bin",
    "*.msgpack",
    "*.h5",
    "*.onnx",
    "*.gguf",
    "original/*",
]
def log(msg: str) -> None:
    """Print ``msg`` to stdout with a local-time timestamp prefix.

    The emitted line has the form ``[YYYY-MM-DD HH:MM:SS] msg``, and stdout
    is flushed on every call.
    """
    stamp = time.strftime("%Y-%m-%d %H:%M:%S")
    print(f"[{stamp}] {msg}", flush=True)
def pull(repo_id: str, workers: int) -> None:
    """Download one model snapshot via ``snapshot_download`` and log timing.

    Logs a START line, fetches the repository with the module-level
    ``IGNORE`` patterns excluded, then logs a DONE line with the elapsed
    minutes and the path returned by ``snapshot_download``.

    Args:
        repo_id: Hub repository id, e.g. ``"Qwen/Qwen2.5-VL-7B-Instruct"``.
        workers: Forwarded to ``snapshot_download`` as ``max_workers``.

    Raises:
        Any exception raised by ``snapshot_download``; nothing is caught here.
    """
    log(f"START {repo_id} (workers={workers}, ignore={IGNORE})")
    # Measure the duration with a monotonic clock: wall-clock time can jump
    # (NTP sync, manual adjustment) during a long download and would skew or
    # even negate the reported elapsed time.
    started = time.monotonic()
    path = snapshot_download(
        repo_id=repo_id,
        repo_type="model",
        max_workers=workers,
        ignore_patterns=IGNORE,
    )
    elapsed_min = (time.monotonic() - started) / 60
    log(f"DONE {repo_id} in {elapsed_min:.1f} min -> {path}")
def main() -> None:
    """Log the HF_HOME setting, then pull every entry of ``MODELS`` in order.

    If the ``HF_HOME`` environment variable is unset, the start line shows
    the literal ``~/.cache/huggingface`` instead. Each model is downloaded
    sequentially through ``pull``; an exception from any download propagates
    and stops the remaining ones.
    """
    hf_home = os.environ.get('HF_HOME', '~/.cache/huggingface')
    log(f"start. HF_HOME={hf_home}")
    for model_id, n_workers in MODELS:
        pull(model_id, n_workers)
    log("ALL DONE")
if __name__ == "__main__":
    # Script entry point: run the downloads; on any ordinary exception print
    # the traceback and terminate with exit status 1.
    try:
        main()
    except Exception:
        # Imported here so the module is only loaded on the failure path.
        from traceback import print_exc

        print_exc()
        # Same effect as sys.exit(1), which raises SystemExit(1).
        raise SystemExit(1)
|