# cache_download.py - Run ONCE
from transformers import AutoModelForCausalLM, AutoTokenizer

# MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"  # replaced due to tokenizer bugs
MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"  # ✅ No tokenizer bugs

print(f"Caching {MODEL_NAME}...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir="./hf_cache")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    cache_dir="./hf_cache",
    torch_dtype="auto",
    device_map="cpu",
)
print("✅ Cached to ./hf_cache/")
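
# Minimal sketch (assumption, not part of the original script): once the weights
# are cached, later scripts can reuse ./hf_cache without re-downloading by
# passing the same cache_dir and local_files_only=True:
#
#   tokenizer = AutoTokenizer.from_pretrained(
#       MODEL_NAME, cache_dir="./hf_cache", local_files_only=True
#   )
#   model = AutoModelForCausalLM.from_pretrained(
#       MODEL_NAME,
#       cache_dir="./hf_cache",
#       local_files_only=True,
#       torch_dtype="auto",
#       device_map="cpu",
#   )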