Spaces:
Running
Running
# Startup script: pre-download the Llama weights into the Hugging Face cache,
# then start the uvicorn server.
# The download runs once on cold boot; subsequent restarts use the cached
# weights.
#
# Environment:
#   LLAMA_MODEL_ID     optional; repo id of the model to fetch
#                      (default: meta-llama/Llama-3.2-1B-Instruct).
#   HUGGINGFACE_TOKEN  optional; when non-empty, the weights are fetched
#                      before the server starts. Must be exported, because
#                      the Python helper reads it from os.environ.

# -e: a failed pre-download aborts startup instead of launching the server.
# -u: a misspelled/unset variable is an error rather than an empty string.
set -eu

MODEL_ID="${LLAMA_MODEL_ID:-meta-llama/Llama-3.2-1B-Instruct}"

# ${VAR:-} keeps the "token not set" case working under `set -u`.
if [ -n "${HUGGINGFACE_TOKEN:-}" ]; then
  echo "[startup] Pre-downloading ${MODEL_ID} …"
  # The heredoc delimiter is quoted so the shell expands nothing inside the
  # Python source. The model id is passed through the environment instead of
  # being spliced into the code, so quotes, backslashes or `$` in the id
  # cannot break or alter the Python program.
  MODEL_ID="${MODEL_ID}" python - <<'PY'
import os

from huggingface_hub import snapshot_download

snapshot_download(
    repo_id=os.environ["MODEL_ID"],
    token=os.environ["HUGGINGFACE_TOKEN"],
    # Files matching these patterns are excluded from the download.
    ignore_patterns=["*.pt", "original/*"],
)
print("[startup] Model weights ready.")
PY
else
  echo "[startup] HUGGINGFACE_TOKEN not set — weights download on first request."
fi

# exec replaces this shell with uvicorn, so the server receives signals
# directly instead of through an intermediate shell process.
exec uvicorn app.main:app --host 0.0.0.0 --port 7860