#!/bin/bash # Pre-download Llama weights into the HF cache before starting the server. # Runs once on cold boot; subsequent restarts use the cached weights. set -e MODEL_ID="${LLAMA_MODEL_ID:-meta-llama/Llama-3.2-1B-Instruct}" if [ -n "$HUGGINGFACE_TOKEN" ]; then echo "[startup] Pre-downloading ${MODEL_ID} …" python - <