#!/bin/bash
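# Pre-download Llama weights into the HF cache before starting the server.
# The script runs on every start, and only when HUGGINGFACE_TOKEN is set; the
# download itself is expected to happen on cold boot only, since later
# restarts find the weights already in the cache.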
# Strict mode: stop on any failing command, on use of an unset variable, and
# on a failure in any pipeline stage. A failed pre-download therefore aborts
# the script before the server is started.
set -euo pipefail

# Hugging Face repo to fetch; override with the LLAMA_MODEL_ID env variable.
MODEL_ID="${LLAMA_MODEL_ID:-meta-llama/Llama-3.2-1B-Instruct}"

# "${VAR:-}" keeps this check valid under `set -u` when the token is unset.
if [[ -n "${HUGGINGFACE_TOKEN:-}" ]]; then
    echo "[startup] Pre-downloading ${MODEL_ID} …"
    # The heredoc delimiter is quoted so the shell performs no expansion inside
    # the Python source. The model id is handed over as an argument (sys.argv[1])
    # instead of being spliced into the code, so quotes, backslashes or `$` in
    # LLAMA_MODEL_ID cannot break or alter the Python program.
    # The token is read by Python from the environment, so it must be exported.
    python - "$MODEL_ID" <<'EOF'
import os
import sys
from huggingface_hub import snapshot_download
snapshot_download(
    repo_id=sys.argv[1],
    token=os.environ["HUGGINGFACE_TOKEN"],
    # Files matching these patterns are excluded from the download.
    ignore_patterns=["*.pt", "original/*"],
)
print("[startup] Model weights ready.")
EOF
else
    echo "[startup] HUGGINGFACE_TOKEN not set — weights download on first request."
fi

# exec replaces this shell with uvicorn, so the server receives signals
# directly instead of through an intermediate bash process.
exec uvicorn app.main:app --host 0.0.0.0 --port 7860