# syntax=docker/dockerfile:1
# Hugging Face Spaces — Docker deployment
# Port 7860 is required by HF Spaces.
#
# Space Secrets to set (Settings → Variables and secrets):
#   HUGGINGFACE_TOKEN   HF token with Llama-3.2-1B licence accepted
#   OPENAI_API_KEY      GPT-4.1 primary (or leave blank, use GROQ_API_KEY only)
#   GROQ_API_KEY        Groq fallback — free tier at console.groq.com
#
# The entrypoint pre-downloads Llama weights before uvicorn starts so the
# first user request is not blocked by a 2.5 GB download.

FROM python:3.11-slim-bookworm

# HF_HOME points the Hugging Face cache at a path inside the app directory.
# The PYTHON* flags disable .pyc files and stdout/stderr buffering.
ENV HF_HOME=/app/.cache/huggingface \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

WORKDIR /app

# HF Spaces runs Docker containers as UID 1000 (see the Spaces Docker docs).
# Create that user up front and hand it /app plus the model cache directory,
# so the entrypoint's weight download has somewhere writable to land.
RUN useradd --create-home --uid 1000 user \
    && mkdir -p "$HF_HOME" \
    && chown -R user:user /app

# Copy only the manifest first so the dependency layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt .

# Install CPU-only torch first to avoid pulling the 2 GB CUDA wheel.
# HF Spaces CPU Basic has no GPU; the Llama assistant auto-detects this.
# The specifier MUST be quoted: unquoted, the shell parses `>` as a redirect,
# installs an unconstrained `torch`, and writes pip's output to a file
# named "=2.2.0".
RUN pip install --no-cache-dir \
        "torch>=2.2.0" \
        --index-url https://download.pytorch.org/whl/cpu \
    && pip install --no-cache-dir -r requirements.txt

# Application code and entrypoint, owned by the runtime user. --chmod sets the
# executable bit at copy time instead of in a separate RUN chmod layer.
COPY --chown=user:user app/ app/
COPY --chown=user:user --chmod=755 scripts/entrypoint.sh scripts/entrypoint.sh

# Drop root now that every step needing it (useradd, pip install) is done.
USER user

EXPOSE 7860

# Exec form; the relative path resolves against WORKDIR (/app).
ENTRYPOINT ["scripts/entrypoint.sh"]