# syntax=docker/dockerfile:1
# ─── Stage: Base ──────────────────────────────────────────────────────────────
# Hugging Face Spaces uses port 7860 by default.
# We install Ollama (llama.cpp under the hood) for fast CPU inference.
FROM python:3.11-slim

# Fail pipelines if any upstream command fails (protects the curl | bash below;
# default /bin/sh would mask a failed curl). hadolint DL4006.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# System dependencies for the Ollama install script + curl.
# --no-install-recommends keeps the layer minimal; the apt list cache is
# removed in the same layer so it never persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        bash \
        ca-certificates \
        curl \
        zstd \
    && rm -rf /var/lib/apt/lists/*

# ─── Install Ollama ───────────────────────────────────────────────────────────
# NOTE(review): this runs an unpinned remote script as root; for reproducible,
# auditable builds consider downloading a pinned Ollama release and verifying
# its checksum instead.
RUN curl -fsSL https://ollama.com/install.sh | bash

WORKDIR /app

# ─── Python dependencies ──────────────────────────────────────────────────────
# requirements.txt is copied before the source tree so this (slow) layer stays
# cached until the dependency manifest itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# ─── Copy source code ─────────────────────────────────────────────────────────
COPY app/ ./app/
COPY tests/ ./tests/
# --chmod replaces the former separate `RUN chmod +x` (one layer instead of two).
COPY --chmod=0755 startup.sh .

# ─── Environment ──────────────────────────────────────────────────────────────
# Set MOCK_LLM=false to use Ollama. Override at runtime if needed for testing.
ENV MOCK_LLM=false \
    MODEL_NAME=qwen2.5:0.5b \
    OLLAMA_HOST=http://localhost:11434

# Documentation only (does not publish the port) — HF Spaces routes to 7860.
EXPOSE 7860

# Long start period: startup.sh boots Ollama and pulls the model before the
# FastAPI app is reachable.
# NOTE(review): assumes the app answers 2xx on / — point this at the app's real
# health endpoint (e.g. /healthz) if it has one.
HEALTHCHECK --interval=30s --timeout=5s --start-period=180s --retries=3 \
    CMD curl -fsS http://localhost:7860/ || exit 1

# startup.sh: boots Ollama, pulls model, starts FastAPI.
# Exec form → the script is PID 1 and receives SIGTERM from `docker stop`.
CMD ["./startup.sh"]