# Container image for a FastAPI/uvicorn service backed by llama-cpp-python.
# At container start the entrypoint runs download_model.py and then serves
# main:app on port 7860.

# Base image with Python
FROM python:3.11-slim

# System dependencies for llama-cpp (compiler toolchain and fetch tools).
# --no-install-recommends keeps the layer small; the apt lists are removed in
# the same layer so they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        wget \
        git \
    && rm -rf /var/lib/apt/lists/*

# Install Python packages. This happens before COPY so the (slow) dependency
# layer stays cached when only application code changes.
RUN pip install --no-cache-dir \
        llama-cpp-python==0.2.66 \
        fastapi \
        uvicorn \
        huggingface-hub

# Create app directory and copy the build context into it.
WORKDIR /app
COPY . /app

# Model to fetch from the Hugging Face Hub on container startup.
# NOTE(review): presumably read by download_model.py, which is not visible
# here -- confirm. Both values can be overridden with `docker run -e`.
ENV MODEL_REPO=TheBloke/phi-2-GGUF
ENV MODEL_FILE=phi-2.Q4_K_M.gguf

# Generate the entrypoint script.
# - printf '%s\n' writes one argument per line without relying on the shell's
#   echo interpreting "\n" escapes.
# - set -e stops the script if download_model.py exits non-zero instead of
#   launching the server anyway.
# - exec replaces bash with uvicorn so the server receives stop signals
#   (e.g. SIGTERM from `docker stop`) directly.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'set -e' \
        'python download_model.py' \
        'exec uvicorn main:app --host 0.0.0.0 --port 7860' \
        > entrypoint.sh \
    && chmod +x entrypoint.sh

# Documents the port the server listens on; publishing is still done with -p.
EXPOSE 7860

CMD ["./entrypoint.sh"]