# Two-stage image for a uvicorn-served Python app.
#   Stage 1 (builder): has a C/C++ toolchain and builds every requirement
#                      (including llama-cpp-python) into wheels.
#   Stage 2 (runtime): installs those wheels without any compiler present.

# ---------------------------------------------------------------------------
# Stage 1: build wheels for everything in requirements.txt
# ---------------------------------------------------------------------------
FROM python:3.11-slim AS builder

WORKDIR /tmp/build

# Build tools live only in this stage; they never reach the final image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
    && rm -rf /var/lib/apt/lists/*

# Copy only the manifest so this layer stays cached until requirements change.
COPY requirements.txt .

# Build ALL wheels (llama-cpp-python gets compiled here).
RUN pip wheel --no-cache-dir -r requirements.txt -w /tmp/wheels

# ---------------------------------------------------------------------------
# Stage 2: production image (installs the pre-built wheels from Stage 1)
# ---------------------------------------------------------------------------
FROM python:3.11-slim

WORKDIR /app

# Runtime OS dependencies only (no build tools).
RUN apt-get update && apt-get install -y --no-install-recommends \
        libtesseract-dev \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# Pre-built wheels produced by the builder stage.
# NOTE(review): this layer keeps the wheel files in the final image; a
# BuildKit bind mount from the builder stage would avoid that if BuildKit
# is available in the target build environment.
COPY --from=builder /tmp/wheels /tmp/wheels

# requirements.txt was only copied into the builder stage; it must also be
# present here, otherwise the `pip install -r requirements.txt` below fails.
COPY requirements.txt .

# Install strictly from the local wheels: --no-index forbids network lookups,
# so nothing is downloaded or compiled in this stage.
RUN pip install --no-cache-dir --no-index --find-links /tmp/wheels -r requirements.txt

# Application code.
COPY . .

# Directory for model files.
RUN mkdir -p models

# Download models at build time.
# The explicit COPY makes the build fail early if the script is missing from
# the build context; the download itself is best-effort (a failure is reported
# by the echo but does not abort the build).
COPY download_models.py .
RUN python download_models.py || echo "Model download attempted"

# NOTE(review): no USER is set, so the container runs as root. Switching to
# a non-root user requires confirming which paths the app writes at runtime.

EXPOSE 7860

CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--timeout-keep-alive", "75"]