# syntax=docker/dockerfile:1

# Stage 1 (builder): turn every entry of requirements.txt into a wheel under
# /tmp/wheels. Anything that ships only as source (the original note names
# llama-cpp-python) is compiled here, so later stages need no compiler.
FROM python:3.11-slim AS builder

WORKDIR /tmp/build

# Compiler toolchain used only while building wheels in this stage.
# --no-install-recommends stops apt from pulling in optional packages that
# the wheel build does not need; the apt lists are removed in the same layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest, so the wheel-building layer below is
# reused from cache until requirements.txt itself changes.
COPY requirements.txt .

# Build wheels for all requirements into /tmp/wheels.
RUN pip wheel --no-cache-dir -r requirements.txt -w /tmp/wheels

# Stage 2 (production): installs the wheels produced by the "builder" stage,
# adds the application code and starts the uvicorn server.
FROM python:3.11-slim

WORKDIR /app

# OS packages the application needs at runtime (no compiler toolchain).
# --no-install-recommends keeps optional packages out of the image.
# NOTE(review): libtesseract-dev is a development package; presumably only
# the tesseract-ocr binary/shared library is needed at runtime -- confirm
# against the application before dropping it.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libtesseract-dev \
    tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# requirements.txt has to exist in THIS stage before pip reads it: files
# copied into the builder stage are not visible here, and the application
# code is only copied further down.
COPY requirements.txt .

# Install strictly from the pre-built wheels (--no-index: no package index
# access, so nothing is downloaded or compiled here).
# The wheel directory is bind-mounted from the builder stage only for the
# duration of this RUN (requires BuildKit), so the wheel files themselves are
# never stored in a layer of the final image.
RUN --mount=type=bind,from=builder,source=/tmp/wheels,target=/tmp/wheels \
    pip install --no-cache-dir --no-index --find-links=/tmp/wheels -r requirements.txt

# Copy application code. This already includes download_models.py, so no
# separate COPY of that script is needed.
COPY . .

# Create models directory
RUN mkdir -p models

# Download models at build time. This is deliberately best-effort: a failed
# download does not fail the build, but it is reported on stderr so that it
# is visible in the build log instead of looking like a success.
# NOTE(review): because this runs after "COPY . .", every source change
# re-runs the download; moving it earlier is only safe if download_models.py
# does not import application modules -- verify before reordering.
RUN python download_models.py \
    || echo "WARNING: download_models.py failed; models were not downloaded at build time" >&2

# NOTE(review): no USER instruction is set, so the server runs as root.
# Consider a dedicated non-root user once the paths the application writes
# to at runtime (e.g. models/) are known.

EXPOSE 7860

# Exec form, so uvicorn runs as PID 1 and receives stop signals directly.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--timeout-keep-alive", "75"]