# syntax=docker/dockerfile:1
# alpha-core-ai / Dockerfile (maintainer: Sabithulla, upstream commit 9d2777a)
# Multi-stage Docker build: Stage 1 compiles llama-cpp-python once, Stage 2
# reuses the compiled wheels — no build timeout. First build takes 8-12
# minutes; subsequent builds are served from cache.
# Stage 1: Compile llama-cpp-python to a wheel (one-time build).
# The build toolchain lives only in this stage and never reaches the
# final image.
FROM python:3.11-slim AS builder
WORKDIR /tmp/build

# --no-install-recommends keeps the layer minimal; apt list cleanup happens
# in the same RUN so the package index never persists in a layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest so this layer (and the expensive wheel
# build below) stays cached until requirements.txt itself changes.
COPY requirements.txt .

# Pre-build ALL wheels (llama-cpp-python gets compiled here, once).
RUN pip wheel --no-cache-dir -r requirements.txt -w /tmp/wheels
# Stage 2: Production image — installs only the pre-built wheels from
# Stage 1; no compilers or build headers are present here.
FROM python:3.11-slim
WORKDIR /app

# Unprivileged runtime user (created early so COPY --chown can use it).
# Port 7860 is >1024, so no extra capabilities are needed to bind it.
RUN groupadd --system app && useradd --system --gid app --home /app app

# Runtime-only OS dependencies (no build tools).
# NOTE(review): libtesseract-dev is a -dev (headers) package; if nothing is
# compiled at runtime, the runtime library package alone may suffice — confirm.
RUN apt-get update && apt-get install -y --no-install-recommends \
    tesseract-ocr \
    libtesseract-dev \
    && rm -rf /var/lib/apt/lists/*

# Bug fix: requirements.txt must exist in THIS stage before the pip install
# below — previously it only arrived with the later `COPY . .`, so the
# install step failed with "file not found".
COPY requirements.txt .

# Pre-built wheels from Stage 1 (compilation already done).
COPY --from=builder /tmp/wheels /tmp/wheels

# Install strictly from the local wheel cache (--no-index: no network, no
# compilation). Remove the wheels from the final filesystem in the same
# layer; note the COPY layer above still carries their size in the image.
RUN pip install --no-cache-dir --no-index --find-links /tmp/wheels -r requirements.txt \
    && rm -rf /tmp/wheels

# Application code, owned by the runtime user. download_models.py is part of
# the build context, so no separate COPY for it is needed.
COPY --chown=app:app . .

# Drop privileges before creating the model cache and fetching models, so
# everything under /app stays writable by (and owned by) the app user.
USER app
RUN mkdir -p models

# Best-effort model download at build time; the `|| echo` deliberately
# tolerates offline builds instead of failing them.
RUN python download_models.py || echo "Model download attempted"

EXPOSE 7860
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860", "--timeout-keep-alive", "75"]