# ============================================================================
# Stage 1: Build llama-cpp-python with CPU optimizations
# ============================================================================
FROM python:3.11-slim AS builder

# Toolchain and headers needed to compile llama-cpp-python from source.
# These stay in this stage only; the runtime stage copies just the installed
# Python packages out of it.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    git \
    libopenblas-dev \
    pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Build llama-cpp-python against OpenBLAS with an explicit AVX2 + FMA target.
#
# GGML_NATIVE=OFF is deliberate: when left at its default for a non-cross
# build, ggml tunes for the CPU of the machine running `docker build`
# (-march=native). An image built on a host with newer extensions (e.g.
# AVX-512) can then crash with "Illegal instruction" on the host that actually
# runs the container. Turning it off makes the explicit flags below the only
# thing that decides the instruction set, so the result is reproducible.
#
# FORCE_CMAKE=1 is kept from the original recipe; presumably it makes pip
# compile from source instead of using a prebuilt wheel - verify against the
# llama-cpp-python install docs for the pinned version.
ENV CMAKE_ARGS="-DGGML_NATIVE=OFF -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DGGML_AVX2=ON -DGGML_FMA=ON" \
    FORCE_CMAKE=1

# Upgrade pip first, then install the pinned llama-cpp-python release.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir llama-cpp-python==0.3.4

# ============================================================================
# Stage 2: Runtime image (lean)
# ============================================================================
FROM python:3.11-slim AS runtime

# Shared libraries the compiled llama-cpp-python extension needs at load time:
#   libgomp1     - GNU OpenMP runtime. ggml enables OpenMP when the compiler
#                  provides it (the builder's GCC does), and the slim base
#                  image does not ship this library; without it importing
#                  llama_cpp fails with "libgomp.so.1: cannot open shared
#                  object file".
#   libopenblas0 - OpenBLAS runtime matching the BLAS vendor chosen at build.
RUN apt-get update && apt-get install -y --no-install-recommends \
    libgomp1 \
    libopenblas0 \
    && rm -rf /var/lib/apt/lists/*

# Bring over the Python packages (including the compiled llama-cpp-python)
# and console scripts installed in the builder stage.
COPY --from=builder /usr/local/lib/python3.11/site-packages /usr/local/lib/python3.11/site-packages
COPY --from=builder /usr/local/bin /usr/local/bin

# Install the remaining pinned Python deps (llama-cpp-python is already
# present from the COPY above, so it is not listed here).
RUN pip install --no-cache-dir \
    fastapi==0.115.0 \
    uvicorn[standard]==0.30.0 \
    huggingface_hub==0.24.0 \
    pydantic==2.8.0

# Create the non-root user (UID 1000, as HF Spaces expects) and, while still
# root, create the directories it must own:
#   /app                    - working directory. Created and chowned here so
#                             its ownership does not depend on how the image
#                             builder treats WORKDIR after USER.
#   /home/user/data/models  - local fallback location for models; presumably
#                             used by app.py when the Spaces persistent volume
#                             at /data is not mounted - confirm against app.py.
RUN useradd -m -u 1000 user \
    && mkdir -p /app /home/user/data/models \
    && chown -R user:user /app /home/user/data

# Everything from here on (including the container process) runs unprivileged.
USER user

WORKDIR /app

# Copy app code
COPY --chown=user:user app.py .

# Port the server below listens on (documentation only; does not publish it).
EXPOSE 7860

# Exec form so uvicorn is PID 1 and receives stop signals directly.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--log-level", "info"]