# syntax=docker/dockerfile:1
# OpenELM API Docker Configuration
# Version 4: Background model loading to prevent timeout
FROM python:3.10-slim

# Build tools for compiling Python wheels plus libsentencepiece-dev (native
# library behind the sentencepiece tokenizer). One layer: `update` and
# `install` are combined so the apt index is never stale, and the package
# lists are removed in the same layer that created them.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        libsentencepiece-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Run as an unprivileged user (uid 1000, name "user" — the Hugging Face
# Spaces convention). pip installs below land in ~/.local, hence the PATH.
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Runtime environment, grouped in one instruction:
#  - PYTHONUNBUFFERED: flush stdout/stderr immediately so container logs
#    appear in real time.
#  - HF_HOME / HUGGINGFACE_HUB_CACHE / TRANSFORMERS_CACHE: keep model and
#    tokenizer caches under /app. TRANSFORMERS_CACHE is deprecated in newer
#    transformers releases but retained for backward compatibility.
ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/app/.cache/huggingface \
    TRANSFORMERS_CACHE=/app/.cache/transformers \
    HUGGINGFACE_HUB_CACHE=/app/.cache/huggingface

# Copy the manifest alone first so the dependency layer stays cached until
# requirements.txt itself changes.
COPY --chown=user ./requirements.txt requirements.txt

# Install Python dependencies as the unprivileged user. sentencepiece is
# required by the tokenizer and is installed in the same layer as the
# pinned requirements; --no-cache-dir keeps pip's wheel cache out of the image.
RUN pip install --no-cache-dir --upgrade pip wheel \
    && pip install --no-cache-dir -r requirements.txt sentencepiece

# Application code last — it changes most often, so everything above stays cached.
COPY --chown=user . /app

# Documentation only (ports are not published by EXPOSE): uvicorn listens on
# 8000; 7860 is the Hugging Face Spaces default.
EXPOSE 8000 7860

# Cheap TCP probe against the uvicorn port using the stdlib — the slim image
# ships no curl/wget. Generous start period because the model loads in a
# background task after the server starts accepting connections.
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
    CMD python -c "import socket; socket.create_connection(('127.0.0.1', 8000), timeout=3).close()" || exit 1

# Exec-form CMD so uvicorn is PID 1 and receives SIGTERM from `docker stop`.
# app_v4 loads the model in the background to prevent startup timeouts.
CMD ["uvicorn", "app_v4:app", "--host", "0.0.0.0", "--port", "8000", \
     "--timeout-keep-alive", "120", "--log-level", "info"]