# syntax=docker/dockerfile:1
# OpenELM API Docker Configuration
# Version 4: Background model loading to prevent timeout
FROM python:3.10-slim

# Build tools for compiling Python wheels plus libsentencepiece-dev (native
# library behind the sentencepiece tokenizer). One layer: `update` and
# `install` are combined so the apt index is never stale, and the package
# lists are removed in the same layer that created them.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        cmake \
        libsentencepiece-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Run as an unprivileged user (uid 1000, name "user" — the Hugging Face
# Spaces convention). pip installs below land in ~/.local, hence the PATH.
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Runtime environment, grouped in one instruction:
#  - PYTHONUNBUFFERED: flush stdout/stderr immediately so container logs
#    appear in real time.
#  - HF_HOME / HUGGINGFACE_HUB_CACHE / TRANSFORMERS_CACHE: keep model and
#    tokenizer caches under /app. TRANSFORMERS_CACHE is deprecated in newer
#    transformers releases but retained for backward compatibility.
ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/app/.cache/huggingface \
    TRANSFORMERS_CACHE=/app/.cache/transformers \
    HUGGINGFACE_HUB_CACHE=/app/.cache/huggingface

# Copy the manifest alone first so the dependency layer stays cached until
# requirements.txt itself changes.
COPY --chown=user ./requirements.txt requirements.txt

# Install Python dependencies as the unprivileged user. sentencepiece is
# required by the tokenizer and is installed in the same layer as the
# pinned requirements; --no-cache-dir keeps pip's wheel cache out of the image.
RUN pip install --no-cache-dir --upgrade pip wheel \
    && pip install --no-cache-dir -r requirements.txt sentencepiece

# Application code last — it changes most often, so everything above stays cached.
COPY --chown=user . /app

# Documentation only (ports are not published by EXPOSE): uvicorn listens on
# 8000; 7860 is the Hugging Face Spaces default.
EXPOSE 8000 7860

# Cheap TCP probe against the uvicorn port using the stdlib — the slim image
# ships no curl/wget. Generous start period because the model loads in a
# background task after the server starts accepting connections.
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
    CMD python -c "import socket; socket.create_connection(('127.0.0.1', 8000), timeout=3).close()" || exit 1

# Exec-form CMD so uvicorn is PID 1 and receives SIGTERM from `docker stop`.
# app_v4 loads the model in the background to prevent startup timeouts.
CMD ["uvicorn", "app_v4:app", "--host", "0.0.0.0", "--port", "8000", \
     "--timeout-keep-alive", "120", "--log-level", "info"]