File size: 1,408 Bytes
d36a46f
7ae4b71
91bc5ae
9604400
 
 
 
 
d36a46f
 
 
 
7ae4b71
d36a46f
 
9604400
91bc5ae
 
 
 
 
 
 
9604400
 
 
d36a46f
9604400
 
 
91bc5ae
 
9604400
 
 
 
d36a46f
 
 
9604400
91bc5ae
9604400
 
 
 
7ae4b71
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# syntax=docker/dockerfile:1
# OpenELM API Docker Configuration
# Version 4: Background model loading to prevent timeout

FROM python:3.10-slim

# Install all system dependencies in ONE layer: apt-get update and install
# must share a layer (otherwise a cached stale index can poison later
# installs), and the list cleanup must happen in the same layer to keep the
# image small. libsentencepiece-dev is required by the tokenizer.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    libsentencepiece-dev \
    pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Run as an unprivileged user from here on; user-level pip installs land in
# ~/.local/bin, so put that on PATH.
RUN useradd -m -u 1000 user
USER user
ENV PATH="/home/user/.local/bin:$PATH"

WORKDIR /app

# Runtime environment, grouped in one instruction:
# - PYTHONUNBUFFERED=1 so logs stream immediately (no stdio buffering).
# - Hugging Face caches are kept under /app so downloads persist with the app.
#   NOTE(review): TRANSFORMERS_CACHE is deprecated in favour of HF_HOME in
#   recent transformers releases; kept here for older versions that still
#   read it.
ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/app/.cache/huggingface \
    TRANSFORMERS_CACHE=/app/.cache/transformers \
    HUGGINGFACE_HUB_CACHE=/app/.cache/huggingface

# Copy requirements first so the dependency layer stays cached until
# requirements.txt itself changes.
COPY --chown=user ./requirements.txt requirements.txt

# Upgrade the installer toolchain in its own (rarely invalidated) layer.
RUN pip install --no-cache-dir --upgrade pip wheel

# Install Python dependencies plus sentencepiece (required by the tokenizer;
# installed explicitly in case it is missing from requirements.txt) in a
# single layer.
RUN pip install --no-cache-dir -r requirements.txt sentencepiece

# Copy application code (after deps, so code changes don't bust the
# dependency cache).
COPY --chown=user . /app

# Documentation only (EXPOSE does not publish ports): 8000 is served by
# uvicorn below; 7860 is the conventional Hugging Face Spaces port.
EXPOSE 8000 7860

# Exec-form CMD so uvicorn runs as PID 1 and receives SIGTERM from
# `docker stop` directly. app_v4 loads the model in the background to avoid
# platform startup timeouts; keep-alive raised to 120s for slow first
# responses while the model is still loading.
CMD ["uvicorn", "app_v4:app", "--host", "0.0.0.0", "--port", "8000", "--timeout-keep-alive", "120", "--log-level", "info"]