# syntax=docker/dockerfile:1
#
# CPU-only Python image that serves `app.main:app` with uvicorn on port 7860.
# scripts/docker_build_assets.py runs at build time (presumably to
# pre-populate the Hugging Face cache -- confirm in the script); afterwards
# the hub clients are switched to offline mode for runtime.
#
# Build:
#   docker build -t app .
# Build with a hub token that never lands in image layers or history:
#   HF_TOKEN=... docker build --secret id=HF_TOKEN,env=HF_TOKEN -t app .

FROM python:3.10-slim

# Build-time only: keeps apt non-interactive without leaking the setting into
# the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PORT=7860

WORKDIR /code

# OS packages: compiler toolchain, curl, and the OpenMP / OpenBLAS
# development libraries. The apt lists are removed in the same layer so they
# do not add to the image size.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        libomp-dev \
        libopenblas-dev \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies. Only requirements.txt is copied first so this layer
# stays cached until the requirements change.
# torch is installed from the CPU wheel index before requirements.txt and
# force-reinstalled from the same index (without dependencies) afterwards --
# presumably so a torch build pulled in from the default index while
# resolving requirements.txt cannot end up in the image.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu \
    && pip install --no-cache-dir -r requirements.txt \
    && pip install --no-cache-dir torch --index-url https://download.pytorch.org/whl/cpu --force-reinstall --no-deps \
    && pip install --no-cache-dir huggingface-hub sentencepiece

# Every Hugging Face cache variable points at the same directory.
ENV HF_HOME=/models/huggingface \
    TRANSFORMERS_CACHE=/models/huggingface \
    HUGGINGFACE_HUB_CACHE=/models/huggingface \
    HF_HUB_CACHE=/models/huggingface

# Unprivileged account that owns the application and cache directories.
# Ownership replaces the former world-writable (777) cache directory.
ARG APP_UID=1000
RUN groupadd --gid "${APP_UID}" app \
    && useradd --uid "${APP_UID}" --gid app --create-home --shell /usr/sbin/nologin app \
    && mkdir -p /models/huggingface \
    && chown -R app:app /code /models

# Cap the thread pools of the numeric libraries.
ENV OMP_NUM_THREADS=2 \
    MKL_NUM_THREADS=2 \
    NUMEXPR_NUM_THREADS=2

# Non-secret build switches; they are also persisted for the running
# container, as before.
ARG DOCKER_BUILD_SKIP_LLM_WARM=1
ARG GENERATION_BACKEND=gemini
ARG SKIP_LOCAL_LLM_HUB_DOWNLOAD=1
ENV DOCKER_BUILD_SKIP_LLM_WARM=${DOCKER_BUILD_SKIP_LLM_WARM} \
    GENERATION_BACKEND=${GENERATION_BACKEND} \
    SKIP_LOCAL_LLM_HUB_DOWNLOAD=${SKIP_LOCAL_LLM_HUB_DOWNLOAD}

# Application source. What ends up in the image is governed by the build
# context, so keep secrets and bulky artefacts out of it via .dockerignore.
COPY --chown=app:app . .

USER app

# Hub tokens are credentials and are deliberately NOT copied into ENV, so
# they never reach the final image's environment.
#   Preferred:  --secret id=HF_TOKEN,...   (not stored in layers or history)
#   Legacy:     --build-arg HF_TOKEN=...   (still honoured for existing
#               callers, but the value is recorded in the build history)
# If the application needs a token at runtime, inject it when the container
# starts (for example `docker run -e HF_TOKEN=...`).
ARG HF_TOKEN=
ARG HUGGING_FACE_HUB_TOKEN=

# The secret is optional: without it the build proceeds with whatever the
# build args provide (empty by default). mode=0444 lets the unprivileged user
# read the mounted file for the duration of this step only.
RUN --mount=type=secret,id=HF_TOKEN,mode=0444 \
    if [ -s /run/secrets/HF_TOKEN ]; then \
        HF_TOKEN="$(cat /run/secrets/HF_TOKEN)" \
        && HUGGING_FACE_HUB_TOKEN="${HF_TOKEN}" \
        && export HF_TOKEN HUGGING_FACE_HUB_TOKEN; \
    fi \
    && python scripts/docker_build_assets.py

# Set only after the build step above, so that step can still reach the hub
# while the running container is put into offline mode.
ENV HF_HUB_OFFLINE=1 \
    TRANSFORMERS_OFFLINE=1

EXPOSE 7860

# Exec form: uvicorn runs as PID 1 and receives SIGTERM from `docker stop`.
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--timeout-keep-alive", "120"]