lamhieu's picture
refactor(core): overhaul architecture for better performance, efficiency, and maintainability
c4742ee
# syntax=docker/dockerfile:1.7
# -----------------------------------------------------------------------------
# Lightweight Embeddings — multi-stage image (CPU-only by default).
#
#   Stage 1 (builder): installs the requirements.txt dependencies into /install,
#                      compiling any wheels that have no prebuilt binary
#   Stage 2 (runtime): slim image with jemalloc + healthcheck + non-root user
#
# Build:
# docker build -t lightweight-embeddings .
# Run:
# docker run --rm -p 7860:7860 lightweight-embeddings
# -----------------------------------------------------------------------------
# Declared before the first FROM so both stages can use it in their FROM line.
ARG PYTHON_VERSION=3.10

# ============================================================================
# Stage 1: builder
# ============================================================================
# Installs everything listed in requirements.txt under the /install prefix so
# the runtime stage can copy the finished tree without any build tooling.
FROM python:${PYTHON_VERSION}-slim AS builder

# PIP_NO_CACHE_DIR=0 deliberately leaves pip's cache switched on: the pip step
# below keeps that cache in a BuildKit cache mount, not in an image layer.
ENV PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PIP_NO_CACHE_DIR=0 \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1

# Build tools required by torch/transformers wheels with no prebuilt arch.
#
# Debian-based images ship /etc/apt/apt.conf.d/docker-clean, which deletes
# downloaded .deb files after every install. Remove it and tell apt to keep its
# downloads; otherwise the /var/cache/apt cache mount never retains anything
# between builds. Both apt directories are cache mounts, so nothing apt writes
# there ends up in the image layer.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    rm -f /etc/apt/apt.conf.d/docker-clean \
    && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' \
        > /etc/apt/apt.conf.d/keep-cache \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        git

WORKDIR /build

# Copy only the dependency manifest so this layer (and the pip layer after it)
# is reused until requirements.txt itself changes.
COPY requirements.txt ./

# CPU torch wheels are an order of magnitude smaller than the CUDA ones.
# Override at build time with: --build-arg TORCH_INDEX_URL=...
ARG TORCH_INDEX_URL=https://download.pytorch.org/whl/cpu

# --prefix=/install keeps the installed tree separate from the builder's own
# /usr/local, so the runtime stage can copy exactly what was installed here.
RUN --mount=type=cache,target=/root/.cache/pip,sharing=locked \
    pip install --prefix=/install \
        --extra-index-url "${TORCH_INDEX_URL}" \
        -r requirements.txt

# ============================================================================
# Stage 2: runtime
# ============================================================================
# Slim image that carries only the installed packages from the builder stage,
# the application source, jemalloc, and curl (used by the healthcheck).
FROM python:${PYTHON_VERSION}-slim AS runtime

LABEL org.opencontainers.image.title="lightweight-embeddings" \
      org.opencontainers.image.description="Multilingual text+image embeddings & reranking API" \
      org.opencontainers.image.licenses="MIT" \
      org.opencontainers.image.source="https://github.com/lh0x00/lightweight-embeddings"

# HF_HOME points into the home directory of the non-root user created below.
# NOTE: nothing in this file reads PORT; EXPOSE, HEALTHCHECK and CMD all
# hard-code 7860, so keep them in sync if the port ever changes.
ENV PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    OMP_NUM_THREADS=2 \
    MKL_NUM_THREADS=2 \
    TOKENIZERS_PARALLELISM=false \
    HF_HOME=/home/user/.cache/huggingface \
    PORT=7860

# jemalloc keeps RSS predictable for workloads with frequent (de)allocations.
#
# docker-clean is removed (and Keep-Downloaded-Packages enabled) so the apt
# cache mounts actually retain downloaded packages between builds.
#
# Debian installs libjemalloc under an architecture-specific multiarch
# directory (x86_64-linux-gnu, aarch64-linux-gnu, ...). Link it to a fixed
# path so LD_PRELOAD resolves on every target platform; `test -e` fails the
# build if the glob matched nothing and the link is dangling.
RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \
    --mount=type=cache,target=/var/lib/apt,sharing=locked \
    rm -f /etc/apt/apt.conf.d/docker-clean \
    && echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' \
        > /etc/apt/apt.conf.d/keep-cache \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libjemalloc2 \
    && ln -s /usr/lib/*-linux-gnu*/libjemalloc.so.2 /usr/local/lib/libjemalloc.so.2 \
    && test -e /usr/local/lib/libjemalloc.so.2

# Set only after the RUN above, once the symlink exists.
ENV LD_PRELOAD=/usr/local/lib/libjemalloc.so.2

# Non-root.
RUN useradd -m -u 1000 user
USER user
WORKDIR /home/user/app

# Pull the prebuilt site-packages from stage 1.
COPY --from=builder /install /usr/local

# Application source.
COPY --chown=user . .

EXPOSE 7860

# Probe the service over loopback; curl -f turns HTTP error statuses into a
# non-zero exit, which marks the container unhealthy.
HEALTHCHECK --interval=30s --timeout=5s --start-period=60s --retries=3 \
    CMD curl -fsS http://127.0.0.1:7860/healthz >/dev/null || exit 1

# Exec form: uvicorn runs as PID 1 and receives stop signals directly.
CMD ["uvicorn", "app:app", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--proxy-headers", \
     "--forwarded-allow-ips", "*"]