# syntax=docker/dockerfile:1
# services/encoder/Dockerfile
#
# WHY python:3.11-slim AND NOT python:3.11:
# The full python:3.11 image is ~900MB — it includes compilers, dev headers,
# documentation, and dozens of tools you never need at runtime.
# python:3.11-slim is ~130MB — just the runtime.
#
# We need build tools only to compile some Python packages (onnxruntime has
# C extensions). This is a TRUE multi-stage build: the "builder" stage
# carries gcc/g++/git and produces the installed site-packages; the final
# stage copies only that output — the compilers never reach the shipped image.
#
# FINAL IMAGE SIZE TARGET: ~800MB
# - python:3.11-slim base: 130MB
# - onnxruntime: ~400MB (it's big — ONNX runtime is a full inference engine)
# - clip + torchvision: ~150MB (we need torchvision for preprocessing)
# - Our code: <1MB
# - ONNX model files: ~90MB (mounted as volume, not baked in)
#
# WHY NOT ALPINE (even smaller base):
# Alpine uses musl libc instead of glibc.
# onnxruntime ships pre-compiled wheels built against glibc.
# Using Alpine would require compiling onnxruntime from source: 2+ hours.
# Not worth it for a <100MB size difference.

# ---------- builder stage: compilers live here and are discarded ----------
FROM python:3.11-slim AS builder
WORKDIR /build

# System deps needed to build Python packages with C extensions.
# --no-install-recommends: only install exactly what's listed, not suggested packages.
# The apt cache cleanup MUST be in the same RUN: every RUN creates a layer,
# and files "deleted" in a later RUN are still baked into the earlier layer.
RUN apt-get update && apt-get install -y --no-install-recommends \
        g++ \
        gcc \
        git \
    && rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
# --prefix=/install: stage everything (site-packages AND console scripts like
#   uvicorn) under one directory so the final stage can COPY it wholesale.
# --no-cache-dir: don't store pip's download cache in the image.
#   pip normally caches to speed up repeated installs, but in Docker
#   we build once and run many times — the cache just wastes space.
RUN pip install --default-timeout=1200 --no-cache-dir --prefix=/install \
        -r requirements.txt

# ---------- final stage: slim runtime, no compilers, non-root ----------
FROM python:3.11-slim
WORKDIR /app

# libgomp1 is the only system package onnxruntime needs at runtime (OpenMP).
RUN apt-get update && apt-get install -y --no-install-recommends \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

# Bring in the pre-built Python packages from the builder stage.
# /usr/local is where pip would have installed them in a single-stage build,
# so imports and the uvicorn entry point resolve without PATH/PYTHONPATH tweaks.
COPY --from=builder /install /usr/local

# Copy only the application code (not scripts, not embeddings).
COPY main.py .

# Create directory for model files.
# Actual models are mounted via Docker volume — NOT baked into the image.
# WHY NOT BAKE IN:
# If models are in the image, every model update requires rebuilding the image.
# With volumes, you can update models without touching Docker.
# Also: 90MB model in image = 90MB transferred on every docker pull.
RUN mkdir -p models

# Don't run as root: an unprivileged system user owns /app and runs the service.
RUN groupadd --system app \
    && useradd --system --gid app --no-create-home app \
    && chown -R app:app /app
USER app

# Port 8001: encoder service (internal, not exposed to host directly).
# EXPOSE is documentation only — it does not publish the port.
EXPOSE 8001

# HEALTHCHECK: Docker polls this to know if the service is ready.
# --interval: check every 30s
# --timeout: fail if no response in 10s
# --start-period: wait 60s before starting checks (model loading takes time)
# --retries: mark unhealthy after 3 consecutive failures
# urllib is used because slim ships no curl/wget; a failed request raises,
# python exits non-zero, and the check counts as failed.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8001/health')"

# Run with uvicorn (ASGI server for FastAPI), exec form so uvicorn is PID 1
# and receives SIGTERM from `docker stop`.
# --host 0.0.0.0: listen on all interfaces (needed inside Docker)
# --port 8001: encoder port
# --workers 1: ONE worker. Why?
#   ONNX sessions are NOT safely forkable.
#   Multiple workers would each load the 90MB model into RAM.
#   For CPU-bound inference, multiple workers don't help — use async instead.
CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8001", "--workers", "1"]