File size: 1,880 Bytes
b2f9b47
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# services/api/Dockerfile
#
# WHY THIS IS SEPARATE FROM THE ENCODER:
#   If they were one container:
#   - Restart API → also restarts encoder → 3s model reload on every code change
#   - Scale horizontally → each replica carries the 90MB model in RAM
#   - One crash takes down both search logic AND inference
#
#   Separate containers = independent restart, scale, update, and failure domains.
#
# THIS CONTAINER IS LIGHTER than the encoder:
#   - No onnxruntime (that's the encoder's job)
#   - Needs faiss-cpu, whisper, httpx (for calling encoder)
#   - Target size: ~600MB

FROM python:3.11-slim

WORKDIR /app

# System packages:
#   ffmpeg - needed by Whisper to decode audio files (mp3, wav, webm, etc.).
#            Without it, Whisper can only handle raw PCM.
#            Size cost: ~80MB — worth it for voice search capability.
#   git    - NOTE(review): presumably required by pip for VCS ("git+https://")
#            entries in requirements.txt — confirm, and drop it if unused.
#
# The notes live above the RUN rather than between its continuation lines so
# the instruction contains no empty continuation line (Docker warns on those).
# The apt package lists are removed in the same layer that created them so
# they never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        git \
    && rm -rf /var/lib/apt/lists/*

# Copy only the dependency manifest first so the install layers below stay
# cached until requirements.txt itself changes.
COPY requirements.txt .

# Upgrade the packaging tools up front: with --no-build-isolation (below),
# source builds use the setuptools/wheel installed here rather than a private
# build environment. --no-cache-dir keeps pip's download cache out of the layer.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# NOTE(review): this replaced a plain
# `pip install --no-cache-dir -r requirements.txt`; presumably at least one
# requirement fails to build in an isolated environment — confirm which one.
RUN pip install --no-cache-dir --no-build-isolation -r requirements.txt

# Application code goes in last because it changes most often.
COPY main.py .

# Pre-create the runtime data directories under the working directory.
# embeddings/ and data/ are supplied as mounted volumes at run time — their
# contents are not baked into the image.
RUN mkdir -p /app/embeddings /app/data /app/images

# Port the API server listens on (documentation only; publishing is done at run time).
EXPOSE 8000

# Probe /health every 30s once the 30s start period has passed; each probe may
# take up to 10s, and 3 consecutive failures mark the container unhealthy.
# urlopen raises on a connection failure or an HTTP error status, so python
# exits non-zero and the probe fails.
HEALTHCHECK --start-period=30s --interval=30s --timeout=10s --retries=3 \
    CMD ["python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"]

# Run the API under uvicorn with 2 worker processes.
# The API is I/O bound — it spends its time waiting on encoder HTTP calls — so
# a second worker can accept a new request while the first one is blocked.
# The encoder, by contrast, is CPU-bound; extra workers there would only
# compete for CPU.
# NOTE(review): no USER directive is visible in this file, so the server runs
# as root — consider a non-root user once volume permissions are confirmed.
CMD ["python", "-m", "uvicorn", "main:app", "--workers", "2", "--host", "0.0.0.0", "--port", "8000"]