# syntax=docker/dockerfile:1
# Initial deploy — commit b2f9b47
# (Non-Dockerfile page residue removed: the raw lines here were scraped UI
# text and would have failed `docker build`.)
# services/api/Dockerfile
#
# WHY THIS IS SEPARATE FROM THE ENCODER:
# If they were one container:
# - Restart API β†’ also restarts encoder β†’ 3s model reload on every code change
# - Scale horizontally β†’ each replica carries the 90MB model in RAM
# - One crash takes down both search logic AND inference
#
# Separate containers = independent restart, scale, update, and failure domains.
#
# THIS CONTAINER IS LIGHTER than the encoder:
# - No onnxruntime (that's the encoder's job)
# - Needs faiss-cpu, whisper, httpx (for calling encoder)
# - Target size: ~600MB
FROM python:3.11-slim
WORKDIR /app

# ffmpeg: needed by Whisper to decode audio files (mp3, wav, webm, etc.).
#   Without it, Whisper can only handle raw PCM.
#   Size cost: ~80MB — worth it for voice search capability.
# git: presumably needed so pip can install VCS-pinned requirements —
#   TODO confirm against requirements.txt; drop if nothing installs from git.
# NOTE: comments live above the RUN, not inside the line continuation —
# interleaved comments inside a continued RUN parse today but are fragile.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ffmpeg \
        git \
    && rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
# --no-cache-dir everywhere: pip's download cache would otherwise be baked
# into the layer (hadolint DL3042).
RUN pip install --no-cache-dir --upgrade pip setuptools wheel
# --no-build-isolation: build any sdists against the setuptools/wheel just
# upgraded above instead of a fresh isolated env. Depends on that upgrade
# step having run first — keep the two RUNs in this order.
RUN pip install --no-cache-dir --no-build-isolation -r requirements.txt

COPY main.py .

# Create directories for runtime data.
# embeddings/ and data/ are mounted as volumes — not baked in.
# Run as a dedicated non-root user (stable UID 10001 so orchestrators can
# verify runAsNonRoot). Dirs are chowned so the app can write to them when
# no volume is mounted; mounted volumes keep host ownership — align host
# UIDs with 10001 if the app must write into them (NOTE(review): confirm).
RUN groupadd --system --gid 10001 app \
    && useradd --system --uid 10001 --gid app --home /app app \
    && mkdir -p embeddings data images \
    && chown -R app:app /app
USER app

# Documentation only — port 8000 is the service contract (>1024, so binding
# works unprivileged as the non-root user).
EXPOSE 8000

# Cheap in-container probe of the app's own /health endpoint; uses the
# already-present Python instead of shipping curl/wget.
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')"

# 2 workers for the API (it's I/O bound — waiting on encoder HTTP calls).
# I/O-bound services benefit from multiple workers because while one worker
# waits for the encoder response, another can handle a new request.
# The encoder is CPU-bound — multiple workers there would fight for CPU.
# Exec (JSON-array) form: uvicorn runs as PID 1 and receives SIGTERM directly.
CMD ["python", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--workers", "2"]