File size: 1,855 Bytes
983d8eb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# Embeddings sidecar — FastAPI + fastembed.
# Builds a small image that exposes /embed/dense, /embed/colbert,
# /embed/colbert/query, /health.
#
# Designed to run anywhere a Dockerfile is accepted:
#   - Hugging Face Spaces (Docker SDK)  — easiest, free tier, weights cached
#   - Fly.io                            — `fly launch` then `fly deploy`
#   - Railway / Render / Koyeb          — auto-detects Dockerfile
#
# The runtime port is taken from $PORT (HF Spaces, Railway, Render set this);
# defaults to 7860 (HF Spaces convention).

FROM python:3.11-slim

# Environment baked into the image (available at build time and at runtime):
#   PORT                  - default listen port; platforms that inject their
#                           own $PORT override it when the container starts.
#   HF_HOME /
#   FASTEMBED_CACHE_PATH  - keep model downloads in a writable location (HF
#                           Spaces uses /data for persistent storage on paid
#                           tiers; falls back to /tmp on free).
#   PIP_*                 - pip skips its download cache and its self-update
#                           version check.
#   PYTHONUNBUFFERED      - Python writes stdout/stderr without buffering.
ENV PORT=7860 \
    HF_HOME=/tmp/huggingface \
    FASTEMBED_CACHE_PATH=/tmp/fastembed_cache \
    PIP_NO_CACHE_DIR=1 \
    PIP_DISABLE_PIP_VERSION_CHECK=1 \
    PYTHONUNBUFFERED=1

WORKDIR /app

# System packages: a compiler toolchain (build-essential) and the GNU OpenMP
# runtime (libgomp1), needed for some onnxruntime / tokenizers wheels.
# `set -e` aborts the step on the first failing command; the apt package
# lists are deleted in this same layer so they never reach the image.
RUN set -e; \
    apt-get update; \
    apt-get install --yes --no-install-recommends \
        build-essential \
        libgomp1; \
    rm -rf /var/lib/apt/lists/*

# The dependency manifest is copied on its own so the install layer below is
# rebuilt only when requirements.txt changes, not on every edit to main.py.
COPY requirements.txt ./
RUN python -m pip install --requirement requirements.txt

# Application code goes in after the dependencies for the same caching reason.
COPY main.py ./

# Drop root for everything that follows (model download and the service
# itself). UID 1000 is the user ID Hugging Face Docker Spaces run containers
# as; creating the account *before* the pre-warm step means the model cache
# under /tmp is owned by the same user that has to read and extend it at
# runtime. /app is handed over (non-recursively, so no layer is duplicated)
# in case the app needs to create files next to main.py.
RUN useradd --create-home --uid 1000 app \
    && chown app:app /app
USER app

# Pre-warm the model cache at build time so the first request is fast.
# The step is deliberately best-effort: if the download fails (e.g. HF_TOKEN
# is required for a gated model and is only provided as a runtime secret),
# the `|| echo` keeps the build going and the first request warms the cache.
RUN python -c "from fastembed import TextEmbedding, LateInteractionTextEmbedding; \
    TextEmbedding(model_name='sentence-transformers/all-MiniLM-L6-v2'); \
    LateInteractionTextEmbedding(model_name='colbert-ir/colbertv2.0')" \
    || echo "Model pre-warm skipped — will download on first request."

# Documentation only: the default listen port (same value as the PORT default
# set in ENV). Platforms that inject their own $PORT override it at runtime.
EXPOSE 7860

# A shell is needed so $PORT is interpolated when the container starts.
# `exec` then replaces that shell with uvicorn, so uvicorn is the container's
# main process and receives SIGTERM from `docker stop` / the platform
# directly instead of relying on the shell to pass it along.
CMD ["sh", "-c", "exec uvicorn main:app --host 0.0.0.0 --port ${PORT:-7860}"]