File size: 1,552 Bytes
8ff1b66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# ------------------------------------------------------------------------------
# Stage 1: Quantize NLP model (torch needed ONLY here for PyTorch -> ONNX export)
# ------------------------------------------------------------------------------
FROM python:3.11-slim AS model-quantizer

# Build-only stage: nothing installed here reaches the final image unless a
# later stage explicitly copies it out with COPY --from=model-quantizer.
WORKDIR /app

# Export/quantization toolchain, pinned so rebuilds resolve the same versions.
# The extra index is PyTorch's CPU wheel index; --no-cache-dir keeps pip's
# download cache out of the layer.
RUN pip install --no-cache-dir \
    --extra-index-url https://download.pytorch.org/whl/cpu \
    "huggingface-hub==0.20.3" \
    "numpy<2.0.0" \
    "optimum[onnxruntime]==1.16.2" \
    "torch==2.2.0" \
    "transformers==4.37.2"

# Only the quantization script is brought into this stage, then executed.
# NOTE(review): the script presumably writes its output under
# /app/backend/models/quantized (the path the runtime stage copies from) —
# confirm against scripts/quantize_model.py.
COPY scripts/quantize_model.py scripts/
RUN python3 scripts/quantize_model.py


# ------------------------------------------------------------------------------
# Stage 2: Runtime (Python FastAPI Worker — no torch, no frontend)
# ------------------------------------------------------------------------------
FROM python:3.11-slim

WORKDIR /app

# Everything from here on (build steps and the container process) runs as an
# unprivileged account with UID 1000 instead of root.
RUN useradd --create-home --uid 1000 user
USER user

# Point HOME at the new account and put its per-user bin directory first on
# PATH, so executables that pip installs for this user can be found.
ENV HOME=/home/user
ENV PATH=/home/user/.local/bin:$PATH

# Install dependencies from the manifest alone, before the source tree is
# copied, so code-only changes reuse the cached dependency layer.
COPY --chown=user:user backend/requirements.txt backend/
RUN pip install --no-cache-dir --upgrade -r backend/requirements.txt

# Application source.
COPY --chown=user:user backend backend/

# Model artifacts taken from the model-quantizer stage; copied after the
# source tree so they land on top of whatever backend/ already contains.
COPY --chown=user:user --from=model-quantizer \
    /app/backend/models/quantized \
    backend/models/quantized

# The server is launched from inside the backend directory so that the
# "worker_main" module in CMD is importable from the working directory.
WORKDIR /app/backend

# Documents the port uvicorn is told to bind in CMD below.
EXPOSE 7860

CMD ["uvicorn", "worker_main:app", "--host", "0.0.0.0", "--port", "7860"]