# ------------------------------------------------------------------------------
# Stage 1: Quantize NLP model (torch needed ONLY here for PyTorch -> ONNX export)
# ------------------------------------------------------------------------------
FROM python:3.11-slim AS model-quantizer
WORKDIR /app
RUN pip install --no-cache-dir \
    --extra-index-url https://download.pytorch.org/whl/cpu \
    "torch==2.2.0" \
    "optimum[onnxruntime]==1.16.2" \
    "transformers==4.37.2" \
    "huggingface-hub==0.20.3" \
    "numpy<2.0.0"
COPY scripts/quantize_model.py scripts/quantize_model.py
RUN python3 scripts/quantize_model.py
# ------------------------------------------------------------------------------
# Stage 2: Runtime (Python FastAPI Worker — no torch, no frontend)
# ------------------------------------------------------------------------------
FROM python:3.11-slim
WORKDIR /app
# Create non-root user for security
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH
# Install Python dependencies (no torch — ~700MB RAM saved)
COPY --chown=user:user backend/requirements.txt backend/requirements.txt
RUN pip install --no-cache-dir --upgrade -r backend/requirements.txt
# Copy Backend code
COPY --chown=user:user backend backend
# Copy pre-quantized ONNX model from Stage 1
COPY --chown=user:user --from=model-quantizer /app/backend/models/quantized backend/models/quantized
WORKDIR /app/backend
EXPOSE 7860
CMD ["uvicorn", "worker_main:app", "--host", "0.0.0.0", "--port", "7860"]
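The runtime entrypoint worker_main:app is likewise not shown here. A hedged sketch of a torch-free worker that serves the quantized model with onnxruntime and FastAPI follows; the endpoint, request schema, and model_quantized.onnx file name are assumptions.

# worker_main.py -- illustrative sketch of a torch-free ONNX inference worker
import numpy as np
import onnxruntime as ort
from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoTokenizer

MODEL_DIR = "models/quantized"  # relative to /app/backend, as laid out by the Dockerfile

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR)
session = ort.InferenceSession(
    f"{MODEL_DIR}/model_quantized.onnx", providers=["CPUExecutionProvider"]
)
input_names = {i.name for i in session.get_inputs()}

app = FastAPI()

class PredictRequest(BaseModel):
    text: str

@app.post("/predict")
def predict(req: PredictRequest):
    # Tokenize to NumPy arrays so no torch tensors are needed at inference time.
    encoded = tokenizer(req.text, return_tensors="np", truncation=True)
    feeds = {k: v for k, v in encoded.items() if k in input_names}
    logits = session.run(None, feeds)[0]
    return {"label_id": int(np.argmax(logits, axis=-1)[0])}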