Spaces:
Running
Running
# ==============================================================================
# Stage 1 — model-quantizer
# Quantizes the NLP model at build time. torch is installed in this stage only:
# it is needed for the PyTorch -> ONNX export and stays out of the runtime image.
# ==============================================================================
FROM python:3.11-slim AS model-quantizer

WORKDIR /app

# Pinned export/quantization toolchain. The extra index URL points pip at the
# PyTorch CPU wheel index in addition to the default index.
RUN pip install --no-cache-dir \
        --extra-index-url https://download.pytorch.org/whl/cpu \
        "torch==2.2.0" \
        "optimum[onnxruntime]==1.16.2" \
        "transformers==4.37.2" \
        "huggingface-hub==0.20.3" \
        "numpy<2.0.0"

# Only the quantization script is copied into this stage.
COPY scripts/quantize_model.py scripts/quantize_model.py

# NOTE(review): the final stage copies /app/backend/models/quantized out of this
# stage, so the script presumably writes its output there — confirm against
# scripts/quantize_model.py.
RUN python3 scripts/quantize_model.py
# ------------------------------------------------------------------------------
# Stage 2: Runtime (Python FastAPI Worker — no torch, no frontend)
# ------------------------------------------------------------------------------
FROM python:3.11-slim
WORKDIR /app

# Create the non-root user for security and give it ownership of the app tree.
# WORKDIR above executes as root, so /app would otherwise be root-owned, and
# the ownership of the implicitly created /app/backend depends on the builder.
# Creating and chown-ing both here keeps the working directory writable for
# uid 1000 regardless of which builder is used.
RUN useradd -m -u 1000 user \
    && mkdir -p /app/backend \
    && chown -R user:user /app
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Install Python dependencies (no torch — ~700MB RAM saved).
# requirements.txt is copied on its own first so this layer is only rebuilt
# when the dependency list changes, not on every code change.
# NOTE(review): pip runs as `user` here; the PATH entry above is where a
# per-user install would place console scripts such as uvicorn — confirm.
COPY --chown=user:user backend/requirements.txt backend/requirements.txt
RUN pip install --no-cache-dir --upgrade -r backend/requirements.txt

# Copy Backend code
COPY --chown=user:user backend backend

# Copy pre-quantized ONNX model from Stage 1
COPY --chown=user:user --from=model-quantizer /app/backend/models/quantized backend/models/quantized

# Run from the backend directory so "worker_main:app" resolves relative to it.
WORKDIR /app/backend
EXPOSE 7860
CMD ["uvicorn", "worker_main:app", "--host", "0.0.0.0", "--port", "7860"]