Spaces:

klzn
/

sentimentstream-worker

Running

GitHub Action

deploy: worker release from GitHub

8ff1b66 15 days ago

1.55 kB

	# ==============================================================================
	# Build stage "model-quantizer"
	#
	# Installs the PyTorch -> ONNX export toolchain and runs the quantization
	# script. torch is installed in this stage only; the runtime stage pulls the
	# result out of /app/backend/models/quantized with COPY --from=model-quantizer.
	# ==============================================================================
	FROM python:3.11-slim AS model-quantizer

	WORKDIR /app

	# Export toolchain. The extra index is PyTorch's CPU wheel index (whl/cpu).
	# Specifiers are quoted so the shell does not interpret `[...]` or `<`.
	RUN pip install --no-cache-dir --extra-index-url https://download.pytorch.org/whl/cpu \
	    'torch==2.2.0' \
	    'optimum[onnxruntime]==1.16.2' \
	    'transformers==4.37.2' \
	    'huggingface-hub==0.20.3' \
	    'numpy<2.0.0'

	# Only the quantization script is copied in, so edits elsewhere in the repo
	# do not invalidate this layer's cache.
	COPY scripts/quantize_model.py scripts/

	# Run from /app, so relative paths in the script resolve against /app.
	RUN python3 scripts/quantize_model.py


	# ==============================================================================
	# Final stage: runtime image for the Python FastAPI worker
	#
	# Contains the backend code, its Python requirements and the quantized model
	# copied from the "model-quantizer" stage. No torch and no frontend here.
	# ==============================================================================
	FROM python:3.11-slim

	WORKDIR /app

	# Run everything below as an unprivileged account (uid 1000) instead of root.
	RUN useradd --create-home --uid 1000 user
	USER user

	# pip runs as a non-root user from here on, so console scripts (e.g. uvicorn)
	# are expected to land in ~/.local/bin; put that directory first on PATH.
	ENV HOME=/home/user \
	    PATH=/home/user/.local/bin:${PATH}

	# Dependency manifest is copied on its own so the install layer stays cached
	# until requirements.txt changes. The list is meant to exclude torch
	# (original note: ~700MB RAM saved).
	COPY --chown=user:user backend/requirements.txt backend/
	RUN pip install --no-cache-dir --upgrade --requirement backend/requirements.txt

	# Application source for the worker.
	COPY --chown=user:user backend backend/

	# Quantized ONNX model produced by the model-quantizer stage.
	COPY --chown=user:user --from=model-quantizer /app/backend/models/quantized backend/models/quantized

	# Start from the backend directory so `worker_main` is importable by uvicorn.
	WORKDIR /app/backend

	# Port the worker listens on (matches --port in CMD below).
	EXPOSE 7860

	CMD ["uvicorn", "worker_main:app", "--host", "0.0.0.0", "--port", "7860"]