# CPU inference image: serves `app:app` with uvicorn on port 7860, with the
# model weights baked into the image.
FROM python:3.12.2-slim

# PYTHONDONTWRITEBYTECODE - don't litter the image with .pyc files.
# PYTHONUNBUFFERED        - flush stdout/stderr immediately so logs show up live.
# PIP_NO_CACHE_DIR        - keep pip's download cache out of the image layers.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1

# build-essential covers the few deps with C extensions (numpy/pandas wheels
# are usually prebuilt for 3.12, but this is cheap insurance).
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged runtime account so the server does not run as root. UID 1000 is
# the UID the Hugging Face Docker Spaces docs recommend. /app is created here
# and handed to that user so the process can write next to its code if needed.
RUN useradd --create-home --uid 1000 --user-group user \
    && mkdir -p /app \
    && chown user:user /app

WORKDIR /app

# Deps first so editing app.py doesn't reinstall torch on every rebuild.
# Installed as root into the system site-packages; readable by every user.
COPY requirements.txt .
RUN pip install --extra-index-url https://download.pytorch.org/whl/cpu -r requirements.txt

# Source next.
COPY --chown=user:user src/ ./src/
COPY --chown=user:user app.py ./

# Weights last: biggest layer, changes least often, so cache stays warm
# when you iterate on app.py.
COPY --chown=user:user model/ ./model/

# torch.load on tokenizer.pt unpickles a `Tokenizer` instance that was
# saved from the module path `tokenizer` (src/tokenizer.py). Making /app/src
# importable lets the unpickler find that class.
ENV PYTHONPATH=/app/src

# Everything that needs root is done; drop privileges for runtime.
USER user

# The server listens on a fixed port: 7860 is the HF Spaces convention (the
# default `app_port`). The port is hard-coded in CMD, not read from $PORT.
# Single worker: the model lives in process memory and multiple workers
# would multiply the ~1.5 GB RSS.
EXPOSE 7860
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]