# Image for a Python web app served by gunicorn (WSGI callable `app` in module
# `app`), listening on port 7860.
FROM python:3.11-slim

WORKDIR /code

# Create an unprivileged account and give it the app directory so the server
# does not run as root. NOTE(review): UID 1000 is chosen because port 7860
# suggests a Hugging Face Docker Space, which is documented to run containers
# as UID 1000 - confirm, or pick another stable UID for other targets.
RUN useradd --create-home --uid 1000 appuser \
    && chown appuser:appuser /code

# Install dependencies before copying the rest of the source so this layer is
# reused from cache until requirements.txt itself changes.
COPY ./requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Application source, owned by the runtime user.
COPY --chown=appuser:appuser . .

# Drop privileges only after the steps above that need root (pip install).
USER appuser

EXPOSE 7860

# --timeout 120: first request lazily loads BOTH transformer members into RAM,
# which can exceed gunicorn's default 30s worker timeout on free CPU hardware.
CMD ["gunicorn", "-b", "0.0.0.0:7860", "--timeout", "120", "app:app"]