# Hugging Face Space — Docker runtime (status banner from the Space page
# removed; everything below is the actual Dockerfile).
FROM python:3.12-slim

WORKDIR /app

# git is needed for huggingface_hub model downloads.
# No cmake/build-essential needed -- no C++ compilation.
# LLM runs locally via transformers.
# --no-install-recommends keeps the apt layer minimal; the list cleanup runs
# in the same layer so stale package indexes never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        git \
    && rm -rf /var/lib/apt/lists/*
# Copy only the dependency manifest first so the install layers below stay
# cached when application source changes.
COPY requirements.txt .

RUN pip install --no-cache-dir --upgrade pip

# -- Step 1: CPU-only PyTorch ------------------------------------------------
# sentence-transformers + transformers both need torch.
# Pre-installing the CPU wheel (~190 MB) prevents pip from resolving the
# default CUDA bundle (~3.5 GB) which would blow the build disk quota.
RUN pip install --no-cache-dir torch \
        --index-url https://download.pytorch.org/whl/cpu

# -- Step 2: Everything else -------------------------------------------------
RUN pip install --no-cache-dir -r requirements.txt

# -- Step 3: Pin transformers to a stable release ----------------------------
# Llama 3.2 works with any recent PyPI release; no git-main needed.
# --force-reinstall overrides whatever version requirements.txt resolved to.
RUN pip install --force-reinstall --no-cache-dir "transformers>=4.43.0,<5.0.0"
# Run as a non-root user. UID 1000 is the Hugging Face Spaces convention;
# port 7860 is unprivileged, so no extra capabilities are needed to bind it.
RUN useradd --create-home --uid 1000 appuser

# Own the app tree so the server (and any runtime writes under /app) work
# without root; --chown avoids a layer-doubling follow-up `RUN chown`.
COPY --chown=appuser:appuser . .

USER appuser
# huggingface_hub caches downloaded models under $HOME/.cache — point HOME at
# the user's writable home directory created above.
ENV HOME=/home/appuser

# Documentation only (does not publish the port); 7860 is the Spaces default.
EXPOSE 7860

# Exec-form CMD: uvicorn runs as PID 1 and receives SIGTERM on `docker stop`.
CMD ["uvicorn", "app:app", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "--workers", "1", \
     "--timeout-keep-alive", "120"]