# Image for a uvicorn-served ASGI app (module `app`, object `app`) listening on
# port 7860, with a prebuilt CPU wheel of llama-cpp-python.
FROM python:3.10-slim

# Install curl. --no-install-recommends keeps apt from pulling in optional
# packages; ca-certificates is listed explicitly so HTTPS downloads do not
# depend on it arriving as a recommended package. The apt lists are removed in
# the same layer so they never end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /code

# Unprivileged runtime account. UID 1000 matches the user Hugging Face Spaces
# runs Docker containers as; /code is handed over so the app can create files
# next to its sources.
RUN useradd --create-home --uid 1000 user \
    && chown user:user /code

# Limit OpenMP threads to match the HF container's CPU quota (2 vCPUs)
# to prevent severe thread thrashing and context switching slowdowns.
# OMP_PLACES=cores defines one place per core; OMP_PROC_BIND=spread asks the
# runtime to distribute the threads across those places.
ENV OMP_NUM_THREADS=2 \
    OMP_PROC_BIND=spread \
    OMP_PLACES=cores

# Upgrade pip to ensure the latest wheel tags are supported.
RUN pip install --no-cache-dir --upgrade pip

# Install the prebuilt CPU wheel for llama-cpp-python (locked to 0.3.30 to
# match the prebuilt wheel index).
RUN pip install --no-cache-dir llama-cpp-python==0.3.30 \
        --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu

# Copy only the requirements file first so the dependency layer stays cached
# until requirements.txt itself changes.
# NOTE(review): --upgrade may replace the pinned llama-cpp-python wheel above
# if requirements.txt lists that package without the same pin; confirm.
COPY requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Copy application files, owned by the runtime user.
COPY --chown=user:user . /code

# Drop root only after every step that needs it (apt, system-wide pip) is
# done. HOME is set here, not earlier, so the root-run install steps above do
# not leave root-owned files in the user's home directory.
ENV HOME=/home/user
USER user

# Expose HF Space port (documentation only; does not publish the port).
EXPOSE 7860

# Start server (exec form, so uvicorn is PID 1 and receives stop signals).
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]