# Base image: slim variant of the official Python 3.10 image.
FROM python:3.10-slim

# Install curl.
# --no-install-recommends stops apt from pulling in optional "recommended"
# packages, which keeps the layer small. ca-certificates is listed explicitly
# so HTTPS certificate verification keeps working for curl even though
# recommended packages are no longer installed automatically.
# The apt package lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    curl \
    && rm -rf /var/lib/apt/lists/*

# All following relative paths, and the running server, use /code as the working directory.
WORKDIR /code

# OpenMP tuning. The thread count is capped at 2 to match the HF container's
# CPU quota (2 vCPUs); more threads than CPUs causes severe thread thrashing
# and context-switching slowdowns. The bind/places settings ask the OpenMP
# runtime to spread those threads across cores.
ENV OMP_NUM_THREADS=2 \
    OMP_PROC_BIND=spread \
    OMP_PLACES=cores

# Bring pip up to date first so that the latest wheel tags are recognised.
RUN pip install \
    --no-cache-dir \
    --upgrade \
    pip

# llama-cpp-python is taken as a prebuilt CPU wheel from the project's extra
# wheel index. The version is locked to 0.3.30 to match what that index provides.
RUN pip install \
    --no-cache-dir \
    --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cpu \
    llama-cpp-python==0.3.30

# Copy requirements and install the remaining dependencies.
# The manifest is copied on its own so this layer stays cached until
# requirements.txt itself changes.
# Deliberately no --upgrade: llama-cpp-python was pinned to 0.3.30 in an
# earlier step, and --upgrade would let pip replace it (or its already
# installed dependencies) with a newer release if requirements.txt names them.
# This command does not pass the prebuilt-wheel index, so such an upgrade could
# fall back to a source build.
# NOTE(review): requirements.txt is not visible here - confirm nothing in it
# relied on being force-upgraded.
COPY requirements.txt /code/requirements.txt
RUN pip install --no-cache-dir -r /code/requirements.txt

# Copy application files and drop root privileges.
# An unprivileged account owns /code and the application files, so the server
# no longer runs as root but can still write inside its working directory and
# its home directory. UID 1000 is used because Hugging Face's Docker Spaces
# documentation states containers run with that user ID.
# NOTE(review): confirm the app does not write to root-owned paths outside
# /code and /home/appuser.
RUN useradd --create-home --uid 1000 appuser \
    && chown appuser:appuser /code
COPY --chown=appuser:appuser . /code
USER appuser
# Set HOME explicitly so tools that cache under the home directory resolve a
# writable path.
ENV HOME=/home/appuser

# Declare the HF Space port the server listens on (TCP). EXPOSE is
# documentation only; it does not publish the port.
EXPOSE 7860/tcp

# Launch the ASGI application object `app` from module `app` with uvicorn,
# bound to all interfaces on the port declared above. Exec (JSON-array) form
# makes uvicorn the container's main process, so it receives stop signals directly.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]