# Image for a Gradio app (app.py) that uses a prebuilt CPU llama-cpp-python wheel.
FROM python:3.10-slim

WORKDIR /app

# Install system dependencies (build toolchain, git, OpenBLAS headers/libs).
# --no-install-recommends keeps optional packages out of the image; the apt
# lists are removed in the same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        cmake \
        g++ \
        gcc \
        git \
        libopenblas-dev \
        make \
    && rm -rf /var/lib/apt/lists/*

# Create an unprivileged user (UID 1000) and give it ownership of the
# working directory so the application can write next to its own files.
RUN useradd --create-home --uid 1000 user \
    && chown user:user /app

# Install optimized llama-cpp-python wheel for CPU (cp310 / linux_x86_64 build,
# which must match the Python version of the base image).
RUN pip install --no-cache-dir \
    https://huggingface.co/Luigi/llama-cpp-python-wheels-hf-spaces-free-cpu/resolve/main/llama_cpp_python-0.3.22-cp310-cp310-linux_x86_64.whl

# Install other dependencies. The manifest is copied on its own so this layer
# stays cached until requirements.txt itself changes.
COPY --chown=user:user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY --chown=user:user . .

# Runtime configuration:
# - HOME points at the unprivileged user's home so per-user caches are writable.
# - PYTHONUNBUFFERED=1 makes Python write stdout/stderr without buffering.
# - GRADIO_SERVER_NAME=0.0.0.0 makes Gradio listen on all interfaces.
# - *_NUM_THREADS=2: strict threading limits for 2 vCPU environment.
# - LLAMA_CPP_LOG_DISABLE=1: presumably silences llama.cpp logging -- confirm
#   against how app.py / the wheel reads this variable.
ENV HOME=/home/user \
    PYTHONUNBUFFERED=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    OMP_NUM_THREADS=2 \
    MKL_NUM_THREADS=2 \
    OPENBLAS_NUM_THREADS=2 \
    LLAMA_CPP_LOG_DISABLE=1

# Drop root privileges for everything that runs from here on.
USER user

# Expose Gradio port (documentation only; does not publish the port).
EXPOSE 7860

CMD ["python", "app.py"]