# Image for a CUDA-enabled Python application started with `python app.py`.
# Build order runs from least to most frequently changing: OS packages,
# user/workspace setup, the llama-cpp-python CUDA build, the remaining Python
# requirements, and finally the application source.
FROM nvidia/cuda:12.1.1-cudnn8-devel-ubuntu22.04

# System dependencies.
# DEBIAN_FRONTEND is set inline (not via ENV) so package configuration never
# waits for input during the build and the setting does not leak into the
# runtime environment.
# NOTE(review): recommended packages are intentionally still installed so the
# resulting package set matches the previous image — confirm before adding
# --no-install-recommends.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        build-essential \
        curl \
        git \
        git-lfs \
        python3 \
        python3-dev \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Make `python` resolve to python3; the CMD below invokes `python`.
RUN ln -sf /usr/bin/python3 /usr/bin/python

WORKDIR /app

# HuggingFace Spaces runs as non-root user 1000.
# The user is created before any application file is copied so that
# COPY --chown can assign ownership directly. /app itself and the HuggingFace
# cache directory are handed to that user here, while still root.
RUN useradd -m -u 1000 user \
    && mkdir -p /app/hf_cache \
    && chown user:user /app /app/hf_cache

# Install llama-cpp-python with CUDA support first — longest build step
# NOTE(review): the version is unpinned, so rebuilds may pick up a newer
# release — consider pinning once the target version is known.
RUN CMAKE_ARGS="-DGGML_CUDA=on" pip install --no-cache-dir llama-cpp-python

# Install remaining requirements. Only the manifest is copied here so this
# layer stays cached until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application files, owned by the runtime user. Using --chown here avoids
# a follow-up `chown -R` layer that would store every file a second time.
COPY --chown=user:user . .

# Everything that needs root is done; drop privileges for runtime.
USER user

# HF_HOME points at the cache directory created above.
# PYTHONUNBUFFERED=1 makes Python write its output unbuffered.
ENV HF_HOME=/app/hf_cache \
    PYTHONUNBUFFERED=1

EXPOSE 7860

CMD ["python", "app.py"]