# Base image: CUDA 12.1 runtime on Ubuntu 22.04 (provides GPU libs for Ollama).
FROM nvidia/cuda:12.1.1-base-ubuntu22.04

# Install Python 3.10 and essential tooling.
# ca-certificates is added explicitly: with --no-install-recommends it is not
# guaranteed to be pulled in, and the https curl pipes below need it.
# (NOTE(review): the nvidia/cuda base may already ship it — harmless either way.)
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        python3.10 \
        python3.10-dev \
        python3.10-distutils \
        curl \
        ca-certificates \
        git \
    && rm -rf /var/lib/apt/lists/*

# Make Python 3.10 the default python3.
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1

# Install pip for Python 3.10 (Ubuntu ships no pip bound to this interpreter).
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10

# Install Ollama. OLLAMA_GPU_LAYERS requests GPU layer offload
# (NOTE(review): confirm this env var is honored by the installed Ollama version).
ENV OLLAMA_GPU_LAYERS=100
RUN curl -fsSL https://ollama.com/install.sh | sh

# Application directory.
WORKDIR /app

# Runtime environment:
# - PATH includes ~/.local/bin so user-level pip installs are runnable
# - GRADIO_SERVER_NAME binds Gradio to all interfaces inside the container
# - HF_HOME / OLLAMA_MODELS live under /data (persistent volume on HF Spaces)
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONUNBUFFERED=1 \
    GRADIO_SERVER_NAME="0.0.0.0" \
    HF_HOME=/data/.huggingface \
    HF_HUB_DISABLE_PROGRESS_BARS=1 \
    OLLAMA_MODELS=/data/.ollama/models \
    SYSTEM=spaces

# Non-root user with UID 1000 (HF Spaces convention) and a writable /data.
# Single RUN: the build is still root here, so no USER root switch is needed.
RUN useradd -m -u 1000 user \
    && mkdir -p /data \
    && chown user:user /data

USER user

# Install Python dependencies first so code changes don't bust this layer.
COPY --chown=user requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code (includes start.sh) and make the entrypoint executable.
COPY --chown=user . /app
RUN chmod +x /app/start.sh

# 11434 = Ollama API, 7860 = Gradio UI.
EXPOSE 11434 7860

# Launch both services via the startup script.
CMD ["/app/start.sh"]