# Base image: CUDA 12.1 runtime on Ubuntu 22.04 (provides GPU libs for Ollama).
FROM nvidia/cuda:12.1.1-base-ubuntu22.04

# Install Python 3.10 and essential tooling.
# ca-certificates is added explicitly: with --no-install-recommends it is not
# guaranteed to be pulled in, and the https curl pipes below need it.
# (NOTE(review): the nvidia/cuda base may already ship it — harmless either way.)
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        python3.10 \
        python3.10-dev \
        python3.10-distutils \
        curl \
        ca-certificates \
        git \
    && rm -rf /var/lib/apt/lists/*

# Make Python 3.10 the default python3.
RUN update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1

# Install pip for Python 3.10 (Ubuntu ships no pip bound to this interpreter).
RUN curl -sS https://bootstrap.pypa.io/get-pip.py | python3.10

# Install Ollama. OLLAMA_GPU_LAYERS requests GPU layer offload
# (NOTE(review): confirm this env var is honored by the installed Ollama version).
ENV OLLAMA_GPU_LAYERS=100
RUN curl -fsSL https://ollama.com/install.sh | sh

# Application directory.
WORKDIR /app

# Runtime environment:
# - PATH includes ~/.local/bin so user-level pip installs are runnable
# - GRADIO_SERVER_NAME binds Gradio to all interfaces inside the container
# - HF_HOME / OLLAMA_MODELS live under /data (persistent volume on HF Spaces)
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONUNBUFFERED=1 \
    GRADIO_SERVER_NAME="0.0.0.0" \
    HF_HOME=/data/.huggingface \
    HF_HUB_DISABLE_PROGRESS_BARS=1 \
    OLLAMA_MODELS=/data/.ollama/models \
    SYSTEM=spaces

# Non-root user with UID 1000 (HF Spaces convention) and a writable /data.
# Single RUN: the build is still root here, so no USER root switch is needed.
RUN useradd -m -u 1000 user \
    && mkdir -p /data \
    && chown user:user /data

USER user

# Install Python dependencies first so code changes don't bust this layer.
COPY --chown=user requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code (includes start.sh) and make the entrypoint executable.
COPY --chown=user . /app
RUN chmod +x /app/start.sh

# 11434 = Ollama API, 7860 = Gradio UI.
EXPOSE 11434 7860

# Launch both services via the startup script.
CMD ["/app/start.sh"]