# visual-rag-toolkit / Dockerfile
# Yeroyan's picture
# add cloudinary
# 84b827f verified
# Use HuggingFace's pre-built GPU image (includes CUDA, PyTorch, Transformers).
# NOTE: the sha256 digest pin takes precedence over the mutable ":latest" tag,
# so this base is fully reproducible despite the tag name.
FROM huggingface/transformers-pytorch-gpu:latest@sha256:4c7317881a534b22e18add49c925096fa902651fb0571c69f3cad58af3ea2c0f
# All subsequent relative paths (COPY destinations, RUN cwd) resolve under /app.
WORKDIR /app
# Install system dependencies:
#   build-essential — compiler toolchain for native Python extension builds
#   curl            — downloads / probes
#   git             — pip VCS installs and Hugging Face hub operations
#   poppler-utils   — PDF rasterization tools (pdftoppm etc.)
# --no-install-recommends keeps the layer minimal (hadolint DL3015); the apt
# list cache is removed in the same layer so it never persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    git \
    poppler-utils \
    && rm -rf /var/lib/apt/lists/*
# Build-time sanity check: record the Python version in the build log.
RUN python3 -V
# Copy requirements first (for better Docker layer caching): the dependency
# layer below is only invalidated when requirements.txt itself changes, not on
# every application-source edit.
COPY requirements.txt ./
# Install Python dependencies; --no-cache-dir keeps pip's wheel cache out of
# the layer.
RUN pip3 install --no-cache-dir -r requirements.txt
# Pre-download Hugging Face models during build.
# IMPORTANT: Set cache directories BEFORE downloading to ensure models are
# cached under /app. TRANSFORMERS_CACHE is deprecated in recent transformers
# releases in favor of HF_HOME/HF_HUB_CACHE, but is kept for compatibility
# with older versions. One ENV instruction = one layer.
ENV HF_HOME=/app/.cache/huggingface \
    HF_HUB_CACHE=/app/.cache/huggingface \
    HF_DATASETS_CACHE=/app/.cache/huggingface \
    TRANSFORMERS_CACHE=/app/.cache/huggingface
COPY demo/download_models.py .
# A single recursive chmod on /app/.cache already covers the huggingface
# subdirectory; the second chmod in the original was redundant.
RUN mkdir -p /app/.cache/huggingface && \
    chmod -R 755 /app/.cache
# Model pre-download at build time is intentionally disabled (models are
# fetched lazily at runtime instead):
#&& \ python3 download_models.py
# Install colpali-engine (after model download to ensure deps are ready).
# --no-cache-dir keeps pip's cache out of the layer (hadolint DL3042).
# NOTE(review): colpali-engine is unpinned — consider pinning an exact version
# for reproducible builds (DL3013).
RUN pip3 install --no-cache-dir colpali-engine einops~=0.8.1
# Copy application files (done AFTER dependency installs so source edits do
# not invalidate the cached dependency layers above).
COPY visual_rag/ ./visual_rag/
COPY benchmarks/ ./benchmarks/
COPY demo/ ./demo/
COPY pyproject.toml ./
COPY README.md ./
# Install the package in editable mode (dependencies are already satisfied);
# --no-cache-dir keeps pip's cache out of the layer.
RUN pip3 install --no-cache-dir -e .
# Setup Streamlit configuration and writable runtime directories.
# chmod 777: HF Spaces runs the container as an arbitrary non-root UID, so
# these directories must be world-writable at runtime; least-privilege modes
# would break writes from the randomized UID.
RUN mkdir -p /app/.streamlit \
             /app/.cache/streamlit \
             /app/.cache/huggingface \
             /app/results && \
    printf '[server]\nport = 7860\nheadless = true\nenableCORS = false\nenableXsrfProtection = false\nmaxUploadSize = 500\n\n[browser]\ngatherUsageStats = false\n' > /app/.streamlit/config.toml && \
    chmod -R 777 /app/.streamlit /app/.cache/streamlit /app/.cache/huggingface /app/results
# Runtime configuration for Streamlit (one ENV instruction = one layer).
ENV STREAMLIT_CONFIG_HOME=/app/.streamlit \
    STREAMLIT_BROWSER_GATHER_USAGE_STATS=false \
    STREAMLIT_USER_BASE_PATH=/app/.cache/streamlit
# Expose port (HF Spaces uses 7860). EXPOSE is documentation only — the port
# is actually bound by the --server.port flag passed in ENTRYPOINT.
EXPOSE 7860
# Verify GPU availability (will show False during build, True at runtime).
# "|| true" keeps the build from failing on CPU-only build hosts.
RUN python3 -c "import torch; print('CUDA available:', torch.cuda.is_available())" || true
RUN pip3 install cloudinary>=1.30.0
# Launch the Streamlit app. Exec-form ENTRYPOINT: streamlit runs as PID 1 and
# receives SIGTERM directly on container stop (no intermediate /bin/sh -c).
ENTRYPOINT ["streamlit", "run", "demo/app.py", "--server.address=0.0.0.0", "--server.port=7860", "--server.headless=true"]