# syntax=docker/dockerfile:1

# HuggingFace's pre-built GPU image (CUDA, PyTorch, Transformers), pinned by
# digest for reproducibility (once a digest is given, the :latest tag is
# only informational).
FROM huggingface/transformers-pytorch-gpu:latest@sha256:4c7317881a534b22e18add49c925096fa902651fb0571c69f3cad58af3ea2c0f

WORKDIR /app

# System dependencies:
#   build-essential — compile native wheels during pip installs
#   poppler-utils   — PDF page rasterization used by the pipeline
# --no-install-recommends keeps the layer lean; apt lists are removed in the
# same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    git \
    poppler-utils \
    && rm -rf /var/lib/apt/lists/*

# Verify Python version (build-log diagnostic only)
RUN python3 -V

# Copy requirements first so the dependency layer stays cached until
# requirements.txt itself changes.
COPY requirements.txt ./

# Install Python dependencies
RUN pip3 install --no-cache-dir -r requirements.txt

# Hugging Face cache location — set BEFORE any model download so weights land
# in a path we control. TRANSFORMERS_CACHE is deprecated in newer transformers
# (superseded by HF_HOME) but kept for older versions.
ENV HF_HOME=/app/.cache/huggingface \
    HF_HUB_CACHE=/app/.cache/huggingface \
    HF_DATASETS_CACHE=/app/.cache/huggingface \
    TRANSFORMERS_CACHE=/app/.cache/huggingface

COPY demo/download_models.py .

# NOTE(review): the `python3 download_models.py` step is commented out, so
# models are NOT pre-downloaded at build time despite the script being copied
# above — they are fetched on first run into the cache dir. Re-enable the
# commented command below to bake the weights into the image.
# The single recursive chmod on /app/.cache already covers the nested
# huggingface dir (the original chmod'ed it twice).
RUN mkdir -p /app/.cache/huggingface && \
    chmod -R 755 /app/.cache
    # && python3 download_models.py

# Install colpali-engine (after base deps so its requirements are satisfied).
# NOTE(review): colpali-engine is unpinned — pin a version for reproducible builds.
RUN pip3 install --no-cache-dir colpali-engine einops~=0.8.1

# Application code, copied late so source edits don't invalidate the
# dependency layers above.
COPY visual_rag/ ./visual_rag/
COPY benchmarks/ ./benchmarks/
COPY demo/ ./demo/
COPY pyproject.toml ./
COPY README.md ./

# Install the package in editable mode
RUN pip3 install --no-cache-dir -e .
# Streamlit configuration + writable runtime dirs. HF Spaces runs the
# container as a non-root user, so these paths are opened up with 777 to stay
# writable at runtime; prefer `chown` to a known uid if the runtime user is
# ever fixed.
RUN mkdir -p /app/.streamlit /app/.cache/streamlit /app/.cache/huggingface /app/results && \
    printf '[server]\nport = 7860\nheadless = true\nenableCORS = false\nenableXsrfProtection = false\nmaxUploadSize = 500\n\n[browser]\ngatherUsageStats = false\n' > /app/.streamlit/config.toml && \
    chmod -R 777 /app/.streamlit /app/.cache/streamlit /app/.cache/huggingface /app/results

ENV STREAMLIT_CONFIG_HOME=/app/.streamlit \
    STREAMLIT_BROWSER_GATHER_USAGE_STATS=false \
    STREAMLIT_USER_BASE_PATH=/app/.cache/streamlit

# Documentation only — HF Spaces routes HTTP traffic to 7860
EXPOSE 7860

# GPU probe: prints False during build (no GPU attached), True at runtime;
# `|| true` keeps the build going either way.
RUN python3 -c "import torch; print('CUDA available:', torch.cuda.is_available())" || true

# BUG FIX: the version specifier must be quoted. In the shell-form RUN, the
# unquoted `>` in `cloudinary>=1.30.0` was parsed as output redirection, so
# pip installed an unconstrained `cloudinary` and stdout was written to a
# stray file named `=1.30.0` inside the image.
RUN pip3 install --no-cache-dir "cloudinary>=1.30.0"

# Exec-form ENTRYPOINT: streamlit runs as PID 1 and receives SIGTERM on stop.
ENTRYPOINT ["streamlit", "run", "demo/app.py", "--server.port=7860", "--server.address=0.0.0.0", "--server.headless", "true"]