# =============================================================================
# RAG System Dockerfile for Hugging Face Spaces
# =============================================================================
# Builds a container image that installs the Python dependencies listed in
# requirements.txt and serves app.py with Streamlit on port 8501.

# =============================================================================
# BASE IMAGE SELECTION
# =============================================================================
# Python 3.10 on the Debian "slim" variant to keep the image small.
FROM python:3.10-slim

# =============================================================================
# WORKING DIRECTORY SETUP
# =============================================================================
# All relative paths in the instructions below resolve against /app.
WORKDIR /app

# PYTHONUNBUFFERED=1 makes Python flush stdout/stderr immediately so log lines
# appear in the container logs as they are written.
# HF_HOME points at a directory inside the home of the non-root user created
# further down.
# NOTE(review): PORT is not referenced anywhere in this Dockerfile (Streamlit
# is configured for 8501); presumably it is read by application code - confirm.
ENV PYTHONUNBUFFERED=1 \
    PORT=8000 \
    HF_HOME=/home/user/huggingface

# =============================================================================
# SYSTEM DEPENDENCIES INSTALLATION
# =============================================================================
# build-essential: compiler toolchain for Python packages that build native
#                  extensions during `pip install`.
# curl:            used by the HEALTHCHECK instruction below.
# --no-install-recommends keeps optional packages out of the image, and the
# apt lists are removed in the same layer so they never reach the final image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
    && rm -rf /var/lib/apt/lists/*

# =============================================================================
# NON-ROOT USER
# =============================================================================
# Hugging Face Spaces runs Docker containers as UID 1000. Creating that user
# (with its home directory) makes /home/user exist and be writable, so the
# HF_HOME location configured above can actually be created at runtime.
RUN useradd --create-home --uid 1000 user

# =============================================================================
# PYTHON DEPENDENCIES INSTALLATION
# =============================================================================
# requirements.txt is copied on its own first so the dependency layer is only
# rebuilt when that file changes, not on every source edit.
COPY requirements.txt .

# --no-cache-dir: do not keep pip's download cache in the image.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# =============================================================================
# APPLICATION FILES COPY
# =============================================================================
# Copy the whole build context (subject to .dockerignore) into /app, owned by
# the runtime user. This already includes any *.pdf files at the context root,
# so no separate `COPY *.pdf` step is needed; such a step would also abort the
# build whenever the context contains no PDF.
COPY --chown=user:user . .

# =============================================================================
# DIRECTORY CREATION
# =============================================================================
# Directory for the persisted vector store. /app itself and vector_store are
# handed to the runtime user so the application can write there.
RUN mkdir -p vector_store && chown user:user /app vector_store

# =============================================================================
# ENVIRONMENT VARIABLES CONFIGURATION
# =============================================================================
# Make modules under /app importable.
ENV PYTHONPATH=/app

# Streamlit server configuration for containerized deployment.
# NOTE(review): CORS and XSRF protection are disabled - presumably so the app
# works behind the Spaces proxy/iframe. Confirm this is intended, since it
# weakens browser-side protections. Logger level "debug" is verbose.
ENV STREAMLIT_SERVER_PORT=8501 \
    STREAMLIT_SERVER_ADDRESS=0.0.0.0 \
    STREAMLIT_SERVER_HEADLESS=true \
    STREAMLIT_SERVER_ENABLE_CORS=false \
    STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION=false \
    STREAMLIT_LOGGER_LEVEL=debug

# Drop root privileges for everything that runs in the container.
USER user

# =============================================================================
# NETWORK CONFIGURATION
# =============================================================================
# Document the port the Streamlit web interface listens on.
EXPOSE 8501

# =============================================================================
# HEALTH CHECK CONFIGURATION
# =============================================================================
# Probe Streamlit's health endpoint. `|| exit 1` normalizes curl's failure
# status to 1, the exit code Docker defines for "unhealthy".
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health || exit 1

# =============================================================================
# APPLICATION STARTUP
# =============================================================================
# --server.port:    port for the web interface (matches EXPOSE above).
# --server.address: bind to all interfaces so the app is reachable from
#                   outside the container.
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]