# =============================================================================
# RAG System Dockerfile for Hugging Face Spaces
# =============================================================================
# Builds the container image for the RAG system, including the system packages,
# Python dependencies and runtime configuration needed for deployment.

# =============================================================================
# BASE IMAGE SELECTION
# =============================================================================
# Python 3.10 on the "slim" image variant: it ships only essential packages,
# which keeps the resulting image small.
FROM python:3.10-slim
# =============================================================================
# WORKING DIRECTORY SETUP
# =============================================================================
# All following instructions, and the running container, use /app as the
# working directory. WORKDIR creates the directory if it does not exist.
WORKDIR /app

# PYTHONUNBUFFERED=1 : write stdout/stderr unbuffered so logs show up immediately.
# PORT=8000          : NOTE(review): Streamlit is configured for port 8501
#                      elsewhere in this file; confirm whether anything reads PORT.
# HF_HOME            : cache location for Hugging Face libraries; the path must
#                      be writable by the user the container runs as.
ENV PYTHONUNBUFFERED=1 \
    PORT=8000 \
    HF_HOME=/home/user/huggingface
# =============================================================================
# SYSTEM DEPENDENCIES INSTALLATION
# =============================================================================
# build-essential: compiler toolchain for Python packages that build native code
# curl: used by the container health check
#
# --no-install-recommends keeps optional packages out of the image. The apt
# package lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
    && rm -rf /var/lib/apt/lists/*
# =============================================================================
# PYTHON DEPENDENCIES INSTALLATION
# =============================================================================
# requirements.txt is copied on its own, ahead of the application source, so
# this layer is rebuilt only when the dependency list itself changes.
COPY requirements.txt .

# Upgrade pip first, then install the application's requirements.
# --no-cache-dir keeps pip's download cache out of the image.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt
# =============================================================================
# RUNTIME USER
# =============================================================================
# Hugging Face Spaces runs Docker containers as UID 1000. Create that account
# (with a home directory, so HF_HOME=/home/user/huggingface is writable) while
# still running as root.
RUN useradd --create-home --uid 1000 user
ENV HOME=/home/user

# =============================================================================
# APPLICATION FILES COPY
# =============================================================================
# Copy the whole build context (Python scripts, configuration, documentation
# and the bundled PDF test documents) into /app, owned by the runtime user.
# A separate "COPY *.pdf /app/" is intentionally not used: the PDFs are already
# part of this copy, and a wildcard COPY aborts the build when nothing matches.
COPY --chown=user:user . .

# =============================================================================
# DIRECTORY CREATION
# =============================================================================
# vector_store holds the persisted FAISS index and its metadata. It and /app
# are handed to the runtime user so the application can write there.
RUN mkdir -p vector_store && \
    chown user:user /app vector_store

# Everything from here on, including the application itself, runs unprivileged.
USER user
# =============================================================================
# ENVIRONMENT VARIABLES CONFIGURATION
# =============================================================================
# PYTHONPATH puts the application directory on Python's import path.
# The STREAMLIT_* variables configure the Streamlit server for container use:
# listen on every interface on port 8501 and run headless.
#
# NOTE(review): CORS and XSRF protection are switched off, presumably so the app
# works when embedded by the hosting platform; confirm this is still required.
# NOTE(review): the logger level is "debug", which is verbose for a deployed
# service; confirm this is intended.
ENV PYTHONPATH=/app \
    STREAMLIT_SERVER_PORT=8501 \
    STREAMLIT_SERVER_ADDRESS=0.0.0.0 \
    STREAMLIT_SERVER_HEADLESS=true \
    STREAMLIT_SERVER_ENABLE_CORS=false \
    STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION=false \
    STREAMLIT_LOGGER_LEVEL=debug
# =============================================================================
# NETWORK CONFIGURATION
# =============================================================================
# Documents that the Streamlit web interface listens on port 8501.
# EXPOSE does not publish the port by itself; the runtime maps it.
EXPOSE 8501
# =============================================================================
# HEALTH CHECK CONFIGURATION
# =============================================================================
# Probe Streamlit's health endpoint with curl.
# --start-period gives the application time to start before failures count.
# "|| exit 1" maps every curl failure code to 1 (unhealthy); Docker reserves
# exit code 2 for health checks, so curl's own codes are not passed through.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl --fail --silent --show-error http://localhost:8501/_stcore/health || exit 1
# =============================================================================
# APPLICATION STARTUP
# =============================================================================
# Launch the Streamlit application (exec form, so no shell wraps the process).
# --server.port: port the web interface listens on
# --server.address: bind to all interfaces so the app is reachable from outside
#                   the container
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]