File size: 4,880 Bytes
c74f73f
 
 
 
 
 
 
 
 
 
 
 
192b2d2
3852fcf
c74f73f
 
 
 
 
 
3852fcf
c74f73f
 
 
 
 
 
 
3852fcf
c74f73f
 
 
3852fcf
 
 
 
 
c74f73f
 
 
 
 
 
192b2d2
3852fcf
c74f73f
 
 
192b2d2
 
3852fcf
c74f73f
 
 
 
 
 
192b2d2
 
c74f73f
 
 
 
 
 
192b2d2
 
c74f73f
 
 
 
 
 
192b2d2
 
c74f73f
 
 
 
 
192b2d2
c74f73f
 
192b2d2
 
 
 
 
 
 
c74f73f
 
 
 
 
 
3852fcf
 
c74f73f
 
 
 
 
 
3852fcf
 
c74f73f
 
 
 
 
 
 
192b2d2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
# =============================================================================
# RAG System Dockerfile for Hugging Face Spaces
# =============================================================================
# This Dockerfile creates a containerized environment for the RAG system
# with all necessary dependencies and configurations for deployment.

# =============================================================================
# BASE IMAGE SELECTION
# =============================================================================

# Base the image on the official Python 3.10 "slim" variant.
# The slim variant ships a reduced package set, which keeps the image small;
# the extra OS packages this project needs are installed explicitly below.
FROM python:3.10-slim

# =============================================================================
# WORKING DIRECTORY SETUP
# =============================================================================

# Run every following instruction, and the container's main process, from /app.
# WORKDIR creates the directory when it does not exist yet.
WORKDIR /app

# Base runtime environment:
#   HF_HOME          - root directory for the Hugging Face caches
#   PORT             - NOTE(review): nothing in this Dockerfile reads it, and
#                      Streamlit is configured for 8501 further down; confirm
#                      whether the application itself uses this value
#   PYTHONUNBUFFERED - turn off Python output buffering so logs show up at once
ENV HF_HOME=/home/user/huggingface \
    PORT=8000 \
    PYTHONUNBUFFERED=1

# =============================================================================
# SYSTEM DEPENDENCIES INSTALLATION
# =============================================================================

# Install the OS packages the image needs, in a single layer:
#   build-essential - compiler toolchain for Python packages that build native code
#   curl            - used by the HEALTHCHECK probe
# --no-install-recommends keeps apt from pulling in optional "recommended"
# packages that nothing here asks for. The apt package lists are deleted in the
# same RUN so they never end up in an image layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
    && rm -rf /var/lib/apt/lists/*

# =============================================================================
# PYTHON DEPENDENCIES INSTALLATION
# =============================================================================

# Bring in only the dependency manifest at this point. The rest of the source
# is copied later, so this layer and the install below stay cached until
# requirements.txt itself changes.
COPY requirements.txt ./

# Upgrade pip, then install everything listed in requirements.txt.
# --no-cache-dir stops pip from keeping its download cache inside the layer.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir --requirement requirements.txt

# =============================================================================
# APPLICATION FILES COPY
# =============================================================================

# Hugging Face Spaces runs Docker containers as UID 1000 rather than root.
# Create a matching user (its home, /home/user, also hosts HF_HOME) and hand it
# /app, which WORKDIR created as root, so the application can write there at
# runtime.
RUN useradd --create-home --uid 1000 user \
    && chown user:user /app

# Copy the build context (everything not excluded by .dockerignore) into /app,
# owned by the runtime user. --chown sets ownership during the copy, which
# avoids a separate `chown -R` layer that would duplicate every file.
COPY --chown=user:user . .

# Drop root privileges for the remaining build steps and for the running
# container.
USER user

# =============================================================================
# DIRECTORY CREATION
# =============================================================================

# Create the vector store directory used for FAISS index persistence.
# It is created as the runtime user, so the application can write to it.
RUN mkdir -p vector_store

# =============================================================================
# TEST DATA SETUP
# =============================================================================

# The PDF documents at the root of the build context are already placed in /app
# by the `COPY . .` step above. A separate `COPY *.pdf /app/` would only copy
# the same files a second time and make the build depend on at least one PDF
# being present, so no extra step is needed here.

# =============================================================================
# ENVIRONMENT VARIABLES CONFIGURATION
# =============================================================================

# Put /app on Python's module search path.
ENV PYTHONPATH=/app

# Streamlit server settings, supplied through STREAMLIT_* environment variables:
# listen on port 8501 on all interfaces and run headless.
# NOTE(review): CORS and XSRF protection are switched off and the logger is set
# to debug; confirm these are intended for the deployed Space.
ENV STREAMLIT_SERVER_PORT=8501 \
    STREAMLIT_SERVER_ADDRESS=0.0.0.0 \
    STREAMLIT_SERVER_HEADLESS=true \
    STREAMLIT_SERVER_ENABLE_CORS=false \
    STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION=false \
    STREAMLIT_LOGGER_LEVEL=debug

# =============================================================================
# NETWORK CONFIGURATION
# =============================================================================

# Declare the port the Streamlit web interface listens on (TCP 8501).
# EXPOSE is metadata only: it documents the port but does not publish it.
# Publishing is done by the hosting platform or by `docker run -p`.
EXPOSE 8501/tcp

# =============================================================================
# HEALTH CHECK CONFIGURATION
# =============================================================================

# Probe Streamlit's health endpoint to detect a wedged application.
#   --interval / --retries : probe every 30s, unhealthy after 3 straight failures
#   --timeout              : give up on a probe that hangs for more than 10s
#   --start-period         : failures in the first 60s do not count, leaving the
#                            application time to start up
# curl flags: --fail turns HTTP error statuses into a non-zero exit, and
# --silent --show-error hides the progress meter but keeps error messages.
# `|| exit 1` maps curl's various exit codes onto the 0 (healthy) /
# 1 (unhealthy) values that Docker documents for health checks.
HEALTHCHECK --interval=30s --timeout=10s --start-period=60s --retries=3 \
    CMD curl --fail --silent --show-error http://localhost:8501/_stcore/health || exit 1

# =============================================================================
# APPLICATION STARTUP
# =============================================================================

# Launch the Streamlit application (app.py) as the container's main process.
# Exec (JSON array) form runs streamlit directly, without a wrapping shell.
#   --server.port    : port the web interface listens on
#   --server.address : bind to all interfaces so the port is reachable from
#                      outside the container
CMD ["streamlit", "run", "app.py", \
     "--server.port=8501", \
     "--server.address=0.0.0.0"]