# Container image for the Streamlit application started from main.py.
#
# Build stages, in order:
#   1. system libraries (OCR, PDF rendering, OpenGL/X11 runtime libs)
#   2. CPU-only PyTorch wheels
#   3. Python dependencies (requirements.txt if present, otherwise a fallback list)
#   4. spaCy English model
#   5. application source
# The container listens on port 8501.

# Use the official Python 3.12 slim image
FROM python:3.12-slim

# PYTHONUNBUFFERED: stream stdout/stderr straight to the container log.
# PYTHONDONTWRITEBYTECODE: do not write .pyc files into the image.
ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1

# Set work directory
WORKDIR /app

# Install system dependencies in a single layer and drop the apt lists
# afterwards so they do not bloat the image.
# libgl1 is used instead of libgl1-mesa-glx: the latter was a transitional
# package and is not available in Debian 12 and later, which current
# python:3.12-slim images are based on.
RUN apt-get update && apt-get install -y \
        curl \
        build-essential \
        tesseract-ocr \
        libtesseract-dev \
        poppler-utils \
        libgl1 \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        libgomp1 \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Upgrade pip before installing anything else
RUN pip install --no-cache-dir --upgrade pip

# Install PyTorch with CPU support.
# The wheels come from the PyTorch CPU index; a plain `pip install torch`
# resolves against PyPI, whose default Linux wheel is not the CPU-only build.
# This runs before the project requirements so that a later `torch>=...`
# requirement is already satisfied by the CPU build.
RUN pip install --no-cache-dir \
        torch torchvision torchaudio \
        --index-url https://download.pytorch.org/whl/cpu

# Copy requirements file (if it exists); the trailing wildcard keeps the
# pattern valid when the file is absent from the build context.
COPY requirements.txt* /app/

# Install dependencies from requirements.txt if it exists, otherwise install
# the fallback list below.
# Every specifier is quoted: in a shell-form RUN an unquoted `pkg>=1.0` is
# parsed by /bin/sh as `pkg` with stdout redirected to a file named `=1.0`,
# which silently discards the version constraint.
# NOTE(review): the docTR OCR library is published on PyPI as `python-doctr`;
# `doctr` is a different project. Confirm which package (and version) is
# intended before relying on the fallback branch.
RUN if [ -f "requirements.txt" ]; then \
        pip install --no-cache-dir -r requirements.txt; \
    else \
        pip install --no-cache-dir \
            "streamlit>=1.28.0" \
            "pandas>=2.0.0" \
            "numpy>=1.24.0" \
            "PyMuPDF>=1.23.0" \
            "PyPDF2>=3.0.0" \
            "doctr>=2.4.0" \
            "pdf2image>=1.16.0" \
            "spacy>=3.7.0" \
            "torch>=2.0.0" \
            "fuzzywuzzy>=0.18.0" \
            "python-Levenshtein>=0.21.0" \
            "openai>=1.0.0" \
            "huggingface-hub>=0.19.0" \
            "pydantic>=2.0.0" \
            "pydantic-settings>=2.0.0" \
            "python-dateutil>=2.8.0" \
            "python-dotenv>=1.0.0"; \
    fi

# Install spaCy models
RUN python -m spacy download en_core_web_sm

# Create temp directory for file processing.
# NOTE(review): mode 777 makes the directory world-writable; presumably this
# is so the app can write here under any runtime UID -- tighten if not needed.
RUN mkdir -p /app/temp && chmod 777 /app/temp

# Copy the source code last so code changes do not invalidate the
# dependency layers above.
COPY src/ /app/src/
COPY main.py /app/

# Expose the port Streamlit will run on
EXPOSE 8501

# Set environment variables for Streamlit
# NOTE(review): XSRF protection and CORS handling are both switched off here.
# That is only appropriate when a trusted reverse proxy fronts the container;
# confirm this is intentional.
ENV STREAMLIT_SERVER_PORT=8501
ENV STREAMLIT_SERVER_ADDRESS=0.0.0.0
ENV STREAMLIT_SERVER_HEADLESS=true
ENV STREAMLIT_SERVER_ENABLE_CORS=false
ENV STREAMLIT_SERVER_ENABLE_XSRF_PROTECTION=false

# Run the Streamlit application
CMD ["streamlit", "run", "main.py", "--server.port=8501", "--server.address=0.0.0.0"]