# ===============================
# Base image
# ===============================
FROM python:3.10-slim

# All relative paths in later instructions resolve against /app.
WORKDIR /app

# ===============================
# System dependencies + OCR libraries
# ===============================
# Update, install and remove the apt package lists in a single layer so the
# package index is not stored in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        sqlite3 \
        ca-certificates \
        libgomp1 \
        tesseract-ocr \
        tesseract-ocr-eng \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        libgl1 \
        build-essential \
        gcc \
        g++ \
        python3-dev \
    && rm -rf /var/lib/apt/lists/*

# Verify Tesseract installation: fails the build early if the binary is
# missing or cannot run.
RUN tesseract --version

# ===============================
# Upgrade pip and build tooling
# ===============================
RUN pip install --no-cache-dir --upgrade pip setuptools wheel cython

# ===============================
# Install NumPy first (prebuilt wheel)
# ===============================
RUN pip install --no-cache-dir numpy==1.24.4

# ===============================
# Install Pandas from prebuilt wheel (avoid --no-binary)
# ===============================
# --only-binary=:all: makes pip refuse source distributions, so this step
# fails instead of compiling when no wheel is available.
RUN pip install --no-cache-dir pandas==2.0.3 --only-binary=:all:

# ===============================
# Install remaining dependencies
# ===============================
# requirements.txt is copied only here, right before the layer that reads it,
# so editing it does not invalidate the cached pip/NumPy/Pandas layers above.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# ===============================
# Optional: Gemini SDK, EasyOCR
# ===============================
# NOTE(review): these installs are unpinned (and the first uses --upgrade), so
# pip may replace the NumPy/Pandas versions pinned above if a dependency
# requires it. Consider a constraints file if those pins must hold - confirm
# against requirements.txt.
RUN pip install --no-cache-dir --upgrade google-generativeai google-ai-generativelanguage
RUN pip install --no-cache-dir easyocr opencv-python-headless Pillow pytesseract

# ===============================
# Copy application code
# ===============================
# Copied last so source-only changes reuse every dependency layer above.
COPY . .
# ===============================
# Create necessary directories
# ===============================
# /app/data (logs, docs) is made world-writable; /app/backend/app/db is only
# created, its permissions are left at the default.
# NOTE(review): presumably 777 is there so a non-root runtime user can write
# under /app/data - confirm, and tighten if the container always runs as root.
RUN mkdir -p /app/data/logs /app/data/docs /app/backend/app/db \
    && chmod -R 777 /app/data

# ===============================
# Create __init__.py files
# ===============================
# Ensures each backend directory is an importable Python package. The build
# fails if any of these directories is missing from the copied source tree.
RUN touch \
        backend/__init__.py \
        backend/feature_builder/__init__.py \
        backend/app/__init__.py \
        backend/app/api/__init__.py \
        backend/app/agent/__init__.py \
        backend/app/wrappers/__init__.py \
        backend/app/db/__init__.py \
        backend/ingest/__init__.py

# ===============================
# Verify agent files exist
# ===============================
# Abort the build with a clear message (on stderr) when the orchestrator
# module was not part of the build context.
RUN test -f backend/app/agent/agent_orchestrator.py \
    || { echo "ERROR: agent_orchestrator.py not found! Add it before building." >&2; exit 1; }

# ===============================
# Initialize database
# ===============================
# db_init.py is already in the image via the earlier `COPY . .`, so no
# separate COPY is needed. A non-zero exit from the script fails the build.
RUN echo "🗄️ Initializing database during build..." && \
    python backend/app/db/db_init.py && \
    echo "✅ Database initialized successfully!"

# ===============================
# Expose port
# ===============================
EXPOSE 7860

# ===============================
# Startup script
# ===============================
# The script is written with printf '%s\n' (one argument per output line)
# rather than echo with embedded "\n", because escape handling in echo differs
# between shells. At container start it re-runs db_init.py, reports whether
# that succeeded instead of always printing "Database ready", and then
# replaces itself with uvicorn via exec. A failed re-initialisation is logged
# but does not stop the application from starting.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'echo "🔍 Checking database..."' \
        'if python backend/app/db/db_init.py; then' \
        '  echo "✅ Database ready"' \
        'else' \
        '  echo "⚠️ Database initialization failed; starting application anyway" >&2' \
        'fi' \
        'echo "🚀 Starting application..."' \
        'exec uvicorn app:app --host 0.0.0.0 --port 7860 --timeout-keep-alive 75' \
        > /app/start.sh \
    && chmod +x /app/start.sh

CMD ["/app/start.sh"]