# ===============================
# Base image
# ===============================
FROM python:3.10-slim

# Every relative path used below (COPY, touch, python ...) resolves against /app.
WORKDIR /app

# ===============================
# System dependencies + OCR libraries
# ===============================
# Index refresh, install and list cleanup are chained in one RUN so the apt
# package lists are removed in the same layer that created them.
RUN apt-get update && apt-get install -y --no-install-recommends \
        sqlite3 \
        ca-certificates \
        libgomp1 \
        tesseract-ocr \
        tesseract-ocr-eng \
        libglib2.0-0 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        libgl1 \
        build-essential \
        gcc \
        g++ \
        python3-dev \
    && rm -rf /var/lib/apt/lists/*

# Verify Tesseract: fail the build right away if the binary is not runnable.
RUN tesseract --version

# ===============================
# Python tooling
# ===============================
RUN pip install --no-cache-dir --upgrade pip setuptools wheel

# ===============================
# 🔒 CRITICAL: single dependency layer
# ===============================
# All Python requirements are installed by ONE pip invocation, so they are
# resolved together rather than layer by layer.
#
# "uvicorn[standard]" is quoted on purpose: unquoted, the brackets are a shell
# glob pattern and the argument could be rewritten by pathname expansion.
#
# NOTE(review): google-generativeai, google-ai-generativelanguage, easyocr and
# opencv-python-headless carry no version pin, so two builds of this file may
# install different versions of them. Consider pinning once known-good
# versions are confirmed.
RUN pip install --no-cache-dir \
        numpy==1.24.4 \
        pandas==2.0.3 \
        fastapi==0.104.1 \
        "uvicorn[standard]==0.24.0" \
        pydantic==2.5.0 \
        scikit-learn==1.6.1 \
        lightgbm==4.1.0 \
        joblib==1.3.2 \
        python-dateutil==2.8.2 \
        filelock==3.13.1 \
        python-multipart==0.0.6 \
        httpx==0.25.2 \
        redis==5.0.1 \
        rq==1.15.1 \
        pdfplumber==0.10.3 \
        PyMuPDF==1.23.8 \
        pytesseract==0.3.10 \
        Pillow==10.1.0 \
        python-magic==0.4.27 \
        requests==2.31.0 \
        python-dotenv==1.0.0 \
        psycopg2-binary==2.9.7 \
        SQLAlchemy==2.0.20 \
        alembic==1.11.1 \
        google-generativeai \
        google-ai-generativelanguage \
        easyocr \
        opencv-python-headless

# ===============================
# Copy application code
# ===============================
# Copied after the dependency layers so that source-only changes do not
# invalidate the cached apt/pip layers above.
COPY . .
# ===============================
# Runtime directories
# ===============================
# NOTE(review): /app/data is made world-writable (777). Presumably this lets a
# non-root runtime user write logs and documents; confirm that it is required
# and tighten the mode if it is not.
RUN mkdir -p /app/data/logs /app/data/docs /app/backend/app/db \
    && chmod -R 777 /app/data

# ===============================
# Python package structure
# ===============================
# Make sure every package directory has an __init__.py. touch creates missing
# files and leaves the content of existing ones untouched.
RUN touch \
        backend/__init__.py \
        backend/feature_builder/__init__.py \
        backend/app/__init__.py \
        backend/app/api/__init__.py \
        backend/app/agent/__init__.py \
        backend/app/wrappers/__init__.py \
        backend/app/db/__init__.py \
        backend/ingest/__init__.py

# ===============================
# Verify agent file exists
# ===============================
# Abort the build with a readable message when the file was not copied in.
RUN test -f backend/app/agent/agent_orchestrator.py \
    || { echo "❌ ERROR: agent_orchestrator.py missing"; exit 1; }

# ===============================
# Initialize database at build time
# ===============================
# The same init script runs again from start.sh at container start.
RUN echo "🗄️ Initializing database..." \
    && python backend/app/db/db_init.py \
    && echo "✅ Database ready"

# ===============================
# Expose port
# ===============================
EXPOSE 7860

# ===============================
# Startup script
# ===============================
# The script is written with printf '%s\n', one argument per script line.
# This does not depend on the build shell's echo expanding "\n" (dash does,
# bash does not), and it avoids the stray \" sequences that made the startup
# messages print with literal quote characters.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'set -e' \
        'echo "🔍 Checking database..."' \
        'python backend/app/db/db_init.py' \
        'echo "✅ Database ready"' \
        'echo "🚀 Starting application..."' \
        'exec uvicorn app:app --host 0.0.0.0 --port 7860 --timeout-keep-alive 75' \
        > /app/start.sh \
    && chmod +x /app/start.sh

# Exec form: start.sh runs directly, without a wrapping "/bin/sh -c".
CMD ["/app/start.sh"]