# dipan004's picture
# Update Dockerfile
# 6f5f8ce verified
# ===============================
# Base image
# ===============================
# Start from the official slim Python 3.10 image; the apt-get and pip steps
# below build on top of it.
FROM python:3.10-slim
# Make /app the working directory: every relative path used by the later
# RUN, COPY and CMD instructions resolves against it.
WORKDIR /app
# ===============================
# System dependencies + OCR libraries
# ===============================
# Install OS-level packages in a single layer: refresh the package index,
# install without recommended extras, then remove the index files so they
# are not stored in the layer. Packages are listed alphabetically, one per
# line, to keep diffs small.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        g++ \
        gcc \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        python3-dev \
        sqlite3 \
        tesseract-ocr \
        tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*
# Verify Tesseract: fail the build right here if the tesseract binary
# installed by the apt step above is missing or cannot run.
RUN tesseract --version
# ===============================
# Python tooling
# ===============================
# Upgrade the packaging toolchain before installing application
# dependencies; --no-cache-dir keeps pip's download cache out of the layer.
# NOTE(review): these three tools are upgraded to whatever is latest at
# build time (no version pins) — confirm that is intended.
RUN pip install --no-cache-dir --upgrade pip setuptools wheel
# ===============================
# 🔒 CRITICAL: single dependency layer
# ===============================
# Install every Python dependency with one pip invocation so they are
# resolved together and land in a single image layer.
# The extras specifier is quoted so the shell can never treat the square
# brackets as a glob pattern.
# NOTE(review): the last four requirements (google-generativeai,
# google-ai-generativelanguage, easyocr, opencv-python-headless) carry no
# version pin, so their versions depend on when the image is built —
# consider pinning them like the others.
RUN pip install --no-cache-dir \
        numpy==1.24.4 \
        pandas==2.0.3 \
        fastapi==0.104.1 \
        "uvicorn[standard]==0.24.0" \
        pydantic==2.5.0 \
        scikit-learn==1.6.1 \
        lightgbm==4.1.0 \
        joblib==1.3.2 \
        python-dateutil==2.8.2 \
        filelock==3.13.1 \
        python-multipart==0.0.6 \
        httpx==0.25.2 \
        redis==5.0.1 \
        rq==1.15.1 \
        pdfplumber==0.10.3 \
        PyMuPDF==1.23.8 \
        pytesseract==0.3.10 \
        Pillow==10.1.0 \
        python-magic==0.4.27 \
        requests==2.31.0 \
        python-dotenv==1.0.0 \
        psycopg2-binary==2.9.7 \
        SQLAlchemy==2.0.20 \
        alembic==1.11.1 \
        google-generativeai \
        google-ai-generativelanguage \
        easyocr \
        opencv-python-headless
# ===============================
# Copy application code
# ===============================
# Copy the whole build context into the working directory (/app). This comes
# after the dependency layers so source edits do not invalidate them.
# NOTE(review): everything not excluded by a .dockerignore ends up in the
# image — confirm one exists and excludes secrets and local artifacts.
COPY . .
# ===============================
# Runtime directories
# ===============================
# Create the directories the application writes to (mkdir -p is a no-op for
# any that COPY already created), then make /app/data and everything under
# it world-writable.
# NOTE(review): presumably 777 is there so a non-root runtime user can
# write — confirm; /app/backend/app/db is created but not chmod-ed.
RUN mkdir -p \
        /app/data/logs \
        /app/data/docs \
        /app/backend/app/db \
    && chmod -R 777 /app/data
# ===============================
# Python package structure
# ===============================
# Make sure each directory below has an __init__.py. touch creates an empty
# file where none exists and leaves the contents of existing files alone.
# The step fails if any of the listed directories is missing.
RUN touch \
        backend/__init__.py \
        backend/feature_builder/__init__.py \
        backend/app/__init__.py \
        backend/app/api/__init__.py \
        backend/app/agent/__init__.py \
        backend/app/wrappers/__init__.py \
        backend/app/db/__init__.py \
        backend/ingest/__init__.py
# ===============================
# Verify agent file exists
# ===============================
# Abort the build with a clear message when the agent orchestrator module
# was not copied into the image.
RUN if [ ! -f backend/app/agent/agent_orchestrator.py ]; then \
        echo "❌ ERROR: agent_orchestrator.py missing"; \
        exit 1; \
    fi
# ===============================
# Initialize database at build time
# ===============================
# Run the database initialisation script while building the image. The &&
# chain makes a failing script fail the build, so "Database ready" is only
# printed after db_init.py exits successfully.
# The progress messages previously contained mis-decoded emoji bytes
# (mojibake); they are restored to the intended characters.
# NOTE(review): presumably db_init.py creates the schema/file the app needs
# at startup — confirm against the script.
RUN echo "🗄️ Initializing database..." \
    && python backend/app/db/db_init.py \
    && echo "✅ Database ready"
# ===============================
# Expose port
# ===============================
# Document the port the server listens on (uvicorn is started with
# --port 7860 in the startup script). EXPOSE does not publish the port by
# itself; it only records the contract for operators and tooling.
EXPOSE 7860
# ===============================
# Startup script
# ===============================
# Generate /app/start.sh, the container entry script, and mark it executable.
#
# printf '%s\n' writes each argument as one script line. This avoids relying
# on the shell's echo interpreting "\n" escapes, and avoids the stray
# backslash-escaped quotes that made the script print its messages wrapped
# in literal double quotes. The mis-decoded emoji (mojibake) in the messages
# are restored as well.
#
# What the generated script does:
#   - set -e       : stop at the first failing command.
#   - db_init.py   : run the database initialisation again at container start.
#   - exec uvicorn : replace the shell with the server process so it receives
#                    container stop signals directly.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'set -e' \
        'echo "🔍 Checking database..."' \
        'python backend/app/db/db_init.py' \
        'echo "✅ Database ready"' \
        'echo "🚀 Starting application..."' \
        'exec uvicorn app:app --host 0.0.0.0 --port 7860 --timeout-keep-alive 75' \
        > /app/start.sh \
    && chmod +x /app/start.sh
# Exec-form CMD: the script is started directly, without a wrapping shell.
CMD ["/app/start.sh"]