# Dockerfile — commit d3bf819 ("Update Dockerfile") by dipan004
# ===============================
# Base image
# ===============================
# Slim variant of the Python 3.10 image. The tag carries no patch version or
# digest, so the exact base can change between builds.
FROM python:3.10-slim
# Every relative path in the instructions that follow resolves against /app.
WORKDIR /app
# ===============================
# OS packages: compiler toolchain, SQLite CLI, Tesseract OCR and the
# shared libraries listed below
# ===============================
# Package names are kept in alphabetical order, one per line, so additions and
# removals show up as single-line diffs. The apt index is removed in the same
# layer so it never reaches the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        g++ \
        gcc \
        libgl1 \
        libglib2.0-0 \
        libgomp1 \
        libsm6 \
        libxext6 \
        libxrender-dev \
        python3-dev \
        sqlite3 \
        tesseract-ocr \
        tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*
# Fail the build early if the tesseract binary is not runnable.
RUN tesseract --version
# ===============================
# Copy requirements and upgrade pip
# ===============================
# Only requirements.txt is copied here so the dependency layers stay cached
# until that file changes.
COPY requirements.txt .
RUN pip install --no-cache-dir --upgrade pip setuptools wheel cython
# ===============================
# Pin NumPy / Pandas for every later pip step
# ===============================
# Each `pip install` below resolves on its own. Without constraints, a later
# step may replace the NumPy/Pandas versions chosen here when one of its
# dependencies asks for a different release. The constraints file keeps both
# pins in force for all following installs; a genuine conflict then fails the
# build instead of silently swapping versions.
# The file lives outside /app so `COPY . .` cannot overwrite it.
RUN printf '%s\n' 'numpy==1.24.4' 'pandas==2.0.3' > /opt/pip-constraints.txt
# ===============================
# Install NumPy first (prebuilt wheel)
# ===============================
RUN pip install --no-cache-dir -c /opt/pip-constraints.txt numpy==1.24.4
# ===============================
# Install Pandas from prebuilt wheel (never build from source)
# ===============================
RUN pip install --no-cache-dir -c /opt/pip-constraints.txt --only-binary=:all: pandas==2.0.3
# ===============================
# Install remaining dependencies
# ===============================
RUN pip install --no-cache-dir -c /opt/pip-constraints.txt -r requirements.txt
# ===============================
# Optional: Gemini SDK, EasyOCR
# ===============================
# These packages are intentionally left unpinned (as before); the constraints
# file only prevents them from moving NumPy/Pandas.
RUN pip install --no-cache-dir --upgrade -c /opt/pip-constraints.txt \
        google-generativeai \
        google-ai-generativelanguage
RUN pip install --no-cache-dir -c /opt/pip-constraints.txt \
        easyocr \
        opencv-python-headless \
        Pillow \
        pytesseract
# ===============================
# Application source
# ===============================
# Bring the whole build context into the working directory.
COPY . .
# ===============================
# Runtime directories
# ===============================
# Make sure the log, document and database directories exist, then open up
# /app/data (recursively) to every user.
RUN mkdir -p \
        /app/data/logs \
        /app/data/docs \
        /app/backend/app/db \
    && chmod -R 777 /app/data
# ===============================
# Python package markers
# ===============================
# Create (or refresh the timestamp of) an __init__.py in each backend
# directory; the build stops at the first path that cannot be touched.
RUN for init_file in \
        backend/__init__.py \
        backend/feature_builder/__init__.py \
        backend/app/__init__.py \
        backend/app/api/__init__.py \
        backend/app/agent/__init__.py \
        backend/app/wrappers/__init__.py \
        backend/app/db/__init__.py \
        backend/ingest/__init__.py; \
    do \
        touch "$init_file" || exit 1; \
    done
# ===============================
# Sanity check: agent orchestrator must be present
# ===============================
RUN if [ ! -f backend/app/agent/agent_orchestrator.py ]; then \
        echo "ERROR: agent_orchestrator.py not found! Add it before building."; \
        exit 1; \
    fi
# ===============================
# Initialize database
# ===============================
# db_init.py is already in the image at this point (delivered by the earlier
# `COPY . .`), so it is not copied a second time; that only added a layer.
# The script runs at build time and any non-zero exit aborts the build.
RUN echo "🗄️ Initializing database during build..." && \
    python backend/app/db/db_init.py && \
    echo "✅ Database initialized successfully!"
# ===============================
# Expose port
# ===============================
# 7860 is the port uvicorn is told to bind in the startup script below.
# EXPOSE only documents the port; it does not publish it.
EXPOSE 7860
# ===============================
# Startup script
# ===============================
# The script is generated with printf, one argument per output line, so it no
# longer depends on the shell's `echo` expanding "\n" escapes (dash does,
# bash's builtin echo does not).
# At container start the script:
#   1. re-runs db_init.py and reports whether it actually succeeded; a failure
#      is logged to stderr but does not stop the application from starting;
#   2. replaces itself with uvicorn via `exec`, so uvicorn receives signals
#      directly.
RUN printf '%s\n' \
        '#!/bin/bash' \
        'echo "🔍 Checking database..."' \
        'if python backend/app/db/db_init.py; then' \
        '    echo "✅ Database ready"' \
        'else' \
        '    echo "WARNING: database initialization failed; starting application anyway" >&2' \
        'fi' \
        'echo "🚀 Starting application..."' \
        'exec uvicorn app:app --host 0.0.0.0 --port 7860 --timeout-keep-alive 75' \
        > /app/start.sh \
    && chmod +x /app/start.sh
CMD ["/app/start.sh"]