Spaces:
Sleeping
Sleeping
# OCR / PDF-processing service image: Tesseract, Poppler and the OpenCV
# runtime libraries on a slim Python base, served by uvicorn on port 7860.
#
# The Debian release is pinned so that apt package names and the Tesseract 5
# tessdata path configured below stay valid when the floating "slim" tag
# moves on to a newer release.
FROM python:3.10-slim-bookworm

# System dependencies, installed and cleaned up in a single layer:
#   - tesseract-ocr + language packs (ara, eng, fra, hin, ita, rus, spa)
#   - poppler-utils / poppler-data for PDF rasterisation (pdftoppm)
#   - libgl1, libglib2.0-0, libopencv-dev for OpenCV
# libgl1 supplies libGL.so.1 directly; it replaces the transitional
# libgl1-mesa-glx package, which newer Debian releases no longer ship.
# The trailing tool invocations fail the build early if Tesseract, any
# language pack, or Poppler did not install correctly.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        libgl1 \
        libglib2.0-0 \
        libopencv-dev \
        poppler-data \
        poppler-utils \
        tesseract-ocr \
        tesseract-ocr-ara \
        tesseract-ocr-eng \
        tesseract-ocr-fra \
        tesseract-ocr-hin \
        tesseract-ocr-ita \
        tesseract-ocr-rus \
        tesseract-ocr-spa \
    && rm -rf /var/lib/apt/lists/* \
    && tesseract --version \
    && tesseract --list-langs \
    && pdftoppm -h

# Runtime configuration.
#   XDG_CACHE_HOME / FONTCONFIG_PATH: point cache and fontconfig lookups at
#     /tmp to avoid permission problems.
#   TESSDATA_PREFIX: location of the Tesseract 5 language data from apt.
#   OPENCV_LOG_LEVEL: only surface OpenCV errors.
#   PYTHONPATH / PYTHONUNBUFFERED: import from /app, flush logs immediately.
ENV XDG_CACHE_HOME=/tmp \
    FONTCONFIG_PATH=/tmp \
    TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata/ \
    OPENCV_LOG_LEVEL=ERROR \
    PYTHONPATH=/app \
    PYTHONUNBUFFERED=1

# Unprivileged runtime user. UID 1000 is assumed to match the user that
# Hugging Face Spaces runs Docker containers as (7860 is the Spaces default
# port) -- adjust if deploying elsewhere. The fontconfig cache directory is
# created already owned by that user so it is writable at runtime.
RUN useradd --create-home --uid 1000 appuser \
    && install -d -m 755 -o appuser -g appuser /tmp/fontconfig

WORKDIR /app

# Install Python dependencies before copying the full source tree so this
# layer is rebuilt only when requirements.txt changes.
COPY requirements.txt /app/requirements.txt
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r /app/requirements.txt

# Application code, owned by the runtime user.
COPY --chown=appuser:appuser . /app/

USER appuser

EXPOSE 7860

CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]