# Image for the Gradio application in app.py.
# Layers are ordered from least to most frequently changing so that editing
# app.py does not force the OS packages, Python dependencies, or NLTK data
# to be rebuilt.
FROM python:3.9-slim

# System dependencies: Poppler command-line tools and the Tesseract OCR
# engine (plus its development files).
# --no-install-recommends keeps optional packages out of the image, so
# poppler-data (encoding data Poppler uses for some PDFs) is listed
# explicitly rather than relying on it being pulled in as a recommendation.
# The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libtesseract-dev \
        poppler-data \
        poppler-utils \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged runtime account that owns the application and data
# directories. This replaces a world-writable (chmod -R 777) /data tree:
# only this account needs write access.
# NOTE(review): UID 1000 is assumed to match the UID the hosting platform
# runs the container as -- confirm for your deployment target.
RUN useradd --create-home --uid 1000 app \
    && mkdir -p /app /data/files /data/nltk_data \
    && chown -R app:app /app /data

# Tell NLTK where the pre-downloaded corpora live; /data/nltk_data is not
# on NLTK's default search path.
ENV NLTK_DATA=/data/nltk_data

# Set working directory
WORKDIR /app

# Install Python dependencies before copying the application source so this
# layer stays cached until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Everything from here on runs without root privileges.
USER app

# Pre-download NLTK data at build time (as the runtime user, so the files
# are owned by the account that will read them).
RUN python -m nltk.downloader -d /data/nltk_data punkt stopwords

# Copy the application files last: they change most often.
COPY --chown=app:app app.py README.md ./

# Port the app is expected to listen on (documentation only; does not publish it).
EXPOSE 7860

# Run the Gradio app (exec form, so python is PID 1 and receives SIGTERM).
CMD ["python", "app.py"]