# PDF-Reader-OCR / Dockerfile
# Last change by aursalan: "Added OCR" (commit 4e3c340)
# Base image: the official Python 3.12 image in its "slim" variant.
# The tag is pinned to a minor version (3.12) rather than `latest`, so rebuilds
# stay on the same Python series. OS packages are installed with apt-get below.
FROM python:3.12-slim
# Install the Tesseract OCR engine and its English language data.
# - `apt-get update` and `apt-get install` share one layer so the install never
#   runs against a stale, cached package index.
# - `-y` auto-confirms the installation; `--no-install-recommends` skips
#   optional "recommended" packages that the OCR engine does not need.
# - The English pack is listed explicitly instead of relying on it being pulled
#   in as a dependency of tesseract-ocr.
# - The apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        tesseract-ocr \
        tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*
# Create an unprivileged account so the application does not run as root.
# UID 1000 matches the user ID that Hugging Face Spaces runs containers with.
# /app is created here and handed to that account so the application can
# write next to its code at runtime.
RUN useradd --create-home --uid 1000 app \
    && mkdir -p /app \
    && chown app:app /app
# Set the working directory in the container
WORKDIR /app
# Copy only the requirements file first: the dependency layer below is then
# reused from cache until requirements.txt itself changes.
COPY requirements.txt .
# Install the Python dependencies system-wide (still as root).
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir --upgrade -r requirements.txt
# Copy the rest of the application's code, owned by the unprivileged user
COPY --chown=app:app . .
# Drop root privileges for everything that follows, including the container's
# default command.
USER app
# Document that the application listens on port 7860.
# EXPOSE is metadata only; the port still has to be published at run time.
EXPOSE 7860
# Default command (exec form, so Gunicorn is started directly without a shell):
# start Gunicorn with 4 worker processes of class uvicorn.workers.UvicornWorker,
# bound to all interfaces on port 7860, serving the `app` object from the
# `app` module ("app:app").
# NOTE(review): UvicornWorker targets ASGI applications such as FastAPI; a plain
# WSGI Flask app would need a different worker class — confirm what app.py defines.
CMD ["gunicorn", "--workers", "4", "--worker-class", "uvicorn.workers.UvicornWorker", "--bind", "0.0.0.0:7860", "app:app"]