Ai-pdf-solver / Dockerfile
ZeroTraceX's picture
Update Dockerfile
97d327d verified
# Base image: official Python 3.9 (full Debian variant, not -slim).
# NOTE(review): a later build step runs `git clone`, so the base must provide
# git — confirm before switching to a slimmer variant.
FROM python:3.9
# Switch to root for system installations
# (made explicit so the apt/useradd steps below do not depend on the base
# image's default user).
USER root
# Install the Tesseract OCR engine and its English language data.
# --no-install-recommends keeps optional "recommended" packages out of the
# image; the apt package lists are deleted in the same layer so they never
# end up in the final image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        tesseract-ocr \
        tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*
# Locate the tessdata directory installed under /usr and mirror its contents
# into the fixed /usr/share/tesseract-ocr/4.00/tessdata location that the rest
# of this Dockerfile refers to.
#  - find's -path/-quit replaces the former `find | grep` pipe, so exactly one
#    directory is selected and no pipeline failure can be masked.
#  - `test -n` aborts the build when nothing was found.
#  - every expansion is quoted so an unexpected path cannot word-split.
#  - the copy is skipped when the discovered directory already IS the target
#    (cp would otherwise fail with "are the same file").
RUN target=/usr/share/tesseract-ocr/4.00/tessdata && \
    tessdata_dir="$(find /usr -type d -name tessdata -path '*tesseract*' -print -quit)" && \
    test -n "$tessdata_dir" && \
    echo "Found tessdata at: $tessdata_dir" && \
    mkdir -p "$target" && \
    if [ "$tessdata_dir" != "$target" ]; then cp -r "$tessdata_dir"/. "$target"/; fi && \
    chmod -R 755 /usr/share/tesseract-ocr && \
    chown -R root:root /usr/share/tesseract-ocr
# Point Tesseract at the language-data directory (persists into the running
# container because it is an ENV, not an ARG).
ENV TESSDATA_PREFIX=/usr/share/tesseract-ocr/4.00/tessdata/
# Verify tessdata files are present: list the directory for the build log,
# then fail the build unless the English model exists and is non-empty
# (a bare `ls` succeeds even on an empty directory).
RUN ls -la "$TESSDATA_PREFIX" && \
    test -s "${TESSDATA_PREFIX}eng.traineddata"
# Add the unprivileged account "user" (UID 1000, with a home directory) and
# give it ownership of the Tesseract language-data directory.
RUN useradd --create-home --uid 1000 user \
    && chown --recursive user:user /usr/share/tesseract-ocr/4.00/tessdata
# Fetch the application source into /app. The repository URL is read from the
# BuildKit secret "Access_key", which is mounted only for this RUN step and is
# not itself written to any layer.
#  - the command substitution is quoted so the URL is passed as one argument.
#  - --depth 1 fetches only the latest commit; history is not needed at runtime.
#  - /app/.git is removed in the same layer because git records the clone URL
#    (and any credentials embedded in it) in .git/config and the reflog, which
#    would otherwise be baked into the image.
RUN --mount=type=secret,id=Access_key,mode=0444,required=true \
    git clone --depth 1 "$(cat /run/secrets/Access_key)" /app && \
    rm -rf /app/.git
# All following relative paths (requirements.txt, the gunicorn app module)
# resolve against the cloned source tree.
WORKDIR /app
# Drop root: everything from here on, including the running container,
# executes as the unprivileged account.
USER user
# Put the account's ~/.local/bin ahead of the inherited PATH
# (presumably where pip places console scripts for a non-root install).
ENV PATH="/home/user/.local/bin:${PATH}"
# Install or upgrade every package listed in requirements.txt (resolved
# relative to the current WORKDIR) without keeping pip's download cache.
RUN pip install \
        --no-cache-dir \
        --upgrade \
        --requirement requirements.txt
# Document the port the server listens on (EXPOSE is metadata only; it does
# not publish the port).
EXPOSE 7860
# Start the web app with gunicorn (exec form, so gunicorn runs as PID 1 and
# receives stop signals directly): bind to all interfaces on port 7860,
# 300-second worker timeout, 2 worker processes with 4 threads each, serving
# the `app` object from the `app` module.
CMD ["gunicorn", "-b", "0.0.0.0:7860", "--timeout", "300", "--workers", "2", "--threads", "4", "app:app"]