# data-extract / Dockerfile
# validops-east-1's picture
# update
# 697be7d
# Raw
# History Blame Contribute Delete
# 2.63 kB
# ─────────────────────────────────────────────────────────────
# Reconciliation File Processing Service — Production Dockerfile
# Port: 7860
# ─────────────────────────────────────────────────────────────
FROM python:3.12-slim
# Image metadata. Each label key is set exactly once: when a key is repeated,
# the last value replaces the earlier one, so the two former "description"
# labels are folded into a single value that keeps both pieces of text.
LABEL maintainer="Reconciliation File Processing Service API" \
      description="Document-to-Markdown & PDF-to-image API. Production-ready: consolidated logging, unified app, cleanup loop, self-ping" \
      version="2.2.0"
# ── System dependencies ──────────────────────────────────────
# Refresh the apt index, install the OS packages without recommended extras,
# and remove the index files in the same layer so they are not kept in the image.
RUN set -e; \
    apt-get update; \
    apt-get install -y --no-install-recommends \
        curl \
        ffmpeg \
        libmagic1 \
        poppler-utils; \
    rm -rf /var/lib/apt/lists/*
# ── Non-root user ────────────────────────────────────────────
# Dedicated unprivileged account: group and user "appuser", both with ID 1000,
# a home directory, and bash as the login shell.
RUN groupadd -g 1000 appuser \
 && useradd -u 1000 -g appuser -s /bin/bash -m appuser
# Working directory for every instruction that follows.
WORKDIR /app
# ── Python dependencies ──────────────────────────────────────
# Only requirements.txt is copied before installing, so this layer is rebuilt
# when the dependency list changes rather than on every source edit.
COPY requirements.txt .
# Upgrade pip, install the pinned requirements, then fetch the spaCy English
# model (assumes requirements.txt installs spaCy — confirm).
# `spacy download` installs through pip and forwards extra arguments to it, so
# --no-cache-dir is passed there too; otherwise the model wheel could remain in
# pip's cache inside this layer.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt && \
    python -m spacy download en_core_web_sm --no-cache-dir
# ── Application code ─────────────────────────────────────────
# Copy the build context into the working directory, owned by the non-root user.
COPY --chown=appuser:appuser . .
# ── Ensure startup script exists and is executable ──────────
# Abort the build with an explicit message when start.sh is missing from the
# copied context; otherwise set its executable bit.
RUN if [ ! -f /app/start.sh ]; then \
        echo "ERROR: start.sh not found"; \
        exit 1; \
    fi; \
    chmod +x /app/start.sh
# ── Create runtime directories ──────────────────────────────
# Create each writable directory while still root and hand it (and anything
# already inside it) to appuser, so the unprivileged process can write there.
RUN set -e; \
    for dir in /app/logs /tmp/pdf2img_outputs /tmp/pdf2img_temp; do \
        mkdir -p "$dir"; \
        chown -R appuser:appuser "$dir"; \
    done
# Drop root: the remaining instructions and the container process run as appuser.
USER appuser
# Runtime environment:
#   PYTHONPATH        - adds /app to Python's module search path
#   PYTHONUNBUFFERED  - stdout/stderr are written unbuffered
#   OUTPUT_DIR / TEMP_DIR - the /tmp/pdf2img_* directories created above
#                           (presumably read by the application — confirm)
ENV PYTHONPATH=/app \
    PYTHONUNBUFFERED=1 \
    OUTPUT_DIR=/tmp/pdf2img_outputs \
    TEMP_DIR=/tmp/pdf2img_temp
# Documents the port the service listens on; EXPOSE alone does not publish it.
EXPOSE 7860
# Health probe: GET /ping on the local port using the Python standard library.
# urlopen raises on connection errors and HTTP error statuses, which makes the
# interpreter exit non-zero; `|| exit 1` maps any failure to exit code 1.
# The explicit socket timeout (5s, below the 10s HEALTHCHECK timeout) lets a
# hung request fail on its own instead of relying on Docker to end the probe.
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:7860/ping', timeout=5)" || exit 1
# Launch the service through the startup script (exec form: no `/bin/sh -c` wrapper).
CMD ["/bin/bash", "/app/start.sh"]