depscreen / Dockerfile
halsabbah's picture
deploy: sync code from GitHub main
fbdce94 verified
# Base image: slim variant of the Python 3.13 image. The apt-get step further
# down relies on this image being Debian-based.
FROM python:3.13-slim
# Every later relative path (COPY destinations, RUN commands, the CMD process)
# resolves against /app; WORKDIR creates the directory if it does not exist.
WORKDIR /app
# System packages, installed in a single layer. The apt package index is
# deleted in that same layer so it never ends up in the image.
#   - tesseract-ocr, tesseract-ocr-eng: OCR engine plus English traineddata,
#     the fallback OCR path for scanned PDFs
#   - ghostscript, unpaper, pngquant: OCRmyPDF runtime prerequisites
#   - git, qpdf, libgl1, libglib2.0-0: purpose is not stated in this file.
#     NOTE(review): presumably required by entries in requirements.txt -- confirm.
# The list is kept in alphabetical order so additions produce clean diffs.
RUN apt-get update \
    && apt-get install --yes --no-install-recommends \
        ghostscript \
        git \
        libgl1 \
        libglib2.0-0 \
        pngquant \
        qpdf \
        tesseract-ocr \
        tesseract-ocr-eng \
        unpaper \
    && rm -rf /var/lib/apt/lists/*
# Python dependencies. Only requirements.txt is copied at this point, so the
# install layer does not depend on the rest of the source tree and can be
# reused from cache when application code changes but the manifest does not.
# --no-cache-dir keeps pip's download cache out of the image.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt
# Bring the rest of the build context into the working directory (/app).
COPY . /app/
# Make sure the model, knowledge-base and upload directories exist even when
# the build context does not ship them (mkdir is a no-op for ones that do).
RUN mkdir --parents \
        /app/ml/models \
        /app/ml/knowledge_base \
        /app/uploads
# Create a named account for UID/GID 1000 so getpass.getuser() resolves
# correctly (torch._dynamo calls getpwuid at import time; a bare UID with no
# passwd entry crashes). HF Spaces also runs as UID 1000.
#
# --create-home is required here: useradd never creates a home directory for
# a --system account unless asked to, and without it the passwd entry points
# at a /home/appuser that does not exist and that the unprivileged user cannot
# create. Anything that writes under "~" (e.g. per-user cache directories)
# would then fail with a permission error at runtime.
RUN groupadd --system --gid 1000 appuser && \
    useradd --system --uid 1000 --gid 1000 --create-home appuser
# Hand the directories the application writes to over to the runtime user.
# NOTE(review): ml/knowledge_base is created by the earlier mkdir step but is
# not chown'ed here, so appuser can read it but not write to it -- confirm
# that is intended.
RUN chown -R appuser:appuser /app/ml/models /app/uploads && \
    chmod -R 755 /app/ml/models /app/uploads
# Drop root privileges: the container's main process (and any instruction
# after this line) runs as appuser.
USER appuser
# Documents the port the server listens on. EXPOSE is metadata only; it does
# not publish the port.
EXPOSE 8000
# Start uvicorn serving the `app` object from module `main`, bound to all
# interfaces on port 8000 (matching EXPOSE). Exec (JSON-array) form is used,
# so no shell wraps the process.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]