| FROM python:3.13-slim | |
| WORKDIR /app | |
| # System packages required at runtime (list kept in alphabetical order): | |
| # - tesseract-ocr, tesseract-ocr-eng: fallback OCR engine plus English traineddata for scanned PDFs | |
| # - ghostscript, pngquant, unpaper: OCRmyPDF runtime prerequisites | |
| # - git, libgl1, libglib2.0-0, qpdf: NOTE(review): purpose is not stated in this file; | |
| #   presumably needed by entries in requirements.txt -- confirm before removing | |
| # The apt package index is removed in the same layer so it is not kept in the image. | |
| RUN apt-get update && apt-get install --yes --no-install-recommends \ | |
| ghostscript \ | |
| git \ | |
| libgl1 \ | |
| libglib2.0-0 \ | |
| pngquant \ | |
| qpdf \ | |
| tesseract-ocr \ | |
| tesseract-ocr-eng \ | |
| unpaper \ | |
| && rm -rf /var/lib/apt/lists/* | |
| # Python dependencies are installed before the application source is copied, so that | |
| # editing application code does not invalidate the cached pip layer. | |
| COPY requirements.txt ./ | |
| RUN pip install --no-cache-dir --requirement requirements.txt | |
| # Application source (copied into WORKDIR) | |
| COPY . ./ | |
| # Make sure the directories below exist, relative to WORKDIR | |
| RUN mkdir -p ml/models ml/knowledge_base uploads | |
| # Create a named user and group for UID/GID 1000 so getpass.getuser() resolves correctly | |
| # (torch._dynamo calls getpwuid at import time; a bare UID with no passwd entry crashes). | |
| # HF Spaces also runs as UID 1000. | |
| # No home directory is created (--no-create-home). | |
| # NOTE(review): if any library caches under $HOME, point it at a writable path -- confirm. | |
| # | |
| # Ownership is handed over for every directory the mkdir step creates. ml/knowledge_base | |
| # must be included: left root-owned, it is read-only to appuser once the container | |
| # switches to USER appuser. | |
| # NOTE(review): drop it from the list again if the knowledge base is meant to be read-only. | |
| RUN groupadd --system --gid 1000 appuser && \ | |
| useradd --system --uid 1000 --gid 1000 --no-create-home appuser && \ | |
| chown -R appuser:appuser /app/ml/models /app/ml/knowledge_base /app/uploads && \ | |
| chmod -R 755 /app/ml/models /app/ml/knowledge_base /app/uploads | |
| # Drop root: later instructions and the container's main process run as appuser | |
| USER appuser | |
| # Documents the port uvicorn listens on below; EXPOSE alone does not publish it | |
| EXPOSE 8000 | |
| # Start uvicorn serving the `app` object from module `main` on all interfaces, port 8000 | |
| CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] | |