Spaces:
Running
Running
# Image for a Uvicorn-served Python application (app.main:app) that uses
# Tesseract OCR. Built for Hugging Face Spaces: listens on port 7860 and runs
# as a non-root user with UID 1000.

# Use Python 3.12 as the base image
FROM python:3.12-slim

# Set working directory in the container (WORKDIR creates it if missing)
WORKDIR /app

# Install system dependencies including Tesseract OCR.
# The apt package index is removed in the same layer so it never ships in the image.
# NOTE(review): python3-dev pulls Debian's own python3 into /usr/bin, while the
# base image's interpreter lives in /usr/local/bin - confirm this package is
# still required before removing it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        gcc \
        libtesseract-dev \
        python3-dev \
        tesseract-ocr \
        tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*

# Create a non-root user and give it the (still empty) working directory
RUN useradd -m -u 1000 appuser && \
    chown appuser:appuser /app

# Copy the project manifest first to leverage Docker cache
COPY pyproject.toml .

# Install Python dependencies
# NOTE(review): only pyproject.toml is present at this point, so `pip install .`
# builds the project without its sources - confirm the build backend tolerates that.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir .

# Copy the rest of the application
COPY . .

# Create ALL needed writable directories and hand the whole tree to appuser.
# Ownership (not world-writable 777) is what grants write access: the container
# runs as appuser, so owner permissions are sufficient.
# NOTE(review): a runtime that forces an arbitrary UID would need group/other
# write bits here instead - confirm the deployment target runs as UID 1000.
RUN mkdir -p temp uploads \
        /app/.cache \
        /app/nltk_data \
        /app/app/routers/temp \
        /app/app/config/temp && \
    chown -R appuser:appuser /app && \
    chmod -R u+rwX temp uploads /app/.cache /app/nltk_data /app/app/routers/temp /app/app/config/temp

# Set environment variables for cache directories and Tesseract.
# PATH is intentionally left untouched: /usr/bin is already on the default PATH,
# and prepending it would let /usr/bin/python3 (installed by python3-dev) shadow
# the image's Python in /usr/local/bin.
ENV HF_HOME=/app/.cache/huggingface \
    TRANSFORMERS_CACHE=/app/.cache/huggingface/transformers \
    PYTORCH_PRETRAINED_BERT_CACHE=/app/.cache/torch \
    NLTK_DATA=/app/nltk_data \
    XDG_CACHE_HOME=/app/.cache \
    TESSDATA_PREFIX=/usr/share/tesseract-ocr/5/tessdata \
    TESSERACT_CMD=/usr/bin/tesseract

# Verify the Tesseract installation: the binary must run and the English model
# must exist under TESSDATA_PREFIX, so a path change in the distro package
# fails the build instead of failing OCR at runtime.
RUN tesseract --version && \
    test -f "${TESSDATA_PREFIX}/eng.traineddata"

# Switch to non-root user
USER appuser

# Expose the port that Hugging Face Spaces expects
EXPOSE 7860

# Command to run the application using Uvicorn on port 7860
CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "7860"]