Spaces:
Build error
Build error
# Image for a Python 3.9 application that lives in /home/user/app, runs as the
# unprivileged user "user" (uid 1000), listens on port 7860 and is launched
# through start.sh. NLTK corpora are baked into the image at build time.
FROM python:3.9-slim

# Runtime configuration.
# NLTK_DATA is defined exactly once: the original file set it twice with two
# different values, and only the later one (/usr/local/share/nltk_data) was
# effective at runtime, so that is the value kept here.
ENV PYTHONUNBUFFERED=1 \
    NLTK_DATA=/usr/local/share/nltk_data \
    HUGGINGFACE_HUB_CACHE=/home/user/app/huggingface_cache \
    UPLOAD_FOLDER=/home/user/app/data/uploads \
    PROCESSED_FOLDER=/home/user/app/data/processed

# System packages, installed in a single layer so there is one `apt-get update`
# and one cache cleanup. python3-dev / python3-pip are intentionally not
# installed: they would pull in the distribution's own Python next to the
# interpreter and pip that the python:3.9-slim base image already ships in
# /usr/local, which is the one every later `python` / `pip` call resolves to.
RUN apt-get update && apt-get install -y \
        build-essential \
        curl \
        git \
        libffi-dev \
        libssl-dev \
        poppler-utils \
        tesseract-ocr \
        tesseract-ocr-eng \
        unzip \
    && rm -rf /var/lib/apt/lists/*

# Create the non-root user and every directory the environment variables above
# point at. Ownership (chown) replaces the former world-writable `chmod -R 777`:
# the app tree belongs to "user", the shared NLTK directory stays root-owned.
RUN useradd -m -u 1000 user \
    && mkdir -p \
        /home/user/app/data/uploads \
        /home/user/app/data/processed \
        /home/user/app/nltk_data \
        /home/user/app/huggingface_cache \
        "$NLTK_DATA" \
    && chown -R user:user /home/user

WORKDIR /home/user/app

# Copy requirements first so the dependency layer is reused from the Docker
# cache as long as requirements.txt itself is unchanged.
COPY --chown=user:user requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Download the NLTK resources as root into the shared directory.
# nltk.download() reports failure through its return value instead of raising,
# so the result of every download is checked and the build is aborted if any
# of them failed. The punkt lookup afterwards confirms the data is resolvable
# through NLTK_DATA, and the final chmod makes it readable by the runtime user.
# NOTE(review): the NLTK version comes from requirements.txt, which is not
# visible here; newer releases may expect additional resource names - confirm.
RUN python -c "import os, sys, nltk; d = os.environ['NLTK_DATA']; ok = all([nltk.download(p, download_dir=d) for p in ('punkt', 'stopwords', 'wordnet', 'averaged_perceptron_tagger')]); sys.exit(0 if ok else 1)" \
    && python -c "import nltk; nltk.data.find('tokenizers/punkt')" \
    && chmod -R a+rX "$NLTK_DATA"

# Everything from here on runs as the unprivileged user.
USER user

# Copy the application sources, owned by the runtime user.
COPY --chown=user:user . .

# Make the entrypoint and helper script executable (works as "user" because
# the files were copied with --chown above).
RUN chmod +x /home/user/app/start.sh /home/user/app/download_nltk_data.py

# Port the application listens on.
EXPOSE 7860

# Mark the container unhealthy when the HTTP root stops answering.
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:7860/ || exit 1

# Start the application. The script lives in the WORKDIR (/home/user/app);
# the previous /app/start.sh path is never created in this image.
CMD ["/home/user/app/start.sh"]