Spaces:
Build error
Build error
File size: 2,721 Bytes
3022fd1 3c83f33 3022fd1 3c83f33 3022fd1 3c83f33 3022fd1 83a76fb 544d677 3022fd1 83a76fb 544d677 83a76fb 23e4091 83a76fb 5daea2d 83a76fb 544d677 23e4091 83a76fb 544d677 5daea2d 3022fd1 3c83f33 3022fd1 3c83f33 3022fd1 3c83f33 3022fd1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
FROM python:3.9-slim

# Create the application tree: the upload and processed data folders and an
# app-local NLTK data folder.
# No chmod is applied: the directories are created by root with default
# permissions, and the whole of /home/user is handed to the runtime account by
# the `chown -R user:user /home/user` step that follows the user creation, so
# world-writable (777) permissions are not needed.
RUN mkdir -p \
        /home/user/app/data/uploads \
        /home/user/app/data/processed \
        /home/user/app/nltk_data

WORKDIR /home/user/app
# Install system dependencies (packages listed alphabetically).
# curl is also what the HEALTHCHECK in this file uses to probe the app.
# --no-install-recommends skips packages that are only "Recommends" of the
# ones listed, which keeps the image smaller; the apt lists are removed in the
# same layer so they do not end up in the image.
# NOTE(review): anything that was previously pulled in only as a recommended
# package is no longer installed; add it to the list explicitly if the app
# turns out to need it.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        libffi-dev \
        libssl-dev \
        poppler-utils \
        python3-dev \
        python3-pip \
        tesseract-ocr \
        tesseract-ocr-eng \
    && rm -rf /var/lib/apt/lists/*
# Set environment variables.
# NLTK_DATA is deliberately not set here: it is set once, further down, to
# /usr/local/share/nltk_data, which is where the NLTK resources are actually
# downloaded. A value set at this point would only be overridden by that
# later ENV and would point at a directory the build never populates.
ENV PYTHONUNBUFFERED=1 \
    HUGGINGFACE_HUB_CACHE=/home/user/app/huggingface_cache \
    UPLOAD_FOLDER=/home/user/app/data/uploads \
    PROCESSED_FOLDER=/home/user/app/data/processed
# Add the unprivileged account `user` (UID 1000) and give it ownership of
# everything under /home/user, which includes the application directory.
# The switch to this account happens later with a USER instruction.
RUN useradd --create-home --uid 1000 user \
    && chown --recursive user:user /home/user
# Copy requirements first to leverage Docker cache: the dependency layers
# below are only rebuilt when requirements.txt changes.
COPY --chown=user:user requirements.txt ./

# The package installation and system-wide NLTK setup below run as root.
USER root

# Create the system-wide directory for NLTK data.
# It is not made world-writable: the data is downloaded into it by root, and
# the download step sets the final permissions (755) on the whole tree.
RUN mkdir -p /usr/local/share/nltk_data

# Install Python dependencies (--no-cache-dir keeps pip's cache out of the image)
RUN pip install --no-cache-dir -r requirements.txt
# Extra system package for the NLTK setup: unzip.
# The apt lists are removed in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends unzip \
    && rm -rf /var/lib/apt/lists/*
# Download the NLTK resources (punkt, stopwords, wordnet,
# averaged_perceptron_tagger) into the system-wide data directory as root.
# raise_on_error=True makes a failed download raise an exception, so the build
# stops instead of silently producing an image with missing data; without it
# nltk.download only prints the error and returns False, which does not change
# the exit status of `python -c`.
# The final chmod leaves the tree readable, but not writable, by other users.
RUN python -c "import nltk; [nltk.download(pkg, download_dir='/usr/local/share/nltk_data', raise_on_error=True) for pkg in ('punkt', 'stopwords', 'wordnet', 'averaged_perceptron_tagger')]" \
    && chmod -R 755 /usr/local/share/nltk_data
# Point NLTK at the system-wide data directory used by the download step
ENV NLTK_DATA=/usr/local/share/nltk_data

# Sanity check: look up the punkt tokenizer in that directory; the build fails
# at this step if it cannot be found.
RUN python -c "from nltk import data; data.path.append('/usr/local/share/nltk_data'); data.find('tokenizers/punkt')"
# Drop privileges: the remaining build steps and the running container use
# the unprivileged account.
USER user

# Bring in the application files, owned by that account
COPY --chown=user:user . ./

# Mark the two scripts as executable
RUN chmod +x \
        /home/user/app/start.sh \
        /home/user/app/download_nltk_data.py

# The active user is already `user` and the working directory is already
# /home/user/app at this point, so neither instruction is repeated here.
# Document the port the app runs on (EXPOSE does not publish it)
EXPOSE 7860

# Health check: after a 5s start period, request the root URL every 30s with a
# 30s timeout; the container is reported unhealthy after 3 consecutive failures.
HEALTHCHECK --start-period=5s --interval=30s --timeout=30s --retries=3 \
    CMD curl --fail http://localhost:7860/ || exit 1
# Command to run the application.
# start.sh is copied into the working directory /home/user/app and made
# executable there; nothing in this file creates /app, so the exec-form path
# must point into /home/user/app or the container fails to start.
CMD ["/home/user/app/start.sh"]
|