Anupam007's picture
Update Dockerfile
e11eccf verified
raw
history blame contribute delete
670 Bytes
FROM python:3.9-slim

# Install system dependencies:
#   poppler-utils    - PDF command-line tools
#   tesseract-ocr    - OCR engine
#   libtesseract-dev - Tesseract development headers
# --no-install-recommends keeps optional "recommended" packages out of the
# image. poppler-data (encoding data Poppler needs for some non-Latin PDFs) is
# normally pulled in only as a recommendation, so it is listed explicitly to
# keep it installed. The apt lists are removed in the same layer so they never
# reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        libtesseract-dev \
        poppler-data \
        poppler-utils \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*
# Set working directory (WORKDIR creates it if it does not exist)
WORKDIR /app

# Create /data, /data/files and /data/nltk_data with open permissions.
# NOTE(review): world-writable (777) presumably lets the app write here under
# whatever UID the platform runs the container as; once that UID is confirmed,
# prefer a dedicated USER plus a targeted chown over 777.
RUN mkdir -p /data/files /data/nltk_data && chmod -R 777 /data

# Install Python dependencies from the manifest alone, before any application
# source is copied, so this layer stays cached until requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Pre-download NLTK data at build time. /data/nltk_data is not one of NLTK's
# default search locations, so NLTK_DATA is set to make the corpora
# discoverable at runtime without extra code in the app.
# NOTE(review): recent NLTK releases load the tokenizer from "punkt_tab"
# rather than "punkt" - confirm against the version pinned in requirements.txt.
ENV NLTK_DATA=/data/nltk_data
RUN python -m nltk.downloader -d /data/nltk_data punkt stopwords

# Copy application files last: they change most often, and placing them here
# means an edit to app.py or README.md no longer invalidates the dependency
# and NLTK layers above.
COPY app.py README.md ./
# Make Gradio listen on all interfaces, on the port exposed below. Gradio's
# default host is 127.0.0.1, which cannot be reached from outside the
# container. These variables only supply defaults: an explicit server_name /
# server_port passed to launch() in app.py still takes precedence.
ENV GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_SERVER_PORT=7860

# Expose port (documentation only; publishing is done by the runtime)
EXPOSE 7860

# Run the Gradio app. Exec (JSON-array) form runs python directly as PID 1,
# so it receives stop signals without an intermediate shell.
CMD ["python", "app.py"]