Spaces:
Sleeping
Sleeping
Create Dockerfile
Browse files- Dockerfile +35 -0
Dockerfile
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Base image: Python 3.10, "slim" variant. Every later instruction builds on it.
FROM python:3.10-slim
|
| 2 |
+
|
| 3 |
+
# Working directory for all following instructions; the relative paths used by
# COPY and by the ENTRYPOINT script argument resolve against /app.
WORKDIR /app
|
| 4 |
+
|
| 5 |
+
# 1. Install System Dependencies (The Heavy Lifting)
|
| 6 |
+
# build-essential: for compiling python libs
|
| 7 |
+
# curl/git: standard tools
|
| 8 |
+
# tesseract-ocr: The OCR engine for OCREnhancedPDFLoader
|
| 9 |
+
# poppler-utils: Required by pdf2image to convert PDF pages to images
|
| 10 |
+
# Install OS packages in a single layer and drop the apt index afterwards so it
# does not persist in the image.
# --no-install-recommends keeps optional "recommended" packages out of the image.
# ca-certificates is listed explicitly because it normally arrives only as a
# recommendation of curl/git, and both need it for HTTPS.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    curl \
    git \
    poppler-utils \
    tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*
|
| 17 |
+
|
| 18 |
+
# 2. Install Python Libraries
|
| 19 |
+
# Bring in only the dependency manifest before installing, so this layer is
# rebuilt only when requirements.txt itself changes.
COPY requirements.txt ./requirements.txt
RUN python -m pip install --no-cache-dir --requirement requirements.txt
|
| 21 |
+
|
| 22 |
+
# 3. Download NLP Models (Bake them into the image)
|
| 23 |
+
# Spacy model for your Chunkers
|
| 24 |
+
# Fetch the en_core_web_sm spaCy model at build time so it is baked into the
# image rather than downloaded when the container starts.
RUN python -m spacy download en_core_web_sm
|
| 25 |
+
# NLTK data for your TextPreprocessor
|
| 26 |
+
# Install the NLTK corpora into a system-wide directory on NLTK's default search
# path. Without -d they land in the build user's home directory, where they are
# not found if the container runs under a different user.
RUN python -m nltk.downloader -d /usr/local/share/nltk_data stopwords wordnet omw-1.4
|
| 27 |
+
|
| 28 |
+
# 4. Copy Application Code
|
| 29 |
+
# Application source is copied after the dependency and model layers, so a
# source-only change does not invalidate those earlier layers.
COPY src/ ./src/
|
| 30 |
+
|
| 31 |
+
# 5. Config
|
| 32 |
+
# Documents the listening port; matches --server.port=8501 passed in ENTRYPOINT.
EXPOSE 8501
|
| 33 |
+
# Probe the Streamlit health endpoint on the exposed port.
# `|| exit 1` maps every curl failure code (e.g. 22 from --fail) to 1, the only
# "unhealthy" status Docker defines; --silent/--show-error keep the health log
# free of the progress meter while still recording the error message.
HEALTHCHECK CMD curl --fail --silent --show-error http://localhost:8501/_stcore/health || exit 1
|
| 34 |
+
|
| 35 |
+
# Launch the Streamlit app in exec form, listening on all interfaces at port
# 8501 with CORS and XSRF protection switched off.
ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", \
            "--server.port=8501", \
            "--server.address=0.0.0.0", \
            "--server.enableCORS=false", \
            "--server.enableXsrfProtection=false"]
|