NavyDevilDoc committed on
Commit
5826fc1
·
verified ·
1 Parent(s): 39eb938

Create Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +35 -0
Dockerfile ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# Image for the Streamlit application at src/streamlit_app.py.
# Layers are ordered from least to most frequently changing (OS packages ->
# Python dependencies -> NLP models -> application code) so that editing the
# application source does not invalidate the expensive dependency layers.
FROM python:3.10-slim

WORKDIR /app

# 1. Install System Dependencies (The Heavy Lifting)
#    build-essential: for compiling python libs
#    ca-certificates: listed explicitly so HTTPS for curl/git keeps working
#                     without relying on "recommended" packages
#    curl/git:        standard tools (curl is also used by the HEALTHCHECK below)
#    poppler-utils:   required by pdf2image to convert PDF pages to images
#    tesseract-ocr:   the OCR engine for OCREnhancedPDFLoader
# --no-install-recommends keeps optional packages out of the image, and the apt
# lists are removed in the same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        poppler-utils \
        tesseract-ocr \
    && rm -rf /var/lib/apt/lists/*

# 2. Install Python Libraries
# Only the manifest is copied first, so this layer is rebuilt only when
# requirements.txt changes.
COPY requirements.txt ./
RUN pip3 install --no-cache-dir -r requirements.txt

# 3. Download NLP Models (bake them into the image)
# spaCy model for the chunkers.
RUN python -m spacy download en_core_web_sm
# NLTK data for the TextPreprocessor. It is written to a system-wide directory
# that is on NLTK's default search path, so it stays readable no matter which
# user the container runs as (the downloader's default target for a root build
# is root's home directory, which other users cannot read).
RUN python -m nltk.downloader -d /usr/local/share/nltk_data stopwords wordnet omw-1.4

# 4. Create an unprivileged runtime user
# A fixed numeric UID/GID lets orchestrators verify the container is non-root.
# /app is handed to that user so the application can still write next to its
# code; the home directory gives Streamlit a writable place for its own files.
RUN groupadd --gid 1000 app \
    && useradd --uid 1000 --gid app --create-home app \
    && chown app:app /app

# 5. Copy Application Code
COPY --chown=app:app src/ ./src/

# Everything that needs root is done; drop privileges for runtime.
USER app

# 6. Config
# EXPOSE is documentation only; the port must still be published at run time.
EXPOSE 8501
# Probe Streamlit's health endpoint. The start period gives the app time to
# boot, and "|| exit 1" maps any curl failure onto Docker's "unhealthy" code.
HEALTHCHECK --interval=30s --timeout=10s --start-period=30s --retries=3 \
    CMD curl --fail --silent --show-error http://localhost:8501/_stcore/health || exit 1

# Exec form so Streamlit runs as PID 1 and receives stop signals directly.
# NOTE(review): CORS and XSRF protection are disabled here; presumably the app
# is served behind a proxy/iframe that requires it - confirm before exposing
# this container directly.
ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.enableCORS=false", "--server.enableXsrfProtection=false"]