jonghhhh committed on
Commit
4e6f326
·
1 Parent(s): cf8d80a

Fix: Update Dockerfile, requirements.txt, and pre-download models to resolve timeout issue

Browse files
Files changed (3) hide show
  1. Dockerfile +22 -7
  2. download_models.py +7 -1
  3. requirements.txt +4 -0
Dockerfile CHANGED
@@ -1,20 +1,35 @@
1
- FROM python:3.13.5-slim
 
 
 
 
 
 
2
 
3
  WORKDIR /app
4
 
 
5
  RUN apt-get update && apt-get install -y \
6
  build-essential \
7
  curl \
8
  git \
9
  && rm -rf /var/lib/apt/lists/*
10
 
11
- COPY requirements.txt ./
12
- COPY src/ ./src/
 
 
 
 
13
 
14
- RUN pip3 install -r requirements.txt
 
15
 
16
- EXPOSE 8501
 
17
 
18
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
 
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
1
+ FROM python:3.11-slim
2
+
3
+ # Set environment variables
4
+ ENV PYTHONUNBUFFERED=1 \
5
+ PYTHONDONTWRITEBYTECODE=1 \
6
+ STREAMLIT_SERVER_PORT=7860 \
7
+ STREAMLIT_SERVER_ADDRESS=0.0.0.0
8
 
9
  WORKDIR /app
10
 
11
+ # Install system dependencies
12
  RUN apt-get update && apt-get install -y \
13
  build-essential \
14
  curl \
15
  git \
16
  && rm -rf /var/lib/apt/lists/*
17
 
18
+ # Copy requirements first for better caching
19
+ COPY requirements.txt .
20
+ RUN pip3 install --no-cache-dir -r requirements.txt
21
+
22
+ # Copy the rest of the application
23
+ COPY . .
24
 
25
+ # Pre-download models to speed up startup and avoid runtime download issues
26
+ RUN python3 download_models.py
27
 
28
+ # Hugging Face Spaces use port 7860 by default
29
+ EXPOSE 7860
30
 
31
+ # Health check to ensure the container is running correctly
32
+ HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health
33
 
34
+ # Run the application
35
+ ENTRYPOINT ["streamlit", "run", "multimodal_rag_langgraph_gemini_st.py"]
download_models.py CHANGED
@@ -3,7 +3,13 @@
3
  """
4
  from sentence_transformers import SentenceTransformer
5
  import os
 
6
 
7
  print("Downloading embedding model...")
8
  model = SentenceTransformer("intfloat/multilingual-e5-large-instruct", device="cpu")
9
- print("✅ Model downloaded successfully!")
 
 
 
 
 
 
3
  """
4
  from sentence_transformers import SentenceTransformer
5
  import os
6
+ import nltk
7
 
8
  print("Downloading embedding model...")
9
  model = SentenceTransformer("intfloat/multilingual-e5-large-instruct", device="cpu")
10
+ print("✅ Embedding model downloaded successfully!")
11
+
12
+ print("Downloading nltk punkt...")
13
+ nltk.download('punkt')
14
+ nltk.download('punkt_tab')
15
+ print("✅ NLTK data downloaded successfully!")
requirements.txt CHANGED
@@ -22,6 +22,10 @@ Pillow>=11.0.0
22
  requests==2.32.5
23
  beautifulsoup4==4.14.2
24
  lxml==6.0.2
 
 
 
 
25
 
26
  # Data Processing
27
  pandas>=2.0.0
 
22
  requests==2.32.5
23
  beautifulsoup4==4.14.2
24
  lxml==6.0.2
25
+ trafilatura==1.12.2
26
+ newspaper3k==0.2.8
27
+ fake-useragent==2.0.3
28
+ extruct==0.17.0
29
 
30
  # Data Processing
31
  pandas>=2.0.0