olamideba committed on
Commit
fa10f25
·
1 Parent(s): c106191

Update Dockerfile and .dockerignore; modify download_assets.py to use specific ChromaDB path

Browse files
Files changed (3) hide show
  1. .dockerignore +2 -1
  2. Dockerfile +16 -17
  3. app/scripts/download_assets.py +1 -1
.dockerignore CHANGED
@@ -26,4 +26,5 @@ data/
26
  .chroma/
27
  embeddings/
28
  data/
29
- models/
 
 
26
  .chroma/
27
  embeddings/
28
  data/
29
+ models/
30
+ chromadb.tar.gz
Dockerfile CHANGED
@@ -1,28 +1,27 @@
1
  FROM python:3.11-slim
2
 
3
- WORKDIR /
4
 
5
- # Install curl for healthcheck
6
- RUN apt-get update && apt-get install -y \
7
- curl \
8
- && rm -rf /var/lib/apt/lists/*
9
 
10
- # RUN git clone https://github.com/mujeeb-gh/rag-chatbot-final.git .
 
 
 
11
 
 
12
  COPY . .
13
 
14
- RUN pip3 install -r requirements.txt
15
 
16
- EXPOSE 8501
 
 
 
 
17
 
 
18
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
19
 
20
- WORKDIR /app
21
-
22
- # Create entrypoint script that downloads assets before starting Streamlit
23
- RUN echo '#!/bin/bash\n\
24
- python3 /app/scripts/download_assets.py\n\
25
- exec streamlit run main.py --server.port=8501 --server.address=0.0.0.0\n\
26
- ' > /app/entrypoint.sh && chmod +x /app/entrypoint.sh
27
-
28
- ENTRYPOINT ["/app/entrypoint.sh"]
 
1
  FROM python:3.11-slim
2
 
3
+ WORKDIR /app
4
 
5
+ # Install only curl for healthcheck
6
+ RUN apt-get update && apt-get install -y curl && rm -rf /var/lib/apt/lists/*
 
 
7
 
8
+ # Copy and install dependencies FIRST (this layer only rebuilds when requirements.txt changes)
9
+ COPY requirements.txt .
10
+ RUN pip install --no-cache-dir --upgrade pip && \
11
+ pip install --no-cache-dir -r requirements.txt
12
 
13
+ # Copy application code LAST (changes here won't invalidate pip install)
14
  COPY . .
15
 
16
+ ENV PYTHONPATH=/app/app
17
 
18
+ # Create entrypoint script
19
+ RUN echo '#!/bin/bash\n\
20
+ python3 -m app.scripts.download_assets\n\
21
+ exec streamlit run app/main.py --server.port=8501 --server.address=0.0.0.0\n\
22
+ ' > /entrypoint.sh && chmod +x /entrypoint.sh
23
 
24
+ EXPOSE 8501
25
  HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
26
 
27
+ ENTRYPOINT ["/entrypoint.sh"]
 
 
 
 
 
 
 
 
app/scripts/download_assets.py CHANGED
@@ -85,7 +85,7 @@ def download_chromadb(chromadb_repo: str, chromadb_dir: Path, hf_token: str | No
85
  print(f"Checking ChromaDB in {chromadb_dir}...")
86
 
87
  # Check if ChromaDB directory already has content
88
- expected_chroma_path = chromadb_dir / settings.chroma_db
89
  if expected_chroma_path.exists() and any(expected_chroma_path.iterdir()):
90
  print(f"ChromaDB directory already contains files. Skipping download.")
91
  print(f"To force re-download, delete {chromadb_dir} and restart.")
 
85
  print(f"Checking ChromaDB in {chromadb_dir}...")
86
 
87
  # Check if ChromaDB directory already has content
88
+ expected_chroma_path = chromadb_dir / "bge-small-finetuned-chroma" # should be settings.chroma_db
89
  if expected_chroma_path.exists() and any(expected_chroma_path.iterdir()):
90
  print(f"ChromaDB directory already contains files. Skipping download.")
91
  print(f"To force re-download, delete {chromadb_dir} and restart.")