Update Dockerfile and .dockerignore; modify download_assets.py to use specific ChromaDB path
Browse files
- .dockerignore +2 -1
- Dockerfile +16 -17
- app/scripts/download_assets.py +1 -1
.dockerignore
CHANGED
|
@@ -26,4 +26,5 @@ data/
|
|
| 26 |
.chroma/
|
| 27 |
embeddings/
|
| 28 |
data/
|
| 29 |
-
models/
|
|
|
|
|
|
| 26 |
.chroma/
|
| 27 |
embeddings/
|
| 28 |
data/
|
| 29 |
+
models/
|
| 30 |
+
chromadb.tar.gz
|
Dockerfile
CHANGED
|
@@ -1,28 +1,27 @@
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
|
| 3 |
-
WORKDIR /
|
| 4 |
|
| 5 |
-
# Install curl for healthcheck
|
| 6 |
-
RUN apt-get update && apt-get install -y
|
| 7 |
-
curl \
|
| 8 |
-
&& rm -rf /var/lib/apt/lists/*
|
| 9 |
|
| 10 |
-
#
|
|
|
|
|
|
|
|
|
|
| 11 |
|
|
|
|
| 12 |
COPY . .
|
| 13 |
|
| 14 |
-
|
| 15 |
|
| 16 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 17 |
|
|
|
|
| 18 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
| 19 |
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
# Create entrypoint script that downloads assets before starting Streamlit
|
| 23 |
-
RUN echo '#!/bin/bash\n\
|
| 24 |
-
python3 /app/scripts/download_assets.py\n\
|
| 25 |
-
exec streamlit run main.py --server.port=8501 --server.address=0.0.0.0\n\
|
| 26 |
-
' > /app/entrypoint.sh && chmod +x /app/entrypoint.sh
|
| 27 |
-
|
| 28 |
-
ENTRYPOINT ["/app/entrypoint.sh"]
|
|
|
|
| 1 |
FROM python:3.11-slim
|
| 2 |
|
| 3 |
+
WORKDIR /app
|
| 4 |
|
| 5 |
+
# Install only curl for healthcheck
|
| 6 |
+
# --no-install-recommends limits the layer to curl and its hard dependencies;
# the apt lists are removed in the same layer so they never reach the image.
RUN apt-get update && apt-get install -y --no-install-recommends curl && rm -rf /var/lib/apt/lists/*
|
|
|
|
|
|
|
| 7 |
|
| 8 |
+
# Copy and install dependencies FIRST (this layer only rebuilds when requirements.txt changes)
|
| 9 |
+
COPY requirements.txt .
|
| 10 |
+
RUN pip install --no-cache-dir --upgrade pip && \
|
| 11 |
+
pip install --no-cache-dir -r requirements.txt
|
| 12 |
|
| 13 |
+
# Copy application code LAST (changes here won't invalidate pip install)
|
| 14 |
COPY . .
|
| 15 |
|
| 16 |
+
ENV PYTHONPATH=/app/app
|
| 17 |
|
| 18 |
+
# Create entrypoint script
|
| 19 |
+
RUN echo '#!/bin/bash\n\
|
| 20 |
+
python3 -m app.scripts.download_assets\n\
|
| 21 |
+
exec streamlit run app/main.py --server.port=8501 --server.address=0.0.0.0\n\
|
| 22 |
+
' > /entrypoint.sh && chmod +x /entrypoint.sh
|
| 23 |
|
| 24 |
+
EXPOSE 8501
|
| 25 |
HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
|
| 26 |
|
| 27 |
+
ENTRYPOINT ["/entrypoint.sh"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app/scripts/download_assets.py
CHANGED
|
@@ -85,7 +85,7 @@ def download_chromadb(chromadb_repo: str, chromadb_dir: Path, hf_token: str | No
|
|
| 85 |
print(f"Checking ChromaDB in {chromadb_dir}...")
|
| 86 |
|
| 87 |
# Check if ChromaDB directory already has content
|
| 88 |
-
expected_chroma_path = chromadb_dir / settings.chroma_db
|
| 89 |
if expected_chroma_path.exists() and any(expected_chroma_path.iterdir()):
|
| 90 |
print(f"ChromaDB directory already contains files. Skipping download.")
|
| 91 |
print(f"To force re-download, delete {chromadb_dir} and restart.")
|
|
|
|
| 85 |
print(f"Checking ChromaDB in {chromadb_dir}...")
|
| 86 |
|
| 87 |
# Check if ChromaDB directory already has content
|
| 88 |
+
expected_chroma_path = chromadb_dir / "bge-small-finetuned-chroma" # should be settings.chroma_db
|
| 89 |
if expected_chroma_path.exists() and any(expected_chroma_path.iterdir()):
|
| 90 |
print(f"ChromaDB directory already contains files. Skipping download.")
|
| 91 |
print(f"To force re-download, delete {chromadb_dir} and restart.")
|