Spaces:
Sleeping
Sleeping
Update Dockerfile
Browse files- Dockerfile +16 -16
Dockerfile
CHANGED
|
@@ -1,37 +1,37 @@
|
|
| 1 |
FROM python:3.10-slim
|
| 2 |
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
git curl && rm -rf /var/lib/apt/lists/*
|
| 6 |
|
| 7 |
WORKDIR /app
|
| 8 |
COPY requirements.txt .
|
| 9 |
RUN pip install --no-cache-dir -r requirements.txt
|
| 10 |
|
| 11 |
-
# copy app
|
| 12 |
COPY . .
|
| 13 |
|
| 14 |
-
#
|
| 15 |
ENV HF_HOME=/data/.huggingface
|
| 16 |
-
|
| 17 |
-
# default envs (override in Space Settings if you like)
|
| 18 |
ENV CORPUS_REPO=Azizahalq/materialmind-corpus
|
| 19 |
-
ENV INDEX_UUID=space
|
| 20 |
ENV EMB_PROVIDER=hf
|
| 21 |
ENV EMB_MODEL=BAAI/bge-small-en-v1.5
|
| 22 |
-
ENV
|
| 23 |
ENV INDEX_COLLECTION=materialmind
|
|
|
|
| 24 |
ENV PORT=7860
|
| 25 |
|
| 26 |
-
# On
|
| 27 |
-
# 1) if the index folder doesn't exist, build it from the HF dataset
|
| 28 |
-
# 2) start the Flask app with gunicorn
|
| 29 |
CMD bash -lc '\
|
| 30 |
-
if [ ! -d "MaterialMind/index/chroma_v3/${INDEX_UUID}" ]; then \
|
| 31 |
-
echo "[BOOT] Building index from $CORPUS_REPO into
|
| 32 |
-
python build_index_from_hf.py
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
else \
|
| 34 |
-
echo "[BOOT] Found existing index at
|
| 35 |
fi; \
|
| 36 |
gunicorn -w 2 -k gthread -t 120 -b 0.0.0.0:${PORT} app:app \
|
| 37 |
'
|
|
|
|
| 1 |
FROM python:3.10-slim

# OS-level tools. The apt package lists are removed in the same layer that
# created them so they do not add to the image size.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*
|
|
|
|
| 5 |
|
| 6 |
# Install Python dependencies before copying the rest of the source so the
# dependency layer is reused when only application code changes.
WORKDIR /app
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt

# Copy the application source into /app.
COPY . ./
|
| 11 |
|
| 12 |
+
# Use /data for caches and our index.
ENV DATA_ROOT=/data \
    HF_HOME=/data/.huggingface

# Default corpus, embedding, and index identity settings.
ENV CORPUS_REPO=Azizahalq/materialmind-corpus \
    EMB_PROVIDER=hf \
    EMB_MODEL=BAAI/bge-small-en-v1.5 \
    INDEX_UUID=space \
    INDEX_COLLECTION=materialmind

# ${INDEX_UUID} is substituted when the image is built, using the default set
# in the previous ENV instruction; it must stay in a separate, later
# instruction for that substitution to see the value.
ENV INDEX_DIR=/data/MaterialMind/index/chroma_v3/${INDEX_UUID} \
    PORT=7860
|
| 22 |
|
| 23 |
+
# On start: build the index under /data if it is missing, then run gunicorn.
# - Variable expansions passed as arguments are double-quoted so values
#   containing spaces or glob characters are passed through as single words.
# - gunicorn is started with `exec` so it replaces the bash process instead of
#   running as its child, and signals sent to that process reach gunicorn.
# NOTE(review): if the index build fails, gunicorn is still started (commands
# are joined with `;`). This matches the previous behavior; confirm it is
# intended before tightening it with `set -e`.
CMD bash -lc '\
  if [ ! -d "/data/MaterialMind/index/chroma_v3/${INDEX_UUID}" ]; then \
    echo "[BOOT] Building index from $CORPUS_REPO into /data/MaterialMind/index/chroma_v3/${INDEX_UUID}..."; \
    python build_index_from_hf.py \
      --repo "${CORPUS_REPO}" \
      --split train \
      --out_dir /data/MaterialMind/index/chroma_v3 \
      --uuid "${INDEX_UUID}" \
      --collection "${INDEX_COLLECTION}"; \
  else \
    echo "[BOOT] Found existing index at /data/MaterialMind/index/chroma_v3/${INDEX_UUID}"; \
  fi; \
  exec gunicorn -w 2 -k gthread -t 120 -b "0.0.0.0:${PORT}" app:app \
'
|