Azizahalq committed on
Commit
5df5d53
·
verified ·
1 Parent(s): c7d8536

Update Dockerfile

Browse files
Files changed (1) hide show
  1. Dockerfile +16 -16
Dockerfile CHANGED
@@ -1,37 +1,37 @@
1
  FROM python:3.10-slim
2
 
3
- # system deps (optional but nice)
4
- RUN apt-get update && apt-get install -y --no-install-recommends \
5
- git curl && rm -rf /var/lib/apt/lists/*
6
 
7
  WORKDIR /app
8
  COPY requirements.txt .
9
  RUN pip install --no-cache-dir -r requirements.txt
10
 
11
- # copy app
12
  COPY . .
13
 
14
- # cache models/datasets to persistent disk on Spaces
15
  ENV HF_HOME=/data/.huggingface
16
-
17
- # default envs (override in Space Settings if you like)
18
  ENV CORPUS_REPO=Azizahalq/materialmind-corpus
19
- ENV INDEX_UUID=space
20
  ENV EMB_PROVIDER=hf
21
  ENV EMB_MODEL=BAAI/bge-small-en-v1.5
22
- ENV INDEX_DIR=MaterialMind/index/chroma_v3/${INDEX_UUID}
23
  ENV INDEX_COLLECTION=materialmind
 
24
  ENV PORT=7860
25
 
26
- # On container start:
27
- # 1) if the index folder doesn't exist, build it from the HF dataset
28
- # 2) start the Flask app with gunicorn
29
  CMD bash -lc '\
30
- if [ ! -d "MaterialMind/index/chroma_v3/${INDEX_UUID}" ]; then \
31
- echo "[BOOT] Building index from $CORPUS_REPO into ${INDEX_DIR}..."; \
32
- python build_index_from_hf.py --repo ${CORPUS_REPO} --split train --uuid ${INDEX_UUID} --collection ${INDEX_COLLECTION}; \
 
 
 
 
 
33
  else \
34
- echo "[BOOT] Found existing index at ${INDEX_DIR}"; \
35
  fi; \
36
  gunicorn -w 2 -k gthread -t 120 -b 0.0.0.0:${PORT} app:app \
37
  '
 
# Image for the MaterialMind app: installs the Python requirements, copies the
# source into /app, and on container start builds the search index under /data
# (only if it is missing) before serving app:app with gunicorn.
FROM python:3.10-slim

# OS packages. The apt package lists are removed in the same layer so they do
# not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies before copying the rest of the source so this
# layer is reused until requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Use /data for caches and our index
# (presumably the persistent disk of the hosting Space - confirm it is mounted
# and writable at run time).
ENV HF_HOME=/data/.huggingface
ENV DATA_ROOT=/data

# Defaults for the index build and the app.
ENV CORPUS_REPO=Azizahalq/materialmind-corpus
ENV EMB_PROVIDER=hf
ENV EMB_MODEL=BAAI/bge-small-en-v1.5
ENV INDEX_UUID=space
ENV INDEX_COLLECTION=materialmind

# ${INDEX_UUID} is substituted by Docker when the image is built, so INDEX_DIR
# is baked in with the default UUID above. Overriding INDEX_UUID at run time
# does NOT change INDEX_DIR; override INDEX_DIR as well if both must agree.
# This must stay a separate ENV instruction placed after INDEX_UUID.
ENV INDEX_DIR=/data/MaterialMind/index/chroma_v3/${INDEX_UUID}
ENV PORT=7860

# On start: build index under /data if missing, then run gunicorn.
#
# - Exec (JSON) form is used so no extra /bin/sh wrapper sits in front of bash,
#   and gunicorn is started with `exec` so it replaces bash and receives the
#   container's stop signal directly.
# - The index path is derived from the run-time value of INDEX_UUID (not from
#   INDEX_DIR) and computed once so the check and the log messages cannot drift.
# - Statements are separated by `;`, so gunicorn is started even when the index
#   build exits with an error.
CMD ["bash", "-lc", "\
  index_root=/data/MaterialMind/index/chroma_v3; \
  index_path=\"${index_root}/${INDEX_UUID}\"; \
  if [ -d \"${index_path}\" ]; then \
    echo \"[BOOT] Found existing index at ${index_path}\"; \
  else \
    echo \"[BOOT] Building index from $CORPUS_REPO into ${index_path}...\"; \
    python build_index_from_hf.py \
      --repo \"${CORPUS_REPO}\" \
      --split train \
      --out_dir \"${index_root}\" \
      --uuid \"${INDEX_UUID}\" \
      --collection \"${INDEX_COLLECTION}\"; \
  fi; \
  exec gunicorn -w 2 -k gthread -t 120 -b \"0.0.0.0:${PORT}\" app:app \
"]