File size: 1,511 Bytes
e84586b
 
5df5d53
 
e84586b
 
 
 
 
 
 
5df5d53
e84586b
5df5d53
e84586b
 
 
5df5d53
e84586b
5df5d53
e84586b
5df5d53
e84586b
d3186b6
 
 
 
 
 
 
 
 
 
 
 
5df5d53
d3186b6
5df5d53
d3186b6
 
 
e84586b
d3186b6
e84586b
d3186b6
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# Base image: slim Python 3.10.
FROM python:3.10-slim

# OS-level tools. The apt package lists are deleted in the same layer that
# created them so they do not persist in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies from the manifest alone, before the rest of the
# source is copied, so this layer stays cached when only application code changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

# Copy the remaining build context into /app.
COPY . ./

# Runtime defaults: caches and the index live under /data.
# DATA_ROOT, CORPUS_REPO, INDEX_UUID, INDEX_COLLECTION and PORT are read by the
# startup command in this file; HF_HOME and INDEX_DIR are re-exported by that
# command at boot. EMB_PROVIDER and EMB_MODEL are not referenced in this file
# (presumably consumed by the application -- TODO confirm).
ENV HF_HOME=/data/.huggingface \
    DATA_ROOT=/data \
    CORPUS_REPO=Azizahalq/materialmind-corpus \
    EMB_PROVIDER=hf \
    EMB_MODEL=BAAI/bge-small-en-v1.5 \
    INDEX_UUID=space \
    INDEX_COLLECTION=materialmind \
    PORT=7860
# Deliberately a separate instruction: ${INDEX_UUID} is substituted with the
# value above only when that value was set by an earlier ENV instruction.
ENV INDEX_DIR=/data/MaterialMind/index/chroma_v3/${INDEX_UUID}
# On start: build the index under /data if it is missing, then run gunicorn.
#
# Boot sequence performed by the script below:
#   1. Use $DATA_ROOT (default /data) as the data base directory; if it is not
#      writable, fall back to /tmp.
#   2. Re-export HF_HOME under the chosen base directory.
#   3. If <base>/MaterialMind/index/chroma_v3/$INDEX_UUID does not exist, run
#      build_index_from_hf.py against $CORPUS_REPO to create it.
#   4. Export INDEX_DIR to that path and start gunicorn on $PORT (default 7860).
#
# Both shells are replaced with `exec` so gunicorn ends up as the container's
# main process and receives SIGTERM from `docker stop` directly, instead of
# being killed after the stop timeout because a wrapper shell never forwarded
# the signal.
#
# NOTE(review): the exit status of build_index_from_hf.py is not checked, so
# gunicorn is started even when the index build fails -- confirm this
# best-effort behaviour is intended.
CMD exec bash -lc '\
  DATA_BASE="${DATA_ROOT:-/data}"; \
  mkdir -p "$DATA_BASE" 2>/dev/null || true; \
  if [ ! -w "$DATA_BASE" ]; then \
    echo "[BOOT] $DATA_BASE not writable; falling back to /tmp"; \
    DATA_BASE="/tmp"; \
  fi; \
  export HF_HOME="$DATA_BASE/.huggingface"; \
  OUT_DIR="$DATA_BASE/MaterialMind/index/chroma_v3"; \
  INDEX_PATH="$OUT_DIR/${INDEX_UUID}"; \
  if [ ! -d "$INDEX_PATH" ]; then \
    echo "[BOOT] Building index from $CORPUS_REPO into $INDEX_PATH ..."; \
    mkdir -p "$OUT_DIR"; \
    python build_index_from_hf.py \
      --repo "${CORPUS_REPO}" \
      --split train \
      --out_dir "$OUT_DIR" \
      --uuid "${INDEX_UUID}" \
      --collection "${INDEX_COLLECTION}"; \
  else \
    echo "[BOOT] Found existing index at $INDEX_PATH"; \
  fi; \
  export INDEX_DIR="$INDEX_PATH"; \
  exec gunicorn -w 2 -k gthread -t 120 -b 0.0.0.0:${PORT:-7860} app:app \
'