Spaces:
Sleeping
Sleeping
File size: 1,511 Bytes
e84586b 5df5d53 e84586b 5df5d53 e84586b 5df5d53 e84586b 5df5d53 e84586b 5df5d53 e84586b 5df5d53 e84586b d3186b6 5df5d53 d3186b6 5df5d53 d3186b6 e84586b d3186b6 e84586b d3186b6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# Base image: slim Python 3.10 (apt-based).
FROM python:3.10-slim

# OS packages: curl and git, without recommended extras. The apt package lists
# are removed in the same layer so they do not add to the image size.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies before copying the sources so this layer stays
# cached until requirements.txt itself changes.
COPY requirements.txt ./
RUN pip install --no-cache-dir --requirement requirements.txt

# Application sources.
COPY . ./
# Runtime configuration: caches and the index live under /data.
ENV HF_HOME=/data/.huggingface \
    DATA_ROOT=/data \
    CORPUS_REPO=Azizahalq/materialmind-corpus \
    EMB_PROVIDER=hf \
    EMB_MODEL=BAAI/bge-small-en-v1.5 \
    INDEX_UUID=space \
    INDEX_COLLECTION=materialmind \
    PORT=7860
# Kept in its own instruction: ${INDEX_UUID} is substituted from the value set
# by an earlier instruction, not from the instruction it appears in.
ENV INDEX_DIR=/data/MaterialMind/index/chroma_v3/${INDEX_UUID}
# Container start-up command. On start it:
#   1. picks a data root: $DATA_ROOT (default /data), or /tmp when that
#      directory is not writable;
#   2. points HF_HOME at "<data root>/.huggingface";
#   3. runs build_index_from_hf.py when
#      "<data root>/MaterialMind/index/chroma_v3/$INDEX_UUID" is not yet a
#      directory (assumes the script writes the index to exactly that path,
#      which the existence check relies on — confirm against the script);
#   4. exports INDEX_DIR with that path and starts gunicorn on $PORT
#      (default 7860).
#
# Shell form is kept because the script relies on shell quoting. The two
# `exec`s make sh -> bash -> gunicorn replace one another, so gunicorn becomes
# the container's main process and receives stop signals directly.
#
# If the index build fails, the partially written index directory is removed
# (it did not exist before this boot) and the container exits non-zero, rather
# than serving without an index or mistaking the partial directory for a
# finished index on the next boot.
CMD exec bash -lc '\
DATA_BASE="${DATA_ROOT:-/data}"; \
mkdir -p "$DATA_BASE" 2>/dev/null || true; \
if [ ! -w "$DATA_BASE" ]; then \
  echo "[BOOT] $DATA_BASE not writable; falling back to /tmp"; \
  DATA_BASE="/tmp"; \
fi; \
export HF_HOME="$DATA_BASE/.huggingface"; \
OUT_DIR="$DATA_BASE/MaterialMind/index/chroma_v3"; \
INDEX_PATH="$OUT_DIR/${INDEX_UUID}"; \
if [ ! -d "$INDEX_PATH" ]; then \
  echo "[BOOT] Building index from $CORPUS_REPO into $INDEX_PATH ..."; \
  mkdir -p "$OUT_DIR"; \
  if ! python build_index_from_hf.py \
      --repo "${CORPUS_REPO}" \
      --split train \
      --out_dir "$OUT_DIR" \
      --uuid "${INDEX_UUID}" \
      --collection "${INDEX_COLLECTION}"; then \
    echo "[BOOT] ERROR: index build failed; removing partial index at $INDEX_PATH" >&2; \
    rm -rf "$INDEX_PATH"; \
    exit 1; \
  fi; \
else \
  echo "[BOOT] Found existing index at $INDEX_PATH"; \
fi; \
export INDEX_DIR="$INDEX_PATH"; \
exec gunicorn -w 2 -k gthread -t 120 -b "0.0.0.0:${PORT:-7860}" app:app \
'