FROM python:3.10-slim

# OS-level tools. `apt-get update` and `install` share one layer, and the apt
# lists are removed in that same layer so they never end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        git \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy the dependency manifest on its own first so the pip layer is reused
# until requirements.txt itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY . .

# Use /data for caches and our index
ENV HF_HOME=/data/.huggingface \
    DATA_ROOT=/data \
    CORPUS_REPO=Azizahalq/materialmind-corpus \
    EMB_PROVIDER=hf \
    EMB_MODEL=BAAI/bge-small-en-v1.5 \
    INDEX_UUID=space \
    INDEX_COLLECTION=materialmind \
    PORT=7860

# Kept as a separate instruction on purpose: ${INDEX_UUID} is substituted from
# values set by *earlier* instructions, so it must not share an ENV with the
# INDEX_UUID definition above.
ENV INDEX_DIR=/data/MaterialMind/index/chroma_v3/${INDEX_UUID}

# Documentation only: the default port gunicorn binds to (see PORT above).
EXPOSE 7860

# On start: build index under /data if missing, then run gunicorn.
#
# Boot flow of the inline script:
#   1. Use $DATA_ROOT (default /data) as the base directory; if it is not
#      writable, fall back to /tmp.
#   2. Re-export HF_HOME and INDEX_DIR relative to the chosen base, overriding
#      the image-level defaults.
#   3. If <base>/MaterialMind/index/chroma_v3/$INDEX_UUID does not exist, run
#      build_index_from_hf.py to create it. No `set -e` is used, so gunicorn is
#      still started even if that build step exits non-zero.
#   4. `exec` gunicorn so it replaces the shell and receives stop signals
#      directly instead of sitting behind a wrapper shell.
#
# Exec (JSON) form is used so no implicit `/bin/sh -c` wrapper is added; the
# script is still interpreted by `bash -lc`, so $VAR expansion works as before.
CMD ["bash", "-lc", "\
    DATA_BASE=\"${DATA_ROOT:-/data}\"; \
    mkdir -p \"$DATA_BASE\" 2>/dev/null || true; \
    if [ ! -w \"$DATA_BASE\" ]; then \
      echo \"[BOOT] $DATA_BASE not writable; falling back to /tmp\"; \
      DATA_BASE=\"/tmp\"; \
    fi; \
    export HF_HOME=\"$DATA_BASE/.huggingface\"; \
    OUT_DIR=\"$DATA_BASE/MaterialMind/index/chroma_v3\"; \
    INDEX_PATH=\"$OUT_DIR/${INDEX_UUID}\"; \
    if [ ! -d \"$INDEX_PATH\" ]; then \
      echo \"[BOOT] Building index from $CORPUS_REPO into $INDEX_PATH ...\"; \
      mkdir -p \"$OUT_DIR\"; \
      python build_index_from_hf.py \
        --repo \"${CORPUS_REPO}\" \
        --split train \
        --out_dir \"$OUT_DIR\" \
        --uuid \"${INDEX_UUID}\" \
        --collection \"${INDEX_COLLECTION}\"; \
    else \
      echo \"[BOOT] Found existing index at $INDEX_PATH\"; \
    fi; \
    export INDEX_DIR=\"$INDEX_PATH\"; \
    exec gunicorn -w 2 -k gthread -t 120 -b 0.0.0.0:${PORT:-7860} app:app"]