ragtiquicIA / scripts /streamlit_entrypoint.sh
Santiago Casas
working perfectly on docker locally
df81c4f
#!/usr/bin/env bash
set -euo pipefail
# Runtime configuration. Every setting keeps a non-empty value supplied by the
# caller's environment and otherwise falls back to the default given here.
: "${DB_NAME:=test_db}"
: "${KG_DB_URL:=ws://localhost:8000/rpc}"
: "${KG_ENABLE_INGESTION:=false}"
: "${KG_EMBEDDINGS_PROVIDER:=sentence-transformers}"
: "${KG_LOCAL_EMBEDDINGS_MODEL:=sentence-transformers/all-MiniLM-L6-v2}"
: "${KG_DOCLING_TOKENIZER:=cl100k_base}"
: "${STREAMLIT_SERVER_MAX_UPLOAD_SIZE:=50}"
: "${STREAMLIT_SERVER_HEADLESS:=true}"
: "${STREAMLIT_SERVER_PORT:=8501}"
: "${STREAMLIT_SERVER_ADDRESS:=0.0.0.0}"

# Make all of the above visible to child processes.
export DB_NAME \
  KG_DB_URL \
  KG_ENABLE_INGESTION \
  KG_EMBEDDINGS_PROVIDER \
  KG_LOCAL_EMBEDDINGS_MODEL \
  KG_DOCLING_TOKENIZER \
  STREAMLIT_SERVER_MAX_UPLOAD_SIZE \
  STREAMLIT_SERVER_HEADLESS \
  STREAMLIT_SERVER_PORT \
  STREAMLIT_SERVER_ADDRESS
# Make sure the /app/logs directory exists (-p: no error if it already does).
mkdir -p /app/logs
# Pull SurrealDB RocksDB store from a public HF dataset repo.
# Each HF_* setting keeps a value from the environment, else the default here:
#   HF_DATASET_REPO_URL  git URL of the dataset repository
#   HF_DATASET_DIR       local checkout directory
#   HF_DB_SUBDIR         location of the DB store inside the checkout
#   HF_DATASET_UPDATE    "1" => fast-forward an already existing checkout
#   HF_DATASET_HYDRATE   "1" => run `git lfs pull` for the DB files afterwards
HF_DATASET_REPO_URL="${HF_DATASET_REPO_URL:-https://huggingface.co/datasets/santicas/politicaldatabase}"
HF_DATASET_DIR="${HF_DATASET_DIR:-/dbs/politicaldatabase}"
HF_DB_SUBDIR="${HF_DB_SUBDIR:-dbs/knowledge-graph}"
HF_DATASET_UPDATE="${HF_DATASET_UPDATE:-1}"
HF_DATASET_HYDRATE="${HF_DATASET_HYDRATE:-1}"
mkdir -p "$(dirname "$HF_DATASET_DIR")"

# When $HF_DATASET_DIR is bind-mounted (local dev), git may reject operations due
# to ownership differences. Mark it safe to avoid fatal errors. Failure of this
# step is deliberately ignored.
git config --global --add safe.directory "$HF_DATASET_DIR" >/dev/null 2>&1 || true

if [ ! -d "$HF_DATASET_DIR/.git" ]; then
  # No git checkout yet: start from a clean target.
  # A real directory is emptied in place instead of being removed, because
  # `rm -rf` on a bind-mounted directory fails when it reaches the mount point
  # itself, which would abort the script under `set -e`. `git clone` accepts an
  # existing directory as long as it is empty.
  if [ -d "$HF_DATASET_DIR" ] && [ ! -L "$HF_DATASET_DIR" ]; then
    find "$HF_DATASET_DIR" -mindepth 1 -delete
  else
    # Missing path, plain file or symlink: remove it outright as before.
    rm -rf -- "${HF_DATASET_DIR:?}"
  fi
  echo "Cloning dataset repo: $HF_DATASET_REPO_URL -> $HF_DATASET_DIR"
  # Shallow clone that leaves LFS files as pointers.
  GIT_LFS_SKIP_SMUDGE=1 git clone --depth 1 "$HF_DATASET_REPO_URL" "$HF_DATASET_DIR"
else
  if [ "$HF_DATASET_UPDATE" = "1" ]; then
    echo "Updating dataset repo: $HF_DATASET_DIR"
    # NOTE(review): unlike the clone above, this pull runs without
    # GIT_LFS_SKIP_SMUDGE — confirm that is intended.
    git -C "$HF_DATASET_DIR" pull --ff-only
  else
    echo "Using existing dataset checkout: $HF_DATASET_DIR"
  fi
fi
# Hydrate only the DB files (per the original note: uses the git-xet transfer
# agent under the hood). The pull is first limited to $HF_DB_SUBDIR; if that
# command fails, an unrestricted `git lfs pull` is attempted instead.
if [[ "$HF_DATASET_HYDRATE" == "1" ]]; then
  if ! git -C "$HF_DATASET_DIR" lfs pull --include "$HF_DB_SUBDIR/**"; then
    git -C "$HF_DATASET_DIR" lfs pull
  fi
fi

# Refuse to continue unless the store directory contains a CURRENT file.
HF_DB_PATH="${HF_DATASET_DIR}/${HF_DB_SUBDIR}"
if [[ ! -f "${HF_DB_PATH}/CURRENT" ]]; then
  {
    printf '%s\n' "Expected SurrealDB RocksDB store not found at: $HF_DB_PATH"
    printf '%s\n' "Make sure the dataset repo contains: $HF_DB_SUBDIR"
  } >&2
  exit 1
fi
# Launch SurrealDB in the background on top of the RocksDB store; its stdout and
# stderr are written to /var/log/surrealdb.log.
# NOTE(review): the root/root credentials are hard-coded on this command line —
# confirm that is acceptable for every deployment target.
surreal start -u root -p root "rocksdb:$HF_DB_PATH" \
  >/var/log/surrealdb.log 2>&1 &
surreal_pid=$!

# Poll 127.0.0.1:8000 once per second, for at most 60 attempts. The helper is
# presumably exiting 0 once the port accepts connections — verify against
# is_port_open.py. Unlike a bare retry loop, this aborts when the database
# process dies or never becomes reachable, rather than starting the UI without
# a database behind it.
surreal_ready=0
for _ in {1..60}; do
  if python /app/scripts/helpers/is_port_open.py 127.0.0.1 8000; then
    surreal_ready=1
    break
  fi
  # kill -0 only probes whether the process still exists; no signal is sent.
  if ! kill -0 "$surreal_pid" 2>/dev/null; then
    echo "SurrealDB exited before becoming ready; see /var/log/surrealdb.log" >&2
    exit 1
  fi
  sleep 1
done

if [ "$surreal_ready" != "1" ]; then
  echo "Timed out waiting for SurrealDB on 127.0.0.1:8000; see /var/log/surrealdb.log" >&2
  exit 1
fi
# Replace this shell with the Streamlit process (exec: nothing after this line
# runs). The app path is relative, so it is resolved against the current
# working directory.
streamlit_flags=(
  --server.address="${STREAMLIT_SERVER_ADDRESS}"
  --server.port="${STREAMLIT_SERVER_PORT}"
)
exec uv run streamlit run examples/knowledge-graph/streamlit_app.py "${streamlit_flags[@]}"