#!/usr/bin/env bash
#
# Container entrypoint for the knowledge-graph Streamlit example.
#
# Steps, in order:
#   1. Export runtime configuration (each value can be overridden from the
#      environment; the defaults below apply otherwise).
#   2. Clone or fast-forward the dataset repository that holds the SurrealDB
#      RocksDB store, then hydrate its LFS-tracked files.
#   3. Start SurrealDB in the background on top of that store and wait for it
#      to accept connections.
#   4. Replace this shell with the Streamlit application.
#
# Environment variables read by this script (default in parentheses):
#   DB_NAME                                     (test_db)
#   KG_DB_URL                                   (ws://localhost:8000/rpc)
#   KG_ENABLE_INGESTION                         (false)
#   KG_EMBEDDINGS_PROVIDER                      (sentence-transformers)
#   KG_LOCAL_EMBEDDINGS_MODEL                   (sentence-transformers/all-MiniLM-L6-v2)
#   KG_DOCLING_TOKENIZER                        (cl100k_base)
#   STREAMLIT_SERVER_MAX_UPLOAD_SIZE            (50)
#   STREAMLIT_SERVER_HEADLESS                   (true)
#   STREAMLIT_SERVER_PORT                       (8501)
#   STREAMLIT_SERVER_ADDRESS                    (0.0.0.0)
#   HF_DATASET_REPO_URL  dataset repo to clone
#   HF_DATASET_DIR       local checkout path    (/dbs/politicaldatabase)
#   HF_DB_SUBDIR         store path in the repo (dbs/knowledge-graph)
#   HF_DATASET_UPDATE    "1" = pull an existing checkout      (1)
#   HF_DATASET_HYDRATE   "1" = run `git lfs pull`             (1)

set -euo pipefail

# --- Runtime configuration ---------------------------------------------------

DB_NAME="${DB_NAME:-test_db}"
export DB_NAME

export KG_DB_URL="${KG_DB_URL:-ws://localhost:8000/rpc}"
export KG_ENABLE_INGESTION="${KG_ENABLE_INGESTION:-false}"
export KG_EMBEDDINGS_PROVIDER="${KG_EMBEDDINGS_PROVIDER:-sentence-transformers}"
export KG_LOCAL_EMBEDDINGS_MODEL="${KG_LOCAL_EMBEDDINGS_MODEL:-sentence-transformers/all-MiniLM-L6-v2}"
export KG_DOCLING_TOKENIZER="${KG_DOCLING_TOKENIZER:-cl100k_base}"

export STREAMLIT_SERVER_MAX_UPLOAD_SIZE="${STREAMLIT_SERVER_MAX_UPLOAD_SIZE:-50}"
export STREAMLIT_SERVER_HEADLESS="${STREAMLIT_SERVER_HEADLESS:-true}"
export STREAMLIT_SERVER_PORT="${STREAMLIT_SERVER_PORT:-8501}"
export STREAMLIT_SERVER_ADDRESS="${STREAMLIT_SERVER_ADDRESS:-0.0.0.0}"

# Pull SurrealDB RocksDB store from a public HF dataset repo.
HF_DATASET_REPO_URL="${HF_DATASET_REPO_URL:-https://huggingface.co/datasets/santicas/politicaldatabase}"
HF_DATASET_DIR="${HF_DATASET_DIR:-/dbs/politicaldatabase}"
HF_DB_SUBDIR="${HF_DB_SUBDIR:-dbs/knowledge-graph}"
HF_DATASET_UPDATE="${HF_DATASET_UPDATE:-1}"
HF_DATASET_HYDRATE="${HF_DATASET_HYDRATE:-1}"

# Location of the RocksDB store inside the dataset checkout.
HF_DB_PATH="$HF_DATASET_DIR/$HF_DB_SUBDIR"

# --- Helpers -----------------------------------------------------------------

# Print the arguments to stderr and terminate with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Ensure $HF_DATASET_DIR is a git checkout of $HF_DATASET_REPO_URL.
# A missing or non-git directory is replaced by a fresh shallow clone; an
# existing checkout is fast-forwarded only when HF_DATASET_UPDATE is "1".
sync_dataset_repo() {
  mkdir -p "$(dirname "$HF_DATASET_DIR")"

  # When $HF_DATASET_DIR is bind-mounted (local dev), git may reject operations
  # due to ownership differences. Mark it safe to avoid fatal errors.
  # Best effort on purpose: a failure here must not abort startup.
  git config --global --add safe.directory "$HF_DATASET_DIR" >/dev/null 2>&1 || true

  if [[ ! -d "$HF_DATASET_DIR/.git" ]]; then
    # ':?' aborts instead of expanding to an empty path; '--' stops option
    # parsing should the path start with a dash.
    rm -rf -- "${HF_DATASET_DIR:?}"
    echo "Cloning dataset repo: $HF_DATASET_REPO_URL -> $HF_DATASET_DIR"
    # Skip the LFS smudge filter at clone time; large files are fetched
    # selectively by hydrate_db_files.
    GIT_LFS_SKIP_SMUDGE=1 git clone --depth 1 "$HF_DATASET_REPO_URL" "$HF_DATASET_DIR"
  elif [[ "$HF_DATASET_UPDATE" == "1" ]]; then
    echo "Updating dataset repo: $HF_DATASET_DIR"
    git -C "$HF_DATASET_DIR" pull --ff-only
  else
    echo "Using existing dataset checkout: $HF_DATASET_DIR"
  fi
}

# Hydrate only the DB files (uses git-xet transfer agent under the hood).
# Falls back to a full `git lfs pull` when the filtered pull fails.
hydrate_db_files() {
  if [[ "$HF_DATASET_HYDRATE" == "1" ]]; then
    git -C "$HF_DATASET_DIR" lfs pull --include "$HF_DB_SUBDIR/**" \
      || git -C "$HF_DATASET_DIR" lfs pull
  fi
}

# Abort unless the store directory contains a CURRENT file.
require_db_store() {
  if [[ ! -f "$HF_DB_PATH/CURRENT" ]]; then
    echo "Expected SurrealDB RocksDB store not found at: $HF_DB_PATH" >&2
    echo "Make sure the dataset repo contains: $HF_DB_SUBDIR" >&2
    exit 1
  fi
}

# Launch SurrealDB in the background and poll 127.0.0.1:8000 once per second,
# for at most 60 probes.
#   - Exits the script if the SurrealDB process terminates while we wait.
#   - Emits a warning (but continues) if the port is still closed after the
#     last probe and the process is alive.
start_surrealdb() {
  local surreal_log=/var/log/surrealdb.log
  local surreal_pid attempt
  local ready=0

  # NOTE(review): credentials are hard-coded to root/root — confirm this is
  # acceptable for every environment this image runs in.
  surreal start -u root -p root "rocksdb:$HF_DB_PATH" \
    >"$surreal_log" 2>&1 &
  surreal_pid=$!

  for ((attempt = 1; attempt <= 60; attempt++)); do
    if python /app/scripts/helpers/is_port_open.py 127.0.0.1 8000; then
      ready=1
      break
    fi
    # A dead server will never open the port; fail now with a pointer to its
    # log rather than waiting out the remaining probes.
    if ! kill -0 "$surreal_pid" 2>/dev/null; then
      die "SurrealDB exited before accepting connections; see $surreal_log"
    fi
    sleep 1
  done

  if [[ "$ready" != "1" ]]; then
    printf '%s\n' \
      "Warning: SurrealDB not reachable on 127.0.0.1:8000 after 60 attempts; continuing (see $surreal_log)" >&2
  fi
}

# --- Entry point -------------------------------------------------------------

main() {
  mkdir -p /app/logs

  sync_dataset_repo
  hydrate_db_files
  require_db_store
  start_surrealdb

  # exec: Streamlit replaces this shell instead of running as its child.
  exec uv run streamlit run examples/knowledge-graph/streamlit_app.py \
    --server.address="${STREAMLIT_SERVER_ADDRESS}" \
    --server.port="${STREAMLIT_SERVER_PORT}"
}

main "$@"