iris-at-text2sparql / scripts /hf_space_boot.sh
Alex Latipov
Restart Virtuoso after DBpedia graph normalization
6314cdf
#!/usr/bin/env bash
# Boot script, stage 1: strict mode, working directory, environment defaults,
# and the preparation scripts that must run before the SPARQL endpoint is used.
set -euo pipefail

cd /app

# Prepend /app to any PYTHONPATH inherited from the environment.
PYTHONPATH="/app:${PYTHONPATH:-}"
export PYTHONPATH

# Fill in defaults only when the caller left the variable unset or empty.
: "${CORPORATE_GRAPH_URI:=http://ld.company.org/prod}"
: "${PORT:=7860}"
: "${VIRTUOSO_RUNTIME_DIR:=/tmp/virtuoso_runtime}"
export CORPORATE_GRAPH_URI PORT VIRTUOSO_RUNTIME_DIR

# Run the preparation steps in order; `set -e` aborts the boot on the first
# failure. Judging by their file names they restore the DB snapshot, prepare
# virtuoso.ini and start Virtuoso (their contents are not visible here).
for boot_step in \
  scripts/hf_restore_db_snapshot.sh \
  scripts/hf_prepare_virtuoso_ini.sh \
  scripts/hf_start_virtuoso.sh; do
  bash "$boot_step"
done
# Ask the local SPARQL endpoint for the 20 largest named graphs (by triple
# count, descending) as SPARQL JSON results.
# The failure is handled explicitly: under `set -e` a bare assignment would
# abort the script without any message, and `-S` makes curl print its own
# error even though `-s` hides the progress meter.
if ! GRAPH_PROBE_RESPONSE="$(
  curl -fsSG \
    --data-urlencode "query=SELECT ?g (COUNT(*) AS ?c) WHERE { GRAPH ?g { ?s ?p ?o } } GROUP BY ?g ORDER BY DESC(?c) LIMIT 20" \
    --data-urlencode "format=application/sparql-results+json" \
    "http://127.0.0.1:8890/sparql"
)"; then
  echo "Could not query the local SPARQL endpoint for graph detection." >&2
  exit 1
fi

# Pick the first (i.e. largest) graph that is neither the corporate graph nor
# one of the recognised system graphs. The helper exits non-zero when nothing
# qualifies or the payload is not valid JSON; `|| ...=""` keeps `set -e` from
# killing the script before the diagnostic below can be printed.
DBPEDIA_GRAPH_URI="$(
  GRAPH_PROBE_RESPONSE="$GRAPH_PROBE_RESPONSE" CORPORATE_GRAPH_URI="$CORPORATE_GRAPH_URI" python3 - <<'PY'
import json
import os
import sys

payload = json.loads(os.environ["GRAPH_PROBE_RESPONSE"])
corporate = os.environ["CORPORATE_GRAPH_URI"]


def is_system_graph(uri: str) -> bool:
    """Return True for graphs that must not be treated as the DBpedia graph."""
    return (
        uri == corporate
        or uri.startswith("urn:")
        or "openlinksw.com/schemas/virtrdf" in uri
        or uri.endswith("/DAV/")
        or uri.startswith("http://www.w3.org/")
    )


# Bindings arrive ordered by descending triple count, so the first match wins.
for binding in payload.get("results", {}).get("bindings", []):
    uri = binding.get("g", {}).get("value", "")
    if uri and not is_system_graph(uri):
        print(uri)
        sys.exit(0)
sys.exit(1)
PY
)" || DBPEDIA_GRAPH_URI=""

if [[ -z "${DBPEDIA_GRAPH_URI}" ]]; then
  echo "Could not detect DBpedia graph URI after snapshot restore." >&2
  exit 1
fi
# Log the graph name exactly as it was detected, before it is rewritten below
# (logging it afterwards would always print the canonical name).
DETECTED_DBPEDIA_GRAPH_URI="${DBPEDIA_GRAPH_URI}"
echo "Detected DBpedia graph URI: ${DETECTED_DBPEDIA_GRAPH_URI}"

# If the detected graph is not already named http://dbpedia.org, move all of
# its quads to that graph via ISQL. NORMALIZED_DBPEDIA_GRAPH records whether
# the rewrite happened so later steps can react to it.
NORMALIZED_DBPEDIA_GRAPH=0
if [[ "${DBPEDIA_GRAPH_URI}" != "http://dbpedia.org" ]]; then
  # Prefer `isql-vt`, fall back to `isql`; empty when neither is on PATH.
  ISQL_BIN="$(command -v isql-vt || command -v isql || true)"
  if [[ -z "$ISQL_BIN" ]]; then
    echo "Could not find Virtuoso ISQL client for DBpedia graph normalization." >&2
    exit 1
  fi

  # Build the SQL expression that identifies the source graph. A name of the
  # form iri_id_<N>_with_no_name_entry carries a numeric IRI id, which is
  # addressed by number; any other name is looked up as an IRI string.
  if [[ "${DBPEDIA_GRAPH_URI}" =~ ^iri_id_([0-9]+)_with_no_name_entry$ ]]; then
    DBPEDIA_GRAPH_ID="${BASH_REMATCH[1]}"
    SOURCE_GRAPH_EXPR="iri_id_from_num(${DBPEDIA_GRAPH_ID})"
  else
    # NOTE(review): the URI is interpolated into a SQL string literal without
    # escaping; a graph name containing a single quote would break the statement.
    SOURCE_GRAPH_EXPR="iri_to_id('${DBPEDIA_GRAPH_URI}', 0)"
  fi

  # ISQL output is captured in a log file; on failure it is replayed to stderr
  # so the reason is visible instead of the script exiting silently.
  # The ISQL arguments are presumably port, user and password (defaults).
  NORMALIZE_LOG=/tmp/hf_normalize_dbpedia_graph.log
  if ! printf "log_enable(3);\nUPDATE DB.DBA.RDF_QUAD TABLE OPTION (index RDF_QUAD_GS)\n SET g = iri_to_id('http://dbpedia.org')\n WHERE g = %s;\ncheckpoint;\nlog_enable(1);\n" "${SOURCE_GRAPH_EXPR}" \
    | "$ISQL_BIN" 1111 dba dba >"$NORMALIZE_LOG" 2>&1; then
    echo "DBpedia graph normalization failed; ISQL output follows:" >&2
    cat "$NORMALIZE_LOG" >&2 || true
    exit 1
  fi

  NORMALIZED_DBPEDIA_GRAPH=1
  DBPEDIA_GRAPH_URI="http://dbpedia.org"
fi

# From here on the DBpedia graph is always addressed by its canonical name.
export DBPEDIA_GRAPH_URI="http://dbpedia.org"
export DBPEDIA_ENDPOINT_URL="http://127.0.0.1:8890/sparql?default-graph-uri=http://dbpedia.org"
export CORPORATE_ENDPOINT_URL="http://127.0.0.1:8890/sparql?default-graph-uri=http://dbpedia.org&default-graph-uri=${CORPORATE_GRAPH_URI}"
# Restart Virtuoso when the DBpedia graph was rewritten (flag set to "1").
# If a pid file exists, the recorded process is sent SIGTERM and given up to
# 30 seconds to exit; the start script is then run in every case.
if [[ "$NORMALIZED_DBPEDIA_GRAPH" == "1" ]]; then
  VIRT_PID_FILE="${VIRTUOSO_RUNTIME_DIR}/virtuoso.pid"
  if [[ -f "$VIRT_PID_FILE" ]]; then
    VIRT_PID="$(<"$VIRT_PID_FILE")"
    echo "Restarting Virtuoso after DBpedia graph normalization ..."
    # A stale or empty pid makes `kill` fail; under `set -e` that would abort
    # the whole boot, so the failure is tolerated and reported instead.
    if kill "$VIRT_PID" 2>/dev/null; then
      for ((attempt = 0; attempt < 30; attempt++)); do
        # `kill -0` only probes for existence; stop waiting once it is gone.
        kill -0 "$VIRT_PID" 2>/dev/null || break
        sleep 1
      done
      if kill -0 "$VIRT_PID" 2>/dev/null; then
        echo "Warning: Virtuoso (pid ${VIRT_PID}) is still running after 30 seconds." >&2
      fi
    else
      echo "Warning: could not signal Virtuoso pid '${VIRT_PID}'; it may already have exited." >&2
    fi
  fi
  bash scripts/hf_start_virtuoso.sh
fi
# Final stage: run the corporate-graph loader script, then hand the process
# over to the web service.
bash scripts/hf_load_corporate_graph.sh

# `exec` replaces this shell with uvicorn, listening on all interfaces at $PORT.
uvicorn_args=(service.app:app --host 0.0.0.0 --port "$PORT")
exec uvicorn "${uvicorn_args[@]}"