File size: 3,509 Bytes
d745844
 
 
 
 
 
ac1fd0a
d745844
6314cdf
d745844
 
 
 
 
e9cac37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6314cdf
8c8c523
 
 
 
 
 
 
 
 
 
 
 
60407af
8c8c523
 
 
 
 
 
6314cdf
8c8c523
 
 
 
 
 
e9cac37
 
 
6314cdf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d745844
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env bash
#
# Startup script: runs the scripts/hf_*.sh helpers to restore/prepare/start
# Virtuoso, makes the DBpedia data reachable under the graph name
# http://dbpedia.org, runs the corporate-graph loader and finally execs
# uvicorn.
#
# Environment (all optional; defaults are applied below):
#   CORPORATE_GRAPH_URI   graph URI used for the corporate endpoint and
#                         excluded from DBpedia graph detection
#   PORT                  port passed to uvicorn
#   VIRTUOSO_RUNTIME_DIR  directory in which virtuoso.pid is looked up
set -euo pipefail

cd /app

# Prepend /app to PYTHONPATH. The ${VAR:+...} form only appends
# ":$PYTHONPATH" when PYTHONPATH is non-empty, so no trailing ':' (an empty
# path entry) is left behind when it is unset.
export PYTHONPATH="/app${PYTHONPATH:+:${PYTHONPATH}}"

export CORPORATE_GRAPH_URI="${CORPORATE_GRAPH_URI:-http://ld.company.org/prod}"
export PORT="${PORT:-7860}"
export VIRTUOSO_RUNTIME_DIR="${VIRTUOSO_RUNTIME_DIR:-/tmp/virtuoso_runtime}"

# Helper scripts run strictly in this order; set -e aborts on the first failure.
bash scripts/hf_restore_db_snapshot.sh
bash scripts/hf_prepare_virtuoso_ini.sh
bash scripts/hf_start_virtuoso.sh

# Ask the local SPARQL endpoint for up to 20 named graphs, largest first
# (ordered by triple count), as SPARQL JSON results.
# -S keeps curl's own error message visible even though -s silences progress.
if ! GRAPH_PROBE_RESPONSE="$(
  curl -fsSG \
    --data-urlencode "query=SELECT ?g (COUNT(*) AS ?c) WHERE { GRAPH ?g { ?s ?p ?o } } GROUP BY ?g ORDER BY DESC(?c) LIMIT 20" \
    --data-urlencode "format=application/sparql-results+json" \
    "http://127.0.0.1:8890/sparql"
)"; then
  echo "Could not query the graph list from http://127.0.0.1:8890/sparql." >&2
  exit 1
fi

# Pick the first (i.e. largest) graph that is neither the corporate graph nor
# one of the excluded system-like graphs. The Python helper exits non-zero
# when nothing qualifies (or the response is not valid JSON); the trailing
# "|| ..." keeps set -e from aborting here so the check below can report it.
DBPEDIA_GRAPH_URI="$(
  GRAPH_PROBE_RESPONSE="$GRAPH_PROBE_RESPONSE" CORPORATE_GRAPH_URI="$CORPORATE_GRAPH_URI" python3 - <<'PY'
import json
import os
import sys

payload = json.loads(os.environ["GRAPH_PROBE_RESPONSE"])
corporate = os.environ["CORPORATE_GRAPH_URI"]

def is_system_graph(uri: str) -> bool:
    """Return True for graphs that must not be taken for the DBpedia graph."""
    return (
        uri == corporate
        or uri.startswith("urn:")
        or "openlinksw.com/schemas/virtrdf" in uri
        or uri.endswith("/DAV/")
        or uri.startswith("http://www.w3.org/")
    )

# Bindings arrive ordered by descending triple count, so the first match wins.
for binding in payload.get("results", {}).get("bindings", []):
    uri = binding.get("g", {}).get("value", "")
    if uri and not is_system_graph(uri):
        print(uri)
        sys.exit(0)

sys.exit(1)
PY
)" || DBPEDIA_GRAPH_URI=""

if [[ -z "${DBPEDIA_GRAPH_URI}" ]]; then
  echo "Could not detect DBpedia graph URI after snapshot restore." >&2
  exit 1
fi

# Keep the graph name that was actually detected; DBPEDIA_GRAPH_URI itself is
# pinned to http://dbpedia.org further down.
DETECTED_DBPEDIA_GRAPH_URI="${DBPEDIA_GRAPH_URI}"

# Set to 1 when the quads were moved to the http://dbpedia.org graph, which
# later triggers a Virtuoso restart.
NORMALIZED_DBPEDIA_GRAPH=0
if [[ "${DETECTED_DBPEDIA_GRAPH_URI}" != "http://dbpedia.org" ]]; then
  # Prefer isql-vt and fall back to plain isql.
  ISQL_BIN="$(command -v isql-vt || true)"
  if [[ -z "$ISQL_BIN" ]]; then
    ISQL_BIN="$(command -v isql || true)"
  fi
  if [[ -z "$ISQL_BIN" ]]; then
    echo "Could not find Virtuoso ISQL client for DBpedia graph normalization." >&2
    exit 1
  fi

  # Build the SQL expression that identifies the source graph.
  if [[ "${DETECTED_DBPEDIA_GRAPH_URI}" =~ ^iri_id_([0-9]+)_with_no_name_entry$ ]]; then
    # The endpoint reported a placeholder name carrying only a numeric IRI id;
    # address the graph by that id.
    DBPEDIA_GRAPH_ID="${BASH_REMATCH[1]}"
    SOURCE_GRAPH_EXPR="iri_id_from_num(${DBPEDIA_GRAPH_ID})"
  else
    # Double any single quote so the URI cannot terminate the SQL string
    # literal it is embedded in.
    SQ="'"
    ESCAPED_GRAPH_URI="${DETECTED_DBPEDIA_GRAPH_URI//$SQ/$SQ$SQ}"
    SOURCE_GRAPH_EXPR="iri_to_id('${ESCAPED_GRAPH_URI}', 0)"
  fi

  # Re-point every quad of the source graph at http://dbpedia.org, then
  # checkpoint. ISQL output is kept in a log file and replayed on failure so
  # the reason is visible instead of the script just dying under set -e.
  NORMALIZE_LOG=/tmp/hf_normalize_dbpedia_graph.log
  if ! printf "log_enable(3);\nUPDATE DB.DBA.RDF_QUAD TABLE OPTION (index RDF_QUAD_GS)\n  SET g = iri_to_id('http://dbpedia.org')\n  WHERE g = %s;\ncheckpoint;\nlog_enable(1);\n" "${SOURCE_GRAPH_EXPR}" \
    | "$ISQL_BIN" 1111 dba dba >"$NORMALIZE_LOG" 2>&1; then
    echo "DBpedia graph normalization failed; ISQL output follows:" >&2
    cat "$NORMALIZE_LOG" >&2 || true
    exit 1
  fi

  NORMALIZED_DBPEDIA_GRAPH=1
fi

export DBPEDIA_GRAPH_URI="http://dbpedia.org"
export DBPEDIA_ENDPOINT_URL="http://127.0.0.1:8890/sparql?default-graph-uri=http://dbpedia.org"
export CORPORATE_ENDPOINT_URL="http://127.0.0.1:8890/sparql?default-graph-uri=http://dbpedia.org&default-graph-uri=${CORPORATE_GRAPH_URI}"

# Report what the probe found rather than the normalized constant.
if [[ "$NORMALIZED_DBPEDIA_GRAPH" == "1" ]]; then
  echo "Detected DBpedia graph URI: ${DETECTED_DBPEDIA_GRAPH_URI} (normalized to ${DBPEDIA_GRAPH_URI})"
else
  echo "Detected DBpedia graph URI: ${DETECTED_DBPEDIA_GRAPH_URI}"
fi

#######################################
# Ask the process recorded in a PID file to terminate (SIGTERM) and wait for
# it to exit. A missing PID file, an unusable PID or a process that cannot be
# signalled is reported but is not an error, so a stale PID file cannot abort
# the caller under set -e.
# Arguments:
#   $1 - path to the PID file
#   $2 - maximum number of seconds to wait (default: 30)
# Outputs:
#   progress message on stdout, warnings on stderr
# Returns:
#   0 in all handled cases
#######################################
stop_virtuoso_from_pidfile() {
  local pid_file=$1
  local timeout=${2:-30}
  local pid waited

  [[ -f "$pid_file" ]] || return 0

  pid="$(<"$pid_file")"
  pid="${pid//[[:space:]]/}"
  echo "Restarting Virtuoso after DBpedia graph normalization ..."

  if [[ ! "$pid" =~ ^[0-9]+$ ]]; then
    echo "Ignoring unusable PID in ${pid_file}." >&2
    return 0
  fi

  if ! kill "$pid" 2>/dev/null; then
    echo "Could not signal PID ${pid} (already exited?); nothing to stop." >&2
    return 0
  fi

  # Poll once per second until the process is gone or the timeout elapses.
  for ((waited = 0; waited < timeout; waited++)); do
    kill -0 "$pid" 2>/dev/null || return 0
    sleep 1
  done

  if kill -0 "$pid" 2>/dev/null; then
    echo "PID ${pid} is still running after ${timeout}s; continuing anyway." >&2
  fi
  return 0
}

# NORMALIZED_DBPEDIA_GRAPH is 1 only when the graph rename was executed, in
# which case Virtuoso is stopped (if a PID file exists) and started again.
if [[ "${NORMALIZED_DBPEDIA_GRAPH:-0}" == "1" ]]; then
  stop_virtuoso_from_pidfile "${VIRTUOSO_RUNTIME_DIR}/virtuoso.pid"
  bash scripts/hf_start_virtuoso.sh
fi

# Run the corporate-graph loader script; set -e stops the startup if it fails.
bash scripts/hf_load_corporate_graph.sh

# Hand the process over to uvicorn (exec replaces this shell), listening on
# all interfaces at the configured port.
UVICORN_ARGS=(service.app:app --host 0.0.0.0 --port "$PORT")
exec uvicorn "${UVICORN_ARGS[@]}"