File size: 3,620 Bytes
6252f54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/bin/bash
# Startup: set auth → start Neo4j → wait → seed graph → start app services
set -e

# ── Configuration ────────────────────────────────────────────────────────────
# Password for the built-in `neo4j` user. May be supplied through the
# NEO4J_PASS environment variable; the placeholder default is used when the
# variable is unset or empty.
readonly NEO4J_PASS="${NEO4J_PASS:-your-password}"
# Cypher script that is fed to cypher-shell by the seeding step.
readonly SEED_FILE="/app/neo4j_backup/seed_graph.cypher"
# Marker file: while it exists, the seeding step is skipped.
readonly SEED_FLAG="/var/lib/neo4j/data/.graph_seeded"

# Print a timestamped, "[entrypoint]"-prefixed message on stdout.
# Arguments: all arguments are joined with spaces and used as the message.
log() {
    local stamp
    stamp=$(date '+%H:%M:%S')
    printf '[entrypoint] %s %s\n' "$stamp" "$*"
}

# ── Step 1: Set initial password before first start ──────────────────────────
# The initial password is only meant to be set before the DBMS has been started
# for the first time. When this entrypoint runs again against an existing data
# directory the command may exit non-zero (NOTE(review): confirm for the
# installed Neo4j version). Under `set -e` that would abort the whole
# entrypoint, so a failure is reported and start-up continues instead.
log "Setting Neo4j password..."
if ! neo4j-admin dbms set-initial-password "$NEO4J_PASS"; then
    log "WARNING: could not set initial password (already initialised?) — continuing."
fi

# ── Step 2: Fix ownership and start Neo4j ────────────────────────────────────
log "Fixing data directory ownership..."
# Best-effort: chown's error output is discarded and a failure is ignored.
neo4j_dirs=(/var/lib/neo4j/data /var/lib/neo4j/run /var/log/neo4j)
if ! chown -R neo4j:neo4j "${neo4j_dirs[@]}" 2>/dev/null; then
    :
fi

log "Starting Neo4j..."
# The command line is run by a bash started through su as the neo4j user; the
# trailing '&' backgrounds the console there, with its output appended to
# console.log.
neo4j_launch="NEO4J_CONF=/etc/neo4j neo4j console >> /var/log/neo4j/console.log 2>&1 &"
su -s /bin/bash neo4j -c "$neo4j_launch"

# ── Step 3: Wait for bolt (up to 120 s) ──────────────────────────────────────
log "Waiting for bolt on :7687..."
# Probe the port up to 120 times, pausing one second between probes (no pause
# after the final one). bolt_up records whether any probe succeeded.
bolt_up=0
for ((attempt = 1; attempt <= 120; attempt++)); do
    if nc -z localhost 7687 2>/dev/null; then
        bolt_up=1
        break
    fi
    if [ "$attempt" -lt 120 ]; then
        sleep 1
    fi
done

if [ "$bolt_up" -eq 1 ]; then
    log "Bolt ready after ${attempt}s."
else
    log "ERROR: bolt not ready — check /var/log/neo4j/console.log"
    exit 1
fi

# ── Step 4: Seed the graph (skipped while the seed flag file exists) ──────────
# First-time initialisation: load the seed script, log a node count, build the
# embeddings and create the vector index. Every sub-step is best-effort: a
# failure is logged as a WARNING and start-up continues. The flag file is only
# created when the seed import itself succeeds, so a failed import is retried
# on the next start.
if [ ! -f "$SEED_FLAG" ]; then
    log "Seeding knowledge graph from $SEED_FILE ..."
    # Extra 5 s grace so system DB is fully accepting auth before we query
    sleep 5
    # The whole seed file is fed to cypher-shell on stdin.
    # --fail-at-end: keep going after a bad statement and report failure at the
    # end of input instead of aborting the import at the first error.
    if cypher-shell -u neo4j -p "$NEO4J_PASS" \
        --format plain --fail-at-end \
        < "$SEED_FILE" \
        >> /var/log/neo4j/seed.log 2>&1; then
        if touch "$SEED_FLAG"; then
            log "Graph seeded successfully."
        else
            log "WARNING: graph seeded but $SEED_FLAG could not be created — seeding will run again on next start"
        fi
    else
        log "WARNING: seed may have partially loaded — check /var/log/neo4j/seed.log"
    fi

    # Smoke-test: count nodes. The pipeline's status is tail's, so a failing
    # cypher-shell just yields empty output; report that as "?".
    COUNT=$(cypher-shell -u neo4j -p "$NEO4J_PASS" --format plain \
        "MATCH (n) WHERE n.id <> '__hub__' RETURN count(n) AS c" 2>/dev/null \
        | tail -1) || true
    if [ -z "$COUNT" ]; then
        COUNT="?"
    fi
    log "Node count after seed: ${COUNT}"

    # Build vector embeddings for semantic search
    log "Building capability embeddings (vector search)..."
    export NEO4J_URI="bolt://localhost:7687"
    export NEO4J_USERNAME="neo4j"
    export NEO4J_PASSWORD="$NEO4J_PASS"
    export PYTHONPATH="/app"
    if python3 /app/pipeline/embed_nodes.py >> /var/log/neo4j/embed.log 2>&1; then
        log "Embeddings done."
    else
        log "WARNING: embeddings failed:"
        # Show the end of the log; never let a tail failure abort start-up.
        tail -20 /var/log/neo4j/embed.log || true
    fi

    # Create vector index so CALL db.index.vector.queryNodes works
    if cypher-shell -u neo4j -p "$NEO4J_PASS" --format plain \
        "CREATE VECTOR INDEX capability_embedding IF NOT EXISTS FOR (n:Capability) ON n.embedding OPTIONS {indexConfig: {\`vector.dimensions\`: 384, \`vector.similarity_function\`: 'cosine'}}" \
        >> /var/log/neo4j/seed.log 2>&1; then
        log "Vector index created."
    else
        log "WARNING: vector index creation failed."
    fi
else
    log "Graph already seeded — skipping."
fi

# ── Step 5: Hand off to supervisord ──────────────────────────────────────────
log "Starting application services..."
# exec replaces this shell with supervisord, so nothing after this line runs.
supervisord_args=(-n -c /etc/supervisor/supervisord.conf)
exec /usr/bin/supervisord "${supervisord_args[@]}"