TheQuantEd committed on
Commit
6fa287a
·
1 Parent(s): 1c46761

Fix: empty Neo4j startup + auto-seed on first boot

Browse files
.gitignore CHANGED
@@ -18,6 +18,10 @@ frontend/out/
18
  # Docker volumes (local)
19
  neo4j_data/
20
 
 
 
 
 
21
  # OS
22
  .DS_Store
23
  Thumbs.db
 
18
  # Docker volumes (local)
19
  neo4j_data/
20
 
21
+ # Neo4j dump — large binary, excluded from HF push
22
+ docker/neo4j.dump
23
+ neo4j_dump/
24
+
25
  # OS
26
  .DS_Store
27
  Thumbs.db
Dockerfile CHANGED
@@ -43,24 +43,17 @@ RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
43
  && apt-get install -y --no-install-recommends nodejs \
44
  && rm -rf /var/lib/apt/lists/*
45
 
46
- # ── Neo4j Community 5.x ───────────────────────────────────────────────────────
47
- ENV NEO4J_VERSION=5.18.0
48
  ENV NEO4J_HOME=/opt/neo4j
49
  ENV PATH="${NEO4J_HOME}/bin:${PATH}"
50
 
51
- ENV APOC_VERSION=5.18.0
52
-
53
  RUN wget -q "https://dist.neo4j.org/neo4j-community-${NEO4J_VERSION}-unix.tar.gz" \
54
  && tar -xzf "neo4j-community-${NEO4J_VERSION}-unix.tar.gz" -C /opt \
55
  && mv "/opt/neo4j-community-${NEO4J_VERSION}" /opt/neo4j \
56
  && rm "neo4j-community-${NEO4J_VERSION}-unix.tar.gz" \
57
  && rm -rf /opt/neo4j/data # will be symlinked to /data at runtime
58
 
59
- # Download APOC plugin (Community-compatible jar)
60
- RUN wget -q \
61
- "https://github.com/neo4j/apoc/releases/download/${APOC_VERSION}/apoc-${APOC_VERSION}-core.jar" \
62
- -O /opt/neo4j/plugins/apoc-${APOC_VERSION}-core.jar
63
-
64
  # Neo4j configuration — listen on all interfaces, use /data for persistence
65
  RUN { \
66
  echo "server.bolt.listen_address=0.0.0.0:7687"; \
@@ -68,9 +61,7 @@ RUN { \
68
  echo "server.directories.data=/data/neo4j/data"; \
69
  echo "server.directories.logs=/data/neo4j/logs"; \
70
  echo "server.directories.plugins=/opt/neo4j/plugins"; \
71
- echo "dbms.security.auth_enabled=false"; \
72
- echo "dbms.security.procedures.unrestricted=apoc.*"; \
73
- echo "dbms.security.procedures.allowlist=apoc.*"; \
74
  echo "server.memory.heap.initial_size=512m"; \
75
  echo "server.memory.heap.max_size=1g"; \
76
  echo "server.memory.pagecache.size=256m"; \
@@ -95,11 +86,12 @@ COPY --from=frontend-builder /build/frontend/.next/static ./.next/static
95
  COPY --from=frontend-builder /build/frontend/public ./public
96
 
97
  # ── Config files ───────────────────────────────────────────────────────────────
98
- COPY docker/nginx.conf /app/docker/nginx.conf
99
- COPY docker/supervisord.conf /app/docker/supervisord.conf
100
- COPY docker/entrypoint.sh /app/docker/entrypoint.sh
 
101
 
102
- RUN chmod +x /app/docker/entrypoint.sh
103
 
104
  # ── Nginx writable dirs (runs without root after init) ────────────────────────
105
  RUN mkdir -p /tmp/nginx-cache /tmp/nginx-body /tmp/nginx-run \
@@ -113,8 +105,6 @@ ENV NEO4J_URI=bolt://127.0.0.1:7687
113
  ENV NEO4J_USERNAME=neo4j
114
  ENV NEO4J_PASSWORD=clinicalmatch2024
115
  ENV NEO4J_DATABASE=neo4j
116
- # NEO4J_AUTH tells Neo4j 5.x to set this password on first boot (format: user/pass)
117
- ENV NEO4J_AUTH=none
118
 
119
  # LLM — OpenAI-compatible (set real values via HF Spaces secrets)
120
  ENV OPENAI_API_KEY=""
 
43
  && apt-get install -y --no-install-recommends nodejs \
44
  && rm -rf /var/lib/apt/lists/*
45
 
46
+ # ── Neo4j Community 2026.04.0 ─────────────────────────────────────────────────
47
+ ENV NEO4J_VERSION=2026.04.0
48
  ENV NEO4J_HOME=/opt/neo4j
49
  ENV PATH="${NEO4J_HOME}/bin:${PATH}"
50
 
 
 
51
  RUN wget -q "https://dist.neo4j.org/neo4j-community-${NEO4J_VERSION}-unix.tar.gz" \
52
  && tar -xzf "neo4j-community-${NEO4J_VERSION}-unix.tar.gz" -C /opt \
53
  && mv "/opt/neo4j-community-${NEO4J_VERSION}" /opt/neo4j \
54
  && rm "neo4j-community-${NEO4J_VERSION}-unix.tar.gz" \
55
  && rm -rf /opt/neo4j/data # will be symlinked to /data at runtime
56
 
 
 
 
 
 
57
  # Neo4j configuration — listen on all interfaces, use /data for persistence
58
  RUN { \
59
  echo "server.bolt.listen_address=0.0.0.0:7687"; \
 
61
  echo "server.directories.data=/data/neo4j/data"; \
62
  echo "server.directories.logs=/data/neo4j/logs"; \
63
  echo "server.directories.plugins=/opt/neo4j/plugins"; \
64
+ echo "dbms.security.auth_enabled=true"; \
 
 
65
  echo "server.memory.heap.initial_size=512m"; \
66
  echo "server.memory.heap.max_size=1g"; \
67
  echo "server.memory.pagecache.size=256m"; \
 
86
  COPY --from=frontend-builder /build/frontend/public ./public
87
 
88
  # ── Config files ───────────────────────────────────────────────────────────────
89
+ COPY docker/nginx.conf /app/docker/nginx.conf
90
+ COPY docker/supervisord.conf /app/docker/supervisord.conf
91
+ COPY docker/entrypoint.sh /app/docker/entrypoint.sh
92
+ COPY docker/seed_on_startup.sh /app/docker/seed_on_startup.sh
93
 
94
+ RUN chmod +x /app/docker/entrypoint.sh /app/docker/seed_on_startup.sh
95
 
96
  # ── Nginx writable dirs (runs without root after init) ────────────────────────
97
  RUN mkdir -p /tmp/nginx-cache /tmp/nginx-body /tmp/nginx-run \
 
105
  ENV NEO4J_USERNAME=neo4j
106
  ENV NEO4J_PASSWORD=clinicalmatch2024
107
  ENV NEO4J_DATABASE=neo4j
 
 
108
 
109
  # LLM — OpenAI-compatible (set real values via HF Spaces secrets)
110
  ENV OPENAI_API_KEY=""
backend/seeder_v2.log ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ============================================================
2
+ ClinicalMatch AI — Graph Seeder v2
3
+ 100 k synthetic patients · 20 oncology conditions
4
+ ============================================================
5
+
6
+ [1/5] Seeding clinical trials from ClinicalTrials.gov...
7
+ breast cancer: 50 trials fetched
8
+ prostate cancer: 50 trials fetched
9
+ non-small cell lung cancer: 50 trials fetched
10
+ colorectal cancer: 50 trials fetched
11
+ ovarian cancer: 50 trials fetched
12
+ melanoma: 50 trials fetched
13
+ leukemia: 50 trials fetched
14
+ lymphoma: 50 trials fetched
15
+ glioblastoma: 50 trials fetched
16
+ pancreatic cancer: 50 trials fetched
17
+ bladder cancer: 50 trials fetched
18
+ renal cell carcinoma: 50 trials fetched
19
+ thyroid cancer: 50 trials fetched
20
+ multiple myeloma: 50 trials fetched
21
+ endometrial cancer: 50 trials fetched
22
+ cervical cancer: 50 trials fetched
23
+ gastric cancer: 50 trials fetched
24
+ hepatocellular carcinoma: 50 trials fetched
25
+ head and neck cancer: 50 trials fetched
26
+ sarcoma: 50 trials fetched
27
+ Total trials seeded: 1000
28
+
29
+ [2/5] Seeding medications from RxNorm...
30
+ trastuzumab: 5 RxCUI concepts
31
+ pembrolizumab: 5 RxCUI concepts
32
+ nivolumab: 5 RxCUI concepts
33
+ osimertinib: 4 RxCUI concepts
34
+ olaparib: 5 RxCUI concepts
35
+ enzalutamide: 5 RxCUI concepts
36
+ bevacizumab: 5 RxCUI concepts
37
+ rituximab: 5 RxCUI concepts
38
+ imatinib: 5 RxCUI concepts
39
+ dabrafenib: 5 RxCUI concepts
40
+ vemurafenib: 2 RxCUI concepts
41
+ atezolizumab: 5 RxCUI concepts
42
+ durvalumab: 4 RxCUI concepts
43
+ cetuximab: 4 RxCUI concepts
44
+ erlotinib: 5 RxCUI concepts
45
+ capecitabine: 4 RxCUI concepts
46
+ Total medications seeded: 16
47
+
48
+ [3/5] Seeding diagnoses from ICD-10 CM...
49
+ ICD-10 C50: 20 codes
50
+ ICD-10 C61: 1 codes
51
+ ICD-10 C34: 16 codes
52
+ ICD-10 C18: 10 codes
53
+ ICD-10 C56: 4 codes
54
+ ICD-10 C43: 20 codes
55
+ ICD-10 C91: 20 codes
56
+ ICD-10 C85: 20 codes
57
+ ICD-10 C71: 10 codes
58
+ ICD-10 C25: 8 codes
59
+ Total diagnoses seeded: 129
60
+
61
+ [4/5] Seeding supporting literature from PubMed...
62
+ breast cancer: 5 publications linked
63
+ prostate cancer: 5 publications linked
64
+ non-small cell lung cancer: 5 publications linked
65
+ colorectal cancer: 5 publications linked
66
+ ovarian cancer: 5 publications linked
67
+ Total publications seeded: 25
68
+
69
+ [5/5] Seeding biomarkers (curated from COSMIC/NCIT)...
70
+ 57 biomarkers seeded and linked to conditions
71
+
72
+ [+] Deriving eligibility relationships...
73
+ Eligibility relationships derived.
74
+
75
+ [6/6] Generating 100,000 clinically-informed synthetic patients...
76
+ (SEER incidence weights · TCGA biomarker prevalence · US Census demographics)
77
+ breast cancer: 17,222 patients — already done, skipping
78
+ non-small cell lung cancer: 14,444 patients (40 trials) [resuming from 4,000]
79
+ ↳ wrote 10,444 patients | total so far: 31,666/100,000 | edges: 351,181
80
+ prostate cancer: 10,556 patients (44 trials)
81
+ ↳ wrote 10,556 patients | total so far: 42,222/100,000 | edges: 766,259
82
+ colorectal cancer: 9,444 patients (34 trials)
83
+ ↳ wrote 9,444 patients | total so far: 51,666/100,000 | edges: 1,047,777
84
+ melanoma: 6,111 patients (56 trials)
85
+ ↳ wrote 6,111 patients | total so far: 57,777/100,000 | edges: 1,329,267
86
+ bladder cancer: 5,000 patients (41 trials)
87
+ ↳ wrote 5,000 patients | total so far: 62,777/100,000 | edges: 1,509,909
88
+ renal cell carcinoma: 4,667 patients (42 trials)
89
+ ↳ wrote 4,667 patients | total so far: 67,444/100,000 | edges: 1,687,802
90
+ lymphoma: 4,667 patients (46 trials)
91
+ ↳ wrote 4,667 patients | total so far: 72,111/100,000 | edges: 1,847,153
92
+ endometrial cancer: 4,222 patients (40 trials)
93
+ ↳ wrote 4,222 patients | total so far: 76,333/100,000 | edges: 1,992,865
94
+ leukemia: 3,889 patients (27 trials)
95
+ ↳ wrote 3,889 patients | total so far: 80,222/100,000 | edges: 2,071,433
96
+ pancreatic cancer: 3,667 patients (35 trials)
97
+ ↳ wrote 3,667 patients | total so far: 83,889/100,000 | edges: 2,172,901
98
+ thyroid cancer: 3,333 patients (41 trials)
99
+ ↳ wrote 3,333 patients | total so far: 87,222/100,000 | edges: 2,302,009
100
+ multiple myeloma: 2,778 patients (50 trials)
101
+ ↳ wrote 2,778 patients | total so far: 90,000/100,000 | edges: 2,415,994
102
+ gastric cancer: 2,000 patients (38 trials)
103
+ ↳ wrote 2,000 patients | total so far: 92,000/100,000 | edges: 2,474,564
104
+ ovarian cancer: 2,000 patients (29 trials)
105
+ ↳ wrote 2,000 patients | total so far: 94,000/100,000 | edges: 2,516,658
106
+ hepatocellular carcinoma: 1,667 patients (47 trials)
107
+ ↳ wrote 1,667 patients | total so far: 95,667/100,000 | edges: 2,578,834
108
+ glioblastoma: 1,333 patients (45 trials)
109
+ ↳ wrote 1,333 patients | total so far: 97,000/100,000 | edges: 2,623,714
110
+ head and neck cancer: 1,333 patients (49 trials)
111
+ ↳ wrote 1,333 patients | total so far: 98,333/100,000 | edges: 2,677,369
112
+ cervical cancer: 889 patients (10 trials)
113
+ ↳ wrote 889 patients | total so far: 99,222/100,000 | edges: 2,685,508
114
+ sarcoma: 778 patients (50 trials)
115
+ ↳ wrote 778 patients | total so far: 100,000/100,000 | edges: 2,717,650
116
+
117
+ ✓ Total patients: 100,000
118
+ ✓ Total ELIGIBLE_FOR edges: 2,717,650
119
+
120
+ ============================================================
121
+ Seeding complete in 21.1 min
122
+ Trials: 1000
123
+ Medications: 16
124
+ Diagnoses: 129
125
+ Publications: 25
126
+ Biomarkers: 57
127
+ Patients: 100,000
128
+ ============================================================
docker-compose.yml CHANGED
@@ -18,7 +18,7 @@ services:
18
  container_name: clinicalmatch-neo4j
19
  restart: unless-stopped
20
  ports:
21
- - "7476:7474" # Neo4j Browser
22
  - "7687:7687" # Bolt
23
  volumes:
24
  - neo4j_data:/data
 
18
  container_name: clinicalmatch-neo4j
19
  restart: unless-stopped
20
  ports:
21
+ - "7474:7474" # Neo4j Browser
22
  - "7687:7687" # Bolt
23
  volumes:
24
  - neo4j_data:/data
docker/Dockerfile CHANGED
@@ -43,24 +43,17 @@ RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
43
  && apt-get install -y --no-install-recommends nodejs \
44
  && rm -rf /var/lib/apt/lists/*
45
 
46
- # ── Neo4j Community 5.x ───────────────────────────────────────────────────────
47
- ENV NEO4J_VERSION=5.18.0
48
  ENV NEO4J_HOME=/opt/neo4j
49
  ENV PATH="${NEO4J_HOME}/bin:${PATH}"
50
 
51
- ENV APOC_VERSION=5.18.0
52
-
53
  RUN wget -q "https://dist.neo4j.org/neo4j-community-${NEO4J_VERSION}-unix.tar.gz" \
54
  && tar -xzf "neo4j-community-${NEO4J_VERSION}-unix.tar.gz" -C /opt \
55
  && mv "/opt/neo4j-community-${NEO4J_VERSION}" /opt/neo4j \
56
  && rm "neo4j-community-${NEO4J_VERSION}-unix.tar.gz" \
57
  && rm -rf /opt/neo4j/data # will be symlinked to /data at runtime
58
 
59
- # Download APOC plugin (Community-compatible jar)
60
- RUN wget -q \
61
- "https://github.com/neo4j/apoc/releases/download/${APOC_VERSION}/apoc-${APOC_VERSION}-core.jar" \
62
- -O /opt/neo4j/plugins/apoc-${APOC_VERSION}-core.jar
63
-
64
  # Neo4j configuration — listen on all interfaces, use /data for persistence
65
  RUN { \
66
  echo "server.bolt.listen_address=0.0.0.0:7687"; \
@@ -68,9 +61,7 @@ RUN { \
68
  echo "server.directories.data=/data/neo4j/data"; \
69
  echo "server.directories.logs=/data/neo4j/logs"; \
70
  echo "server.directories.plugins=/opt/neo4j/plugins"; \
71
- echo "dbms.security.auth_enabled=false"; \
72
- echo "dbms.security.procedures.unrestricted=apoc.*"; \
73
- echo "dbms.security.procedures.allowlist=apoc.*"; \
74
  echo "server.memory.heap.initial_size=512m"; \
75
  echo "server.memory.heap.max_size=1g"; \
76
  echo "server.memory.pagecache.size=256m"; \
@@ -94,6 +85,9 @@ COPY --from=frontend-builder /build/frontend/.next/standalone ./
94
  COPY --from=frontend-builder /build/frontend/.next/static ./.next/static
95
  COPY --from=frontend-builder /build/frontend/public ./public
96
 
 
 
 
97
  # ── Config files ───────────────────────────────────────────────────────────────
98
  COPY docker/nginx.conf /app/docker/nginx.conf
99
  COPY docker/supervisord.conf /app/docker/supervisord.conf
@@ -113,8 +107,6 @@ ENV NEO4J_URI=bolt://127.0.0.1:7687
113
  ENV NEO4J_USERNAME=neo4j
114
  ENV NEO4J_PASSWORD=clinicalmatch2024
115
  ENV NEO4J_DATABASE=neo4j
116
- # NEO4J_AUTH tells Neo4j 5.x to set this password on first boot (format: user/pass)
117
- ENV NEO4J_AUTH=none
118
 
119
  # LLM — OpenAI-compatible (set real values via HF Spaces secrets)
120
  ENV OPENAI_API_KEY=""
 
43
  && apt-get install -y --no-install-recommends nodejs \
44
  && rm -rf /var/lib/apt/lists/*
45
 
46
+ # ── Neo4j Community 2026.04.0 ─────────────────────────────────────────────────
47
+ ENV NEO4J_VERSION=2026.04.0
48
  ENV NEO4J_HOME=/opt/neo4j
49
  ENV PATH="${NEO4J_HOME}/bin:${PATH}"
50
 
 
 
51
  RUN wget -q "https://dist.neo4j.org/neo4j-community-${NEO4J_VERSION}-unix.tar.gz" \
52
  && tar -xzf "neo4j-community-${NEO4J_VERSION}-unix.tar.gz" -C /opt \
53
  && mv "/opt/neo4j-community-${NEO4J_VERSION}" /opt/neo4j \
54
  && rm "neo4j-community-${NEO4J_VERSION}-unix.tar.gz" \
55
  && rm -rf /opt/neo4j/data # will be symlinked to /data at runtime
56
 
 
 
 
 
 
57
  # Neo4j configuration — listen on all interfaces, use /data for persistence
58
  RUN { \
59
  echo "server.bolt.listen_address=0.0.0.0:7687"; \
 
61
  echo "server.directories.data=/data/neo4j/data"; \
62
  echo "server.directories.logs=/data/neo4j/logs"; \
63
  echo "server.directories.plugins=/opt/neo4j/plugins"; \
64
+ echo "dbms.security.auth_enabled=true"; \
 
 
65
  echo "server.memory.heap.initial_size=512m"; \
66
  echo "server.memory.heap.max_size=1g"; \
67
  echo "server.memory.pagecache.size=256m"; \
 
85
  COPY --from=frontend-builder /build/frontend/.next/static ./.next/static
86
  COPY --from=frontend-builder /build/frontend/public ./public
87
 
88
+ # ── Neo4j seed dump ────────────────────────────────────────────────────────────
89
+ COPY docker/neo4j.dump /app/docker/neo4j.dump
90
+
91
  # ── Config files ───────────────────────────────────────────────────────────────
92
  COPY docker/nginx.conf /app/docker/nginx.conf
93
  COPY docker/supervisord.conf /app/docker/supervisord.conf
 
107
  ENV NEO4J_USERNAME=neo4j
108
  ENV NEO4J_PASSWORD=clinicalmatch2024
109
  ENV NEO4J_DATABASE=neo4j
 
 
110
 
111
  # LLM — OpenAI-compatible (set real values via HF Spaces secrets)
112
  ENV OPENAI_API_KEY=""
docker/entrypoint.sh CHANGED
@@ -3,19 +3,28 @@ set -e
3
 
4
  log() { echo "[entrypoint] $*"; }
5
 
6
- # ── Persistent data dirs ───────────────────────────────────────────────────────
7
- mkdir -p /data/neo4j/data /data/neo4j/logs \
8
- /tmp/nginx-cache /tmp/nginx-body /tmp/nginx-run
9
 
10
- # Symlink Neo4j dirs to persistent volume
11
- if [ ! -L /opt/neo4j/data ]; then
12
- rm -rf /opt/neo4j/data
13
- ln -sf /data/neo4j/data /opt/neo4j/data
14
- fi
15
- if [ ! -L /opt/neo4j/logs ]; then
16
- rm -rf /opt/neo4j/logs
17
- ln -sf /data/neo4j/logs /opt/neo4j/logs
 
 
 
 
 
 
 
18
  fi
19
 
 
 
 
20
  log "Starting all services via supervisord..."
21
  exec /usr/bin/supervisord -c /app/docker/supervisord.conf
 
3
 
4
  log() { echo "[entrypoint] $*"; }
5
 
6
+ # ── Persistent data dirs (HF Spaces mounts /data) ─────────────────────────────
7
+ mkdir -p /data/neo4j/data /data/neo4j/logs /data/neo4j/transactions
 
8
 
9
+ # Point Neo4j config at the persistent volume paths (already set in neo4j.conf,
10
+ # but the data dir must exist before Neo4j starts or it refuses to launch)
11
+ mkdir -p /data/neo4j/data/databases /data/neo4j/data/dbms
12
+
13
+ # ── First-boot: set initial password ──────────────────────────────────────────
14
+ NEO4J_PASS="${NEO4J_PASSWORD:-clinicalmatch2024}"
15
+
16
+ if [ ! -f /data/.neo4j_initialized ]; then
17
+ log "First boot — setting Neo4j initial password..."
18
+ # neo4j-admin must write to the same dbms dir Neo4j will use
19
+ NEO4J_CONF=/opt/neo4j/conf \
20
+ neo4j-admin dbms set-initial-password "$NEO4J_PASS" 2>&1 || \
21
+ log "WARNING: set-initial-password failed (may already be set — continuing)"
22
+ touch /data/.neo4j_initialized
23
+ log "Password initialisation done."
24
  fi
25
 
26
+ # ── Nginx tmp dirs ─────────────────────────────────────────────────────────────
27
+ mkdir -p /tmp/nginx-cache /tmp/nginx-body
28
+
29
  log "Starting all services via supervisord..."
30
  exec /usr/bin/supervisord -c /app/docker/supervisord.conf
docker/seed_on_startup.sh ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+ # Waits for Neo4j and the FastAPI backend to be ready, then auto-seeds the graph
3
+ # if it is empty. Runs once at container startup via supervisord.
4
+
5
+ BACKEND="http://127.0.0.1:8000"
6
+ NEO4J_BOLT="127.0.0.1:7687"
7
+ SEED_FLAG="/data/.graph_seeded"
8
+
9
+ log() { echo "[seeder] $*"; }
10
+
11
+ wait_for_tcp() {
12
+ local host=$1 port=$2 label=$3 max=${4:-120}
13
+ local i=0
14
+ while ! (echo > /dev/tcp/"$host"/"$port") 2>/dev/null; do
15
+ i=$((i+1))
16
+ if [ $i -ge $max ]; then
17
+ log "ERROR: $label did not become reachable within ${max}s — aborting seed"
18
+ exit 1
19
+ fi
20
+ sleep 2
21
+ done
22
+ log "$label is reachable after $((i*2))s"
23
+ }
24
+
25
+ wait_for_http() {
26
+ local url=$1 label=$2 max=${3:-120}
27
+ local i=0
28
+ while ! curl -sf "$url" > /dev/null 2>&1; do
29
+ i=$((i+1))
30
+ if [ $i -ge $max ]; then
31
+ log "ERROR: $label did not respond within ${max}s — aborting seed"
32
+ exit 1
33
+ fi
34
+ sleep 2
35
+ done
36
+ log "$label ready after $((i*2))s"
37
+ }
38
+
39
+ log "Waiting for Neo4j bolt on $NEO4J_BOLT..."
40
+ wait_for_tcp 127.0.0.1 7687 "Neo4j bolt" 180
41
+
42
+ log "Waiting for FastAPI backend..."
43
+ wait_for_http "$BACKEND/health" "FastAPI /health" 120
44
+
45
+ # Check if already seeded (flag file OR graph has data)
46
+ if [ -f "$SEED_FLAG" ]; then
47
+ log "Seed flag found at $SEED_FLAG — skipping."
48
+ exit 0
49
+ fi
50
+
51
+ # Ask the backend how many patients are in the graph
52
+ PATIENT_COUNT=$(curl -sf "$BACKEND/api/v1/graph/stats" 2>/dev/null | \
53
+ python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('patients',0))" 2>/dev/null || echo 0)
54
+
55
+ log "Graph patient count: $PATIENT_COUNT"
56
+
57
+ if [ "$PATIENT_COUNT" -ge 100 ] 2>/dev/null; then
58
+ log "Graph already seeded ($PATIENT_COUNT patients) — skipping."
59
+ touch "$SEED_FLAG"
60
+ exit 0
61
+ fi
62
+
63
+ log "Graph is empty — triggering POST $BACKEND/seed ..."
64
+ HTTP_CODE=$(curl -sf -o /tmp/seed_response.json -w "%{http_code}" \
65
+ -X POST "$BACKEND/seed" \
66
+ -H "Content-Type: application/json" 2>&1)
67
+
68
+ if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "202" ]; then
69
+ log "Seeding started successfully (HTTP $HTTP_CODE)."
70
+ log "Response: $(cat /tmp/seed_response.json 2>/dev/null)"
71
+ # Mark flag so future restarts don't re-trigger (seeder runs in background
72
+ # inside FastAPI; real completion check is patient count)
73
+ touch "$SEED_FLAG"
74
+ else
75
+ log "WARNING: /seed returned HTTP $HTTP_CODE — check backend logs."
76
+ cat /tmp/seed_response.json 2>/dev/null || true
77
+ fi
docker/supervisord.conf CHANGED
@@ -16,28 +16,46 @@ serverurl=unix:///tmp/supervisor.sock
16
  # ── Neo4j Community ────────────────────────────────────────────────────────────
17
  [program:neo4j]
18
  command=/opt/neo4j/bin/neo4j console
19
- environment=
20
- NEO4J_HOME=/opt/neo4j,
21
- JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64,
22
- NEO4J_AUTH="none"
23
  autostart=true
24
  autorestart=true
25
  startsecs=45
26
  startretries=3
27
  stdout_logfile=/tmp/neo4j.log
 
28
  redirect_stderr=true
29
  priority=10
30
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  # ── FastAPI backend ────────────────────────────────────────────────────────────
32
- # No environment= block — inherits full container env so HF Spaces secrets are visible
33
  [program:backend]
34
- command=bash -c "sleep 60 && python3 -m uvicorn main:app --host 127.0.0.1 --port 8000 --workers 1"
35
  directory=/app/backend
 
 
 
 
 
 
 
 
36
  autostart=true
37
  autorestart=true
38
- startsecs=15
39
  startretries=5
40
  stdout_logfile=/tmp/backend.log
 
41
  redirect_stderr=true
42
  priority=30
43
 
@@ -50,6 +68,7 @@ autostart=true
50
  autorestart=true
51
  startsecs=5
52
  stdout_logfile=/tmp/frontend.log
 
53
  redirect_stderr=true
54
  priority=40
55
 
@@ -60,5 +79,6 @@ autostart=true
60
  autorestart=true
61
  startsecs=3
62
  stdout_logfile=/tmp/nginx.log
 
63
  redirect_stderr=true
64
  priority=50
 
16
  # ── Neo4j Community ────────────────────────────────────────────────────────────
17
  [program:neo4j]
18
  command=/opt/neo4j/bin/neo4j console
19
+ environment=NEO4J_HOME=/opt/neo4j,JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64,NEO4J_CONF=/opt/neo4j/conf
 
 
 
20
  autostart=true
21
  autorestart=true
22
  startsecs=45
23
  startretries=3
24
  stdout_logfile=/tmp/neo4j.log
25
+ stderr_logfile=/tmp/neo4j.log
26
  redirect_stderr=true
27
  priority=10
28
 
29
+ # ── Auto-seeder (one-shot, runs after Neo4j + backend are live) ───────────────
30
+ [program:seeder]
31
+ command=/bin/bash /app/docker/seed_on_startup.sh
32
+ autostart=true
33
+ autorestart=false
34
+ startsecs=0
35
+ startretries=1
36
+ stdout_logfile=/tmp/seeder.log
37
+ stderr_logfile=/tmp/seeder.log
38
+ redirect_stderr=true
39
+ priority=99
40
+
41
  # ── FastAPI backend ────────────────────────────────────────────────────────────
 
42
  [program:backend]
43
+ command=python3 -m uvicorn main:app --host 127.0.0.1 --port 8000 --workers 2
44
  directory=/app/backend
45
+ environment=
46
+ NEO4J_URI="bolt://127.0.0.1:7687",
47
+ NEO4J_USERNAME="%(ENV_NEO4J_USERNAME)s",
48
+ NEO4J_PASSWORD="%(ENV_NEO4J_PASSWORD)s",
49
+ NEO4J_DATABASE="%(ENV_NEO4J_DATABASE)s",
50
+ OPENAI_API_KEY="%(ENV_OPENAI_API_KEY)s",
51
+ OPENAI_BASE_URL="%(ENV_OPENAI_BASE_URL)s",
52
+ OPENAI_MODEL="%(ENV_OPENAI_MODEL)s"
53
  autostart=true
54
  autorestart=true
55
+ startsecs=10
56
  startretries=5
57
  stdout_logfile=/tmp/backend.log
58
+ stderr_logfile=/tmp/backend.log
59
  redirect_stderr=true
60
  priority=30
61
 
 
68
  autorestart=true
69
  startsecs=5
70
  stdout_logfile=/tmp/frontend.log
71
+ stderr_logfile=/tmp/frontend.log
72
  redirect_stderr=true
73
  priority=40
74
 
 
79
  autorestart=true
80
  startsecs=3
81
  stdout_logfile=/tmp/nginx.log
82
+ stderr_logfile=/tmp/nginx.log
83
  redirect_stderr=true
84
  priority=50
mcp_manifest.json CHANGED
@@ -10,8 +10,8 @@
10
  "NEO4J_USERNAME": "neo4j",
11
  "NEO4J_PASSWORD": "clinicalmatch2024",
12
  "OPENAI_API_KEY": "<your-key>",
13
- "OPENAI_BASE_URL": "https://ai.aimlapi.com/v1",
14
- "OPENAI_MODEL": "claude-opus-4-7"
15
  },
16
  "tools": [
17
  {
 
10
  "NEO4J_USERNAME": "neo4j",
11
  "NEO4J_PASSWORD": "clinicalmatch2024",
12
  "OPENAI_API_KEY": "<your-key>",
13
+ "OPENAI_BASE_URL": "https://api.groq.com/openai/v1",
14
+ "OPENAI_MODEL": "qwen/qwen3-32b"
15
  },
16
  "tools": [
17
  {