Spaces:
Running
Running
Commit ·
6fa287a
1
Parent(s): 1c46761
Fix: empty Neo4j startup + auto-seed on first boot
Browse files
- .gitignore +4 -0
- Dockerfile +8 -18
- backend/seeder_v2.log +128 -0
- docker-compose.yml +1 -1
- docker/Dockerfile +6 -14
- docker/entrypoint.sh +20 -11
- docker/seed_on_startup.sh +77 -0
- docker/supervisord.conf +27 -7
- mcp_manifest.json +2 -2
.gitignore
CHANGED
|
@@ -18,6 +18,10 @@ frontend/out/
|
|
| 18 |
# Docker volumes (local)
|
| 19 |
neo4j_data/
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
# OS
|
| 22 |
.DS_Store
|
| 23 |
Thumbs.db
|
|
|
|
| 18 |
# Docker volumes (local)
|
| 19 |
neo4j_data/
|
| 20 |
|
| 21 |
+
# Neo4j dump — large binary, excluded from HF push
|
| 22 |
+
docker/neo4j.dump
|
| 23 |
+
neo4j_dump/
|
| 24 |
+
|
| 25 |
# OS
|
| 26 |
.DS_Store
|
| 27 |
Thumbs.db
|
Dockerfile
CHANGED
|
@@ -43,24 +43,17 @@ RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
|
| 43 |
&& apt-get install -y --no-install-recommends nodejs \
|
| 44 |
&& rm -rf /var/lib/apt/lists/*
|
| 45 |
|
| 46 |
-
# ββ Neo4j Community
|
| 47 |
-
ENV NEO4J_VERSION=
|
| 48 |
ENV NEO4J_HOME=/opt/neo4j
|
| 49 |
ENV PATH="${NEO4J_HOME}/bin:${PATH}"
|
| 50 |
|
| 51 |
-
ENV APOC_VERSION=5.18.0
|
| 52 |
-
|
| 53 |
RUN wget -q "https://dist.neo4j.org/neo4j-community-${NEO4J_VERSION}-unix.tar.gz" \
|
| 54 |
&& tar -xzf "neo4j-community-${NEO4J_VERSION}-unix.tar.gz" -C /opt \
|
| 55 |
&& mv "/opt/neo4j-community-${NEO4J_VERSION}" /opt/neo4j \
|
| 56 |
&& rm "neo4j-community-${NEO4J_VERSION}-unix.tar.gz" \
|
| 57 |
&& rm -rf /opt/neo4j/data # will be symlinked to /data at runtime
|
| 58 |
|
| 59 |
-
# Download APOC plugin (Community-compatible jar)
|
| 60 |
-
RUN wget -q \
|
| 61 |
-
"https://github.com/neo4j/apoc/releases/download/${APOC_VERSION}/apoc-${APOC_VERSION}-core.jar" \
|
| 62 |
-
-O /opt/neo4j/plugins/apoc-${APOC_VERSION}-core.jar
|
| 63 |
-
|
| 64 |
# Neo4j configuration — listen on all interfaces, use /data for persistence
|
| 65 |
RUN { \
|
| 66 |
echo "server.bolt.listen_address=0.0.0.0:7687"; \
|
|
@@ -68,9 +61,7 @@ RUN { \
|
|
| 68 |
echo "server.directories.data=/data/neo4j/data"; \
|
| 69 |
echo "server.directories.logs=/data/neo4j/logs"; \
|
| 70 |
echo "server.directories.plugins=/opt/neo4j/plugins"; \
|
| 71 |
-
echo "dbms.security.auth_enabled=
|
| 72 |
-
echo "dbms.security.procedures.unrestricted=apoc.*"; \
|
| 73 |
-
echo "dbms.security.procedures.allowlist=apoc.*"; \
|
| 74 |
echo "server.memory.heap.initial_size=512m"; \
|
| 75 |
echo "server.memory.heap.max_size=1g"; \
|
| 76 |
echo "server.memory.pagecache.size=256m"; \
|
|
@@ -95,11 +86,12 @@ COPY --from=frontend-builder /build/frontend/.next/static ./.next/static
|
|
| 95 |
COPY --from=frontend-builder /build/frontend/public ./public
|
| 96 |
|
| 97 |
# ── Config files ──────────────────────────────────────────────────────────────
|
| 98 |
-
COPY docker/nginx.conf
|
| 99 |
-
COPY docker/supervisord.conf
|
| 100 |
-
COPY docker/entrypoint.sh
|
|
|
|
| 101 |
|
| 102 |
-
RUN chmod +x /app/docker/entrypoint.sh
|
| 103 |
|
| 104 |
# ── Nginx writable dirs (runs without root after init) ────────────────────────
|
| 105 |
RUN mkdir -p /tmp/nginx-cache /tmp/nginx-body /tmp/nginx-run \
|
|
@@ -113,8 +105,6 @@ ENV NEO4J_URI=bolt://127.0.0.1:7687
|
|
| 113 |
ENV NEO4J_USERNAME=neo4j
|
| 114 |
ENV NEO4J_PASSWORD=clinicalmatch2024
|
| 115 |
ENV NEO4J_DATABASE=neo4j
|
| 116 |
-
# NEO4J_AUTH tells Neo4j 5.x to set this password on first boot (format: user/pass)
|
| 117 |
-
ENV NEO4J_AUTH=none
|
| 118 |
|
| 119 |
# LLM — OpenAI-compatible (set real values via HF Spaces secrets)
|
| 120 |
ENV OPENAI_API_KEY=""
|
|
|
|
| 43 |
&& apt-get install -y --no-install-recommends nodejs \
|
| 44 |
&& rm -rf /var/lib/apt/lists/*
|
| 45 |
|
| 46 |
+
# ── Neo4j Community 2026.04.0 ─────────────────────────────────────────────────
|
| 47 |
+
ENV NEO4J_VERSION=2026.04.0
|
| 48 |
ENV NEO4J_HOME=/opt/neo4j
|
| 49 |
ENV PATH="${NEO4J_HOME}/bin:${PATH}"
|
| 50 |
|
|
|
|
|
|
|
| 51 |
RUN wget -q "https://dist.neo4j.org/neo4j-community-${NEO4J_VERSION}-unix.tar.gz" \
|
| 52 |
&& tar -xzf "neo4j-community-${NEO4J_VERSION}-unix.tar.gz" -C /opt \
|
| 53 |
&& mv "/opt/neo4j-community-${NEO4J_VERSION}" /opt/neo4j \
|
| 54 |
&& rm "neo4j-community-${NEO4J_VERSION}-unix.tar.gz" \
|
| 55 |
&& rm -rf /opt/neo4j/data # will be symlinked to /data at runtime
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
# Neo4j configuration — listen on all interfaces, use /data for persistence
|
| 58 |
RUN { \
|
| 59 |
echo "server.bolt.listen_address=0.0.0.0:7687"; \
|
|
|
|
| 61 |
echo "server.directories.data=/data/neo4j/data"; \
|
| 62 |
echo "server.directories.logs=/data/neo4j/logs"; \
|
| 63 |
echo "server.directories.plugins=/opt/neo4j/plugins"; \
|
| 64 |
+
echo "dbms.security.auth_enabled=true"; \
|
|
|
|
|
|
|
| 65 |
echo "server.memory.heap.initial_size=512m"; \
|
| 66 |
echo "server.memory.heap.max_size=1g"; \
|
| 67 |
echo "server.memory.pagecache.size=256m"; \
|
|
|
|
| 86 |
COPY --from=frontend-builder /build/frontend/public ./public
|
| 87 |
|
| 88 |
# ── Config files ──────────────────────────────────────────────────────────────
|
| 89 |
+
COPY docker/nginx.conf /app/docker/nginx.conf
|
| 90 |
+
COPY docker/supervisord.conf /app/docker/supervisord.conf
|
| 91 |
+
COPY docker/entrypoint.sh /app/docker/entrypoint.sh
|
| 92 |
+
COPY docker/seed_on_startup.sh /app/docker/seed_on_startup.sh
|
| 93 |
|
| 94 |
+
RUN chmod +x /app/docker/entrypoint.sh /app/docker/seed_on_startup.sh
|
| 95 |
|
| 96 |
# ── Nginx writable dirs (runs without root after init) ────────────────────────
|
| 97 |
RUN mkdir -p /tmp/nginx-cache /tmp/nginx-body /tmp/nginx-run \
|
|
|
|
| 105 |
ENV NEO4J_USERNAME=neo4j
|
| 106 |
ENV NEO4J_PASSWORD=clinicalmatch2024
|
| 107 |
ENV NEO4J_DATABASE=neo4j
|
|
|
|
|
|
|
| 108 |
|
| 109 |
# LLM — OpenAI-compatible (set real values via HF Spaces secrets)
|
| 110 |
ENV OPENAI_API_KEY=""
|
backend/seeder_v2.log
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
============================================================
|
| 2 |
+
ClinicalMatch AI — Graph Seeder v2
|
| 3 |
+
100 k synthetic patients · 20 oncology conditions
|
| 4 |
+
============================================================
|
| 5 |
+
|
| 6 |
+
[1/5] Seeding clinical trials from ClinicalTrials.gov...
|
| 7 |
+
breast cancer: 50 trials fetched
|
| 8 |
+
prostate cancer: 50 trials fetched
|
| 9 |
+
non-small cell lung cancer: 50 trials fetched
|
| 10 |
+
colorectal cancer: 50 trials fetched
|
| 11 |
+
ovarian cancer: 50 trials fetched
|
| 12 |
+
melanoma: 50 trials fetched
|
| 13 |
+
leukemia: 50 trials fetched
|
| 14 |
+
lymphoma: 50 trials fetched
|
| 15 |
+
glioblastoma: 50 trials fetched
|
| 16 |
+
pancreatic cancer: 50 trials fetched
|
| 17 |
+
bladder cancer: 50 trials fetched
|
| 18 |
+
renal cell carcinoma: 50 trials fetched
|
| 19 |
+
thyroid cancer: 50 trials fetched
|
| 20 |
+
multiple myeloma: 50 trials fetched
|
| 21 |
+
endometrial cancer: 50 trials fetched
|
| 22 |
+
cervical cancer: 50 trials fetched
|
| 23 |
+
gastric cancer: 50 trials fetched
|
| 24 |
+
hepatocellular carcinoma: 50 trials fetched
|
| 25 |
+
head and neck cancer: 50 trials fetched
|
| 26 |
+
sarcoma: 50 trials fetched
|
| 27 |
+
Total trials seeded: 1000
|
| 28 |
+
|
| 29 |
+
[2/5] Seeding medications from RxNorm...
|
| 30 |
+
trastuzumab: 5 RxCUI concepts
|
| 31 |
+
pembrolizumab: 5 RxCUI concepts
|
| 32 |
+
nivolumab: 5 RxCUI concepts
|
| 33 |
+
osimertinib: 4 RxCUI concepts
|
| 34 |
+
olaparib: 5 RxCUI concepts
|
| 35 |
+
enzalutamide: 5 RxCUI concepts
|
| 36 |
+
bevacizumab: 5 RxCUI concepts
|
| 37 |
+
rituximab: 5 RxCUI concepts
|
| 38 |
+
imatinib: 5 RxCUI concepts
|
| 39 |
+
dabrafenib: 5 RxCUI concepts
|
| 40 |
+
vemurafenib: 2 RxCUI concepts
|
| 41 |
+
atezolizumab: 5 RxCUI concepts
|
| 42 |
+
durvalumab: 4 RxCUI concepts
|
| 43 |
+
cetuximab: 4 RxCUI concepts
|
| 44 |
+
erlotinib: 5 RxCUI concepts
|
| 45 |
+
capecitabine: 4 RxCUI concepts
|
| 46 |
+
Total medications seeded: 16
|
| 47 |
+
|
| 48 |
+
[3/5] Seeding diagnoses from ICD-10 CM...
|
| 49 |
+
ICD-10 C50: 20 codes
|
| 50 |
+
ICD-10 C61: 1 codes
|
| 51 |
+
ICD-10 C34: 16 codes
|
| 52 |
+
ICD-10 C18: 10 codes
|
| 53 |
+
ICD-10 C56: 4 codes
|
| 54 |
+
ICD-10 C43: 20 codes
|
| 55 |
+
ICD-10 C91: 20 codes
|
| 56 |
+
ICD-10 C85: 20 codes
|
| 57 |
+
ICD-10 C71: 10 codes
|
| 58 |
+
ICD-10 C25: 8 codes
|
| 59 |
+
Total diagnoses seeded: 129
|
| 60 |
+
|
| 61 |
+
[4/5] Seeding supporting literature from PubMed...
|
| 62 |
+
breast cancer: 5 publications linked
|
| 63 |
+
prostate cancer: 5 publications linked
|
| 64 |
+
non-small cell lung cancer: 5 publications linked
|
| 65 |
+
colorectal cancer: 5 publications linked
|
| 66 |
+
ovarian cancer: 5 publications linked
|
| 67 |
+
Total publications seeded: 25
|
| 68 |
+
|
| 69 |
+
[5/5] Seeding biomarkers (curated from COSMIC/NCIT)...
|
| 70 |
+
57 biomarkers seeded and linked to conditions
|
| 71 |
+
|
| 72 |
+
[+] Deriving eligibility relationships...
|
| 73 |
+
Eligibility relationships derived.
|
| 74 |
+
|
| 75 |
+
[6/6] Generating 100,000 clinically-informed synthetic patients...
|
| 76 |
+
(SEER incidence weights · TCGA biomarker prevalence · US Census demographics)
|
| 77 |
+
breast cancer: 17,222 patients — already done, skipping
|
| 78 |
+
non-small cell lung cancer: 14,444 patients (40 trials) [resuming from 4,000]
|
| 79 |
+
↳ wrote 10,444 patients | total so far: 31,666/100,000 | edges: 351,181
|
| 80 |
+
prostate cancer: 10,556 patients (44 trials)
|
| 81 |
+
↳ wrote 10,556 patients | total so far: 42,222/100,000 | edges: 766,259
|
| 82 |
+
colorectal cancer: 9,444 patients (34 trials)
|
| 83 |
+
↳ wrote 9,444 patients | total so far: 51,666/100,000 | edges: 1,047,777
|
| 84 |
+
melanoma: 6,111 patients (56 trials)
|
| 85 |
+
↳ wrote 6,111 patients | total so far: 57,777/100,000 | edges: 1,329,267
|
| 86 |
+
bladder cancer: 5,000 patients (41 trials)
|
| 87 |
+
↳ wrote 5,000 patients | total so far: 62,777/100,000 | edges: 1,509,909
|
| 88 |
+
renal cell carcinoma: 4,667 patients (42 trials)
|
| 89 |
+
↳ wrote 4,667 patients | total so far: 67,444/100,000 | edges: 1,687,802
|
| 90 |
+
lymphoma: 4,667 patients (46 trials)
|
| 91 |
+
↳ wrote 4,667 patients | total so far: 72,111/100,000 | edges: 1,847,153
|
| 92 |
+
endometrial cancer: 4,222 patients (40 trials)
|
| 93 |
+
↳ wrote 4,222 patients | total so far: 76,333/100,000 | edges: 1,992,865
|
| 94 |
+
leukemia: 3,889 patients (27 trials)
|
| 95 |
+
↳ wrote 3,889 patients | total so far: 80,222/100,000 | edges: 2,071,433
|
| 96 |
+
pancreatic cancer: 3,667 patients (35 trials)
|
| 97 |
+
↳ wrote 3,667 patients | total so far: 83,889/100,000 | edges: 2,172,901
|
| 98 |
+
thyroid cancer: 3,333 patients (41 trials)
|
| 99 |
+
↳ wrote 3,333 patients | total so far: 87,222/100,000 | edges: 2,302,009
|
| 100 |
+
multiple myeloma: 2,778 patients (50 trials)
|
| 101 |
+
↳ wrote 2,778 patients | total so far: 90,000/100,000 | edges: 2,415,994
|
| 102 |
+
gastric cancer: 2,000 patients (38 trials)
|
| 103 |
+
↳ wrote 2,000 patients | total so far: 92,000/100,000 | edges: 2,474,564
|
| 104 |
+
ovarian cancer: 2,000 patients (29 trials)
|
| 105 |
+
↳ wrote 2,000 patients | total so far: 94,000/100,000 | edges: 2,516,658
|
| 106 |
+
hepatocellular carcinoma: 1,667 patients (47 trials)
|
| 107 |
+
↳ wrote 1,667 patients | total so far: 95,667/100,000 | edges: 2,578,834
|
| 108 |
+
glioblastoma: 1,333 patients (45 trials)
|
| 109 |
+
↳ wrote 1,333 patients | total so far: 97,000/100,000 | edges: 2,623,714
|
| 110 |
+
head and neck cancer: 1,333 patients (49 trials)
|
| 111 |
+
↳ wrote 1,333 patients | total so far: 98,333/100,000 | edges: 2,677,369
|
| 112 |
+
cervical cancer: 889 patients (10 trials)
|
| 113 |
+
↳ wrote 889 patients | total so far: 99,222/100,000 | edges: 2,685,508
|
| 114 |
+
sarcoma: 778 patients (50 trials)
|
| 115 |
+
↳ wrote 778 patients | total so far: 100,000/100,000 | edges: 2,717,650
|
| 116 |
+
|
| 117 |
+
β Total patients: 100,000
|
| 118 |
+
β Total ELIGIBLE_FOR edges: 2,717,650
|
| 119 |
+
|
| 120 |
+
============================================================
|
| 121 |
+
Seeding complete in 21.1 min
|
| 122 |
+
Trials: 1000
|
| 123 |
+
Medications: 16
|
| 124 |
+
Diagnoses: 129
|
| 125 |
+
Publications: 25
|
| 126 |
+
Biomarkers: 57
|
| 127 |
+
Patients: 100,000
|
| 128 |
+
============================================================
|
docker-compose.yml
CHANGED
|
@@ -18,7 +18,7 @@ services:
|
|
| 18 |
container_name: clinicalmatch-neo4j
|
| 19 |
restart: unless-stopped
|
| 20 |
ports:
|
| 21 |
-
- "
|
| 22 |
- "7687:7687" # Bolt
|
| 23 |
volumes:
|
| 24 |
- neo4j_data:/data
|
|
|
|
| 18 |
container_name: clinicalmatch-neo4j
|
| 19 |
restart: unless-stopped
|
| 20 |
ports:
|
| 21 |
+
- "7474:7474" # Neo4j Browser
|
| 22 |
- "7687:7687" # Bolt
|
| 23 |
volumes:
|
| 24 |
- neo4j_data:/data
|
docker/Dockerfile
CHANGED
|
@@ -43,24 +43,17 @@ RUN curl -fsSL https://deb.nodesource.com/setup_20.x | bash - \
|
|
| 43 |
&& apt-get install -y --no-install-recommends nodejs \
|
| 44 |
&& rm -rf /var/lib/apt/lists/*
|
| 45 |
|
| 46 |
-
# ββ Neo4j Community
|
| 47 |
-
ENV NEO4J_VERSION=
|
| 48 |
ENV NEO4J_HOME=/opt/neo4j
|
| 49 |
ENV PATH="${NEO4J_HOME}/bin:${PATH}"
|
| 50 |
|
| 51 |
-
ENV APOC_VERSION=5.18.0
|
| 52 |
-
|
| 53 |
RUN wget -q "https://dist.neo4j.org/neo4j-community-${NEO4J_VERSION}-unix.tar.gz" \
|
| 54 |
&& tar -xzf "neo4j-community-${NEO4J_VERSION}-unix.tar.gz" -C /opt \
|
| 55 |
&& mv "/opt/neo4j-community-${NEO4J_VERSION}" /opt/neo4j \
|
| 56 |
&& rm "neo4j-community-${NEO4J_VERSION}-unix.tar.gz" \
|
| 57 |
&& rm -rf /opt/neo4j/data # will be symlinked to /data at runtime
|
| 58 |
|
| 59 |
-
# Download APOC plugin (Community-compatible jar)
|
| 60 |
-
RUN wget -q \
|
| 61 |
-
"https://github.com/neo4j/apoc/releases/download/${APOC_VERSION}/apoc-${APOC_VERSION}-core.jar" \
|
| 62 |
-
-O /opt/neo4j/plugins/apoc-${APOC_VERSION}-core.jar
|
| 63 |
-
|
| 64 |
# Neo4j configuration — listen on all interfaces, use /data for persistence
|
| 65 |
RUN { \
|
| 66 |
echo "server.bolt.listen_address=0.0.0.0:7687"; \
|
|
@@ -68,9 +61,7 @@ RUN { \
|
|
| 68 |
echo "server.directories.data=/data/neo4j/data"; \
|
| 69 |
echo "server.directories.logs=/data/neo4j/logs"; \
|
| 70 |
echo "server.directories.plugins=/opt/neo4j/plugins"; \
|
| 71 |
-
echo "dbms.security.auth_enabled=
|
| 72 |
-
echo "dbms.security.procedures.unrestricted=apoc.*"; \
|
| 73 |
-
echo "dbms.security.procedures.allowlist=apoc.*"; \
|
| 74 |
echo "server.memory.heap.initial_size=512m"; \
|
| 75 |
echo "server.memory.heap.max_size=1g"; \
|
| 76 |
echo "server.memory.pagecache.size=256m"; \
|
|
@@ -94,6 +85,9 @@ COPY --from=frontend-builder /build/frontend/.next/standalone ./
|
|
| 94 |
COPY --from=frontend-builder /build/frontend/.next/static ./.next/static
|
| 95 |
COPY --from=frontend-builder /build/frontend/public ./public
|
| 96 |
|
|
|
|
|
|
|
|
|
|
| 97 |
# ── Config files ──────────────────────────────────────────────────────────────
|
| 98 |
COPY docker/nginx.conf /app/docker/nginx.conf
|
| 99 |
COPY docker/supervisord.conf /app/docker/supervisord.conf
|
|
@@ -113,8 +107,6 @@ ENV NEO4J_URI=bolt://127.0.0.1:7687
|
|
| 113 |
ENV NEO4J_USERNAME=neo4j
|
| 114 |
ENV NEO4J_PASSWORD=clinicalmatch2024
|
| 115 |
ENV NEO4J_DATABASE=neo4j
|
| 116 |
-
# NEO4J_AUTH tells Neo4j 5.x to set this password on first boot (format: user/pass)
|
| 117 |
-
ENV NEO4J_AUTH=none
|
| 118 |
|
| 119 |
# LLM — OpenAI-compatible (set real values via HF Spaces secrets)
|
| 120 |
ENV OPENAI_API_KEY=""
|
|
|
|
| 43 |
&& apt-get install -y --no-install-recommends nodejs \
|
| 44 |
&& rm -rf /var/lib/apt/lists/*
|
| 45 |
|
| 46 |
+
# ── Neo4j Community 2026.04.0 ─────────────────────────────────────────────────
|
| 47 |
+
ENV NEO4J_VERSION=2026.04.0
|
| 48 |
ENV NEO4J_HOME=/opt/neo4j
|
| 49 |
ENV PATH="${NEO4J_HOME}/bin:${PATH}"
|
| 50 |
|
|
|
|
|
|
|
| 51 |
RUN wget -q "https://dist.neo4j.org/neo4j-community-${NEO4J_VERSION}-unix.tar.gz" \
|
| 52 |
&& tar -xzf "neo4j-community-${NEO4J_VERSION}-unix.tar.gz" -C /opt \
|
| 53 |
&& mv "/opt/neo4j-community-${NEO4J_VERSION}" /opt/neo4j \
|
| 54 |
&& rm "neo4j-community-${NEO4J_VERSION}-unix.tar.gz" \
|
| 55 |
&& rm -rf /opt/neo4j/data # will be symlinked to /data at runtime
|
| 56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
# Neo4j configuration — listen on all interfaces, use /data for persistence
|
| 58 |
RUN { \
|
| 59 |
echo "server.bolt.listen_address=0.0.0.0:7687"; \
|
|
|
|
| 61 |
echo "server.directories.data=/data/neo4j/data"; \
|
| 62 |
echo "server.directories.logs=/data/neo4j/logs"; \
|
| 63 |
echo "server.directories.plugins=/opt/neo4j/plugins"; \
|
| 64 |
+
echo "dbms.security.auth_enabled=true"; \
|
|
|
|
|
|
|
| 65 |
echo "server.memory.heap.initial_size=512m"; \
|
| 66 |
echo "server.memory.heap.max_size=1g"; \
|
| 67 |
echo "server.memory.pagecache.size=256m"; \
|
|
|
|
| 85 |
COPY --from=frontend-builder /build/frontend/.next/static ./.next/static
|
| 86 |
COPY --from=frontend-builder /build/frontend/public ./public
|
| 87 |
|
| 88 |
+
# ── Neo4j seed dump ───────────────────────────────────────────────────────────
|
| 89 |
+
COPY docker/neo4j.dump /app/docker/neo4j.dump
|
| 90 |
+
|
| 91 |
# ── Config files ──────────────────────────────────────────────────────────────
|
| 92 |
COPY docker/nginx.conf /app/docker/nginx.conf
|
| 93 |
COPY docker/supervisord.conf /app/docker/supervisord.conf
|
|
|
|
| 107 |
ENV NEO4J_USERNAME=neo4j
|
| 108 |
ENV NEO4J_PASSWORD=clinicalmatch2024
|
| 109 |
ENV NEO4J_DATABASE=neo4j
|
|
|
|
|
|
|
| 110 |
|
| 111 |
# LLM — OpenAI-compatible (set real values via HF Spaces secrets)
|
| 112 |
ENV OPENAI_API_KEY=""
|
docker/entrypoint.sh
CHANGED
|
@@ -3,19 +3,28 @@ set -e
|
|
| 3 |
|
| 4 |
log() { echo "[entrypoint] $*"; }
|
| 5 |
|
| 6 |
-
# ββ Persistent data dirs βββββββββββββββββββββββββββββ
|
| 7 |
-
mkdir -p /data/neo4j/data /data/neo4j/logs
|
| 8 |
-
/tmp/nginx-cache /tmp/nginx-body /tmp/nginx-run
|
| 9 |
|
| 10 |
-
#
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
fi
|
| 19 |
|
|
|
|
|
|
|
|
|
|
| 20 |
log "Starting all services via supervisord..."
|
| 21 |
exec /usr/bin/supervisord -c /app/docker/supervisord.conf
|
|
|
|
| 3 |
|
| 4 |
log() { echo "[entrypoint] $*"; }
|
| 5 |
|
| 6 |
+
# ── Persistent data dirs (HF Spaces mounts /data) ─────────────────────────────
|
| 7 |
+
mkdir -p /data/neo4j/data /data/neo4j/logs /data/neo4j/transactions
|
|
|
|
| 8 |
|
| 9 |
+
# Point Neo4j config at the persistent volume paths (already set in neo4j.conf,
|
| 10 |
+
# but the data dir must exist before Neo4j starts or it refuses to launch)
|
| 11 |
+
mkdir -p /data/neo4j/data/databases /data/neo4j/data/dbms
|
| 12 |
+
|
| 13 |
+
# ── First-boot: set initial password ──────────────────────────────────────────
|
| 14 |
+
NEO4J_PASS="${NEO4J_PASSWORD:-clinicalmatch2024}"
|
| 15 |
+
|
| 16 |
+
if [ ! -f /data/.neo4j_initialized ]; then
|
| 17 |
+
log "First boot — setting Neo4j initial password..."
|
| 18 |
+
# neo4j-admin must write to the same dbms dir Neo4j will use
|
| 19 |
+
NEO4J_CONF=/opt/neo4j/conf \
|
| 20 |
+
neo4j-admin dbms set-initial-password "$NEO4J_PASS" 2>&1 || \
|
| 21 |
+
log "WARNING: set-initial-password failed (may already be set — continuing)"
|
| 22 |
+
touch /data/.neo4j_initialized
|
| 23 |
+
log "Password initialisation done."
|
| 24 |
fi
|
| 25 |
|
| 26 |
+
# ── Nginx tmp dirs ────────────────────────────────────────────────────────────
|
| 27 |
+
mkdir -p /tmp/nginx-cache /tmp/nginx-body
|
| 28 |
+
|
| 29 |
log "Starting all services via supervisord..."
|
| 30 |
exec /usr/bin/supervisord -c /app/docker/supervisord.conf
|
docker/seed_on_startup.sh
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/bin/bash
|
| 2 |
+
# Waits for Neo4j and the FastAPI backend to be ready, then auto-seeds the graph
|
| 3 |
+
# if it is empty. Runs once at container startup via supervisord.
|
| 4 |
+
|
| 5 |
+
BACKEND="http://127.0.0.1:8000"
|
| 6 |
+
NEO4J_BOLT="127.0.0.1:7687"
|
| 7 |
+
SEED_FLAG="/data/.graph_seeded"
|
| 8 |
+
|
| 9 |
+
log() { echo "[seeder] $*"; }
|
| 10 |
+
|
| 11 |
+
wait_for_tcp() {
|
| 12 |
+
local host=$1 port=$2 label=$3 max=${4:-120}
|
| 13 |
+
local i=0
|
| 14 |
+
while ! (echo > /dev/tcp/"$host"/"$port") 2>/dev/null; do
|
| 15 |
+
i=$((i+1))
|
| 16 |
+
if [ $i -ge $max ]; then
|
| 17 |
+
log "ERROR: $label did not become reachable within ${max}s — aborting seed"
|
| 18 |
+
exit 1
|
| 19 |
+
fi
|
| 20 |
+
sleep 2
|
| 21 |
+
done
|
| 22 |
+
log "$label is reachable after $((i*2))s"
|
| 23 |
+
}
|
| 24 |
+
|
| 25 |
+
wait_for_http() {
|
| 26 |
+
local url=$1 label=$2 max=${3:-120}
|
| 27 |
+
local i=0
|
| 28 |
+
while ! curl -sf "$url" > /dev/null 2>&1; do
|
| 29 |
+
i=$((i+1))
|
| 30 |
+
if [ $i -ge $max ]; then
|
| 31 |
+
log "ERROR: $label did not respond within ${max}s — aborting seed"
|
| 32 |
+
exit 1
|
| 33 |
+
fi
|
| 34 |
+
sleep 2
|
| 35 |
+
done
|
| 36 |
+
log "$label ready after $((i*2))s"
|
| 37 |
+
}
|
| 38 |
+
|
| 39 |
+
log "Waiting for Neo4j bolt on $NEO4J_BOLT..."
|
| 40 |
+
wait_for_tcp 127.0.0.1 7687 "Neo4j bolt" 180
|
| 41 |
+
|
| 42 |
+
log "Waiting for FastAPI backend..."
|
| 43 |
+
wait_for_http "$BACKEND/health" "FastAPI /health" 120
|
| 44 |
+
|
| 45 |
+
# Check if already seeded (flag file OR graph has data)
|
| 46 |
+
if [ -f "$SEED_FLAG" ]; then
|
| 47 |
+
log "Seed flag found at $SEED_FLAG — skipping."
|
| 48 |
+
exit 0
|
| 49 |
+
fi
|
| 50 |
+
|
| 51 |
+
# Ask the backend how many patients are in the graph
|
| 52 |
+
PATIENT_COUNT=$(curl -sf "$BACKEND/api/v1/graph/stats" 2>/dev/null | \
|
| 53 |
+
python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('patients',0))" 2>/dev/null || echo 0)
|
| 54 |
+
|
| 55 |
+
log "Graph patient count: $PATIENT_COUNT"
|
| 56 |
+
|
| 57 |
+
if [ "$PATIENT_COUNT" -ge 100 ] 2>/dev/null; then
|
| 58 |
+
log "Graph already seeded ($PATIENT_COUNT patients) — skipping."
|
| 59 |
+
touch "$SEED_FLAG"
|
| 60 |
+
exit 0
|
| 61 |
+
fi
|
| 62 |
+
|
| 63 |
+
log "Graph is empty — triggering POST $BACKEND/seed ..."
|
| 64 |
+
HTTP_CODE=$(curl -sf -o /tmp/seed_response.json -w "%{http_code}" \
|
| 65 |
+
-X POST "$BACKEND/seed" \
|
| 66 |
+
-H "Content-Type: application/json" 2>&1)
|
| 67 |
+
|
| 68 |
+
if [ "$HTTP_CODE" = "200" ] || [ "$HTTP_CODE" = "202" ]; then
|
| 69 |
+
log "Seeding started successfully (HTTP $HTTP_CODE)."
|
| 70 |
+
log "Response: $(cat /tmp/seed_response.json 2>/dev/null)"
|
| 71 |
+
# Mark flag so future restarts don't re-trigger (seeder runs in background
|
| 72 |
+
# inside FastAPI; real completion check is patient count)
|
| 73 |
+
touch "$SEED_FLAG"
|
| 74 |
+
else
|
| 75 |
+
log "WARNING: /seed returned HTTP $HTTP_CODE — check backend logs."
|
| 76 |
+
cat /tmp/seed_response.json 2>/dev/null || true
|
| 77 |
+
fi
|
docker/supervisord.conf
CHANGED
|
@@ -16,28 +16,46 @@ serverurl=unix:///tmp/supervisor.sock
|
|
| 16 |
# ── Neo4j Community ───────────────────────────────────────────────────────────
|
| 17 |
[program:neo4j]
|
| 18 |
command=/opt/neo4j/bin/neo4j console
|
| 19 |
-
environment=
|
| 20 |
-
NEO4J_HOME=/opt/neo4j,
|
| 21 |
-
JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64,
|
| 22 |
-
NEO4J_AUTH="none"
|
| 23 |
autostart=true
|
| 24 |
autorestart=true
|
| 25 |
startsecs=45
|
| 26 |
startretries=3
|
| 27 |
stdout_logfile=/tmp/neo4j.log
|
|
|
|
| 28 |
redirect_stderr=true
|
| 29 |
priority=10
|
| 30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 31 |
# ── FastAPI backend ───────────────────────────────────────────────────────────
|
| 32 |
-
# No environment= block — inherits full container env so HF Spaces secrets are visible
|
| 33 |
[program:backend]
|
| 34 |
-
command=
|
| 35 |
directory=/app/backend
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
autostart=true
|
| 37 |
autorestart=true
|
| 38 |
-
startsecs=
|
| 39 |
startretries=5
|
| 40 |
stdout_logfile=/tmp/backend.log
|
|
|
|
| 41 |
redirect_stderr=true
|
| 42 |
priority=30
|
| 43 |
|
|
@@ -50,6 +68,7 @@ autostart=true
|
|
| 50 |
autorestart=true
|
| 51 |
startsecs=5
|
| 52 |
stdout_logfile=/tmp/frontend.log
|
|
|
|
| 53 |
redirect_stderr=true
|
| 54 |
priority=40
|
| 55 |
|
|
@@ -60,5 +79,6 @@ autostart=true
|
|
| 60 |
autorestart=true
|
| 61 |
startsecs=3
|
| 62 |
stdout_logfile=/tmp/nginx.log
|
|
|
|
| 63 |
redirect_stderr=true
|
| 64 |
priority=50
|
|
|
|
| 16 |
# ── Neo4j Community ───────────────────────────────────────────────────────────
|
| 17 |
[program:neo4j]
|
| 18 |
command=/opt/neo4j/bin/neo4j console
|
| 19 |
+
environment=NEO4J_HOME=/opt/neo4j,JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64,NEO4J_CONF=/opt/neo4j/conf
|
|
|
|
|
|
|
|
|
|
| 20 |
autostart=true
|
| 21 |
autorestart=true
|
| 22 |
startsecs=45
|
| 23 |
startretries=3
|
| 24 |
stdout_logfile=/tmp/neo4j.log
|
| 25 |
+
stderr_logfile=/tmp/neo4j.log
|
| 26 |
redirect_stderr=true
|
| 27 |
priority=10
|
| 28 |
|
| 29 |
+
# ── Auto-seeder (one-shot, runs after Neo4j + backend are live) ───────────────
|
| 30 |
+
[program:seeder]
|
| 31 |
+
command=/bin/bash /app/docker/seed_on_startup.sh
|
| 32 |
+
autostart=true
|
| 33 |
+
autorestart=false
|
| 34 |
+
startsecs=0
|
| 35 |
+
startretries=1
|
| 36 |
+
stdout_logfile=/tmp/seeder.log
|
| 37 |
+
stderr_logfile=/tmp/seeder.log
|
| 38 |
+
redirect_stderr=true
|
| 39 |
+
priority=99
|
| 40 |
+
|
| 41 |
# ── FastAPI backend ───────────────────────────────────────────────────────────
|
|
|
|
| 42 |
[program:backend]
|
| 43 |
+
command=python3 -m uvicorn main:app --host 127.0.0.1 --port 8000 --workers 2
|
| 44 |
directory=/app/backend
|
| 45 |
+
environment=
|
| 46 |
+
NEO4J_URI="bolt://127.0.0.1:7687",
|
| 47 |
+
NEO4J_USERNAME="%(ENV_NEO4J_USERNAME)s",
|
| 48 |
+
NEO4J_PASSWORD="%(ENV_NEO4J_PASSWORD)s",
|
| 49 |
+
NEO4J_DATABASE="%(ENV_NEO4J_DATABASE)s",
|
| 50 |
+
OPENAI_API_KEY="%(ENV_OPENAI_API_KEY)s",
|
| 51 |
+
OPENAI_BASE_URL="%(ENV_OPENAI_BASE_URL)s",
|
| 52 |
+
OPENAI_MODEL="%(ENV_OPENAI_MODEL)s"
|
| 53 |
autostart=true
|
| 54 |
autorestart=true
|
| 55 |
+
startsecs=10
|
| 56 |
startretries=5
|
| 57 |
stdout_logfile=/tmp/backend.log
|
| 58 |
+
stderr_logfile=/tmp/backend.log
|
| 59 |
redirect_stderr=true
|
| 60 |
priority=30
|
| 61 |
|
|
|
|
| 68 |
autorestart=true
|
| 69 |
startsecs=5
|
| 70 |
stdout_logfile=/tmp/frontend.log
|
| 71 |
+
stderr_logfile=/tmp/frontend.log
|
| 72 |
redirect_stderr=true
|
| 73 |
priority=40
|
| 74 |
|
|
|
|
| 79 |
autorestart=true
|
| 80 |
startsecs=3
|
| 81 |
stdout_logfile=/tmp/nginx.log
|
| 82 |
+
stderr_logfile=/tmp/nginx.log
|
| 83 |
redirect_stderr=true
|
| 84 |
priority=50
|
mcp_manifest.json
CHANGED
|
@@ -10,8 +10,8 @@
|
|
| 10 |
"NEO4J_USERNAME": "neo4j",
|
| 11 |
"NEO4J_PASSWORD": "clinicalmatch2024",
|
| 12 |
"OPENAI_API_KEY": "<your-key>",
|
| 13 |
-
"OPENAI_BASE_URL": "https://
|
| 14 |
-
"OPENAI_MODEL": "
|
| 15 |
},
|
| 16 |
"tools": [
|
| 17 |
{
|
|
|
|
| 10 |
"NEO4J_USERNAME": "neo4j",
|
| 11 |
"NEO4J_PASSWORD": "clinicalmatch2024",
|
| 12 |
"OPENAI_API_KEY": "<your-key>",
|
| 13 |
+
"OPENAI_BASE_URL": "https://api.groq.com/openai/v1",
|
| 14 |
+
"OPENAI_MODEL": "qwen/qwen3-32b"
|
| 15 |
},
|
| 16 |
"tools": [
|
| 17 |
{
|