Spaces:
Sleeping
Sleeping
Initial deployment: ClinicalMatch AI v2.0 — FHIR R4 · MCP (9 tools) · A2A workflow · SHARP compliance · 100k synthetic patients · Neo4j graph · GraphRAG chatbot
59abb4f | from neo4j_setup import neo4j_conn | |
def ingest_sample_data():
    """Ingest rich sample data into the Neo4j knowledge graph.

    First deletes every node whose ``sample`` property is true, then runs a
    fixed list of Cypher statements that MERGE the sample patients,
    diagnoses, biomarkers, trials and study sites and the relationships
    between them. Because every statement uses MERGE, re-running the
    function does not duplicate nodes or relationships.

    Each statement in the list is executed independently: a failure is
    reported as an "Ingestion warning" and the remaining statements still
    run. The final message reports success only when every statement ran
    without raising; otherwise it reports how many statements failed.

    Raises:
        Exception: whatever ``neo4j_conn.run_query`` raises for the initial
            clearing query, which is deliberately not guarded.
    """
    # Clear existing sample data.
    # NOTE(review): only the Patient nodes below are written with
    # `sample: true`, so this statement does not remove the Diagnosis,
    # Biomarker, Trial or StudySite nodes created by earlier runs.
    neo4j_conn.run_query("MATCH (n) WHERE n.sample = true DETACH DELETE n")

    queries = [
        # Patients with rich profiles
        """
        MERGE (p1:Patient {id: 'P001'})
        SET p1 += {age: 45, gender: 'female', ethnicity: 'White', sample: true,
        zip_code: '02115', diagnosis_date: '2022-06-01'}
        """,
        """
        MERGE (p2:Patient {id: 'P002'})
        SET p2 += {age: 60, gender: 'male', ethnicity: 'Black/African American', sample: true,
        zip_code: '77030', diagnosis_date: '2021-11-15'}
        """,
        """
        MERGE (p3:Patient {id: 'P003'})
        SET p3 += {age: 38, gender: 'female', ethnicity: 'Hispanic/Latino', sample: true,
        zip_code: '94102', diagnosis_date: '2023-02-10'}
        """,
        """
        MERGE (p4:Patient {id: 'P004'})
        SET p4 += {age: 67, gender: 'male', ethnicity: 'Asian', sample: true,
        zip_code: '10001', diagnosis_date: '2022-09-20'}
        """,
        """
        MERGE (p5:Patient {id: 'P005'})
        SET p5 += {age: 34, gender: 'female', ethnicity: 'White', sample: true,
        zip_code: '60601', diagnosis_date: '2023-07-01'}
        """,
        # Diagnoses
        """MERGE (d1:Diagnosis {code: 'C50'}) SET d1.name = 'Breast Cancer', d1.snomed = '254837009'""",
        """MERGE (d2:Diagnosis {code: 'C61'}) SET d2.name = 'Prostate Cancer', d2.snomed = '399068003'""",
        """MERGE (d3:Diagnosis {code: 'C34'}) SET d3.name = 'Non-Small Cell Lung Cancer', d3.snomed = '363346000'""",
        """MERGE (d4:Diagnosis {code: 'C18'}) SET d4.name = 'Colorectal Cancer', d4.snomed = '93761005'""",
        # Biomarkers
        """MERGE (b1:Biomarker {id: 'HER2_POS'}) SET b1.name = 'HER2 Positive', b1.loinc = '85319-2'""",
        """MERGE (b2:Biomarker {id: 'EGFR_L858R'}) SET b2.name = 'EGFR L858R Mutation', b2.loinc = '81704-9'""",
        # NOTE(review): BRCA2_POS carries the same LOINC code as HER2_POS
        # above - confirm this is intended and not a copy/paste slip.
        """MERGE (b3:Biomarker {id: 'BRCA2_POS'}) SET b3.name = 'BRCA2 Mutation', b3.loinc = '85319-2'""",
        """MERGE (b4:Biomarker {id: 'MSI_H'}) SET b4.name = 'MSI-High', b4.loinc = '85077-6'""",
        """MERGE (b5:Biomarker {id: 'PDL1_HIGH'}) SET b5.name = 'PD-L1 High (>50%)', b5.loinc = '73977-1'""",
        # Trials (only the first trial sets max_age; the others set min_age only)
        """
        MERGE (t1:Trial {id: 'NCT04889131'})
        SET t1 += {phase: 'PHASE2', condition: 'Breast Cancer', status: 'RECRUITING',
        title: 'Precision HER2+ Breast Cancer Study', min_age: 18, max_age: 75,
        enrollment_target: 150, enrolled: 87, sponsor: 'Dana-Farber'}
        """,
        """
        MERGE (t2:Trial {id: 'NCT05123456'})
        SET t2 += {phase: 'PHASE3', condition: 'Breast Cancer', status: 'RECRUITING',
        title: 'Immunotherapy Combination for Advanced Breast Cancer', min_age: 18,
        enrollment_target: 400, enrolled: 142, sponsor: 'Pharma Innovations Inc'}
        """,
        """
        MERGE (t3:Trial {id: 'NCT05456789'})
        SET t3 += {phase: 'PHASE2', condition: 'Prostate Cancer', status: 'RECRUITING',
        title: 'BRCA2 Prostate Cancer PARP Inhibitor Trial', min_age: 18,
        enrollment_target: 120, enrolled: 54, sponsor: 'Oncology Research Group'}
        """,
        """
        MERGE (t4:Trial {id: 'NCT06112233'})
        SET t4 += {phase: 'PHASE3', condition: 'Non-Small Cell Lung Cancer', status: 'RECRUITING',
        title: 'EGFR-Mutant NSCLC Targeted Therapy Study', min_age: 18,
        enrollment_target: 300, enrolled: 178, sponsor: 'Global Cancer Institute'}
        """,
        """
        MERGE (t5:Trial {id: 'NCT05334455'})
        SET t5 += {phase: 'PHASE2', condition: 'Colorectal Cancer', status: 'RECRUITING',
        title: 'MSI-H Colorectal Cancer Immunotherapy Study', min_age: 18,
        enrollment_target: 100, enrolled: 45, sponsor: 'NCI'}
        """,
        # Study Sites
        """
        MERGE (s1:StudySite {id: 'DFCI'})
        SET s1 += {name: 'Dana-Farber Cancer Institute', city: 'Boston', state: 'MA',
        lat: 42.3376, lon: -71.1083, active_trials: 4}
        """,
        """
        MERGE (s2:StudySite {id: 'MDACC'})
        SET s2 += {name: 'MD Anderson Cancer Center', city: 'Houston', state: 'TX',
        lat: 29.7066, lon: -95.3990, active_trials: 6}
        """,
        """
        MERGE (s3:StudySite {id: 'MSK'})
        SET s3 += {name: 'Memorial Sloan Kettering', city: 'New York', state: 'NY',
        lat: 40.7644, lon: -73.9581, active_trials: 5}
        """,
        # Patient-Diagnosis relationships
        """MATCH (p:Patient {id: 'P001'}), (d:Diagnosis {code: 'C50'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
        """MATCH (p:Patient {id: 'P002'}), (d:Diagnosis {code: 'C61'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
        """MATCH (p:Patient {id: 'P003'}), (d:Diagnosis {code: 'C50'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
        """MATCH (p:Patient {id: 'P004'}), (d:Diagnosis {code: 'C34'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
        """MATCH (p:Patient {id: 'P005'}), (d:Diagnosis {code: 'C18'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
        # Patient-Biomarker relationships (P003 has none; P004 has two)
        """MATCH (p:Patient {id: 'P001'}), (b:Biomarker {id: 'HER2_POS'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
        """MATCH (p:Patient {id: 'P002'}), (b:Biomarker {id: 'BRCA2_POS'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
        """MATCH (p:Patient {id: 'P004'}), (b:Biomarker {id: 'EGFR_L858R'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
        """MATCH (p:Patient {id: 'P004'}), (b:Biomarker {id: 'PDL1_HIGH'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
        """MATCH (p:Patient {id: 'P005'}), (b:Biomarker {id: 'MSI_H'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
        # Diagnosis-Trial eligibility
        """MATCH (d:Diagnosis {code: 'C50'}), (t:Trial {id: 'NCT04889131'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
        """MATCH (d:Diagnosis {code: 'C50'}), (t:Trial {id: 'NCT05123456'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
        """MATCH (d:Diagnosis {code: 'C61'}), (t:Trial {id: 'NCT05456789'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
        """MATCH (d:Diagnosis {code: 'C34'}), (t:Trial {id: 'NCT06112233'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
        """MATCH (d:Diagnosis {code: 'C18'}), (t:Trial {id: 'NCT05334455'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
        # Trial-Site relationships
        # NOTE(review): NCT06112233 and NCT05334455 get no CONDUCTED_AT edge
        # here - confirm whether sites for them are meant to be missing.
        """MATCH (t:Trial {id: 'NCT04889131'}), (s:StudySite {id: 'DFCI'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
        """MATCH (t:Trial {id: 'NCT04889131'}), (s:StudySite {id: 'MSK'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
        """MATCH (t:Trial {id: 'NCT05123456'}), (s:StudySite {id: 'MDACC'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
        """MATCH (t:Trial {id: 'NCT05123456'}), (s:StudySite {id: 'MSK'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
        """MATCH (t:Trial {id: 'NCT05456789'}), (s:StudySite {id: 'MDACC'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
        # Biomarker-Trial requirements
        """MATCH (b:Biomarker {id: 'HER2_POS'}), (t:Trial {id: 'NCT04889131'}) MERGE (b)-[:REQUIRED_FOR]->(t)""",
        """MATCH (b:Biomarker {id: 'EGFR_L858R'}), (t:Trial {id: 'NCT06112233'}) MERGE (b)-[:REQUIRED_FOR]->(t)""",
        """MATCH (b:Biomarker {id: 'MSI_H'}), (t:Trial {id: 'NCT05334455'}) MERGE (b)-[:REQUIRED_FOR]->(t)""",
    ]

    # Best-effort ingestion: one failing statement must not stop the rest,
    # but failures are counted so the final message is truthful.
    failures = 0
    for query in queries:
        try:
            neo4j_conn.run_query(query)
        except Exception as e:
            failures += 1
            print(f"Ingestion warning: {e}")

    if failures:
        print(
            f"Sample data ingestion finished with {failures} of "
            f"{len(queries)} queries failing."
        )
    else:
        print("Rich sample data ingested successfully.")
if __name__ == "__main__":
    # Script entry point: load the sample graph, then release the shared
    # connection. The close runs in `finally` so the connection is released
    # even when ingestion raises (the initial clearing query inside
    # ingest_sample_data is not guarded by a try/except).
    try:
        ingest_sample_data()
    finally:
        neo4j_conn.close()