from neo4j_setup import neo4j_conn

# Cypher statements that build the sample graph, in execution order.
# Nodes come first; the relationship statements use MATCH and therefore only
# create an edge when both endpoint nodes already exist.
_SAMPLE_QUERIES = (
    # Patients with rich profiles.
    # Patients are the only nodes flagged `sample: true`, so they are the only
    # nodes removed by the clear step in ingest_sample_data().
    """
    MERGE (p1:Patient {id: 'P001'})
    SET p1 += {age: 45, gender: 'female', ethnicity: 'White', sample: true,
               zip_code: '02115', diagnosis_date: '2022-06-01'}
    """,
    """
    MERGE (p2:Patient {id: 'P002'})
    SET p2 += {age: 60, gender: 'male', ethnicity: 'Black/African American', sample: true,
               zip_code: '77030', diagnosis_date: '2021-11-15'}
    """,
    """
    MERGE (p3:Patient {id: 'P003'})
    SET p3 += {age: 38, gender: 'female', ethnicity: 'Hispanic/Latino', sample: true,
               zip_code: '94102', diagnosis_date: '2023-02-10'}
    """,
    """
    MERGE (p4:Patient {id: 'P004'})
    SET p4 += {age: 67, gender: 'male', ethnicity: 'Asian', sample: true,
               zip_code: '10001', diagnosis_date: '2022-09-20'}
    """,
    """
    MERGE (p5:Patient {id: 'P005'})
    SET p5 += {age: 34, gender: 'female', ethnicity: 'White', sample: true,
               zip_code: '60601', diagnosis_date: '2023-07-01'}
    """,

    # Diagnoses
    """MERGE (d1:Diagnosis {code: 'C50'}) SET d1.name = 'Breast Cancer', d1.snomed = '254837009'""",
    """MERGE (d2:Diagnosis {code: 'C61'}) SET d2.name = 'Prostate Cancer', d2.snomed = '399068003'""",
    """MERGE (d3:Diagnosis {code: 'C34'}) SET d3.name = 'Non-Small Cell Lung Cancer', d3.snomed = '363346000'""",
    """MERGE (d4:Diagnosis {code: 'C18'}) SET d4.name = 'Colorectal Cancer', d4.snomed = '93761005'""",

    # Biomarkers
    """MERGE (b1:Biomarker {id: 'HER2_POS'}) SET b1.name = 'HER2 Positive', b1.loinc = '85319-2'""",
    """MERGE (b2:Biomarker {id: 'EGFR_L858R'}) SET b2.name = 'EGFR L858R Mutation', b2.loinc = '81704-9'""",
    # NOTE(review): BRCA2_POS carries the same LOINC code as HER2_POS above;
    # this looks like a copy-paste slip -- confirm the intended code.
    """MERGE (b3:Biomarker {id: 'BRCA2_POS'}) SET b3.name = 'BRCA2 Mutation', b3.loinc = '85319-2'""",
    """MERGE (b4:Biomarker {id: 'MSI_H'}) SET b4.name = 'MSI-High', b4.loinc = '85077-6'""",
    """MERGE (b5:Biomarker {id: 'PDL1_HIGH'}) SET b5.name = 'PD-L1 High (>50%)', b5.loinc = '73977-1'""",

    # Trials (only NCT04889131 defines max_age)
    """
    MERGE (t1:Trial {id: 'NCT04889131'})
    SET t1 += {phase: 'PHASE2', condition: 'Breast Cancer', status: 'RECRUITING',
               title: 'Precision HER2+ Breast Cancer Study',
               min_age: 18, max_age: 75, enrollment_target: 150, enrolled: 87,
               sponsor: 'Dana-Farber'}
    """,
    """
    MERGE (t2:Trial {id: 'NCT05123456'})
    SET t2 += {phase: 'PHASE3', condition: 'Breast Cancer', status: 'RECRUITING',
               title: 'Immunotherapy Combination for Advanced Breast Cancer',
               min_age: 18, enrollment_target: 400, enrolled: 142,
               sponsor: 'Pharma Innovations Inc'}
    """,
    """
    MERGE (t3:Trial {id: 'NCT05456789'})
    SET t3 += {phase: 'PHASE2', condition: 'Prostate Cancer', status: 'RECRUITING',
               title: 'BRCA2 Prostate Cancer PARP Inhibitor Trial',
               min_age: 18, enrollment_target: 120, enrolled: 54,
               sponsor: 'Oncology Research Group'}
    """,
    """
    MERGE (t4:Trial {id: 'NCT06112233'})
    SET t4 += {phase: 'PHASE3', condition: 'Non-Small Cell Lung Cancer', status: 'RECRUITING',
               title: 'EGFR-Mutant NSCLC Targeted Therapy Study',
               min_age: 18, enrollment_target: 300, enrolled: 178,
               sponsor: 'Global Cancer Institute'}
    """,
    """
    MERGE (t5:Trial {id: 'NCT05334455'})
    SET t5 += {phase: 'PHASE2', condition: 'Colorectal Cancer', status: 'RECRUITING',
               title: 'MSI-H Colorectal Cancer Immunotherapy Study',
               min_age: 18, enrollment_target: 100, enrolled: 45,
               sponsor: 'NCI'}
    """,

    # Study Sites
    """
    MERGE (s1:StudySite {id: 'DFCI'})
    SET s1 += {name: 'Dana-Farber Cancer Institute', city: 'Boston', state: 'MA',
               lat: 42.3376, lon: -71.1083, active_trials: 4}
    """,
    """
    MERGE (s2:StudySite {id: 'MDACC'})
    SET s2 += {name: 'MD Anderson Cancer Center', city: 'Houston', state: 'TX',
               lat: 29.7066, lon: -95.3990, active_trials: 6}
    """,
    """
    MERGE (s3:StudySite {id: 'MSK'})
    SET s3 += {name: 'Memorial Sloan Kettering', city: 'New York', state: 'NY',
               lat: 40.7644, lon: -73.9581, active_trials: 5}
    """,

    # Patient-Diagnosis relationships
    """MATCH (p:Patient {id: 'P001'}), (d:Diagnosis {code: 'C50'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
    """MATCH (p:Patient {id: 'P002'}), (d:Diagnosis {code: 'C61'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
    """MATCH (p:Patient {id: 'P003'}), (d:Diagnosis {code: 'C50'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
    """MATCH (p:Patient {id: 'P004'}), (d:Diagnosis {code: 'C34'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
    """MATCH (p:Patient {id: 'P005'}), (d:Diagnosis {code: 'C18'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",

    # Patient-Biomarker relationships (P003 has none; P004 has two)
    """MATCH (p:Patient {id: 'P001'}), (b:Biomarker {id: 'HER2_POS'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
    """MATCH (p:Patient {id: 'P002'}), (b:Biomarker {id: 'BRCA2_POS'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
    """MATCH (p:Patient {id: 'P004'}), (b:Biomarker {id: 'EGFR_L858R'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
    """MATCH (p:Patient {id: 'P004'}), (b:Biomarker {id: 'PDL1_HIGH'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
    """MATCH (p:Patient {id: 'P005'}), (b:Biomarker {id: 'MSI_H'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",

    # Diagnosis-Trial eligibility
    """MATCH (d:Diagnosis {code: 'C50'}), (t:Trial {id: 'NCT04889131'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
    """MATCH (d:Diagnosis {code: 'C50'}), (t:Trial {id: 'NCT05123456'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
    """MATCH (d:Diagnosis {code: 'C61'}), (t:Trial {id: 'NCT05456789'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
    """MATCH (d:Diagnosis {code: 'C34'}), (t:Trial {id: 'NCT06112233'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
    """MATCH (d:Diagnosis {code: 'C18'}), (t:Trial {id: 'NCT05334455'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",

    # Trial-Site relationships (NCT06112233 and NCT05334455 have no site here)
    """MATCH (t:Trial {id: 'NCT04889131'}), (s:StudySite {id: 'DFCI'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
    """MATCH (t:Trial {id: 'NCT04889131'}), (s:StudySite {id: 'MSK'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
    """MATCH (t:Trial {id: 'NCT05123456'}), (s:StudySite {id: 'MDACC'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
    """MATCH (t:Trial {id: 'NCT05123456'}), (s:StudySite {id: 'MSK'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
    """MATCH (t:Trial {id: 'NCT05456789'}), (s:StudySite {id: 'MDACC'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",

    # Biomarker-Trial requirements
    """MATCH (b:Biomarker {id: 'HER2_POS'}), (t:Trial {id: 'NCT04889131'}) MERGE (b)-[:REQUIRED_FOR]->(t)""",
    """MATCH (b:Biomarker {id: 'EGFR_L858R'}), (t:Trial {id: 'NCT06112233'}) MERGE (b)-[:REQUIRED_FOR]->(t)""",
    """MATCH (b:Biomarker {id: 'MSI_H'}), (t:Trial {id: 'NCT05334455'}) MERGE (b)-[:REQUIRED_FOR]->(t)""",
)


def ingest_sample_data():
    """Ingest rich sample data into Neo4j knowledge graph.

    First deletes every node whose ``sample`` property is true, then runs each
    statement in ``_SAMPLE_QUERIES`` in order through ``neo4j_conn.run_query``.

    Ingestion is best-effort: an exception raised by an individual statement
    is printed as a warning and the remaining statements still run. The final
    status line reports success only when no statement failed; otherwise it
    reports how many failed.

    Raises:
        Exception: whatever ``neo4j_conn.run_query`` raises for the initial
            clear query, which is not guarded.
    """
    # Clear existing sample data
    neo4j_conn.run_query("MATCH (n) WHERE n.sample = true DETACH DELETE n")

    failed = 0
    for query in _SAMPLE_QUERIES:
        try:
            neo4j_conn.run_query(query)
        except Exception as e:
            # Deliberately broad: one bad statement must not stop the rest.
            failed += 1
            print(f"Ingestion warning: {e}")

    if failed:
        # Do not claim success when some statements were skipped.
        print(
            f"Sample data ingestion finished with {failed} of "
            f"{len(_SAMPLE_QUERIES)} queries failing."
        )
    else:
        print("Rich sample data ingested successfully.")


if __name__ == "__main__":
    try:
        ingest_sample_data()
    finally:
        # Release the connection even if ingestion raised.
        neo4j_conn.close()