# Scraped file-view header: Spaces / Running / File size: 7,783 Bytes / 59abb4f
from neo4j_setup import neo4j_conn


def ingest_sample_data():
    """Ingest rich sample data into the Neo4j knowledge graph.

    First deletes every node flagged ``sample = true``, then runs a fixed
    list of Cypher statements through ``neo4j_conn.run_query``. The
    statements create patients, diagnoses, biomarkers, trials and study
    sites, followed by the relationships that connect them.

    Only the Patient nodes are tagged ``sample: true`` by these statements,
    so the clean-up removes patients (and, via ``DETACH DELETE``, their
    relationships). The other node types are left in place and simply
    updated again by their ``MERGE ... SET`` statements.

    Each statement runs in its own ``try`` so that one failure does not stop
    the rest. Failures are printed as warnings and counted, and the closing
    message says whether every statement succeeded.

    Raises:
        Exception: Whatever ``neo4j_conn.run_query`` raises for the initial
            clean-up query, which is deliberately not guarded.
    """
    # Clear existing sample data
    neo4j_conn.run_query("MATCH (n) WHERE n.sample = true DETACH DELETE n")

    queries = [
        # Patients with rich profiles
        """
        MERGE (p1:Patient {id: 'P001'})
        SET p1 += {age: 45, gender: 'female', ethnicity: 'White', sample: true,
        zip_code: '02115', diagnosis_date: '2022-06-01'}
        """,
        """
        MERGE (p2:Patient {id: 'P002'})
        SET p2 += {age: 60, gender: 'male', ethnicity: 'Black/African American', sample: true,
        zip_code: '77030', diagnosis_date: '2021-11-15'}
        """,
        """
        MERGE (p3:Patient {id: 'P003'})
        SET p3 += {age: 38, gender: 'female', ethnicity: 'Hispanic/Latino', sample: true,
        zip_code: '94102', diagnosis_date: '2023-02-10'}
        """,
        """
        MERGE (p4:Patient {id: 'P004'})
        SET p4 += {age: 67, gender: 'male', ethnicity: 'Asian', sample: true,
        zip_code: '10001', diagnosis_date: '2022-09-20'}
        """,
        """
        MERGE (p5:Patient {id: 'P005'})
        SET p5 += {age: 34, gender: 'female', ethnicity: 'White', sample: true,
        zip_code: '60601', diagnosis_date: '2023-07-01'}
        """,
        # Diagnoses
        """MERGE (d1:Diagnosis {code: 'C50'}) SET d1.name = 'Breast Cancer', d1.snomed = '254837009'""",
        """MERGE (d2:Diagnosis {code: 'C61'}) SET d2.name = 'Prostate Cancer', d2.snomed = '399068003'""",
        """MERGE (d3:Diagnosis {code: 'C34'}) SET d3.name = 'Non-Small Cell Lung Cancer', d3.snomed = '363346000'""",
        """MERGE (d4:Diagnosis {code: 'C18'}) SET d4.name = 'Colorectal Cancer', d4.snomed = '93761005'""",
        # Biomarkers
        """MERGE (b1:Biomarker {id: 'HER2_POS'}) SET b1.name = 'HER2 Positive', b1.loinc = '85319-2'""",
        """MERGE (b2:Biomarker {id: 'EGFR_L858R'}) SET b2.name = 'EGFR L858R Mutation', b2.loinc = '81704-9'""",
        # NOTE(review): BRCA2_POS carries the same LOINC code as HER2_POS
        # above ('85319-2') - looks like a copy/paste slip; confirm the code.
        """MERGE (b3:Biomarker {id: 'BRCA2_POS'}) SET b3.name = 'BRCA2 Mutation', b3.loinc = '85319-2'""",
        """MERGE (b4:Biomarker {id: 'MSI_H'}) SET b4.name = 'MSI-High', b4.loinc = '85077-6'""",
        """MERGE (b5:Biomarker {id: 'PDL1_HIGH'}) SET b5.name = 'PD-L1 High (>50%)', b5.loinc = '73977-1'""",
        # Trials
        """
        MERGE (t1:Trial {id: 'NCT04889131'})
        SET t1 += {phase: 'PHASE2', condition: 'Breast Cancer', status: 'RECRUITING',
        title: 'Precision HER2+ Breast Cancer Study', min_age: 18, max_age: 75,
        enrollment_target: 150, enrolled: 87, sponsor: 'Dana-Farber'}
        """,
        """
        MERGE (t2:Trial {id: 'NCT05123456'})
        SET t2 += {phase: 'PHASE3', condition: 'Breast Cancer', status: 'RECRUITING',
        title: 'Immunotherapy Combination for Advanced Breast Cancer', min_age: 18,
        enrollment_target: 400, enrolled: 142, sponsor: 'Pharma Innovations Inc'}
        """,
        """
        MERGE (t3:Trial {id: 'NCT05456789'})
        SET t3 += {phase: 'PHASE2', condition: 'Prostate Cancer', status: 'RECRUITING',
        title: 'BRCA2 Prostate Cancer PARP Inhibitor Trial', min_age: 18,
        enrollment_target: 120, enrolled: 54, sponsor: 'Oncology Research Group'}
        """,
        """
        MERGE (t4:Trial {id: 'NCT06112233'})
        SET t4 += {phase: 'PHASE3', condition: 'Non-Small Cell Lung Cancer', status: 'RECRUITING',
        title: 'EGFR-Mutant NSCLC Targeted Therapy Study', min_age: 18,
        enrollment_target: 300, enrolled: 178, sponsor: 'Global Cancer Institute'}
        """,
        """
        MERGE (t5:Trial {id: 'NCT05334455'})
        SET t5 += {phase: 'PHASE2', condition: 'Colorectal Cancer', status: 'RECRUITING',
        title: 'MSI-H Colorectal Cancer Immunotherapy Study', min_age: 18,
        enrollment_target: 100, enrolled: 45, sponsor: 'NCI'}
        """,
        # Study Sites
        """
        MERGE (s1:StudySite {id: 'DFCI'})
        SET s1 += {name: 'Dana-Farber Cancer Institute', city: 'Boston', state: 'MA',
        lat: 42.3376, lon: -71.1083, active_trials: 4}
        """,
        """
        MERGE (s2:StudySite {id: 'MDACC'})
        SET s2 += {name: 'MD Anderson Cancer Center', city: 'Houston', state: 'TX',
        lat: 29.7066, lon: -95.3990, active_trials: 6}
        """,
        """
        MERGE (s3:StudySite {id: 'MSK'})
        SET s3 += {name: 'Memorial Sloan Kettering', city: 'New York', state: 'NY',
        lat: 40.7644, lon: -73.9581, active_trials: 5}
        """,
        # Patient-Diagnosis relationships
        """MATCH (p:Patient {id: 'P001'}), (d:Diagnosis {code: 'C50'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
        """MATCH (p:Patient {id: 'P002'}), (d:Diagnosis {code: 'C61'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
        """MATCH (p:Patient {id: 'P003'}), (d:Diagnosis {code: 'C50'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
        """MATCH (p:Patient {id: 'P004'}), (d:Diagnosis {code: 'C34'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
        """MATCH (p:Patient {id: 'P005'}), (d:Diagnosis {code: 'C18'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
        # Patient-Biomarker relationships
        """MATCH (p:Patient {id: 'P001'}), (b:Biomarker {id: 'HER2_POS'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
        """MATCH (p:Patient {id: 'P002'}), (b:Biomarker {id: 'BRCA2_POS'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
        """MATCH (p:Patient {id: 'P004'}), (b:Biomarker {id: 'EGFR_L858R'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
        """MATCH (p:Patient {id: 'P004'}), (b:Biomarker {id: 'PDL1_HIGH'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
        """MATCH (p:Patient {id: 'P005'}), (b:Biomarker {id: 'MSI_H'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
        # Diagnosis-Trial eligibility
        """MATCH (d:Diagnosis {code: 'C50'}), (t:Trial {id: 'NCT04889131'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
        """MATCH (d:Diagnosis {code: 'C50'}), (t:Trial {id: 'NCT05123456'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
        """MATCH (d:Diagnosis {code: 'C61'}), (t:Trial {id: 'NCT05456789'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
        """MATCH (d:Diagnosis {code: 'C34'}), (t:Trial {id: 'NCT06112233'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
        """MATCH (d:Diagnosis {code: 'C18'}), (t:Trial {id: 'NCT05334455'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
        # Trial-Site relationships
        """MATCH (t:Trial {id: 'NCT04889131'}), (s:StudySite {id: 'DFCI'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
        """MATCH (t:Trial {id: 'NCT04889131'}), (s:StudySite {id: 'MSK'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
        """MATCH (t:Trial {id: 'NCT05123456'}), (s:StudySite {id: 'MDACC'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
        """MATCH (t:Trial {id: 'NCT05123456'}), (s:StudySite {id: 'MSK'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
        """MATCH (t:Trial {id: 'NCT05456789'}), (s:StudySite {id: 'MDACC'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
        # Biomarker-Trial requirements
        """MATCH (b:Biomarker {id: 'HER2_POS'}), (t:Trial {id: 'NCT04889131'}) MERGE (b)-[:REQUIRED_FOR]->(t)""",
        """MATCH (b:Biomarker {id: 'EGFR_L858R'}), (t:Trial {id: 'NCT06112233'}) MERGE (b)-[:REQUIRED_FOR]->(t)""",
        """MATCH (b:Biomarker {id: 'MSI_H'}), (t:Trial {id: 'NCT05334455'}) MERGE (b)-[:REQUIRED_FOR]->(t)""",
    ]

    # Best-effort loading: a failing statement is reported and counted, and
    # the remaining statements still run. Node statements come before the
    # relationship statements because the latter MATCH on nodes that must
    # already exist.
    failed = 0
    for query in queries:
        try:
            neo4j_conn.run_query(query)
        except Exception as e:
            failed += 1
            print(f"Ingestion warning: {e}")

    # Only claim success when nothing failed; previously the success line was
    # printed even if every statement had raised.
    if failed:
        print(
            f"Rich sample data ingestion finished with {failed} of "
            f"{len(queries)} statements failing."
        )
    else:
        print("Rich sample data ingested successfully.")
if __name__ == "__main__":
    # Script entry point: load the sample data, then release the shared
    # connection. The close sits in ``finally`` so the connection is released
    # even when ingestion raises (the initial clean-up query is not guarded).
    try:
        ingest_sample_data()
    finally:
        neo4j_conn.close()
|