File size: 7,783 Bytes
59abb4f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
from neo4j_setup import neo4j_conn


def ingest_sample_data(conn=None):
    """Ingest rich sample data into the Neo4j knowledge graph.

    First deletes every node whose ``sample`` property is true (in this data
    set only the Patient nodes carry that flag), then runs a fixed list of
    Cypher statements that upsert patients, diagnoses, biomarkers, trials and
    study sites via ``MERGE`` and link them with relationships.

    Ingestion is best-effort: a statement that raises is reported with an
    "Ingestion warning" line and the remaining statements still run. The
    success message is printed only when every statement succeeded;
    otherwise a summary of the failures is printed instead.

    Args:
        conn: Object exposing ``run_query(cypher)``. Defaults to the
            module-level ``neo4j_conn`` when omitted.

    Returns:
        The number of ingestion statements that failed (0 on full success).

    Raises:
        Exception: whatever ``run_query`` raises for the initial clean-up
            statement; that call is deliberately not guarded.
    """
    if conn is None:
        conn = neo4j_conn

    # Clear existing sample data
    conn.run_query("MATCH (n) WHERE n.sample = true DETACH DELETE n")

    queries = [
        # Patients with rich profiles
        """
        MERGE (p1:Patient {id: 'P001'})
        SET p1 += {age: 45, gender: 'female', ethnicity: 'White', sample: true,
                   zip_code: '02115', diagnosis_date: '2022-06-01'}
        """,
        """
        MERGE (p2:Patient {id: 'P002'})
        SET p2 += {age: 60, gender: 'male', ethnicity: 'Black/African American', sample: true,
                   zip_code: '77030', diagnosis_date: '2021-11-15'}
        """,
        """
        MERGE (p3:Patient {id: 'P003'})
        SET p3 += {age: 38, gender: 'female', ethnicity: 'Hispanic/Latino', sample: true,
                   zip_code: '94102', diagnosis_date: '2023-02-10'}
        """,
        """
        MERGE (p4:Patient {id: 'P004'})
        SET p4 += {age: 67, gender: 'male', ethnicity: 'Asian', sample: true,
                   zip_code: '10001', diagnosis_date: '2022-09-20'}
        """,
        """
        MERGE (p5:Patient {id: 'P005'})
        SET p5 += {age: 34, gender: 'female', ethnicity: 'White', sample: true,
                   zip_code: '60601', diagnosis_date: '2023-07-01'}
        """,

        # Diagnoses
        """MERGE (d1:Diagnosis {code: 'C50'}) SET d1.name = 'Breast Cancer', d1.snomed = '254837009'""",
        """MERGE (d2:Diagnosis {code: 'C61'}) SET d2.name = 'Prostate Cancer', d2.snomed = '399068003'""",
        """MERGE (d3:Diagnosis {code: 'C34'}) SET d3.name = 'Non-Small Cell Lung Cancer', d3.snomed = '363346000'""",
        """MERGE (d4:Diagnosis {code: 'C18'}) SET d4.name = 'Colorectal Cancer', d4.snomed = '93761005'""",

        # Biomarkers
        # NOTE(review): BRCA2_POS carries the same LOINC code ('85319-2') as
        # HER2_POS -- looks like a copy-paste; confirm the intended code.
        """MERGE (b1:Biomarker {id: 'HER2_POS'}) SET b1.name = 'HER2 Positive', b1.loinc = '85319-2'""",
        """MERGE (b2:Biomarker {id: 'EGFR_L858R'}) SET b2.name = 'EGFR L858R Mutation', b2.loinc = '81704-9'""",
        """MERGE (b3:Biomarker {id: 'BRCA2_POS'}) SET b3.name = 'BRCA2 Mutation', b3.loinc = '85319-2'""",
        """MERGE (b4:Biomarker {id: 'MSI_H'}) SET b4.name = 'MSI-High', b4.loinc = '85077-6'""",
        """MERGE (b5:Biomarker {id: 'PDL1_HIGH'}) SET b5.name = 'PD-L1 High (>50%)', b5.loinc = '73977-1'""",

        # Trials
        """
        MERGE (t1:Trial {id: 'NCT04889131'})
        SET t1 += {phase: 'PHASE2', condition: 'Breast Cancer', status: 'RECRUITING',
                   title: 'Precision HER2+ Breast Cancer Study', min_age: 18, max_age: 75,
                   enrollment_target: 150, enrolled: 87, sponsor: 'Dana-Farber'}
        """,
        """
        MERGE (t2:Trial {id: 'NCT05123456'})
        SET t2 += {phase: 'PHASE3', condition: 'Breast Cancer', status: 'RECRUITING',
                   title: 'Immunotherapy Combination for Advanced Breast Cancer', min_age: 18,
                   enrollment_target: 400, enrolled: 142, sponsor: 'Pharma Innovations Inc'}
        """,
        """
        MERGE (t3:Trial {id: 'NCT05456789'})
        SET t3 += {phase: 'PHASE2', condition: 'Prostate Cancer', status: 'RECRUITING',
                   title: 'BRCA2 Prostate Cancer PARP Inhibitor Trial', min_age: 18,
                   enrollment_target: 120, enrolled: 54, sponsor: 'Oncology Research Group'}
        """,
        """
        MERGE (t4:Trial {id: 'NCT06112233'})
        SET t4 += {phase: 'PHASE3', condition: 'Non-Small Cell Lung Cancer', status: 'RECRUITING',
                   title: 'EGFR-Mutant NSCLC Targeted Therapy Study', min_age: 18,
                   enrollment_target: 300, enrolled: 178, sponsor: 'Global Cancer Institute'}
        """,
        """
        MERGE (t5:Trial {id: 'NCT05334455'})
        SET t5 += {phase: 'PHASE2', condition: 'Colorectal Cancer', status: 'RECRUITING',
                   title: 'MSI-H Colorectal Cancer Immunotherapy Study', min_age: 18,
                   enrollment_target: 100, enrolled: 45, sponsor: 'NCI'}
        """,

        # Study Sites
        """
        MERGE (s1:StudySite {id: 'DFCI'})
        SET s1 += {name: 'Dana-Farber Cancer Institute', city: 'Boston', state: 'MA',
                   lat: 42.3376, lon: -71.1083, active_trials: 4}
        """,
        """
        MERGE (s2:StudySite {id: 'MDACC'})
        SET s2 += {name: 'MD Anderson Cancer Center', city: 'Houston', state: 'TX',
                   lat: 29.7066, lon: -95.3990, active_trials: 6}
        """,
        """
        MERGE (s3:StudySite {id: 'MSK'})
        SET s3 += {name: 'Memorial Sloan Kettering', city: 'New York', state: 'NY',
                   lat: 40.7644, lon: -73.9581, active_trials: 5}
        """,

        # Patient-Diagnosis relationships
        """MATCH (p:Patient {id: 'P001'}), (d:Diagnosis {code: 'C50'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
        """MATCH (p:Patient {id: 'P002'}), (d:Diagnosis {code: 'C61'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
        """MATCH (p:Patient {id: 'P003'}), (d:Diagnosis {code: 'C50'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
        """MATCH (p:Patient {id: 'P004'}), (d:Diagnosis {code: 'C34'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",
        """MATCH (p:Patient {id: 'P005'}), (d:Diagnosis {code: 'C18'}) MERGE (p)-[:HAS_DIAGNOSIS]->(d)""",

        # Patient-Biomarker relationships
        """MATCH (p:Patient {id: 'P001'}), (b:Biomarker {id: 'HER2_POS'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
        """MATCH (p:Patient {id: 'P002'}), (b:Biomarker {id: 'BRCA2_POS'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
        """MATCH (p:Patient {id: 'P004'}), (b:Biomarker {id: 'EGFR_L858R'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
        """MATCH (p:Patient {id: 'P004'}), (b:Biomarker {id: 'PDL1_HIGH'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",
        """MATCH (p:Patient {id: 'P005'}), (b:Biomarker {id: 'MSI_H'}) MERGE (p)-[:HAS_BIOMARKER]->(b)""",

        # Diagnosis-Trial eligibility
        """MATCH (d:Diagnosis {code: 'C50'}), (t:Trial {id: 'NCT04889131'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
        """MATCH (d:Diagnosis {code: 'C50'}), (t:Trial {id: 'NCT05123456'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
        """MATCH (d:Diagnosis {code: 'C61'}), (t:Trial {id: 'NCT05456789'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
        """MATCH (d:Diagnosis {code: 'C34'}), (t:Trial {id: 'NCT06112233'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",
        """MATCH (d:Diagnosis {code: 'C18'}), (t:Trial {id: 'NCT05334455'}) MERGE (d)-[:ELIGIBLE_FOR]->(t)""",

        # Trial-Site relationships
        """MATCH (t:Trial {id: 'NCT04889131'}), (s:StudySite {id: 'DFCI'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
        """MATCH (t:Trial {id: 'NCT04889131'}), (s:StudySite {id: 'MSK'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
        """MATCH (t:Trial {id: 'NCT05123456'}), (s:StudySite {id: 'MDACC'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
        """MATCH (t:Trial {id: 'NCT05123456'}), (s:StudySite {id: 'MSK'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",
        """MATCH (t:Trial {id: 'NCT05456789'}), (s:StudySite {id: 'MDACC'}) MERGE (t)-[:CONDUCTED_AT]->(s)""",

        # Biomarker-Trial requirements
        """MATCH (b:Biomarker {id: 'HER2_POS'}), (t:Trial {id: 'NCT04889131'}) MERGE (b)-[:REQUIRED_FOR]->(t)""",
        """MATCH (b:Biomarker {id: 'EGFR_L858R'}), (t:Trial {id: 'NCT06112233'}) MERGE (b)-[:REQUIRED_FOR]->(t)""",
        """MATCH (b:Biomarker {id: 'MSI_H'}), (t:Trial {id: 'NCT05334455'}) MERGE (b)-[:REQUIRED_FOR]->(t)""",
    ]

    # Best-effort ingestion: keep going after a failed statement, but track
    # failures so the final message does not claim success when some failed.
    failed = 0
    for query in queries:
        try:
            conn.run_query(query)
        except Exception as e:
            failed += 1
            print(f"Ingestion warning: {e}")

    if failed:
        print(
            f"Sample data ingestion finished with {failed} of "
            f"{len(queries)} queries failing."
        )
    else:
        print("Rich sample data ingested successfully.")

    return failed


if __name__ == "__main__":
    # Script entry point: run the ingestion, then always release the Neo4j
    # connection -- even when ingestion raises (the initial clean-up query
    # inside ingest_sample_data is not guarded against exceptions).
    try:
        ingest_sample_data()
    finally:
        neo4j_conn.close()