File size: 11,641 Bytes
2ed7323
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
#!/usr/bin/env python3
"""
Generate sample data for testing the Clinical Trial Matching Pipeline
"""

import pandas as pd
from datetime import datetime, timedelta

def create_sample_trials():
    """Create a sample trial database CSV.

    Builds five hard-coded oncology trial records, writes them to
    ``sample_trials.csv`` in the current working directory (UTF-8, no index
    column), and prints a one-line confirmation.

    Each record has four string columns:

    * ``nct_id`` - the trial identifier.
    * ``this_space`` - newline-separated description of the target
      population (disease setting, prior treatment, performance status, ...).
    * ``trial_text`` - free-text summary of the study design and endpoints.
    * ``trial_boilerplate_text`` - newline-separated list of "No ..."
      exclusion-style statements.

    Returns:
        pandas.DataFrame: the same five rows that were written to disk.
    """

    trials = [
        {
            'nct_id': 'NCT12345678',
            'this_space': '''Metastatic non-small cell lung cancer (NSCLC) with EGFR exon 19 deletion or L858R mutation
Prior treatment: At least one prior platinum-based chemotherapy regimen
ECOG performance status: 0-2
Measurable disease per RECIST v1.1
Adequate organ function''',
            'trial_text': '''Phase III randomized study of osimertinib versus platinum-based chemotherapy in patients with 
EGFR-mutated metastatic NSCLC who have progressed on first-line EGFR TKI therapy. Primary endpoint is progression-free 
survival. Secondary endpoints include overall survival, objective response rate, and quality of life.''',
            'trial_boilerplate_text': '''No active brain metastases requiring immediate intervention
No prior treatment with third-generation EGFR TKIs
No interstitial lung disease or pneumonitis
No congestive heart failure NYHA class III-IV
No HIV, hepatitis B, or hepatitis C infection'''
        },
        {
            'nct_id': 'NCT23456789',
            # The LVEF threshold uses a real U+2265 (greater-than-or-equal)
            # sign; the previous text was a mis-decoded UTF-8 byte sequence.
            'this_space': '''HER2-positive metastatic breast cancer
Prior treatment: Trastuzumab and pertuzumab in any setting
ECOG performance status: 0-1
Brain metastases allowed if treated and stable
LVEF ≥50%''',
            'trial_text': '''Phase II study of trastuzumab deruxtecan in HER2-positive metastatic breast cancer patients 
who have received prior trastuzumab and pertuzumab. Primary endpoint is objective response rate. Key secondary endpoints 
include duration of response, progression-free survival, and safety.''',
            'trial_boilerplate_text': '''No history of pneumonitis or interstitial lung disease
No concurrent cardiac dysfunction
No active hepatitis B or C infection
No pregnancy or breastfeeding'''
        },
        {
            'nct_id': 'NCT34567890',
            'this_space': '''Advanced melanoma with BRAF V600E or V600K mutation
Treatment-naive for metastatic disease (adjuvant therapy allowed if completed >6 months prior)
ECOG performance status: 0-1
No active autoimmune disease requiring systemic therapy
Adequate bone marrow, hepatic, and renal function''',
            'trial_text': '''Phase III randomized trial comparing dabrafenib plus trametinib versus vemurafenib monotherapy 
in previously untreated BRAF-mutant metastatic melanoma. Primary endpoint is overall survival. Secondary endpoints include 
progression-free survival, response rate, and toxicity.''',
            'trial_boilerplate_text': '''No prior systemic therapy for metastatic melanoma
No active brain metastases (treated and stable brain metastases allowed)
No history of inflammatory bowel disease
No significant cardiac disease
No HIV infection on antiretroviral therapy'''
        },
        {
            'nct_id': 'NCT45678901',
            'this_space': '''Microsatellite instability-high (MSI-H) or mismatch repair deficient (dMMR) advanced solid tumors
Progressive disease on or after prior standard therapy
ECOG performance status: 0-2
Measurable disease per RECIST v1.1
No prior checkpoint inhibitor therapy''',
            'trial_text': '''Phase II basket study of pembrolizumab in patients with MSI-H/dMMR advanced solid tumors. 
Primary endpoint is objective response rate by tumor type. Secondary endpoints include duration of response, 
progression-free survival, and overall survival.''',
            'trial_boilerplate_text': '''No active autoimmune disease requiring systemic therapy
No history of severe immune-related adverse events
No active pneumonitis or interstitial lung disease
No concurrent systemic corticosteroids (>10mg prednisone equivalent daily)
No HIV, hepatitis B, or hepatitis C infection'''
        },
        {
            'nct_id': 'NCT56789012',
            'this_space': '''Advanced or metastatic renal cell carcinoma (RCC), clear cell histology
No prior systemic therapy for advanced disease
Intermediate or poor risk per IMDC criteria
ECOG performance status: 0-1
Measurable disease per RECIST v1.1''',
            'trial_text': '''Phase III randomized study of cabozantinib plus nivolumab versus sunitinib in previously 
untreated advanced RCC. Primary endpoint is progression-free survival. Secondary endpoints include overall survival, 
objective response rate, and safety.''',
            'trial_boilerplate_text': '''No prior systemic therapy for metastatic RCC
No active brain metastases
No history of bowel perforation or fistula
No poorly controlled hypertension
No active hepatitis B or C infection
No significant cardiovascular disease'''
        }
    ]

    df = pd.DataFrame(trials)
    # Explicit UTF-8 so the non-ASCII "≥" round-trips regardless of locale.
    df.to_csv('sample_trials.csv', index=False, encoding='utf-8')
    print(f"✓ Created sample_trials.csv with {len(df)} trials")
    return df

def create_sample_patient_notes():
    """Create sample patient clinical notes CSV.

    Builds a 14-note synthetic timeline for one lung-cancer patient, writes
    it to ``sample_patient_notes.csv`` in the current working directory
    (UTF-8, no index column), and prints a one-line confirmation.

    Columns, in order:

    * ``date`` - note date, expressed as an offset in days from 2023-01-01.
    * ``text`` - the note body.
    * ``note_type`` - one of ``clinical_note``, ``pathology_report``,
      ``imaging_report`` or ``ngs_report``.

    Returns:
        pandas.DataFrame: the same rows that were written to disk.
    """

    base_date = datetime(2023, 1, 1)

    # (days after base_date, note_type, text), listed chronologically.
    timeline = [
        (0, 'clinical_note',
         'Patient is a 67-year-old male with a 40 pack-year smoking history presenting with cough and weight loss. CT chest shows a 4.5 cm right upper lobe mass with mediastinal lymphadenopathy.'),
        (7, 'pathology_report',
         'CT-guided lung biopsy performed. Pathology shows adenocarcinoma, moderately differentiated.'),
        (14, 'imaging_report',
         'PET/CT shows FDG-avid right upper lobe mass (SUVmax 12.3), right hilar nodes (SUVmax 8.7), and mediastinal nodes (SUVmax 9.2). No distant metastatic disease identified.'),
        (21, 'ngs_report',
         '''Next-generation sequencing (NGS) performed on lung biopsy specimen. 
Results: EGFR exon 19 deletion (L747_A750delinsP) detected.
Other findings: TP53 p.R273H mutation, MYC amplification (copy number gain).
PD-L1 expression by immunohistochemistry: 75% tumor proportion score.
TMB: 4 mutations/Mb (low).
No ALK, ROS1, BRAF, MET, RET, or KRAS alterations detected.'''),
        (28, 'pathology_report',
         'Mediastinoscopy with biopsy of station 4R and 7 lymph nodes. Pathology confirms metastatic adenocarcinoma. Clinical stage: T2aN2M0, stage IIIA.'),
        (42, 'clinical_note',
         'Patient underwent concurrent chemoradiation with carboplatin/pemetrexed and 60 Gy radiation to primary tumor and mediastinum. Tolerated well with grade 2 esophagitis.'),
        (112, 'imaging_report',
         'Post-treatment CT chest shows near-complete response of primary tumor (now 1.2 cm) and resolution of lymphadenopathy. Started consolidation durvalumab.'),
        (280, 'imaging_report',
         'Surveillance CT shows new liver lesions (segment 6 and 7, largest 2.3 cm) and increase in size of lung primary to 3.1 cm. Progression of disease.'),
        (287, 'clinical_note',
         'Patient now has metastatic NSCLC (stage IV). ECOG performance status 1. Discussed treatment options. Given EGFR mutation, recommend EGFR TKI therapy.'),
        (294, 'clinical_note',
         'Started osimertinib 80 mg daily for EGFR-mutant metastatic NSCLC.'),
        (378, 'imaging_report',
         'Restaging CT shows partial response. Liver lesions decreased to 1.2 and 0.9 cm. Primary lung tumor stable at 2.8 cm. Tolerating osimertinib well with mild diarrhea and dry skin.'),
        (560, 'clinical_note',
         'Patient reports increased fatigue and back pain over past 3 weeks.'),
        (567, 'imaging_report',
         '''CT chest/abdomen/pelvis shows:
- Progression of liver metastases (segment 6: 3.8 cm, previously 1.2 cm; segment 7: 2.9 cm, previously 0.9 cm)
- New liver lesions in segments 4 and 5
- Lung primary increased to 4.2 cm
- New small pleural effusion
Assessment: Progressive disease on osimertinib.'''),
        (574, 'clinical_note',
         'MRI brain with contrast shows no brain metastases. Patient has progressive EGFR-mutant NSCLC after first-line osimertinib. ECOG PS 1. Discussing clinical trial options for second-line therapy.'),
    ]

    # Key order here fixes the CSV column order: date, text, note_type.
    notes = [
        {
            'date': base_date + timedelta(days=day_offset),
            'text': text,
            'note_type': note_type,
        }
        for day_offset, note_type, text in timeline
    ]

    df = pd.DataFrame(notes)
    df.to_csv('sample_patient_notes.csv', index=False, encoding='utf-8')
    print(f"✓ Created sample_patient_notes.csv with {len(df)} notes")
    return df

def create_sample_patient_summary():
    """Create a sample patient summary text file.

    Writes a fixed, pre-written plain-text summary (cancer type, biomarkers,
    treatment history, disease course, current status and a "Boilerplate"
    section) to ``sample_patient_summary.txt`` in the current working
    directory and prints a one-line confirmation.

    Returns:
        str: the exact summary text that was written to the file.
    """

    summary = """Cancer type: Non-small cell lung cancer (NSCLC)
Histology: Adenocarcinoma, moderately differentiated
Stage at diagnosis: Stage IIIA (T2aN2M0)
Current extent: Metastatic (stage IV) with liver metastases

Biomarkers:
- EGFR exon 19 deletion (L747_A750delinsP)
- TP53 p.R273H mutation
- MYC amplification
- PD-L1 75% TPS
- TMB: 4 mutations/Mb (low)

Treatment history:
# 1/28/2023 - 4/15/2023: Concurrent chemoradiation (carboplatin/pemetrexed with 60 Gy)
# 4/22/2023 - 10/5/2023: Consolidation durvalumab
# 10/19/2023 - present: Osimertinib 80 mg daily for metastatic disease

Disease course:
- Initial diagnosis: January 2023, stage IIIA
- Near-complete response to chemoradiation
- Progression to stage IV in September 2023 (liver metastases)
- Partial response to osimertinib
- Current progression on osimertinib (July 2024) after ~9 months of therapy

Current status:
- ECOG performance status: 1
- Progressive disease with liver metastases
- No brain metastases on recent MRI

Boilerplate:
No evidence of brain metastases (MRI brain 7/22/2024).
No history of pneumonitis, interstitial lung disease, congestive heart failure, HIV, or hepatitis infection documented.
Adequate performance status (ECOG 1).
"""

    # Pin the encoding so the output does not depend on the platform locale.
    with open('sample_patient_summary.txt', 'w', encoding='utf-8') as f:
        f.write(summary)

    print("✓ Created sample_patient_summary.txt")
    return summary

# Script entry point: generate all three sample files in the current
# working directory and print a short report of what was written.
if __name__ == "__main__":
    print("Generating sample data for Clinical Trial Matching Pipeline...\n")

    # Keep the returned frames so the report below states the real row
    # counts instead of hard-coded numbers that can drift from the data.
    trials_df = create_sample_trials()
    notes_df = create_sample_patient_notes()
    create_sample_patient_summary()

    print("\n✓ All sample files created successfully!")
    print("\nFiles generated:")
    print(f"  - sample_trials.csv ({len(trials_df)} clinical trials)")
    print(f"  - sample_patient_notes.csv ({len(notes_df)} clinical notes)")
    print("  - sample_patient_summary.txt (pre-made summary)")
    print("\nYou can now use these files to test the Gradio application.")