Spaces:
Running
on
L4
Running
on
L4
| #!/usr/bin/env python3 | |
| """ | |
| Generate sample data for testing the Clinical Trial Matching Pipeline | |
| """ | |
| import pandas as pd | |
| from datetime import datetime, timedelta | |
def create_sample_trials(output_path: str = 'sample_trials.csv') -> pd.DataFrame:
    """Build the sample trial table and write it to a CSV file.

    The table holds five fictional trials. Each row has four text columns:

    * ``nct_id`` - the trial identifier string.
    * ``this_space`` - criteria text (disease setting, prior treatment,
      performance status, ...).
    * ``trial_text`` - a short description of the study design and endpoints.
    * ``trial_boilerplate_text`` - a list of "No ..." exclusion statements.

    Args:
        output_path: Where the CSV is written. Defaults to
            ``sample_trials.csv`` in the current working directory, which is
            what the script entry point relies on.

    Returns:
        The DataFrame that was written (one row per trial).
    """
    # NOTE: continuation lines inside the triple-quoted strings start at
    # column 0 on purpose - any indentation would become part of the data.
    trials = [
        {
            'nct_id': 'NCT12345678',
            'this_space': '''Metastatic non-small cell lung cancer (NSCLC) with EGFR exon 19 deletion or L858R mutation
Prior treatment: At least one prior platinum-based chemotherapy regimen
ECOG performance status: 0-2
Measurable disease per RECIST v1.1
Adequate organ function''',
            'trial_text': '''Phase III randomized study of osimertinib versus platinum-based chemotherapy in patients with
EGFR-mutated metastatic NSCLC who have progressed on first-line EGFR TKI therapy. Primary endpoint is progression-free
survival. Secondary endpoints include overall survival, objective response rate, and quality of life.''',
            'trial_boilerplate_text': '''No active brain metastases requiring immediate intervention
No prior treatment with third-generation EGFR TKIs
No interstitial lung disease or pneumonitis
No congestive heart failure NYHA class III-IV
No HIV, hepatitis B, or hepatitis C infection'''
        },
        {
            'nct_id': 'NCT23456789',
            'this_space': '''HER2-positive metastatic breast cancer
Prior treatment: Trastuzumab and pertuzumab in any setting
ECOG performance status: 0-1
Brain metastases allowed if treated and stable
LVEF ≥50%''',
            'trial_text': '''Phase II study of trastuzumab deruxtecan in HER2-positive metastatic breast cancer patients
who have received prior trastuzumab and pertuzumab. Primary endpoint is objective response rate. Key secondary endpoints
include duration of response, progression-free survival, and safety.''',
            'trial_boilerplate_text': '''No history of pneumonitis or interstitial lung disease
No concurrent cardiac dysfunction
No active hepatitis B or C infection
No pregnancy or breastfeeding'''
        },
        {
            'nct_id': 'NCT34567890',
            'this_space': '''Advanced melanoma with BRAF V600E or V600K mutation
Treatment-naive for metastatic disease (adjuvant therapy allowed if completed >6 months prior)
ECOG performance status: 0-1
No active autoimmune disease requiring systemic therapy
Adequate bone marrow, hepatic, and renal function''',
            'trial_text': '''Phase III randomized trial comparing dabrafenib plus trametinib versus vemurafenib monotherapy
in previously untreated BRAF-mutant metastatic melanoma. Primary endpoint is overall survival. Secondary endpoints include
progression-free survival, response rate, and toxicity.''',
            'trial_boilerplate_text': '''No prior systemic therapy for metastatic melanoma
No active brain metastases (treated and stable brain metastases allowed)
No history of inflammatory bowel disease
No significant cardiac disease
No HIV infection on antiretroviral therapy'''
        },
        {
            'nct_id': 'NCT45678901',
            'this_space': '''Microsatellite instability-high (MSI-H) or mismatch repair deficient (dMMR) advanced solid tumors
Progressive disease on or after prior standard therapy
ECOG performance status: 0-2
Measurable disease per RECIST v1.1
No prior checkpoint inhibitor therapy''',
            'trial_text': '''Phase II basket study of pembrolizumab in patients with MSI-H/dMMR advanced solid tumors.
Primary endpoint is objective response rate by tumor type. Secondary endpoints include duration of response,
progression-free survival, and overall survival.''',
            'trial_boilerplate_text': '''No active autoimmune disease requiring systemic therapy
No history of severe immune-related adverse events
No active pneumonitis or interstitial lung disease
No concurrent systemic corticosteroids (>10mg prednisone equivalent daily)
No HIV, hepatitis B, or hepatitis C infection'''
        },
        {
            'nct_id': 'NCT56789012',
            'this_space': '''Advanced or metastatic renal cell carcinoma (RCC), clear cell histology
No prior systemic therapy for advanced disease
Intermediate or poor risk per IMDC criteria
ECOG performance status: 0-1
Measurable disease per RECIST v1.1''',
            'trial_text': '''Phase III randomized study of cabozantinib plus nivolumab versus sunitinib in previously
untreated advanced RCC. Primary endpoint is progression-free survival. Secondary endpoints include overall survival,
objective response rate, and safety.''',
            'trial_boilerplate_text': '''No prior systemic therapy for metastatic RCC
No active brain metastases
No history of bowel perforation or fistula
No poorly controlled hypertension
No active hepatitis B or C infection
No significant cardiovascular disease'''
        }
    ]

    df = pd.DataFrame(trials)
    # index=False keeps the pandas row index out of the file, so the CSV
    # contains exactly the four columns above.
    df.to_csv(output_path, index=False)
    print(f"✓ Created {output_path} with {len(df)} trials")
    return df
def create_sample_patient_notes(output_path: str = 'sample_patient_notes.csv') -> pd.DataFrame:
    """Build a sample patient's note timeline and write it to a CSV file.

    The timeline has fourteen notes for one fictional patient. Each note is
    dated as an offset in days from 2023-01-01 and tagged with a
    ``note_type`` (``clinical_note``, ``pathology_report``,
    ``imaging_report`` or ``ngs_report``).

    Args:
        output_path: Where the CSV is written. Defaults to
            ``sample_patient_notes.csv`` in the current working directory,
            which is what the script entry point relies on.

    Returns:
        The DataFrame that was written, with columns ``date``, ``text`` and
        ``note_type`` (in that order), one row per note.
    """
    base_date = datetime(2023, 1, 1)

    # One row per note: (days after base_date, note_type, text).
    # NOTE: continuation lines inside the triple-quoted strings start at
    # column 0 on purpose - any indentation would become part of the data.
    timeline = [
        (0, 'clinical_note',
         'Patient is a 67-year-old male with a 40 pack-year smoking history presenting with cough and weight loss. CT chest shows a 4.5 cm right upper lobe mass with mediastinal lymphadenopathy.'),
        (7, 'pathology_report',
         'CT-guided lung biopsy performed. Pathology shows adenocarcinoma, moderately differentiated.'),
        (14, 'imaging_report',
         'PET/CT shows FDG-avid right upper lobe mass (SUVmax 12.3), right hilar nodes (SUVmax 8.7), and mediastinal nodes (SUVmax 9.2). No distant metastatic disease identified.'),
        (21, 'ngs_report',
         '''Next-generation sequencing (NGS) performed on lung biopsy specimen.
Results: EGFR exon 19 deletion (L747_A750delinsP) detected.
Other findings: TP53 p.R273H mutation, MYC amplification (copy number gain).
PD-L1 expression by immunohistochemistry: 75% tumor proportion score.
TMB: 4 mutations/Mb (low).
No ALK, ROS1, BRAF, MET, RET, or KRAS alterations detected.'''),
        (28, 'pathology_report',
         'Mediastinoscopy with biopsy of station 4R and 7 lymph nodes. Pathology confirms metastatic adenocarcinoma. Clinical stage: T2aN2M0, stage IIIA.'),
        (42, 'clinical_note',
         'Patient underwent concurrent chemoradiation with carboplatin/pemetrexed and 60 Gy radiation to primary tumor and mediastinum. Tolerated well with grade 2 esophagitis.'),
        (112, 'imaging_report',
         'Post-treatment CT chest shows near-complete response of primary tumor (now 1.2 cm) and resolution of lymphadenopathy. Started consolidation durvalumab.'),
        (280, 'imaging_report',
         'Surveillance CT shows new liver lesions (segment 6 and 7, largest 2.3 cm) and increase in size of lung primary to 3.1 cm. Progression of disease.'),
        (287, 'clinical_note',
         'Patient now has metastatic NSCLC (stage IV). ECOG performance status 1. Discussed treatment options. Given EGFR mutation, recommend EGFR TKI therapy.'),
        (294, 'clinical_note',
         'Started osimertinib 80 mg daily for EGFR-mutant metastatic NSCLC.'),
        (378, 'imaging_report',
         'Restaging CT shows partial response. Liver lesions decreased to 1.2 and 0.9 cm. Primary lung tumor stable at 2.8 cm. Tolerating osimertinib well with mild diarrhea and dry skin.'),
        (560, 'clinical_note',
         'Patient reports increased fatigue and back pain over past 3 weeks.'),
        (567, 'imaging_report',
         '''CT chest/abdomen/pelvis shows:
- Progression of liver metastases (segment 6: 3.8 cm, previously 1.2 cm; segment 7: 2.9 cm, previously 0.9 cm)
- New liver lesions in segments 4 and 5
- Lung primary increased to 4.2 cm
- New small pleural effusion
Assessment: Progressive disease on osimertinib.'''),
        (574, 'clinical_note',
         'MRI brain with contrast shows no brain metastases. Patient has progressive EGFR-mutant NSCLC after first-line osimertinib. ECOG PS 1. Discussing clinical trial options for second-line therapy.'),
    ]

    # Key order fixes the CSV column order: date, text, note_type.
    notes = [
        {
            'date': base_date + timedelta(days=day_offset),
            'text': text,
            'note_type': note_type,
        }
        for day_offset, note_type, text in timeline
    ]

    df = pd.DataFrame(notes)
    # index=False keeps the pandas row index out of the file.
    df.to_csv(output_path, index=False)
    print(f"✓ Created {output_path} with {len(df)} notes")
    return df
def create_sample_patient_summary(output_path: str = 'sample_patient_summary.txt') -> str:
    """Write a fixed, pre-made patient summary to a text file.

    Args:
        output_path: Where the text file is written. Defaults to
            ``sample_patient_summary.txt`` in the current working directory,
            which is what the script entry point relies on.

    Returns:
        The summary text exactly as written to the file.
    """
    # NOTE: lines of the summary start at column 0 on purpose - any
    # indentation would become part of the written text.
    summary = """Age: 67
Sex: Male
Cancer type: Non-small cell lung cancer (NSCLC)
Histology: Adenocarcinoma, moderately differentiated
Stage at diagnosis: Stage IIIA (T2aN2M0)
Current extent: Metastatic (stage IV) with liver metastases
Biomarkers:
- EGFR exon 19 deletion (L747_A750delinsP)
- TP53 p.R273H mutation
- MYC amplification
- PD-L1 75% TPS
- TMB: 4 mutations/Mb (low)
Treatment history:
# 1/28/2023 - 4/15/2023: Concurrent chemoradiation (carboplatin/pemetrexed with 60 Gy)
# 4/22/2023 - 10/5/2023: Consolidation durvalumab
# 10/19/2023 - present: Osimertinib 80 mg daily for metastatic disease
Disease course:
- Initial diagnosis: January 2023, stage IIIA
- Near-complete response to chemoradiation
- Progression to stage IV in September 2023 (liver metastases)
- Partial response to osimertinib
- Current progression on osimertinib (July 2024) after ~9 months of therapy
Current status:
- ECOG performance status: 1
- Progressive disease with liver metastases
- No brain metastases on recent MRI
Boilerplate:
No evidence of brain metastases (MRI brain 7/22/2024).
No history of pneumonitis, interstitial lung disease, congestive heart failure, HIV, or hepatitis infection documented.
Adequate performance status (ECOG 1).
"""
    # Pin the encoding so the file does not depend on the platform's locale
    # default; the current text is plain ASCII, so the bytes are unchanged.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(summary)
    print(f"✓ Created {output_path}")
    return summary
if __name__ == "__main__":
    # Script entry point: write all three sample files into the current
    # working directory and print a short report of what was produced.
    print("Generating sample data for Clinical Trial Matching Pipeline...\n")

    # Keep the returned frames so the report below states the real row
    # counts instead of numbers that must be kept in sync by hand.
    trials_df = create_sample_trials()
    notes_df = create_sample_patient_notes()
    create_sample_patient_summary()

    print("\n✓ All sample files created successfully!")
    print("\nFiles generated:")
    print(f" - sample_trials.csv ({len(trials_df)} clinical trials)")
    print(f" - sample_patient_notes.csv ({len(notes_df)} clinical notes)")
    print(" - sample_patient_summary.txt (pre-made summary)")
    print("\nYou can now use these files to test the Gradio application.")