yakilee Claude Opus 4.6 committed on
Commit
434ddb9
·
1 Parent(s): ec40c1c

feat: enhance mock data with evidence pointers and MOCK-NCT IDs

Browse files

- Add EvidencePointer and TrialEvidencePointer to all decided (MET / NOT_MET) criteria for
end-to-end traceability (PRD requirement); UNKNOWN criteria carry no evidence pointers
- Use MOCK-NCT prefix on trial IDs to clearly distinguish synthetic data
- Add SourceDocument references for clinic letter, pathology, and labs
- Structure criteria into named variables for readability

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. app/services/mock_data.py +183 -69
app/services/mock_data.py CHANGED
@@ -2,6 +2,10 @@
2
 
3
  Provides realistic sample PatientProfile, TrialCandidates, and EligibilityLedgers
4
  so the UI is fully demonstrable without real MedGemma/Gemini/MCP calls.
 
 
 
 
5
  """
6
 
7
  from __future__ import annotations
@@ -13,28 +17,72 @@ from trialpath.models import (
13
  Demographics,
14
  Diagnosis,
15
  EligibilityLedger,
 
16
  GapItem,
17
  OverallAssessment,
18
  PatientProfile,
19
  PerformanceStatus,
 
20
  Treatment,
21
  TrialCandidate,
 
22
  UnknownField,
23
  )
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  MOCK_PATIENT_PROFILE = PatientProfile(
26
  patient_id="MOCK-P001",
 
27
  demographics=Demographics(age=62, sex="Female"),
28
  diagnosis=Diagnosis(
29
  primary_condition="Non-Small Cell Lung Cancer",
30
  histology="adenocarcinoma",
31
  stage="IIIB",
32
  ),
33
- performance_status=PerformanceStatus(scale="ECOG", value=1),
 
 
 
 
34
  biomarkers=[
35
- Biomarker(name="EGFR", result="Exon 19 deletion"),
36
- Biomarker(name="ALK", result="Negative"),
37
- Biomarker(name="PD-L1", result="45%"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  ],
39
  treatments=[
40
  Treatment(drug_name="Carboplatin", line=1),
@@ -46,9 +94,12 @@ MOCK_PATIENT_PROFILE = PatientProfile(
46
  ],
47
  )
48
 
 
 
 
49
  MOCK_TRIAL_CANDIDATES = [
50
  TrialCandidate(
51
- nct_id="NCT04000001",
52
  title="KEYNOTE-999: Pembrolizumab + Chemo for NSCLC",
53
  conditions=["NSCLC"],
54
  phase="Phase 3",
@@ -56,7 +107,7 @@ MOCK_TRIAL_CANDIDATES = [
56
  fingerprint_text="Pembrolizumab combination therapy advanced NSCLC",
57
  ),
58
  TrialCandidate(
59
- nct_id="NCT04000002",
60
  title="FLAURA-2: Osimertinib + Chemo in EGFR-mutated NSCLC",
61
  conditions=["NSCLC", "EGFR mutation"],
62
  phase="Phase 3",
@@ -64,7 +115,7 @@ MOCK_TRIAL_CANDIDATES = [
64
  fingerprint_text="Osimertinib first-line EGFR mutated advanced NSCLC",
65
  ),
66
  TrialCandidate(
67
- nct_id="NCT04000003",
68
  title="CheckMate-817: Nivolumab + Ipilimumab in Advanced NSCLC",
69
  conditions=["NSCLC"],
70
  phase="Phase 3",
@@ -73,37 +124,132 @@ MOCK_TRIAL_CANDIDATES = [
73
  ),
74
  ]
75
 
76
- MOCK_ELIGIBILITY_LEDGERS = [
77
- EligibilityLedger(
78
- patient_id="MOCK-P001",
79
- nct_id="NCT04000001",
80
- overall_assessment=OverallAssessment.UNCERTAIN,
81
- criteria=[
82
- CriterionAssessment(
83
- criterion_id="inc_1",
84
- type="inclusion",
85
- text="Confirmed NSCLC diagnosis",
86
- decision=CriterionDecision.MET,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  ),
88
- CriterionAssessment(
89
- criterion_id="inc_2",
90
- type="inclusion",
91
- text="ECOG performance status 0-1",
92
- decision=CriterionDecision.MET,
 
 
 
 
 
 
 
 
93
  ),
94
- CriterionAssessment(
95
- criterion_id="inc_3",
96
- type="inclusion",
97
- text="PD-L1 TPS >= 50%",
98
- decision=CriterionDecision.NOT_MET,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  ),
100
- CriterionAssessment(
101
- criterion_id="exc_1",
102
- type="exclusion",
103
- text="No active brain metastases",
104
- decision=CriterionDecision.UNKNOWN,
 
 
 
 
 
 
 
 
105
  ),
106
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  gaps=[
108
  GapItem(
109
  description="Brain MRI results needed",
@@ -114,48 +260,16 @@ MOCK_ELIGIBILITY_LEDGERS = [
114
  ),
115
  EligibilityLedger(
116
  patient_id="MOCK-P001",
117
- nct_id="NCT04000002",
118
  overall_assessment=OverallAssessment.LIKELY_ELIGIBLE,
119
- criteria=[
120
- CriterionAssessment(
121
- criterion_id="inc_1",
122
- type="inclusion",
123
- text="Confirmed NSCLC with EGFR mutation",
124
- decision=CriterionDecision.MET,
125
- ),
126
- CriterionAssessment(
127
- criterion_id="inc_2",
128
- type="inclusion",
129
- text="ECOG 0-1",
130
- decision=CriterionDecision.MET,
131
- ),
132
- CriterionAssessment(
133
- criterion_id="inc_3",
134
- type="inclusion",
135
- text="Locally advanced or metastatic",
136
- decision=CriterionDecision.MET,
137
- ),
138
- ],
139
  gaps=[],
140
  ),
141
  EligibilityLedger(
142
  patient_id="MOCK-P001",
143
- nct_id="NCT04000003",
144
  overall_assessment=OverallAssessment.LIKELY_INELIGIBLE,
145
- criteria=[
146
- CriterionAssessment(
147
- criterion_id="inc_1",
148
- type="inclusion",
149
- text="Stage IV NSCLC",
150
- decision=CriterionDecision.NOT_MET,
151
- ),
152
- CriterionAssessment(
153
- criterion_id="inc_2",
154
- type="inclusion",
155
- text="No prior immunotherapy",
156
- decision=CriterionDecision.UNKNOWN,
157
- ),
158
- ],
159
  gaps=[
160
  GapItem(
161
  description="KRAS mutation status unknown",
 
2
 
3
  Provides realistic sample PatientProfile, TrialCandidates, and EligibilityLedgers
4
  so the UI is fully demonstrable without real MedGemma/Gemini/MCP calls.
5
+
6
+ NOTE: NCT IDs use a MOCK-NCT prefix to clearly indicate synthetic data.
7
+ Real trial names are used for demo realism, but the IDs are not real
8
+ ClinicalTrials.gov identifiers.
9
  """
10
 
11
  from __future__ import annotations
 
17
  Demographics,
18
  Diagnosis,
19
  EligibilityLedger,
20
+ EvidencePointer,
21
  GapItem,
22
  OverallAssessment,
23
  PatientProfile,
24
  PerformanceStatus,
25
+ SourceDocument,
26
  Treatment,
27
  TrialCandidate,
28
+ TrialEvidencePointer,
29
  UnknownField,
30
  )
31
 
32
+ # ---------------------------------------------------------------------------
33
+ # Source documents (synthetic)
34
+ # ---------------------------------------------------------------------------
35
+ _CLINIC_LETTER = SourceDocument(
36
+ doc_id="clinic_letter_2026-01.pdf",
37
+ type="clinic_letter",
38
+ meta={"author": "Dr. Mueller", "date": "2026-01-15"},
39
+ )
40
+ _PATHOLOGY_REPORT = SourceDocument(
41
+ doc_id="pathology_2025-11.pdf",
42
+ type="pathology",
43
+ meta={"lab": "Charite Berlin", "date": "2025-11-20"},
44
+ )
45
+ _LAB_RESULTS = SourceDocument(
46
+ doc_id="labs_2026-01.pdf",
47
+ type="lab",
48
+ meta={"date": "2026-01-28"},
49
+ )
50
+
51
+ # ---------------------------------------------------------------------------
52
+ # Patient profile
53
+ # ---------------------------------------------------------------------------
54
  MOCK_PATIENT_PROFILE = PatientProfile(
55
  patient_id="MOCK-P001",
56
+ source_docs=[_CLINIC_LETTER, _PATHOLOGY_REPORT, _LAB_RESULTS],
57
  demographics=Demographics(age=62, sex="Female"),
58
  diagnosis=Diagnosis(
59
  primary_condition="Non-Small Cell Lung Cancer",
60
  histology="adenocarcinoma",
61
  stage="IIIB",
62
  ),
63
+ performance_status=PerformanceStatus(
64
+ scale="ECOG",
65
+ value=1,
66
+ evidence=[EvidencePointer(doc_id="clinic_letter_2026-01.pdf", page=1)],
67
+ ),
68
  biomarkers=[
69
+ Biomarker(
70
+ name="EGFR",
71
+ result="Exon 19 deletion",
72
+ evidence=[
73
+ EvidencePointer(doc_id="pathology_2025-11.pdf", page=2, span_id="egfr_exon19")
74
+ ],
75
+ ),
76
+ Biomarker(
77
+ name="ALK",
78
+ result="Negative",
79
+ evidence=[EvidencePointer(doc_id="pathology_2025-11.pdf", page=2, span_id="alk_neg")],
80
+ ),
81
+ Biomarker(
82
+ name="PD-L1",
83
+ result="45%",
84
+ evidence=[EvidencePointer(doc_id="pathology_2025-11.pdf", page=3, span_id="pdl1_45")],
85
+ ),
86
  ],
87
  treatments=[
88
  Treatment(drug_name="Carboplatin", line=1),
 
94
  ],
95
  )
96
 
97
+ # ---------------------------------------------------------------------------
98
+ # Trial candidates
99
+ # ---------------------------------------------------------------------------
100
  MOCK_TRIAL_CANDIDATES = [
101
  TrialCandidate(
102
+ nct_id="MOCK-NCT-KEYNOTE999",
103
  title="KEYNOTE-999: Pembrolizumab + Chemo for NSCLC",
104
  conditions=["NSCLC"],
105
  phase="Phase 3",
 
107
  fingerprint_text="Pembrolizumab combination therapy advanced NSCLC",
108
  ),
109
  TrialCandidate(
110
+ nct_id="MOCK-NCT-FLAURA2",
111
  title="FLAURA-2: Osimertinib + Chemo in EGFR-mutated NSCLC",
112
  conditions=["NSCLC", "EGFR mutation"],
113
  phase="Phase 3",
 
115
  fingerprint_text="Osimertinib first-line EGFR mutated advanced NSCLC",
116
  ),
117
  TrialCandidate(
118
+ nct_id="MOCK-NCT-CM817",
119
  title="CheckMate-817: Nivolumab + Ipilimumab in Advanced NSCLC",
120
  conditions=["NSCLC"],
121
  phase="Phase 3",
 
124
  ),
125
  ]
126
 
127
+ # ---------------------------------------------------------------------------
128
+ # Eligibility ledgers -- evidence-linked per PRD requirement
129
+ # ---------------------------------------------------------------------------
130
+
131
+ # --- KEYNOTE-999: UNCERTAIN (PD-L1 not met at 45% vs >=50%, brain mets unknown) ---
132
+ _KEYNOTE_CRITERIA = [
133
+ CriterionAssessment(
134
+ criterion_id="inc_1",
135
+ type="inclusion",
136
+ text="Confirmed NSCLC diagnosis",
137
+ decision=CriterionDecision.MET,
138
+ patient_evidence=[
139
+ EvidencePointer(doc_id="pathology_2025-11.pdf", page=1, span_id="dx_nsclc"),
140
+ ],
141
+ trial_evidence=[
142
+ TrialEvidencePointer(field="eligibility_text.inclusion", offset_start=0, offset_end=42),
143
+ ],
144
+ ),
145
+ CriterionAssessment(
146
+ criterion_id="inc_2",
147
+ type="inclusion",
148
+ text="ECOG performance status 0-1",
149
+ decision=CriterionDecision.MET,
150
+ patient_evidence=[
151
+ EvidencePointer(doc_id="clinic_letter_2026-01.pdf", page=1, span_id="ecog_1"),
152
+ ],
153
+ trial_evidence=[
154
+ TrialEvidencePointer(
155
+ field="eligibility_text.inclusion", offset_start=43, offset_end=89
156
  ),
157
+ ],
158
+ ),
159
+ CriterionAssessment(
160
+ criterion_id="inc_3",
161
+ type="inclusion",
162
+ text="PD-L1 TPS >= 50%",
163
+ decision=CriterionDecision.NOT_MET,
164
+ patient_evidence=[
165
+ EvidencePointer(doc_id="pathology_2025-11.pdf", page=3, span_id="pdl1_45"),
166
+ ],
167
+ trial_evidence=[
168
+ TrialEvidencePointer(
169
+ field="eligibility_text.inclusion", offset_start=90, offset_end=130
170
  ),
171
+ ],
172
+ ),
173
+ CriterionAssessment(
174
+ criterion_id="exc_1",
175
+ type="exclusion",
176
+ text="No active brain metastases",
177
+ decision=CriterionDecision.UNKNOWN,
178
+ ),
179
+ ]
180
+
181
+ # --- FLAURA-2: LIKELY_ELIGIBLE (EGFR mutation confirmed, stage/ECOG met) ---
182
+ _FLAURA_CRITERIA = [
183
+ CriterionAssessment(
184
+ criterion_id="inc_1",
185
+ type="inclusion",
186
+ text="Confirmed NSCLC with EGFR mutation",
187
+ decision=CriterionDecision.MET,
188
+ patient_evidence=[
189
+ EvidencePointer(doc_id="pathology_2025-11.pdf", page=2, span_id="egfr_exon19"),
190
+ ],
191
+ trial_evidence=[
192
+ TrialEvidencePointer(field="eligibility_text.inclusion", offset_start=0, offset_end=54),
193
+ ],
194
+ ),
195
+ CriterionAssessment(
196
+ criterion_id="inc_2",
197
+ type="inclusion",
198
+ text="ECOG 0-1",
199
+ decision=CriterionDecision.MET,
200
+ patient_evidence=[
201
+ EvidencePointer(doc_id="clinic_letter_2026-01.pdf", page=1, span_id="ecog_1"),
202
+ ],
203
+ trial_evidence=[
204
+ TrialEvidencePointer(
205
+ field="eligibility_text.inclusion", offset_start=55, offset_end=78
206
  ),
207
+ ],
208
+ ),
209
+ CriterionAssessment(
210
+ criterion_id="inc_3",
211
+ type="inclusion",
212
+ text="Locally advanced or metastatic",
213
+ decision=CriterionDecision.MET,
214
+ patient_evidence=[
215
+ EvidencePointer(doc_id="clinic_letter_2026-01.pdf", page=1, span_id="stage_iiib"),
216
+ ],
217
+ trial_evidence=[
218
+ TrialEvidencePointer(
219
+ field="eligibility_text.inclusion", offset_start=79, offset_end=125
220
  ),
221
  ],
222
+ ),
223
+ ]
224
+
225
+ # --- CheckMate-817: LIKELY_INELIGIBLE (requires Stage IV, patient is IIIB) ---
226
+ _CM817_CRITERIA = [
227
+ CriterionAssessment(
228
+ criterion_id="inc_1",
229
+ type="inclusion",
230
+ text="Stage IV NSCLC",
231
+ decision=CriterionDecision.NOT_MET,
232
+ patient_evidence=[
233
+ EvidencePointer(doc_id="clinic_letter_2026-01.pdf", page=1, span_id="stage_iiib"),
234
+ ],
235
+ trial_evidence=[
236
+ TrialEvidencePointer(field="eligibility_text.inclusion", offset_start=0, offset_end=35),
237
+ ],
238
+ ),
239
+ CriterionAssessment(
240
+ criterion_id="inc_2",
241
+ type="inclusion",
242
+ text="No prior immunotherapy",
243
+ decision=CriterionDecision.UNKNOWN,
244
+ ),
245
+ ]
246
+
247
+ MOCK_ELIGIBILITY_LEDGERS = [
248
+ EligibilityLedger(
249
+ patient_id="MOCK-P001",
250
+ nct_id="MOCK-NCT-KEYNOTE999",
251
+ overall_assessment=OverallAssessment.UNCERTAIN,
252
+ criteria=_KEYNOTE_CRITERIA,
253
  gaps=[
254
  GapItem(
255
  description="Brain MRI results needed",
 
260
  ),
261
  EligibilityLedger(
262
  patient_id="MOCK-P001",
263
+ nct_id="MOCK-NCT-FLAURA2",
264
  overall_assessment=OverallAssessment.LIKELY_ELIGIBLE,
265
+ criteria=_FLAURA_CRITERIA,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  gaps=[],
267
  ),
268
  EligibilityLedger(
269
  patient_id="MOCK-P001",
270
+ nct_id="MOCK-NCT-CM817",
271
  overall_assessment=OverallAssessment.LIKELY_INELIGIBLE,
272
+ criteria=_CM817_CRITERIA,
 
 
 
 
 
 
 
 
 
 
 
 
 
273
  gaps=[
274
  GapItem(
275
  description="KRAS mutation status unknown",