yakilee Claude Opus 4.6 committed on
Commit
6bb2c30
·
1 Parent(s): 743ac52

feat: add e2e smoke test

Browse files

Full journey smoke test verifying state transitions, data export,
gap loop, service chain with mocked services, data contract
serialization roundtrips, and latency budget.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. tests/test_e2e.py +231 -0
tests/test_e2e.py ADDED
@@ -0,0 +1,231 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """End-to-end smoke test: full journey with mocked services."""
2
+ from __future__ import annotations
3
+
4
+ from app.services.mock_data import (
5
+ MOCK_ELIGIBILITY_LEDGERS,
6
+ MOCK_PATIENT_PROFILE,
7
+ MOCK_TRIAL_CANDIDATES,
8
+ )
9
+ from app.services.state_manager import JOURNEY_STATES
10
+ from trialpath.models import (
11
+ EligibilityLedger,
12
+ PatientProfile,
13
+ SearchAnchors,
14
+ TrialCandidate,
15
+ )
16
+
17
+
18
class TestE2EJourney:
    """Walk a session dict through INGEST, PRESCREEN, VALIDATE_TRIALS, GAP_FOLLOWUP, SUMMARY.

    Every test operates on a plain ``dict`` produced by
    ``_build_session_state`` and fills it with the module-level mock fixtures.
    """

    def _build_session_state(self) -> dict:
        """Return a fresh session-state dict (a Streamlit stand-in) parked at INGEST."""
        return dict(
            journey_state="INGEST",
            parlant_session_id=None,
            parlant_agent_id=None,
            parlant_session_active=False,
            patient_profile=None,
            uploaded_files=[],
            search_anchors=None,
            trial_candidates=[],
            eligibility_ledger=[],
            last_event_offset=0,
        )

    def test_full_journey_state_transitions(self):
        """Step through every transition in order, checking the state before each hop."""
        session = self._build_session_state()

        # INGEST -> PRESCREEN: a patient profile becomes available.
        assert session["journey_state"] == "INGEST"
        session.update(
            patient_profile=MOCK_PATIENT_PROFILE,
            journey_state="PRESCREEN",
        )

        # PRESCREEN -> VALIDATE_TRIALS: search anchors and candidate trials are stored.
        assert session["journey_state"] == "PRESCREEN"
        session.update(
            search_anchors=SearchAnchors(
                condition="Non-Small Cell Lung Cancer",
                biomarkers=["EGFR"],
                stage="IIIB",
            ),
            trial_candidates=list(MOCK_TRIAL_CANDIDATES),
            journey_state="VALIDATE_TRIALS",
        )

        # VALIDATE_TRIALS -> GAP_FOLLOWUP: eligibility ledgers are stored.
        assert session["journey_state"] == "VALIDATE_TRIALS"
        session.update(
            eligibility_ledger=list(MOCK_ELIGIBILITY_LEDGERS),
            journey_state="GAP_FOLLOWUP",
        )

        # GAP_FOLLOWUP -> SUMMARY: no extra data is attached on this hop.
        assert session["journey_state"] == "GAP_FOLLOWUP"
        session["journey_state"] = "SUMMARY"

        assert session["journey_state"] == "SUMMARY"

    def test_journey_produces_exportable_data(self):
        """Check that a SUMMARY-state session carries profile, trials and ledgers."""
        session = self._build_session_state()
        session.update(
            patient_profile=MOCK_PATIENT_PROFILE,
            trial_candidates=list(MOCK_TRIAL_CANDIDATES),
            eligibility_ledger=list(MOCK_ELIGIBILITY_LEDGERS),
            journey_state="SUMMARY",
        )

        # The pieces read back here are the ones the doctor-packet export relies on.
        assert isinstance(session["patient_profile"], PatientProfile)
        assert len(session["trial_candidates"]) == 3
        assert len(session["eligibility_ledger"]) == 3

        # The mock ledgers are expected to hold exactly one entry per traffic light.
        lights = [entry.traffic_light for entry in session["eligibility_ledger"]]
        for colour in ("green", "yellow", "red"):
            assert lights.count(colour) == 1

    def test_gap_loop_back_to_ingest(self):
        """Check that returning from GAP_FOLLOWUP to INGEST keeps previously stored data."""
        session = self._build_session_state()
        session.update(
            patient_profile=MOCK_PATIENT_PROFILE,
            trial_candidates=list(MOCK_TRIAL_CANDIDATES),
            eligibility_ledger=list(MOCK_ELIGIBILITY_LEDGERS),
            journey_state="GAP_FOLLOWUP",
        )

        # The user chooses to upload additional documents.
        session["journey_state"] = "INGEST"
        assert session["journey_state"] == "INGEST"

        # Earlier results stay in place so they can be re-evaluated.
        assert session["patient_profile"] is not None
        assert len(session["trial_candidates"]) == 3

    def test_all_journey_states_reachable(self):
        """Set each entry of ``JOURNEY_STATES`` in turn and record what the session reports."""
        session = self._build_session_state()
        reached = []

        for name in JOURNEY_STATES:
            session.update(journey_state=name)
            reached.append(session["journey_state"])

        assert reached == JOURNEY_STATES
        assert len(reached) == 5
119
+
120
+
121
class TestE2EWithMockedServices:
    """Drive the extractor and planner services end to end under the mock fixtures."""

    def test_extract_to_search_to_evaluate_chain(
        self, mock_medgemma, mock_gemini
    ):
        """Run extraction, anchor generation, criteria slicing, evaluation and aggregation."""
        from trialpath.services.gemini_planner import GeminiPlanner
        from trialpath.services.medgemma_extractor import MedGemmaExtractor

        # 1. Extract a patient profile from a document list.
        medgemma = MedGemmaExtractor()
        patient = medgemma.extract(["patient_notes.pdf"])
        assert isinstance(patient, PatientProfile)

        # 2. Derive search anchors from that profile.
        gemini = GeminiPlanner()
        search_anchors = gemini.generate_search_anchors(patient)
        assert isinstance(search_anchors, SearchAnchors)

        # 3. Slice the first mock trial into individual criteria.
        sliced = gemini.slice_criteria(MOCK_TRIAL_CANDIDATES[0])
        assert len(sliced) >= 1

        # 4. Evaluate every criterion: "medical" ones go to the extractor,
        #    everything else goes to the planner.
        def assess(criterion):
            if criterion["type"] == "medical":
                verdict = medgemma.evaluate_medical_criterion(criterion["text"], patient)
            else:
                verdict = gemini.evaluate_structural_criterion(criterion["text"], patient)
            return {
                "criterion": criterion["text"],
                "decision": verdict["decision"],
                "confidence": verdict.get("confidence", 0.5),
            }

        assessments = [assess(criterion) for criterion in sliced]
        assert len(assessments) == len(sliced)

        # 5. Fold the per-criterion assessments into one ledger.
        ledger = gemini.aggregate_assessments(
            profile=patient,
            trial=MOCK_TRIAL_CANDIDATES[0],
            assessments=assessments,
        )
        assert isinstance(ledger, EligibilityLedger)

    def test_data_contracts_survive_serialization(self):
        """Round-trip each model through ``model_dump_json`` / ``model_validate_json``."""
        # PatientProfile: identity is checked through patient_id.
        profile_clone = PatientProfile.model_validate_json(
            MOCK_PATIENT_PROFILE.model_dump_json()
        )
        assert profile_clone.patient_id == MOCK_PATIENT_PROFILE.patient_id

        # TrialCandidate, then EligibilityLedger: both are checked through nct_id.
        keyed_by_nct_id = (
            (TrialCandidate, MOCK_TRIAL_CANDIDATES),
            (EligibilityLedger, MOCK_ELIGIBILITY_LEDGERS),
        )
        for model_cls, samples in keyed_by_nct_id:
            for sample in samples:
                clone = model_cls.model_validate_json(sample.model_dump_json())
                assert clone.nct_id == sample.nct_id

        # SearchAnchors: built inline, checked through its condition field.
        source_anchors = SearchAnchors(
            condition="NSCLC",
            biomarkers=["EGFR", "ALK"],
            stage="IV",
        )
        anchors_clone = SearchAnchors.model_validate_json(source_anchors.model_dump_json())
        assert anchors_clone.condition == "NSCLC"
195
+
196
+
197
class TestE2ELatencyBudget:
    """Verify the mocked pipeline completes within its latency budget."""

    def test_mock_operations_are_fast(self, mock_medgemma, mock_gemini):
        """Time the full mocked chain and require it to finish in under one second.

        The service imports happen before the clock starts, so import cost is
        not counted against the budget.
        """
        import time

        from trialpath.services.gemini_planner import GeminiPlanner
        from trialpath.services.medgemma_extractor import MedGemmaExtractor

        # perf_counter is the stdlib clock intended for timing short durations;
        # monotonic can tick too coarsely on some platforms to be meaningful here.
        start = time.perf_counter()

        extractor = MedGemmaExtractor()
        profile = extractor.extract(["doc.pdf"])

        planner = GeminiPlanner()
        planner.generate_search_anchors(profile)
        criteria = planner.slice_criteria(MOCK_TRIAL_CANDIDATES[0])

        # Only the cost of the calls matters here, so their results are discarded.
        for c in criteria:
            if c["type"] == "medical":
                extractor.evaluate_medical_criterion(c["text"], profile)
            else:
                planner.evaluate_structural_criterion(c["text"], profile)

        # Aggregation is timed with an empty assessment list (see loop above).
        planner.aggregate_assessments(
            profile=profile,
            trial=MOCK_TRIAL_CANDIDATES[0],
            assessments=[],
        )
        planner.analyze_gaps(profile, list(MOCK_ELIGIBILITY_LEDGERS))

        elapsed = time.perf_counter() - start
        # With mocks, should complete well under 1 second
        assert elapsed < 1.0, f"Mock pipeline took {elapsed:.2f}s, expected < 1s"