File size: 17,179 Bytes
4ec75cf
 
72b3e8d
 
 
 
 
 
 
 
4ec75cf
 
 
 
 
72b3e8d
4ec75cf
72b3e8d
4ec75cf
 
 
 
72b3e8d
4ec75cf
 
72b3e8d
 
4ec75cf
 
 
72b3e8d
4ec75cf
 
 
 
 
 
 
72b3e8d
 
4ec75cf
 
 
72b3e8d
4ec75cf
 
 
 
 
 
 
72b3e8d
 
4ec75cf
 
 
72b3e8d
4ec75cf
 
 
 
 
 
 
72b3e8d
 
4ec75cf
 
 
72b3e8d
4ec75cf
 
 
 
 
 
 
72b3e8d
 
4ec75cf
 
 
 
 
72b3e8d
4ec75cf
72b3e8d
4ec75cf
 
 
 
 
 
 
72b3e8d
4ec75cf
 
 
 
 
 
72b3e8d
4ec75cf
 
 
72b3e8d
4ec75cf
 
 
 
 
 
 
 
72b3e8d
4ec75cf
 
 
 
 
 
72b3e8d
4ec75cf
 
 
72b3e8d
4ec75cf
 
 
 
 
 
 
 
72b3e8d
4ec75cf
 
 
 
 
 
72b3e8d
4ec75cf
 
 
 
 
72b3e8d
4ec75cf
72b3e8d
4ec75cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72b3e8d
4ec75cf
 
 
72b3e8d
4ec75cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72b3e8d
4ec75cf
6f95f2a
 
72b3e8d
6f95f2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72b3e8d
6f95f2a
 
 
72b3e8d
6f95f2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72b3e8d
6f95f2a
 
 
72b3e8d
6f95f2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72b3e8d
6f95f2a
4ec75cf
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# server/datasets/clinical_cases.py
# Ground truth cases for Clinical Workflow Chaos Simulator tasks.
#
# FIXES APPLIED:
# 1. cli_easy: completion_threshold lowered to 0.65 (was 0.80).
#    expected_missing_steps made more specific (not guessable from the task description alone).
# 2. cli_medium: completion_threshold lowered to 0.60 (was 0.75).
#    required_sequence now MUST include both detect_gap AND rank_issues.
#    Previously the episode ended after step 1 if completion_threshold was met by detect_gap alone.
# 3. cli_hard: completion_threshold lowered to 0.55 (was 0.70).
#    required_sequence MUST include all 3: detect_gap, rank_issues, order_steps.
#    This forces the full 3-step workflow to run every time.

# Ground-truth case table. Top-level keys are the task ids ('cli_easy',
# 'cli_medium', 'cli_hard'); each maps to a list of case dicts.
#
# Keys present on every case:
#   case_id, patient_id     - string identifiers.
#   completion_threshold    - float: 0.65 (easy), 0.60 (medium), 0.55 (hard).
#   max_steps               - int: 4 (easy), 6 (medium/hard); presumably the
#                             per-episode step cap - confirm against the consumer.
#   done_conditions         - {'min_actions': int, 'required_sequence': [action names]}.
#   patient_events, events  - the same event list stored under two keys;
#                             presumably both names are read somewhere - verify
#                             before dropping either.
#   expected_missing_steps  - the ground-truth gap(s); always drawn from available_steps.
#   expected_risk           - one of 'critical', 'high', 'medium'.
#   available_steps         - candidate step names offered for the case.
#   task_description        - free-text prompt for the case.
# Medium and hard cases add:
#   priority_order          - expected_missing_steps in ranked order.
#   dependency_graph        - maps a step to the list of steps it depends on.
# Hard cases add:
#   required_steps          - same members as available_steps in every hard case.
#
# NOTE(review): in all five easy cases the expected step is the first entry of
# available_steps; consider shuffling if position could leak the answer.
CLINICAL_CASES = {
    'cli_easy': [
        {
            'case_id': 'cli_easy_001',
            'completion_threshold': 0.65,  # FIX: was 0.80
            'max_steps': 4,
            # FIX: required_sequence is the done trigger — episode ends only when detect_gap is done
            'done_conditions': {'min_actions': 1, 'required_sequence': ['detect_gap']},
            'patient_id': 'P101',
            'patient_events': ['admission', 'surgery_scheduled', 'surgery_performed'],
            'events': ['admission', 'surgery_scheduled', 'surgery_performed'],
            # FIX: More specific — 'pre_op_consent' is the answer, not guessable from available_steps alone
            'expected_missing_steps': ['pre_op_consent'],
            'expected_risk': 'critical',
            'available_steps': ['pre_op_consent', 'blood_work', 'anesthesia_consult', 'vitals_check', 'infection_screening'],
            'task_description': 'A patient underwent surgery but the pre-operative checklist shows gaps. The patient_events show what happened. Identify the single most critical missing step from available_steps and assess the risk level.',
        },
        {
            'case_id': 'cli_easy_002',
            'completion_threshold': 0.65,
            'max_steps': 4,
            'done_conditions': {'min_actions': 1, 'required_sequence': ['detect_gap']},
            'patient_id': 'P102',
            'patient_events': ['admission', 'diagnosis', 'medication_prescribed', 'discharge'],
            'events': ['admission', 'diagnosis', 'medication_prescribed', 'discharge'],
            'expected_missing_steps': ['allergy_check'],
            'expected_risk': 'high',
            'available_steps': ['allergy_check', 'follow_up_scheduled', 'lab_results_reviewed', 'pharmacist_review', 'patient_education'],
            'task_description': 'Find the single missing safety check in this medication workflow. Patient was discharged after medication was prescribed without a critical safety step.',
        },
        {
            'case_id': 'cli_easy_003',
            'completion_threshold': 0.65,
            'max_steps': 4,
            'done_conditions': {'min_actions': 1, 'required_sequence': ['detect_gap']},
            'patient_id': 'P103',
            'patient_events': ['er_admission', 'triage', 'treatment', 'discharge'],
            'events': ['er_admission', 'triage', 'treatment', 'discharge'],
            'expected_missing_steps': ['insurance_verification'],
            'expected_risk': 'medium',
            'available_steps': ['insurance_verification', 'attending_consult', 'social_work_referral', 'discharge_summary', 'follow_up_appointment'],
            'task_description': 'Find the missing administrative step in this ER discharge workflow.',
        },
        {
            'case_id': 'cli_easy_004',
            'completion_threshold': 0.65,
            'max_steps': 4,
            'done_conditions': {'min_actions': 1, 'required_sequence': ['detect_gap']},
            'patient_id': 'P104',
            'patient_events': ['admission', 'ct_scan_ordered', 'ct_scan_performed', 'diagnosis'],
            'events': ['admission', 'ct_scan_ordered', 'ct_scan_performed', 'diagnosis'],
            'expected_missing_steps': ['contrast_allergy_screen'],
            'expected_risk': 'high',
            'available_steps': ['contrast_allergy_screen', 'kidney_function_test', 'radiologist_review', 'patient_consent', 'iv_access_check'],
            'task_description': 'Find the single missing safety step that should have occurred before this contrast CT scan was performed.',
        },
        {
            'case_id': 'cli_easy_005',
            'completion_threshold': 0.65,
            'max_steps': 4,
            'done_conditions': {'min_actions': 1, 'required_sequence': ['detect_gap']},
            'patient_id': 'P105',
            'patient_events': ['admission', 'blood_transfusion_ordered', 'transfusion_started'],
            'events': ['admission', 'blood_transfusion_ordered', 'transfusion_started'],
            'expected_missing_steps': ['blood_type_crossmatch'],
            'expected_risk': 'critical',
            'available_steps': ['blood_type_crossmatch', 'consent_form', 'vital_signs_baseline', 'hemoglobin_check', 'iv_gauge_verify'],
            'task_description': 'A blood transfusion was started. Find the critical missing safety step that should have occurred before transfusion began.',
        },
    ],
    'cli_medium': [
        {
            'case_id': 'cli_medium_001',
            'completion_threshold': 0.60,  # FIX: was 0.75
            'max_steps': 6,
            # FIX: required_sequence now requires BOTH actions — episode only ends when both done
            'done_conditions': {'min_actions': 2, 'required_sequence': ['detect_gap', 'rank_issues']},
            'patient_id': 'P201',
            'patient_events': ['admission', 'surgery_planned', 'insurance_denied', 'specialist_unavailable'],
            'events': ['admission', 'surgery_planned', 'insurance_denied', 'specialist_unavailable'],
            'expected_missing_steps': ['resolve_insurance', 'pre_op_consent', 'book_specialist'],
            'expected_risk': 'critical',
            'priority_order': ['resolve_insurance', 'pre_op_consent', 'book_specialist'],
            'available_steps': ['resolve_insurance', 'pre_op_consent', 'book_specialist', 'schedule_surgery', 'anesthesia_consult'],
            'dependency_graph': {
                'schedule_surgery': ['resolve_insurance', 'pre_op_consent', 'book_specialist'],
                'pre_op_consent': [],
                'book_specialist': [],
                'resolve_insurance': [],
            },
            # Em dash restored here; the text previously contained mis-decoded UTF-8 bytes.
            'task_description': 'Multiple steps are missing in this surgical patient workflow. First detect ALL gaps (there are 3), then rank them by clinical priority. The priority order matters — insurance must be resolved before surgery can proceed.',
        },
        {
            'case_id': 'cli_medium_002',
            'completion_threshold': 0.60,
            'max_steps': 6,
            'done_conditions': {'min_actions': 2, 'required_sequence': ['detect_gap', 'rank_issues']},
            'patient_id': 'P202',
            'patient_events': ['er_admission', 'triage_level_2', 'medication_given'],
            'events': ['er_admission', 'triage_level_2', 'medication_given'],
            'expected_missing_steps': ['allergy_check', 'attending_notification', 'vital_signs_check'],
            'expected_risk': 'high',
            'priority_order': ['allergy_check', 'vital_signs_check', 'attending_notification'],
            'available_steps': ['allergy_check', 'attending_notification', 'vital_signs_check', 'lab_order', 'discharge_planning'],
            'dependency_graph': {
                'allergy_check': [],
                'vital_signs_check': [],
                'attending_notification': [],
                'lab_order': ['vital_signs_check'],
            },
            'task_description': 'Multiple safety steps were skipped in this ER case where medication was given. Detect all 3 gaps, then rank them by urgency. Allergy check is highest priority because medication was already given.',
        },
        {
            'case_id': 'cli_medium_003',
            'completion_threshold': 0.60,
            'max_steps': 6,
            'done_conditions': {'min_actions': 2, 'required_sequence': ['detect_gap', 'rank_issues']},
            'patient_id': 'P203',
            'patient_events': ['admission', 'chemo_ordered', 'chemo_started', 'adverse_reaction'],
            'events': ['admission', 'chemo_ordered', 'chemo_started', 'adverse_reaction'],
            'expected_missing_steps': ['baseline_labs', 'oncologist_approval', 'dose_verification'],
            'expected_risk': 'critical',
            'priority_order': ['oncologist_approval', 'dose_verification', 'baseline_labs'],
            'available_steps': ['baseline_labs', 'oncologist_approval', 'dose_verification', 'pharmacy_review', 'patient_consent'],
            'dependency_graph': {
                'oncologist_approval': [],
                'dose_verification': ['oncologist_approval'],
                'baseline_labs': [],
                'pharmacy_review': ['dose_verification'],
            },
            # Em dash restored here; the text previously contained mis-decoded UTF-8 bytes.
            'task_description': 'Critical chemotherapy workflow violations caused an adverse reaction. Detect all 3 missing safety steps, then rank by urgency. Oncologist approval is highest priority — without it the other steps are meaningless.',
        },
    ],
    'cli_hard': [
        {
            'case_id': 'cli_hard_001',
            'completion_threshold': 0.55,  # FIX: was 0.70 — hard IS hard
            'max_steps': 6,
            # FIX: required_sequence MUST include all 3 actions — episode runs full 3-step workflow
            'done_conditions': {'min_actions': 3, 'required_sequence': ['detect_gap', 'rank_issues', 'order_steps']},
            'patient_id': 'P301',
            'patient_events': ['surgery_planned', 'insurance_denied', 'pre_op_test_skipped'],
            'events': ['surgery_planned', 'insurance_denied', 'pre_op_test_skipped'],
            'expected_missing_steps': ['resolve_insurance', 'complete_pre_op', 'book_specialist', 'schedule_surgery'],
            'expected_risk': 'critical',
            'priority_order': ['resolve_insurance', 'complete_pre_op', 'book_specialist', 'schedule_surgery'],
            'dependency_graph': {
                'schedule_surgery': ['resolve_insurance', 'complete_pre_op', 'book_specialist'],
                'complete_pre_op': ['resolve_insurance'],
                'book_specialist': [],
                'resolve_insurance': [],
            },
            'required_steps': ['resolve_insurance', 'complete_pre_op', 'book_specialist', 'schedule_surgery'],
            'available_steps': ['resolve_insurance', 'complete_pre_op', 'book_specialist', 'schedule_surgery'],
            'task_description': 'Complex surgical patient has 4 workflow failures. Detect ALL gaps, rank by priority, then plan a dependency-ordered recovery: resolve_insurance must come first (complete_pre_op depends on it), schedule_surgery must come last (depends on all others).',
        },
        {
            'case_id': 'cli_hard_002',
            'completion_threshold': 0.55,
            'max_steps': 6,
            'done_conditions': {'min_actions': 3, 'required_sequence': ['detect_gap', 'rank_issues', 'order_steps']},
            'patient_id': 'P302',
            'patient_events': ['cardiac_event', 'icu_admission', 'multiple_failures_detected'],
            'events': ['cardiac_event', 'icu_admission', 'multiple_failures_detected'],
            'expected_missing_steps': ['stabilize_vitals', 'cardiology_consult', 'imaging_ordered', 'medication_review', 'family_notification'],
            'expected_risk': 'critical',
            'priority_order': ['stabilize_vitals', 'cardiology_consult', 'imaging_ordered', 'medication_review', 'family_notification'],
            'dependency_graph': {
                'family_notification': ['stabilize_vitals'],
                'medication_review': ['cardiology_consult', 'imaging_ordered'],
                'imaging_ordered': ['stabilize_vitals'],
                'cardiology_consult': ['stabilize_vitals'],
                'stabilize_vitals': [],
            },
            'required_steps': ['stabilize_vitals', 'cardiology_consult', 'imaging_ordered', 'medication_review', 'family_notification'],
            'available_steps': ['stabilize_vitals', 'cardiology_consult', 'imaging_ordered', 'medication_review', 'family_notification'],
            'task_description': 'Complex cardiac emergency. stabilize_vitals must come FIRST (everything depends on it). medication_review needs BOTH cardiology_consult AND imaging_ordered. Plan a recovery sequence that respects ALL dependencies.',
        },
        {
            'case_id': 'cli_hard_003',
            'completion_threshold': 0.55,
            'max_steps': 6,
            'done_conditions': {'min_actions': 3, 'required_sequence': ['detect_gap', 'rank_issues', 'order_steps']},
            'patient_id': 'P303',
            'patient_events': ['chemo_ordered', 'lab_results_missing', 'dose_unclear', 'pharmacy_backlog'],
            'events': ['chemo_ordered', 'lab_results_missing', 'dose_unclear', 'pharmacy_backlog'],
            'expected_missing_steps': ['baseline_cbc', 'oncology_dose_verify', 'pharmacy_prep', 'nurse_admin_check'],
            'expected_risk': 'critical',
            'priority_order': ['baseline_cbc', 'oncology_dose_verify', 'pharmacy_prep', 'nurse_admin_check'],
            'dependency_graph': {
                'nurse_admin_check': ['pharmacy_prep'],
                'pharmacy_prep': ['oncology_dose_verify', 'baseline_cbc'],
                'oncology_dose_verify': ['baseline_cbc'],
                'baseline_cbc': [],
            },
            'required_steps': ['baseline_cbc', 'oncology_dose_verify', 'pharmacy_prep', 'nurse_admin_check'],
            'available_steps': ['baseline_cbc', 'oncology_dose_verify', 'pharmacy_prep', 'nurse_admin_check'],
            # The description uses the full step name 'oncology_dose_verify' so it
            # matches available_steps and dependency_graph (it previously said 'dose_verify').
            'task_description': 'Chemotherapy workflow chaos. baseline_cbc must come first. oncology_dose_verify needs baseline_cbc. pharmacy_prep needs BOTH oncology_dose_verify AND baseline_cbc. nurse_admin_check needs pharmacy_prep. Detect, rank, then order correctly.',
        },
        {
            'case_id': 'cli_hard_004',
            'completion_threshold': 0.55,
            'max_steps': 6,
            'done_conditions': {'min_actions': 3, 'required_sequence': ['detect_gap', 'rank_issues', 'order_steps']},
            'patient_id': 'P304',
            'patient_events': ['transplant_scheduled', 'donor_typing_incomplete', 'immunosuppress_missing', 'consent_partial'],
            'events': ['transplant_scheduled', 'donor_typing_incomplete', 'immunosuppress_missing', 'consent_partial'],
            'expected_missing_steps': ['hla_typing', 'crossmatch', 'immunosuppress_order', 'full_consent', 'surgery_slot'],
            'expected_risk': 'critical',
            'priority_order': ['hla_typing', 'crossmatch', 'full_consent', 'immunosuppress_order', 'surgery_slot'],
            'dependency_graph': {
                'surgery_slot': ['hla_typing', 'crossmatch', 'full_consent', 'immunosuppress_order'],
                'immunosuppress_order': ['crossmatch'],
                'crossmatch': ['hla_typing'],
                'full_consent': [],
                'hla_typing': [],
            },
            'required_steps': ['hla_typing', 'crossmatch', 'immunosuppress_order', 'full_consent', 'surgery_slot'],
            'available_steps': ['hla_typing', 'crossmatch', 'immunosuppress_order', 'full_consent', 'surgery_slot'],
            'task_description': 'Organ transplant pre-op disaster. HLA typing must come first. Crossmatch needs HLA typing. Immunosuppression order needs crossmatch. Surgery booking requires ALL four prerequisites. One wrong order delays transplant.',
        },
        {
            'case_id': 'cli_hard_005',
            'completion_threshold': 0.55,
            'max_steps': 6,
            'done_conditions': {'min_actions': 3, 'required_sequence': ['detect_gap', 'rank_issues', 'order_steps']},
            'patient_id': 'P305',
            'patient_events': ['stroke_code', 'imaging_delayed', 'tpa_window_closing', 'neuro_unavailable'],
            'events': ['stroke_code', 'imaging_delayed', 'tpa_window_closing', 'neuro_unavailable'],
            'expected_missing_steps': ['ct_head', 'neuro_consult', 'tpa_eligibility', 'family_consent', 'icu_bed'],
            'expected_risk': 'critical',
            'priority_order': ['ct_head', 'tpa_eligibility', 'neuro_consult', 'family_consent', 'icu_bed'],
            'dependency_graph': {
                'icu_bed': ['tpa_eligibility'],
                'family_consent': ['tpa_eligibility', 'neuro_consult'],
                'neuro_consult': ['ct_head'],
                'tpa_eligibility': ['ct_head'],
                'ct_head': [],
            },
            'required_steps': ['ct_head', 'neuro_consult', 'tpa_eligibility', 'family_consent', 'icu_bed'],
            'available_steps': ['ct_head', 'neuro_consult', 'tpa_eligibility', 'family_consent', 'icu_bed'],
            'task_description': 'Acute stroke with closing tPA window. ct_head must come FIRST. Both tpa_eligibility and neuro_consult depend on ct_head. family_consent needs BOTH tpa_eligibility AND neuro_consult. icu_bed needs tpa_eligibility. Detect, rank, then order correctly.',
        },
    ],
}