# server/datasets/clinical_cases.py
# Ground truth cases for Clinical Workflow Chaos Simulator tasks.
#
# FIXES APPLIED:
# 1. cli_easy: completion_threshold lowered to 0.65 (was 0.80).
#    expected_missing_steps made more specific (not guessable from the task description alone).
# 2. cli_medium: completion_threshold lowered to 0.60 (was 0.75).
#    required_sequence now MUST include both detect_gap AND rank_issues.
#    Previously the episode ended at step 1 if completion_threshold was met by detect_gap alone.
# 3. cli_hard: completion_threshold lowered to 0.55 (was 0.70).
#    required_sequence MUST include all 3: detect_gap, rank_issues, order_steps.
#    This forces the full 3-step workflow to run every time.
# Ground-truth case table, keyed by task id.
#
# Layout: 'cli_easy' / 'cli_medium' / 'cli_hard' -> list of case dicts.
#
# Keys present in every case:
#   case_id, patient_id      identifiers (case_id is unique across the table)
#   completion_threshold     float threshold; 0.65 easy, 0.60 medium, 0.55 hard
#   max_steps                integer limit; 4 for easy, 6 for medium and hard
#   done_conditions          {'min_actions': int, 'required_sequence': [action names]};
#                            min_actions equals len(required_sequence) in every case
#   patient_events, events   the same event list stored under two keys; every case
#                            keeps them identical (presumably a compatibility alias;
#                            confirm with the consumer before dropping either key)
#   expected_missing_steps   ground-truth step names, always drawn from available_steps
#   expected_risk            one of 'medium', 'high', 'critical' in the data below
#   available_steps          candidate step names offered for the case
#   task_description         free-text instructions for the case
#
# Additional keys:
#   priority_order           (medium, hard) expected_missing_steps re-ordered by priority
#   dependency_graph         (medium, hard) step -> list of steps it depends on, as
#                            spelled out in the hard task descriptions
#   required_steps           (hard only) same names as available_steps
#
# How thresholds and done_conditions are enforced lives in the consumer of this
# table, which is not part of this module.
CLINICAL_CASES = {
    'cli_easy': [
        {
            'case_id': 'cli_easy_001',
            'completion_threshold': 0.65,  # FIX: was 0.80
            'max_steps': 4,
            # FIX: required_sequence is the done trigger — episode ends only when detect_gap is done
            'done_conditions': {'min_actions': 1, 'required_sequence': ['detect_gap']},
            'patient_id': 'P101',
            'patient_events': ['admission', 'surgery_scheduled', 'surgery_performed'],
            'events': ['admission', 'surgery_scheduled', 'surgery_performed'],
            # FIX: More specific — 'pre_op_consent' is the answer, not guessable from available_steps alone
            'expected_missing_steps': ['pre_op_consent'],
            'expected_risk': 'critical',
            'available_steps': ['pre_op_consent', 'blood_work', 'anesthesia_consult', 'vitals_check', 'infection_screening'],
            'task_description': 'A patient underwent surgery but the pre-operative checklist shows gaps. The patient_events show what happened. Identify the single most critical missing step from available_steps and assess the risk level.',
        },
        {
            'case_id': 'cli_easy_002',
            'completion_threshold': 0.65,
            'max_steps': 4,
            'done_conditions': {'min_actions': 1, 'required_sequence': ['detect_gap']},
            'patient_id': 'P102',
            'patient_events': ['admission', 'diagnosis', 'medication_prescribed', 'discharge'],
            'events': ['admission', 'diagnosis', 'medication_prescribed', 'discharge'],
            'expected_missing_steps': ['allergy_check'],
            'expected_risk': 'high',
            'available_steps': ['allergy_check', 'follow_up_scheduled', 'lab_results_reviewed', 'pharmacist_review', 'patient_education'],
            'task_description': 'Find the single missing safety check in this medication workflow. Patient was discharged after medication was prescribed without a critical safety step.',
        },
        {
            'case_id': 'cli_easy_003',
            'completion_threshold': 0.65,
            'max_steps': 4,
            'done_conditions': {'min_actions': 1, 'required_sequence': ['detect_gap']},
            'patient_id': 'P103',
            'patient_events': ['er_admission', 'triage', 'treatment', 'discharge'],
            'events': ['er_admission', 'triage', 'treatment', 'discharge'],
            'expected_missing_steps': ['insurance_verification'],
            'expected_risk': 'medium',
            'available_steps': ['insurance_verification', 'attending_consult', 'social_work_referral', 'discharge_summary', 'follow_up_appointment'],
            'task_description': 'Find the missing administrative step in this ER discharge workflow.',
        },
        {
            'case_id': 'cli_easy_004',
            'completion_threshold': 0.65,
            'max_steps': 4,
            'done_conditions': {'min_actions': 1, 'required_sequence': ['detect_gap']},
            'patient_id': 'P104',
            'patient_events': ['admission', 'ct_scan_ordered', 'ct_scan_performed', 'diagnosis'],
            'events': ['admission', 'ct_scan_ordered', 'ct_scan_performed', 'diagnosis'],
            'expected_missing_steps': ['contrast_allergy_screen'],
            'expected_risk': 'high',
            'available_steps': ['contrast_allergy_screen', 'kidney_function_test', 'radiologist_review', 'patient_consent', 'iv_access_check'],
            'task_description': 'Find the single missing safety step that should have occurred before this contrast CT scan was performed.',
        },
        {
            'case_id': 'cli_easy_005',
            'completion_threshold': 0.65,
            'max_steps': 4,
            'done_conditions': {'min_actions': 1, 'required_sequence': ['detect_gap']},
            'patient_id': 'P105',
            'patient_events': ['admission', 'blood_transfusion_ordered', 'transfusion_started'],
            'events': ['admission', 'blood_transfusion_ordered', 'transfusion_started'],
            'expected_missing_steps': ['blood_type_crossmatch'],
            'expected_risk': 'critical',
            'available_steps': ['blood_type_crossmatch', 'consent_form', 'vital_signs_baseline', 'hemoglobin_check', 'iv_gauge_verify'],
            'task_description': 'A blood transfusion was started. Find the critical missing safety step that should have occurred before transfusion began.',
        },
    ],
    'cli_medium': [
        {
            'case_id': 'cli_medium_001',
            'completion_threshold': 0.60,  # FIX: was 0.75
            'max_steps': 6,
            # FIX: required_sequence now requires BOTH actions — episode only ends when both done
            'done_conditions': {'min_actions': 2, 'required_sequence': ['detect_gap', 'rank_issues']},
            'patient_id': 'P201',
            'patient_events': ['admission', 'surgery_planned', 'insurance_denied', 'specialist_unavailable'],
            'events': ['admission', 'surgery_planned', 'insurance_denied', 'specialist_unavailable'],
            'expected_missing_steps': ['resolve_insurance', 'pre_op_consent', 'book_specialist'],
            'expected_risk': 'critical',
            'priority_order': ['resolve_insurance', 'pre_op_consent', 'book_specialist'],
            'available_steps': ['resolve_insurance', 'pre_op_consent', 'book_specialist', 'schedule_surgery', 'anesthesia_consult'],
            'dependency_graph': {
                'schedule_surgery': ['resolve_insurance', 'pre_op_consent', 'book_specialist'],
                'pre_op_consent': [],
                'book_specialist': [],
                'resolve_insurance': [],
            },
            'task_description': 'Multiple steps are missing in this surgical patient workflow. First detect ALL gaps (there are 3), then rank them by clinical priority. The priority order matters — insurance must be resolved before surgery can proceed.',
        },
        {
            'case_id': 'cli_medium_002',
            'completion_threshold': 0.60,
            'max_steps': 6,
            'done_conditions': {'min_actions': 2, 'required_sequence': ['detect_gap', 'rank_issues']},
            'patient_id': 'P202',
            'patient_events': ['er_admission', 'triage_level_2', 'medication_given'],
            'events': ['er_admission', 'triage_level_2', 'medication_given'],
            'expected_missing_steps': ['allergy_check', 'attending_notification', 'vital_signs_check'],
            'expected_risk': 'high',
            'priority_order': ['allergy_check', 'vital_signs_check', 'attending_notification'],
            'available_steps': ['allergy_check', 'attending_notification', 'vital_signs_check', 'lab_order', 'discharge_planning'],
            'dependency_graph': {
                'allergy_check': [],
                'vital_signs_check': [],
                'attending_notification': [],
                'lab_order': ['vital_signs_check'],
            },
            'task_description': 'Multiple safety steps were skipped in this ER case where medication was given. Detect all 3 gaps, then rank them by urgency. Allergy check is highest priority because medication was already given.',
        },
        {
            'case_id': 'cli_medium_003',
            'completion_threshold': 0.60,
            'max_steps': 6,
            'done_conditions': {'min_actions': 2, 'required_sequence': ['detect_gap', 'rank_issues']},
            'patient_id': 'P203',
            'patient_events': ['admission', 'chemo_ordered', 'chemo_started', 'adverse_reaction'],
            'events': ['admission', 'chemo_ordered', 'chemo_started', 'adverse_reaction'],
            'expected_missing_steps': ['baseline_labs', 'oncologist_approval', 'dose_verification'],
            'expected_risk': 'critical',
            'priority_order': ['oncologist_approval', 'dose_verification', 'baseline_labs'],
            'available_steps': ['baseline_labs', 'oncologist_approval', 'dose_verification', 'pharmacy_review', 'patient_consent'],
            'dependency_graph': {
                'oncologist_approval': [],
                'dose_verification': ['oncologist_approval'],
                'baseline_labs': [],
                'pharmacy_review': ['dose_verification'],
            },
            'task_description': 'Critical chemotherapy workflow violations caused an adverse reaction. Detect all 3 missing safety steps, then rank by urgency. Oncologist approval is highest priority — without it the other steps are meaningless.',
        },
    ],
    'cli_hard': [
        {
            'case_id': 'cli_hard_001',
            'completion_threshold': 0.55,  # FIX: was 0.70 — hard IS hard
            'max_steps': 6,
            # FIX: required_sequence MUST include all 3 actions — episode runs full 3-step workflow
            'done_conditions': {'min_actions': 3, 'required_sequence': ['detect_gap', 'rank_issues', 'order_steps']},
            'patient_id': 'P301',
            'patient_events': ['surgery_planned', 'insurance_denied', 'pre_op_test_skipped'],
            'events': ['surgery_planned', 'insurance_denied', 'pre_op_test_skipped'],
            'expected_missing_steps': ['resolve_insurance', 'complete_pre_op', 'book_specialist', 'schedule_surgery'],
            'expected_risk': 'critical',
            'priority_order': ['resolve_insurance', 'complete_pre_op', 'book_specialist', 'schedule_surgery'],
            'dependency_graph': {
                'schedule_surgery': ['resolve_insurance', 'complete_pre_op', 'book_specialist'],
                'complete_pre_op': ['resolve_insurance'],
                'book_specialist': [],
                'resolve_insurance': [],
            },
            'required_steps': ['resolve_insurance', 'complete_pre_op', 'book_specialist', 'schedule_surgery'],
            'available_steps': ['resolve_insurance', 'complete_pre_op', 'book_specialist', 'schedule_surgery'],
            'task_description': 'Complex surgical patient has 4 workflow failures. Detect ALL gaps, rank by priority, then plan a dependency-ordered recovery: resolve_insurance must come first (complete_pre_op depends on it), schedule_surgery must come last (depends on all others).',
        },
        {
            'case_id': 'cli_hard_002',
            'completion_threshold': 0.55,
            'max_steps': 6,
            'done_conditions': {'min_actions': 3, 'required_sequence': ['detect_gap', 'rank_issues', 'order_steps']},
            'patient_id': 'P302',
            'patient_events': ['cardiac_event', 'icu_admission', 'multiple_failures_detected'],
            'events': ['cardiac_event', 'icu_admission', 'multiple_failures_detected'],
            'expected_missing_steps': ['stabilize_vitals', 'cardiology_consult', 'imaging_ordered', 'medication_review', 'family_notification'],
            'expected_risk': 'critical',
            'priority_order': ['stabilize_vitals', 'cardiology_consult', 'imaging_ordered', 'medication_review', 'family_notification'],
            'dependency_graph': {
                'family_notification': ['stabilize_vitals'],
                'medication_review': ['cardiology_consult', 'imaging_ordered'],
                'imaging_ordered': ['stabilize_vitals'],
                'cardiology_consult': ['stabilize_vitals'],
                'stabilize_vitals': [],
            },
            'required_steps': ['stabilize_vitals', 'cardiology_consult', 'imaging_ordered', 'medication_review', 'family_notification'],
            'available_steps': ['stabilize_vitals', 'cardiology_consult', 'imaging_ordered', 'medication_review', 'family_notification'],
            'task_description': 'Complex cardiac emergency. stabilize_vitals must come FIRST (everything depends on it). medication_review needs BOTH cardiology_consult AND imaging_ordered. Plan a recovery sequence that respects ALL dependencies.',
        },
        {
            'case_id': 'cli_hard_003',
            'completion_threshold': 0.55,
            'max_steps': 6,
            'done_conditions': {'min_actions': 3, 'required_sequence': ['detect_gap', 'rank_issues', 'order_steps']},
            'patient_id': 'P303',
            'patient_events': ['chemo_ordered', 'lab_results_missing', 'dose_unclear', 'pharmacy_backlog'],
            'events': ['chemo_ordered', 'lab_results_missing', 'dose_unclear', 'pharmacy_backlog'],
            'expected_missing_steps': ['baseline_cbc', 'oncology_dose_verify', 'pharmacy_prep', 'nurse_admin_check'],
            'expected_risk': 'critical',
            'priority_order': ['baseline_cbc', 'oncology_dose_verify', 'pharmacy_prep', 'nurse_admin_check'],
            'dependency_graph': {
                'nurse_admin_check': ['pharmacy_prep'],
                'pharmacy_prep': ['oncology_dose_verify', 'baseline_cbc'],
                'oncology_dose_verify': ['baseline_cbc'],
                'baseline_cbc': [],
            },
            'required_steps': ['baseline_cbc', 'oncology_dose_verify', 'pharmacy_prep', 'nurse_admin_check'],
            'available_steps': ['baseline_cbc', 'oncology_dose_verify', 'pharmacy_prep', 'nurse_admin_check'],
            'task_description': 'Chemotherapy workflow chaos. baseline_cbc must come first. oncology_dose_verify needs baseline_cbc. pharmacy_prep needs BOTH dose_verify AND baseline_cbc. nurse_admin_check needs pharmacy_prep. Detect, rank, then order correctly.',
        },
        {
            'case_id': 'cli_hard_004',
            'completion_threshold': 0.55,
            'max_steps': 6,
            'done_conditions': {'min_actions': 3, 'required_sequence': ['detect_gap', 'rank_issues', 'order_steps']},
            'patient_id': 'P304',
            'patient_events': ['transplant_scheduled', 'donor_typing_incomplete', 'immunosuppress_missing', 'consent_partial'],
            'events': ['transplant_scheduled', 'donor_typing_incomplete', 'immunosuppress_missing', 'consent_partial'],
            'expected_missing_steps': ['hla_typing', 'crossmatch', 'immunosuppress_order', 'full_consent', 'surgery_slot'],
            'expected_risk': 'critical',
            'priority_order': ['hla_typing', 'crossmatch', 'full_consent', 'immunosuppress_order', 'surgery_slot'],
            'dependency_graph': {
                'surgery_slot': ['hla_typing', 'crossmatch', 'full_consent', 'immunosuppress_order'],
                'immunosuppress_order': ['crossmatch'],
                'crossmatch': ['hla_typing'],
                'full_consent': [],
                'hla_typing': [],
            },
            'required_steps': ['hla_typing', 'crossmatch', 'immunosuppress_order', 'full_consent', 'surgery_slot'],
            'available_steps': ['hla_typing', 'crossmatch', 'immunosuppress_order', 'full_consent', 'surgery_slot'],
            'task_description': 'Organ transplant pre-op disaster. HLA typing must come first. Crossmatch needs HLA typing. Immunosuppression order needs crossmatch. Surgery booking requires ALL four prerequisites. One wrong order delays transplant.',
        },
        {
            'case_id': 'cli_hard_005',
            'completion_threshold': 0.55,
            'max_steps': 6,
            'done_conditions': {'min_actions': 3, 'required_sequence': ['detect_gap', 'rank_issues', 'order_steps']},
            'patient_id': 'P305',
            'patient_events': ['stroke_code', 'imaging_delayed', 'tpa_window_closing', 'neuro_unavailable'],
            'events': ['stroke_code', 'imaging_delayed', 'tpa_window_closing', 'neuro_unavailable'],
            'expected_missing_steps': ['ct_head', 'neuro_consult', 'tpa_eligibility', 'family_consent', 'icu_bed'],
            'expected_risk': 'critical',
            'priority_order': ['ct_head', 'tpa_eligibility', 'neuro_consult', 'family_consent', 'icu_bed'],
            'dependency_graph': {
                'icu_bed': ['tpa_eligibility'],
                'family_consent': ['tpa_eligibility', 'neuro_consult'],
                'neuro_consult': ['ct_head'],
                'tpa_eligibility': ['ct_head'],
                'ct_head': [],
            },
            'required_steps': ['ct_head', 'neuro_consult', 'tpa_eligibility', 'family_consent', 'icu_bed'],
            'available_steps': ['ct_head', 'neuro_consult', 'tpa_eligibility', 'family_consent', 'icu_bed'],
            'task_description': 'Acute stroke with closing tPA window. ct_head must come FIRST. Both tpa_eligibility and neuro_consult depend on ct_head. family_consent needs BOTH tpa_eligibility AND neuro_consult. icu_bed needs tpa_eligibility. Detect, rank, then order correctly.',
        },
    ],
}
|