# server/datasets/clinical_cases.py
# Ground truth cases for Clinical Workflow Chaos Simulator tasks.
#
# CLINICAL_CASES maps a task tier ('cli_easy', 'cli_medium', 'cli_hard') to a
# list of case dicts. Every case carries: case_id, completion_threshold,
# max_steps, done_conditions, patient_id, patient_events, events,
# expected_missing_steps, expected_risk, available_steps, task_description.
# Medium and hard cases add priority_order and dependency_graph (each step
# mapped to the steps it depends on); hard cases also add required_steps.
#
# NOTE: 'patient_events' and 'events' hold the same sequence in every case but
# are kept as two separate list objects. Which key consumers read is not
# visible from this file, so both are preserved.
#
# FIXES APPLIED:
# 1. cli_easy: completion_threshold lowered to 0.65 (was 0.80);
#    expected_missing_steps made more specific (not guessable from the task
#    description alone).
# 2. cli_medium: required_sequence now MUST include both detect_gap AND
#    rank_issues. Previously the episode ended at step 1 if
#    completion_threshold was met by detect_gap alone.
# 3. cli_hard: required_sequence MUST include all 3: detect_gap, rank_issues,
#    order_steps. This forces the full 3-step workflow to run every time.

CLINICAL_CASES = {
    # ------------------------------------------------------------------
    # Easy tier: one missing step per case, single 'detect_gap' action.
    # ------------------------------------------------------------------
    'cli_easy': [
        {
            'case_id': 'cli_easy_001',
            'completion_threshold': 0.65,  # FIX: was 0.80
            'max_steps': 4,
            # FIX: required_sequence is the done trigger — the episode ends
            # only when detect_gap is done.
            'done_conditions': {'min_actions': 1, 'required_sequence': ['detect_gap']},
            'patient_id': 'P101',
            'patient_events': ['admission', 'surgery_scheduled', 'surgery_performed'],
            'events': ['admission', 'surgery_scheduled', 'surgery_performed'],
            # FIX: More specific — 'pre_op_consent' is the answer, not
            # guessable from available_steps alone.
            'expected_missing_steps': ['pre_op_consent'],
            'expected_risk': 'critical',
            'available_steps': [
                'pre_op_consent',
                'blood_work',
                'anesthesia_consult',
                'vitals_check',
                'infection_screening',
            ],
            'task_description': (
                'A patient underwent surgery but the pre-operative checklist shows gaps. '
                'The patient_events show what happened. '
                'Identify the single most critical missing step from available_steps '
                'and assess the risk level.'
            ),
        },
        {
            'case_id': 'cli_easy_002',
            'completion_threshold': 0.65,
            'max_steps': 4,
            'done_conditions': {'min_actions': 1, 'required_sequence': ['detect_gap']},
            'patient_id': 'P102',
            'patient_events': ['admission', 'diagnosis', 'medication_prescribed', 'discharge'],
            'events': ['admission', 'diagnosis', 'medication_prescribed', 'discharge'],
            'expected_missing_steps': ['allergy_check'],
            'expected_risk': 'high',
            'available_steps': [
                'allergy_check',
                'follow_up_scheduled',
                'lab_results_reviewed',
                'pharmacist_review',
                'patient_education',
            ],
            'task_description': (
                'Find the single missing safety check in this medication workflow. '
                'Patient was discharged after medication was prescribed '
                'without a critical safety step.'
            ),
        },
        {
            'case_id': 'cli_easy_003',
            'completion_threshold': 0.65,
            'max_steps': 4,
            'done_conditions': {'min_actions': 1, 'required_sequence': ['detect_gap']},
            'patient_id': 'P103',
            'patient_events': ['er_admission', 'triage', 'treatment', 'discharge'],
            'events': ['er_admission', 'triage', 'treatment', 'discharge'],
            'expected_missing_steps': ['insurance_verification'],
            'expected_risk': 'medium',
            'available_steps': [
                'insurance_verification',
                'attending_consult',
                'social_work_referral',
                'discharge_summary',
                'follow_up_appointment',
            ],
            'task_description': (
                'Find the missing administrative step in this ER discharge workflow.'
            ),
        },
        {
            'case_id': 'cli_easy_004',
            'completion_threshold': 0.65,
            'max_steps': 4,
            'done_conditions': {'min_actions': 1, 'required_sequence': ['detect_gap']},
            'patient_id': 'P104',
            'patient_events': ['admission', 'ct_scan_ordered', 'ct_scan_performed', 'diagnosis'],
            'events': ['admission', 'ct_scan_ordered', 'ct_scan_performed', 'diagnosis'],
            'expected_missing_steps': ['contrast_allergy_screen'],
            'expected_risk': 'high',
            'available_steps': [
                'contrast_allergy_screen',
                'kidney_function_test',
                'radiologist_review',
                'patient_consent',
                'iv_access_check',
            ],
            'task_description': (
                'Find the single missing safety step that should have occurred '
                'before this contrast CT scan was performed.'
            ),
        },
        {
            'case_id': 'cli_easy_005',
            'completion_threshold': 0.65,
            'max_steps': 4,
            'done_conditions': {'min_actions': 1, 'required_sequence': ['detect_gap']},
            'patient_id': 'P105',
            'patient_events': ['admission', 'blood_transfusion_ordered', 'transfusion_started'],
            'events': ['admission', 'blood_transfusion_ordered', 'transfusion_started'],
            'expected_missing_steps': ['blood_type_crossmatch'],
            'expected_risk': 'critical',
            'available_steps': [
                'blood_type_crossmatch',
                'consent_form',
                'vital_signs_baseline',
                'hemoglobin_check',
                'iv_gauge_verify',
            ],
            'task_description': (
                'A blood transfusion was started. '
                'Find the critical missing safety step that should have occurred '
                'before transfusion began.'
            ),
        },
    ],
    # ------------------------------------------------------------------
    # Medium tier: three missing steps per case; 'detect_gap' then
    # 'rank_issues'.
    # ------------------------------------------------------------------
    'cli_medium': [
        {
            'case_id': 'cli_medium_001',
            'completion_threshold': 0.60,  # FIX: was 0.75
            'max_steps': 6,
            # FIX: required_sequence now requires BOTH actions — the episode
            # only ends when both are done.
            'done_conditions': {
                'min_actions': 2,
                'required_sequence': ['detect_gap', 'rank_issues'],
            },
            'patient_id': 'P201',
            'patient_events': [
                'admission',
                'surgery_planned',
                'insurance_denied',
                'specialist_unavailable',
            ],
            'events': [
                'admission',
                'surgery_planned',
                'insurance_denied',
                'specialist_unavailable',
            ],
            'expected_missing_steps': ['resolve_insurance', 'pre_op_consent', 'book_specialist'],
            'expected_risk': 'critical',
            'priority_order': ['resolve_insurance', 'pre_op_consent', 'book_specialist'],
            'available_steps': [
                'resolve_insurance',
                'pre_op_consent',
                'book_specialist',
                'schedule_surgery',
                'anesthesia_consult',
            ],
            'dependency_graph': {
                'schedule_surgery': ['resolve_insurance', 'pre_op_consent', 'book_specialist'],
                'pre_op_consent': [],
                'book_specialist': [],
                'resolve_insurance': [],
            },
            'task_description': (
                'Multiple steps are missing in this surgical patient workflow. '
                'First detect ALL gaps (there are 3), then rank them by clinical priority. '
                'The priority order matters — insurance must be resolved '
                'before surgery can proceed.'
            ),
        },
        {
            'case_id': 'cli_medium_002',
            'completion_threshold': 0.60,
            'max_steps': 6,
            'done_conditions': {
                'min_actions': 2,
                'required_sequence': ['detect_gap', 'rank_issues'],
            },
            'patient_id': 'P202',
            'patient_events': ['er_admission', 'triage_level_2', 'medication_given'],
            'events': ['er_admission', 'triage_level_2', 'medication_given'],
            'expected_missing_steps': [
                'allergy_check',
                'attending_notification',
                'vital_signs_check',
            ],
            'expected_risk': 'high',
            'priority_order': ['allergy_check', 'vital_signs_check', 'attending_notification'],
            'available_steps': [
                'allergy_check',
                'attending_notification',
                'vital_signs_check',
                'lab_order',
                'discharge_planning',
            ],
            'dependency_graph': {
                'allergy_check': [],
                'vital_signs_check': [],
                'attending_notification': [],
                'lab_order': ['vital_signs_check'],
            },
            'task_description': (
                'Multiple safety steps were skipped in this ER case where medication was given. '
                'Detect all 3 gaps, then rank them by urgency. '
                'Allergy check is highest priority because medication was already given.'
            ),
        },
        {
            'case_id': 'cli_medium_003',
            'completion_threshold': 0.60,
            'max_steps': 6,
            'done_conditions': {
                'min_actions': 2,
                'required_sequence': ['detect_gap', 'rank_issues'],
            },
            'patient_id': 'P203',
            'patient_events': ['admission', 'chemo_ordered', 'chemo_started', 'adverse_reaction'],
            'events': ['admission', 'chemo_ordered', 'chemo_started', 'adverse_reaction'],
            'expected_missing_steps': ['baseline_labs', 'oncologist_approval', 'dose_verification'],
            'expected_risk': 'critical',
            'priority_order': ['oncologist_approval', 'dose_verification', 'baseline_labs'],
            'available_steps': [
                'baseline_labs',
                'oncologist_approval',
                'dose_verification',
                'pharmacy_review',
                'patient_consent',
            ],
            'dependency_graph': {
                'oncologist_approval': [],
                'dose_verification': ['oncologist_approval'],
                'baseline_labs': [],
                'pharmacy_review': ['dose_verification'],
            },
            'task_description': (
                'Critical chemotherapy workflow violations caused an adverse reaction. '
                'Detect all 3 missing safety steps, then rank by urgency. '
                'Oncologist approval is highest priority — without it '
                'the other steps are meaningless.'
            ),
        },
    ],
    # ------------------------------------------------------------------
    # Hard tier: 'detect_gap', 'rank_issues', then 'order_steps' against
    # the dependency graph.
    # ------------------------------------------------------------------
    'cli_hard': [
        {
            'case_id': 'cli_hard_001',
            'completion_threshold': 0.55,  # FIX: was 0.70 — hard IS hard
            'max_steps': 6,
            # FIX: required_sequence MUST include all 3 actions — the episode
            # runs the full 3-step workflow.
            'done_conditions': {
                'min_actions': 3,
                'required_sequence': ['detect_gap', 'rank_issues', 'order_steps'],
            },
            'patient_id': 'P301',
            'patient_events': ['surgery_planned', 'insurance_denied', 'pre_op_test_skipped'],
            'events': ['surgery_planned', 'insurance_denied', 'pre_op_test_skipped'],
            'expected_missing_steps': [
                'resolve_insurance',
                'complete_pre_op',
                'book_specialist',
                'schedule_surgery',
            ],
            'expected_risk': 'critical',
            'priority_order': [
                'resolve_insurance',
                'complete_pre_op',
                'book_specialist',
                'schedule_surgery',
            ],
            'dependency_graph': {
                'schedule_surgery': ['resolve_insurance', 'complete_pre_op', 'book_specialist'],
                'complete_pre_op': ['resolve_insurance'],
                'book_specialist': [],
                'resolve_insurance': [],
            },
            'required_steps': [
                'resolve_insurance',
                'complete_pre_op',
                'book_specialist',
                'schedule_surgery',
            ],
            'available_steps': [
                'resolve_insurance',
                'complete_pre_op',
                'book_specialist',
                'schedule_surgery',
            ],
            'task_description': (
                'Complex surgical patient has 4 workflow failures. '
                'Detect ALL gaps, rank by priority, then plan a dependency-ordered recovery: '
                'resolve_insurance must come first (complete_pre_op depends on it), '
                'schedule_surgery must come last (depends on all others).'
            ),
        },
        {
            'case_id': 'cli_hard_002',
            'completion_threshold': 0.55,
            'max_steps': 6,
            'done_conditions': {
                'min_actions': 3,
                'required_sequence': ['detect_gap', 'rank_issues', 'order_steps'],
            },
            'patient_id': 'P302',
            'patient_events': ['cardiac_event', 'icu_admission', 'multiple_failures_detected'],
            'events': ['cardiac_event', 'icu_admission', 'multiple_failures_detected'],
            'expected_missing_steps': [
                'stabilize_vitals',
                'cardiology_consult',
                'imaging_ordered',
                'medication_review',
                'family_notification',
            ],
            'expected_risk': 'critical',
            'priority_order': [
                'stabilize_vitals',
                'cardiology_consult',
                'imaging_ordered',
                'medication_review',
                'family_notification',
            ],
            'dependency_graph': {
                'family_notification': ['stabilize_vitals'],
                'medication_review': ['cardiology_consult', 'imaging_ordered'],
                'imaging_ordered': ['stabilize_vitals'],
                'cardiology_consult': ['stabilize_vitals'],
                'stabilize_vitals': [],
            },
            'required_steps': [
                'stabilize_vitals',
                'cardiology_consult',
                'imaging_ordered',
                'medication_review',
                'family_notification',
            ],
            'available_steps': [
                'stabilize_vitals',
                'cardiology_consult',
                'imaging_ordered',
                'medication_review',
                'family_notification',
            ],
            'task_description': (
                'Complex cardiac emergency. '
                'stabilize_vitals must come FIRST (everything depends on it). '
                'medication_review needs BOTH cardiology_consult AND imaging_ordered. '
                'Plan a recovery sequence that respects ALL dependencies.'
            ),
        },
        {
            'case_id': 'cli_hard_003',
            'completion_threshold': 0.55,
            'max_steps': 6,
            'done_conditions': {
                'min_actions': 3,
                'required_sequence': ['detect_gap', 'rank_issues', 'order_steps'],
            },
            'patient_id': 'P303',
            'patient_events': [
                'chemo_ordered',
                'lab_results_missing',
                'dose_unclear',
                'pharmacy_backlog',
            ],
            'events': [
                'chemo_ordered',
                'lab_results_missing',
                'dose_unclear',
                'pharmacy_backlog',
            ],
            'expected_missing_steps': [
                'baseline_cbc',
                'oncology_dose_verify',
                'pharmacy_prep',
                'nurse_admin_check',
            ],
            'expected_risk': 'critical',
            'priority_order': [
                'baseline_cbc',
                'oncology_dose_verify',
                'pharmacy_prep',
                'nurse_admin_check',
            ],
            'dependency_graph': {
                'nurse_admin_check': ['pharmacy_prep'],
                'pharmacy_prep': ['oncology_dose_verify', 'baseline_cbc'],
                'oncology_dose_verify': ['baseline_cbc'],
                'baseline_cbc': [],
            },
            'required_steps': [
                'baseline_cbc',
                'oncology_dose_verify',
                'pharmacy_prep',
                'nurse_admin_check',
            ],
            'available_steps': [
                'baseline_cbc',
                'oncology_dose_verify',
                'pharmacy_prep',
                'nurse_admin_check',
            ],
            'task_description': (
                'Chemotherapy workflow chaos. '
                'baseline_cbc must come first. '
                'oncology_dose_verify needs baseline_cbc. '
                'pharmacy_prep needs BOTH dose_verify AND baseline_cbc. '
                'nurse_admin_check needs pharmacy_prep. '
                'Detect, rank, then order correctly.'
            ),
        },
        {
            'case_id': 'cli_hard_004',
            'completion_threshold': 0.55,
            'max_steps': 6,
            'done_conditions': {
                'min_actions': 3,
                'required_sequence': ['detect_gap', 'rank_issues', 'order_steps'],
            },
            'patient_id': 'P304',
            'patient_events': [
                'transplant_scheduled',
                'donor_typing_incomplete',
                'immunosuppress_missing',
                'consent_partial',
            ],
            'events': [
                'transplant_scheduled',
                'donor_typing_incomplete',
                'immunosuppress_missing',
                'consent_partial',
            ],
            'expected_missing_steps': [
                'hla_typing',
                'crossmatch',
                'immunosuppress_order',
                'full_consent',
                'surgery_slot',
            ],
            'expected_risk': 'critical',
            'priority_order': [
                'hla_typing',
                'crossmatch',
                'full_consent',
                'immunosuppress_order',
                'surgery_slot',
            ],
            'dependency_graph': {
                'surgery_slot': [
                    'hla_typing',
                    'crossmatch',
                    'full_consent',
                    'immunosuppress_order',
                ],
                'immunosuppress_order': ['crossmatch'],
                'crossmatch': ['hla_typing'],
                'full_consent': [],
                'hla_typing': [],
            },
            'required_steps': [
                'hla_typing',
                'crossmatch',
                'immunosuppress_order',
                'full_consent',
                'surgery_slot',
            ],
            'available_steps': [
                'hla_typing',
                'crossmatch',
                'immunosuppress_order',
                'full_consent',
                'surgery_slot',
            ],
            'task_description': (
                'Organ transplant pre-op disaster. '
                'HLA typing must come first. '
                'Crossmatch needs HLA typing. '
                'Immunosuppression order needs crossmatch. '
                'Surgery booking requires ALL four prerequisites. '
                'One wrong order delays transplant.'
            ),
        },
        {
            'case_id': 'cli_hard_005',
            'completion_threshold': 0.55,
            'max_steps': 6,
            'done_conditions': {
                'min_actions': 3,
                'required_sequence': ['detect_gap', 'rank_issues', 'order_steps'],
            },
            'patient_id': 'P305',
            'patient_events': [
                'stroke_code',
                'imaging_delayed',
                'tpa_window_closing',
                'neuro_unavailable',
            ],
            'events': [
                'stroke_code',
                'imaging_delayed',
                'tpa_window_closing',
                'neuro_unavailable',
            ],
            'expected_missing_steps': [
                'ct_head',
                'neuro_consult',
                'tpa_eligibility',
                'family_consent',
                'icu_bed',
            ],
            'expected_risk': 'critical',
            'priority_order': [
                'ct_head',
                'tpa_eligibility',
                'neuro_consult',
                'family_consent',
                'icu_bed',
            ],
            'dependency_graph': {
                'icu_bed': ['tpa_eligibility'],
                'family_consent': ['tpa_eligibility', 'neuro_consult'],
                'neuro_consult': ['ct_head'],
                'tpa_eligibility': ['ct_head'],
                'ct_head': [],
            },
            'required_steps': [
                'ct_head',
                'neuro_consult',
                'tpa_eligibility',
                'family_consent',
                'icu_bed',
            ],
            'available_steps': [
                'ct_head',
                'neuro_consult',
                'tpa_eligibility',
                'family_consent',
                'icu_bed',
            ],
            'task_description': (
                'Acute stroke with closing tPA window. '
                'ct_head must come FIRST. '
                'Both tpa_eligibility and neuro_consult depend on ct_head. '
                'family_consent needs BOTH tpa_eligibility AND neuro_consult. '
                'icu_bed needs tpa_eligibility. '
                'Detect, rank, then order correctly.'
            ),
        },
    ],
}