EntropyEnv / server /datasets /clinical_cases.py
immortalindeed's picture
Major grading overhaul: difficulty multiplier, tighter scoring, mastery removal, precision penalties
72b3e8d
# server/datasets/clinical_cases.py
# Ground truth cases for Clinical Workflow Chaos Simulator tasks.
#
# FIXES APPLIED:
# 1. cli_easy:
#    - completion_threshold lowered to 0.65 (was 0.80).
#    - expected_missing_steps made more specific (not guessable from the
#      task description alone).
# 2. cli_medium:
#    - completion_threshold lowered to 0.60 (was 0.75).
#    - required_sequence now MUST include both detect_gap AND rank_issues.
#      Previously the episode ended at step 1 if completion_threshold was
#      met by detect_gap alone.
# 3. cli_hard:
#    - completion_threshold lowered to 0.55 (was 0.70).
#    - required_sequence MUST include all 3: detect_gap, rank_issues,
#      order_steps. This forces the full 3-step workflow to run every time.
def _make_case(case_id, patient_id, events, *, threshold, max_steps, sequence, **details):
    """Assemble one ground-truth case dict.

    The keys shared by every tier are emitted first, in a fixed order;
    the tier-specific fields passed as keyword arguments follow in the
    order they were given.

    Args:
        case_id: Unique case identifier, e.g. ``'cli_easy_001'``.
        patient_id: Patient identifier, e.g. ``'P101'``.
        events: Workflow events observed for the patient. Stored twice,
            under ``'patient_events'`` and ``'events'``, as two
            independent lists.
        threshold: Value stored under ``'completion_threshold'``.
        max_steps: Value stored under ``'max_steps'``.
        sequence: Action names stored (as a list) under
            ``done_conditions['required_sequence']``; its length becomes
            ``done_conditions['min_actions']``.
        **details: Remaining case fields (expected answers, available
            steps, dependency graph, task description, ...).

    Returns:
        A new dict that shares no mutable state with other cases.
    """
    case = {
        'case_id': case_id,
        'completion_threshold': threshold,
        'max_steps': max_steps,
        'done_conditions': {
            'min_actions': len(sequence),
            'required_sequence': list(sequence),
        },
        'patient_id': patient_id,
        # Two separate copies so mutating one key never affects the other.
        'patient_events': list(events),
        'events': list(events),
    }
    case.update(details)
    return case


def _easy(case_id, patient_id, events, **details):
    """Build a cli_easy case: a single detect_gap action is required.

    completion_threshold is 0.65 (was 0.80). required_sequence is the done
    trigger, so the episode ends only once detect_gap has been performed.
    """
    return _make_case(
        case_id, patient_id, events,
        threshold=0.65, max_steps=4,
        sequence=('detect_gap',),
        **details,
    )


def _medium(case_id, patient_id, events, **details):
    """Build a cli_medium case: detect_gap AND rank_issues are both required.

    completion_threshold is 0.60 (was 0.75). Listing both actions in
    required_sequence keeps the episode from ending after detect_gap alone.
    """
    return _make_case(
        case_id, patient_id, events,
        threshold=0.60, max_steps=6,
        sequence=('detect_gap', 'rank_issues'),
        **details,
    )


def _hard(case_id, patient_id, events, **details):
    """Build a cli_hard case: the full three-action workflow is required.

    completion_threshold is 0.55 (was 0.70 -- hard IS hard). All of
    detect_gap, rank_issues and order_steps must appear in required_sequence.
    """
    return _make_case(
        case_id, patient_id, events,
        threshold=0.55, max_steps=6,
        sequence=('detect_gap', 'rank_issues', 'order_steps'),
        **details,
    )


# Ground-truth cases, grouped by task id. Each dependency_graph maps a step
# to the list of steps it depends on (an empty list means no prerequisites).
CLINICAL_CASES = {
    # ------------------------------------------------------------------
    # Easy tier: find the one missing step and assess the risk level.
    # ------------------------------------------------------------------
    'cli_easy': [
        _easy(
            'cli_easy_001', 'P101',
            ['admission', 'surgery_scheduled', 'surgery_performed'],
            # Deliberately specific: 'pre_op_consent' is the answer and is
            # not guessable from available_steps alone.
            expected_missing_steps=['pre_op_consent'],
            expected_risk='critical',
            available_steps=['pre_op_consent', 'blood_work', 'anesthesia_consult', 'vitals_check', 'infection_screening'],
            task_description='A patient underwent surgery but the pre-operative checklist shows gaps. The patient_events show what happened. Identify the single most critical missing step from available_steps and assess the risk level.',
        ),
        _easy(
            'cli_easy_002', 'P102',
            ['admission', 'diagnosis', 'medication_prescribed', 'discharge'],
            expected_missing_steps=['allergy_check'],
            expected_risk='high',
            available_steps=['allergy_check', 'follow_up_scheduled', 'lab_results_reviewed', 'pharmacist_review', 'patient_education'],
            task_description='Find the single missing safety check in this medication workflow. Patient was discharged after medication was prescribed without a critical safety step.',
        ),
        _easy(
            'cli_easy_003', 'P103',
            ['er_admission', 'triage', 'treatment', 'discharge'],
            expected_missing_steps=['insurance_verification'],
            expected_risk='medium',
            available_steps=['insurance_verification', 'attending_consult', 'social_work_referral', 'discharge_summary', 'follow_up_appointment'],
            task_description='Find the missing administrative step in this ER discharge workflow.',
        ),
        _easy(
            'cli_easy_004', 'P104',
            ['admission', 'ct_scan_ordered', 'ct_scan_performed', 'diagnosis'],
            expected_missing_steps=['contrast_allergy_screen'],
            expected_risk='high',
            available_steps=['contrast_allergy_screen', 'kidney_function_test', 'radiologist_review', 'patient_consent', 'iv_access_check'],
            task_description='Find the single missing safety step that should have occurred before this contrast CT scan was performed.',
        ),
        _easy(
            'cli_easy_005', 'P105',
            ['admission', 'blood_transfusion_ordered', 'transfusion_started'],
            expected_missing_steps=['blood_type_crossmatch'],
            expected_risk='critical',
            available_steps=['blood_type_crossmatch', 'consent_form', 'vital_signs_baseline', 'hemoglobin_check', 'iv_gauge_verify'],
            task_description='A blood transfusion was started. Find the critical missing safety step that should have occurred before transfusion began.',
        ),
    ],
    # ------------------------------------------------------------------
    # Medium tier: detect every gap, then rank the gaps by priority.
    # ------------------------------------------------------------------
    'cli_medium': [
        _medium(
            'cli_medium_001', 'P201',
            ['admission', 'surgery_planned', 'insurance_denied', 'specialist_unavailable'],
            expected_missing_steps=['resolve_insurance', 'pre_op_consent', 'book_specialist'],
            expected_risk='critical',
            priority_order=['resolve_insurance', 'pre_op_consent', 'book_specialist'],
            available_steps=['resolve_insurance', 'pre_op_consent', 'book_specialist', 'schedule_surgery', 'anesthesia_consult'],
            dependency_graph={
                'schedule_surgery': ['resolve_insurance', 'pre_op_consent', 'book_specialist'],
                'pre_op_consent': [],
                'book_specialist': [],
                'resolve_insurance': [],
            },
            task_description='Multiple steps are missing in this surgical patient workflow. First detect ALL gaps (there are 3), then rank them by clinical priority. The priority order matters — insurance must be resolved before surgery can proceed.',
        ),
        _medium(
            'cli_medium_002', 'P202',
            ['er_admission', 'triage_level_2', 'medication_given'],
            expected_missing_steps=['allergy_check', 'attending_notification', 'vital_signs_check'],
            expected_risk='high',
            priority_order=['allergy_check', 'vital_signs_check', 'attending_notification'],
            available_steps=['allergy_check', 'attending_notification', 'vital_signs_check', 'lab_order', 'discharge_planning'],
            dependency_graph={
                'allergy_check': [],
                'vital_signs_check': [],
                'attending_notification': [],
                'lab_order': ['vital_signs_check'],
            },
            task_description='Multiple safety steps were skipped in this ER case where medication was given. Detect all 3 gaps, then rank them by urgency. Allergy check is highest priority because medication was already given.',
        ),
        _medium(
            'cli_medium_003', 'P203',
            ['admission', 'chemo_ordered', 'chemo_started', 'adverse_reaction'],
            expected_missing_steps=['baseline_labs', 'oncologist_approval', 'dose_verification'],
            expected_risk='critical',
            priority_order=['oncologist_approval', 'dose_verification', 'baseline_labs'],
            available_steps=['baseline_labs', 'oncologist_approval', 'dose_verification', 'pharmacy_review', 'patient_consent'],
            dependency_graph={
                'oncologist_approval': [],
                'dose_verification': ['oncologist_approval'],
                'baseline_labs': [],
                'pharmacy_review': ['dose_verification'],
            },
            task_description='Critical chemotherapy workflow violations caused an adverse reaction. Detect all 3 missing safety steps, then rank by urgency. Oncologist approval is highest priority — without it the other steps are meaningless.',
        ),
    ],
    # ------------------------------------------------------------------
    # Hard tier: detect, rank, then produce a dependency-ordered plan.
    # ------------------------------------------------------------------
    'cli_hard': [
        _hard(
            'cli_hard_001', 'P301',
            ['surgery_planned', 'insurance_denied', 'pre_op_test_skipped'],
            expected_missing_steps=['resolve_insurance', 'complete_pre_op', 'book_specialist', 'schedule_surgery'],
            expected_risk='critical',
            priority_order=['resolve_insurance', 'complete_pre_op', 'book_specialist', 'schedule_surgery'],
            dependency_graph={
                'schedule_surgery': ['resolve_insurance', 'complete_pre_op', 'book_specialist'],
                'complete_pre_op': ['resolve_insurance'],
                'book_specialist': [],
                'resolve_insurance': [],
            },
            required_steps=['resolve_insurance', 'complete_pre_op', 'book_specialist', 'schedule_surgery'],
            available_steps=['resolve_insurance', 'complete_pre_op', 'book_specialist', 'schedule_surgery'],
            task_description='Complex surgical patient has 4 workflow failures. Detect ALL gaps, rank by priority, then plan a dependency-ordered recovery: resolve_insurance must come first (complete_pre_op depends on it), schedule_surgery must come last (depends on all others).',
        ),
        _hard(
            'cli_hard_002', 'P302',
            ['cardiac_event', 'icu_admission', 'multiple_failures_detected'],
            expected_missing_steps=['stabilize_vitals', 'cardiology_consult', 'imaging_ordered', 'medication_review', 'family_notification'],
            expected_risk='critical',
            priority_order=['stabilize_vitals', 'cardiology_consult', 'imaging_ordered', 'medication_review', 'family_notification'],
            dependency_graph={
                'family_notification': ['stabilize_vitals'],
                'medication_review': ['cardiology_consult', 'imaging_ordered'],
                'imaging_ordered': ['stabilize_vitals'],
                'cardiology_consult': ['stabilize_vitals'],
                'stabilize_vitals': [],
            },
            required_steps=['stabilize_vitals', 'cardiology_consult', 'imaging_ordered', 'medication_review', 'family_notification'],
            available_steps=['stabilize_vitals', 'cardiology_consult', 'imaging_ordered', 'medication_review', 'family_notification'],
            task_description='Complex cardiac emergency. stabilize_vitals must come FIRST (everything depends on it). medication_review needs BOTH cardiology_consult AND imaging_ordered. Plan a recovery sequence that respects ALL dependencies.',
        ),
        _hard(
            'cli_hard_003', 'P303',
            ['chemo_ordered', 'lab_results_missing', 'dose_unclear', 'pharmacy_backlog'],
            expected_missing_steps=['baseline_cbc', 'oncology_dose_verify', 'pharmacy_prep', 'nurse_admin_check'],
            expected_risk='critical',
            priority_order=['baseline_cbc', 'oncology_dose_verify', 'pharmacy_prep', 'nurse_admin_check'],
            dependency_graph={
                'nurse_admin_check': ['pharmacy_prep'],
                'pharmacy_prep': ['oncology_dose_verify', 'baseline_cbc'],
                'oncology_dose_verify': ['baseline_cbc'],
                'baseline_cbc': [],
            },
            required_steps=['baseline_cbc', 'oncology_dose_verify', 'pharmacy_prep', 'nurse_admin_check'],
            available_steps=['baseline_cbc', 'oncology_dose_verify', 'pharmacy_prep', 'nurse_admin_check'],
            task_description='Chemotherapy workflow chaos. baseline_cbc must come first. oncology_dose_verify needs baseline_cbc. pharmacy_prep needs BOTH dose_verify AND baseline_cbc. nurse_admin_check needs pharmacy_prep. Detect, rank, then order correctly.',
        ),
        _hard(
            'cli_hard_004', 'P304',
            ['transplant_scheduled', 'donor_typing_incomplete', 'immunosuppress_missing', 'consent_partial'],
            expected_missing_steps=['hla_typing', 'crossmatch', 'immunosuppress_order', 'full_consent', 'surgery_slot'],
            expected_risk='critical',
            priority_order=['hla_typing', 'crossmatch', 'full_consent', 'immunosuppress_order', 'surgery_slot'],
            dependency_graph={
                'surgery_slot': ['hla_typing', 'crossmatch', 'full_consent', 'immunosuppress_order'],
                'immunosuppress_order': ['crossmatch'],
                'crossmatch': ['hla_typing'],
                'full_consent': [],
                'hla_typing': [],
            },
            required_steps=['hla_typing', 'crossmatch', 'immunosuppress_order', 'full_consent', 'surgery_slot'],
            available_steps=['hla_typing', 'crossmatch', 'immunosuppress_order', 'full_consent', 'surgery_slot'],
            task_description='Organ transplant pre-op disaster. HLA typing must come first. Crossmatch needs HLA typing. Immunosuppression order needs crossmatch. Surgery booking requires ALL four prerequisites. One wrong order delays transplant.',
        ),
        _hard(
            'cli_hard_005', 'P305',
            ['stroke_code', 'imaging_delayed', 'tpa_window_closing', 'neuro_unavailable'],
            expected_missing_steps=['ct_head', 'neuro_consult', 'tpa_eligibility', 'family_consent', 'icu_bed'],
            expected_risk='critical',
            priority_order=['ct_head', 'tpa_eligibility', 'neuro_consult', 'family_consent', 'icu_bed'],
            dependency_graph={
                'icu_bed': ['tpa_eligibility'],
                'family_consent': ['tpa_eligibility', 'neuro_consult'],
                'neuro_consult': ['ct_head'],
                'tpa_eligibility': ['ct_head'],
                'ct_head': [],
            },
            required_steps=['ct_head', 'neuro_consult', 'tpa_eligibility', 'family_consent', 'icu_bed'],
            available_steps=['ct_head', 'neuro_consult', 'tpa_eligibility', 'family_consent', 'icu_bed'],
            task_description='Acute stroke with closing tPA window. ct_head must come FIRST. Both tpa_eligibility and neuro_consult depend on ct_head. family_consent needs BOTH tpa_eligibility AND neuro_consult. icu_bed needs tpa_eligibility. Detect, rank, then order correctly.',
        ),
    ],
}