Spaces:
Sleeping
Sleeping
| """ | |
Task definitions for the Clinical Trial Protocol Review environment.

Each task has:
- A scenario (protocol + patient records + adverse events)
- A deterministic grader returning a score clamped to [0.01, 0.99]
- A difficulty: easy | medium | hard
| """ | |
| from __future__ import annotations | |
| import re | |
| from typing import Any, Dict, List, Tuple | |
| # --------------------------------------------------------------------------- | |
| # Shared helpers | |
| # --------------------------------------------------------------------------- | |
| def _normalize(text: str) -> str: | |
| return text.lower().strip() | |
def _mentions_any(text: str, keywords: List[str]) -> bool:
    """Return True if any keyword occurs as a substring of the normalized *text*.

    The text is normalized with ``_normalize`` (lower-cased, stripped), so the
    keywords are lower-cased as well; otherwise a keyword containing an
    upper-case letter could never match. Empty keywords are ignored because
    ``"" in s`` is true for every string and would make the check meaningless.
    Matching is plain substring matching, so stems such as "hospitali" work.
    """
    haystack = _normalize(text)
    return any(k.lower() in haystack for k in keywords if k)
def _finding_types_in(findings: List[Dict], ftype: str) -> List[Dict]:
    """Return the findings whose ``finding_type`` equals *ftype*.

    The comparison uses ``_normalize`` on both sides (case-insensitive,
    surrounding whitespace ignored). A finding whose ``finding_type`` is
    missing or ``None`` is treated as having an empty type.
    """
    # Normalize the wanted type once instead of once per finding.
    wanted = _normalize(ftype)
    return [f for f in findings if _normalize(f.get("finding_type") or "") == wanted]
# ===========================================================================
# TASK 1 — Easy: Eligibility Violation Screening
# ===========================================================================
# Protocol synopsis for the eligibility-screening scenario (trial ONCO-2024-301).
# The comparison operators, the superscript in the eGFR unit and the dash had
# been corrupted by a bad character-set round trip (e.g. "β₯" for "≥"); they
# are restored here so the thresholds read unambiguously.
TASK1_PROTOCOL_SUMMARY = """
TRIAL: ONCO-2024-301 Phase II — Investigational drug XR-7 for metastatic colorectal cancer.
SPONSOR: NovaPharma Inc.
PHASE: II (Open-label, single-arm)
INCLUSION CRITERIA (ALL must be met):
IC-1: Age ≥ 18 and ≤ 75 years
IC-2: Histologically confirmed metastatic colorectal adenocarcinoma
IC-3: ECOG Performance Status 0 or 1
IC-4: Adequate renal function: eGFR ≥ 60 mL/min/1.73m²
IC-5: No prior treatment with XR-7 or any KRAS-G12C inhibitor
EXCLUSION CRITERIA (ANY disqualifies):
EC-1: Active uncontrolled infection
EC-2: Prior solid organ transplant
EC-3: Pregnancy or breastfeeding
EC-4: QTcF > 480 ms on screening ECG
EC-5: Concurrent use of strong CYP3A4 inhibitors
"""
# Five screening records for ONCO-2024-301. Every subject shares the same
# diagnosis and has no active infection, transplant or pregnancy, so only the
# fields that vary are listed per row; the comprehension expands each row into
# a full record with the keys in their canonical order.
TASK1_PATIENT_RECORDS = [
    {
        "subject_id": sid,
        "age": age,
        "diagnosis": "Metastatic colorectal adenocarcinoma",
        "ecog_ps": ecog,
        "egfr": egfr,
        "prior_kras_inhibitor": prior_kras,
        "active_infection": False,
        "organ_transplant": False,
        "pregnant": False,
        "qtcf_ms": qtcf,
        "cyp3a4_inhibitor": on_cyp3a4,
        "notes": notes,
    }
    # Row layout: subject, age, ECOG PS, eGFR, prior KRAS inhibitor, QTcF (ms),
    # strong CYP3A4 inhibitor, free-text notes.
    for sid, age, ecog, egfr, prior_kras, qtcf, on_cyp3a4, notes in (
        # No violations.
        ("PT-001", 68, 1, 72, False, 455, False,
         "Standard patient, enrolled without issues."),
        # VIOLATION: age > 75 (IC-1)
        ("PT-002", 78, 1, 65, False, 460, False,
         "Enrolled 3 days after protocol amendment discussion (not yet approved)."),
        # VIOLATION: ECOG PS = 2, must be 0 or 1 (IC-3); eGFR < 60 (IC-4)
        ("PT-003", 55, 2, 58, False, 470, False,
         "Patient self-reported feeling fatigued. Site enrolled after verbal PI approval."),
        # VIOLATION: prior KRAS-G12C inhibitor use (IC-5)
        ("PT-004", 61, 0, 80, True, 440, False,
         "Patient had prior sotorasib therapy 14 months ago."),
        # VIOLATION: QTcF > 480 ms (EC-4); strong CYP3A4 inhibitor (EC-5)
        ("PT-005", 45, 1, 90, False, 495, True,
         "Patient on ketoconazole for fungal infection. ECG borderline."),
    )
]
| # Ground truth: which subject_ids have violations and what kind | |
# Answer key for Task 1: subject ID -> violated criteria. Every violation in
# this scenario carries the same "critical" severity.
TASK1_GROUND_TRUTH = {
    subject_id: {"criteria": list(violated), "severity": "critical"}
    for subject_id, violated in (
        ("PT-002", ("IC-1",)),
        ("PT-003", ("IC-3", "IC-4")),
        ("PT-004", ("IC-5",)),
        ("PT-005", ("EC-4", "EC-5")),
    )
}
def grade_task1(findings: List[Dict], rationale: str) -> Tuple[float, str]:
    """
    Score Task 1: Eligibility Violation Screening.

    Only findings whose ``finding_type`` is ``eligibility_violation`` or
    ``protocol_deviation`` are considered. A subject counts as flagged when at
    least one such finding carries its ``subject_id`` (compared after
    stripping whitespace and upper-casing). The violation is recognized by
    keyword search over the pooled ``description`` + ``recommendation`` text
    of that subject's findings.

    Scoring (max 100 pts):
    - PT-002, PT-004 (one violation each): 20 pts when flagged and the text
      names the violation; 10 pts when flagged but the violation is unclear.
    - PT-003, PT-005 (two violations each): 20 pts when both are named,
      10 pts when only one is, 8 pts when flagged without naming either.
    - PT-001 (no violation): +20 pts when left alone, -10 pts when flagged.

    Args:
        findings: Finding dicts with ``finding_type``, ``subject_id``,
            ``description`` and ``recommendation`` keys. Missing or ``None``
            values are treated as empty strings.
        rationale: Accepted for signature parity with the other graders;
            not used for scoring.

    Returns:
        ``(score, feedback)`` where ``score`` is ``pts / 100`` clamped to
        [0.01, 0.99] and ``feedback`` is a multi-line, human-readable breakdown.
    """
    max_pts = 100
    pts = 0
    feedback_parts = []

    def _text(value) -> str:
        # Missing or None fields become "" so string concatenation never fails.
        return "" if value is None else str(value)

    # Pool the normalized description + recommendation text per flagged subject.
    # The subject ID is cleaned exactly once, so the "was it flagged?" check and
    # the text lookup can never disagree (e.g. on an ID with trailing whitespace).
    text_by_subject = {}
    for f in findings or []:
        ftype = _normalize(_text(f.get("finding_type")))
        if ftype not in ("eligibility_violation", "protocol_deviation"):
            continue
        sid = _text(f.get("subject_id")).strip().upper()
        text = _normalize(_text(f.get("description")) + " " + _text(f.get("recommendation")))
        text_by_subject.setdefault(sid, []).append(text)
    flagged_subjects = set(text_by_subject)

    def desc_of(sid):
        return " ".join(text_by_subject.get(sid, []))

    # PT-002: age violation
    if "PT-002" in flagged_subjects:
        d = desc_of("PT-002")
        if _mentions_any(d, ["age", "78", "ic-1", "inclusion"]):
            pts += 20
            feedback_parts.append("✓ PT-002 age violation correctly identified (+20)")
        else:
            pts += 10
            feedback_parts.append("~ PT-002 flagged but violation type unclear (+10)")
    else:
        feedback_parts.append("✗ PT-002 age violation missed (0)")

    # PT-003: ECOG PS + eGFR
    if "PT-003" in flagged_subjects:
        d = desc_of("PT-003")
        # "ps" must be a standalone token: as a bare substring it would also
        # match "perhaps", "relapse", "steps", ... and award the ECOG finding
        # to a description that never mentions performance status.
        found_ecog = (
            _mentions_any(d, ["ecog", "performance", "ic-3"])
            or re.search(r"\bps\b", d) is not None
        )
        found_egfr = _mentions_any(d, ["egfr", "renal", "kidney", "58", "ic-4"])
        if found_ecog and found_egfr:
            pts += 20
            feedback_parts.append("✓ PT-003 both ECOG and eGFR violations found (+20)")
        elif found_ecog or found_egfr:
            pts += 10
            feedback_parts.append("~ PT-003 only one of two violations found (+10)")
        else:
            pts += 8
            feedback_parts.append("~ PT-003 flagged but violations not specified (+8)")
    else:
        feedback_parts.append("✗ PT-003 violations missed (0)")

    # PT-004: prior KRAS inhibitor
    if "PT-004" in flagged_subjects:
        d = desc_of("PT-004")
        if _mentions_any(d, ["kras", "sotorasib", "prior", "ic-5", "inhibitor"]):
            pts += 20
            feedback_parts.append("✓ PT-004 prior KRAS inhibitor correctly identified (+20)")
        else:
            pts += 10
            feedback_parts.append("~ PT-004 flagged but violation type unclear (+10)")
    else:
        feedback_parts.append("✗ PT-004 prior KRAS inhibitor violation missed (0)")

    # PT-005: QTcF + CYP3A4
    if "PT-005" in flagged_subjects:
        d = desc_of("PT-005")
        found_qtcf = _mentions_any(d, ["qtcf", "qt", "ecg", "480", "495", "ec-4"])
        found_cyp = _mentions_any(d, ["cyp", "ketoconazole", "ec-5", "inhibitor"])
        if found_qtcf and found_cyp:
            pts += 20
            feedback_parts.append("✓ PT-005 both QTcF and CYP3A4 violations found (+20)")
        elif found_qtcf or found_cyp:
            pts += 10
            feedback_parts.append("~ PT-005 only one of two violations found (+10)")
        else:
            pts += 8
            feedback_parts.append("~ PT-005 flagged but violations not specified (+8)")
    else:
        feedback_parts.append("✗ PT-005 violations missed (0)")

    # PT-001 is the only clean record: leaving it alone earns the remaining
    # 20 pts, flagging it is a false positive.
    if "PT-001" in flagged_subjects:
        pts -= 10
        feedback_parts.append("✗ PT-001 incorrectly flagged as violation (-10)")
    else:
        pts += 20
        feedback_parts.append("✓ PT-001 correctly not flagged as violation (+20)")

    # Scores are kept strictly inside (0, 1).
    score = max(0.01, min(0.99, pts / max_pts))
    feedback = f"Task 1 Score: {pts}/{max_pts} ({score:.2f})\n" + "\n".join(feedback_parts)
    return score, feedback
# ===========================================================================
# TASK 2 — Medium: Adverse Event Severity Misclassification
# ===========================================================================
# Protocol synopsis for the adverse-event scenario (trial CARD-2024-112).
# Dashes in the grade table and the "II–III" / "3–5" ranges had been corrupted
# by a bad character-set round trip and are restored here.
TASK2_PROTOCOL_SUMMARY = """
TRIAL: CARD-2024-112 Phase III — Drug BX-9 for heart failure (NYHA Class II–III).
SPONSOR: CardioVita Therapeutics
PHASE: III (Randomized, double-blind, placebo-controlled)
ADVERSE EVENT CLASSIFICATION (per ICH E2A):
Grade 1 (Mild) — Asymptomatic/mild symptoms; no intervention needed
Grade 2 (Moderate)— Minimal intervention indicated; limits instrumental ADL
Grade 3 (Severe) — Medically significant but not immediately life-threatening;
hospitalization or prolongation of hospitalization
Grade 4 (Life-threatening) — Urgent intervention indicated
Grade 5 (Fatal) — Death related to AE
SERIOUS ADVERSE EVENTS (SAEs) must be reported to sponsor within 24 hours if:
- Any Grade 3–5 event
- Any hospitalization (even Grade 2)
- Any event deemed life-threatening by the investigator
- Congenital anomaly/birth defect
- Any other medically important event
PROTOCOL NOTE: Dyspnea requiring hospitalization is a pre-specified AE of Special Interest (AESI).
"""
# Seven adverse-event reports for CARD-2024-112. Besides the site-reported
# fields, each record carries the answer key ("correct_grade", "is_sae",
# "misclassified" and, for misclassified events, "issue").
# NOTE(review): these answer-key fields live in the same dicts as the scenario
# data; confirm the environment strips them before showing events to an agent.
TASK2_ADVERSE_EVENTS = [
    {
        "ae_id": "AE-001",
        "subject_id": "PT-201",
        "event": "Headache",
        "site_reported_grade": 1,
        "hospitalized": False,
        "duration_days": 2,
        "intervention": "Acetaminophen PRN",
        "notes": "Resolved spontaneously.",
        "correct_grade": 1,
        "is_sae": False,
        "misclassified": False,
    },
    {
        "ae_id": "AE-002",
        "subject_id": "PT-202",
        "event": "Severe dyspnea with hospitalization for 3 days",
        "site_reported_grade": 2,  # MISCLASSIFIED — should be Grade 3 SAE + AESI
        "hospitalized": True,
        "duration_days": 3,
        "intervention": "IV diuretics, oxygen supplementation",
        "notes": "Patient hospitalized. Site reported as Grade 2 and did not file SAE report.",
        "correct_grade": 3,
        "is_sae": True,
        "misclassified": True,
        "issue": "Grade downgraded; SAE not filed; AESI not flagged",
    },
    {
        "ae_id": "AE-003",
        "subject_id": "PT-203",
        "event": "Mild peripheral edema",
        "site_reported_grade": 2,
        "hospitalized": False,
        "duration_days": 14,
        "intervention": "Dose reduction",
        "notes": "Managed with dose reduction. Ongoing.",
        "correct_grade": 2,
        "is_sae": False,
        "misclassified": False,
    },
    {
        "ae_id": "AE-004",
        "subject_id": "PT-204",
        "event": "Ventricular tachycardia — 30-second episode, asymptomatic, resolved spontaneously",
        "site_reported_grade": 1,  # MISCLASSIFIED — VT is Grade 3 minimum; potentially Grade 4
        "hospitalized": False,
        "duration_days": 0,
        "intervention": "None; monitored",
        "notes": "Recorded on Holter monitor. Patient unaware. Site classified as Grade 1.",
        "correct_grade": 3,
        "is_sae": True,
        "misclassified": True,
        "issue": "VT always ≥ Grade 3; SAE report missing; cardiac safety signal",
    },
    {
        "ae_id": "AE-005",
        "subject_id": "PT-205",
        "event": "Acute kidney injury — creatinine 3.2x ULN",
        "site_reported_grade": 2,  # MISCLASSIFIED — creatinine >3x ULN = Grade 3
        "hospitalized": True,
        "duration_days": 5,
        "intervention": "IV fluids, nephrology consult, drug held",
        "notes": "Creatinine peaked at 3.2x ULN. Patient hospitalized. Site reported Grade 2.",
        "correct_grade": 3,
        "is_sae": True,
        "misclassified": True,
        "issue": "Creatinine >3x ULN = Grade 3 per CTCAE; hospitalization = SAE",
    },
    {
        "ae_id": "AE-006",
        "subject_id": "PT-206",
        "event": "Nausea with vomiting, unable to maintain oral intake for 24h",
        "site_reported_grade": 3,
        "hospitalized": True,
        "duration_days": 2,
        "intervention": "IV antiemetics, IV hydration",
        "notes": "SAE filed. Resolved with treatment.",
        "correct_grade": 3,
        "is_sae": True,
        "misclassified": False,
    },
    {
        "ae_id": "AE-007",
        "subject_id": "PT-207",
        "event": "Fatigue — patient reports feeling tired but continues all daily activities",
        "site_reported_grade": 3,  # MISCLASSIFIED — should be Grade 1 (continues ADL)
        "hospitalized": False,
        "duration_days": 7,
        "intervention": "None",
        "notes": "Site over-reported as Grade 3. No hospitalization, no intervention.",
        "correct_grade": 1,
        "is_sae": False,
        "misclassified": True,
        "issue": "Fatigue with no ADL limitation = Grade 1; Grade 3 is over-reporting",
    },
]

# Derived from the records above so the ID set cannot drift out of sync with
# the per-event "misclassified" flags.
TASK2_MISCLASSIFIED_AE_IDS = {
    ae["ae_id"] for ae in TASK2_ADVERSE_EVENTS if ae["misclassified"]
}
def grade_task2(findings: List[Dict], rationale: str) -> Tuple[float, str]:
    """
    Score Task 2: Adverse Event Severity Misclassification.

    Only findings whose ``finding_type`` is ``adverse_event``,
    ``protocol_deviation`` or ``safety_concern`` are considered. A finding is
    attributed to an adverse event when its text mentions the AE ID or when
    its ``subject_id`` is the subject of that event. Credit is decided by
    keyword search over the pooled ``description`` + ``recommendation`` text.

    Scoring (max 100 pts):
    - AE-002 (dyspnea): 10 pts with the corrected grade / SAE named, else 5;
      +5 for the AESI / SAE / hospitalization aspect.
    - AE-004 (VT): 12 pts with the corrected grade / SAE named, else 6;
      +5 for recognizing the cardiac safety signal.
    - AE-005 (AKI): 15 pts with the grade / CTCAE threshold named, else 8.
    - AE-007 (fatigue): 15 pts with the over-reporting named, else 8.
    - Rationale mentioning systemic reporting issues: +5.
    - AE-001, AE-003, AE-006 (correctly classified): -8 for each one flagged;
      +33 when none of them is flagged.

    Args:
        findings: Finding dicts with ``finding_type``, ``subject_id``,
            ``description`` and ``recommendation`` keys. Missing or ``None``
            values are treated as empty strings.
        rationale: Free-text overall rationale; ``None`` is treated as empty.

    Returns:
        ``(score, feedback)`` where ``score`` is ``pts / 100`` clamped to
        [0.01, 0.99] and ``feedback`` is a multi-line, human-readable breakdown.
    """
    max_pts = 100
    pts = 0
    feedback_parts = []

    def _text(value) -> str:
        # Missing or None fields become "" so string concatenation never fails.
        return "" if value is None else str(value)

    ae_to_subj = {
        "AE-002": "PT-202", "AE-004": "PT-204",
        "AE-005": "PT-205", "AE-007": "PT-207",
        "AE-001": "PT-201", "AE-003": "PT-203", "AE-006": "PT-206",
    }

    # (subject ID, normalized text) of every relevant finding, computed once.
    ae_findings = []
    for f in findings or []:
        ftype = _normalize(_text(f.get("finding_type")))
        if ftype not in ("adverse_event", "protocol_deviation", "safety_concern"):
            continue
        ae_findings.append((
            _text(f.get("subject_id")).strip().upper(),
            _normalize(_text(f.get("description")) + " " + _text(f.get("recommendation"))),
        ))

    def review_of(ae_id):
        """Return (flagged, pooled text) for the findings attributed to *ae_id*."""
        sid = ae_to_subj.get(ae_id, "")
        needle = ae_id.lower()  # finding text is already lower-cased
        matched = [text for subj, text in ae_findings if needle in text or subj == sid]
        # Flagging depends on a finding matching, not on its text being
        # non-empty: a finding that only carries the subject ID still counts.
        return bool(matched), " ".join(matched)

    # NOTE(review): the bare digits "3" and "1" in the keyword lists below also
    # match unrelated numbers (e.g. "3 days"); kept for scoring compatibility.

    # AE-002: dyspnea — Grade 2→3, SAE, AESI
    flagged, d = review_of("AE-002")
    if flagged:
        grade_correct = _mentions_any(d, ["grade 3", "grade-3", "grade3", "3", "sae", "serious"])
        aesi_flag = _mentions_any(d, ["aesi", "special interest", "dyspnea", "sae", "hospitali"])
        pts += 10 if grade_correct else 5
        pts += 5 if aesi_flag else 0
        feedback_parts.append(f"✓ AE-002 flagged {'with grade+SAE correction' if grade_correct and aesi_flag else 'partially'} (+{10 if grade_correct else 5}{'+5 AESI' if aesi_flag else ''})")
    else:
        feedback_parts.append("✗ AE-002 dyspnea misclassification missed (0)")

    # AE-004: VT — Grade 1→3, SAE
    flagged, d = review_of("AE-004")
    if flagged:
        grade_correct = _mentions_any(d, ["grade 3", "grade-3", "3", "sae", "serious", "life"])
        cardiac_signal = _mentions_any(d, ["cardiac", "ventricular", "vt", "safety", "sae"])
        pts += 12 if grade_correct else 6
        pts += 5 if cardiac_signal else 0
        feedback_parts.append(f"✓ AE-004 VT flagged {'with cardiac SAE recognition' if cardiac_signal else 'partially'} (+{12 if grade_correct else 6}{'+5' if cardiac_signal else ''})")
    else:
        feedback_parts.append("✗ AE-004 VT misclassification missed (0)")

    # AE-005: AKI — Grade 2→3
    flagged, d = review_of("AE-005")
    if flagged:
        grade_correct = _mentions_any(d, ["grade 3", "3", "ctcae", "3x", "uln"])
        pts += 15 if grade_correct else 8
        feedback_parts.append(f"✓ AE-005 AKI flagged {'with CTCAE reference' if grade_correct else 'partially'} (+{15 if grade_correct else 8})")
    else:
        feedback_parts.append("✗ AE-005 AKI misclassification missed (0)")

    # AE-007: fatigue — Grade 3→1 (over-reporting)
    flagged, d = review_of("AE-007")
    if flagged:
        grade_correct = _mentions_any(d, ["grade 1", "1", "over", "over-report", "adl", "downgrade"])
        pts += 15 if grade_correct else 8
        feedback_parts.append(f"✓ AE-007 fatigue over-reporting flagged {'correctly' if grade_correct else 'partially'} (+{15 if grade_correct else 8})")
    else:
        feedback_parts.append("✗ AE-007 fatigue over-reporting missed (0)")

    # Bonus for a rationale that covers the systemic reporting issue
    if _mentions_any(_text(rationale), ["sae", "reporting", "systematic", "site training", "pattern"]):
        pts += 5
        feedback_parts.append("✓ Rationale identifies systemic reporting issues (+5)")

    # Correctly classified AEs must be left alone (true-negative baseline)
    fp_penalty = 0
    for ae_id in ("AE-001", "AE-003", "AE-006"):
        if review_of(ae_id)[0]:
            pts -= 8
            fp_penalty += 8
            feedback_parts.append(f"✗ {ae_id} incorrectly flagged as misclassified (-8)")
    if fp_penalty == 0:
        pts += 33  # baseline for correctly leaving good AEs alone
        feedback_parts.append("✓ No false positives on correctly classified AEs (+33)")

    # Scores are kept strictly inside (0, 1).
    score = max(0.01, min(0.99, pts / max_pts))
    feedback = f"Task 2 Score: {pts}/{max_pts} ({score:.2f})\n" + "\n".join(feedback_parts)
    return score, feedback
# ===========================================================================
# TASK 3 — Hard: Comprehensive Protocol Amendment Review
# ===========================================================================
# Full protocol excerpt for the amendment-review scenario (NEURO-2024-450),
# including the six proposed amendments A–F. Comparison operators, arrows,
# range dashes and the section rule lines had been corrupted by a bad
# character-set round trip and are restored here.
TASK3_PROTOCOL_TEXT = """
PROTOCOL: NEURO-2024-450 Phase II — Drug NX-12 for treatment-resistant depression (TRD)
VERSION: 1.2 (Under Review for Amendment)
SPONSOR: MindBridge Pharmaceuticals
───────────────────────────────────────────────────
SECTION 3: STUDY DESIGN
───────────────────────────────────────────────────
Open-label, single-arm study. No control arm. Duration: 12 weeks active treatment.
Primary endpoint: Change from baseline in MADRS score at Week 8.
Secondary endpoint: Response rate (≥50% MADRS reduction) at Week 12.
PROPOSED CHANGE (Amendment A):
Extend treatment from 12 → 24 weeks with no change to primary endpoint timing.
PROPOSED CHANGE (Amendment B):
Add an optional open-label extension (OLE) of up to 52 weeks for responders.
Consent for OLE to be obtained at Week 12 visit (same day as eligibility assessment).
───────────────────────────────────────────────────
SECTION 4: PATIENT POPULATION
───────────────────────────────────────────────────
INCLUSION:
- Age 22–65
- DSM-5 diagnosis of MDD, current episode resistant to ≥2 adequate antidepressant trials
- MADRS score ≥28 at screening AND baseline (within 3 days of screening)
- Capable of providing informed consent
EXCLUSION:
- Active suicidal ideation with plan or intent (C-SSRS score ≥4)
- Current or recent (within 6 months) substance use disorder
- Pregnancy or intent to become pregnant
- Current use of MAOIs
PROPOSED CHANGE (Amendment C):
Relax exclusion EC-CSSRS: Allow enrollment of patients with C-SSRS score 4
(active suicidal ideation WITH plan, WITHOUT intent) if monitored weekly.
───────────────────────────────────────────────────
SECTION 5: SAFETY MONITORING
───────────────────────────────────────────────────
Data Safety Monitoring Board (DSMB): Annual review only.
Suicidality monitoring: C-SSRS at baseline and Week 8.
Blood pressure monitoring: At screening, Week 4, Week 12.
NX-12 KNOWN RISKS (from Phase I):
- Dose-dependent blood pressure elevation (mean +18 mmHg systolic at max dose)
- Transient dissociative symptoms in 23% of patients at Weeks 1-2
- Two cases of hypertensive urgency in Phase I (n=42)
PROPOSED CHANGE (Amendment D):
Increase DSMB review from annual to quarterly.
Add BP monitoring at Weeks 2 and 8.
───────────────────────────────────────────────────
SECTION 6: STATISTICAL ANALYSIS PLAN
───────────────────────────────────────────────────
Sample size: N=45 (powered for 80% power to detect 7-point MADRS change)
Analysis population: Per-protocol (PP) only.
Missing data: Last observation carried forward (LOCF).
Interim analysis: None planned.
PROPOSED CHANGE (Amendment E):
Add a single interim analysis at 50% enrollment for futility only.
───────────────────────────────────────────────────
SECTION 7: INFORMED CONSENT
───────────────────────────────────────────────────
Current consent process: Written informed consent obtained at screening visit.
Consent document revision required for all amendments.
PROPOSED CHANGE (Amendment F):
Allow verbal consent for the OLE (Amendment B) to reduce patient burden,
with written consent waived if patient signs OLE enrollment form.
"""
| # Ground truth: what a qualified medical monitor / regulatory reviewer would flag | |
# Answer key for Task 3. Each entry has a reviewer-facing description, a
# severity label, the keywords the grader searches for, and the points the
# issue is worth (the weights sum to 100). Several keywords are deliberate
# word stems ("suicid", "coercio", "waiv", ...). Dashes and the "≥" sign in
# the descriptions had been corrupted by a bad character-set round trip and
# are restored here; keywords and weights are unchanged.
TASK3_GROUND_TRUTH_ISSUES = {
    "amendment_c_safety": {
        "description": "Amendment C allows suicidal patients (C-SSRS≥4) with plan — unacceptable in TRD trial without intensive monitoring infrastructure beyond 'weekly check'",
        "severity": "critical",
        "keywords": ["suicid", "c-ssrs", "amendment c", "safety", "vulnerable", "risk"],
        "weight": 20,
    },
    "amendment_b_consent_timing": {
        "description": "OLE consent at same visit as eligibility assessment creates undue influence / coercion risk",
        "severity": "major",
        "keywords": ["ole", "consent", "same day", "coercio", "undue", "extension", "amendment b"],
        "weight": 15,
    },
    "amendment_f_verbal_consent": {
        "description": "Verbal consent for OLE is insufficient per ICH E6(R2) GCP — written consent required for all interventional trial participation",
        "severity": "critical",
        "keywords": ["verbal", "consent", "gcp", "ich", "written", "waiv", "amendment f"],
        "weight": 15,
    },
    "suicidality_monitoring_gap": {
        "description": "C-SSRS only at baseline and Week 8 is insufficient for TRD + suicidal risk population; should be monthly minimum",
        "severity": "major",
        "keywords": ["c-ssrs", "suicid", "monitoring", "frequen", "weekly", "monthly", "trd"],
        "weight": 15,
    },
    "bp_monitoring_still_insufficient": {
        "description": "Even with Amendment D, BP monitoring is missing at Weeks 1, 6, 16, 20 — given +18 mmHg known risk, monthly monitoring minimum is needed during OLE",
        "severity": "major",
        "keywords": ["blood pressure", "bp", "hypertens", "monitoring", "amendment d", "ole"],
        "weight": 10,
    },
    "no_control_arm_bias": {
        "description": "Open-label single-arm design in TRD (high placebo response ~30-40%) without randomized comparator limits interpretability; Amendment A extension compounds this",
        "severity": "major",
        "keywords": ["open-label", "control", "placebo", "bias", "single-arm", "interpretab"],
        "weight": 10,
    },
    "locf_missing_data": {
        "description": "LOCF is discouraged by FDA/EMA for psychiatric trials; multiple imputation or mixed-model repeated measures preferred",
        "severity": "minor",
        "keywords": ["locf", "missing data", "imputation", "fda", "ema", "mmrm"],
        "weight": 8,
    },
    "interim_analysis_alpha_spend": {
        "description": "Amendment E adds futility interim but no alpha-spending rule or stopping boundaries specified — protocol gap",
        "severity": "minor",
        "keywords": ["interim", "alpha", "spend", "futility", "boundar", "amendment e"],
        "weight": 7,
    },
}
def grade_task3(findings: List[Dict], rationale: str) -> Tuple[float, str]:
    """
    Score Task 3: Comprehensive Protocol Amendment Review.

    The rationale and every finding's ``description`` and ``recommendation``
    are pooled into one normalized text. For each entry of
    ``TASK3_GROUND_TRUTH_ISSUES`` the fraction of its keywords found in that
    text decides the credit:

    - hit rate >= 0.5: ``int(weight * min(1.0, hit_rate + 0.2))`` pts
    - hit rate >= 0.25: ``int(weight * 0.4)`` pts
    - otherwise: 0 pts

    A keyword counts only when it starts at a word boundary, so short tokens
    such as "ich", "ole" or "ema" no longer match inside "which", "role" or
    "remain". The end of the match is left open because several keywords are
    word stems ("suicid", "coercio", "waiv").

    Bonus: +5 pts when at least five findings carry a recommendation longer
    than 30 characters, +3 pts for three or four.

    Args:
        findings: Finding dicts with ``description`` and ``recommendation``
            keys. Missing or ``None`` values are treated as empty strings.
        rationale: Free-text overall rationale; ``None`` is treated as empty.

    Returns:
        ``(score, feedback)`` where ``score`` is ``pts / 100`` clamped to
        [0.01, 0.99] (raw points can reach 105 with the bonus) and
        ``feedback`` is a multi-line, human-readable breakdown.
    """
    max_pts = 100
    pts = 0
    feedback_parts = []
    findings = findings or []

    def _text(value) -> str:
        # Missing or None fields become "" so joining and len() never fail.
        return "" if value is None else str(value)

    # Pool all submitted text once (rationale first, then each finding).
    text_parts = [_text(rationale)]
    for f in findings:
        text_parts.append(_text(f.get("description")))
        text_parts.append(_text(f.get("recommendation")))
    all_text = _normalize(" ".join(text_parts))

    def _keyword_hit(keyword):
        # Anchor only the start of the keyword at a word boundary.
        return re.search(r"(?<!\w)" + re.escape(keyword), all_text) is not None

    for issue_key, issue in TASK3_GROUND_TRUTH_ISSUES.items():
        keywords = issue["keywords"]
        weight = issue["weight"]
        hits = sum(1 for k in keywords if _keyword_hit(k))
        # An issue without keywords can never be detected; avoid dividing by zero.
        hit_rate = hits / len(keywords) if keywords else 0.0
        if hit_rate >= 0.5:
            earned = int(weight * min(1.0, hit_rate + 0.2))
            pts += earned
            feedback_parts.append(f"✓ [{issue['severity'].upper()}] {issue_key}: identified ({hits}/{len(keywords)} keywords, +{earned})")
        elif hit_rate >= 0.25:
            earned = int(weight * 0.4)
            pts += earned
            feedback_parts.append(f"~ [{issue['severity'].upper()}] {issue_key}: partially identified (+{earned})")
        else:
            feedback_parts.append(f"✗ [{issue['severity'].upper()}] {issue_key}: missed (0/{weight})")

    # Bonus for structured, actionable recommendations
    n_recommendations = sum(
        1 for f in findings
        if len(_text(f.get("recommendation"))) > 30
    )
    if n_recommendations >= 5:
        pts += 5
        feedback_parts.append(f"✓ {n_recommendations} actionable recommendations provided (+5)")
    elif n_recommendations >= 3:
        pts += 3
        feedback_parts.append(f"~ {n_recommendations} recommendations provided (+3)")

    # Scores are kept strictly inside (0, 1).
    score = max(0.01, min(0.99, pts / max_pts))
    feedback = f"Task 3 Score: {pts}/{max_pts} ({score:.2f})\n" + "\n".join(feedback_parts)
    return score, feedback
| # =========================================================================== | |
| # Task registry | |
| # =========================================================================== | |
# Registry of the available tasks, keyed by task name. Every entry has the
# same shape: metadata ("name", "difficulty", "description", "max_steps"), the
# scenario inputs (unused ones are left empty) and the grader callable that
# takes (findings, rationale) and returns (score, feedback).
# NOTE(review): "adverse_events" passes the TASK2_ADVERSE_EVENTS records as
# they are; confirm the consumer does not expose their answer-key fields
# ("correct_grade", "is_sae", "misclassified", "issue") to the agent.
TASKS = {
    "eligibility_screening": {
        "name": "eligibility_screening",
        "difficulty": "easy",
        "description": "Identify protocol eligibility violations across 5 patient records for ONCO-2024-301.",
        "max_steps": 3,
        "protocol_summary": TASK1_PROTOCOL_SUMMARY,
        "patient_records": TASK1_PATIENT_RECORDS,
        "adverse_events": [],
        "protocol_text": "",
        "grader": grade_task1,
    },
    "ae_classification": {
        "name": "ae_classification",
        "difficulty": "medium",
        "description": "Review 7 adverse events in CARD-2024-112 and identify all misclassifications.",
        "max_steps": 4,
        "protocol_summary": TASK2_PROTOCOL_SUMMARY,
        "patient_records": [],
        "adverse_events": TASK2_ADVERSE_EVENTS,
        "protocol_text": "",
        "grader": grade_task2,
    },
    "protocol_amendment_review": {
        "name": "protocol_amendment_review",
        "difficulty": "hard",
        "description": "Comprehensively review 6 proposed protocol amendments for NEURO-2024-450 and produce structured findings.",
        "max_steps": 5,
        # The dash in this summary had been corrupted to "β"; restored.
        "protocol_summary": "NEURO-2024-450 Phase II — Drug NX-12 for treatment-resistant depression (TRD). 6 amendments under review.",
        "patient_records": [],
        "adverse_events": [],
        "protocol_text": TASK3_PROTOCOL_TEXT,
        "grader": grade_task3,
    },
}