Spaces:

DocUA
/

MedGemma_StructCore

Sleeping

App Files Files Community

DocUA committed on Feb 13

Commit

7a12bf7

0 Parent(s):

Initial StructCore Space

Browse files

Files changed (23) hide show

Analysis_Readmission/config/scoring_rules.json +1462 -0
Analysis_Readmission/config/scoring_rules_v3_20260207.json +1462 -0
Analysis_Readmission/config/snomed_problem_groups.json +1584 -0
Analysis_Readmission/config/symptom_urgency_groups.json +206 -0
Analysis_Readmission/readmission_risk_engine.py +1209 -0
README.md +70 -0
app.py +17 -0
apps/__init__.py +1 -0
apps/challenge_demo/README.md +84 -0
apps/challenge_demo/__init__.py +1 -0
apps/challenge_demo/app_challenge.py +275 -0
apps/challenge_demo/config/evidence_claims.json +44 -0
apps/challenge_demo/data/synthetic_cases.json +20 -0
apps/challenge_demo/hf_zero/README.md +70 -0
apps/challenge_demo/hf_zero/app.py +17 -0
apps/challenge_demo/hf_zero/requirements.txt +2 -0
apps/challenge_demo/services/__init__.py +1 -0
apps/challenge_demo/services/case_library.py +42 -0
apps/challenge_demo/services/evidence_service.py +24 -0
apps/challenge_demo/services/structcore_service.py +494 -0
kvt_utils.py +1141 -0
requirements.txt +2 -0
scripts/run_two_stage_structured_pipeline.py +0 -0

Analysis_Readmission/config/scoring_rules.json ADDED Viewed

	@@ -0,0 +1,1462 @@

+{
+  "_meta": {
+    "version": "1.0",
+    "description": "Complete scoring rules for all 9 ONTOLOGY clusters. Each cluster has range-based or categorical scoring with evidence-based weights.",
+    "max_theoretical_scores": {
+      "DEMOGRAPHICS": 10,
+      "VITALS": 25,
+      "LABS": 30,
+      "PROBLEMS": 40,
+      "SYMPTOMS": 15,
+      "MEDICATIONS": 15,
+      "PROCEDURES": 15,
+      "UTILIZATION": 20,
+      "DISPOSITION": 15,
+      "INTERACTIONS": 30,
+      "TOTAL_THEORETICAL_MAX": 215
+    },
+    "calibration": {
+      "description": "Logistic transform: P = 1 / (1 + exp(-(alpha + beta * score)))",
+      "alpha": -2.3475,
+      "beta": 0.017,
+      "baseline_readmission_rate": 0.2069,
+      "notes": "Calibrated on 203 MIMIC-IV admissions (8 clusters, FHIR labels, SNOMED v2). AUC=0.5555."
+    },
+    "risk_categories": [
+      {
+        "name": "Low",
+        "score_min": 0,
+        "score_max": 19,
+        "probability_range": "5-12%",
+        "color": "green"
+      },
+      {
+        "name": "Medium",
+        "score_min": 20,
+        "score_max": 39,
+        "probability_range": "13-28%",
+        "color": "yellow"
+      },
+      {
+        "name": "High",
+        "score_min": 40,
+        "score_max": 59,
+        "probability_range": "29-53%",
+        "color": "orange"
+      },
+      {
+        "name": "Critical",
+        "score_min": 60,
+        "score_max": 999,
+        "probability_range": "54%+",
+        "color": "red"
+      }
+    ]
+  },
+  "DEMOGRAPHICS": {
+    "max_score": 10,
+    "keywords": {
+      "Age": {
+        "type": "range",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 39,
+            "score": 0,
+            "label": "Young adult"
+          },
+          {
+            "min": 40,
+            "max": 54,
+            "score": 1,
+            "label": "Middle age"
+          },
+          {
+            "min": 55,
+            "max": 64,
+            "score": 2,
+            "label": "Pre-elderly"
+          },
+          {
+            "min": 65,
+            "max": 74,
+            "score": 4,
+            "label": "Young elderly"
+          },
+          {
+            "min": 75,
+            "max": 84,
+            "score": 6,
+            "label": "Old elderly"
+          },
+          {
+            "min": 85,
+            "max": 999,
+            "score": 8,
+            "label": "Very old"
+          }
+        ],
+        "missing_score": 2,
+        "evidence": "Age >65 is consistently associated with higher readmission (OR 1.3-1.8)"
+      },
+      "Sex": {
+        "type": "categorical",
+        "values": {
+          "male": 1,
+          "female": 0
+        },
+        "missing_score": 0,
+        "evidence": "Male sex associated with slightly higher 30-day readmission (OR ~1.1)"
+      }
+    }
+  },
+  "VITALS": {
+    "max_score": 25,
+    "keywords": {
+      "Heart Rate": {
+        "type": "range",
+        "unit": "bpm",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 49,
+            "score": 3,
+            "label": "Severe bradycardia"
+          },
+          {
+            "min": 50,
+            "max": 59,
+            "score": 1,
+            "label": "Mild bradycardia"
+          },
+          {
+            "min": 60,
+            "max": 100,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 101,
+            "max": 110,
+            "score": 1,
+            "label": "Mild tachycardia"
+          },
+          {
+            "min": 111,
+            "max": 130,
+            "score": 3,
+            "label": "Tachycardia"
+          },
+          {
+            "min": 131,
+            "max": 999,
+            "score": 5,
+            "label": "Severe tachycardia"
+          }
+        ],
+        "plausibility": {
+          "min": 30,
+          "max": 220
+        }
+      },
+      "Systolic BP": {
+        "type": "range",
+        "unit": "mmHg",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 89,
+            "score": 5,
+            "label": "Hypotension"
+          },
+          {
+            "min": 90,
+            "max": 99,
+            "score": 3,
+            "label": "Borderline low"
+          },
+          {
+            "min": 100,
+            "max": 139,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 140,
+            "max": 159,
+            "score": 1,
+            "label": "Stage 1 HTN"
+          },
+          {
+            "min": 160,
+            "max": 179,
+            "score": 2,
+            "label": "Stage 2 HTN"
+          },
+          {
+            "min": 180,
+            "max": 999,
+            "score": 4,
+            "label": "Hypertensive urgency"
+          }
+        ],
+        "plausibility": {
+          "min": 50,
+          "max": 260
+        }
+      },
+      "Diastolic BP": {
+        "type": "range",
+        "unit": "mmHg",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 59,
+            "score": 2,
+            "label": "Low diastolic"
+          },
+          {
+            "min": 60,
+            "max": 89,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 90,
+            "max": 99,
+            "score": 1,
+            "label": "Elevated"
+          },
+          {
+            "min": 100,
+            "max": 999,
+            "score": 3,
+            "label": "High diastolic"
+          }
+        ],
+        "plausibility": {
+          "min": 20,
+          "max": 160
+        }
+      },
+      "Respiratory Rate": {
+        "type": "range",
+        "unit": "breaths/min",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 9,
+            "score": 4,
+            "label": "Bradypnea"
+          },
+          {
+            "min": 10,
+            "max": 11,
+            "score": 2,
+            "label": "Low normal"
+          },
+          {
+            "min": 12,
+            "max": 20,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 21,
+            "max": 24,
+            "score": 2,
+            "label": "Mild tachypnea"
+          },
+          {
+            "min": 25,
+            "max": 30,
+            "score": 4,
+            "label": "Tachypnea"
+          },
+          {
+            "min": 31,
+            "max": 999,
+            "score": 6,
+            "label": "Severe tachypnea"
+          }
+        ],
+        "plausibility": {
+          "min": 5,
+          "max": 60
+        }
+      },
+      "Temperature": {
+        "type": "range",
+        "unit": "°F",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 96.7,
+            "score": 3,
+            "label": "Hypothermia"
+          },
+          {
+            "min": 96.8,
+            "max": 99.5,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 99.6,
+            "max": 100.3,
+            "score": 1,
+            "label": "Low-grade fever"
+          },
+          {
+            "min": 100.4,
+            "max": 101.9,
+            "score": 2,
+            "label": "Fever"
+          },
+          {
+            "min": 102.0,
+            "max": 999,
+            "score": 4,
+            "label": "High fever"
+          }
+        ],
+        "plausibility": {
+          "min": 90,
+          "max": 110
+        }
+      },
+      "SpO2": {
+        "type": "range",
+        "unit": "%",
+        "ranges": [
+          {
+            "min": 95,
+            "max": 100,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 92,
+            "max": 94,
+            "score": 2,
+            "label": "Mild hypoxia"
+          },
+          {
+            "min": 88,
+            "max": 91,
+            "score": 4,
+            "label": "Moderate hypoxia"
+          },
+          {
+            "min": 0,
+            "max": 87,
+            "score": 6,
+            "label": "Severe hypoxia"
+          }
+        ],
+        "plausibility": {
+          "min": 50,
+          "max": 100
+        }
+      },
+      "Weight": {
+        "type": "no_direct_score",
+        "note": "Weight alone does not score, but is used in interaction patterns (e.g., BMI, fluid overload)",
+        "plausibility": {
+          "min": 20,
+          "max": 300
+        }
+      }
+    }
+  },
+  "LABS": {
+    "max_score": 30,
+    "keywords": {
+      "Hemoglobin": {
+        "type": "range",
+        "unit": "g/dL",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 6.9,
+            "score": 6,
+            "label": "Critical anemia"
+          },
+          {
+            "min": 7.0,
+            "max": 9.9,
+            "score": 4,
+            "label": "Moderate anemia"
+          },
+          {
+            "min": 10.0,
+            "max": 11.9,
+            "score": 2,
+            "label": "Mild anemia"
+          },
+          {
+            "min": 12.0,
+            "max": 17.0,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 17.1,
+            "max": 999,
+            "score": 2,
+            "label": "Polycythemia"
+          }
+        ],
+        "plausibility": {
+          "min": 2,
+          "max": 25
+        }
+      },
+      "Hematocrit": {
+        "type": "range",
+        "unit": "%",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 20.9,
+            "score": 5,
+            "label": "Critical low"
+          },
+          {
+            "min": 21.0,
+            "max": 29.9,
+            "score": 3,
+            "label": "Low"
+          },
+          {
+            "min": 30.0,
+            "max": 35.9,
+            "score": 1,
+            "label": "Mildly low"
+          },
+          {
+            "min": 36.0,
+            "max": 45.0,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 45.1,
+            "max": 999,
+            "score": 1,
+            "label": "Elevated"
+          }
+        ],
+        "plausibility": {
+          "min": 5,
+          "max": 70
+        }
+      },
+      "WBC": {
+        "type": "range",
+        "unit": "K/uL",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 3.9,
+            "score": 2,
+            "label": "Leukopenia"
+          },
+          {
+            "min": 4.0,
+            "max": 11.0,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 11.1,
+            "max": 15.0,
+            "score": 1,
+            "label": "Mild leukocytosis"
+          },
+          {
+            "min": 15.1,
+            "max": 20.0,
+            "score": 3,
+            "label": "Moderate leukocytosis"
+          },
+          {
+            "min": 20.1,
+            "max": 999,
+            "score": 5,
+            "label": "Severe leukocytosis"
+          }
+        ],
+        "plausibility": {
+          "min": 0.1,
+          "max": 200
+        }
+      },
+      "Platelet": {
+        "type": "range",
+        "unit": "K/uL",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 49,
+            "score": 5,
+            "label": "Severe thrombocytopenia"
+          },
+          {
+            "min": 50,
+            "max": 99,
+            "score": 3,
+            "label": "Moderate thrombocytopenia"
+          },
+          {
+            "min": 100,
+            "max": 149,
+            "score": 1,
+            "label": "Mild thrombocytopenia"
+          },
+          {
+            "min": 150,
+            "max": 400,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 401,
+            "max": 999,
+            "score": 2,
+            "label": "Thrombocytosis"
+          }
+        ],
+        "plausibility": {
+          "min": 1,
+          "max": 2000
+        }
+      },
+      "Sodium": {
+        "type": "range",
+        "unit": "mEq/L",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 129,
+            "score": 5,
+            "label": "Severe hyponatremia"
+          },
+          {
+            "min": 130,
+            "max": 134,
+            "score": 3,
+            "label": "Mild hyponatremia"
+          },
+          {
+            "min": 135,
+            "max": 145,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 146,
+            "max": 150,
+            "score": 2,
+            "label": "Mild hypernatremia"
+          },
+          {
+            "min": 151,
+            "max": 999,
+            "score": 4,
+            "label": "Severe hypernatremia"
+          }
+        ],
+        "plausibility": {
+          "min": 100,
+          "max": 180
+        }
+      },
+      "Potassium": {
+        "type": "range",
+        "unit": "mEq/L",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 2.9,
+            "score": 5,
+            "label": "Severe hypokalemia"
+          },
+          {
+            "min": 3.0,
+            "max": 3.4,
+            "score": 2,
+            "label": "Mild hypokalemia"
+          },
+          {
+            "min": 3.5,
+            "max": 5.0,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 5.1,
+            "max": 5.5,
+            "score": 2,
+            "label": "Mild hyperkalemia"
+          },
+          {
+            "min": 5.6,
+            "max": 6.0,
+            "score": 4,
+            "label": "Moderate hyperkalemia"
+          },
+          {
+            "min": 6.1,
+            "max": 999,
+            "score": 6,
+            "label": "Severe hyperkalemia"
+          }
+        ],
+        "plausibility": {
+          "min": 1.5,
+          "max": 8.0
+        }
+      },
+      "Creatinine": {
+        "type": "range",
+        "unit": "mg/dL",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 1.2,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 1.3,
+            "max": 1.9,
+            "score": 2,
+            "label": "Mildly elevated"
+          },
+          {
+            "min": 2.0,
+            "max": 3.0,
+            "score": 4,
+            "label": "Moderate renal impairment"
+          },
+          {
+            "min": 3.1,
+            "max": 999,
+            "score": 6,
+            "label": "Severe renal impairment"
+          }
+        ],
+        "plausibility": {
+          "min": 0.1,
+          "max": 20
+        }
+      },
+      "BUN": {
+        "type": "range",
+        "unit": "mg/dL",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 6,
+            "score": 1,
+            "label": "Low (malnutrition?)"
+          },
+          {
+            "min": 7,
+            "max": 20,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 21,
+            "max": 40,
+            "score": 2,
+            "label": "Mildly elevated"
+          },
+          {
+            "min": 41,
+            "max": 60,
+            "score": 4,
+            "label": "Moderate azotemia"
+          },
+          {
+            "min": 61,
+            "max": 999,
+            "score": 6,
+            "label": "Severe azotemia"
+          }
+        ],
+        "plausibility": {
+          "min": 1,
+          "max": 200
+        }
+      },
+      "Glucose": {
+        "type": "range",
+        "unit": "mg/dL",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 69,
+            "score": 4,
+            "label": "Hypoglycemia"
+          },
+          {
+            "min": 70,
+            "max": 140,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 141,
+            "max": 200,
+            "score": 1,
+            "label": "Mild hyperglycemia"
+          },
+          {
+            "min": 201,
+            "max": 300,
+            "score": 2,
+            "label": "Moderate hyperglycemia"
+          },
+          {
+            "min": 301,
+            "max": 999,
+            "score": 4,
+            "label": "Severe hyperglycemia"
+          }
+        ],
+        "plausibility": {
+          "min": 20,
+          "max": 1000
+        }
+      },
+      "Bicarbonate": {
+        "type": "range",
+        "unit": "mEq/L",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 17,
+            "score": 4,
+            "label": "Severe acidosis"
+          },
+          {
+            "min": 18,
+            "max": 21,
+            "score": 2,
+            "label": "Mild acidosis"
+          },
+          {
+            "min": 22,
+            "max": 28,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 29,
+            "max": 32,
+            "score": 1,
+            "label": "Mild alkalosis"
+          },
+          {
+            "min": 33,
+            "max": 999,
+            "score": 3,
+            "label": "Severe alkalosis"
+          }
+        ],
+        "plausibility": {
+          "min": 5,
+          "max": 50
+        }
+      }
+    }
+  },
+  "PROBLEMS": {
+    "max_score": 40,
+    "scoring_method": "snomed_group_weighted",
+    "config": {
+      "group_mapping_file": "snomed_problem_groups.json",
+      "multimorbidity_bonus": {
+        "description": "For each active problem group beyond the 3rd, add +1 (capped at +5)",
+        "threshold": 3,
+        "per_extra_group": 1,
+        "cap": 5
+      },
+      "value_filter": {
+        "include_values": [
+          "chronic",
+          "acute",
+          "exist"
+        ],
+        "exclude_values": [
+          "not exist"
+        ]
+      },
+      "score_cap": 40
+    }
+  },
+  "SYMPTOMS": {
+    "max_score": 15,
+    "scoring_method": "urgency_group_weighted",
+    "config": {
+      "group_mapping_file": "symptom_urgency_groups.json",
+      "severity_multiplier": {
+        "severe": 1.5,
+        "yes": 1.0,
+        "no": 0.0
+      },
+      "active_symptom_count_bonus": {
+        "description": "Bonus for having many active symptoms at once",
+        "threshold": 3,
+        "bonus": 2
+      },
+      "score_cap": 15
+    }
+  },
+  "MEDICATIONS": {
+    "max_score": 15,
+    "keywords": {
+      "Medication Count": {
+        "type": "range",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 4,
+            "score": 0,
+            "label": "Low"
+          },
+          {
+            "min": 5,
+            "max": 9,
+            "score": 1,
+            "label": "Moderate"
+          },
+          {
+            "min": 10,
+            "max": 14,
+            "score": 2,
+            "label": "High"
+          },
+          {
+            "min": 15,
+            "max": 999,
+            "score": 4,
+            "label": "Very high (polypharmacy)"
+          }
+        ]
+      },
+      "New Medications Count": {
+        "type": "range",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 1,
+            "score": 0,
+            "label": "Minimal"
+          },
+          {
+            "min": 2,
+            "max": 2,
+            "score": 1,
+            "label": "Moderate"
+          },
+          {
+            "min": 3,
+            "max": 4,
+            "score": 2,
+            "label": "High (adherence risk)"
+          },
+          {
+            "min": 5,
+            "max": 999,
+            "score": 4,
+            "label": "Very high (adherence crisis)"
+          }
+        ]
+      },
+      "Polypharmacy": {
+        "type": "categorical",
+        "values": {
+          "yes": 3,
+          "no": 0
+        },
+        "evidence": "Polypharmacy (>=5 meds) associated with 1.5-2x readmission OR"
+      },
+      "Anticoagulation": {
+        "type": "categorical",
+        "values": {
+          "yes": 2,
+          "no": 0
+        },
+        "evidence": "Anticoagulation = bleeding risk + INR management complexity"
+      },
+      "Insulin Therapy": {
+        "type": "categorical",
+        "values": {
+          "yes": 2,
+          "no": 0
+        },
+        "evidence": "Insulin management at home = hypoglycemia risk"
+      },
+      "Opioid Therapy": {
+        "type": "categorical",
+        "values": {
+          "yes": 3,
+          "no": 0
+        },
+        "evidence": "Opioid use associated with falls, constipation, respiratory depression"
+      },
+      "Diuretic Therapy": {
+        "type": "categorical",
+        "values": {
+          "yes": 1,
+          "no": 0
+        },
+        "evidence": "Diuretics = electrolyte monitoring, volume management"
+      }
+    }
+  },
+  "PROCEDURES": {
+    "max_score": 15,
+    "keywords": {
+      "Any Procedure": {
+        "type": "categorical",
+        "values": {
+          "yes": 2,
+          "no": 0
+        },
+        "note": "Only scored if no specific procedure flags are set"
+      },
+      "Surgery": {
+        "type": "categorical",
+        "values": {
+          "yes": 4,
+          "no": 0
+        },
+        "evidence": "Surgical patients have 15-20% higher readmission rate"
+      },
+      "Dialysis": {
+        "type": "categorical",
+        "values": {
+          "started": 5,
+          "done": 5,
+          "decided": 3,
+          "cancelled": 1,
+          "no": 0
+        },
+        "evidence": "New dialysis initiation associated with very high readmission (OR ~2.5)"
+      },
+      "Mechanical Ventilation": {
+        "type": "mixed",
+        "categorical_values": {
+          "no": 0
+        },
+        "numeric_rule": "Any numeric value > 0 scores 5 points (prolonged ventilation = ICU-level complexity)",
+        "score_if_any_positive": 5,
+        "evidence": "Mechanical ventilation = post-ICU syndrome, deconditioning, respiratory fragility"
+      }
+    }
+  },
+  "UTILIZATION": {
+    "max_score": 20,
+    "keywords": {
+      "Prior Admissions 12mo": {
+        "type": "range",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 0,
+            "score": 0,
+            "label": "No prior"
+          },
+          {
+            "min": 1,
+            "max": 1,
+            "score": 3,
+            "label": "One prior"
+          },
+          {
+            "min": 2,
+            "max": 3,
+            "score": 6,
+            "label": "Frequent"
+          },
+          {
+            "min": 4,
+            "max": 999,
+            "score": 10,
+            "label": "Super-utilizer"
+          }
+        ],
+        "evidence": "Prior admissions is the single strongest predictor (OR 2.0-3.5)"
+      },
+      "ED Visits 6mo": {
+        "type": "range",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 0,
+            "score": 0,
+            "label": "None"
+          },
+          {
+            "min": 1,
+            "max": 1,
+            "score": 2,
+            "label": "One visit"
+          },
+          {
+            "min": 2,
+            "max": 3,
+            "score": 4,
+            "label": "Multiple"
+          },
+          {
+            "min": 4,
+            "max": 999,
+            "score": 6,
+            "label": "Frequent ED user"
+          }
+        ]
+      },
+      "Days Since Last Admission": {
+        "type": "range",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 30,
+            "score": 4,
+            "label": "Very recent (<=30d)"
+          },
+          {
+            "min": 31,
+            "max": 90,
+            "score": 2,
+            "label": "Recent (31-90d)"
+          },
+          {
+            "min": 91,
+            "max": 365,
+            "score": 1,
+            "label": "Within year"
+          },
+          {
+            "min": 366,
+            "max": 999999,
+            "score": 0,
+            "label": "Not recent"
+          }
+        ]
+      },
+      "Current Length of Stay": {
+        "type": "range",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 2,
+            "score": 2,
+            "label": "Very short (possible premature discharge)"
+          },
+          {
+            "min": 3,
+            "max": 6,
+            "score": 0,
+            "label": "Typical"
+          },
+          {
+            "min": 7,
+            "max": 13,
+            "score": 2,
+            "label": "Extended"
+          },
+          {
+            "min": 14,
+            "max": 999,
+            "score": 5,
+            "label": "Prolonged (complex case)"
+          }
+        ],
+        "evidence": "Both very short and very long LOS associated with higher readmission"
+      }
+    }
+  },
+  "DISPOSITION": {
+    "max_score": 15,
+    "keywords": {
+      "Discharge Disposition": {
+        "type": "categorical",
+        "values": {
+          "Home": 0,
+          "Home with Services": 2,
+          "Rehab": 4,
+          "SNF": 5,
+          "LTAC": 6,
+          "Hospice": 7,
+          "AMA": 8
+        },
+        "evidence": "AMA discharge has highest 30d readmission (OR ~3.0); SNF/LTAC also elevated"
+      },
+      "Mental Status": {
+        "type": "categorical",
+        "values": {
+          "alert": 0,
+          "oriented": 0,
+          "confused": 4,
+          "lethargic": 6
+        },
+        "evidence": "Altered mental status at discharge = post-delirium syndrome, medication errors, fall risk"
+      }
+    }
+  },
+  "INTERACTIONS": {
+    "description": "Cross-cluster clinical pattern detection. Bonus points when synergistic patterns are present.",
+    "patterns": [
+      {
+        "id": "sepsis_pattern",
+        "name": "Sepsis / SIRS Pattern",
+        "bonus": 10,
+        "conditions": {
+          "require_all": [
+            {
+              "cluster": "VITALS",
+              "keyword": "Heart Rate",
+              "operator": ">",
+              "value": 100
+            }
+          ],
+          "require_any_of": [
+            {
+              "cluster": "VITALS",
+              "keyword": "Systolic BP",
+              "operator": "<",
+              "value": 100
+            },
+            {
+              "cluster": "VITALS",
+              "keyword": "Respiratory Rate",
+              "operator": ">",
+              "value": 22
+            }
+          ],
+          "require_any_of_2": [
+            {
+              "cluster": "LABS",
+              "keyword": "WBC",
+              "operator": ">",
+              "value": 12
+            },
+            {
+              "cluster": "LABS",
+              "keyword": "WBC",
+              "operator": "<",
+              "value": 4
+            },
+            {
+              "cluster": "VITALS",
+              "keyword": "Temperature",
+              "operator": ">",
+              "value": 100.4
+            }
+          ]
+        },
+        "evidence": "qSOFA + SIRS criteria. Sepsis at discharge = very high readmission risk."
+      },
+      {
+        "id": "aki_pattern",
+        "name": "Acute Kidney Injury Pattern",
+        "bonus": 8,
+        "conditions": {
+          "require_all": [
+            {
+              "cluster": "LABS",
+              "keyword": "Creatinine",
+              "operator": ">",
+              "value": 1.5
+            },
+            {
+              "cluster": "LABS",
+              "keyword": "BUN",
+              "operator": ">",
+              "value": 30
+            }
+          ],
+          "require_any_of": [
+            {
+              "cluster": "LABS",
+              "keyword": "Potassium",
+              "operator": ">",
+              "value": 5.0
+            },
+            {
+              "cluster": "LABS",
+              "keyword": "Sodium",
+              "operator": "<",
+              "value": 135
+            },
+            {
+              "cluster": "LABS",
+              "keyword": "Bicarbonate",
+              "operator": "<",
+              "value": 22
+            }
+          ]
+        },
+        "evidence": "AKI with electrolyte derangement = unstable renal function, readmission OR ~2.0"
+      },
+      {
+        "id": "decompensated_hf",
+        "name": "Decompensated Heart Failure Pattern",
+        "bonus": 8,
+        "conditions": {
+          "require_problem_group": "heart_failure",
+          "require_any_of": [
+            {
+              "cluster": "SYMPTOMS",
+              "keyword_group": "edema_fluid"
+            },
+            {
+              "cluster": "SYMPTOMS",
+              "keyword_group": "respiratory_distress"
+            },
+            {
+              "cluster": "LABS",
+              "keyword": "BUN",
+              "operator": ">",
+              "value": 40
+            }
+          ]
+        },
+        "evidence": "CHF + fluid overload/dyspnea = decompensation, 25-30% 30d readmission"
+      },
+      {
+        "id": "frailty_syndrome",
+        "name": "Frailty Syndrome",
+        "bonus": 6,
+        "conditions": {
+          "require_all": [
+            {
+              "cluster": "DEMOGRAPHICS",
+              "keyword": "Age",
+              "operator": ">",
+              "value": 75
+            }
+          ],
+          "require_count_ge": {
+            "count": 2,
+            "from": [
+              {
+                "type": "problem_groups_active_ge",
+                "value": 3
+              },
+              {
+                "cluster": "LABS",
+                "keyword": "Hemoglobin",
+                "operator": "<",
+                "value": 10
+              },
+              {
+                "cluster": "DISPOSITION",
+                "keyword": "Mental Status",
+                "value_in": [
+                  "confused",
+                  "lethargic"
+                ]
+              },
+              {
+                "cluster": "DISPOSITION",
+                "keyword": "Discharge Disposition",
+                "value_in": [
+                  "SNF",
+                  "LTAC",
+                  "Rehab"
+                ]
+              }
+            ]
+          }
+        },
+        "evidence": "Frailty = age + multimorbidity + functional decline → readmission OR ~1.8"
+      },
+      {
+        "id": "unstable_discharge",
+        "name": "Unstable Discharge",
+        "bonus": 5,
+        "conditions": {
+          "require_any_of": [
+            {
+              "cluster": "DISPOSITION",
+              "keyword": "Discharge Disposition",
+              "value_in": [
+                "AMA"
+              ]
+            },
+            {
+              "compound_and": [
+                {
+                  "cluster": "DISPOSITION",
+                  "keyword": "Mental Status",
+                  "value_in": [
+                    "confused",
+                    "lethargic"
+                  ]
+                },
+                {
+                  "cluster": "DISPOSITION",
+                  "keyword": "Discharge Disposition",
+                  "value_in": [
+                    "Home"
+                  ]
+                }
+              ]
+            }
+          ]
+        },
+        "evidence": "AMA or confused-to-Home = highest readmission subgroup"
+      },
+      {
+        "id": "respiratory_failure",
+        "name": "Respiratory Failure Pattern",
+        "bonus": 6,
+        "conditions": {
+          "require_all": [
+            {
+              "cluster": "VITALS",
+              "keyword": "SpO2",
+              "operator": "<",
+              "value": 92
+            }
+          ],
+          "require_any_of": [
+            {
+              "cluster": "VITALS",
+              "keyword": "Respiratory Rate",
+              "operator": ">",
+              "value": 24
+            },
+            {
+              "cluster": "SYMPTOMS",
+              "keyword_group": "respiratory_distress"
+            }
+          ]
+        },
+        "evidence": "Hypoxia + tachypnea/dyspnea = respiratory failure, readmission OR ~2.0"
+      },
+      {
+        "id": "metabolic_crisis",
+        "name": "Metabolic Crisis (DKA/HHS)",
+        "bonus": 6,
+        "conditions": {
+          "require_all": [
+            {
+              "cluster": "LABS",
+              "keyword": "Glucose",
+              "operator": ">",
+              "value": 300
+            }
+          ],
+          "require_any_of": [
+            {
+              "cluster": "LABS",
+              "keyword": "Bicarbonate",
+              "operator": "<",
+              "value": 18
+            },
+            {
+              "cluster": "LABS",
+              "keyword": "Potassium",
+              "operator": ">",
+              "value": 5.5
+            }
+          ]
+        },
+        "evidence": "DKA/HHS at discharge = very high readmission, especially without insulin education"
+      },
+      {
+        "id": "bleeding_risk",
+        "name": "Active Bleeding Risk",
+        "bonus": 6,
+        "conditions": {
+          "require_all": [
+            {
+              "cluster": "LABS",
+              "keyword": "Hemoglobin",
+              "operator": "<",
+              "value": 8
+            }
+          ],
+          "require_any_of": [
+            {
+              "cluster": "LABS",
+              "keyword": "Platelet",
+              "operator": "<",
+              "value": 100
+            },
+            {
+              "cluster": "MEDICATIONS",
+              "keyword": "Anticoagulation",
+              "value_in": [
+                "yes"
+              ]
+            }
+          ]
+        },
+        "evidence": "Severe anemia + thrombocytopenia/anticoagulation = high bleeding readmission risk"
+      }
+    ]
+  },
+  "DAYS_PREDICTION": {
+    "description": "Maps composite risk score to estimated days-to-readmission.",
+    "models": {
+      "regression": {
+        "formula": "max(1, D_max * exp(-gamma * score))",
+        "parameters": {
+          "D_max": 20,
+          "gamma": 0.022
+        },
+        "expected_outputs": {
+          "score_10": 16.1,
+          "score_20": 12.9,
+          "score_40": 8.3,
+          "score_60": 5.3,
+          "score_80": 3.4,
+          "score_100": 2.2
+        }
+      },
+      "buckets": {
+        "urgent": {
+          "label": "0-7 days",
+          "condition": "estimated_days <= 7"
+        },
+        "near_term": {
+          "label": "8-14 days",
+          "condition": "7 < estimated_days <= 14"
+        },
+        "late": {
+          "label": "15-30 days",
+          "condition": "estimated_days > 14"
+        }
+      },
+      "survival": {
+        "description": "Exponential hazard model for P(readmit by day t)",
+        "formula": "P(t) = P_30d * (1 - exp(-(t/30) * k)) / (1 - exp(-k))",
+        "parameters": {
+          "k_base": 2.0,
+          "k_adjustment": "k = k_base + 0.02 * (score - 30)"
+        },
+        "notes": [
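+          "k > 0 means hazard is front-loaded; larger k concentrates readmissions earlier (higher risk patients readmit earlier)",
+          "k < 0 means hazard is back-loaded; as k approaches 0 the curve becomes linear in t (k = 0 itself is undefined in the formula)",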
+          "P_30d is from the logistic calibration model"
+        ],
+        "output_horizons": [
+          7,
+          14,
+          21,
+          30
+        ]
+      }
+    }
+  }
+}

Analysis_Readmission/config/scoring_rules_v3_20260207.json ADDED Viewed

	@@ -0,0 +1,1462 @@

+{
+  "_meta": {
+    "version": "1.0",
+    "description": "Complete scoring rules for all 9 ONTOLOGY clusters. Each cluster has range-based or categorical scoring with evidence-based weights.",
+    "max_theoretical_scores": {
+      "DEMOGRAPHICS": 10,
+      "VITALS": 25,
+      "LABS": 30,
+      "PROBLEMS": 40,
+      "SYMPTOMS": 15,
+      "MEDICATIONS": 15,
+      "PROCEDURES": 15,
+      "UTILIZATION": 20,
+      "DISPOSITION": 15,
+      "INTERACTIONS": 30,
+      "TOTAL_THEORETICAL_MAX": 215
+    },
+    "calibration": {
+      "description": "Logistic transform: P = 1 / (1 + exp(-(alpha + beta * score)))",
+      "alpha": -2.3475,
+      "beta": 0.017,
+      "baseline_readmission_rate": 0.2069,
+      "notes": "Calibrated on 203 MIMIC-IV admissions (8 clusters, FHIR labels). AUC=0.5555."
+    },
+    "risk_categories": [
+      {
+        "name": "Low",
+        "score_min": 0,
+        "score_max": 19,
+        "probability_range": "5-12%",
+        "color": "green"
+      },
+      {
+        "name": "Medium",
+        "score_min": 20,
+        "score_max": 39,
+        "probability_range": "13-28%",
+        "color": "yellow"
+      },
+      {
+        "name": "High",
+        "score_min": 40,
+        "score_max": 59,
+        "probability_range": "29-53%",
+        "color": "orange"
+      },
+      {
+        "name": "Critical",
+        "score_min": 60,
+        "score_max": 999,
+        "probability_range": "54%+",
+        "color": "red"
+      }
+    ]
+  },
+  "DEMOGRAPHICS": {
+    "max_score": 10,
+    "keywords": {
+      "Age": {
+        "type": "range",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 39,
+            "score": 0,
+            "label": "Young adult"
+          },
+          {
+            "min": 40,
+            "max": 54,
+            "score": 1,
+            "label": "Middle age"
+          },
+          {
+            "min": 55,
+            "max": 64,
+            "score": 2,
+            "label": "Pre-elderly"
+          },
+          {
+            "min": 65,
+            "max": 74,
+            "score": 4,
+            "label": "Young elderly"
+          },
+          {
+            "min": 75,
+            "max": 84,
+            "score": 6,
+            "label": "Old elderly"
+          },
+          {
+            "min": 85,
+            "max": 999,
+            "score": 8,
+            "label": "Very old"
+          }
+        ],
+        "missing_score": 2,
+        "evidence": "Age >65 is consistently associated with higher readmission (OR 1.3-1.8)"
+      },
+      "Sex": {
+        "type": "categorical",
+        "values": {
+          "male": 1,
+          "female": 0
+        },
+        "missing_score": 0,
+        "evidence": "Male sex associated with slightly higher 30-day readmission (OR ~1.1)"
+      }
+    }
+  },
+  "VITALS": {
+    "max_score": 25,
+    "keywords": {
+      "Heart Rate": {
+        "type": "range",
+        "unit": "bpm",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 49,
+            "score": 3,
+            "label": "Severe bradycardia"
+          },
+          {
+            "min": 50,
+            "max": 59,
+            "score": 1,
+            "label": "Mild bradycardia"
+          },
+          {
+            "min": 60,
+            "max": 100,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 101,
+            "max": 110,
+            "score": 1,
+            "label": "Mild tachycardia"
+          },
+          {
+            "min": 111,
+            "max": 130,
+            "score": 3,
+            "label": "Tachycardia"
+          },
+          {
+            "min": 131,
+            "max": 999,
+            "score": 5,
+            "label": "Severe tachycardia"
+          }
+        ],
+        "plausibility": {
+          "min": 30,
+          "max": 220
+        }
+      },
+      "Systolic BP": {
+        "type": "range",
+        "unit": "mmHg",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 89,
+            "score": 5,
+            "label": "Hypotension"
+          },
+          {
+            "min": 90,
+            "max": 99,
+            "score": 3,
+            "label": "Borderline low"
+          },
+          {
+            "min": 100,
+            "max": 139,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 140,
+            "max": 159,
+            "score": 1,
+            "label": "Stage 1 HTN"
+          },
+          {
+            "min": 160,
+            "max": 179,
+            "score": 2,
+            "label": "Stage 2 HTN"
+          },
+          {
+            "min": 180,
+            "max": 999,
+            "score": 4,
+            "label": "Hypertensive urgency"
+          }
+        ],
+        "plausibility": {
+          "min": 50,
+          "max": 260
+        }
+      },
+      "Diastolic BP": {
+        "type": "range",
+        "unit": "mmHg",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 59,
+            "score": 2,
+            "label": "Low diastolic"
+          },
+          {
+            "min": 60,
+            "max": 89,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 90,
+            "max": 99,
+            "score": 1,
+            "label": "Elevated"
+          },
+          {
+            "min": 100,
+            "max": 999,
+            "score": 3,
+            "label": "High diastolic"
+          }
+        ],
+        "plausibility": {
+          "min": 20,
+          "max": 160
+        }
+      },
+      "Respiratory Rate": {
+        "type": "range",
+        "unit": "breaths/min",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 9,
+            "score": 4,
+            "label": "Bradypnea"
+          },
+          {
+            "min": 10,
+            "max": 11,
+            "score": 2,
+            "label": "Low normal"
+          },
+          {
+            "min": 12,
+            "max": 20,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 21,
+            "max": 24,
+            "score": 2,
+            "label": "Mild tachypnea"
+          },
+          {
+            "min": 25,
+            "max": 30,
+            "score": 4,
+            "label": "Tachypnea"
+          },
+          {
+            "min": 31,
+            "max": 999,
+            "score": 6,
+            "label": "Severe tachypnea"
+          }
+        ],
+        "plausibility": {
+          "min": 5,
+          "max": 60
+        }
+      },
+      "Temperature": {
+        "type": "range",
+        "unit": "°F",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 96.7,
+            "score": 3,
+            "label": "Hypothermia"
+          },
+          {
+            "min": 96.8,
+            "max": 99.5,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 99.6,
+            "max": 100.3,
+            "score": 1,
+            "label": "Low-grade fever"
+          },
+          {
+            "min": 100.4,
+            "max": 101.9,
+            "score": 2,
+            "label": "Fever"
+          },
+          {
+            "min": 102.0,
+            "max": 999,
+            "score": 4,
+            "label": "High fever"
+          }
+        ],
+        "plausibility": {
+          "min": 90,
+          "max": 110
+        }
+      },
+      "SpO2": {
+        "type": "range",
+        "unit": "%",
+        "ranges": [
+          {
+            "min": 95,
+            "max": 100,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 92,
+            "max": 94,
+            "score": 2,
+            "label": "Mild hypoxia"
+          },
+          {
+            "min": 88,
+            "max": 91,
+            "score": 4,
+            "label": "Moderate hypoxia"
+          },
+          {
+            "min": 0,
+            "max": 87,
+            "score": 6,
+            "label": "Severe hypoxia"
+          }
+        ],
+        "plausibility": {
+          "min": 50,
+          "max": 100
+        }
+      },
+      "Weight": {
+        "type": "no_direct_score",
+        "note": "Weight alone does not score, but used in interaction patterns (e.g., BMI, fluid overload)",
+        "plausibility": {
+          "min": 20,
+          "max": 300
+        }
+      }
+    }
+  },
+  "LABS": {
+    "max_score": 30,
+    "keywords": {
+      "Hemoglobin": {
+        "type": "range",
+        "unit": "g/dL",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 6.9,
+            "score": 6,
+            "label": "Critical anemia"
+          },
+          {
+            "min": 7.0,
+            "max": 9.9,
+            "score": 4,
+            "label": "Moderate anemia"
+          },
+          {
+            "min": 10.0,
+            "max": 11.9,
+            "score": 2,
+            "label": "Mild anemia"
+          },
+          {
+            "min": 12.0,
+            "max": 17.0,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 17.1,
+            "max": 999,
+            "score": 2,
+            "label": "Polycythemia"
+          }
+        ],
+        "plausibility": {
+          "min": 2,
+          "max": 25
+        }
+      },
+      "Hematocrit": {
+        "type": "range",
+        "unit": "%",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 20.9,
+            "score": 5,
+            "label": "Critical low"
+          },
+          {
+            "min": 21.0,
+            "max": 29.9,
+            "score": 3,
+            "label": "Low"
+          },
+          {
+            "min": 30.0,
+            "max": 35.9,
+            "score": 1,
+            "label": "Mildly low"
+          },
+          {
+            "min": 36.0,
+            "max": 45.0,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 45.1,
+            "max": 999,
+            "score": 1,
+            "label": "Elevated"
+          }
+        ],
+        "plausibility": {
+          "min": 5,
+          "max": 70
+        }
+      },
+      "WBC": {
+        "type": "range",
+        "unit": "K/uL",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 3.9,
+            "score": 2,
+            "label": "Leukopenia"
+          },
+          {
+            "min": 4.0,
+            "max": 11.0,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 11.1,
+            "max": 15.0,
+            "score": 1,
+            "label": "Mild leukocytosis"
+          },
+          {
+            "min": 15.1,
+            "max": 20.0,
+            "score": 3,
+            "label": "Moderate leukocytosis"
+          },
+          {
+            "min": 20.1,
+            "max": 999,
+            "score": 5,
+            "label": "Severe leukocytosis"
+          }
+        ],
+        "plausibility": {
+          "min": 0.1,
+          "max": 200
+        }
+      },
+      "Platelet": {
+        "type": "range",
+        "unit": "K/uL",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 49,
+            "score": 5,
+            "label": "Severe thrombocytopenia"
+          },
+          {
+            "min": 50,
+            "max": 99,
+            "score": 3,
+            "label": "Moderate thrombocytopenia"
+          },
+          {
+            "min": 100,
+            "max": 149,
+            "score": 1,
+            "label": "Mild thrombocytopenia"
+          },
+          {
+            "min": 150,
+            "max": 400,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 401,
+            "max": 999,
+            "score": 2,
+            "label": "Thrombocytosis"
+          }
+        ],
+        "plausibility": {
+          "min": 1,
+          "max": 2000
+        }
+      },
+      "Sodium": {
+        "type": "range",
+        "unit": "mEq/L",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 129,
+            "score": 5,
+            "label": "Severe hyponatremia"
+          },
+          {
+            "min": 130,
+            "max": 134,
+            "score": 3,
+            "label": "Mild hyponatremia"
+          },
+          {
+            "min": 135,
+            "max": 145,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 146,
+            "max": 150,
+            "score": 2,
+            "label": "Mild hypernatremia"
+          },
+          {
+            "min": 151,
+            "max": 999,
+            "score": 4,
+            "label": "Severe hypernatremia"
+          }
+        ],
+        "plausibility": {
+          "min": 100,
+          "max": 180
+        }
+      },
+      "Potassium": {
+        "type": "range",
+        "unit": "mEq/L",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 2.9,
+            "score": 5,
+            "label": "Severe hypokalemia"
+          },
+          {
+            "min": 3.0,
+            "max": 3.4,
+            "score": 2,
+            "label": "Mild hypokalemia"
+          },
+          {
+            "min": 3.5,
+            "max": 5.0,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 5.1,
+            "max": 5.5,
+            "score": 2,
+            "label": "Mild hyperkalemia"
+          },
+          {
+            "min": 5.6,
+            "max": 6.0,
+            "score": 4,
+            "label": "Moderate hyperkalemia"
+          },
+          {
+            "min": 6.1,
+            "max": 999,
+            "score": 6,
+            "label": "Severe hyperkalemia"
+          }
+        ],
+        "plausibility": {
+          "min": 1.5,
+          "max": 8.0
+        }
+      },
+      "Creatinine": {
+        "type": "range",
+        "unit": "mg/dL",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 1.2,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 1.3,
+            "max": 1.9,
+            "score": 2,
+            "label": "Mildly elevated"
+          },
+          {
+            "min": 2.0,
+            "max": 3.0,
+            "score": 4,
+            "label": "Moderate renal impairment"
+          },
+          {
+            "min": 3.1,
+            "max": 999,
+            "score": 6,
+            "label": "Severe renal impairment"
+          }
+        ],
+        "plausibility": {
+          "min": 0.1,
+          "max": 20
+        }
+      },
+      "BUN": {
+        "type": "range",
+        "unit": "mg/dL",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 6,
+            "score": 1,
+            "label": "Low (malnutrition?)"
+          },
+          {
+            "min": 7,
+            "max": 20,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 21,
+            "max": 40,
+            "score": 2,
+            "label": "Mildly elevated"
+          },
+          {
+            "min": 41,
+            "max": 60,
+            "score": 4,
+            "label": "Moderate azotemia"
+          },
+          {
+            "min": 61,
+            "max": 999,
+            "score": 6,
+            "label": "Severe azotemia"
+          }
+        ],
+        "plausibility": {
+          "min": 1,
+          "max": 200
+        }
+      },
+      "Glucose": {
+        "type": "range",
+        "unit": "mg/dL",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 69,
+            "score": 4,
+            "label": "Hypoglycemia"
+          },
+          {
+            "min": 70,
+            "max": 140,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 141,
+            "max": 200,
+            "score": 1,
+            "label": "Mild hyperglycemia"
+          },
+          {
+            "min": 201,
+            "max": 300,
+            "score": 2,
+            "label": "Moderate hyperglycemia"
+          },
+          {
+            "min": 301,
+            "max": 999,
+            "score": 4,
+            "label": "Severe hyperglycemia"
+          }
+        ],
+        "plausibility": {
+          "min": 20,
+          "max": 1000
+        }
+      },
+      "Bicarbonate": {
+        "type": "range",
+        "unit": "mEq/L",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 17,
+            "score": 4,
+            "label": "Severe acidosis"
+          },
+          {
+            "min": 18,
+            "max": 21,
+            "score": 2,
+            "label": "Mild acidosis"
+          },
+          {
+            "min": 22,
+            "max": 28,
+            "score": 0,
+            "label": "Normal"
+          },
+          {
+            "min": 29,
+            "max": 32,
+            "score": 1,
+            "label": "Mild alkalosis"
+          },
+          {
+            "min": 33,
+            "max": 999,
+            "score": 3,
+            "label": "Severe alkalosis"
+          }
+        ],
+        "plausibility": {
+          "min": 5,
+          "max": 50
+        }
+      }
+    }
+  },
+  "PROBLEMS": {
+    "max_score": 40,
+    "scoring_method": "snomed_group_weighted",
+    "config": {
+      "group_mapping_file": "snomed_problem_groups.json",
+      "multimorbidity_bonus": {
+        "description": "For each active problem group beyond the 3rd, add +1 (capped at +5)",
+        "threshold": 3,
+        "per_extra_group": 1,
+        "cap": 5
+      },
+      "value_filter": {
+        "include_values": [
+          "chronic",
+          "acute",
+          "exist"
+        ],
+        "exclude_values": [
+          "not exist"
+        ]
+      },
+      "score_cap": 40
+    }
+  },
+  "SYMPTOMS": {
+    "max_score": 15,
+    "scoring_method": "urgency_group_weighted",
+    "config": {
+      "group_mapping_file": "symptom_urgency_groups.json",
+      "severity_multiplier": {
+        "severe": 1.5,
+        "yes": 1.0,
+        "no": 0.0
+      },
+      "active_symptom_count_bonus": {
+        "description": "Bonus for having many active symptoms at once",
+        "threshold": 3,
+        "bonus": 2
+      },
+      "score_cap": 15
+    }
+  },
+  "MEDICATIONS": {
+    "max_score": 15,
+    "keywords": {
+      "Medication Count": {
+        "type": "range",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 4,
+            "score": 0,
+            "label": "Low"
+          },
+          {
+            "min": 5,
+            "max": 9,
+            "score": 1,
+            "label": "Moderate"
+          },
+          {
+            "min": 10,
+            "max": 14,
+            "score": 2,
+            "label": "High"
+          },
+          {
+            "min": 15,
+            "max": 999,
+            "score": 4,
+            "label": "Very high (polypharmacy)"
+          }
+        ]
+      },
+      "New Medications Count": {
+        "type": "range",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 1,
+            "score": 0,
+            "label": "Minimal"
+          },
+          {
+            "min": 2,
+            "max": 2,
+            "score": 1,
+            "label": "Moderate"
+          },
+          {
+            "min": 3,
+            "max": 4,
+            "score": 2,
+            "label": "High (adherence risk)"
+          },
+          {
+            "min": 5,
+            "max": 999,
+            "score": 4,
+            "label": "Very high (adherence crisis)"
+          }
+        ]
+      },
+      "Polypharmacy": {
+        "type": "categorical",
+        "values": {
+          "yes": 3,
+          "no": 0
+        },
+        "evidence": "Polypharmacy (>=5 meds) associated with 1.5-2x readmission OR"
+      },
+      "Anticoagulation": {
+        "type": "categorical",
+        "values": {
+          "yes": 2,
+          "no": 0
+        },
+        "evidence": "Anticoagulation = bleeding risk + INR management complexity"
+      },
+      "Insulin Therapy": {
+        "type": "categorical",
+        "values": {
+          "yes": 2,
+          "no": 0
+        },
+        "evidence": "Insulin management at home = hypoglycemia risk"
+      },
+      "Opioid Therapy": {
+        "type": "categorical",
+        "values": {
+          "yes": 3,
+          "no": 0
+        },
+        "evidence": "Opioid use associated with falls, constipation, respiratory depression"
+      },
+      "Diuretic Therapy": {
+        "type": "categorical",
+        "values": {
+          "yes": 1,
+          "no": 0
+        },
+        "evidence": "Diuretics = electrolyte monitoring, volume management"
+      }
+    }
+  },
+  "PROCEDURES": {
+    "max_score": 15,
+    "keywords": {
+      "Any Procedure": {
+        "type": "categorical",
+        "values": {
+          "yes": 2,
+          "no": 0
+        },
+        "note": "Only scored if no specific procedure flags are set"
+      },
+      "Surgery": {
+        "type": "categorical",
+        "values": {
+          "yes": 4,
+          "no": 0
+        },
+        "evidence": "Surgical patients have 15-20% higher readmission rate"
+      },
+      "Dialysis": {
+        "type": "categorical",
+        "values": {
+          "started": 5,
+          "done": 5,
+          "decided": 3,
+          "cancelled": 1,
+          "no": 0
+        },
+        "evidence": "New dialysis initiation associated with very high readmission (OR ~2.5)"
+      },
+      "Mechanical Ventilation": {
+        "type": "mixed",
+        "categorical_values": {
+          "no": 0
+        },
+        "numeric_rule": "Any numeric value > 0 scores 5 points (prolonged ventilation = ICU-level complexity)",
+        "score_if_any_positive": 5,
+        "evidence": "Mechanical ventilation = post-ICU syndrome, deconditioning, respiratory fragility"
+      }
+    }
+  },
+  "UTILIZATION": {
+    "max_score": 20,
+    "keywords": {
+      "Prior Admissions 12mo": {
+        "type": "range",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 0,
+            "score": 0,
+            "label": "No prior"
+          },
+          {
+            "min": 1,
+            "max": 1,
+            "score": 3,
+            "label": "One prior"
+          },
+          {
+            "min": 2,
+            "max": 3,
+            "score": 6,
+            "label": "Frequent"
+          },
+          {
+            "min": 4,
+            "max": 999,
+            "score": 10,
+            "label": "Super-utilizer"
+          }
+        ],
+        "evidence": "Prior admission count is the single strongest predictor (OR 2.0-3.5)"
+      },
+      "ED Visits 6mo": {
+        "type": "range",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 0,
+            "score": 0,
+            "label": "None"
+          },
+          {
+            "min": 1,
+            "max": 1,
+            "score": 2,
+            "label": "One visit"
+          },
+          {
+            "min": 2,
+            "max": 3,
+            "score": 4,
+            "label": "Multiple"
+          },
+          {
+            "min": 4,
+            "max": 999,
+            "score": 6,
+            "label": "Frequent ED user"
+          }
+        ]
+      },
+      "Days Since Last Admission": {
+        "type": "range",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 30,
+            "score": 4,
+            "label": "Very recent (<=30d)"
+          },
+          {
+            "min": 31,
+            "max": 90,
+            "score": 2,
+            "label": "Recent (31-90d)"
+          },
+          {
+            "min": 91,
+            "max": 365,
+            "score": 1,
+            "label": "Within year"
+          },
+          {
+            "min": 366,
+            "max": 999999,
+            "score": 0,
+            "label": "Not recent"
+          }
+        ]
+      },
+      "Current Length of Stay": {
+        "type": "range",
+        "ranges": [
+          {
+            "min": 0,
+            "max": 2,
+            "score": 2,
+            "label": "Very short (possible premature discharge)"
+          },
+          {
+            "min": 3,
+            "max": 6,
+            "score": 0,
+            "label": "Typical"
+          },
+          {
+            "min": 7,
+            "max": 13,
+            "score": 2,
+            "label": "Extended"
+          },
+          {
+            "min": 14,
+            "max": 999,
+            "score": 5,
+            "label": "Prolonged (complex case)"
+          }
+        ],
+        "evidence": "Both very short and very long LOS associated with higher readmission"
+      }
+    }
+  },
+  "DISPOSITION": {
+    "max_score": 15,
+    "keywords": {
+      "Discharge Disposition": {
+        "type": "categorical",
+        "values": {
+          "Home": 0,
+          "Home with Services": 2,
+          "Rehab": 4,
+          "SNF": 5,
+          "LTAC": 6,
+          "Hospice": 7,
+          "AMA": 8
+        },
+        "evidence": "AMA discharge has highest 30d readmission (OR ~3.0); SNF/LTAC also elevated"
+      },
+      "Mental Status": {
+        "type": "categorical",
+        "values": {
+          "alert": 0,
+          "oriented": 0,
+          "confused": 4,
+          "lethargic": 6
+        },
+        "evidence": "Altered mental status at discharge = post-delirium syndrome, medication errors, fall risk"
+      }
+    }
+  },
+  "INTERACTIONS": {
+    "description": "Cross-cluster clinical pattern detection. Bonus points when synergistic patterns are present.",
+    "patterns": [
+      {
+        "id": "sepsis_pattern",
+        "name": "Sepsis / SIRS Pattern",
+        "bonus": 10,
+        "conditions": {
+          "require_all": [
+            {
+              "cluster": "VITALS",
+              "keyword": "Heart Rate",
+              "operator": ">",
+              "value": 100
+            }
+          ],
+          "require_any_of": [
+            {
+              "cluster": "VITALS",
+              "keyword": "Systolic BP",
+              "operator": "<",
+              "value": 100
+            },
+            {
+              "cluster": "VITALS",
+              "keyword": "Respiratory Rate",
+              "operator": ">",
+              "value": 22
+            }
+          ],
+          "require_any_of_2": [
+            {
+              "cluster": "LABS",
+              "keyword": "WBC",
+              "operator": ">",
+              "value": 12
+            },
+            {
+              "cluster": "LABS",
+              "keyword": "WBC",
+              "operator": "<",
+              "value": 4
+            },
+            {
+              "cluster": "VITALS",
+              "keyword": "Temperature",
+              "operator": ">",
+              "value": 100.4
+            }
+          ]
+        },
+        "evidence": "qSOFA + SIRS criteria. Sepsis at discharge = very high readmission risk."
+      },
+      {
+        "id": "aki_pattern",
+        "name": "Acute Kidney Injury Pattern",
+        "bonus": 8,
+        "conditions": {
+          "require_all": [
+            {
+              "cluster": "LABS",
+              "keyword": "Creatinine",
+              "operator": ">",
+              "value": 1.5
+            },
+            {
+              "cluster": "LABS",
+              "keyword": "BUN",
+              "operator": ">",
+              "value": 30
+            }
+          ],
+          "require_any_of": [
+            {
+              "cluster": "LABS",
+              "keyword": "Potassium",
+              "operator": ">",
+              "value": 5.0
+            },
+            {
+              "cluster": "LABS",
+              "keyword": "Sodium",
+              "operator": "<",
+              "value": 135
+            },
+            {
+              "cluster": "LABS",
+              "keyword": "Bicarbonate",
+              "operator": "<",
+              "value": 22
+            }
+          ]
+        },
+        "evidence": "AKI with electrolyte derangement = unstable renal function, readmission OR ~2.0"
+      },
+      {
+        "id": "decompensated_hf",
+        "name": "Decompensated Heart Failure Pattern",
+        "bonus": 8,
+        "conditions": {
+          "require_problem_group": "heart_failure",
+          "require_any_of": [
+            {
+              "cluster": "SYMPTOMS",
+              "keyword_group": "edema_fluid"
+            },
+            {
+              "cluster": "SYMPTOMS",
+              "keyword_group": "respiratory_distress"
+            },
+            {
+              "cluster": "LABS",
+              "keyword": "BUN",
+              "operator": ">",
+              "value": 40
+            }
+          ]
+        },
+        "evidence": "CHF + fluid overload/dyspnea = decompensation, 25-30% 30d readmission"
+      },
+      {
+        "id": "frailty_syndrome",
+        "name": "Frailty Syndrome",
+        "bonus": 6,
+        "conditions": {
+          "require_all": [
+            {
+              "cluster": "DEMOGRAPHICS",
+              "keyword": "Age",
+              "operator": ">",
+              "value": 75
+            }
+          ],
+          "require_count_ge": {
+            "count": 2,
+            "from": [
+              {
+                "type": "problem_groups_active_ge",
+                "value": 3
+              },
+              {
+                "cluster": "LABS",
+                "keyword": "Hemoglobin",
+                "operator": "<",
+                "value": 10
+              },
+              {
+                "cluster": "DISPOSITION",
+                "keyword": "Mental Status",
+                "value_in": [
+                  "confused",
+                  "lethargic"
+                ]
+              },
+              {
+                "cluster": "DISPOSITION",
+                "keyword": "Discharge Disposition",
+                "value_in": [
+                  "SNF",
+                  "LTAC",
+                  "Rehab"
+                ]
+              }
+            ]
+          }
+        },
+        "evidence": "Frailty = age + multimorbidity + functional decline → readmission OR ~1.8"
+      },
+      {
+        "id": "unstable_discharge",
+        "name": "Unstable Discharge",
+        "bonus": 5,
+        "conditions": {
+          "require_any_of": [
+            {
+              "cluster": "DISPOSITION",
+              "keyword": "Discharge Disposition",
+              "value_in": [
+                "AMA"
+              ]
+            },
+            {
+              "compound_and": [
+                {
+                  "cluster": "DISPOSITION",
+                  "keyword": "Mental Status",
+                  "value_in": [
+                    "confused",
+                    "lethargic"
+                  ]
+                },
+                {
+                  "cluster": "DISPOSITION",
+                  "keyword": "Discharge Disposition",
+                  "value_in": [
+                    "Home"
+                  ]
+                }
+              ]
+            }
+          ]
+        },
+        "evidence": "AMA or confused-to-Home = highest readmission subgroup"
+      },
+      {
+        "id": "respiratory_failure",
+        "name": "Respiratory Failure Pattern",
+        "bonus": 6,
+        "conditions": {
+          "require_all": [
+            {
+              "cluster": "VITALS",
+              "keyword": "SpO2",
+              "operator": "<",
+              "value": 92
+            }
+          ],
+          "require_any_of": [
+            {
+              "cluster": "VITALS",
+              "keyword": "Respiratory Rate",
+              "operator": ">",
+              "value": 24
+            },
+            {
+              "cluster": "SYMPTOMS",
+              "keyword_group": "respiratory_distress"
+            }
+          ]
+        },
+        "evidence": "Hypoxia + tachypnea/dyspnea = respiratory failure, readmission OR ~2.0"
+      },
+      {
+        "id": "metabolic_crisis",
+        "name": "Metabolic Crisis (DKA/HHS)",
+        "bonus": 6,
+        "conditions": {
+          "require_all": [
+            {
+              "cluster": "LABS",
+              "keyword": "Glucose",
+              "operator": ">",
+              "value": 300
+            }
+          ],
+          "require_any_of": [
+            {
+              "cluster": "LABS",
+              "keyword": "Bicarbonate",
+              "operator": "<",
+              "value": 18
+            },
+            {
+              "cluster": "LABS",
+              "keyword": "Potassium",
+              "operator": ">",
+              "value": 5.5
+            }
+          ]
+        },
+        "evidence": "DKA/HHS at discharge = very high readmission, especially without insulin education"
+      },
+      {
+        "id": "bleeding_risk",
+        "name": "Active Bleeding Risk",
+        "bonus": 6,
+        "conditions": {
+          "require_all": [
+            {
+              "cluster": "LABS",
+              "keyword": "Hemoglobin",
+              "operator": "<",
+              "value": 8
+            }
+          ],
+          "require_any_of": [
+            {
+              "cluster": "LABS",
+              "keyword": "Platelet",
+              "operator": "<",
+              "value": 100
+            },
+            {
+              "cluster": "MEDICATIONS",
+              "keyword": "Anticoagulation",
+              "value_in": [
+                "yes"
+              ]
+            }
+          ]
+        },
+        "evidence": "Severe anemia + thrombocytopenia/anticoagulation = high bleeding readmission risk"
+      }
+    ]
+  },
+  "DAYS_PREDICTION": {
+    "description": "Maps composite risk score to estimated days-to-readmission.",
+    "models": {
+      "regression": {
+        "formula": "max(1, D_max * exp(-gamma * score))",
+        "parameters": {
+          "D_max": 20,
+          "gamma": 0.022
+        },
+        "expected_outputs": {
+          "score_10": 16.1,
+          "score_20": 12.9,
+          "score_40": 8.3,
+          "score_60": 5.3,
+          "score_80": 3.4,
+          "score_100": 2.2
+        }
+      },
+      "buckets": {
+        "urgent": {
+          "label": "0-7 days",
+          "condition": "estimated_days <= 7"
+        },
+        "near_term": {
+          "label": "8-14 days",
+          "condition": "7 < estimated_days <= 14"
+        },
+        "late": {
+          "label": "15-30 days",
+          "condition": "estimated_days > 14"
+        }
+      },
+      "survival": {
+        "description": "Exponential hazard model for P(readmit by day t)",
+        "formula": "P(t) = P_30d * (1 - exp(-(t/30) * k)) / (1 - exp(-k))",
+        "parameters": {
+          "k_base": 2.0,
+          "k_adjustment": "k = k_base + 0.02 * (score - 30)"
+        },
+        "notes": [
+          "k > 0 means hazard is front-loaded (higher risk patients readmit earlier); larger k is more front-loaded",
+          "k < 0 means hazard is back-loaded; as k approaches 0 the hazard becomes uniform over the 30 days",
+          "P_30d is from the logistic calibration model"
+        ],
+        "output_horizons": [
+          7,
+          14,
+          21,
+          30
+        ]
+      }
+    }
+  }
+}

Analysis_Readmission/config/snomed_problem_groups.json ADDED Viewed

	@@ -0,0 +1,1584 @@

+{
+  "_meta": {
+    "version": "1.0",
+    "description": "SNOMED-CT concept groups for PROBLEMS cluster. Maps free-text diagnoses to clinically meaningful groups with readmission risk weights.",
+    "notes": [
+      "Each group has a primary SNOMED-CT code, risk_weight (0-10), and synonyms for fuzzy matching.",
+      "risk_weight reflects evidence-based contribution to 30-day readmission risk.",
+      "charlson=true means the group is part of Charlson Comorbidity Index.",
+      "Synonyms are lowercase for case-insensitive matching."
+    ]
+  },
+  "groups": [
+    {
+      "id": "heart_failure",
+      "name": "Heart Failure",
+      "snomed_ct": "84114007",
+      "icd10_range": [
+        "I50"
+      ],
+      "risk_weight": 8,
+      "charlson": true,
+      "synonyms": [
+        "heart failure",
+        "congestive heart failure",
+        "chf",
+        "systolic heart failure",
+        "diastolic heart failure",
+        "hfref",
+        "hfpef",
+        "left ventricular failure",
+        "right heart failure",
+        "biventricular failure",
+        "cardiomyopathy",
+        "dilated cardiomyopathy",
+        "ischemic cardiomyopathy",
+        "nonischemic cardiomyopathy",
+        "decompensated heart failure",
+        "acute on chronic heart failure",
+        "nyha class",
+        "cardiac decompensation",
+        "reduced ejection fraction",
+        "preserved ejection fraction",
+        "diastolic dysfunction",
+        "lvef",
+        "cardiac failure",
+        "left ventricular hypertrophy",
+        "lvh",
+        "concentric hypertrophy",
+        "heart murmur",
+        "cardiac murmur"
+      ]
+    },
+    {
+      "id": "copd",
+      "name": "COPD / Chronic Lung Disease",
+      "snomed_ct": "13645005",
+      "icd10_range": [
+        "J44",
+        "J43",
+        "J42"
+      ],
+      "risk_weight": 5,
+      "charlson": true,
+      "synonyms": [
+        "copd",
+        "chronic obstructive pulmonary disease",
+        "emphysema",
+        "chronic bronchitis",
+        "copd exacerbation",
+        "acute exacerbation of copd",
+        "chronic lung disease",
+        "chronic respiratory failure",
+        "oxygen dependent",
+        "home oxygen"
+      ]
+    },
+    {
+      "id": "diabetes_uncomplicated",
+      "name": "Diabetes Mellitus (uncomplicated)",
+      "snomed_ct": "73211009",
+      "icd10_range": [
+        "E11",
+        "E10"
+      ],
+      "risk_weight": 3,
+      "charlson": true,
+      "synonyms": [
+        "diabetes",
+        "diabetes mellitus",
+        "diabetes mellitus type 2",
+        "diabetes mellitus type 1",
+        "type 2 diabetes",
+        "type 1 diabetes",
+        "dm",
+        "dm2",
+        "dm1",
+        "t2dm",
+        "t1dm",
+        "iddm",
+        "niddm",
+        "adult onset diabetes",
+        "juvenile diabetes",
+        "insulin dependent diabetes"
+      ]
+    },
+    {
+      "id": "diabetes_complicated",
+      "name": "Diabetes with Complications",
+      "snomed_ct": "368581000119106",
+      "icd10_range": [
+        "E11.2",
+        "E11.3",
+        "E11.4",
+        "E11.5",
+        "E11.6"
+      ],
+      "risk_weight": 5,
+      "charlson": true,
+      "synonyms": [
+        "diabetic nephropathy",
+        "diabetic neuropathy",
+        "diabetic retinopathy",
+        "diabetic foot",
+        "diabetic ketoacidosis",
+        "dka",
+        "diabetic ulcer",
+        "diabetic gastroparesis",
+        "diabetes with renal manifestations",
+        "diabetes with ophthalmic manifestations",
+        "hyperosmolar hyperglycemic state",
+        "hhs"
+      ]
+    },
+    {
+      "id": "ckd",
+      "name": "Chronic Kidney Disease",
+      "snomed_ct": "709044004",
+      "icd10_range": [
+        "N18"
+      ],
+      "risk_weight": 6,
+      "charlson": true,
+      "synonyms": [
+        "chronic kidney disease",
+        "ckd",
+        "chronic renal failure",
+        "chronic renal insufficiency",
+        "end stage renal disease",
+        "esrd",
+        "stage 3 ckd",
+        "stage 4 ckd",
+        "stage 5 ckd",
+        "renal failure",
+        "kidney failure",
+        "dialysis dependent",
+        "hemodialysis",
+        "peritoneal dialysis",
+        "renal transplant",
+        "nephropathy",
+        "nephrotic syndrome",
+        "end-stage renal disease"
+      ]
+    },
+    {
+      "id": "aki",
+      "name": "Acute Kidney Injury",
+      "snomed_ct": "14669001",
+      "icd10_range": [
+        "N17"
+      ],
+      "risk_weight": 5,
+      "charlson": false,
+      "synonyms": [
+        "acute kidney injury",
+        "aki",
+        "acute renal failure",
+        "acute renal insufficiency",
+        "acute tubular necrosis",
+        "atn",
+        "prerenal azotemia",
+        "contrast nephropathy"
+      ]
+    },
+    {
+      "id": "cancer_solid",
+      "name": "Cancer (solid tumor, non-metastatic)",
+      "snomed_ct": "363346000",
+      "icd10_range": [
+        "C00-C75"
+      ],
+      "risk_weight": 5,
+      "charlson": true,
+      "synonyms": [
+        "cancer",
+        "malignancy",
+        "malignant neoplasm",
+        "carcinoma",
+        "adenocarcinoma",
+        "squamous cell carcinoma",
+        "lung cancer",
+        "breast cancer",
+        "colon cancer",
+        "prostate cancer",
+        "bladder cancer",
+        "renal cell carcinoma",
+        "pancreatic cancer",
+        "hepatocellular carcinoma",
+        "ovarian cancer",
+        "cervical cancer",
+        "endometrial cancer",
+        "thyroid cancer",
+        "gastric cancer",
+        "esophageal cancer",
+        "melanoma",
+        "sarcoma",
+        "lymphoma",
+        "leukemia",
+        "non-hodgkin lymphoma",
+        "hodgkin lymphoma",
+        "multiple myeloma",
+        "myelodysplastic syndrome",
+        "myeloproliferative disorder",
+        "meningioma",
+        "glioma",
+        "brain tumor",
+        "astrocytoma",
+        "schwannoma",
+        "pheochromocytoma",
+        "benign tumor",
+        "benign neoplasm",
+        "polyp",
+        "papilloma"
+      ]
+    },
+    {
+      "id": "cancer_metastatic",
+      "name": "Metastatic Cancer",
+      "snomed_ct": "315004001",
+      "icd10_range": [
+        "C77-C80"
+      ],
+      "risk_weight": 8,
+      "charlson": true,
+      "synonyms": [
+        "metastatic",
+        "metastasis",
+        "metastases",
+        "stage iv cancer",
+        "stage 4 cancer",
+        "disseminated",
+        "advanced cancer",
+        "brain metastasis",
+        "liver metastasis",
+        "bone metastasis",
+        "lung metastasis",
+        "widespread disease",
+        "terminal cancer"
+      ]
+    },
+    {
+      "id": "liver_disease",
+      "name": "Liver Disease",
+      "snomed_ct": "235856003",
+      "icd10_range": [
+        "K70-K77"
+      ],
+      "risk_weight": 5,
+      "charlson": true,
+      "synonyms": [
+        "cirrhosis",
+        "liver cirrhosis",
+        "hepatic cirrhosis",
+        "liver failure",
+        "hepatic failure",
+        "hepatitis",
+        "hepatitis b",
+        "hepatitis c",
+        "alcoholic liver disease",
+        "nafld",
+        "nash",
+        "nonalcoholic fatty liver disease",
+        "nonalcoholic steatohepatitis",
+        "portal hypertension",
+        "esophageal varices",
+        "hepatic encephalopathy",
+        "ascites",
+        "hepatorenal syndrome",
+        "liver transplant",
+        "transaminitis",
+        "elevated liver enzymes",
+        "elevated transaminases",
+        "alt elevation",
+        "ast elevation"
+      ]
+    },
+    {
+      "id": "cva_stroke",
+      "name": "Cerebrovascular Disease / Stroke",
+      "snomed_ct": "62914000",
+      "icd10_range": [
+        "I60-I69"
+      ],
+      "risk_weight": 4,
+      "charlson": true,
+      "synonyms": [
+        "stroke",
+        "cerebrovascular accident",
+        "cva",
+        "ischemic stroke",
+        "hemorrhagic stroke",
+        "transient ischemic attack",
+        "tia",
+        "cerebral infarction",
+        "intracranial hemorrhage",
+        "subarachnoid hemorrhage",
+        "subdural hematoma",
+        "carotid stenosis",
+        "vertebrobasilar insufficiency",
+        "cerebrovascular disease",
+        "carotid artery stenosis",
+        "bell's palsy",
+        "facial palsy",
+        "cerebral aneurysm",
+        "intracranial aneurysm",
+        "aneurysm clipping"
+      ]
+    },
+    {
+      "id": "mi_ihd",
+      "name": "Myocardial Infarction / Ischemic Heart Disease",
+      "snomed_ct": "22298006",
+      "icd10_range": [
+        "I21",
+        "I25"
+      ],
+      "risk_weight": 5,
+      "charlson": true,
+      "synonyms": [
+        "myocardial infarction",
+        "mi",
+        "heart attack",
+        "stemi",
+        "nstemi",
+        "acute coronary syndrome",
+        "acs",
+        "coronary artery disease",
+        "cad",
+        "unstable angina",
+        "angina pectoris",
+        "angina",
+        "triple vessel disease",
+        "left main disease",
+        "coronary occlusion",
+        "coronary thrombosis",
+        "ischemic heart disease",
+        "ihd",
+        "chest pain",
+        "angina equivalent",
+        "troponin elevation"
+      ]
+    },
+    {
+      "id": "atrial_fibrillation",
+      "name": "Atrial Fibrillation / Arrhythmia",
+      "snomed_ct": "49436004",
+      "icd10_range": [
+        "I48"
+      ],
+      "risk_weight": 3,
+      "charlson": false,
+      "synonyms": [
+        "atrial fibrillation",
+        "afib",
+        "a-fib",
+        "atrial flutter",
+        "supraventricular tachycardia",
+        "svt",
+        "ventricular tachycardia",
+        "vtach",
+        "ventricular fibrillation",
+        "vfib",
+        "arrhythmia",
+        "cardiac arrhythmia",
+        "sick sinus syndrome",
+        "bradycardia",
+        "heart block",
+        "av block",
+        "bundle branch block",
+        "prolonged qt",
+        "wolff-parkinson-white",
+        "wpw",
+        "paroxysmal atrial fibrillation",
+        "atrial tachycardia",
+        "palpitations",
+        "tachyarrhythmia",
+        "pacing",
+        "pacemaker"
+      ]
+    },
+    {
+      "id": "pvd",
+      "name": "Peripheral Vascular Disease",
+      "snomed_ct": "400047006",
+      "icd10_range": [
+        "I73"
+      ],
+      "risk_weight": 3,
+      "charlson": true,
+      "synonyms": [
+        "peripheral vascular disease",
+        "pvd",
+        "peripheral artery disease",
+        "pad",
+        "claudication",
+        "intermittent claudication",
+        "critical limb ischemia",
+        "gangrene",
+        "aortic aneurysm",
+        "abdominal aortic aneurysm",
+        "aaa",
+        "thoracic aortic aneurysm",
+        "aortic dissection",
+        "varicose veins",
+        "venous insufficiency",
+        "chronic venous insufficiency"
+      ]
+    },
+    {
+      "id": "vte",
+      "name": "Venous Thromboembolism",
+      "snomed_ct": "111293003",
+      "icd10_range": [
+        "I26",
+        "I82"
+      ],
+      "risk_weight": 4,
+      "charlson": false,
+      "synonyms": [
+        "pulmonary embolism",
+        "pe",
+        "dvt",
+        "deep vein thrombosis",
+        "deep venous thrombosis",
+        "venous thromboembolism",
+        "vte",
+        "saddle embolus",
+        "submassive pe",
+        "massive pe",
+        "iliac vein thrombosis",
+        "portal vein thrombosis",
+        "thrombophilia",
+        "hypercoagulable state",
+        "antiphospholipid syndrome"
+      ]
+    },
+    {
+      "id": "hypertension",
+      "name": "Hypertension",
+      "snomed_ct": "38341003",
+      "icd10_range": [
+        "I10-I15"
+      ],
+      "risk_weight": 2,
+      "charlson": false,
+      "synonyms": [
+        "hypertension",
+        "htn",
+        "high blood pressure",
+        "essential hypertension",
+        "malignant hypertension",
+        "hypertensive emergency",
+        "hypertensive urgency",
+        "resistant hypertension",
+        "secondary hypertension",
+        "pulmonary hypertension",
+        "pulmonary arterial hypertension"
+      ]
+    },
+    {
+      "id": "valvular",
+      "name": "Valvular Heart Disease",
+      "snomed_ct": "368009",
+      "icd10_range": [
+        "I34-I37"
+      ],
+      "risk_weight": 3,
+      "charlson": false,
+      "synonyms": [
+        "aortic stenosis",
+        "aortic valve stenosis",
+        "aortic regurgitation",
+        "aortic insufficiency",
+        "mitral stenosis",
+        "mitral regurgitation",
+        "mitral valve prolapse",
+        "tricuspid regurgitation",
+        "valve replacement",
+        "prosthetic valve",
+        "bioprosthetic valve",
+        "mechanical valve",
+        "endocarditis",
+        "infective endocarditis"
+      ]
+    },
+    {
+      "id": "dementia",
+      "name": "Dementia / Cognitive Decline",
+      "snomed_ct": "52448006",
+      "icd10_range": [
+        "F00-F03",
+        "G30"
+      ],
+      "risk_weight": 4,
+      "charlson": true,
+      "synonyms": [
+        "dementia",
+        "alzheimer",
+        "alzheimer's disease",
+        "vascular dementia",
+        "lewy body dementia",
+        "frontotemporal dementia",
+        "cognitive decline",
+        "cognitive impairment",
+        "mild cognitive impairment",
+        "mci",
+        "memory loss",
+        "encephalopathy",
+        "delirium",
+        "sundowning"
+      ]
+    },
+    {
+      "id": "depression",
+      "name": "Depression / Mood Disorders",
+      "snomed_ct": "35489007",
+      "icd10_range": [
+        "F32",
+        "F33",
+        "F31"
+      ],
+      "risk_weight": 3,
+      "charlson": false,
+      "synonyms": [
+        "depression",
+        "major depressive disorder",
+        "mdd",
+        "bipolar disorder",
+        "bipolar",
+        "bipolar i",
+        "bipolar ii",
+        "dysthymia",
+        "persistent depressive disorder",
+        "mood disorder",
+        "adjustment disorder",
+        "postpartum depression",
+        "seasonal affective disorder",
+        "treatment resistant depression"
+      ]
+    },
+    {
+      "id": "psychosis",
+      "name": "Psychotic Disorders",
+      "snomed_ct": "69322001",
+      "icd10_range": [
+        "F20-F29"
+      ],
+      "risk_weight": 4,
+      "charlson": false,
+      "synonyms": [
+        "schizophrenia",
+        "schizoaffective disorder",
+        "psychosis",
+        "psychotic disorder",
+        "paranoid schizophrenia",
+        "catatonia",
+        "delusional disorder",
+        "brief psychotic disorder"
+      ]
+    },
+    {
+      "id": "anxiety",
+      "name": "Anxiety Disorders",
+      "snomed_ct": "197480006",
+      "icd10_range": [
+        "F40",
+        "F41"
+      ],
+      "risk_weight": 2,
+      "charlson": false,
+      "synonyms": [
+        "anxiety",
+        "generalized anxiety disorder",
+        "gad",
+        "panic disorder",
+        "panic attack",
+        "social anxiety",
+        "agoraphobia",
+        "phobia",
+        "ptsd",
+        "post-traumatic stress disorder",
+        "obsessive compulsive disorder",
+        "ocd",
+        "posttraumatic stress disorder",
+        "post traumatic stress disorder",
+        "adhd",
+        "attention deficit hyperactivity disorder",
+        "attention deficit disorder"
+      ]
+    },
+    {
+      "id": "substance_abuse",
+      "name": "Substance Abuse / Dependence",
+      "snomed_ct": "66214007",
+      "icd10_range": [
+        "F10-F19"
+      ],
+      "risk_weight": 5,
+      "charlson": false,
+      "synonyms": [
+        "alcohol abuse",
+        "alcohol dependence",
+        "alcoholism",
+        "alcohol withdrawal",
+        "alcohol use disorder",
+        "drug abuse",
+        "substance abuse",
+        "substance use disorder",
+        "opioid dependence",
+        "opioid use disorder",
+        "cocaine abuse",
+        "cocaine dependence",
+        "benzodiazepine dependence",
+        "polysubstance abuse",
+        "drug overdose",
+        "heroin abuse",
+        "methamphetamine abuse",
+        "cannabis use disorder",
+        "tobacco use disorder",
+        "nicotine dependence",
+        "intravenous drug use",
+        "ivdu",
+        "tobacco abuse",
+        "smoking",
+        "tobacco dependence",
+        "tobacco use"
+      ]
+    },
+    {
+      "id": "sepsis",
+      "name": "Sepsis / Severe Infection",
+      "snomed_ct": "91302008",
+      "icd10_range": [
+        "A40",
+        "A41",
+        "R65.2"
+      ],
+      "risk_weight": 6,
+      "charlson": false,
+      "synonyms": [
+        "sepsis",
+        "severe sepsis",
+        "septic shock",
+        "bacteremia",
+        "fungemia",
+        "urosepsis",
+        "septicemia",
+        "systemic inflammatory response",
+        "sirs",
+        "blood stream infection",
+        "bsi"
+      ]
+    },
+    {
+      "id": "pneumonia",
+      "name": "Pneumonia / Lower Respiratory Infection",
+      "snomed_ct": "233604007",
+      "icd10_range": [
+        "J12-J18"
+      ],
+      "risk_weight": 4,
+      "charlson": false,
+      "synonyms": [
+        "pneumonia",
+        "community acquired pneumonia",
+        "cap",
+        "hospital acquired pneumonia",
+        "hap",
+        "ventilator associated pneumonia",
+        "vap",
+        "aspiration pneumonia",
+        "aspiration",
+        "lung abscess",
+        "empyema",
+        "bronchopneumonia",
+        "lobar pneumonia",
+        "respiratory infection",
+        "lower respiratory tract infection",
+        "sinusitis",
+        "upper respiratory infection",
+        "bronchitis",
+        "acute bronchitis"
+      ]
+    },
+    {
+      "id": "uti",
+      "name": "Urinary Tract Infection",
+      "snomed_ct": "68566005",
+      "icd10_range": [
+        "N39.0"
+      ],
+      "risk_weight": 2,
+      "charlson": false,
+      "synonyms": [
+        "urinary tract infection",
+        "uti",
+        "pyelonephritis",
+        "cystitis",
+        "urosepsis",
+        "catheter associated uti",
+        "cauti"
+      ]
+    },
+    {
+      "id": "gi_bleed",
+      "name": "GI Hemorrhage",
+      "snomed_ct": "74474003",
+      "icd10_range": [
+        "K92.0",
+        "K92.1",
+        "K92.2"
+      ],
+      "risk_weight": 5,
+      "charlson": false,
+      "synonyms": [
+        "gastrointestinal hemorrhage",
+        "gi bleed",
+        "gi bleeding",
+        "upper gi bleed",
+        "lower gi bleed",
+        "melena",
+        "hematochezia",
+        "hematemesis",
+        "variceal bleeding",
+        "peptic ulcer bleeding",
+        "diverticular bleeding",
+        "rectal bleeding",
+        "gastrointestinal bleeding",
+        "gi hemorrhage"
+      ]
+    },
+    {
+      "id": "gi_disease",
+      "name": "GI Disease (non-hemorrhage)",
+      "snomed_ct": "119292006",
+      "icd10_range": [
+        "K00-K93"
+      ],
+      "risk_weight": 2,
+      "charlson": false,
+      "synonyms": [
+        "gastroesophageal reflux disease",
+        "gerd",
+        "peptic ulcer",
+        "gastric ulcer",
+        "duodenal ulcer",
+        "crohn's disease",
+        "crohn disease",
+        "ulcerative colitis",
+        "inflammatory bowel disease",
+        "ibd",
+        "diverticulitis",
+        "diverticulosis",
+        "pancreatitis",
+        "acute pancreatitis",
+        "chronic pancreatitis",
+        "cholecystitis",
+        "cholelithiasis",
+        "gallstones",
+        "cholangitis",
+        "bowel obstruction",
+        "small bowel obstruction",
+        "ileus",
+        "celiac disease",
+        "gastroparesis",
+        "hiatal hernia",
+        "esophagitis",
+        "irritable bowel syndrome",
+        "ibs",
+        "clostridium difficile",
+        "c diff",
+        "c. difficile",
+        "appendicitis",
+        "peritonitis",
+        "constipation",
+        "chronic constipation",
+        "gastritis",
+        "hemorrhoids",
+        "dysphagia",
+        "nausea",
+        "esophageal stricture",
+        "colon polyps",
+        "choledocholithiasis",
+        "common bile duct stone",
+        "helicobacter pylori",
+        "h pylori",
+        "diarrhea",
+        "vomiting",
+        "abdominal pain",
+        "colonic polyp",
+        "rectal polyp",
+        "barrett's esophagus",
+        "biliary colic",
+        "gastroenteritis",
+        "food poisoning",
+        "colitis"
+      ]
+    },
+    {
+      "id": "anemia",
+      "name": "Anemia (chronic)",
+      "snomed_ct": "271737000",
+      "icd10_range": [
+        "D50-D64"
+      ],
+      "risk_weight": 3,
+      "charlson": false,
+      "synonyms": [
+        "anemia",
+        "iron deficiency anemia",
+        "anemia of chronic disease",
+        "chronic anemia",
+        "megaloblastic anemia",
+        "b12 deficiency",
+        "folate deficiency",
+        "pancytopenia",
+        "aplastic anemia",
+        "hemolytic anemia",
+        "sickle cell disease",
+        "sickle cell anemia",
+        "thalassemia",
+        "myelodysplastic syndrome"
+      ]
+    },
+    {
+      "id": "coagulopathy",
+      "name": "Coagulopathy / Bleeding Disorder",
+      "snomed_ct": "234466008",
+      "icd10_range": [
+        "D65-D69"
+      ],
+      "risk_weight": 4,
+      "charlson": false,
+      "synonyms": [
+        "thrombocytopenia",
+        "coagulopathy",
+        "dic",
+        "disseminated intravascular coagulation",
+        "heparin induced thrombocytopenia",
+        "hit",
+        "immune thrombocytopenic purpura",
+        "itp",
+        "von willebrand disease",
+        "hemophilia",
+        "anticoagulant related bleeding"
+      ]
+    },
+    {
+      "id": "obesity",
+      "name": "Obesity",
+      "snomed_ct": "414916001",
+      "icd10_range": [
+        "E66"
+      ],
+      "risk_weight": 2,
+      "charlson": false,
+      "synonyms": [
+        "obesity",
+        "morbid obesity",
+        "severe obesity",
+        "obese",
+        "bmi over 30",
+        "bmi over 40",
+        "class iii obesity",
+        "bariatric"
+      ]
+    },
+    {
+      "id": "thyroid",
+      "name": "Thyroid Disorders",
+      "snomed_ct": "14304000",
+      "icd10_range": [
+        "E00-E07"
+      ],
+      "risk_weight": 1,
+      "charlson": false,
+      "synonyms": [
+        "hypothyroidism",
+        "hyperthyroidism",
+        "thyroid disease",
+        "hashimoto",
+        "graves disease",
+        "thyroiditis",
+        "thyroid nodule",
+        "thyroid cancer",
+        "myxedema",
+        "thyroid storm",
+        "hyperparathyroidism",
+        "hypoparathyroidism",
+        "parathyroid disease"
+      ]
+    },
+    {
+      "id": "asthma",
+      "name": "Asthma",
+      "snomed_ct": "195967001",
+      "icd10_range": [
+        "J45"
+      ],
+      "risk_weight": 2,
+      "charlson": false,
+      "synonyms": [
+        "asthma",
+        "asthma exacerbation",
+        "acute asthma",
+        "status asthmaticus",
+        "reactive airway disease",
+        "bronchospasm",
+        "exercise induced asthma",
+        "allergic asthma"
+      ]
+    },
+    {
+      "id": "osa",
+      "name": "Obstructive Sleep Apnea",
+      "snomed_ct": "78275009",
+      "icd10_range": [
+        "G47.33"
+      ],
+      "risk_weight": 2,
+      "charlson": false,
+      "synonyms": [
+        "obstructive sleep apnea",
+        "osa",
+        "sleep apnea",
+        "central sleep apnea",
+        "sleep disordered breathing",
+        "cpap dependent"
+      ]
+    },
+    {
+      "id": "seizure",
+      "name": "Seizure Disorder / Epilepsy",
+      "snomed_ct": "84757009",
+      "icd10_range": [
+        "G40"
+      ],
+      "risk_weight": 3,
+      "charlson": false,
+      "synonyms": [
+        "seizure disorder",
+        "epilepsy",
+        "seizure",
+        "status epilepticus",
+        "convulsion",
+        "tonic-clonic seizure",
+        "grand mal seizure",
+        "focal seizure",
+        "absence seizure",
+        "breakthrough seizure",
+        "vertigo",
+        "dizziness",
+        "syncope",
+        "presyncope",
+        "ataxia",
+        "cerebellar ataxia",
+        "gait instability"
+      ]
+    },
+    {
+      "id": "falls_fracture",
+      "name": "Falls / Fracture",
+      "snomed_ct": "217082002",
+      "icd10_range": [
+        "W00-W19",
+        "S72"
+      ],
+      "risk_weight": 3,
+      "charlson": false,
+      "synonyms": [
+        "fall",
+        "falls",
+        "mechanical fall",
+        "fracture",
+        "hip fracture",
+        "femur fracture",
+        "vertebral fracture",
+        "compression fracture",
+        "rib fracture",
+        "pelvic fracture",
+        "pathologic fracture",
+        "fragility fracture"
+      ]
+    },
+    {
+      "id": "wound_infection",
+      "name": "Wound / Skin Infection",
+      "snomed_ct": "128045006",
+      "icd10_range": [
+        "L00-L08"
+      ],
+      "risk_weight": 3,
+      "charlson": false,
+      "synonyms": [
+        "cellulitis",
+        "abscess",
+        "wound infection",
+        "surgical site infection",
+        "ssi",
+        "osteomyelitis",
+        "necrotizing fasciitis",
+        "pressure ulcer",
+        "pressure injury",
+        "decubitus",
+        "diabetic foot infection",
+        "skin infection",
+        "mucositis",
+        "oral mucositis",
+        "stomatitis",
+        "discitis",
+        "vertebral discitis",
+        "spinal infection"
+      ]
+    },
+    {
+      "id": "electrolyte",
+      "name": "Electrolyte Disorders",
+      "snomed_ct": "237840007",
+      "icd10_range": [
+        "E87"
+      ],
+      "risk_weight": 3,
+      "charlson": false,
+      "synonyms": [
+        "hyponatremia",
+        "hypernatremia",
+        "hypokalemia",
+        "hyperkalemia",
+        "hypocalcemia",
+        "hypercalcemia",
+        "hypomagnesemia",
+        "hypermagnesemia",
+        "hypophosphatemia",
+        "metabolic acidosis",
+        "metabolic alkalosis",
+        "electrolyte imbalance",
+        "electrolyte abnormality",
+        "hyperglycemia",
+        "hypoglycemia",
+        "lactic acidosis"
+      ]
+    },
+    {
+      "id": "malnutrition",
+      "name": "Malnutrition / Failure to Thrive",
+      "snomed_ct": "248325000",
+      "icd10_range": [
+        "E40-E46",
+        "R62"
+      ],
+      "risk_weight": 4,
+      "charlson": false,
+      "synonyms": [
+        "malnutrition",
+        "protein calorie malnutrition",
+        "cachexia",
+        "failure to thrive",
+        "kwashiorkor",
+        "marasmus",
+        "severe malnutrition",
+        "nutritional deficiency",
+        "sarcopenia",
+        "wasting"
+      ]
+    },
+    {
+      "id": "connective_tissue",
+      "name": "Connective Tissue / Autoimmune Disease",
+      "snomed_ct": "105969002",
+      "icd10_range": [
+        "M30-M36"
+      ],
+      "risk_weight": 3,
+      "charlson": true,
+      "synonyms": [
+        "rheumatoid arthritis",
+        "lupus",
+        "systemic lupus erythematosus",
+        "sle",
+        "scleroderma",
+        "vasculitis",
+        "polymyalgia rheumatica",
+        "dermatomyositis",
+        "polymyositis",
+        "sjogren syndrome",
+        "mixed connective tissue disease",
+        "ankylosing spondylitis",
+        "multiple sclerosis",
+        "ms",
+        "psoriasis",
+        "psoriatic arthritis"
+      ]
+    },
+    {
+      "id": "hiv_aids",
+      "name": "HIV / AIDS",
+      "snomed_ct": "86406008",
+      "icd10_range": [
+        "B20-B24"
+      ],
+      "risk_weight": 4,
+      "charlson": true,
+      "synonyms": [
+        "hiv",
+        "aids",
+        "human immunodeficiency virus",
+        "acquired immunodeficiency syndrome",
+        "hiv positive",
+        "hiv infection"
+      ]
+    },
+    {
+      "id": "transplant",
+      "name": "Organ Transplant",
+      "snomed_ct": "77465005",
+      "icd10_range": [
+        "Z94"
+      ],
+      "risk_weight": 5,
+      "charlson": false,
+      "synonyms": [
+        "transplant",
+        "organ transplant",
+        "kidney transplant",
+        "liver transplant",
+        "heart transplant",
+        "lung transplant",
+        "bone marrow transplant",
+        "stem cell transplant",
+        "graft versus host disease",
+        "gvhd",
+        "transplant rejection",
+        "immunosuppression"
+      ]
+    },
+    {
+      "id": "hyperlipidemia",
+      "name": "Hyperlipidemia",
+      "snomed_ct": "55822004",
+      "icd10_range": [
+        "E78"
+      ],
+      "risk_weight": 1,
+      "charlson": false,
+      "synonyms": [
+        "hyperlipidemia",
+        "hypercholesterolemia",
+        "dyslipidemia",
+        "high cholesterol",
+        "hypertriglyceridemia",
+        "mixed hyperlipidemia"
+      ]
+    },
+    {
+      "id": "bph_urological",
+      "name": "BPH / Urological",
+      "snomed_ct": "266569009",
+      "icd10_range": [
+        "N40"
+      ],
+      "risk_weight": 1,
+      "charlson": false,
+      "synonyms": [
+        "benign prostatic hyperplasia",
+        "bph",
+        "benign prostatic hypertrophy",
+        "urinary retention",
+        "urinary incontinence",
+        "overactive bladder",
+        "neurogenic bladder",
+        "nephrolithiasis",
+        "kidney stone",
+        "renal calculus",
+        "ureteral stone",
+        "hydronephrosis",
+        "hematuria",
+        "gross hematuria",
+        "microscopic hematuria",
+        "uterine fibroids",
+        "endometriosis",
+        "ovarian cyst",
+        "renal stone",
+        "urolithiasis",
+        "bladder stone"
+      ]
+    },
+    {
+      "id": "osteoarthritis",
+      "name": "Osteoarthritis / Degenerative Joint",
+      "snomed_ct": "396275006",
+      "icd10_range": [
+        "M15-M19"
+      ],
+      "risk_weight": 1,
+      "charlson": false,
+      "synonyms": [
+        "osteoarthritis",
+        "degenerative joint disease",
+        "djd",
+        "joint replacement",
+        "knee replacement",
+        "hip replacement",
+        "total knee arthroplasty",
+        "total hip arthroplasty",
+        "spinal stenosis",
+        "cervical spondylosis",
+        "lumbar spondylosis",
+        "degenerative disc disease",
+        "herniated disc",
+        "scoliosis",
+        "kyphosis",
+        "spinal deformity",
+        "plantar fasciitis",
+        "tendinitis",
+        "bursitis",
+        "rotator cuff"
+      ]
+    },
+    {
+      "id": "osteoporosis",
+      "name": "Osteoporosis",
+      "snomed_ct": "64859006",
+      "icd10_range": [
+        "M80-M81"
+      ],
+      "risk_weight": 2,
+      "charlson": false,
+      "synonyms": [
+        "osteoporosis",
+        "osteopenia",
+        "low bone density",
+        "bone loss",
+        "vitamin d deficiency"
+      ]
+    },
+    {
+      "id": "gout",
+      "name": "Gout / Crystal Arthropathy",
+      "snomed_ct": "90560007",
+      "icd10_range": [
+        "M10"
+      ],
+      "risk_weight": 1,
+      "charlson": false,
+      "synonyms": [
+        "gout",
+        "gouty arthritis",
+        "gout flare",
+        "pseudogout",
+        "calcium pyrophosphate",
+        "crystal arthropathy",
+        "hyperuricemia"
+      ]
+    },
+    {
+      "id": "migraine",
+      "name": "Migraine / Headache Disorders",
+      "snomed_ct": "37796009",
+      "icd10_range": [
+        "G43"
+      ],
+      "risk_weight": 1,
+      "charlson": false,
+      "synonyms": [
+        "migraine",
+        "migraine with aura",
+        "migraine without aura",
+        "chronic migraine",
+        "tension headache",
+        "cluster headache"
+      ]
+    },
+    {
+      "id": "glaucoma",
+      "name": "Glaucoma / Eye Disease",
+      "snomed_ct": "23986001",
+      "icd10_range": [
+        "H40"
+      ],
+      "risk_weight": 1,
+      "charlson": false,
+      "synonyms": [
+        "glaucoma",
+        "open angle glaucoma",
+        "angle closure glaucoma",
+        "macular degeneration",
+        "cataracts",
+        "diabetic retinopathy",
+        "retinal detachment",
+        "blindness",
+        "vision loss",
+        "visual impairment",
+        "macular hole"
+      ]
+    },
+    {
+      "id": "peripheral_neuropathy",
+      "name": "Peripheral Neuropathy",
+      "snomed_ct": "302226006",
+      "icd10_range": [
+        "G60-G64"
+      ],
+      "risk_weight": 2,
+      "charlson": false,
+      "synonyms": [
+        "peripheral neuropathy",
+        "neuropathy",
+        "polyneuropathy",
+        "mononeuropathy",
+        "carpal tunnel",
+        "radiculopathy",
+        "sciatica",
+        "nerve entrapment",
+        "foot drop",
+        "peroneal neuropathy",
+        "hearing loss",
+        "sensorineural hearing loss",
+        "hard of hearing",
+        "hearing impairment",
+        "deafness",
+        "tinnitus"
+      ]
+    },
+    {
+      "id": "pleural_respiratory",
+      "name": "Pleural / Pulmonary Effusion",
+      "snomed_ct": "60046008",
+      "icd10_range": [
+        "J90",
+        "J91"
+      ],
+      "risk_weight": 3,
+      "charlson": false,
+      "synonyms": [
+        "pleural effusion",
+        "pulmonary edema",
+        "pulmonary effusion",
+        "hydrothorax",
+        "empyema",
+        "hemothorax",
+        "pneumothorax",
+        "pleurisy",
+        "pulmonary fibrosis",
+        "interstitial lung disease",
+        "bronchiectasis",
+        "atelectasis",
+        "pulmonary nodule",
+        "lung nodule",
+        "pericardial effusion",
+        "pericarditis",
+        "pericardial tamponade"
+      ]
+    },
+    {
+      "id": "hypotension_shock",
+      "name": "Hypotension / Shock",
+      "snomed_ct": "45007003",
+      "icd10_range": [
+        "I95",
+        "R57"
+      ],
+      "risk_weight": 4,
+      "charlson": false,
+      "synonyms": [
+        "hypotension",
+        "orthostatic hypotension",
+        "postural hypotension",
+        "shock",
+        "cardiogenic shock",
+        "hypovolemic shock",
+        "distributive shock",
+        "hemorrhagic shock",
+        "vasopressor dependent",
+        "vasovagal",
+        "neurocardiogenic syncope"
+      ]
+    },
+    {
+      "id": "chronic_pain",
+      "name": "Chronic Pain Syndrome",
+      "snomed_ct": "82423001",
+      "icd10_range": [
+        "G89"
+      ],
+      "risk_weight": 2,
+      "charlson": false,
+      "synonyms": [
+        "chronic pain",
+        "chronic back pain",
+        "chronic low back pain",
+        "fibromyalgia",
+        "complex regional pain syndrome",
+        "crps",
+        "neuropathic pain",
+        "chronic headache",
+        "chronic neck pain",
+        "back pain",
+        "low back pain",
+        "neck pain",
+        "joint pain"
+      ]
+    },
+    {
+      "id": "hernia",
+      "name": "Hernia",
+      "snomed_ct": "414403008",
+      "icd10_range": [
+        "K40-K46"
+      ],
+      "risk_weight": 1,
+      "charlson": false,
+      "synonyms": [
+        "hernia",
+        "inguinal hernia",
+        "umbilical hernia",
+        "incisional hernia",
+        "ventral hernia",
+        "hiatal hernia",
+        "femoral hernia",
+        "paraesophageal hernia"
+      ]
+    },
+    {
+      "id": "hematologic_abnormality",
+      "name": "Hematologic Abnormalities",
+      "snomed_ct": "414022008",
+      "icd10_range": [
+        "D70-D77"
+      ],
+      "risk_weight": 2,
+      "charlson": false,
+      "synonyms": [
+        "leukocytosis",
+        "lymphadenopathy",
+        "neutropenia",
+        "agranulocytosis",
+        "lymphopenia",
+        "eosinophilia",
+        "splenomegaly",
+        "lymphoma",
+        "monoclonal gammopathy",
+        "polycythemia",
+        "thrombocytosis",
+        "elevated wbc"
+      ]
+    },
+    {
+      "id": "sleep_insomnia",
+      "name": "Sleep / Insomnia Disorders",
+      "snomed_ct": "193462001",
+      "icd10_range": [
+        "G47"
+      ],
+      "risk_weight": 1,
+      "charlson": false,
+      "synonyms": [
+        "insomnia",
+        "sleep disorder",
+        "restless leg syndrome",
+        "narcolepsy",
+        "parasomnia"
+      ]
+    },
+    {
+      "id": "allergy_immunology",
+      "name": "Allergy / Immunological",
+      "snomed_ct": "419076005",
+      "icd10_range": [
+        "J30",
+        "T78"
+      ],
+      "risk_weight": 1,
+      "charlson": false,
+      "synonyms": [
+        "allergic rhinitis",
+        "allergy",
+        "allergies",
+        "drug allergy",
+        "food allergy",
+        "anaphylaxis",
+        "angioedema",
+        "urticaria",
+        "eczema",
+        "dermatitis",
+        "atopic dermatitis",
+        "contact dermatitis",
+        "psoriasis",
+        "hirsutism",
+        "alopecia areata"
+      ]
+    },
+    {
+      "id": "fever_infection",
+      "name": "Fever / Systemic Infection",
+      "snomed_ct": "386661006",
+      "icd10_range": [
+        "R50"
+      ],
+      "risk_weight": 2,
+      "charlson": false,
+      "synonyms": [
+        "fever",
+        "febrile",
+        "pyrexia",
+        "chills",
+        "rigors",
+        "night sweats",
+        "infection",
+        "viral infection",
+        "bacterial infection",
+        "mumps",
+        "measles",
+        "chickenpox",
+        "shingles",
+        "herpes zoster",
+        "herpes simplex"
+      ]
+    },
+    {
+      "id": "pregnancy_ob",
+      "name": "Pregnancy / Obstetric",
+      "snomed_ct": "77386006",
+      "icd10_range": [
+        "O00-O99"
+      ],
+      "risk_weight": 1,
+      "charlson": false,
+      "synonyms": [
+        "pregnancy",
+        "pregnant",
+        "preeclampsia",
+        "eclampsia",
+        "gestational diabetes",
+        "preterm labor",
+        "cesarean section",
+        "postpartum",
+        "ectopic pregnancy",
+        "miscarriage"
+      ]
+    },
+    {
+      "id": "skin_dermatologic",
+      "name": "Dermatologic / Skin Conditions",
+      "snomed_ct": "95320005",
+      "icd10_range": [
+        "L00-L99"
+      ],
+      "risk_weight": 1,
+      "charlson": false,
+      "synonyms": [
+        "rash",
+        "skin lesion",
+        "pruritus",
+        "alopecia",
+        "acne",
+        "wound",
+        "laceration",
+        "burn",
+        "skin graft",
+        "basal cell carcinoma",
+        "squamous cell carcinoma skin"
+      ]
+    }
+  ]
+}

Analysis_Readmission/config/symptom_urgency_groups.json ADDED Viewed

	@@ -0,0 +1,206 @@

+{
+  "_meta": {
+    "version": "1.0",
+    "description": "SNOMED-CT symptom urgency groups for SYMPTOMS cluster. Maps free-text symptoms to clinical urgency categories with readmission risk weights.",
+    "notes": [
+      "Each group represents a clinical urgency category.",
+      "risk_weight (0-5) reflects urgency/impact on readmission.",
+      "severity_multiplier: severe=1.5, yes=1.0, no=0.0",
+      "Synonyms are lowercase for case-insensitive matching."
+    ]
+  },
+  "groups": [
+    {
+      "id": "respiratory_distress",
+      "name": "Respiratory Distress",
+      "snomed_ct": "267036007",
+      "risk_weight": 4,
+      "synonyms": [
+        "dyspnea", "shortness of breath", "sob",
+        "difficulty breathing", "respiratory distress",
+        "air hunger", "orthopnea",
+        "paroxysmal nocturnal dyspnea", "pnd",
+        "breathlessness", "tachypnea",
+        "labored breathing", "respiratory failure"
+      ]
+    },
+    {
+      "id": "cardiac_symptoms",
+      "name": "Cardiac Symptoms",
+      "snomed_ct": "29857009",
+      "risk_weight": 4,
+      "synonyms": [
+        "chest pain", "chest tightness", "chest pressure",
+        "angina", "palpitations", "irregular heartbeat",
+        "racing heart", "tachycardia",
+        "bradycardia", "heart racing",
+        "substernal chest pain", "precordial pain"
+      ]
+    },
+    {
+      "id": "neurological_symptoms",
+      "name": "Neurological Symptoms",
+      "snomed_ct": "102957003",
+      "risk_weight": 4,
+      "synonyms": [
+        "confusion", "altered mental status",
+        "disorientation", "lethargy", "obtunded",
+        "unresponsive", "syncope", "loss of consciousness",
+        "seizure", "convulsion", "tremor",
+        "aphasia", "dysarthria", "slurred speech",
+        "weakness", "hemiparesis", "hemiplegia",
+        "numbness", "tingling", "paresthesia",
+        "visual changes", "blurred vision", "blurry vision", "diplopia",
+        "facial droop", "delirium", "photophobia",
+        "vertigo", "ataxia", "gait instability"
+      ]
+    },
+    {
+      "id": "gi_symptoms",
+      "name": "GI Symptoms",
+      "snomed_ct": "422587007",
+      "risk_weight": 2,
+      "synonyms": [
+        "nausea", "vomiting", "emesis",
+        "abdominal pain", "abdominal distension",
+        "bloating", "diarrhea", "constipation",
+        "melena", "hematochezia", "hematemesis",
+        "blood in stool", "rectal bleeding",
+        "dysphagia", "difficulty swallowing",
+        "anorexia", "loss of appetite",
+        "early satiety", "heartburn"
+      ]
+    },
+    {
+      "id": "pain",
+      "name": "Pain (significant)",
+      "snomed_ct": "22253000",
+      "risk_weight": 2,
+      "synonyms": [
+        "pain", "severe pain", "acute pain",
+        "chronic pain", "back pain", "flank pain",
+        "headache", "migraine",
+        "joint pain", "arthralgia", "myalgia",
+        "bone pain", "neck pain",
+        "pleuritic pain", "pleurisy"
+      ]
+    },
+    {
+      "id": "fever_infection",
+      "name": "Fever / Infection Signs",
+      "snomed_ct": "386661006",
+      "risk_weight": 3,
+      "synonyms": [
+        "fever", "febrile", "chills", "rigors",
+        "night sweats", "diaphoresis", "sweats",
+        "malaise", "body aches",
+        "purulent drainage", "wound drainage"
+      ]
+    },
+    {
+      "id": "edema_fluid",
+      "name": "Edema / Fluid Overload",
+      "snomed_ct": "267038008",
+      "risk_weight": 3,
+      "synonyms": [
+        "edema", "swelling", "peripheral edema",
+        "lower extremity edema", "pitting edema",
+        "anasarca", "ascites", "fluid overload",
+        "weight gain", "pulmonary edema",
+        "pleural effusion"
+      ]
+    },
+    {
+      "id": "bleeding",
+      "name": "Bleeding / Hemorrhage",
+      "snomed_ct": "131148009",
+      "risk_weight": 4,
+      "synonyms": [
+        "bleeding", "hemorrhage",
+        "epistaxis", "hemoptysis",
+        "hematuria", "bruising", "petechiae",
+        "ecchymosis", "purpura"
+      ]
+    },
+    {
+      "id": "constitutional",
+      "name": "Constitutional Symptoms",
+      "snomed_ct": "84229001",
+      "risk_weight": 2,
+      "synonyms": [
+        "fatigue", "weakness", "generalized weakness",
+        "malaise", "lethargy", "drowsiness",
+        "weight loss", "unintentional weight loss",
+        "failure to thrive", "deconditioning",
+        "functional decline", "decreased appetite",
+        "insomnia", "sleep disturbance"
+      ]
+    },
+    {
+      "id": "cough_respiratory",
+      "name": "Cough / Upper Respiratory",
+      "snomed_ct": "49727002",
+      "risk_weight": 2,
+      "synonyms": [
+        "cough", "productive cough", "dry cough",
+        "hemoptysis", "wheezing", "stridor",
+        "sore throat", "hoarseness",
+        "nasal congestion", "rhinorrhea",
+        "sputum production",
+        "crackles", "rales", "rhonchi",
+        "hypoxia", "desaturation"
+      ]
+    },
+    {
+      "id": "skin_symptoms",
+      "name": "Skin Symptoms",
+      "snomed_ct": "95320005",
+      "risk_weight": 1,
+      "synonyms": [
+        "rash", "skin rash", "pruritus", "itching",
+        "jaundice", "pallor", "cyanosis",
+        "erythema", "urticaria", "hives",
+        "wound", "skin lesion", "skin breakdown"
+      ]
+    },
+    {
+      "id": "psychiatric_symptoms",
+      "name": "Psychiatric Symptoms",
+      "snomed_ct": "74732009",
+      "risk_weight": 3,
+      "synonyms": [
+        "anxiety", "agitation", "restlessness",
+        "hallucinations", "delusions", "paranoia",
+        "suicidal ideation", "self harm",
+        "depression", "depressed mood",
+        "mania", "hypomania",
+        "insomnia", "psychomotor agitation",
+        "psychomotor retardation"
+      ]
+    },
+    {
+      "id": "dizziness",
+      "name": "Dizziness / Presyncope",
+      "snomed_ct": "404640003",
+      "risk_weight": 2,
+      "synonyms": [
+        "dizziness", "lightheadedness",
+        "presyncope", "near syncope",
+        "unsteadiness", "disequilibrium",
+        "postural hypotension"
+      ]
+    },
+    {
+      "id": "urinary_symptoms",
+      "name": "Urinary Symptoms",
+      "snomed_ct": "249274008",
+      "risk_weight": 1,
+      "synonyms": [
+        "dysuria", "frequency", "urgency",
+        "urinary retention", "incontinence",
+        "hematuria", "oliguria", "anuria",
+        "polyuria", "nocturia"
+      ]
+    }
+  ]
+}

Analysis_Readmission/readmission_risk_engine.py ADDED Viewed

	@@ -0,0 +1,1209 @@

+#!/usr/bin/env python3
+"""Rule-based 30-day readmission risk classification engine.
+Reference implementation of the algorithm described in ALGORITHM_DESIGN.md.
+Input: TOON lines (CLUSTER|Keyword|Value|Timestamp)
+Output: Risk classification + days-to-readmission prediction
+Usage:
+    # From TOON string
+    engine = ReadmissionRiskEngine()
+    result = engine.score_from_toon(toon_text)
+    print(result)
+    # From TOON file
+    result = engine.score_from_file("path/to/extraction.txt")
+    # From JSONL training data
+    results = engine.score_from_jsonl("dspy_fine_tuning/data/trainset_full.jsonl")
+"""
+from __future__ import annotations
+import json
+import math
+import re
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple, Union
+# ---------------------------------------------------------------------------
+# Data classes
+# ---------------------------------------------------------------------------
+@dataclass
+class ParsedFact:
+    """One fact parsed from a TOON line (CLUSTER|Keyword|Value|Timestamp)."""
+    cluster: str
+    keyword: str
+    # Annotated as float or str; presumably a float when is_numeric is True -- confirm in parse_toon.
+    value: Union[float, str]
+    timestamp: str
+    is_numeric: bool
+    # NOTE(review): looks like the outcome of a plausibility check done elsewhere; defaults to True.
+    plausibility_ok: bool = True
+@dataclass
+class ClusterScore:
+    """Score of a single cluster plus the factors that contributed to it."""
+    cluster: str
+    score: int
+    # Presumably the cluster's theoretical maximum from scoring_rules.json -- confirm against the scorer.
+    max_score: int
+    # default_factory gives every instance its own list instead of a shared one.
+    contributing_factors: List[str] = field(default_factory=list)
+@dataclass
+class InteractionResult:
+    """A triggered interaction pattern and the bonus attached to it."""
+    pattern_id: str
+    pattern_name: str
+    # NOTE(review): presumably added to the composite score -- confirm in the interaction scorer.
+    bonus: int
+    description: str
+@dataclass
+class SurvivalCurve:
+    """P(readmit by day t) for several horizons."""
+    # Keys are horizons in days, values the probability of readmission by that day.
+    horizons: Dict[int, float]  # {7: 0.05, 14: 0.12, 21: 0.18, 30: 0.23}
+@dataclass
+class RiskResult:
+    """Full output of one scoring run.
+    Bundles the scores, the risk classification, the days-to-readmission
+    prediction and the explainability / data-quality metadata.
+    """
+    # Scores
+    composite_score: int
+    cluster_scores: Dict[str, ClusterScore]  # presumably keyed by cluster name -- confirm in the scorer
+    interaction_bonus: int
+    interactions_triggered: List[InteractionResult]
+    # Risk classification
+    probability: float
+    risk_category: str  # Low / Medium / High / Critical
+    risk_color: str
+    # Days prediction
+    estimated_days: float
+    days_bucket: str  # "0-7 days" / "8-14 days" / "15-30 days"
+    survival_curve: SurvivalCurve
+    # Explainability
+    risk_factors: List[str]
+    protective_factors: List[str]
+    missing_clusters: List[str]
+    data_completeness: float  # NOTE(review): scale (fraction vs. percent) is not visible here -- confirm
+    confidence: str  # high / medium / low
+    # Raw data
+    n_facts_parsed: int
+    n_facts_dropped: int
+# ---------------------------------------------------------------------------
+# Constants
+# ---------------------------------------------------------------------------
+# The nine cluster names recognised by the engine (first field of a TOON line).
+VALID_CLUSTERS = {
+    "DEMOGRAPHICS", "VITALS", "LABS", "PROBLEMS", "SYMPTOMS",
+    "MEDICATIONS", "PROCEDURES", "UTILIZATION", "DISPOSITION",
+}
+# Presumably the clusters whose values are expected to be numeric -- usage is outside this view.
+NUMERIC_CLUSTERS = {"VITALS", "LABS", "UTILIZATION"}
+# NOTE(review): looks like the clusters tracked via `seen_objective` in parse_toon -- confirm there.
+OBJECTIVE_CLUSTERS = {"DEMOGRAPHICS", "VITALS", "LABS", "UTILIZATION", "DISPOSITION"}
+# ---------------------------------------------------------------------------
+# Engine
+# ---------------------------------------------------------------------------
+class ReadmissionRiskEngine:
+    """Main entry point for readmission risk scoring."""
+    def __init__(self, config_dir: Optional[Path] = None):
+        if config_dir is None:
+            config_dir = Path(__file__).parent / "config"
+        self._config_dir = config_dir
+        self._scoring_rules = self._load_json("scoring_rules.json")
+        self._problem_groups = self._load_json("snomed_problem_groups.json")["groups"]
+        self._symptom_groups = self._load_json("symptom_urgency_groups.json")["groups"]
+        # Build lookup indexes
+        self._problem_synonym_index = self._build_synonym_index(self._problem_groups)
+        self._symptom_synonym_index = self._build_synonym_index(self._symptom_groups)
+        # Calibration parameters
+        cal = self._scoring_rules["_meta"]["calibration"]
+        self._alpha = cal["alpha"]
+        self._beta = cal["beta"]
+        # Days prediction parameters
+        days_cfg = self._scoring_rules["DAYS_PREDICTION"]["models"]
+        reg = days_cfg["regression"]["parameters"]
+        self._d_max = reg["D_max"]
+        self._gamma = reg["gamma"]
+        surv = days_cfg["survival"]["parameters"]
+        self._k_base = surv["k_base"]
+    # -- Loading helpers ----------------------------------------------------
+    def _load_json(self, filename: str) -> Dict[str, Any]:
+        p = self._config_dir / filename
+        return json.loads(p.read_text(encoding="utf-8"))
+    @staticmethod
+    def _build_synonym_index(groups: List[Dict]) -> Dict[str, str]:
+        """Map lowercase synonym → group id."""
+        idx: Dict[str, str] = {}
+        for g in groups:
+            gid = g["id"]
+            for syn in g.get("synonyms", []):
+                key = syn.strip().lower()
+                if key not in idx:
+                    idx[key] = gid
+        return idx
+    def _match_to_group(
+        self,
+        keyword: str,
+        synonym_index: Dict[str, str],
+        groups: List[Dict],
+    ) -> Optional[Dict]:
+        """Smart matching: exact > word-boundary substring > raw substring.
+        Avoids false matches like 'tia' in 'essential' by preferring
+        word-boundary matches and longer synonyms.
+        """
+        kw_lower = keyword.strip().lower()
+        # 1) Exact match (full keyword == synonym)
+        gid = synonym_index.get(kw_lower)
+        if gid:
+            return self._group_by_id(groups, gid)
+        # Tokenize keyword into words for word-boundary matching
+        kw_words = set(re.split(r"[\s,;/\-()]+", kw_lower))
+        # 2) Word-boundary match: synonym is a whole word within the keyword
+        #    OR keyword starts/ends with the synonym as a distinct token
+        best_wb_match: Optional[str] = None
+        best_wb_len = 0
+        # 3) Raw substring match (fallback, requires min 4 chars to avoid noise)
+        best_sub_match: Optional[str] = None
+        best_sub_len = 0
+        for syn, gid in synonym_index.items():
+            if syn not in kw_lower:
+                continue
+            # Check if it's a word-boundary match
+            is_word_match = (
+                syn in kw_words  # exact word token
+                or kw_lower.startswith(syn + " ")
+                or kw_lower.endswith(" " + syn)
+                or (" " + syn + " ") in kw_lower
+            )
+            if is_word_match and len(syn) > best_wb_len:
+                best_wb_match = gid
+                best_wb_len = len(syn)
+            elif not is_word_match and len(syn) >= 4 and len(syn) > best_sub_len:
+                # Only use raw substring for synonyms >= 4 chars
+                best_sub_match = gid
+                best_sub_len = len(syn)
+        # Prefer word-boundary matches over raw substring
+        chosen = best_wb_match or best_sub_match
+        if chosen:
+            return self._group_by_id(groups, chosen)
+        return None
+    # -- Layer 1: Parser & Normalizer ----------------------------------------
+    @staticmethod
+    def _try_parse_float(value: str) -> Optional[float]:
+        """Best-effort numeric parse.
+        Stage2 should emit numeric-only values for numeric fields, but in practice
+        we sometimes see light decoration like '3 days'. For scoring purposes we
+        accept the first numeric token, but we avoid parsing ratios like '120/80'.
+        """
+        s = (value or "").strip()
+        if not s:
+            return None
+        # Avoid BP-style ratios and similar formats.
+        if "/" in s:
+            return None
+        # Fast path: pure float
+        try:
+            return float(s)
+        except Exception:
+            pass
+        # Fallback: extract first numeric token
+        m = re.search(r"[-+]?\d+(?:\.\d+)?", s)
+        if not m:
+            return None
+        try:
+            return float(m.group(0))
+        except Exception:
+            return None
+    @staticmethod
+    def _split_semantic_items(value: str, *, limit: int = 20) -> List[str]:
+        """Split a semicolon/comma/newline separated list into normalized items."""
+        raw = (value or "").strip()
+        if not raw:
+            return []
+        parts: List[str] = []
+        for seg in re.split(r"[;\n]+", raw):
+            seg = seg.strip()
+            if not seg:
+                continue
+            for item in seg.split(","):
+                it = " ".join(item.strip().split())
+                if not it:
+                    continue
+                parts.append(it.strip(" -"))
+                if len(parts) >= limit:
+                    break
+            if len(parts) >= limit:
+                break
+        # Dedup while preserving order.
+        out: List[str] = []
+        seen: set[str] = set()
+        for it in parts:
+            k = it.casefold()
+            if k in seen:
+                continue
+            seen.add(k)
+            out.append(it)
+        return out
+    @staticmethod
+    def _strip_prefix(keyword: str, prefixes: List[str]) -> str:
+        k = (keyword or "").strip()
+        k_cf = k.casefold()
+        for p in prefixes:
+            p_cf = p.casefold()
+            if k_cf.startswith(p_cf):
+                k = k[len(p) :].strip()
+                k_cf = k.casefold()
+        return k
+    @staticmethod
+    def _normalize_discharge_disposition(value: str) -> str:
+        """Normalize common discharge disposition variants to the scoring allowlist."""
+        v = (value or "").strip()
+        v_cf = v.casefold()
+        if not v:
+            return v
+        # Canonical allowlist (scoring_rules.json): Home, Home with Services, Rehab, SNF, LTAC, Hospice, AMA
+        if v_cf in {"home with service", "home w service", "home with svc", "home w/ service"}:
+            return "Home with Services"
+        if v_cf in {"home with services", "home w services", "home w/ services", "home health", "home health care"}:
+            return "Home with Services"
+        if v_cf in {"hospice residence", "hospice care"}:
+            return "Hospice"
+        return v
+    @staticmethod
+    def _normalize_mental_status(value: str) -> str:
+        v = (value or "").strip()
+        v_cf = v.casefold()
+        if not v:
+            return v
+        if "alert" in v_cf and "orient" in v_cf:
+            return "alert"
+        if v_cf in {"a&o", "ao", "a/ox3", "a/ox4"}:
+            return "alert"
+        return v
+    def parse_toon(self, toon_text: str) -> Tuple[Dict[str, List[ParsedFact]], int, int]:
+        """Parse TOON text into structured facts.
+        Each non-empty, non-comment line is expected to hold exactly four
+        pipe-separated fields: ``cluster|keyword|value|timestamp``.
+        Args:
+            toon_text: Raw TOON text, one fact per line; lines starting with '#'
+                are ignored.
+        Returns:
+            (facts_by_cluster, n_parsed, n_dropped). ``n_parsed`` counts every
+            emitted fact (an expanded aggregate PROBLEMS line counts once per
+            item). ``n_dropped`` counts lines rejected for a wrong field count,
+            an unknown cluster, an unparseable required numeric value, or for
+            repeating a (cluster, keyword) pair in an objective cluster.
+        """
+        facts: Dict[str, List[ParsedFact]] = {}
+        n_parsed = 0
+        n_dropped = 0
+        # (cluster, keyword) pairs already accepted for OBJECTIVE_CLUSTERS.
+        seen_objective: set = set()
+        for raw_line in toon_text.strip().splitlines():
+            line = raw_line.strip()
+            if not line or line.startswith("#"):
+                continue
+            parts = line.split("|")
+            # A '|' inside a value changes the field count and drops the line.
+            if len(parts) != 4:
+                n_dropped += 1
+                continue
+            cluster, keyword, value, timestamp = (p.strip() for p in parts)
+            if cluster not in VALID_CLUSTERS:
+                n_dropped += 1
+                continue
+            # Strip common semantic prefixes embedded in the keyword.
+            if cluster == "PROBLEMS":
+                keyword = self._strip_prefix(keyword, ["PMH:", "PMH/Comorbidities:", "Discharge Dx:", "Working Dx:", "Complication:", "Complications:"])
+            elif cluster == "SYMPTOMS":
+                keyword = self._strip_prefix(keyword, ["ADM:", "DC:"])
+            # Expand common Stage2 aggregate semantic lines into per-item facts.
+            # This makes the scorer robust to model drift like:
+            #   PROBLEMS|Discharge Dx|CHF; COPD|Discharge
+            # instead of emitting one line per diagnosis.
+            if cluster == "PROBLEMS":
+                kw_cf = keyword.strip().casefold()
+                acute_keys = {"discharge dx", "working dx", "complication", "complications"}
+                chronic_keys = {"pmh/comorbidities", "pmh", "comorbidities", "past medical history"}
+                items = self._split_semantic_items(value)
+                # Expanded facts ignore the line's own value/timestamp: acute
+                # headings become value "acute" @ "Discharge".
+                if kw_cf in acute_keys and items:
+                    for it in items:
+                        fact = ParsedFact(
+                            cluster="PROBLEMS",
+                            keyword=it,
+                            value="acute",
+                            timestamp="Discharge",
+                            is_numeric=False,
+                            plausibility_ok=True,
+                        )
+                        facts.setdefault("PROBLEMS", []).append(fact)
+                        n_parsed += 1
+                    continue
+                # Chronic headings become value "chronic" @ "Past".
+                if kw_cf in chronic_keys and items:
+                    for it in items:
+                        fact = ParsedFact(
+                            cluster="PROBLEMS",
+                            keyword=it,
+                            value="chronic",
+                            timestamp="Past",
+                            is_numeric=False,
+                            plausibility_ok=True,
+                        )
+                        facts.setdefault("PROBLEMS", []).append(fact)
+                        n_parsed += 1
+                    continue
+            # Numeric parsing:
+            # - Strictly numeric clusters MUST parse (else drop).
+            # - Non-numeric clusters may still have numeric keywords (e.g. MEDICATIONS Medication Count,
+            #   PROCEDURES Mechanical Ventilation days). Those should parse so scoring rules apply.
+            is_numeric = False
+            parsed_value: Union[float, str] = value
+            # Per-keyword rule entry, if any; only its "type" field is read here.
+            kw_rules = self._scoring_rules.get(cluster, {}).get("keywords", {}).get(keyword, {})
+            kw_type = kw_rules.get("type") if isinstance(kw_rules, dict) else None
+            if cluster in NUMERIC_CLUSTERS:
+                v = self._try_parse_float(value)
+                if v is None:
+                    n_dropped += 1
+                    continue
+                parsed_value = v
+                is_numeric = True
+            elif kw_type == "range":
+                v = self._try_parse_float(value)
+                if v is None:
+                    n_dropped += 1
+                    continue
+                parsed_value = v
+                is_numeric = True
+            elif kw_type == "mixed":
+                # Mixed: numeric is optional; keep as string if parsing fails.
+                v = self._try_parse_float(value)
+                if v is not None:
+                    parsed_value = v
+                    is_numeric = True
+            # Plausibility check (only flags the fact; implausible values are
+            # still kept and it is up to each scorer whether to skip them).
+            plausibility_ok = True
+            if is_numeric:
+                plausibility_ok = self._check_plausibility(cluster, keyword, parsed_value)
+            # Dedup for objective clusters
+            if cluster in OBJECTIVE_CLUSTERS:
+                key = (cluster, keyword)
+                if key in seen_objective:
+                    # First occurrence wins; later duplicates are counted as dropped.
+                    # NOTE(review): no timestamp comparison is performed here.
+                    n_dropped += 1
+                    continue
+                seen_objective.add(key)
+            fact = ParsedFact(
+                cluster=cluster,
+                keyword=keyword,
+                value=parsed_value,
+                timestamp=timestamp,
+                is_numeric=is_numeric,
+                plausibility_ok=plausibility_ok,
+            )
+            facts.setdefault(cluster, []).append(fact)
+            n_parsed += 1
+        return facts, n_parsed, n_dropped
+    def _check_plausibility(self, cluster: str, keyword: str, value: float) -> bool:
+        cluster_rules = self._scoring_rules.get(cluster, {}).get("keywords", {})
+        kw_rules = cluster_rules.get(keyword, {})
+        plaus = kw_rules.get("plausibility")
+        if plaus:
+            return plaus["min"] <= value <= plaus["max"]
+        return True
+    # -- Layer 2: Concept Mapper --------------------------------------------
+    def map_problem_to_group(self, keyword: str) -> Optional[Dict]:
+        """Map a PROBLEMS keyword to a SNOMED concept group."""
+        return self._match_to_group(keyword, self._problem_synonym_index, self._problem_groups)
+    def map_symptom_to_group(self, keyword: str) -> Optional[Dict]:
+        """Map a SYMPTOMS keyword to an urgency group."""
+        return self._match_to_group(keyword, self._symptom_synonym_index, self._symptom_groups)
+    @staticmethod
+    def _group_by_id(groups: List[Dict], gid: str) -> Optional[Dict]:
+        for g in groups:
+            if g["id"] == gid:
+                return g
+        return None
+    # -- Layer 3: Cluster Scorers -------------------------------------------
+    def _score_range_keyword(self, rules: Dict, value: float) -> Tuple[int, str]:
+        """Score a numeric value using range rules. Returns (score, label)."""
+        for r in rules.get("ranges", []):
+            if r["min"] <= value <= r["max"]:
+                return r["score"], r.get("label", "")
+        return 0, ""
+    def score_demographics(self, facts: List[ParsedFact]) -> ClusterScore:
+        rules = self._scoring_rules["DEMOGRAPHICS"]["keywords"]
+        score = 0
+        factors: List[str] = []
+        age_found = False
+        for f in facts:
+            if f.keyword == "Age" and f.is_numeric:
+                age_found = True
+                pts, label = self._score_range_keyword(rules["Age"], f.value)
+                score += pts
+                if pts > 0:
+                    factors.append(f"Age {int(f.value)} ({label}, +{pts})")
+            elif f.keyword == "Sex":
+                val = str(f.value).lower()
+                pts = rules["Sex"]["values"].get(val, 0)
+                score += pts
+                if pts > 0:
+                    factors.append(f"Sex={val} (+{pts})")
+        if not age_found:
+            default = rules["Age"].get("missing_score", 2)
+            score += default
+            factors.append(f"Age missing (default +{default})")
+        return ClusterScore("DEMOGRAPHICS", score, 10, factors)
+    def score_vitals(self, facts: List[ParsedFact]) -> ClusterScore:
+        rules = self._scoring_rules["VITALS"]["keywords"]
+        score = 0
+        factors: List[str] = []
+        for f in facts:
+            if not f.is_numeric or not f.plausibility_ok:
+                continue
+            kw_rules = rules.get(f.keyword)
+            if not kw_rules or kw_rules.get("type") == "no_direct_score":
+                continue
+            pts, label = self._score_range_keyword(kw_rules, f.value)
+            score += pts
+            if pts > 0:
+                factors.append(f"{f.keyword}={f.value} ({label}, +{pts})")
+        return ClusterScore("VITALS", score, 25, factors)
+    def score_labs(self, facts: List[ParsedFact]) -> ClusterScore:
+        rules = self._scoring_rules["LABS"]["keywords"]
+        score = 0
+        factors: List[str] = []
+        for f in facts:
+            if not f.is_numeric or not f.plausibility_ok:
+                continue
+            kw_rules = rules.get(f.keyword)
+            if not kw_rules:
+                continue
+            pts, label = self._score_range_keyword(kw_rules, f.value)
+            score += pts
+            if pts > 0:
+                factors.append(f"{f.keyword}={f.value} ({label}, +{pts})")
+        return ClusterScore("LABS", score, 30, factors)
+    def score_problems(self, facts: List[ParsedFact]) -> ClusterScore:
+        score = 0
+        factors: List[str] = []
+        active_groups: Dict[str, int] = {}  # group_id -> max weight
+        include_values = {"chronic", "acute", "exist"}
+        for f in facts:
+            val = str(f.value).lower().strip()
+            if val not in include_values:
+                continue
+            group = self.map_problem_to_group(f.keyword)
+            if group:
+                gid = group["id"]
+                w = group["risk_weight"]
+                if gid not in active_groups or w > active_groups[gid]:
+                    active_groups[gid] = w
+                    factors.append(f"{f.keyword} → {group['name']} (weight {w})")
+        base_score = sum(active_groups.values())
+        # Multimorbidity bonus
+        n_groups = len(active_groups)
+        mm_bonus = 0
+        if n_groups > 3:
+            mm_bonus = min(n_groups - 3, 5)
+            factors.append(f"Multimorbidity: {n_groups} groups (+{mm_bonus})")
+        score = min(base_score + mm_bonus, 40)
+        return ClusterScore("PROBLEMS", score, 40, factors)
+    def score_symptoms(self, facts: List[ParsedFact]) -> ClusterScore:
+        sev_mult = {"severe": 1.5, "yes": 1.0, "no": 0.0}
+        score = 0.0
+        factors: List[str] = []
+        active_groups: Dict[str, float] = {}
+        active_count = 0
+        for f in facts:
+            val = str(f.value).lower().strip()
+            mult = sev_mult.get(val, 0.0)
+            if mult == 0.0:
+                continue
+            active_count += 1
+            group = self.map_symptom_to_group(f.keyword)
+            if group:
+                gid = group["id"]
+                w = group["risk_weight"] * mult
+                if gid not in active_groups or w > active_groups[gid]:
+                    active_groups[gid] = w
+                    factors.append(f"{f.keyword}={val} → {group['name']} (+{w:.1f})")
+        base_score = sum(active_groups.values())
+        # Active symptom count bonus
+        bonus = 0
+        if active_count > 3:
+            bonus = 2
+            factors.append(f"Active symptoms: {active_count} (>3, +2)")
+        score = min(int(round(base_score + bonus)), 15)
+        return ClusterScore("SYMPTOMS", score, 15, factors)
+    def score_medications(self, facts: List[ParsedFact]) -> ClusterScore:
+        rules = self._scoring_rules["MEDICATIONS"]["keywords"]
+        score = 0
+        factors: List[str] = []
+        med_count_val: Optional[float] = None
+        for f in facts:
+            kw_rules = rules.get(f.keyword)
+            if not kw_rules:
+                continue
+            if kw_rules["type"] == "range" and f.is_numeric:
+                pts, label = self._score_range_keyword(kw_rules, f.value)
+                score += pts
+                if f.keyword == "Medication Count":
+                    med_count_val = f.value
+                if pts > 0:
+                    factors.append(f"{f.keyword}={f.value} ({label}, +{pts})")
+            elif kw_rules["type"] == "categorical":
+                val = str(f.value).lower().strip()
+                pts = kw_rules["values"].get(val, 0)
+                score += pts
+                if pts > 0:
+                    factors.append(f"{f.keyword}={val} (+{pts})")
+        # Derived polypharmacy: if med_count >= 5 and Polypharmacy not already scored
+        polypharmacy_scored = any("Polypharmacy" in f for f in factors)
+        if med_count_val is not None and med_count_val >= 5 and not polypharmacy_scored:
+            score += 3
+            factors.append(f"Derived Polypharmacy (Med Count={int(med_count_val)} >=5, +3)")
+        return ClusterScore("MEDICATIONS", min(score, 15), 15, factors)
+    def score_procedures(self, facts: List[ParsedFact]) -> ClusterScore:
+        rules = self._scoring_rules["PROCEDURES"]["keywords"]
+        score = 0
+        factors: List[str] = []
+        specific_scored = False
+        for f in facts:
+            kw_rules = rules.get(f.keyword)
+            if not kw_rules:
+                continue
+            if f.keyword == "Mechanical Ventilation":
+                # Mixed type: numeric > 0 or categorical
+                if f.is_numeric and f.value > 0:
+                    score += kw_rules["score_if_any_positive"]
+                    factors.append(f"Mechanical Ventilation={f.value} days (+{kw_rules['score_if_any_positive']})")
+                    specific_scored = True
+                elif str(f.value).lower().strip() != "no":
+                    score += kw_rules["score_if_any_positive"]
+                    factors.append(f"Mechanical Ventilation={f.value} (+{kw_rules['score_if_any_positive']})")
+                    specific_scored = True
+            elif f.keyword == "Dialysis":
+                val = str(f.value).lower().strip()
+                pts = kw_rules["values"].get(val, 0)
+                score += pts
+                if pts > 0:
+                    factors.append(f"Dialysis={val} (+{pts})")
+                    specific_scored = True
+            elif f.keyword == "Surgery":
+                val = str(f.value).lower().strip()
+                pts = kw_rules["values"].get(val, 0)
+                score += pts
+                if pts > 0:
+                    factors.append(f"Surgery={val} (+{pts})")
+                    specific_scored = True
+            elif f.keyword == "Any Procedure":
+                # Only score if no specific procedure was scored
+                pass  # handled below
+        # Fallback: Any Procedure
+        if not specific_scored:
+            for f in facts:
+                if f.keyword == "Any Procedure":
+                    val = str(f.value).lower().strip()
+                    pts = rules["Any Procedure"]["values"].get(val, 0)
+                    score += pts
+                    if pts > 0:
+                        factors.append(f"Any Procedure={val} (generic fallback, +{pts})")
+                    break
+        return ClusterScore("PROCEDURES", min(score, 15), 15, factors)
+    def score_utilization(self, facts: List[ParsedFact]) -> ClusterScore:
+        rules = self._scoring_rules["UTILIZATION"]["keywords"]
+        score = 0
+        factors: List[str] = []
+        for f in facts:
+            if not f.is_numeric:
+                continue
+            kw_rules = rules.get(f.keyword)
+            if not kw_rules:
+                continue
+            pts, label = self._score_range_keyword(kw_rules, f.value)
+            score += pts
+            if pts > 0:
+                factors.append(f"{f.keyword}={f.value} ({label}, +{pts})")
+        return ClusterScore("UTILIZATION", min(score, 20), 20, factors)
+    def score_disposition(self, facts: List[ParsedFact]) -> ClusterScore:
+        rules = self._scoring_rules["DISPOSITION"]["keywords"]
+        score = 0
+        factors: List[str] = []
+        for f in facts:
+            kw_rules = rules.get(f.keyword)
+            if not kw_rules:
+                continue
+            val = str(f.value).strip()
+            if f.keyword == "Discharge Disposition":
+                val = self._normalize_discharge_disposition(val)
+            elif f.keyword == "Mental Status":
+                val = self._normalize_mental_status(val)
+            # Try exact match first, then case-insensitive
+            pts = kw_rules["values"].get(val, kw_rules["values"].get(val.lower(), 0))
+            score += pts
+            if pts > 0:
+                factors.append(f"{f.keyword}={val} (+{pts})")
+        return ClusterScore("DISPOSITION", min(score, 15), 15, factors)
+    # -- Layer 4: Pattern Detector ------------------------------------------
+    def detect_interactions(
+        self,
+        facts: Dict[str, List[ParsedFact]],
+        cluster_scores: Dict[str, ClusterScore],
+    ) -> List[InteractionResult]:
+        """Detect cross-cluster clinical patterns.
+        Args:
+            facts: Parsed facts grouped by cluster name.
+            cluster_scores: Per-cluster scores; accepted for interface symmetry
+                but not read anywhere in this method body.
+        Returns:
+            One InteractionResult per triggered pattern, in the order the
+            patterns are checked below. Each pattern fires at most once.
+        """
+        results: List[InteractionResult] = []
+        # Helper: get numeric value for a cluster/keyword (first numeric match wins)
+        def get_val(cluster: str, keyword: str) -> Optional[float]:
+            for f in facts.get(cluster, []):
+                if f.keyword == keyword and f.is_numeric:
+                    return f.value
+            return None
+        # Helper: first value for a cluster/keyword as lowercased, trimmed text.
+        # No disposition/mental-status normalization is applied here.
+        def get_str(cluster: str, keyword: str) -> Optional[str]:
+            for f in facts.get(cluster, []):
+                if f.keyword == keyword:
+                    return str(f.value).lower().strip()
+            return None
+        # Helper: is any active ("yes"/"severe") symptom mapped to this group?
+        def has_symptom_group(group_id: str) -> bool:
+            for f in facts.get("SYMPTOMS", []):
+                val = str(f.value).lower().strip()
+                if val in ("yes", "severe"):
+                    g = self.map_symptom_to_group(f.keyword)
+                    if g and g["id"] == group_id:
+                        return True
+            return False
+        # Helper: is any counted ("chronic"/"acute"/"exist") problem mapped to this group?
+        def has_problem_group(group_id: str) -> bool:
+            for f in facts.get("PROBLEMS", []):
+                val = str(f.value).lower().strip()
+                if val in ("chronic", "acute", "exist"):
+                    g = self.map_problem_to_group(f.keyword)
+                    if g and g["id"] == group_id:
+                        return True
+            return False
+        # --- Sepsis Pattern ---
+        # Requires HR > 100 plus one hemodynamic sign AND one infection sign.
+        hr = get_val("VITALS", "Heart Rate")
+        sbp = get_val("VITALS", "Systolic BP")
+        rr = get_val("VITALS", "Respiratory Rate")
+        wbc = get_val("LABS", "WBC")
+        temp = get_val("VITALS", "Temperature")
+        if hr is not None and hr > 100:
+            has_hemodynamic = (sbp is not None and sbp < 100) or (rr is not None and rr > 22)
+            has_infection = (
+                (wbc is not None and (wbc > 12 or wbc < 4))
+                # NOTE(review): 100.4 presumably assumes Fahrenheit input — confirm.
+                or (temp is not None and temp > 100.4)
+            )
+            if has_hemodynamic and has_infection:
+                results.append(InteractionResult(
+                    "sepsis_pattern", "Sepsis / SIRS Pattern", 10,
+                    f"HR={hr}, SBP={sbp}, RR={rr}, WBC={wbc}, Temp={temp}",
+                ))
+        # --- AKI Pattern ---
+        # Requires Creatinine > 1.5 and BUN > 30 plus one electrolyte derangement.
+        cr = get_val("LABS", "Creatinine")
+        bun = get_val("LABS", "BUN")
+        k = get_val("LABS", "Potassium")
+        na = get_val("LABS", "Sodium")
+        bicarb = get_val("LABS", "Bicarbonate")
+        if cr is not None and cr > 1.5 and bun is not None and bun > 30:
+            has_electrolyte = (
+                (k is not None and k > 5.0)
+                or (na is not None and na < 135)
+                or (bicarb is not None and bicarb < 22)
+            )
+            if has_electrolyte:
+                results.append(InteractionResult(
+                    "aki_pattern", "Acute Kidney Injury Pattern", 8,
+                    f"Cr={cr}, BUN={bun}, K={k}, Na={na}, Bicarb={bicarb}",
+                ))
+        # --- Decompensated HF ---
+        # Heart-failure problem group plus edema, respiratory distress or BUN > 40.
+        if has_problem_group("heart_failure"):
+            has_decomp_sign = (
+                has_symptom_group("edema_fluid")
+                or has_symptom_group("respiratory_distress")
+                or (bun is not None and bun > 40)
+            )
+            if has_decomp_sign:
+                results.append(InteractionResult(
+                    "decompensated_hf", "Decompensated Heart Failure", 8,
+                    "Heart failure + fluid overload/dyspnea/elevated BUN",
+                ))
+        # --- Frailty Syndrome ---
+        # Age > 75 plus at least two of four frailty markers.
+        age = get_val("DEMOGRAPHICS", "Age")
+        hgb = get_val("LABS", "Hemoglobin")
+        mental = get_str("DISPOSITION", "Mental Status")
+        disp = get_str("DISPOSITION", "Discharge Disposition")
+        # Number of distinct problem groups among counted problems (each keyword
+        # is mapped twice here: once for the filter, once for the id).
+        n_problem_groups = len(set(
+            self.map_problem_to_group(f.keyword)["id"]
+            for f in facts.get("PROBLEMS", [])
+            if str(f.value).lower().strip() in ("chronic", "acute", "exist")
+            and self.map_problem_to_group(f.keyword) is not None
+        ))
+        if age is not None and age > 75:
+            frailty_count = 0
+            if n_problem_groups >= 3:
+                frailty_count += 1
+            if hgb is not None and hgb < 10:
+                frailty_count += 1
+            if mental in ("confused", "lethargic"):
+                frailty_count += 1
+            if disp in ("snf", "ltac", "rehab"):
+                frailty_count += 1
+            if frailty_count >= 2:
+                results.append(InteractionResult(
+                    "frailty_syndrome", "Frailty Syndrome", 6,
+                    f"Age={age}, problems={n_problem_groups}, Hgb={hgb}, mental={mental}, disp={disp}",
+                ))
+        # --- Unstable Discharge ---
+        # AMA discharge, or altered mental status with a home/unknown disposition.
+        if disp == "ama":
+            results.append(InteractionResult(
+                "unstable_discharge", "Unstable Discharge (AMA)", 5,
+                "Discharge Against Medical Advice",
+            ))
+        elif mental in ("confused", "lethargic") and disp in ("home", None):
+            results.append(InteractionResult(
+                "unstable_discharge", "Unstable Discharge (altered + Home)", 5,
+                f"Mental={mental}, Disposition={disp}",
+            ))
+        # --- Respiratory Failure ---
+        # SpO2 < 92 plus RR > 24 or an active respiratory-distress symptom.
+        spo2 = get_val("VITALS", "SpO2")
+        if spo2 is not None and spo2 < 92:
+            has_resp = (rr is not None and rr > 24) or has_symptom_group("respiratory_distress")
+            if has_resp:
+                results.append(InteractionResult(
+                    "respiratory_failure", "Respiratory Failure Pattern", 6,
+                    f"SpO2={spo2}, RR={rr}",
+                ))
+        # --- Metabolic Crisis ---
+        # Glucose > 300 plus Bicarbonate < 18 or Potassium > 5.5.
+        glucose = get_val("LABS", "Glucose")
+        if glucose is not None and glucose > 300:
+            has_metabolic = (
+                (bicarb is not None and bicarb < 18)
+                or (k is not None and k > 5.5)
+            )
+            if has_metabolic:
+                results.append(InteractionResult(
+                    "metabolic_crisis", "Metabolic Crisis (DKA/HHS)", 6,
+                    f"Glucose={glucose}, Bicarb={bicarb}, K={k}",
+                ))
+        # --- Bleeding Risk ---
+        # Hemoglobin < 8 plus Platelet < 100 or anticoagulation recorded as "yes".
+        plt = get_val("LABS", "Platelet")
+        anticoag = get_str("MEDICATIONS", "Anticoagulation")
+        if hgb is not None and hgb < 8:
+            has_bleed_risk = (
+                (plt is not None and plt < 100)
+                or anticoag == "yes"
+            )
+            if has_bleed_risk:
+                results.append(InteractionResult(
+                    "bleeding_risk", "Active Bleeding Risk", 6,
+                    f"Hgb={hgb}, Plt={plt}, Anticoag={anticoag}",
+                ))
+        return results
+    # -- Layer 5: Risk Aggregator -------------------------------------------
+    def _logistic(self, score: int) -> float:
+        """Convert composite score to probability via logistic function."""
+        z = self._alpha + self._beta * score
+        return 1.0 / (1.0 + math.exp(-z))
+    def _classify_risk(self, score: int) -> Tuple[str, str]:
+        """Return (category, color) for a given composite score."""
+        for cat in self._scoring_rules["_meta"]["risk_categories"]:
+            if cat["score_min"] <= score <= cat["score_max"]:
+                return cat["name"], cat["color"]
+        return "Critical", "red"
+    # -- Layer 6: Days Predictor --------------------------------------------
+    def _predict_days(self, score: int) -> float:
+        """Estimate days to readmission (point estimate)."""
+        return max(1.0, self._d_max * math.exp(-self._gamma * score))
+    def _predict_bucket(self, estimated_days: float) -> str:
+        if estimated_days <= 7:
+            return "0-7 days"
+        elif estimated_days <= 14:
+            return "8-14 days"
+        else:
+            return "15-30 days"
+    def _predict_survival(self, score: int, p_30d: float) -> SurvivalCurve:
+        """Compute P(readmit by day t) for several horizons."""
+        k = self._k_base + 0.02 * (score - 30)
+        k = max(0.5, k)  # floor to avoid degenerate cases
+        horizons: Dict[int, float] = {}
+        denom = 1.0 - math.exp(-k)
+        if abs(denom) < 1e-9:
+            denom = 1e-9
+        for t in [7, 14, 21, 30]:
+            f_t = (1.0 - math.exp(-(t / 30.0) * k)) / denom
+            p_t = p_30d * f_t
+            horizons[t] = round(min(max(p_t, 0.0), 1.0), 4)
+        return SurvivalCurve(horizons=horizons)
+    # -- Main Scoring Pipeline -----------------------------------------------
+    def score(self, facts: Dict[str, List[ParsedFact]], n_parsed: int = 0, n_dropped: int = 0) -> RiskResult:
+        """Run full scoring pipeline on parsed facts."""
+        # Layer 3: Cluster scores
+        cluster_scores: Dict[str, ClusterScore] = {}
+        cluster_scores["DEMOGRAPHICS"] = self.score_demographics(facts.get("DEMOGRAPHICS", []))
+        cluster_scores["VITALS"] = self.score_vitals(facts.get("VITALS", []))
+        cluster_scores["LABS"] = self.score_labs(facts.get("LABS", []))
+        cluster_scores["PROBLEMS"] = self.score_problems(facts.get("PROBLEMS", []))
+        cluster_scores["SYMPTOMS"] = self.score_symptoms(facts.get("SYMPTOMS", []))
+        cluster_scores["MEDICATIONS"] = self.score_medications(facts.get("MEDICATIONS", []))
+        cluster_scores["PROCEDURES"] = self.score_procedures(facts.get("PROCEDURES", []))
+        cluster_scores["UTILIZATION"] = self.score_utilization(facts.get("UTILIZATION", []))
+        cluster_scores["DISPOSITION"] = self.score_disposition(facts.get("DISPOSITION", []))
+        # Layer 4: Interaction detection
+        interactions = self.detect_interactions(facts, cluster_scores)
+        interaction_bonus = sum(i.bonus for i in interactions)
+        # Layer 5: Aggregate
+        composite = sum(cs.score for cs in cluster_scores.values()) + interaction_bonus
+        probability = self._logistic(composite)
+        category, color = self._classify_risk(composite)
+        # Layer 6: Days prediction
+        est_days = self._predict_days(composite)
+        bucket = self._predict_bucket(est_days)
+        survival = self._predict_survival(composite, probability)
+        # Explainability
+        risk_factors: List[str] = []
+        protective_factors: List[str] = []
+        for cs in cluster_scores.values():
+            risk_factors.extend(cs.contributing_factors)
+        # Identify protective factors (normal values in important clusters)
+        for cluster in ["VITALS", "LABS"]:
+            cs = cluster_scores[cluster]
+            if cs.score == 0 and facts.get(cluster):
+                protective_factors.append(f"Normal {cluster.lower()} at discharge")
+        if cluster_scores["DISPOSITION"].score == 0 and facts.get("DISPOSITION"):
+            protective_factors.append("Stable disposition (Home, alert)")
+        for i in interactions:
+            risk_factors.append(f"[PATTERN] {i.pattern_name} (+{i.bonus})")
+        # Missing data
+        missing_clusters = [c for c in VALID_CLUSTERS if c not in facts or not facts[c]]
+        completeness = 1.0 - len(missing_clusters) / len(VALID_CLUSTERS)
+        if completeness >= 0.7:
+            confidence = "high"
+        elif completeness >= 0.5:
+            confidence = "medium"
+        else:
+            confidence = "low"
+        return RiskResult(
+            composite_score=composite,
+            cluster_scores=cluster_scores,
+            interaction_bonus=interaction_bonus,
+            interactions_triggered=interactions,
+            probability=round(probability, 4),
+            risk_category=category,
+            risk_color=color,
+            estimated_days=round(est_days, 1),
+            days_bucket=bucket,
+            survival_curve=survival,
+            risk_factors=risk_factors,
+            protective_factors=protective_factors,
+            missing_clusters=sorted(missing_clusters),
+            data_completeness=round(completeness, 2),
+            confidence=confidence,
+            n_facts_parsed=n_parsed,
+            n_facts_dropped=n_dropped,
+        )
+    # -- Convenience Methods ------------------------------------------------
+    def score_from_toon(self, toon_text: str) -> RiskResult:
+        """Score from raw TOON text."""
+        facts, n_parsed, n_dropped = self.parse_toon(toon_text)
+        return self.score(facts, n_parsed, n_dropped)
+    def score_from_file(self, path: Union[str, Path]) -> RiskResult:
+        """Score from a TOON text file."""
+        text = Path(path).read_text(encoding="utf-8")
+        return self.score_from_toon(text)
+    def score_from_jsonl(self, path: Union[str, Path], limit: int = 0) -> List[Tuple[str, RiskResult]]:
+        """Score all entries in a JSONL file (trainset_full format).
+        Returns list of (hadm_id, RiskResult).
+        """
+        results: List[Tuple[str, RiskResult]] = []
+        p = Path(path)
+        with p.open("r", encoding="utf-8") as f:
+            for i, line in enumerate(f):
+                if limit and i >= limit:
+                    break
+                obj = json.loads(line)
+                hadm_id = str(obj.get("hadm_id", f"row_{i}"))
+                completion = obj.get("completion", "")
+                if completion:
+                    result = self.score_from_toon(completion)
+                    results.append((hadm_id, result))
+        return results
+# ---------------------------------------------------------------------------
+# Pretty-printing
+# ---------------------------------------------------------------------------
+def format_result(result: RiskResult, hadm_id: str = "") -> str:
+    """Format RiskResult as human-readable report."""
+    lines: List[str] = []
+    header = f"=== Readmission Risk Report"
+    if hadm_id:
+        header += f" (hadm_id: {hadm_id})"
+    header += " ==="
+    lines.append(header)
+    lines.append("")
+    # Summary
+    lines.append(f"RISK: {result.risk_category} ({result.risk_color})")
+    lines.append(f"Probability of 30-day readmission: {result.probability:.1%}")
+    lines.append(f"Composite score: {result.composite_score}")
+    lines.append(f"Confidence: {result.confidence} (data completeness: {result.data_completeness:.0%})")
+    lines.append("")
+    # Days prediction
+    lines.append("--- Days-to-Readmission Prediction ---")
+    lines.append(f"Point estimate: ~{result.estimated_days:.0f} days")
+    lines.append(f"Bucket: {result.days_bucket}")
+    lines.append("Survival curve:")
+    for t, p in sorted(result.survival_curve.horizons.items()):
+        lines.append(f"  P(readmit by day {t:2d}): {p:.1%}")
+    lines.append("")
+    # Cluster breakdown
+    lines.append("--- Cluster Scores ---")
+    for cluster in ["DEMOGRAPHICS", "VITALS", "LABS", "PROBLEMS", "SYMPTOMS",
+                     "MEDICATIONS", "PROCEDURES", "UTILIZATION", "DISPOSITION"]:
+        cs = result.cluster_scores.get(cluster)
+        if cs:
+            lines.append(f"  {cluster}: {cs.score}/{cs.max_score}")
+    lines.append(f"  INTERACTIONS: +{result.interaction_bonus}")
+    lines.append(f"  TOTAL: {result.composite_score}")
+    lines.append("")
+    # Risk factors
+    if result.risk_factors:
+        lines.append("--- Risk Factors ---")
+        for rf in result.risk_factors:
+            lines.append(f"  - {rf}")
+        lines.append("")
+    # Protective factors
+    if result.protective_factors:
+        lines.append("--- Protective Factors ---")
+        for pf in result.protective_factors:
+            lines.append(f"  + {pf}")
+        lines.append("")
+    # Triggered patterns
+    if result.interactions_triggered:
+        lines.append("--- Clinical Patterns Detected ---")
+        for ix in result.interactions_triggered:
+            lines.append(f"  [{ix.pattern_id}] {ix.pattern_name}: +{ix.bonus} pts")
+            lines.append(f"    Evidence: {ix.description}")
+        lines.append("")
+    # Missing data
+    if result.missing_clusters:
+        lines.append(f"--- Missing Data ({len(result.missing_clusters)} clusters) ---")
+        for mc in result.missing_clusters:
+            lines.append(f"  ? {mc}")
+        lines.append("")
+    lines.append(f"Facts parsed: {result.n_facts_parsed}, dropped: {result.n_facts_dropped}")
+    return "\n".join(lines)
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+def main():
+    import argparse
+    ap = argparse.ArgumentParser(description="Rule-based 30-day readmission risk engine")
+    sub = ap.add_subparsers(dest="cmd")
+    # Score a single TOON file
+    p_file = sub.add_parser("file", help="Score a single TOON file")
+    p_file.add_argument("path", help="Path to TOON text file")
+    # Score from JSONL
+    p_jsonl = sub.add_parser("jsonl", help="Score all entries in a JSONL file")
+    p_jsonl.add_argument("path", help="Path to JSONL file")
+    p_jsonl.add_argument("--limit", type=int, default=0, help="Limit number of entries")
+    p_jsonl.add_argument("--summary", action="store_true", help="Show summary statistics only")
+    # Score from inline TOON text
+    p_inline = sub.add_parser("inline", help="Score inline TOON text (pipe to stdin)")
+    args = ap.parse_args()
+    engine = ReadmissionRiskEngine()
+    if args.cmd == "file":
+        result = engine.score_from_file(args.path)
+        print(format_result(result))
+    elif args.cmd == "jsonl":
+        results = engine.score_from_jsonl(args.path, limit=args.limit)
+        if args.summary:
+            scores = [r.composite_score for _, r in results]
+            probs = [r.probability for _, r in results]
+            categories = {}
+            for _, r in results:
+                categories[r.risk_category] = categories.get(r.risk_category, 0) + 1
+            print(f"=== Summary ({len(results)} patients) ===")
+            print(f"Score: mean={sum(scores)/len(scores):.1f}, "
+                  f"min={min(scores)}, max={max(scores)}, "
+                  f"median={sorted(scores)[len(scores)//2]}")
+            print(f"P(readmit): mean={sum(probs)/len(probs):.1%}")
+            print("Risk categories:")
+            for cat in ["Low", "Medium", "High", "Critical"]:
+                n = categories.get(cat, 0)
+                pct = n / len(results) * 100 if results else 0
+                print(f"  {cat}: {n} ({pct:.0f}%)")
+            days = [r.estimated_days for _, r in results]
+            print(f"Days estimate: mean={sum(days)/len(days):.1f}, "
+                  f"min={min(days):.1f}, max={max(days):.1f}")
+        else:
+            for hadm_id, result in results:
+                print(format_result(result, hadm_id))
+                print("\n" + "=" * 60 + "\n")
+    elif args.cmd == "inline":
+        import sys
+        toon_text = sys.stdin.read()
+        result = engine.score_from_toon(toon_text)
+        print(format_result(result))
+    else:
+        ap.print_help()
+if __name__ == "__main__":
+    main()

README.md ADDED Viewed

	@@ -0,0 +1,70 @@

+---
+title: MedGemma StructCore Demo
+emoji: 🩺
+colorFrom: blue
+colorTo: teal
+sdk: gradio
+python_version: "3.10"
+app_file: app.py
+pinned: false
+---
+# MedGemma StructCore Demo (HF Spaces Zero)
+This directory contains deployment assets for Hugging Face Spaces Zero.
+## What is included
+- `app.py`: Space entrypoint for the StructCore demo UI.
+- `requirements.txt`: minimal dependencies for this demo.
+## Recommended deployment flow
+Use the packaging script from the repository root:
+```bash
+bash scripts/prepare_hf_zero_challenge_space.sh
+```
+It creates a ready-to-push bundle in:
+```text
+.dist/hf_zero_challenge_demo_space/
+```
+Then push that bundle to your HF Space repository.
+## Model repository (two-stage)
+Target model repo:
+- `https://huggingface.co/DocUA/medgemma-1.5-4b-it-gguf-q5-k-m-two-stage`
+Upload/update Stage1 and Stage2 artifacts from this project repo:
+```bash
+python3 scripts/hf_upload_two_stage_models.py \
+  --repo-id DocUA/medgemma-1.5-4b-it-gguf-q5-k-m-two-stage \
+  --stage1-file /absolute/path/to/stage1.gguf \
+  --stage2-file /absolute/path/to/stage2.gguf \
+  --stage1-path-in-repo stage1/medgemma-stage1-q5_k_m.gguf \
+  --stage2-path-in-repo stage2/medgemma-stage2-q5_k_m.gguf
+```
+Requires `HF_TOKEN` with write access to the model repo.
+## Space runtime configuration
+Set these variables/secrets in the HF Space settings:
+- `STRUCTCORE_BACKEND_MODE=pipeline` (or `mock` as safe default)
+- `STRUCTCORE_STAGE1_URL=<your_openai_compat_stage1_url>`
+- `STRUCTCORE_STAGE1_MODEL=<model_alias_from_stage1_/v1/models>`
+- `STRUCTCORE_STAGE2_URL=<your_openai_compat_stage2_url>`
+- `STRUCTCORE_STAGE2_MODEL=<model_alias_from_stage2_/v1/models>`
+Important:
+- The Space itself does not automatically serve GGUF files from the model repo.
+- GGUF files in the HF model repo are the source-of-truth artifacts.
+- Actual inference in `pipeline` mode requires reachable OpenAI-compatible endpoints running those artifacts.

app.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from __future__ import annotations
+import os
+from apps.challenge_demo.app_challenge import build_demo
+demo = build_demo()
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=int(os.getenv("PORT", "7860")),
+        show_error=True,
+    )

apps/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Application packages."""

apps/challenge_demo/README.md ADDED Viewed

	@@ -0,0 +1,84 @@

+# MedGemma StructCore Demo App
+This is the implementation-focused demo app for:
+**MedGemma StructCore: Local-First Clinical Structuring Engine for EHR**
+## Run
+```bash
+python3 apps/challenge_demo/app_challenge.py
+```
+Open: `http://localhost:7863`
+## Deploy to Hugging Face Spaces Zero
+Prepare a minimal Space bundle:
+```bash
+bash scripts/prepare_hf_zero_challenge_space.sh
+```
+Bundle output:
+```text
+.dist/hf_zero_challenge_demo_space/
+```
+Push that directory to your HF Space repository. The bundle includes:
+- Space entrypoint `app.py`
+- minimal `requirements.txt`
+- demo code (`apps/challenge_demo`)
+- parser/risk dependencies (`kvt_utils.py`, `Analysis_Readmission/readmission_risk_engine.py`, config JSONs)
+Note: in HF Space, default mode should remain `mock`. `pipeline` mode requires external Stage1/Stage2 servers reachable from the Space.
+### Two-stage model artifacts on HF
+Model repo (source-of-truth artifacts):
+- `https://huggingface.co/DocUA/medgemma-1.5-4b-it-gguf-q5-k-m-two-stage`
+Upload/update artifacts:
+```bash
+python3 scripts/hf_upload_two_stage_models.py \
+  --repo-id DocUA/medgemma-1.5-4b-it-gguf-q5-k-m-two-stage \
+  --stage1-file /absolute/path/to/stage1.gguf \
+  --stage2-file /absolute/path/to/stage2.gguf
+```
+Space should be configured via env vars:
+- `STRUCTCORE_STAGE1_URL`, `STRUCTCORE_STAGE1_MODEL`
+- `STRUCTCORE_STAGE2_URL`, `STRUCTCORE_STAGE2_MODEL`
+- optional: `STRUCTCORE_BACKEND_MODE=mock|pipeline`
+## Modes
+- `mock`:
+  - offline deterministic extraction (fast, no model server required),
+  - useful for demo recording and UI development.
+- `pipeline`:
+  - runs real Stage1/Stage2 using existing runners,
+  - requires local OpenAI-compatible model servers.
+If pipeline mode fails and fallback is enabled, the app falls back to mock mode.
+## Architecture
+- `app_challenge.py`: Gradio UI and orchestration glue.
+- `services/structcore_service.py`: execution modes, normalization, risk scoring.
+- `services/case_library.py`: synthetic demo cases.
+- `services/evidence_service.py`: claim/evidence board data.
+- `config/evidence_claims.json`: status-labeled claims.
+- `data/synthetic_cases.json`: synthetic note samples.
+## Notes
+- This demo is extraction-first.
+- Readmission risk is presented as a downstream use case.
+- Public demos should use synthetic notes only.

apps/challenge_demo/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """MedGemma StructCore challenge demo app."""

apps/challenge_demo/app_challenge.py ADDED Viewed

	@@ -0,0 +1,275 @@

+from __future__ import annotations
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Dict, List, Tuple
+# Allow running as a script: `python apps/challenge_demo/app_challenge.py`
+# With no package context, put the repository root (two directories above this
+# file's folder) on sys.path so the absolute `apps.*` imports below resolve.
+if __package__ in {None, ""}:
+    repo_root = Path(__file__).resolve().parents[2]
+    # Insert only once, and at the front so this checkout wins the lookup.
+    if str(repo_root) not in sys.path:
+        sys.path.insert(0, str(repo_root))
+import gradio as gr
+import pandas as pd
+from apps.challenge_demo.services.case_library import get_case, load_cases
+from apps.challenge_demo.services.evidence_service import load_evidence_rows
+from apps.challenge_demo.services.structcore_service import (
+    StructCoreConfig,
+    lines_to_rows,
+    result_to_debug_json,
+    run_structcore,
+)
+def _default_case_id() -> str:
+    cases = load_cases()
+    return cases[0].id if cases else "custom"
+def _case_choices() -> List[Tuple[str, str]]:
+    out = []
+    for c in load_cases():
+        out.append((f"{c.title} ({c.id})", c.id))
+    out.append(("Custom note", "custom"))
+    return out
+def _on_case_change(case_id: str) -> Tuple[str, str]:
+    if not case_id or case_id == "custom":
+        return "", "Manual mode: paste your own note text."
+    c = get_case(case_id)
+    if c is None:
+        return "", "Case not found."
+    return c.text, f"**{c.title}**\n\n{c.description}"
+def _format_status(note_id: str, backend_mode: str, duration_sec: float, gate_summary: Dict, warnings: List[str], error: str | None) -> str:
+    """Render the run-status markdown block shown under the Run button.
+    Reads ``parse_success``, ``clusters_present`` and ``output_lines`` from
+    *gate_summary*; absent keys render as "no", "none" and 0. Warning and
+    error bullets are appended only when present.
+    """
+    parsed_flag = "yes" if gate_summary.get("parse_success") else "no"
+    present = gate_summary.get("clusters_present") or []
+    cluster_text = ", ".join(present) or "none"
+    line_count = gate_summary.get("output_lines", 0)
+    bullets = ["### Run Status"]
+    bullets.append(f"- Note ID: `{note_id}`")
+    bullets.append(f"- Backend mode: `{backend_mode}`")
+    bullets.append(f"- Parse success: `{parsed_flag}`")
+    bullets.append(f"- Output lines: `{line_count}`")
+    bullets.append(f"- Clusters: `{cluster_text}`")
+    bullets.append(f"- Duration (sec): `{duration_sec}`")
+    if warnings:
+        bullets.append("- Warnings:")
+        for message in warnings:
+            bullets.append(f"  - {message}")
+    if error:
+        bullets.append(f"- Error: `{error}`")
+    return "\n".join(bullets)
+def _format_risk_summary(risk: Dict | None) -> Tuple[str, str]:
+    if not risk:
+        return "No risk output available for this run.", "{}"
+    prob = risk.get("probability")
+    category = risk.get("risk_category")
+    score = risk.get("composite_score")
+    completeness = risk.get("data_completeness")
+    factors = risk.get("risk_factors") or []
+    bullets = [
+        "### Readmission Risk Summary",
+        f"- Category: `{category}`",
+        f"- Probability: `{prob}`",
+        f"- Composite score: `{score}`",
+        f"- Data completeness: `{completeness}`",
+    ]
+    if factors:
+        bullets.append("- Top risk factors:")
+        for it in factors[:5]:
+            bullets.append(f"  - {it}")
+    return "\n".join(bullets), json.dumps(risk, ensure_ascii=False, indent=2)
+def _run_demo(
+    case_id: str,
+    note_text: str,
+    backend_mode: str,
+    stage1_url: str,
+    stage1_model: str,
+    stage2_url: str,
+    stage2_model: str,
+    fallback_to_mock: bool,
+) -> Tuple[str, str, str, pd.DataFrame, str, str, str, str]:
+    note = (note_text or "").strip()
+    effective_case_id = case_id or "custom"
+    if not note and effective_case_id != "custom":
+        c = get_case(effective_case_id)
+        if c is not None:
+            note = c.text
+    cfg = StructCoreConfig(
+        backend_mode=(backend_mode or "mock").strip(),
+        stage1_url=(stage1_url or "").strip(),
+        stage1_model=(stage1_model or "").strip(),
+        stage2_url=(stage2_url or "").strip(),
+        stage2_model=(stage2_model or "").strip(),
+        fallback_to_mock_on_error=bool(fallback_to_mock),
+    )
+    result = run_structcore(note, effective_case_id, cfg)
+    status_md = _format_status(
+        note_id=result.note_id,
+        backend_mode=result.backend_mode,
+        duration_sec=result.duration_sec,
+        gate_summary=result.gate_summary,
+        warnings=result.warnings,
+        error=result.error,
+    )
+    rows = lines_to_rows(result.normalized_lines)
+    df = pd.DataFrame(rows, columns=["CLUSTER", "Keyword", "Value", "Timestamp"])
+    risk_md, risk_json = _format_risk_summary(result.risk)
+    return (
+        status_md,
+        result.stage1_summary,
+        result.stage2_raw,
+        df,
+        json.dumps(result.gate_summary, ensure_ascii=False, indent=2),
+        risk_md,
+        risk_json,
+        result_to_debug_json(result),
+    )
+def build_demo() -> gr.Blocks:
+    """Assemble the Gradio Blocks UI for the StructCore demo and return it.
+    Layout: four tabs (case input, StructCore inspector, risk view, evidence
+    board) plus a collapsed debug accordion. Two events are wired: changing
+    the case dropdown refills the note box and description, and the Run button
+    calls ``_run_demo`` and fans its eight results out to the output widgets.
+    Nothing is launched here; the caller decides how to serve the app.
+    """
+    # Defaults for the pipeline text boxes come from a freshly built config.
+    cfg_defaults = StructCoreConfig()
+    case_choices = _case_choices()
+    default_case_id = _default_case_id()
+    # Pre-fill the note box with the first case; None when no case is available.
+    initial_case = get_case(default_case_id)
+    initial_text = initial_case.text if initial_case else ""
+    initial_desc = f"**{initial_case.title}**\n\n{initial_case.description}" if initial_case else "Manual mode"
+    # The evidence table is static, so it is loaded once at build time.
+    evidence_df = pd.DataFrame(load_evidence_rows(), columns=["Claim ID", "Claim", "Metric", "Status", "Artifact"])
+    with gr.Blocks(title="MedGemma StructCore Demo") as demo:
+        gr.Markdown(
+            """
+# MedGemma StructCore Demo
+**MedGemma StructCore: Local-First Clinical Structuring Engine for EHR**
+This demo is extraction-first: free-text EHR -> structured KVT4 facts -> optional downstream readmission risk view.
+"""
+        )
+        with gr.Tab("1) Case Input"):
+            case_id = gr.Dropdown(label="Synthetic case", choices=case_choices, value=default_case_id)
+            case_desc = gr.Markdown(initial_desc)
+            note_text = gr.Textbox(label="Clinical note text", lines=14, value=initial_text)
+            with gr.Row():
+                # NOTE(review): the env value is passed through unvalidated; confirm
+                # how Gradio treats a value that is not one of `choices`.
+                backend_mode = gr.Radio(
+                    label="Backend mode",
+                    choices=["mock", "pipeline"],
+                    value=os.getenv("STRUCTCORE_BACKEND_MODE", "mock"),
+                    info="mock = offline deterministic demo, pipeline = Stage1/Stage2 runners with local model servers",
+                )
+                fallback_to_mock = gr.Checkbox(
+                    label="Fallback to mock if pipeline fails",
+                    value=True,
+                )
+            with gr.Accordion("Pipeline settings", open=False):
+                stage1_url = gr.Textbox(label="Stage1 URL", value=cfg_defaults.stage1_url)
+                stage1_model = gr.Textbox(label="Stage1 model", value=cfg_defaults.stage1_model)
+                stage2_url = gr.Textbox(label="Stage2 URL", value=cfg_defaults.stage2_url)
+                stage2_model = gr.Textbox(label="Stage2 model", value=cfg_defaults.stage2_model)
+            run_btn = gr.Button("Run StructCore", variant="primary")
+            status_md = gr.Markdown()
+        with gr.Tab("2) StructCore Inspector"):
+            stage1_summary = gr.Textbox(label="Stage1 summary", lines=14)
+            stage2_raw = gr.Textbox(label="Stage2 raw output", lines=14)
+            normalized_df = gr.Dataframe(
+                label="Normalized KVT4 facts",
+                headers=["CLUSTER", "Keyword", "Value", "Timestamp"],
+                datatype=["str", "str", "str", "str"],
+                row_count=8,
+            )
+            gate_json = gr.Textbox(label="Quality gate summary", lines=10)
+        with gr.Tab("3) Risk View"):
+            risk_md = gr.Markdown()
+            risk_json = gr.Textbox(label="Risk payload (JSON)", lines=18)
+        with gr.Tab("4) Evidence Board"):
+            gr.Markdown("All claims should be interpreted with explicit status labels.")
+            gr.Dataframe(
+                value=evidence_df,
+                headers=["Claim ID", "Claim", "Metric", "Status", "Artifact"],
+                datatype=["str", "str", "str", "str", "str"],
+                interactive=False,
+                wrap=True,
+                row_count=len(evidence_df),
+                label="Evidence claims",
+            )
+        with gr.Accordion("Debug JSON", open=False):
+            debug_json = gr.Textbox(label="Full run payload", lines=18)
+        # Selecting a case replaces the note text and the description panel.
+        case_id.change(fn=_on_case_change, inputs=[case_id], outputs=[note_text, case_desc])
+        # Input and output order must match _run_demo's parameters and return tuple.
+        run_btn.click(
+            fn=_run_demo,
+            inputs=[
+                case_id,
+                note_text,
+                backend_mode,
+                stage1_url,
+                stage1_model,
+                stage2_url,
+                stage2_model,
+                fallback_to_mock,
+            ],
+            outputs=[
+                status_md,
+                stage1_summary,
+                stage2_raw,
+                normalized_df,
+                gate_json,
+                risk_md,
+                risk_json,
+                debug_json,
+            ],
+        )
+    return demo
+def main() -> None:
+    demo = build_demo()
+    launch_kwargs = {
+        "server_name": "0.0.0.0",
+        "server_port": 7863,
+        "show_error": True,
+    }
+    try:
+        demo.launch(ssr_mode=False, **launch_kwargs)
+    except TypeError as exc:
+        # Older gradio versions do not support ssr_mode.
+        if "ssr_mode" not in str(exc):
+            raise
+        demo.launch(**launch_kwargs)
+if __name__ == "__main__":
+    main()

apps/challenge_demo/config/evidence_claims.json ADDED Viewed

	@@ -0,0 +1,44 @@

+[
+  {
+    "claim_id": "C01",
+    "claim": "Stage2 KVT4 format stability is high",
+    "metric": "99.74% valid format",
+    "status": "Verified",
+    "artifact": "Analysis_Challenge/Spec_Challenge.md"
+  },
+  {
+    "claim_id": "C02",
+    "claim": "Stage1 structured output parse is stable",
+    "metric": "98%+ parse rate (test50)",
+    "status": "Verified",
+    "artifact": "Analysis_Challenge/Spec_Challenge.md"
+  },
+  {
+    "claim_id": "C03",
+    "claim": "Track B rule-engine benchmark",
+    "metric": "AUROC 0.6024 [0.5882, 0.6167]",
+    "status": "Verified",
+    "artifact": "results/benchmark/20260207_trackB_hosp_v3_a1_ruleengine_labs50k/metrics_summary.json"
+  },
+  {
+    "claim_id": "C04",
+    "claim": "SGR v4.1 notes-based uplift on independent proxy set",
+    "metric": "AUROC 0.6462 vs 0.511 baseline",
+    "status": "Preliminary",
+    "artifact": "results/benchmark/20260209_212920_sgr_v41_clean_test50new/"
+  },
+  {
+    "claim_id": "C05",
+    "claim": "Production extraction path is DSPy-free",
+    "metric": "OpenAI-compatible runtime path",
+    "status": "Verified",
+    "artifact": "Docs/DSPY_VIABILITY_DECISION.md"
+  },
+  {
+    "claim_id": "C06",
+    "claim": "Corrected larger-N SGR replication",
+    "metric": "N=200 corrected run",
+    "status": "Planned",
+    "artifact": "results/benchmark/<future_run_id>/"
+  }
+]

apps/challenge_demo/data/synthetic_cases.json ADDED Viewed

	@@ -0,0 +1,20 @@

+[
+  {
+    "id": "low_risk_followup",
+    "title": "Low Risk Follow-up",
+    "description": "Stable patient with mild abnormalities and home discharge.",
+    "text": "45-year-old female admitted for observation with atypical chest discomfort. Heart rate 78, blood pressure 122/76, respiratory rate 16, temperature 36.8, SpO2 98%. Labs: hemoglobin 13.2, hematocrit 39.8, WBC 7.4, platelet 240, sodium 139, potassium 4.2, creatinine 0.8, BUN 14, glucose 102, bicarbonate 24. One prior admission in 12 months. Discharge disposition: home. Alert and oriented at discharge."
+  },
+  {
+    "id": "moderate_risk_multimorbidity",
+    "title": "Moderate Risk Multimorbidity",
+    "description": "Older patient with chronic disease burden and moderate physiologic stress.",
+    "text": "68-year-old male with diabetes and hypertension admitted for dyspnea. HR 96, BP 148/88, RR 20, Temp 37.4, O2 sat 93%. Labs notable for hemoglobin 11.4, WBC 11.9, sodium 134, potassium 4.9, creatinine 1.5, BUN 31, glucose 182, bicarbonate 20. Two ED visits in last 6 months, two prior admissions in 12 months, current length of stay 6 days. Insulin therapy continued. Discharge to home with support services."
+  },
+  {
+    "id": "high_risk_complex",
+    "title": "High Risk Complex",
+    "description": "Complex discharge with severe derangements and high utilization.",
+    "text": "75-year-old male with CHF, CKD, COPD and atrial fibrillation admitted with worsening shortness of breath. Heart rate 118, blood pressure 168/98, respiratory rate 28, temperature 38.2, SpO2 88%, weight 92. Labs: hemoglobin 9.8, hematocrit 30.1, WBC 15.7, platelet 420, sodium 129, potassium 5.6, creatinine 2.3, BUN 48, glucose 236, bicarbonate 17. Four ED visits in six months, three prior admissions in 12 months, days since last admission 18, length of stay 13 days. On diuretic therapy, anticoagulation and opioid therapy. Mechanical ventilation required during stay. Discharge disposition skilled nursing facility. Intermittently confused at discharge."
+  }
+]

apps/challenge_demo/hf_zero/README.md ADDED Viewed

	@@ -0,0 +1,70 @@

+---
+title: MedGemma StructCore Demo
+emoji: 🩺
+colorFrom: blue
+colorTo: teal
+sdk: gradio
+python_version: "3.10"
+app_file: app.py
+pinned: false
+---
+# MedGemma StructCore Demo (HF Spaces Zero)
+This directory contains deployment assets for Hugging Face Spaces Zero.
+## What is included
+- `app.py`: Space entrypoint for the StructCore demo UI.
+- `requirements.txt`: minimal dependencies for this demo.
+## Recommended deployment flow
+Use the packaging script from the repository root:
+```bash
+bash scripts/prepare_hf_zero_challenge_space.sh
+```
+It creates a ready-to-push bundle in:
+```text
+.dist/hf_zero_challenge_demo_space/
+```
+Then push that bundle to your HF Space repository.
+## Model repository (two-stage)
+Target model repo:
+- `https://huggingface.co/DocUA/medgemma-1.5-4b-it-gguf-q5-k-m-two-stage`
+Upload/update Stage1 and Stage2 artifacts from this project repo:
+```bash
+python3 scripts/hf_upload_two_stage_models.py \
+  --repo-id DocUA/medgemma-1.5-4b-it-gguf-q5-k-m-two-stage \
+  --stage1-file /absolute/path/to/stage1.gguf \
+  --stage2-file /absolute/path/to/stage2.gguf \
+  --stage1-path-in-repo stage1/medgemma-stage1-q5_k_m.gguf \
+  --stage2-path-in-repo stage2/medgemma-stage2-q5_k_m.gguf
+```
+Requires `HF_TOKEN` with write access to the model repo.
+## Space runtime configuration
+Set these variables/secrets in the HF Space settings:
+- `STRUCTCORE_BACKEND_MODE=pipeline` (or `mock` as safe default)
+- `STRUCTCORE_STAGE1_URL=<your_openai_compat_stage1_url>`
+- `STRUCTCORE_STAGE1_MODEL=<model_alias_from_stage1_/v1/models>`
+- `STRUCTCORE_STAGE2_URL=<your_openai_compat_stage2_url>`
+- `STRUCTCORE_STAGE2_MODEL=<model_alias_from_stage2_/v1/models>`
+Important:
+- The Space itself does not automatically serve GGUF files from the model repo.
+- GGUF files in the HF model repo are the source-of-truth artifacts.
+- Actual inference in `pipeline` mode requires reachable OpenAI-compatible endpoints running those artifacts.

apps/challenge_demo/hf_zero/app.py ADDED Viewed

	@@ -0,0 +1,17 @@

+from __future__ import annotations
+import os
+from apps.challenge_demo.app_challenge import build_demo
+demo = build_demo()
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=int(os.getenv("PORT", "7860")),
+        show_error=True,
+    )

apps/challenge_demo/hf_zero/requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ gradio>=4.44,<6
2	+ pandas>=2.0,<3

apps/challenge_demo/services/__init__.py ADDED Viewed

	@@ -0,0 +1 @@


1	+ """Service layer for StructCore demo."""

apps/challenge_demo/services/case_library.py ADDED Viewed

	@@ -0,0 +1,42 @@

+from __future__ import annotations
+import json
+from dataclasses import dataclass
+from pathlib import Path
+from typing import List
+DATA_PATH = Path(__file__).resolve().parents[1] / "data" / "synthetic_cases.json"
+@dataclass(frozen=True)
+class SyntheticCase:
+    """Immutable record for one demo case loaded from synthetic_cases.json."""
+    id: str  # identifier used to look the case up and as the dropdown value
+    title: str  # short human-readable name shown in the dropdown label
+    description: str  # one-line summary rendered beside the note box
+    text: str  # the clinical note text itself
+def load_cases() -> List[SyntheticCase]:
+    raw = json.loads(DATA_PATH.read_text(encoding="utf-8"))
+    out: List[SyntheticCase] = []
+    for row in raw:
+        out.append(
+            SyntheticCase(
+                id=str(row.get("id", "")).strip(),
+                title=str(row.get("title", "")).strip(),
+                description=str(row.get("description", "")).strip(),
+                text=str(row.get("text", "")).strip(),
+            )
+        )
+    return [c for c in out if c.id and c.title and c.text]
+def get_case(case_id: str) -> SyntheticCase | None:
+    target = (case_id or "").strip()
+    if not target:
+        return None
+    for item in load_cases():
+        if item.id == target:
+            return item
+    return None

apps/challenge_demo/services/evidence_service.py ADDED Viewed

	@@ -0,0 +1,24 @@

+from __future__ import annotations
+import json
+from pathlib import Path
+from typing import Dict, List
+EVIDENCE_PATH = Path(__file__).resolve().parents[1] / "config" / "evidence_claims.json"
+def load_evidence_rows() -> List[Dict[str, str]]:
+    data = json.loads(EVIDENCE_PATH.read_text(encoding="utf-8"))
+    rows: List[Dict[str, str]] = []
+    for item in data:
+        rows.append(
+            {
+                "Claim ID": str(item.get("claim_id", "")).strip(),
+                "Claim": str(item.get("claim", "")).strip(),
+                "Metric": str(item.get("metric", "")).strip(),
+                "Status": str(item.get("status", "")).strip(),
+                "Artifact": str(item.get("artifact", "")).strip(),
+            }
+        )
+    return rows

apps/challenge_demo/services/structcore_service.py ADDED Viewed

	@@ -0,0 +1,494 @@

+from __future__ import annotations
+import json
+import os
+import re
+import subprocess
+import sys
+import tempfile
+import time
+from dataclasses import asdict, dataclass, field
+from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
+from Analysis_Readmission.readmission_risk_engine import ReadmissionRiskEngine
+from kvt_utils import extract_kvt_fact_lines, normalize_readmission_kvt4_lines
+# Repository root: three directories above this file's own folder.
+REPO_ROOT = Path(__file__).resolve().parents[3]
+# Runner script executed as a subprocess by the pipeline backend.
+PIPELINE_SCRIPT = REPO_ROOT / "scripts" / "run_two_stage_structured_pipeline.py"
+# Cluster names recognised by this service.
+# NOTE(review): the consumers of this set are outside this view; confirm usage.
+VALID_CLUSTERS = {
+    "DEMOGRAPHICS",
+    "VITALS",
+    "LABS",
+    "DISPOSITION",
+    "MEDICATIONS",
+    "PROCEDURES",
+    "UTILIZATION",
+    "PROBLEMS",
+    "SYMPTOMS",
+}
+@dataclass
+class StructCoreConfig:
+    """Settings for one StructCore run: backend choice plus Stage1/Stage2 options.
+    The ``os.getenv`` defaults below are evaluated once, when this class body
+    executes at import time; environment changes made afterwards do not alter
+    the defaults of instances created later.
+    """
+    backend_mode: str = "mock"  # mock | pipeline
+    # Interpreter used to launch the pipeline script.
+    python_executable: str = sys.executable
+    # Stage1 endpoint/model: STRUCTCORE_* wins, then OPENAI_COMPAT_*, then the literal.
+    stage1_url: str = os.getenv("STRUCTCORE_STAGE1_URL", os.getenv("OPENAI_COMPAT_URL", "http://127.0.0.1:1245"))
+    stage1_model: str = os.getenv("STRUCTCORE_STAGE1_MODEL", os.getenv("OPENAI_COMPAT_MODEL_STAGE1", "medgemma-base-q5_k_m"))
+    stage1_profile: str = "sgr_v2"
+    stage1_max_tokens: int = 768
+    stage1_temperature: float = 0.0
+    # Stage2 shares the OPENAI_COMPAT_URL fallback with Stage1, so setting only
+    # that variable points both stages at the same URL.
+    stage2_url: str = os.getenv("STRUCTCORE_STAGE2_URL", os.getenv("OPENAI_COMPAT_URL", "http://127.0.0.1:1246"))
+    stage2_model: str = os.getenv("STRUCTCORE_STAGE2_MODEL", os.getenv("OPENAI_COMPAT_MODEL_STAGE2", "medgemma-ft-lora-adapters-q5_k_m"))
+    stage2_scope: str = "all"
+    stage2_output_mode: str = "lines"
+    stage2_max_tokens: int = 768
+    stage2_temperature: float = 0.0
+    # When True, a failing pipeline run is answered by the mock backend instead.
+    fallback_to_mock_on_error: bool = True
+@dataclass
+class StructCoreResult:
+    """Everything one StructCore run produced, successful or not."""
+    # Backend that produced the result; "mock (pipeline fallback)" after a fallback.
+    backend_mode: str
+    note_id: str
+    stage1_summary: str
+    stage2_raw: str
+    stage2_lines: List[str]
+    normalized_lines: List[str]
+    normalization_stats: Dict[str, Any]
+    # Quality-gate data; failure results carry "parse_success": False and a "reason".
+    gate_summary: Dict[str, Any]
+    # Risk payload, or None when no risk output is available.
+    risk: Optional[Dict[str, Any]]
+    warnings: List[str] = field(default_factory=list)
+    # Error code or message such as "empty_input" or "pipeline_error: ..."; None otherwise.
+    error: Optional[str] = None
+    duration_sec: float = 0.0
+_ENGINE: Optional[ReadmissionRiskEngine] = None
+def _get_engine() -> ReadmissionRiskEngine:
+    global _ENGINE
+    if _ENGINE is None:
+        _ENGINE = ReadmissionRiskEngine()
+    return _ENGINE
def run_structcore(note_text: str, note_id: str, cfg: StructCoreConfig) -> StructCoreResult:
    """Run StructCore on one note using the backend selected in ``cfg``.

    Args:
        note_text: Raw note text; ``None`` or whitespace-only counts as empty.
        note_id: Identifier copied into the result.
        cfg: Runtime configuration (backend mode, fallback policy, stage settings).

    Returns:
        A ``StructCoreResult``. Empty input and a pipeline failure with the
        fallback disabled yield a result whose ``error`` is set; a pipeline
        failure with the fallback enabled yields the mock result, relabelled
        and carrying a warning that quotes the original exception.
    """

    def _failure(mode: str, reason: str, error: str, warnings: List[str]) -> StructCoreResult:
        # Both failure paths share the same empty payload.
        return StructCoreResult(
            backend_mode=mode,
            note_id=note_id,
            stage1_summary="",
            stage2_raw="",
            stage2_lines=[],
            normalized_lines=[],
            normalization_stats={},
            gate_summary={"parse_success": False, "reason": reason},
            risk=None,
            warnings=warnings,
            error=error,
            duration_sec=0.0,
        )

    text = (note_text or "").strip()
    if not text:
        return _failure(cfg.backend_mode, "empty_input", "empty_input", ["Input note is empty."])

    # Any mode other than "pipeline" is served by the mock backend.
    if cfg.backend_mode != "pipeline":
        return _run_mock_backend(text, note_id)

    try:
        return _run_pipeline_backend(text, note_id, cfg)
    except Exception as exc:  # noqa: BLE001
        if cfg.fallback_to_mock_on_error:
            fallback = _run_mock_backend(text, note_id)
            fallback.backend_mode = "mock (pipeline fallback)"
            fallback.warnings.insert(0, f"Pipeline backend failed, fallback enabled: {exc}")
            return fallback
        return _failure("pipeline", "pipeline_error", f"pipeline_error: {exc}", [])
def _run_pipeline_backend(note_text: str, note_id: str, cfg: StructCoreConfig) -> StructCoreResult:
    """Run the two-stage pipeline script on one note and collect its outputs.

    The note is written into a throw-away cohort directory, the pipeline
    script is invoked once per stage as a subprocess, and the per-admission
    output files are read back before the temporary directory is removed.

    Raises:
        RuntimeError: propagated from ``_run_cmd`` when a stage exits non-zero.
    """
    started = time.perf_counter()
    hadm_id = 990001  # fixed synthetic admission id used for the single demo note
    hadm = str(hadm_id)

    with tempfile.TemporaryDirectory(prefix="structcore_demo_") as workdir:
        root = Path(workdir)
        cohort_root = root / "cohort"
        out_dir = root / "out"
        note_dir = cohort_root / hadm
        note_dir.mkdir(parents=True, exist_ok=True)
        (note_dir / f"ehr_{hadm_id}.txt").write_text(note_text, encoding="utf-8")

        # Arguments shared by both stage invocations.
        def _common() -> List[str]:
            return [
                cfg.python_executable,
                str(PIPELINE_SCRIPT),
                "--cohort-root",
                str(cohort_root),
                "--out-dir",
                str(out_dir),
                "--hadm-ids",
                hadm,
                "--num-docs",
                "1",
                "--allow-missing-gt",
            ]

        stage1_cmd = _common() + [
            "stage1",
            "--url",
            cfg.stage1_url,
            "--model",
            cfg.stage1_model,
            "--profile",
            cfg.stage1_profile,
            "--max-tokens",
            str(int(cfg.stage1_max_tokens)),
            "--temperature",
            str(float(cfg.stage1_temperature)),
            "--overwrite-stage1",
        ]
        stage2_cmd = _common() + [
            "stage2",
            "--url",
            cfg.stage2_url,
            "--model",
            cfg.stage2_model,
            "--scope",
            cfg.stage2_scope,
            "--output-mode",
            cfg.stage2_output_mode,
            "--max-tokens",
            str(int(cfg.stage2_max_tokens)),
            "--temperature",
            str(float(cfg.stage2_temperature)),
            "--overwrite-stage2",
        ]

        # Both command lines are built before anything runs, so a bad config
        # value fails before the first subprocess starts.
        for command in (stage1_cmd, stage2_cmd):
            _run_cmd(command)

        result_dir = out_dir / hadm
        stage1_summary = _read_optional(result_dir / "stage1.md")
        stage2_raw = _read_optional(result_dir / "stage2_raw.txt")
        facts_text = _read_optional(result_dir / "stage2_facts.txt")
        # Prefer the parsed facts file; fall back to the raw stage-2 text.
        fact_source = facts_text if facts_text.strip() else stage2_raw
        raw_lines = extract_kvt_fact_lines(fact_source)

    normalized_lines, normalization_stats = normalize_readmission_kvt4_lines(raw_lines)
    risk = _score_risk(normalized_lines)
    gate_summary = _build_gate_summary(normalized_lines, normalization_stats)
    return StructCoreResult(
        backend_mode="pipeline",
        note_id=note_id,
        stage1_summary=stage1_summary,
        stage2_raw=stage2_raw,
        stage2_lines=raw_lines,
        normalized_lines=normalized_lines,
        normalization_stats=normalization_stats,
        gate_summary=gate_summary,
        risk=risk,
        warnings=[],
        error=None,
        duration_sec=round(time.perf_counter() - started, 3),
    )
def _run_cmd(cmd: List[str]) -> None:
    """Run ``cmd`` from the repository root and raise if it fails.

    Raises:
        RuntimeError: on a non-zero exit code. The message is the stripped
            stderr, else the stripped stdout, else a generic exit-code notice.
    """
    completed = subprocess.run(
        cmd,
        cwd=str(REPO_ROOT),
        capture_output=True,
        text=True,
        check=False,
    )
    if completed.returncode == 0:
        return
    # Report the first non-empty output stream, stderr taking precedence.
    for stream in (completed.stderr, completed.stdout):
        message = (stream or "").strip()
        if message:
            raise RuntimeError(message)
    raise RuntimeError(f"Command failed with exit code {completed.returncode}")
+def _read_optional(path: Path) -> str:
+    if not path.exists():
+        return ""
+    return path.read_text(encoding="utf-8", errors="replace")
def _run_mock_backend(note_text: str, note_id: str) -> StructCoreResult:
    """Produce a result with regex heuristics only, without any model call.

    The heuristic fact lines serve as both the stage-2 lines and (joined) the
    raw stage-2 text; the stage-1 summary is rendered from those same lines.
    """
    started = time.perf_counter()
    facts = _heuristic_extract_kvt(note_text)
    raw_text = "\n".join(facts)
    summary = _render_stage1_like_summary(facts)
    normalized, stats = normalize_readmission_kvt4_lines(facts)
    risk = _score_risk(normalized)
    gate = _build_gate_summary(normalized, stats)
    # Warn when normalization left nothing to score.
    notices: List[str] = [] if normalized else ["No valid KVT4 facts after normalization."]
    return StructCoreResult(
        backend_mode="mock",
        note_id=note_id,
        stage1_summary=summary,
        stage2_raw=raw_text,
        stage2_lines=facts,
        normalized_lines=normalized,
        normalization_stats=stats,
        gate_summary=gate,
        risk=risk,
        warnings=notices,
        error=None,
        duration_sec=round(time.perf_counter() - started, 3),
    )
+def _score_risk(normalized_lines: List[str]) -> Optional[Dict[str, Any]]:
+    if not normalized_lines:
+        return None
+    engine = _get_engine()
+    result = engine.score_from_toon("\n".join(normalized_lines))
+    return asdict(result)
+def _build_gate_summary(lines: List[str], stats: Dict[str, Any]) -> Dict[str, Any]:
+    clusters = []
+    seen_clusters = set()
+    for line in lines:
+        parts = line.split("|")
+        if len(parts) != 4:
+            continue
+        c = parts[0].strip().upper()
+        if c and c not in seen_clusters:
+            seen_clusters.add(c)
+            clusters.append(c)
+    return {
+        "parse_success": bool(lines),
+        "output_lines": len(lines),
+        "clusters_present": clusters,
+        "all_clusters_valid": all(c in VALID_CLUSTERS for c in clusters),
+        "duplicates_after_dedup": int(stats.get("duplicates_after_dedup", 0)) if isinstance(stats, dict) else 0,
+        "canonical_keyword_rate_strict": stats.get("canonical_keyword_rate_strict") if isinstance(stats, dict) else None,
+        "numeric_only_rate_vitals_labs": stats.get("numeric_only_rate_vitals_labs") if isinstance(stats, dict) else None,
+    }
+def _render_stage1_like_summary(lines: List[str]) -> str:
+    grouped: Dict[str, List[Tuple[str, str, str]]] = {}
+    for line in lines:
+        parts = line.split("|")
+        if len(parts) != 4:
+            continue
+        cluster, key, value, ts = [p.strip() for p in parts]
+        grouped.setdefault(cluster.upper(), []).append((key, value, ts))
+    ordered_clusters = [
+        "DEMOGRAPHICS",
+        "VITALS",
+        "LABS",
+        "DISPOSITION",
+        "MEDICATIONS",
+        "PROCEDURES",
+        "UTILIZATION",
+        "PROBLEMS",
+        "SYMPTOMS",
+    ]
+    out: List[str] = []
+    for cluster in ordered_clusters:
+        items = grouped.get(cluster, [])
+        if not items:
+            continue
+        out.append(f"## {cluster}")
+        for key, value, ts in items:
+            out.append(f"- {key}={value} ({ts})")
+        out.append("")
+    return "\n".join(out).strip()
+def _heuristic_extract_kvt(note_text: str) -> List[str]:
+    text = note_text or ""
+    lowered = text.lower()
+    lines: List[str] = []
+    seen = set()
+    def add(cluster: str, keyword: str, value: str, timestamp: str) -> None:
+        key = (cluster, keyword)
+        if key in seen:
+            return
+        seen.add(key)
+        lines.append(f"{cluster}|{keyword}|{value}|{timestamp}")
+    def m1(pattern: str) -> Optional[str]:
+        m = re.search(pattern, text, flags=re.IGNORECASE)
+        return m.group(1) if m else None
+    age = m1(r"\b(\d{1,3})\s*(?:y/o|yo|year-old|years old)\b")
+    if age:
+        add("DEMOGRAPHICS", "Age", age, "Admission")
+    if re.search(r"\bfemale\b", lowered):
+        add("DEMOGRAPHICS", "Sex", "female", "Admission")
+    elif re.search(r"\bmale\b", lowered):
+        add("DEMOGRAPHICS", "Sex", "male", "Admission")
+    hr = m1(r"(?:heart\s*rate|\bhr\b|pulse)\s*[:=]?\s*(\d{2,3}(?:\.\d+)?)")
+    if hr:
+        add("VITALS", "Heart Rate", hr, "Admission")
+    bp = re.search(r"(?:blood\s*pressure|\bbp\b)\s*[:=]?\s*(\d{2,3})\s*/\s*(\d{2,3})", text, flags=re.IGNORECASE)
+    if bp:
+        add("VITALS", "Systolic BP", bp.group(1), "Admission")
+        add("VITALS", "Diastolic BP", bp.group(2), "Admission")
+    rr = m1(r"(?:respiratory\s*rate|\brr\b|\bresp\b)\s*[:=]?\s*(\d{1,2}(?:\.\d+)?)")
+    if rr:
+        add("VITALS", "Respiratory Rate", rr, "Admission")
+    temp = m1(r"(?:temperature|\btemp\b)\s*[:=]?\s*(\d{2}(?:\.\d+)?)")
+    if temp:
+        add("VITALS", "Temperature", temp, "Admission")
+    spo2 = m1(r"(?:spo2|o2\s*sat|oxygen\s*saturation)\s*[:=]?\s*(\d{2,3}(?:\.\d+)?)\s*%?")
+    if spo2:
+        add("VITALS", "SpO2", spo2, "Admission")
+    weight = m1(r"\bweight\s*[:=]?\s*(\d{2,3}(?:\.\d+)?)")
+    if weight:
+        add("VITALS", "Weight", weight, "Admission")
+    lab_patterns = [
+        ("Hemoglobin", r"(?:hemoglobin|\bhgb\b)\s*[:=]?\s*(\d{1,2}(?:\.\d+)?)"),
+        ("Hematocrit", r"(?:hematocrit|\bhct\b)\s*[:=]?\s*(\d{1,2}(?:\.\d+)?)"),
+        ("WBC", r"\bwbc\b\s*[:=]?\s*(\d{1,2}(?:\.\d+)?)"),
+        ("Platelet", r"(?:platelet|\bplt\b)\s*[:=]?\s*(\d{2,4}(?:\.\d+)?)"),
+        ("Sodium", r"(?:sodium|\bna\b)\s*[:=]?\s*(\d{2,3}(?:\.\d+)?)"),
+        ("Potassium", r"(?:potassium|\bk\b)\s*[:=]?\s*(\d(?:\.\d+)?)"),
+        ("Creatinine", r"(?:creatinine|\bcr\b)\s*[:=]?\s*(\d(?:\.\d+)?)"),
+        ("BUN", r"\bbun\b\s*[:=]?\s*(\d{1,3}(?:\.\d+)?)"),
+        ("Glucose", r"\bglucose\b\s*[:=]?\s*(\d{2,3}(?:\.\d+)?)"),
+        ("Bicarbonate", r"(?:bicarbonate|\bhco3\b|bicarb)\s*[:=]?\s*(\d{1,2}(?:\.\d+)?)"),
+    ]
+    for keyword, pattern in lab_patterns:
+        val = m1(pattern)
+        if val:
+            add("LABS", keyword, val, "Admission")
+    prior_adm = m1(r"(\d+)\s*(?:prior|previous)\s*admissions?\s*(?:in|within)?\s*12\s*months")
+    if prior_adm:
+        add("UTILIZATION", "Prior Admissions 12mo", prior_adm, "Past")
+    ed_visits = m1(r"(\d+)\s*(?:ed|er|emergency)\s*visits?\s*(?:in|within)?\s*(?:last\s*)?6\s*months")
+    if ed_visits:
+        add("UTILIZATION", "ED Visits 6mo", ed_visits, "Past")
+    days_last = m1(r"days\s*since\s*last\s*admission\s*[:=]?\s*(\d+)")
+    if days_last:
+        add("UTILIZATION", "Days Since Last Admission", days_last, "Past")
+    los = m1(r"(?:length\s*of\s*stay|\blos\b)\s*[:=]?\s*(\d+)")
+    if los:
+        add("UTILIZATION", "Current Length of Stay", los, "Admission")
+    if "skilled nursing" in lowered or "snf" in lowered:
+        add("DISPOSITION", "Discharge Disposition", "Skilled Nursing Facility", "Discharge")
+    elif "home" in lowered:
+        add("DISPOSITION", "Discharge Disposition", "Home", "Discharge")
+    if re.search(r"confus|disorient", lowered):
+        add("DISPOSITION", "Mental Status", "Altered", "Discharge")
+    elif re.search(r"alert and oriented|a&o", lowered):
+        add("DISPOSITION", "Mental Status", "Normal", "Discharge")
+    if re.search(r"warfarin|apixaban|rivaroxaban|heparin|anticoag", lowered):
+        add("MEDICATIONS", "Anticoagulation", "yes", "Discharge")
+    if re.search(r"insulin", lowered):
+        add("MEDICATIONS", "Insulin Therapy", "yes", "Discharge")
+    if re.search(r"opioid|morphine|oxycodone|hydromorphone|fentanyl", lowered):
+        add("MEDICATIONS", "Opioid Therapy", "yes", "Discharge")
+    if re.search(r"diuretic|furosemide|torsemide|bumetanide", lowered):
+        add("MEDICATIONS", "Diuretic Therapy", "yes", "Discharge")
+    if re.search(r"mechanical ventilation|intubat", lowered):
+        add("PROCEDURES", "Mechanical Ventilation", "yes", "Admission")
+    if re.search(r"dialysis", lowered):
+        add("PROCEDURES", "Dialysis", "yes", "Admission")
+    if re.search(r"surgery|operative|operation", lowered):
+        add("PROCEDURES", "Surgery", "yes", "Admission")
+    problem_terms = {
+        "heart failure": "Heart Failure",
+        "chf": "Heart Failure",
+        "ckd": "Chronic Kidney Disease",
+        "copd": "COPD",
+        "atrial fibrillation": "Atrial Fibrillation",
+        "diabetes": "Diabetes Mellitus",
+        "hypertension": "Hypertension",
+    }
+    for token, label in problem_terms.items():
+        if token in lowered:
+            add("PROBLEMS", label, "chronic", "Past")
+    symptom_terms = {
+        "shortness of breath": "Dyspnea",
+        "dyspnea": "Dyspnea",
+        "chest pain": "Chest Pain",
+        "fever": "Fever",
+    }
+    for token, label in symptom_terms.items():
+        if token in lowered:
+            add("SYMPTOMS", label, "present", "Admission")
+    return lines
def lines_to_rows(lines: List[str]) -> List[Dict[str, str]]:
    """Convert 4-field fact lines into table rows.

    Each row has the keys ``CLUSTER``, ``Keyword``, ``Value`` and
    ``Timestamp`` with whitespace-stripped values. Lines that do not split
    into exactly four pipe-separated fields are skipped.
    """
    columns = ("CLUSTER", "Keyword", "Value", "Timestamp")
    split_lines = (ln.split("|") for ln in lines)
    return [
        dict(zip(columns, (cell.strip() for cell in cells)))
        for cells in split_lines
        if len(cells) == 4
    ]
def result_to_debug_json(result: StructCoreResult) -> str:
    """Serialize ``result`` as pretty-printed JSON, keeping non-ASCII text as-is."""
    payload = asdict(result)
    return json.dumps(payload, ensure_ascii=False, indent=2)

kvt_utils.py ADDED Viewed

	@@ -0,0 +1,1141 @@

+"""
+DSPy-free utilities for KVT4 parsing and normalization.
+This module contains all parsing, normalization, and validation logic
+that does NOT depend on DSPy. It can be used in production pipelines
+without importing the DSPy framework.
+Extracted from dspy_integration.py as part of Phase 0 decomposition.
+"""
+import ast
+import json
+import os
+import re
+from typing import List, Optional
+# =============================================================================
+# REGEX PATTERNS
+# =============================================================================
# Internal placeholder token such as "<unused95>".
_MEDGEMMA_INTERNAL_TOKEN_RE = re.compile(r"<unused\d+>")
# A whole line that starts (after an optional internal token) with the word "thought".
_MEDGEMMA_THOUGHT_LINE_RE = re.compile(r"^\s*(<unused\d+>\w*\s*)?thought\b.*$", re.IGNORECASE)
# Text enclosed in guillemets, e.g. «Vitals|Temperature|37.2|Admission».
_DSPY_QUOTED_FACT_RE = re.compile(r"«([^»]+)»")
# One flat fact object with cluster, keyword, value and timestamp keys (in that
# order) and quoted values; the four groups capture the values.
_PARTIAL_JSON_FACT_RE = re.compile(
    r"""\{\s*["']cluster["']\s*:\s*["']([^"']+)["']\s*,\s*"""
    r"""["']keyword["']\s*:\s*["']([^"']+)["']\s*,\s*"""
    r"""["']value["']\s*:\s*["']([^"']+)["']\s*,\s*"""
    r"""["']timestamp["']\s*:\s*["']([^"']+)["']\s*\}""",
    re.IGNORECASE,
)
# Opening of a grouped block: a double-quoted cluster name followed by ": [".
_PARTIAL_GROUPED_CLUSTER_BLOCK_RE = re.compile(
    r'"(?P<cluster>DEMOGRAPHICS|VITALS|LABS|PROBLEMS|SYMPTOMS|MEDICATIONS|PROCEDURES|UTILIZATION|DISPOSITION)"\s*:\s*\[',
    re.IGNORECASE,
)
# One grouped item {K, V, T}; V may be a double-quoted string, a number or true/false.
_PARTIAL_GROUPED_ITEM_RE = re.compile(
    r"""\{\s*["']K["']\s*:\s*["'](?P<k>[^"']+)["']\s*,\s*"""
    r"""["']V["']\s*:\s*(?P<v>"[^"]*"|-?\d+(?:\.\d+)?|true|false)\s*,\s*"""
    r"""["']T["']\s*:\s*["'](?P<t>[^"']+)["']\s*\}""",
    re.IGNORECASE,
)
+# =============================================================================
+# CANONICAL KEYWORDS (MVP)
+# =============================================================================
# Canonical keyword spellings for the VITALS cluster.
CANONICAL_VITALS = [
    "Heart Rate",
    "Systolic BP",
    "Diastolic BP",
    "Respiratory Rate",
    "Temperature",
    "SpO2",
    "Weight",
]
# Canonical keyword spellings for the LABS cluster.
CANONICAL_LABS = [
    "Hemoglobin",
    "Hematocrit",
    "WBC",
    "Platelet",
    "Sodium",
    "Potassium",
    "Creatinine",
    "BUN",
    "Glucose",
    "Bicarbonate",
]
# Canonical keyword spellings for the DEMOGRAPHICS cluster.
CANONICAL_DEMOGRAPHICS = [
    "Age",
    "Sex",
]
# Fixed keyword vocabulary per cluster. PROBLEMS and SYMPTOMS have no entry here.
STRICT_KEYWORDS_READMISSION: dict[str, set[str]] = {
    "DEMOGRAPHICS": set(CANONICAL_DEMOGRAPHICS),
    "VITALS": set(CANONICAL_VITALS),
    "LABS": set(CANONICAL_LABS),
    "MEDICATIONS": {
        "Medication Count",
        "New Medications Count",
        "Polypharmacy",
        "Anticoagulation",
        "Insulin Therapy",
        "Opioid Therapy",
        "Diuretic Therapy",
    },
    "PROCEDURES": {
        "Any Procedure",
        "Surgery",
        "Dialysis",
        "Mechanical Ventilation",
    },
    "UTILIZATION": {
        "Prior Admissions 12mo",
        "ED Visits 6mo",
        "Days Since Last Admission",
        "Current Length of Stay",
    },
    "DISPOSITION": {
        "Discharge Disposition",
        "Mental Status",
    },
}
# The nine cluster names recognized in readmission fact lines.
READMISSION_CLUSTERS = {
    "DEMOGRAPHICS",
    "VITALS",
    "LABS",
    "PROBLEMS",
    "SYMPTOMS",
    "MEDICATIONS",
    "PROCEDURES",
    "UTILIZATION",
    "DISPOSITION",
}
+# =============================================================================
+# OUTPUT PARSING HELPERS
+# =============================================================================
def strip_medgemma_internal_tokens(text: str) -> str:
    """Remove MedGemma internal tokens and thinking blocks from text.

    Only the internal token itself (e.g. "<unused95>") is removed, never the
    characters next to it: models sometimes emit a token immediately followed
    by a fact prefix ("<unused95>DEMOGRAPHICS|..."), and that prefix must
    survive.

    A line containing a thinking marker ("thought", "input:", "output:", ...)
    opens a thinking block that lasts until the next payload line. A marker
    does not open a block when it only appears inside an actual payload line:
    * a KVT4 line (three or more pipes) whose first field is marker-free,
      e.g. "SYMPTOMS|Suicidal thoughts|present|Admission";
    * a line that starts with "{" (a JSON object).
    Previously such lines were dropped as if they were model reasoning.

    Args:
        text: Raw model output; falsy input yields "".

    Returns:
        The remaining lines joined with newlines.
    """
    if not text:
        return ""

    # Remove internal tokens.
    cleaned = _MEDGEMMA_INTERNAL_TOKEN_RE.sub("", text)

    markers = ('thought', 'the user wants', 'here\'s my plan', 'input:', 'output:', 'constraints:')
    kept = []
    in_thinking_block = False
    for line in cleaned.splitlines():
        line_lower = line.lower().strip()
        starts_json_object = line.lstrip().startswith("{")
        looks_like_kvt4 = line.count('|') >= 3
        looks_like_json_payload = (
            '{"K"' in line
            or '"facts"' in line
            or starts_json_object
            or line.strip().startswith("```json")
        )

        if any(marker in line_lower for marker in markers):
            # The marker counts as fact content only when it sits outside the
            # first (cluster) field of a KVT4 line, or inside a JSON object line.
            first_field = line_lower.split("|", 1)[0]
            marker_is_payload_text = starts_json_object or (
                looks_like_kvt4 and not any(marker in first_field for marker in markers)
            )
            if not marker_is_payload_text:
                in_thinking_block = True
                continue

        if in_thinking_block:
            # The block ends where the actual payload starts (KVT4 or JSON-like).
            if looks_like_kvt4 or looks_like_json_payload:
                in_thinking_block = False
            else:
                continue

        # Drop explicit "thought ..." lines.
        if _MEDGEMMA_THOUGHT_LINE_RE.match(line):
            continue
        kept.append(line)
    return "\n".join(kept)
+def _looks_like_kvt_fact(line: str) -> bool:
+    """Validate if a line looks like a valid KVT4 fact.
+    Expects either:
+    - 4-part: CLUSTER|Keyword|Value|Timestamp  (preferred)
+    - 3-part: Keyword|Value|Timestamp         (legacy)
+    """
+    if not line:
+        return False
+    s = line.strip()
+    if len(s) < 5 or len(s) > 400:
+        return False
+    pipe_count = s.count("|")
+    if pipe_count not in (2, 3):
+        return False
+    parts = [p.strip() for p in s.split("|")]
+    if len(parts) not in (3, 4):
+        return False
+    # Keep parser permissive by default (unit tests expect 3-part legacy facts too).
+    allow_kvt3 = str(os.getenv("ALLOW_KVT3", "1")).strip() == "1"
+    if len(parts) == 3 and not allow_kvt3:
+        return False
+    parts_lower = [p.lower() for p in parts]
+    # Filter common headers / schema lines.
+    if parts_lower == ["k", "v", "t"]:
+        return False
+    if parts_lower == ["category", "keyword", "value", "timestamp"]:
+        return False
+    if (
+        len(parts_lower) == 4
+        and parts_lower[0].startswith("category")
+        and parts_lower[1] == "keyword"
+        and parts_lower[2] == "value"
+        and parts_lower[3].startswith("timestamp")
+    ):
+        return False
+    if "format" in parts_lower[0] and parts_lower[1:3] == ["keyword", "value"]:
+        return False
+    if parts_lower[0].startswith(("format", "output format")) and "timestamp" in parts_lower[-1]:
+        return False
+    # Filter instruction lines
+    if any(marker in parts_lower[1] for marker in ['any diagnosis', 'any symptom', 'any procedure', 'value:']):
+        return False
+    if '(' in parts[1] and ')' in parts[1]:
+        return False
+    # Length heuristics to avoid capturing prose with incidental pipes.
+    if len(parts[0]) > 80 or len(parts[1]) > 80 or len(parts[2]) > 200:
+        return False
+    if len(parts) == 4 and len(parts[3]) > 40:
+        return False
+    # Word-count heuristics: KVT lines are short phrases, not full sentences.
+    w0 = len(parts[0].split())
+    w1 = len(parts[1].split())
+    w2 = len(parts[2].split())
+    if w0 > 8 or w1 > 8 or w2 > 14:
+        return False
+    if len(parts) == 4 and len(parts[3].split()) > 4:
+        return False
+    return all(parts)
+def _normalize_kvt_fact(line: str) -> str:
+    """Normalize a KVT fact line by stripping whitespace and quotes."""
+    parts = [p.strip().strip("«»\"'") for p in line.strip().split("|")]
+    return "|".join(parts)
+def _map_category_to_cluster(category: str) -> str:
+    """Map category aliases to canonical cluster names."""
+    c = (category or "").strip().lower()
+    if not c:
+        return ""
+    mapping = {
+        "vitals": "VITALS",
+        "vital": "VITALS",
+        "labs": "LABS",
+        "lab": "LABS",
+        "demographics": "DEMOGRAPHICS",
+        "demo": "DEMOGRAPHICS",
+        "conditions": "PROBLEMS",
+        "condition": "PROBLEMS",
+        "problems": "PROBLEMS",
+        "problem": "PROBLEMS",
+        "symptoms": "SYMPTOMS",
+        "symptom": "SYMPTOMS",
+        "medications": "MEDICATIONS",
+        "medication": "MEDICATIONS",
+        "procedures": "PROCEDURES",
+        "procedure": "PROCEDURES",
+        "utilization": "UTILIZATION",
+        "disposition": "DISPOSITION",
+    }
+    return mapping.get(c, category.strip())
+def _infer_cluster_from_keyword(keyword: str) -> str:
+    """Infer cluster from keyword using canonical lists."""
+    k = (keyword or "").strip()
+    if not k:
+        return ""
+    if k in CANONICAL_VITALS:
+        return "VITALS"
+    if k in CANONICAL_LABS:
+        return "LABS"
+    if k in CANONICAL_DEMOGRAPHICS:
+        return "DEMOGRAPHICS"
+    # Minimal readmission-fixed keywords
+    if k in {"Prior Admissions 12mo", "ED Visits 6mo", "Days Since Last Admission", "Current Length of Stay"}:
+        return "UTILIZATION"
+    if k in {"Discharge Disposition", "Mental Status"}:
+        return "DISPOSITION"
+    if k in {"Any Procedure", "Surgery", "Dialysis", "Mechanical Ventilation"}:
+        return "PROCEDURES"
+    if k in {"Medication Count", "New Medications Count", "Polypharmacy", "Anticoagulation", "Insulin Therapy", "Opioid Therapy", "Diuretic Therapy"}:
+        return "MEDICATIONS"
+    return ""
+def _kvt4_from_fact_dict(d: dict) -> Optional[str]:
+    """Convert structured fact dict into CLUSTER|Keyword|Value|Timestamp."""
+    if not isinstance(d, dict):
+        return None
+    def _first_present(*keys: str):
+        for key in keys:
+            if key in d and d[key] is not None:
+                return d[key]
+        return None
+    # Accept multiple key spellings
+    cluster = _first_present("cluster", "Cluster", "C", "category", "Category")
+    keyword = _first_present("keyword", "Keyword", "K")
+    value = _first_present("value", "Value", "V")
+    timestamp = _first_present("timestamp", "Timestamp", "T")
+    keyword_s = str(keyword).strip() if keyword is not None else ""
+    value_s = str(value).strip() if value is not None else ""
+    timestamp_s = str(timestamp).strip() if timestamp is not None else ""
+    cluster_s = str(cluster).strip() if cluster is not None else ""
+    cluster_s = _map_category_to_cluster(cluster_s)
+    if not cluster_s:
+        cluster_s = _infer_cluster_from_keyword(keyword_s)
+    if not cluster_s:
+        cluster_s = "UNKNOWN"
+    if not keyword_s or not value_s:
+        return None
+    if not timestamp_s:
+        timestamp_s = "Unknown"
+    return f"{cluster_s}|{keyword_s}|{value_s}|{timestamp_s}"
+def _fact_dict_has_explicit_cluster(d: dict) -> bool:
+    if not isinstance(d, dict):
+        return False
+    for key in ("cluster", "Cluster", "CLUSTER", "C", "category", "Category"):
+        v = d.get(key)
+        if v is not None and str(v).strip():
+            return True
+    return False
+def extract_kvt_fact_lines(text: str) -> List[str]:
+    """
+    Extract candidate K|V|T / Category|K|V|T lines from arbitrary model output.
+    Handles common formats:
+    - Plain pipe-delimited lines
+    - DSPy-rendered lists like: [1] «Vitals|Temperature|37.2°C|20240110»
+    - JSON objects/lists containing "facts"
+    - Python literal lists of strings
+    """
+    if not text:
+        return []
+    cleaned = strip_medgemma_internal_tokens(text).strip()
+    if not cleaned:
+        return []
+    candidates: List[str] = []
+    structured_extracted = False
+    def _dedupe_preserve_order(items: List[str]) -> List[str]:
+        out: List[str] = []
+        seen = set()
+        for it in items:
+            if it not in seen:
+                seen.add(it)
+                out.append(it)
+        return out
+    def add_fact(s: str) -> None:
+        s2 = _normalize_kvt_fact(s)
+        if _looks_like_kvt_fact(s2):
+            candidates.append(s2)
+    def _map_category_to_cluster(category: str) -> str:
+        c = (category or "").strip().lower()
+        if not c:
+            return ""
+        mapping = {
+            "vitals": "VITALS",
+            "vital": "VITALS",
+            "labs": "LABS",
+            "lab": "LABS",
+            "demographics": "DEMOGRAPHICS",
+            "demo": "DEMOGRAPHICS",
+            "conditions": "PROBLEMS",
+            "condition": "PROBLEMS",
+            "problems": "PROBLEMS",
+            "problem": "PROBLEMS",
+            "symptoms": "SYMPTOMS",
+            "symptom": "SYMPTOMS",
+            "medications": "MEDICATIONS",
+            "medication": "MEDICATIONS",
+            "procedures": "PROCEDURES",
+            "procedure": "PROCEDURES",
+            "utilization": "UTILIZATION",
+            "disposition": "DISPOSITION",
+        }
+        return mapping.get(c, category.strip())
+    def _infer_cluster_from_keyword(keyword: str) -> str:
+        k = (keyword or "").strip()
+        if not k:
+            return ""
+        if k in CANONICAL_VITALS:
+            return "VITALS"
+        if k in CANONICAL_LABS:
+            return "LABS"
+        if k in CANONICAL_DEMOGRAPHICS:
+            return "DEMOGRAPHICS"
+        # Minimal readmission-fixed keywords (from prompts/prompt.py ontology).
+        if k in {"Prior Admissions 12mo", "ED Visits 6mo", "Days Since Last Admission", "Current Length of Stay"}:
+            return "UTILIZATION"
+        if k in {"Discharge Disposition", "Mental Status"}:
+            return "DISPOSITION"
+        if k in {"Any Procedure", "Surgery", "Dialysis", "Mechanical Ventilation"}:
+            return "PROCEDURES"
+        if k in {"Medication Count", "New Medications Count", "Polypharmacy", "Anticoagulation", "Insulin Therapy", "Opioid Therapy", "Diuretic Therapy"}:
+            return "MEDICATIONS"
+        return ""
+    def _kvt4_from_fact_dict(d: dict) -> Optional[str]:
+        """Convert common structured fact dicts into CLUSTER|Keyword|Value|Timestamp."""
+        if not isinstance(d, dict):
+            return None
+        def _first_present(*keys: str):
+            for key in keys:
+                if key in d and d[key] is not None:
+                    return d[key]
+            return None
+        # Accept multiple key spellings (legacy + short keys).
+        cluster = _first_present("cluster", "Cluster", "CLUSTER", "C", "category", "Category")
+        keyword = _first_present("keyword", "Keyword", "KEYWORD", "K")
+        value = _first_present("value", "Value", "VALUE", "V")
+        timestamp = _first_present("timestamp", "Timestamp", "TIMESTAMP", "T")
+        keyword_s = str(keyword).strip() if keyword is not None else ""
+        value_s = str(value).strip() if value is not None else ""
+        timestamp_s = str(timestamp).strip() if timestamp is not None else ""
+        # Drop prompt-template placeholders that are not real facts.
+        if keyword_s.casefold() in {"keyword", "k"} and value_s.casefold() in {"value", "v"}:
+            if timestamp_s.casefold() in {"timestamp", "t", "unknown", "admission", "discharge", "past"}:
+                return None
+        cluster_s = str(cluster).strip() if cluster is not None else ""
+        # If we got a "category" like "vitals/labs", map it into prompt-style clusters.
+        cluster_s = _map_category_to_cluster(cluster_s)
+        if not cluster_s:
+            cluster_s = _infer_cluster_from_keyword(keyword_s)
+        if not cluster_s:
+            cluster_s = "UNKNOWN"
+        if not keyword_s or not value_s:
+            return None
+        if not timestamp_s:
+            timestamp_s = "Unknown"
+        return f"{cluster_s}|{keyword_s}|{value_s}|{timestamp_s}"
+    def _kvt4_lines_from_grouped_obj(obj: dict) -> List[str]:
+        """Convert grouped JSON object into KVT4 lines.
+        Supported layout:
+        {
+          "LABS":[{"K":"Creatinine","V":1.2,"T":"Discharge"}],
+          "PROBLEMS":[{"K":"Hypertension","V":"chronic","T":"Past"}]
+        }
+        """
+        if not isinstance(obj, dict):
+            return []
+        out_lines: List[str] = []
+        for raw_cluster, raw_entries in obj.items():
+            cluster_norm = _map_category_to_cluster(str(raw_cluster).strip())
+            cluster_upper = cluster_norm.upper()
+            if cluster_upper not in READMISSION_CLUSTERS:
+                continue
+            entries: List[dict] = []
+            if isinstance(raw_entries, list):
+                entries = [it for it in raw_entries if isinstance(it, dict)]
+            elif isinstance(raw_entries, dict):
+                entries = [raw_entries]
+            else:
+                continue
+            for ent in entries:
+                keyword = ent["K"] if "K" in ent else ent.get("keyword", ent.get("Keyword"))
+                value = ent["V"] if "V" in ent else ent.get("value", ent.get("Value"))
+                timestamp = ent["T"] if "T" in ent else ent.get("timestamp", ent.get("Timestamp"))
+                fact_obj = {
+                    "cluster": cluster_upper,
+                    "keyword": keyword,
+                    "value": value,
+                    "timestamp": timestamp,
+                }
+                ln = _kvt4_from_fact_dict(fact_obj)
+                if ln:
+                    out_lines.append(ln)
+        return out_lines
+    # 1) JSON / Python list attempts (whole string + best-effort substrings)
+    json_like = cleaned
+    substrings: List[str] = [json_like]
+    first_obj = json_like.find("{")
+    last_obj = json_like.rfind("}")
+    if first_obj != -1 and last_obj != -1 and last_obj > first_obj:
+        substrings.append(json_like[first_obj : last_obj + 1])
+    first_arr = json_like.find("[")
+    last_arr = json_like.rfind("]")
+    if first_arr != -1 and last_arr != -1 and last_arr > first_arr:
+        substrings.append(json_like[first_arr : last_arr + 1])
+    cleaned_strip = cleaned.strip()
+    for s in list(dict.fromkeys(substrings)):
+        s_strip = s.strip()
+        if not s_strip:
+            continue
+        is_derived_array_substring = (
+            s_strip.startswith("[") and s_strip.endswith("]") and s_strip != cleaned_strip
+        )
+        try:
+            before = len(candidates)
+            obj = json.loads(s_strip)
+            if isinstance(obj, dict):
+                facts = obj.get("facts")
+                if isinstance(facts, list):
+                    for it in facts:
+                        if isinstance(it, str):
+                            add_fact(it)
+                        elif isinstance(it, dict):
+                            ln = _kvt4_from_fact_dict(it)
+                            if ln:
+                                add_fact(ln)
+                else:
+                    grouped_lines = _kvt4_lines_from_grouped_obj(obj)
+                    if grouped_lines:
+                        for ln in grouped_lines:
+                            add_fact(ln)
+                        continue
+                    # Sometimes the whole object is a single fact dict.
+                    ln = _kvt4_from_fact_dict(obj)
+                    if ln:
+                        add_fact(ln)
+            elif isinstance(obj, list):
+                for it in obj:
+                    if isinstance(it, str):
+                        add_fact(it)
+                    elif isinstance(it, dict):
+                        # Avoid duplicate UNKNOWN facts when a grouped JSON object is
+                        # also parsed via its inner array substring (cluster context lost).
+                        if is_derived_array_substring and not _fact_dict_has_explicit_cluster(it):
+                            continue
+                        ln = _kvt4_from_fact_dict(it)
+                        if ln:
+                            add_fact(ln)
+            if len(candidates) > before:
+                structured_extracted = True
+        except Exception:
+            pass
+        try:
+            before = len(candidates)
+            obj = ast.literal_eval(s_strip)
+            if isinstance(obj, dict):
+                facts = obj.get("facts") if isinstance(obj.get("facts"), list) else None
+                if facts is not None:
+                    for it in facts:
+                        if isinstance(it, str):
+                            add_fact(it)
+                        elif isinstance(it, dict):
+                            ln = _kvt4_from_fact_dict(it)
+                            if ln:
+                                add_fact(ln)
+                else:
+                    grouped_lines = _kvt4_lines_from_grouped_obj(obj)
+                    if grouped_lines:
+                        for ln in grouped_lines:
+                            add_fact(ln)
+                        continue
+                    ln = _kvt4_from_fact_dict(obj)
+                    if ln:
+                        add_fact(ln)
+            elif isinstance(obj, list):
+                for it in obj:
+                    if isinstance(it, str):
+                        add_fact(it)
+                    elif isinstance(it, dict):
+                        if is_derived_array_substring and not _fact_dict_has_explicit_cluster(it):
+                            continue
+                        ln = _kvt4_from_fact_dict(it)
+                        if ln:
+                            add_fact(ln)
+            if len(candidates) > before:
+                structured_extracted = True
+        except Exception:
+            pass
+    # If we already extracted structured facts, do not run heuristic recovery
+    # branches below (they may introduce noisy duplicates on valid JSON payloads).
+    if structured_extracted and candidates:
+        return _dedupe_preserve_order(candidates)
+    # 1b) Partial/truncated JSON recovery:
+    # If the model output is cut mid-stream, json.loads fails even when many
+    # complete fact objects were already emitted. Recover those complete objects.
+    for m in _PARTIAL_JSON_FACT_RE.finditer(cleaned):
+        c, k, v, t = [x.strip() for x in m.groups()]
+        if c and k and v and t:
+            add_fact(f"{c}|{k}|{v}|{t}")
+    # 1c) Partial/truncated grouped JSON recovery:
+    # Recover complete {"K","V","T"} entries within each cluster block even when
+    # root JSON is truncated and json.loads fails.
+    cluster_hits = list(_PARTIAL_GROUPED_CLUSTER_BLOCK_RE.finditer(cleaned))
+    if cluster_hits:
+        for idx, hit in enumerate(cluster_hits):
+            cluster = str(hit.group("cluster") or "").strip().upper()
+            block_start = hit.end()
+            block_end = cluster_hits[idx + 1].start() if idx + 1 < len(cluster_hits) else len(cleaned)
+            block = cleaned[block_start:block_end]
+            for item in _PARTIAL_GROUPED_ITEM_RE.finditer(block):
+                k = str(item.group("k") or "").strip()
+                t = str(item.group("t") or "").strip()
+                v_tok = str(item.group("v") or "").strip()
+                if not k or not t:
+                    continue
+                if v_tok.startswith('"') and v_tok.endswith('"') and len(v_tok) >= 2:
+                    v = v_tok[1:-1]
+                elif v_tok.casefold() in {"true", "false"}:
+                    v = v_tok.casefold()
+                else:
+                    v = v_tok
+                if v:
+                    add_fact(f"{cluster}|{k}|{v}|{t}")
+    # 2) Extract between DSPy quotes «...»
+    for m in _DSPY_QUOTED_FACT_RE.finditer(cleaned):
+        inner = m.group(1).strip()
+        if "|" in inner:
+            add_fact(inner)
+    # 2b) Narrative markdown recovery.
+    # Some small models emit facts as multi-line markdown blocks:
+    #   **CLUSTER:** DEMOGRAPHICS
+    #   **Keyword:** Sex
+    #   **Value:** male
+    #   **Timestamp:** Admission
+    # Recover these into KVT4 lines.
+    _narrative_kv_re = re.compile(
+        r"\*{0,2}(cluster|keyword|value|timestamp)\s*:?\s*\*{0,2}\s*(.+)",
+        re.IGNORECASE,
+    )
+    cur: dict = {}
+    for line in cleaned.splitlines():
+        m = _narrative_kv_re.match(line.strip())
+        if not m:
+            continue
+        field = m.group(1).strip().lower()
+        val = m.group(2).strip().strip("*").strip()
+        if field == "cluster":
+            if cur.get("cluster") and cur.get("keyword") and cur.get("value"):
+                ts = cur.get("timestamp", "Unknown")
+                add_fact(f"{cur['cluster']}|{cur['keyword']}|{cur['value']}|{ts}")
+            cur = {"cluster": val}
+        elif field == "keyword":
+            # Flush previous fact within the same cluster before starting a new keyword
+            if cur.get("cluster") and cur.get("keyword") and cur.get("value"):
+                ts = cur.get("timestamp", "Unknown")
+                add_fact(f"{cur['cluster']}|{cur['keyword']}|{cur['value']}|{ts}")
+            cluster_keep = cur.get("cluster", "")
+            cur = {"cluster": cluster_keep, "keyword": val}
+        elif field in ("value", "timestamp"):
+            cur[field] = val
+    # flush last accumulated fact
+    if cur.get("cluster") and cur.get("keyword") and cur.get("value"):
+        ts = cur.get("timestamp", "Unknown")
+        add_fact(f"{cur['cluster']}|{cur['keyword']}|{cur['value']}|{ts}")
+    # 2c) Cluster-heading + inline JSON item recovery.
+    # Some models emit planning text like:
+    #   * **VITALS:**
+    #     ... -> {"K":"Heart Rate","V":54,"T":"Admission"}
+    # Recover such entries by tracking the current cluster heading.
+    heading_re = re.compile(r"\*{0,2}\s*([A-Z][A-Z ]{2,})\s*:\s*\*{0,2}\s*$")
+    cluster_inline_re = re.compile(
+        r"\b(DEMOGRAPHICS|VITALS|LABS|PROBLEMS|SYMPTOMS|MEDICATIONS|PROCEDURES|UTILIZATION|DISPOSITION)\b",
+        re.IGNORECASE,
+    )
+    item_re = re.compile(
+        r'\{\s*"K"\s*:\s*"(?P<k>[^"]+)"\s*,\s*"V"\s*:\s*(?P<v>"[^"]*"|-?\d+(?:\.\d+)?|true|false)\s*,\s*"T"\s*:\s*"(?P<t>Past|Admission|Discharge|Unknown)"\s*\}',
+        re.IGNORECASE,
+    )
+    cur_cluster = ""
+    for raw_line in cleaned.splitlines():
+        line = raw_line.strip()
+        if not line:
+            continue
+        # Avoid cluster-bleed on compact one-line JSON objects:
+        # grouped payloads should be handled by structured parsing above.
+        if line.startswith("{") or line.startswith("["):
+            continue
+        # Accept headings like "**VITALS:**", "VITALS:", "*   **VITALS:**"
+        norm = re.sub(r"^[*•\-\s]+", "", line)
+        norm = norm.strip("* ").strip()
+        hm = heading_re.match(norm)
+        if hm:
+            c_raw = hm.group(1).strip().upper()
+            c_norm = _map_category_to_cluster(c_raw)
+            c_up = str(c_norm).strip().upper()
+            if c_up in READMISSION_CLUSTERS:
+                cur_cluster = c_up
+            continue
+        # Inline headings like:
+        # "- **VITALS:** ... -> {\"K\":\"Heart Rate\",...}"
+        cm = cluster_inline_re.search(norm)
+        if cm:
+            c_raw = cm.group(1).strip().upper()
+            c_norm = _map_category_to_cluster(c_raw)
+            c_up = str(c_norm).strip().upper()
+            if c_up in READMISSION_CLUSTERS:
+                cur_cluster = c_up
+        if not cur_cluster:
+            continue
+        for m in item_re.finditer(line):
+            k = str(m.group("k") or "").strip()
+            v_tok = str(m.group("v") or "").strip()
+            t = str(m.group("t") or "").strip()
+            if not k or not t:
+                continue
+            if v_tok.startswith('"') and v_tok.endswith('"') and len(v_tok) >= 2:
+                v = v_tok[1:-1]
+            else:
+                v = v_tok.casefold() if v_tok.casefold() in {"true", "false"} else v_tok
+            if v:
+                add_fact(f"{cur_cluster}|{k}|{v}|{t}")
+    # 3) Line-by-line heuristics (bullets / numbering / quoted JSON fragments)
+    for line in cleaned.splitlines():
+        s = line.strip()
+        if not s:
+            continue
+        s = re.sub(r"^\[\d+\]\s*", "", s)
+        s = re.sub(r"^[-*•]\s*", "", s)
+        s = s.strip().strip("«»\"'")
+        s = s.rstrip(",")
+        if "|" in s:
+            add_fact(s)
+    # De-duplicate while preserving order
+    return _dedupe_preserve_order(candidates)
def normalize_readmission_kvt4_lines(lines: List[str]) -> tuple[List[str], dict]:
    """Normalize KVT4 lines into canonical READMISSION_MVP form.

    Every usable input line has the shape ``CLUSTER|Keyword|Value|Timestamp``
    (exactly three ``|`` separators). Anything else is silently skipped.

    Goals:
    - Boost strict-format usability by deterministic canonicalization
    - Reduce drift (Blood Pressure -> Systolic BP/Diastolic BP,
      Oxygen Saturation -> SpO2, etc.)
    - Enforce numeric-only values for VITALS/LABS (+ known numeric fields)
    - Enforce at most one line per (CLUSTER, Keyword) via timestamp-priority dedupe

    Environment knobs read on every call:
    - ``MEDGEMMA_TIMESTAMP_PRIORITY``: comma-separated timestamp order used by
      the dedupe pass (default ``Discharge,Admission,Past,Unknown``).
    - ``MEDGEMMA_ALLOWED_CLUSTERS``: optional comma-separated cluster allow-list.
    - ``MEDGEMMA_TIMESTAMP_FILL_UNKNOWN``: ``1``/``true``/``yes`` (default on)
      replaces ``Unknown`` timestamps with a per-cluster default.

    Args:
        lines: Raw KVT4 candidate lines; ``None`` or an empty list is accepted.

    Returns:
        ``(normalized_lines, stats)`` where ``normalized_lines`` is sorted by
        (cluster, keyword) and ``stats`` is a dict of counters and rates that
        describe what was parsed, dropped, expanded and emitted.
    """

    def _parse_line(line: str) -> Optional[tuple[str, str, str, str]]:
        """Split one KVT4 line into stripped fields, or return None if malformed."""
        if not isinstance(line, str):
            return None
        s = line.strip()
        if s.count("|") != 3:
            return None
        c, k, v, t = (p.strip() for p in s.split("|"))
        if not c or not k or not v:
            return None
        # A missing timestamp is tolerated and treated as Unknown.
        return c, k, v, t or "Unknown"

    def _normalize_timestamp(t: str) -> str:
        """Keep the four canonical timestamps; map everything else to Unknown."""
        tt = (t or "").strip()
        return tt if tt in {"Admission", "Discharge", "Past", "Unknown"} else "Unknown"

    def _fill_unknown_timestamp(cluster: str, keyword: str, value: str) -> str:
        """Best-effort timestamp fill for strict-eval stability.

        Policy is ontology-driven (not note-section heuristics):
        - DISPOSITION: Discharge
        - UTILIZATION: Past
        - PROBLEMS: Discharge if acute, otherwise Past (history-like framing)
        - every other cluster: Admission
        """
        c = (cluster or "").strip().upper()
        if c == "DISPOSITION":
            return "Discharge"
        if c == "UTILIZATION":
            return "Past"
        if c == "PROBLEMS":
            return "Discharge" if (value or "").strip().lower() == "acute" else "Past"
        return "Admission"

    def _first_number(value: str) -> Optional[str]:
        """Return the first (optionally signed, optionally decimal) number in value."""
        m = re.search(r"-?\d+(?:\.\d+)?", value or "")
        return m.group(0) if m else None

    def _yes_no(value: str) -> Optional[str]:
        """Collapse common boolean spellings to 'yes'/'no'; None if unrecognized."""
        vv = value.strip().lower()
        if vv in {"yes", "y", "true", "1"}:
            return "yes"
        if vv in {"no", "n", "false", "0"}:
            return "no"
        return None

    # Keyword aliases (strict clusters).
    vital_alias = {
        "HR": "Heart Rate",
        "Pulse": "Heart Rate",
        "Temp": "Temperature",
        "O2 Sat": "SpO2",
        "Oxygen Saturation": "SpO2",
        "SpO2": "SpO2",
        "Resp": "Respiratory Rate",
        "RR": "Respiratory Rate",
        "Blood Pressure": "Blood Pressure",  # special-case splitter
        "BP": "Blood Pressure",
        "Systolic": "Systolic BP",
        "Diastolic": "Diastolic BP",
        "SBP": "Systolic BP",
        "DBP": "Diastolic BP",
    }
    lab_alias = {
        "Hgb": "Hemoglobin",
        "Hct": "Hematocrit",
        "Plt": "Platelet",
        "Platelets": "Platelet",
        "Na": "Sodium",
        "K": "Potassium",
        "Cr": "Creatinine",
        "HCO3": "Bicarbonate",
        "Bicarb": "Bicarbonate",
        "WBC": "WBC",
        "BUN": "BUN",
    }
    sex_alias = {"m": "male", "male": "male", "f": "female", "female": "female"}

    def _canonical_value(cluster: str, keyword: str, value: str) -> Optional[str]:
        """Return the canonical value for a fact, or None when it must be dropped.

        Keywords without a dedicated rule pass their value through unchanged.
        """
        low = value.strip().lower()

        if cluster == "DEMOGRAPHICS":
            if keyword == "Sex":
                return sex_alias.get(low)
            if keyword == "Age":
                return _first_number(value)
            return value

        # Numeric-only clusters (Blood Pressure is split by the caller).
        if cluster in {"VITALS", "LABS", "UTILIZATION"}:
            return _first_number(value)

        if cluster == "MEDICATIONS":
            if keyword in {"Medication Count", "New Medications Count"}:
                return _first_number(value)
            if keyword in {
                "Polypharmacy",
                "Anticoagulation",
                "Insulin Therapy",
                "Opioid Therapy",
                "Diuretic Therapy",
            }:
                return _yes_no(value)
            return value

        if cluster == "PROCEDURES":
            if keyword in {"Any Procedure", "Surgery"}:
                return _yes_no(value)
            if keyword == "Dialysis":
                allowed = {"decided", "started", "done", "cancelled", "no"}
                return low if low in allowed else None
            if keyword == "Mechanical Ventilation":
                # Either an explicit "no" or a numeric value.
                return "no" if low == "no" else _first_number(value)
            return value

        if cluster == "DISPOSITION":
            if keyword == "Discharge Disposition":
                # Normalize into the prompt enums; order matters ("home with
                # services" must win over plain "home").
                if "home with" in low or "home w" in low or "services" in low:
                    return "Home with Services"
                if low == "home" or low.startswith("home "):
                    return "Home"
                if "snf" in low or "skilled nursing" in low:
                    return "SNF"
                if "rehab" in low:
                    return "Rehab"
                if "ltac" in low:
                    return "LTAC"
                if "hospice" in low:
                    return "Hospice"
                if "ama" in low or "against medical advice" in low:
                    return "AMA"
                return None
            if keyword == "Mental Status":
                if "confus" in low:
                    return "confused"
                if "letharg" in low:
                    return "lethargic"
                if "alert" in low:
                    return "alert"
                if "orient" in low:
                    return "oriented"
                return None
            return value

        if cluster == "PROBLEMS":
            vv = re.sub(r"\s+", " ", low)
            if vv in {"chronic", "acute", "exist", "not exist"}:
                return vv
            if vv in {"past", "history", "historical", "pmh", "chronic condition", "chronic disease"}:
                return "chronic"
            if vv in {"discharge", "discharged", "active", "current"}:
                return "acute"
            if vv in {"present", "yes", "true", "1", "positive", "confirmed", "exists"}:
                return "exist"
            if vv in {"no", "none", "false", "0", "absent", "negative", "not present", "ruled out"}:
                return "not exist"
            return None

        if cluster == "SYMPTOMS":
            vv = re.sub(r"\s+", " ", low)
            if vv in {"yes", "no", "severe"}:
                return vv
            if vv in {"present", "positive", "true", "1", "y", "symptomatic"}:
                return "yes"
            if vv in {"none", "absent", "negative", "false", "0", "n", "denied", "denies"}:
                return "no"
            if "severe" in vv or vv in {"marked", "significant"}:
                return "severe"
            return None

        return value

    # Dedupe priority can be configured per mode.
    # For full readmission feature set we generally care about discharge/most-recent.
    ts_priority = [
        s.strip()
        for s in os.getenv("MEDGEMMA_TIMESTAMP_PRIORITY", "Discharge,Admission,Past,Unknown").split(",")
        if s.strip()
    ]
    ts_rank = {t: i for i, t in enumerate(ts_priority)}

    stats = {
        "input_lines": len(lines or []),
        "parsed_kvt4": 0,
        "dropped_placeholders": 0,
        "dropped_noncanonical": 0,
        "dropped_by_allowed_clusters": 0,
        "expanded_bp": 0,
        "dedup_dropped": 0,
        "output_lines": 0,
        "canonical_keyword_rate_strict": None,
        "numeric_only_rate_vitals_labs": None,
        "duplicates_after_dedup": 0,
    }

    allowed_clusters_env = os.getenv("MEDGEMMA_ALLOWED_CLUSTERS", "").strip()
    allowed_clusters = None
    if allowed_clusters_env:
        allowed_clusters = {c.strip().upper() for c in allowed_clusters_env.split(",") if c.strip()}

    fill_unknown = os.getenv("MEDGEMMA_TIMESTAMP_FILL_UNKNOWN", "1").strip().lower() in {"1", "true", "yes"}

    # First pass: normalize + expand BP
    normalized_candidates: List[tuple[str, str, str, str]] = []
    for line in lines or []:
        parsed = _parse_line(line)
        if not parsed:
            continue
        c, k, v, t = parsed
        stats["parsed_kvt4"] += 1

        c_up = str(c).strip().upper()
        if allowed_clusters is not None and c_up not in allowed_clusters:
            stats["dropped_by_allowed_clusters"] += 1
            continue

        t_norm = _normalize_timestamp(t)
        k_norm = k.strip()
        v_norm = v.strip()

        # Drop obvious placeholders
        if v_norm in {"___", "__", "_", "N/A", "NA", "null", "None"}:
            stats["dropped_placeholders"] += 1
            continue

        if c_up == "VITALS":
            k_norm = vital_alias.get(k_norm, k_norm)
            if k_norm == "Blood Pressure":
                # Expand 120/80 -> Systolic BP + Diastolic BP
                m = re.search(r"(\d+(?:\.\d+)?)\s*/\s*(\d+(?:\.\d+)?)", v_norm)
                if not m:
                    stats["dropped_noncanonical"] += 1
                    continue
                sbp, dbp = m.group(1), m.group(2)
                # Apply the same Unknown-timestamp fill as every other fact;
                # this branch leaves the loop early, so it must do it itself.
                if fill_unknown and t_norm == "Unknown":
                    t_norm = _fill_unknown_timestamp("VITALS", "Systolic BP", sbp)
                normalized_candidates.append(("VITALS", "Systolic BP", sbp, t_norm))
                normalized_candidates.append(("VITALS", "Diastolic BP", dbp, t_norm))
                stats["expanded_bp"] += 1
                continue
        elif c_up == "LABS":
            k_norm = lab_alias.get(k_norm, k_norm)

        # Cluster-specific value normalization
        canonical = _canonical_value(c_up, k_norm, v_norm)
        if canonical is None:
            stats["dropped_noncanonical"] += 1
            continue
        v_norm = canonical

        # Drop non-canonical keywords for strict clusters (objective ones).
        if c_up in STRICT_KEYWORDS_READMISSION:
            if k_norm not in STRICT_KEYWORDS_READMISSION[c_up]:
                stats["dropped_noncanonical"] += 1
                continue

        if fill_unknown and t_norm == "Unknown":
            t_norm = _fill_unknown_timestamp(c_up, k_norm, v_norm)
        normalized_candidates.append((c_up, k_norm, v_norm, t_norm))

    # Second pass: dedupe by (CLUSTER, Keyword) using timestamp priority.
    # On equal priority the earliest candidate wins.
    best: dict[tuple[str, str], tuple[str, str, str, str]] = {}
    for candidate in normalized_candidates:
        key = candidate[:2]
        previous = best.get(key)
        if previous is None:
            best[key] = candidate
            continue
        # Exactly one of the two colliding facts is discarded, whichever wins.
        stats["dedup_dropped"] += 1
        if ts_rank.get(candidate[3], 999) < ts_rank.get(previous[3], 999):
            best[key] = candidate

    kept = sorted(best.values(), key=lambda fact: (fact[0], fact[1]))
    out_lines = [f"{c}|{k}|{v}|{t}" for c, k, v, t in kept]

    # Metrics: canonical + numeric-only compliance for VITALS/LABS.
    strict_total = 0
    strict_ok = 0
    vitlab_total = 0
    vitlab_numeric = 0
    key_counts: dict[tuple[str, str], int] = {}
    for c, k, v, _t in kept:
        key_counts[(c, k)] = key_counts.get((c, k), 0) + 1
        if c in STRICT_KEYWORDS_READMISSION:
            strict_total += 1
            if k in STRICT_KEYWORDS_READMISSION[c]:
                strict_ok += 1
        if c in {"VITALS", "LABS"}:
            vitlab_total += 1
            if re.fullmatch(r"-?\d+(?:\.\d+)?", v.strip()):
                vitlab_numeric += 1

    stats["duplicates_after_dedup"] = sum(1 for cnt in key_counts.values() if cnt > 1)
    stats["output_lines"] = len(out_lines)
    stats["canonical_keyword_rate_strict"] = (strict_ok / strict_total) if strict_total else 1.0
    stats["numeric_only_rate_vitals_labs"] = (vitlab_numeric / vitlab_total) if vitlab_total else 1.0
    return out_lines, stats
def _normalize_mode(mode: Optional[str]) -> str:
    """Normalize a mode string to its canonical name.

    Matching is case-insensitive, ignores surrounding whitespace and treats
    ``-`` like ``_``.

    Args:
        mode: User-supplied mode name or alias; may be ``None`` or empty.

    Returns:
        One of ``"CCDE_ADMISSION"``, ``"READMISSION_TABULAR"``,
        ``"READMISSION_STRUCTURED"`` or ``"READMISSION_DISCHARGE"``. The last
        is the fallback for missing, empty or unrecognized input.
    """
    default = "READMISSION_DISCHARGE"
    if not mode:
        return default

    # Alias -> canonical mode. Anything not listed falls back to the default.
    aliases = {
        "CCDE": "CCDE_ADMISSION",
        "CCDE_ADMISSION": "CCDE_ADMISSION",
        "TABULAR": "READMISSION_TABULAR",
        "READMISSION_TABULAR": "READMISSION_TABULAR",
        "MVP_TABULAR": "READMISSION_TABULAR",
        "TOON_TABULAR": "READMISSION_TABULAR",
        "STRUCTURED": "READMISSION_STRUCTURED",
        "READMISSION_STRUCTURED": "READMISSION_STRUCTURED",
        "PYDANTIC": "READMISSION_STRUCTURED",
        "STRUCTURED_OUTPUT": "READMISSION_STRUCTURED",
    }
    # Strip first: values coming from env vars / CLI often carry stray
    # whitespace, which would otherwise silently select the default mode.
    token = mode.strip().upper().replace("-", "_")
    return aliases.get(token, default)
+# =============================================================================
+# CUSTOM DSPy ADAPTER FOR MEDGEMMA (Local Transformers)
+# =============================================================================

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ gradio>=4.44,<6
2	+ pandas>=2.0,<3

scripts/run_two_stage_structured_pipeline.py ADDED Viewed

The diff for this file is too large to render. See raw diff