DocUA committed on
Commit
7a12bf7
·
0 Parent(s):

Initial StructCore Space

Browse files
Analysis_Readmission/config/scoring_rules.json ADDED
@@ -0,0 +1,1462 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_meta": {
3
+ "version": "1.0",
4
+ "description": "Complete scoring rules for all 9 ONTOLOGY clusters. Each cluster has range-based or categorical scoring with evidence-based weights.",
5
+ "max_theoretical_scores": {
6
+ "DEMOGRAPHICS": 10,
7
+ "VITALS": 25,
8
+ "LABS": 30,
9
+ "PROBLEMS": 40,
10
+ "SYMPTOMS": 15,
11
+ "MEDICATIONS": 15,
12
+ "PROCEDURES": 15,
13
+ "UTILIZATION": 20,
14
+ "DISPOSITION": 15,
15
+ "INTERACTIONS": 30,
16
+ "TOTAL_THEORETICAL_MAX": 215
17
+ },
18
+ "calibration": {
19
+ "description": "Logistic transform: P = 1 / (1 + exp(-(alpha + beta * score)))",
20
+ "alpha": -2.3475,
21
+ "beta": 0.017,
22
+ "baseline_readmission_rate": 0.2069,
23
+ "notes": "Calibrated on 203 MIMIC-IV admissions (8 clusters, FHIR labels, SNOMED v2). AUC=0.5555."
24
+ },
25
+ "risk_categories": [
26
+ {
27
+ "name": "Low",
28
+ "score_min": 0,
29
+ "score_max": 19,
30
+ "probability_range": "5-12%",
31
+ "color": "green"
32
+ },
33
+ {
34
+ "name": "Medium",
35
+ "score_min": 20,
36
+ "score_max": 39,
37
+ "probability_range": "13-28%",
38
+ "color": "yellow"
39
+ },
40
+ {
41
+ "name": "High",
42
+ "score_min": 40,
43
+ "score_max": 59,
44
+ "probability_range": "29-53%",
45
+ "color": "orange"
46
+ },
47
+ {
48
+ "name": "Critical",
49
+ "score_min": 60,
50
+ "score_max": 999,
51
+ "probability_range": "54%+",
52
+ "color": "red"
53
+ }
54
+ ]
55
+ },
56
+ "DEMOGRAPHICS": {
57
+ "max_score": 10,
58
+ "keywords": {
59
+ "Age": {
60
+ "type": "range",
61
+ "ranges": [
62
+ {
63
+ "min": 0,
64
+ "max": 39,
65
+ "score": 0,
66
+ "label": "Young adult"
67
+ },
68
+ {
69
+ "min": 40,
70
+ "max": 54,
71
+ "score": 1,
72
+ "label": "Middle age"
73
+ },
74
+ {
75
+ "min": 55,
76
+ "max": 64,
77
+ "score": 2,
78
+ "label": "Pre-elderly"
79
+ },
80
+ {
81
+ "min": 65,
82
+ "max": 74,
83
+ "score": 4,
84
+ "label": "Young elderly"
85
+ },
86
+ {
87
+ "min": 75,
88
+ "max": 84,
89
+ "score": 6,
90
+ "label": "Old elderly"
91
+ },
92
+ {
93
+ "min": 85,
94
+ "max": 999,
95
+ "score": 8,
96
+ "label": "Very old"
97
+ }
98
+ ],
99
+ "missing_score": 2,
100
+ "evidence": "Age >65 is consistently associated with higher readmission (OR 1.3-1.8)"
101
+ },
102
+ "Sex": {
103
+ "type": "categorical",
104
+ "values": {
105
+ "male": 1,
106
+ "female": 0
107
+ },
108
+ "missing_score": 0,
109
+ "evidence": "Male sex associated with slightly higher 30-day readmission (OR ~1.1)"
110
+ }
111
+ }
112
+ },
113
+ "VITALS": {
114
+ "max_score": 25,
115
+ "keywords": {
116
+ "Heart Rate": {
117
+ "type": "range",
118
+ "unit": "bpm",
119
+ "ranges": [
120
+ {
121
+ "min": 0,
122
+ "max": 49,
123
+ "score": 3,
124
+ "label": "Severe bradycardia"
125
+ },
126
+ {
127
+ "min": 50,
128
+ "max": 59,
129
+ "score": 1,
130
+ "label": "Mild bradycardia"
131
+ },
132
+ {
133
+ "min": 60,
134
+ "max": 100,
135
+ "score": 0,
136
+ "label": "Normal"
137
+ },
138
+ {
139
+ "min": 101,
140
+ "max": 110,
141
+ "score": 1,
142
+ "label": "Mild tachycardia"
143
+ },
144
+ {
145
+ "min": 111,
146
+ "max": 130,
147
+ "score": 3,
148
+ "label": "Tachycardia"
149
+ },
150
+ {
151
+ "min": 131,
152
+ "max": 999,
153
+ "score": 5,
154
+ "label": "Severe tachycardia"
155
+ }
156
+ ],
157
+ "plausibility": {
158
+ "min": 30,
159
+ "max": 220
160
+ }
161
+ },
162
+ "Systolic BP": {
163
+ "type": "range",
164
+ "unit": "mmHg",
165
+ "ranges": [
166
+ {
167
+ "min": 0,
168
+ "max": 89,
169
+ "score": 5,
170
+ "label": "Hypotension"
171
+ },
172
+ {
173
+ "min": 90,
174
+ "max": 99,
175
+ "score": 3,
176
+ "label": "Borderline low"
177
+ },
178
+ {
179
+ "min": 100,
180
+ "max": 139,
181
+ "score": 0,
182
+ "label": "Normal"
183
+ },
184
+ {
185
+ "min": 140,
186
+ "max": 159,
187
+ "score": 1,
188
+ "label": "Stage 1 HTN"
189
+ },
190
+ {
191
+ "min": 160,
192
+ "max": 179,
193
+ "score": 2,
194
+ "label": "Stage 2 HTN"
195
+ },
196
+ {
197
+ "min": 180,
198
+ "max": 999,
199
+ "score": 4,
200
+ "label": "Hypertensive urgency"
201
+ }
202
+ ],
203
+ "plausibility": {
204
+ "min": 50,
205
+ "max": 260
206
+ }
207
+ },
208
+ "Diastolic BP": {
209
+ "type": "range",
210
+ "unit": "mmHg",
211
+ "ranges": [
212
+ {
213
+ "min": 0,
214
+ "max": 59,
215
+ "score": 2,
216
+ "label": "Low diastolic"
217
+ },
218
+ {
219
+ "min": 60,
220
+ "max": 89,
221
+ "score": 0,
222
+ "label": "Normal"
223
+ },
224
+ {
225
+ "min": 90,
226
+ "max": 99,
227
+ "score": 1,
228
+ "label": "Elevated"
229
+ },
230
+ {
231
+ "min": 100,
232
+ "max": 999,
233
+ "score": 3,
234
+ "label": "High diastolic"
235
+ }
236
+ ],
237
+ "plausibility": {
238
+ "min": 20,
239
+ "max": 160
240
+ }
241
+ },
242
+ "Respiratory Rate": {
243
+ "type": "range",
244
+ "unit": "breaths/min",
245
+ "ranges": [
246
+ {
247
+ "min": 0,
248
+ "max": 9,
249
+ "score": 4,
250
+ "label": "Bradypnea"
251
+ },
252
+ {
253
+ "min": 10,
254
+ "max": 11,
255
+ "score": 2,
256
+ "label": "Low normal"
257
+ },
258
+ {
259
+ "min": 12,
260
+ "max": 20,
261
+ "score": 0,
262
+ "label": "Normal"
263
+ },
264
+ {
265
+ "min": 21,
266
+ "max": 24,
267
+ "score": 2,
268
+ "label": "Mild tachypnea"
269
+ },
270
+ {
271
+ "min": 25,
272
+ "max": 30,
273
+ "score": 4,
274
+ "label": "Tachypnea"
275
+ },
276
+ {
277
+ "min": 31,
278
+ "max": 999,
279
+ "score": 6,
280
+ "label": "Severe tachypnea"
281
+ }
282
+ ],
283
+ "plausibility": {
284
+ "min": 5,
285
+ "max": 60
286
+ }
287
+ },
288
+ "Temperature": {
289
+ "type": "range",
290
+ "unit": "°F",
291
+ "ranges": [
292
+ {
293
+ "min": 0,
294
+ "max": 96.7,
295
+ "score": 3,
296
+ "label": "Hypothermia"
297
+ },
298
+ {
299
+ "min": 96.8,
300
+ "max": 99.5,
301
+ "score": 0,
302
+ "label": "Normal"
303
+ },
304
+ {
305
+ "min": 99.6,
306
+ "max": 100.3,
307
+ "score": 1,
308
+ "label": "Low-grade fever"
309
+ },
310
+ {
311
+ "min": 100.4,
312
+ "max": 101.9,
313
+ "score": 2,
314
+ "label": "Fever"
315
+ },
316
+ {
317
+ "min": 102.0,
318
+ "max": 999,
319
+ "score": 4,
320
+ "label": "High fever"
321
+ }
322
+ ],
323
+ "plausibility": {
324
+ "min": 90,
325
+ "max": 110
326
+ }
327
+ },
328
+ "SpO2": {
329
+ "type": "range",
330
+ "unit": "%",
331
+ "ranges": [
332
+ {
333
+ "min": 95,
334
+ "max": 100,
335
+ "score": 0,
336
+ "label": "Normal"
337
+ },
338
+ {
339
+ "min": 92,
340
+ "max": 94,
341
+ "score": 2,
342
+ "label": "Mild hypoxia"
343
+ },
344
+ {
345
+ "min": 88,
346
+ "max": 91,
347
+ "score": 4,
348
+ "label": "Moderate hypoxia"
349
+ },
350
+ {
351
+ "min": 0,
352
+ "max": 87,
353
+ "score": 6,
354
+ "label": "Severe hypoxia"
355
+ }
356
+ ],
357
+ "plausibility": {
358
+ "min": 50,
359
+ "max": 100
360
+ }
361
+ },
362
+ "Weight": {
363
+ "type": "no_direct_score",
364
+ "note": "Weight alone does not score, but used in interaction patterns (e.g., BMI, fluid overload)",
365
+ "plausibility": {
366
+ "min": 20,
367
+ "max": 300
368
+ }
369
+ }
370
+ }
371
+ },
372
+ "LABS": {
373
+ "max_score": 30,
374
+ "keywords": {
375
+ "Hemoglobin": {
376
+ "type": "range",
377
+ "unit": "g/dL",
378
+ "ranges": [
379
+ {
380
+ "min": 0,
381
+ "max": 6.9,
382
+ "score": 6,
383
+ "label": "Critical anemia"
384
+ },
385
+ {
386
+ "min": 7.0,
387
+ "max": 9.9,
388
+ "score": 4,
389
+ "label": "Moderate anemia"
390
+ },
391
+ {
392
+ "min": 10.0,
393
+ "max": 11.9,
394
+ "score": 2,
395
+ "label": "Mild anemia"
396
+ },
397
+ {
398
+ "min": 12.0,
399
+ "max": 17.0,
400
+ "score": 0,
401
+ "label": "Normal"
402
+ },
403
+ {
404
+ "min": 17.1,
405
+ "max": 999,
406
+ "score": 2,
407
+ "label": "Polycythemia"
408
+ }
409
+ ],
410
+ "plausibility": {
411
+ "min": 2,
412
+ "max": 25
413
+ }
414
+ },
415
+ "Hematocrit": {
416
+ "type": "range",
417
+ "unit": "%",
418
+ "ranges": [
419
+ {
420
+ "min": 0,
421
+ "max": 20.9,
422
+ "score": 5,
423
+ "label": "Critical low"
424
+ },
425
+ {
426
+ "min": 21.0,
427
+ "max": 29.9,
428
+ "score": 3,
429
+ "label": "Low"
430
+ },
431
+ {
432
+ "min": 30.0,
433
+ "max": 35.9,
434
+ "score": 1,
435
+ "label": "Mildly low"
436
+ },
437
+ {
438
+ "min": 36.0,
439
+ "max": 45.0,
440
+ "score": 0,
441
+ "label": "Normal"
442
+ },
443
+ {
444
+ "min": 45.1,
445
+ "max": 999,
446
+ "score": 1,
447
+ "label": "Elevated"
448
+ }
449
+ ],
450
+ "plausibility": {
451
+ "min": 5,
452
+ "max": 70
453
+ }
454
+ },
455
+ "WBC": {
456
+ "type": "range",
457
+ "unit": "K/uL",
458
+ "ranges": [
459
+ {
460
+ "min": 0,
461
+ "max": 3.9,
462
+ "score": 2,
463
+ "label": "Leukopenia"
464
+ },
465
+ {
466
+ "min": 4.0,
467
+ "max": 11.0,
468
+ "score": 0,
469
+ "label": "Normal"
470
+ },
471
+ {
472
+ "min": 11.1,
473
+ "max": 15.0,
474
+ "score": 1,
475
+ "label": "Mild leukocytosis"
476
+ },
477
+ {
478
+ "min": 15.1,
479
+ "max": 20.0,
480
+ "score": 3,
481
+ "label": "Moderate leukocytosis"
482
+ },
483
+ {
484
+ "min": 20.1,
485
+ "max": 999,
486
+ "score": 5,
487
+ "label": "Severe leukocytosis"
488
+ }
489
+ ],
490
+ "plausibility": {
491
+ "min": 0.1,
492
+ "max": 200
493
+ }
494
+ },
495
+ "Platelet": {
496
+ "type": "range",
497
+ "unit": "K/uL",
498
+ "ranges": [
499
+ {
500
+ "min": 0,
501
+ "max": 49,
502
+ "score": 5,
503
+ "label": "Severe thrombocytopenia"
504
+ },
505
+ {
506
+ "min": 50,
507
+ "max": 99,
508
+ "score": 3,
509
+ "label": "Moderate thrombocytopenia"
510
+ },
511
+ {
512
+ "min": 100,
513
+ "max": 149,
514
+ "score": 1,
515
+ "label": "Mild thrombocytopenia"
516
+ },
517
+ {
518
+ "min": 150,
519
+ "max": 400,
520
+ "score": 0,
521
+ "label": "Normal"
522
+ },
523
+ {
524
+ "min": 401,
525
+ "max": 999,
526
+ "score": 2,
527
+ "label": "Thrombocytosis"
528
+ }
529
+ ],
530
+ "plausibility": {
531
+ "min": 1,
532
+ "max": 2000
533
+ }
534
+ },
535
+ "Sodium": {
536
+ "type": "range",
537
+ "unit": "mEq/L",
538
+ "ranges": [
539
+ {
540
+ "min": 0,
541
+ "max": 129,
542
+ "score": 5,
543
+ "label": "Severe hyponatremia"
544
+ },
545
+ {
546
+ "min": 130,
547
+ "max": 134,
548
+ "score": 3,
549
+ "label": "Mild hyponatremia"
550
+ },
551
+ {
552
+ "min": 135,
553
+ "max": 145,
554
+ "score": 0,
555
+ "label": "Normal"
556
+ },
557
+ {
558
+ "min": 146,
559
+ "max": 150,
560
+ "score": 2,
561
+ "label": "Mild hypernatremia"
562
+ },
563
+ {
564
+ "min": 151,
565
+ "max": 999,
566
+ "score": 4,
567
+ "label": "Severe hypernatremia"
568
+ }
569
+ ],
570
+ "plausibility": {
571
+ "min": 100,
572
+ "max": 180
573
+ }
574
+ },
575
+ "Potassium": {
576
+ "type": "range",
577
+ "unit": "mEq/L",
578
+ "ranges": [
579
+ {
580
+ "min": 0,
581
+ "max": 2.9,
582
+ "score": 5,
583
+ "label": "Severe hypokalemia"
584
+ },
585
+ {
586
+ "min": 3.0,
587
+ "max": 3.4,
588
+ "score": 2,
589
+ "label": "Mild hypokalemia"
590
+ },
591
+ {
592
+ "min": 3.5,
593
+ "max": 5.0,
594
+ "score": 0,
595
+ "label": "Normal"
596
+ },
597
+ {
598
+ "min": 5.1,
599
+ "max": 5.5,
600
+ "score": 2,
601
+ "label": "Mild hyperkalemia"
602
+ },
603
+ {
604
+ "min": 5.6,
605
+ "max": 6.0,
606
+ "score": 4,
607
+ "label": "Moderate hyperkalemia"
608
+ },
609
+ {
610
+ "min": 6.1,
611
+ "max": 999,
612
+ "score": 6,
613
+ "label": "Severe hyperkalemia"
614
+ }
615
+ ],
616
+ "plausibility": {
617
+ "min": 1.5,
618
+ "max": 8.0
619
+ }
620
+ },
621
+ "Creatinine": {
622
+ "type": "range",
623
+ "unit": "mg/dL",
624
+ "ranges": [
625
+ {
626
+ "min": 0,
627
+ "max": 1.2,
628
+ "score": 0,
629
+ "label": "Normal"
630
+ },
631
+ {
632
+ "min": 1.3,
633
+ "max": 1.9,
634
+ "score": 2,
635
+ "label": "Mildly elevated"
636
+ },
637
+ {
638
+ "min": 2.0,
639
+ "max": 3.0,
640
+ "score": 4,
641
+ "label": "Moderate renal impairment"
642
+ },
643
+ {
644
+ "min": 3.1,
645
+ "max": 999,
646
+ "score": 6,
647
+ "label": "Severe renal impairment"
648
+ }
649
+ ],
650
+ "plausibility": {
651
+ "min": 0.1,
652
+ "max": 20
653
+ }
654
+ },
655
+ "BUN": {
656
+ "type": "range",
657
+ "unit": "mg/dL",
658
+ "ranges": [
659
+ {
660
+ "min": 0,
661
+ "max": 6,
662
+ "score": 1,
663
+ "label": "Low (malnutrition?)"
664
+ },
665
+ {
666
+ "min": 7,
667
+ "max": 20,
668
+ "score": 0,
669
+ "label": "Normal"
670
+ },
671
+ {
672
+ "min": 21,
673
+ "max": 40,
674
+ "score": 2,
675
+ "label": "Mildly elevated"
676
+ },
677
+ {
678
+ "min": 41,
679
+ "max": 60,
680
+ "score": 4,
681
+ "label": "Moderate azotemia"
682
+ },
683
+ {
684
+ "min": 61,
685
+ "max": 999,
686
+ "score": 6,
687
+ "label": "Severe azotemia"
688
+ }
689
+ ],
690
+ "plausibility": {
691
+ "min": 1,
692
+ "max": 200
693
+ }
694
+ },
695
+ "Glucose": {
696
+ "type": "range",
697
+ "unit": "mg/dL",
698
+ "ranges": [
699
+ {
700
+ "min": 0,
701
+ "max": 69,
702
+ "score": 4,
703
+ "label": "Hypoglycemia"
704
+ },
705
+ {
706
+ "min": 70,
707
+ "max": 140,
708
+ "score": 0,
709
+ "label": "Normal"
710
+ },
711
+ {
712
+ "min": 141,
713
+ "max": 200,
714
+ "score": 1,
715
+ "label": "Mild hyperglycemia"
716
+ },
717
+ {
718
+ "min": 201,
719
+ "max": 300,
720
+ "score": 2,
721
+ "label": "Moderate hyperglycemia"
722
+ },
723
+ {
724
+ "min": 301,
725
+ "max": 999,
726
+ "score": 4,
727
+ "label": "Severe hyperglycemia"
728
+ }
729
+ ],
730
+ "plausibility": {
731
+ "min": 20,
732
+ "max": 1000
733
+ }
734
+ },
735
+ "Bicarbonate": {
736
+ "type": "range",
737
+ "unit": "mEq/L",
738
+ "ranges": [
739
+ {
740
+ "min": 0,
741
+ "max": 17,
742
+ "score": 4,
743
+ "label": "Severe acidosis"
744
+ },
745
+ {
746
+ "min": 18,
747
+ "max": 21,
748
+ "score": 2,
749
+ "label": "Mild acidosis"
750
+ },
751
+ {
752
+ "min": 22,
753
+ "max": 28,
754
+ "score": 0,
755
+ "label": "Normal"
756
+ },
757
+ {
758
+ "min": 29,
759
+ "max": 32,
760
+ "score": 1,
761
+ "label": "Mild alkalosis"
762
+ },
763
+ {
764
+ "min": 33,
765
+ "max": 999,
766
+ "score": 3,
767
+ "label": "Severe alkalosis"
768
+ }
769
+ ],
770
+ "plausibility": {
771
+ "min": 5,
772
+ "max": 50
773
+ }
774
+ }
775
+ }
776
+ },
777
+ "PROBLEMS": {
778
+ "max_score": 40,
779
+ "scoring_method": "snomed_group_weighted",
780
+ "config": {
781
+ "group_mapping_file": "snomed_problem_groups.json",
782
+ "multimorbidity_bonus": {
783
+ "description": "For each active problem group beyond the 3rd, add +1 (capped at +5)",
784
+ "threshold": 3,
785
+ "per_extra_group": 1,
786
+ "cap": 5
787
+ },
788
+ "value_filter": {
789
+ "include_values": [
790
+ "chronic",
791
+ "acute",
792
+ "exist"
793
+ ],
794
+ "exclude_values": [
795
+ "not exist"
796
+ ]
797
+ },
798
+ "score_cap": 40
799
+ }
800
+ },
801
+ "SYMPTOMS": {
802
+ "max_score": 15,
803
+ "scoring_method": "urgency_group_weighted",
804
+ "config": {
805
+ "group_mapping_file": "symptom_urgency_groups.json",
806
+ "severity_multiplier": {
807
+ "severe": 1.5,
808
+ "yes": 1.0,
809
+ "no": 0.0
810
+ },
811
+ "active_symptom_count_bonus": {
812
+ "description": "Bonus for having many active symptoms at once",
813
+ "threshold": 3,
814
+ "bonus": 2
815
+ },
816
+ "score_cap": 15
817
+ }
818
+ },
819
+ "MEDICATIONS": {
820
+ "max_score": 15,
821
+ "keywords": {
822
+ "Medication Count": {
823
+ "type": "range",
824
+ "ranges": [
825
+ {
826
+ "min": 0,
827
+ "max": 4,
828
+ "score": 0,
829
+ "label": "Low"
830
+ },
831
+ {
832
+ "min": 5,
833
+ "max": 9,
834
+ "score": 1,
835
+ "label": "Moderate"
836
+ },
837
+ {
838
+ "min": 10,
839
+ "max": 14,
840
+ "score": 2,
841
+ "label": "High"
842
+ },
843
+ {
844
+ "min": 15,
845
+ "max": 999,
846
+ "score": 4,
847
+ "label": "Very high (polypharmacy)"
848
+ }
849
+ ]
850
+ },
851
+ "New Medications Count": {
852
+ "type": "range",
853
+ "ranges": [
854
+ {
855
+ "min": 0,
856
+ "max": 1,
857
+ "score": 0,
858
+ "label": "Minimal"
859
+ },
860
+ {
861
+ "min": 2,
862
+ "max": 2,
863
+ "score": 1,
864
+ "label": "Moderate"
865
+ },
866
+ {
867
+ "min": 3,
868
+ "max": 4,
869
+ "score": 2,
870
+ "label": "High (adherence risk)"
871
+ },
872
+ {
873
+ "min": 5,
874
+ "max": 999,
875
+ "score": 4,
876
+ "label": "Very high (adherence crisis)"
877
+ }
878
+ ]
879
+ },
880
+ "Polypharmacy": {
881
+ "type": "categorical",
882
+ "values": {
883
+ "yes": 3,
884
+ "no": 0
885
+ },
886
+ "evidence": "Polypharmacy (>=5 meds) associated with 1.5-2x readmission OR"
887
+ },
888
+ "Anticoagulation": {
889
+ "type": "categorical",
890
+ "values": {
891
+ "yes": 2,
892
+ "no": 0
893
+ },
894
+ "evidence": "Anticoagulation = bleeding risk + INR management complexity"
895
+ },
896
+ "Insulin Therapy": {
897
+ "type": "categorical",
898
+ "values": {
899
+ "yes": 2,
900
+ "no": 0
901
+ },
902
+ "evidence": "Insulin management at home = hypoglycemia risk"
903
+ },
904
+ "Opioid Therapy": {
905
+ "type": "categorical",
906
+ "values": {
907
+ "yes": 3,
908
+ "no": 0
909
+ },
910
+ "evidence": "Opioid use associated with falls, constipation, respiratory depression"
911
+ },
912
+ "Diuretic Therapy": {
913
+ "type": "categorical",
914
+ "values": {
915
+ "yes": 1,
916
+ "no": 0
917
+ },
918
+ "evidence": "Diuretics = electrolyte monitoring, volume management"
919
+ }
920
+ }
921
+ },
922
+ "PROCEDURES": {
923
+ "max_score": 15,
924
+ "keywords": {
925
+ "Any Procedure": {
926
+ "type": "categorical",
927
+ "values": {
928
+ "yes": 2,
929
+ "no": 0
930
+ },
931
+ "note": "Only scored if no specific procedure flags are set"
932
+ },
933
+ "Surgery": {
934
+ "type": "categorical",
935
+ "values": {
936
+ "yes": 4,
937
+ "no": 0
938
+ },
939
+ "evidence": "Surgical patients have 15-20% higher readmission rate"
940
+ },
941
+ "Dialysis": {
942
+ "type": "categorical",
943
+ "values": {
944
+ "started": 5,
945
+ "done": 5,
946
+ "decided": 3,
947
+ "cancelled": 1,
948
+ "no": 0
949
+ },
950
+ "evidence": "New dialysis initiation associated with very high readmission (OR ~2.5)"
951
+ },
952
+ "Mechanical Ventilation": {
953
+ "type": "mixed",
954
+ "categorical_values": {
955
+ "no": 0
956
+ },
957
+ "numeric_rule": "Any numeric value > 0 scores 5 points (prolonged ventilation = ICU-level complexity)",
958
+ "score_if_any_positive": 5,
959
+ "evidence": "Mechanical ventilation = post-ICU syndrome, deconditioning, respiratory fragility"
960
+ }
961
+ }
962
+ },
963
+ "UTILIZATION": {
964
+ "max_score": 20,
965
+ "keywords": {
966
+ "Prior Admissions 12mo": {
967
+ "type": "range",
968
+ "ranges": [
969
+ {
970
+ "min": 0,
971
+ "max": 0,
972
+ "score": 0,
973
+ "label": "No prior"
974
+ },
975
+ {
976
+ "min": 1,
977
+ "max": 1,
978
+ "score": 3,
979
+ "label": "One prior"
980
+ },
981
+ {
982
+ "min": 2,
983
+ "max": 3,
984
+ "score": 6,
985
+ "label": "Frequent"
986
+ },
987
+ {
988
+ "min": 4,
989
+ "max": 999,
990
+ "score": 10,
991
+ "label": "Super-utilizer"
992
+ }
993
+ ],
994
+ "evidence": "Prior admissions is the single strongest predictor (OR 2.0-3.5)"
995
+ },
996
+ "ED Visits 6mo": {
997
+ "type": "range",
998
+ "ranges": [
999
+ {
1000
+ "min": 0,
1001
+ "max": 0,
1002
+ "score": 0,
1003
+ "label": "None"
1004
+ },
1005
+ {
1006
+ "min": 1,
1007
+ "max": 1,
1008
+ "score": 2,
1009
+ "label": "One visit"
1010
+ },
1011
+ {
1012
+ "min": 2,
1013
+ "max": 3,
1014
+ "score": 4,
1015
+ "label": "Multiple"
1016
+ },
1017
+ {
1018
+ "min": 4,
1019
+ "max": 999,
1020
+ "score": 6,
1021
+ "label": "Frequent ED user"
1022
+ }
1023
+ ]
1024
+ },
1025
+ "Days Since Last Admission": {
1026
+ "type": "range",
1027
+ "ranges": [
1028
+ {
1029
+ "min": 0,
1030
+ "max": 30,
1031
+ "score": 4,
1032
+ "label": "Very recent (<30d)"
1033
+ },
1034
+ {
1035
+ "min": 31,
1036
+ "max": 90,
1037
+ "score": 2,
1038
+ "label": "Recent (31-90d)"
1039
+ },
1040
+ {
1041
+ "min": 91,
1042
+ "max": 365,
1043
+ "score": 1,
1044
+ "label": "Within year"
1045
+ },
1046
+ {
1047
+ "min": 366,
1048
+ "max": 999999,
1049
+ "score": 0,
1050
+ "label": "Not recent"
1051
+ }
1052
+ ]
1053
+ },
1054
+ "Current Length of Stay": {
1055
+ "type": "range",
1056
+ "ranges": [
1057
+ {
1058
+ "min": 0,
1059
+ "max": 2,
1060
+ "score": 2,
1061
+ "label": "Very short (possible premature discharge)"
1062
+ },
1063
+ {
1064
+ "min": 3,
1065
+ "max": 6,
1066
+ "score": 0,
1067
+ "label": "Typical"
1068
+ },
1069
+ {
1070
+ "min": 7,
1071
+ "max": 13,
1072
+ "score": 2,
1073
+ "label": "Extended"
1074
+ },
1075
+ {
1076
+ "min": 14,
1077
+ "max": 999,
1078
+ "score": 5,
1079
+ "label": "Prolonged (complex case)"
1080
+ }
1081
+ ],
1082
+ "evidence": "Both very short and very long LOS associated with higher readmission"
1083
+ }
1084
+ }
1085
+ },
1086
+ "DISPOSITION": {
1087
+ "max_score": 15,
1088
+ "keywords": {
1089
+ "Discharge Disposition": {
1090
+ "type": "categorical",
1091
+ "values": {
1092
+ "Home": 0,
1093
+ "Home with Services": 2,
1094
+ "Rehab": 4,
1095
+ "SNF": 5,
1096
+ "LTAC": 6,
1097
+ "Hospice": 7,
1098
+ "AMA": 8
1099
+ },
1100
+ "evidence": "AMA discharge has highest 30d readmission (OR ~3.0); SNF/LTAC also elevated"
1101
+ },
1102
+ "Mental Status": {
1103
+ "type": "categorical",
1104
+ "values": {
1105
+ "alert": 0,
1106
+ "oriented": 0,
1107
+ "confused": 4,
1108
+ "lethargic": 6
1109
+ },
1110
+ "evidence": "Altered mental status at discharge = post-delirium syndrome, medication errors, fall risk"
1111
+ }
1112
+ }
1113
+ },
1114
+ "INTERACTIONS": {
1115
+ "description": "Cross-cluster clinical pattern detection. Bonus points when synergistic patterns are present.",
1116
+ "patterns": [
1117
+ {
1118
+ "id": "sepsis_pattern",
1119
+ "name": "Sepsis / SIRS Pattern",
1120
+ "bonus": 10,
1121
+ "conditions": {
1122
+ "require_all": [
1123
+ {
1124
+ "cluster": "VITALS",
1125
+ "keyword": "Heart Rate",
1126
+ "operator": ">",
1127
+ "value": 100
1128
+ }
1129
+ ],
1130
+ "require_any_of": [
1131
+ {
1132
+ "cluster": "VITALS",
1133
+ "keyword": "Systolic BP",
1134
+ "operator": "<",
1135
+ "value": 100
1136
+ },
1137
+ {
1138
+ "cluster": "VITALS",
1139
+ "keyword": "Respiratory Rate",
1140
+ "operator": ">",
1141
+ "value": 22
1142
+ }
1143
+ ],
1144
+ "require_any_of_2": [
1145
+ {
1146
+ "cluster": "LABS",
1147
+ "keyword": "WBC",
1148
+ "operator": ">",
1149
+ "value": 12
1150
+ },
1151
+ {
1152
+ "cluster": "LABS",
1153
+ "keyword": "WBC",
1154
+ "operator": "<",
1155
+ "value": 4
1156
+ },
1157
+ {
1158
+ "cluster": "VITALS",
1159
+ "keyword": "Temperature",
1160
+ "operator": ">",
1161
+ "value": 100.4
1162
+ }
1163
+ ]
1164
+ },
1165
+ "evidence": "qSOFA + SIRS criteria. Sepsis at discharge = very high readmission risk."
1166
+ },
1167
+ {
1168
+ "id": "aki_pattern",
1169
+ "name": "Acute Kidney Injury Pattern",
1170
+ "bonus": 8,
1171
+ "conditions": {
1172
+ "require_all": [
1173
+ {
1174
+ "cluster": "LABS",
1175
+ "keyword": "Creatinine",
1176
+ "operator": ">",
1177
+ "value": 1.5
1178
+ },
1179
+ {
1180
+ "cluster": "LABS",
1181
+ "keyword": "BUN",
1182
+ "operator": ">",
1183
+ "value": 30
1184
+ }
1185
+ ],
1186
+ "require_any_of": [
1187
+ {
1188
+ "cluster": "LABS",
1189
+ "keyword": "Potassium",
1190
+ "operator": ">",
1191
+ "value": 5.0
1192
+ },
1193
+ {
1194
+ "cluster": "LABS",
1195
+ "keyword": "Sodium",
1196
+ "operator": "<",
1197
+ "value": 135
1198
+ },
1199
+ {
1200
+ "cluster": "LABS",
1201
+ "keyword": "Bicarbonate",
1202
+ "operator": "<",
1203
+ "value": 22
1204
+ }
1205
+ ]
1206
+ },
1207
+ "evidence": "AKI with electrolyte derangement = unstable renal function, readmission OR ~2.0"
1208
+ },
1209
+ {
1210
+ "id": "decompensated_hf",
1211
+ "name": "Decompensated Heart Failure Pattern",
1212
+ "bonus": 8,
1213
+ "conditions": {
1214
+ "require_problem_group": "heart_failure",
1215
+ "require_any_of": [
1216
+ {
1217
+ "cluster": "SYMPTOMS",
1218
+ "keyword_group": "edema_fluid"
1219
+ },
1220
+ {
1221
+ "cluster": "SYMPTOMS",
1222
+ "keyword_group": "respiratory_distress"
1223
+ },
1224
+ {
1225
+ "cluster": "LABS",
1226
+ "keyword": "BUN",
1227
+ "operator": ">",
1228
+ "value": 40
1229
+ }
1230
+ ]
1231
+ },
1232
+ "evidence": "CHF + fluid overload/dyspnea = decompensation, 25-30% 30d readmission"
1233
+ },
1234
+ {
1235
+ "id": "frailty_syndrome",
1236
+ "name": "Frailty Syndrome",
1237
+ "bonus": 6,
1238
+ "conditions": {
1239
+ "require_all": [
1240
+ {
1241
+ "cluster": "DEMOGRAPHICS",
1242
+ "keyword": "Age",
1243
+ "operator": ">",
1244
+ "value": 75
1245
+ }
1246
+ ],
1247
+ "require_count_ge": {
1248
+ "count": 2,
1249
+ "from": [
1250
+ {
1251
+ "type": "problem_groups_active_ge",
1252
+ "value": 3
1253
+ },
1254
+ {
1255
+ "cluster": "LABS",
1256
+ "keyword": "Hemoglobin",
1257
+ "operator": "<",
1258
+ "value": 10
1259
+ },
1260
+ {
1261
+ "cluster": "DISPOSITION",
1262
+ "keyword": "Mental Status",
1263
+ "value_in": [
1264
+ "confused",
1265
+ "lethargic"
1266
+ ]
1267
+ },
1268
+ {
1269
+ "cluster": "DISPOSITION",
1270
+ "keyword": "Discharge Disposition",
1271
+ "value_in": [
1272
+ "SNF",
1273
+ "LTAC",
1274
+ "Rehab"
1275
+ ]
1276
+ }
1277
+ ]
1278
+ }
1279
+ },
1280
+ "evidence": "Frailty = age + multimorbidity + functional decline → readmission OR ~1.8"
1281
+ },
1282
+ {
1283
+ "id": "unstable_discharge",
1284
+ "name": "Unstable Discharge",
1285
+ "bonus": 5,
1286
+ "conditions": {
1287
+ "require_any_of": [
1288
+ {
1289
+ "cluster": "DISPOSITION",
1290
+ "keyword": "Discharge Disposition",
1291
+ "value_in": [
1292
+ "AMA"
1293
+ ]
1294
+ },
1295
+ {
1296
+ "compound_and": [
1297
+ {
1298
+ "cluster": "DISPOSITION",
1299
+ "keyword": "Mental Status",
1300
+ "value_in": [
1301
+ "confused",
1302
+ "lethargic"
1303
+ ]
1304
+ },
1305
+ {
1306
+ "cluster": "DISPOSITION",
1307
+ "keyword": "Discharge Disposition",
1308
+ "value_in": [
1309
+ "Home"
1310
+ ]
1311
+ }
1312
+ ]
1313
+ }
1314
+ ]
1315
+ },
1316
+ "evidence": "AMA or confused-to-Home = highest readmission subgroup"
1317
+ },
1318
+ {
1319
+ "id": "respiratory_failure",
1320
+ "name": "Respiratory Failure Pattern",
1321
+ "bonus": 6,
1322
+ "conditions": {
1323
+ "require_all": [
1324
+ {
1325
+ "cluster": "VITALS",
1326
+ "keyword": "SpO2",
1327
+ "operator": "<",
1328
+ "value": 92
1329
+ }
1330
+ ],
1331
+ "require_any_of": [
1332
+ {
1333
+ "cluster": "VITALS",
1334
+ "keyword": "Respiratory Rate",
1335
+ "operator": ">",
1336
+ "value": 24
1337
+ },
1338
+ {
1339
+ "cluster": "SYMPTOMS",
1340
+ "keyword_group": "respiratory_distress"
1341
+ }
1342
+ ]
1343
+ },
1344
+ "evidence": "Hypoxia + tachypnea/dyspnea = respiratory failure, readmission OR ~2.0"
1345
+ },
1346
+ {
1347
+ "id": "metabolic_crisis",
1348
+ "name": "Metabolic Crisis (DKA/HHS)",
1349
+ "bonus": 6,
1350
+ "conditions": {
1351
+ "require_all": [
1352
+ {
1353
+ "cluster": "LABS",
1354
+ "keyword": "Glucose",
1355
+ "operator": ">",
1356
+ "value": 300
1357
+ }
1358
+ ],
1359
+ "require_any_of": [
1360
+ {
1361
+ "cluster": "LABS",
1362
+ "keyword": "Bicarbonate",
1363
+ "operator": "<",
1364
+ "value": 18
1365
+ },
1366
+ {
1367
+ "cluster": "LABS",
1368
+ "keyword": "Potassium",
1369
+ "operator": ">",
1370
+ "value": 5.5
1371
+ }
1372
+ ]
1373
+ },
1374
+ "evidence": "DKA/HHS at discharge = very high readmission, especially without insulin education"
1375
+ },
1376
+ {
1377
+ "id": "bleeding_risk",
1378
+ "name": "Active Bleeding Risk",
1379
+ "bonus": 6,
1380
+ "conditions": {
1381
+ "require_all": [
1382
+ {
1383
+ "cluster": "LABS",
1384
+ "keyword": "Hemoglobin",
1385
+ "operator": "<",
1386
+ "value": 8
1387
+ }
1388
+ ],
1389
+ "require_any_of": [
1390
+ {
1391
+ "cluster": "LABS",
1392
+ "keyword": "Platelet",
1393
+ "operator": "<",
1394
+ "value": 100
1395
+ },
1396
+ {
1397
+ "cluster": "MEDICATIONS",
1398
+ "keyword": "Anticoagulation",
1399
+ "value_in": [
1400
+ "yes"
1401
+ ]
1402
+ }
1403
+ ]
1404
+ },
1405
+ "evidence": "Severe anemia + thrombocytopenia/anticoagulation = high bleeding readmission risk"
1406
+ }
1407
+ ]
1408
+ },
1409
+ "DAYS_PREDICTION": {
1410
+ "description": "Maps composite risk score to estimated days-to-readmission.",
1411
+ "models": {
1412
+ "regression": {
1413
+ "formula": "max(1, D_max * exp(-gamma * score))",
1414
+ "parameters": {
1415
+ "D_max": 20,
1416
+ "gamma": 0.022
1417
+ },
1418
+ "expected_outputs": {
1419
+ "score_10": 16.1,
1420
+ "score_20": 12.9,
1421
+ "score_40": 8.3,
1422
+ "score_60": 5.3,
1423
+ "score_80": 3.4,
1424
+ "score_100": 2.2
1425
+ }
1426
+ },
1427
+ "buckets": {
1428
+ "urgent": {
1429
+ "label": "0-7 days",
1430
+ "condition": "estimated_days <= 7"
1431
+ },
1432
+ "near_term": {
1433
+ "label": "8-14 days",
1434
+ "condition": "7 < estimated_days <= 14"
1435
+ },
1436
+ "late": {
1437
+ "label": "15-30 days",
1438
+ "condition": "estimated_days > 14"
1439
+ }
1440
+ },
1441
+ "survival": {
1442
+ "description": "Exponential hazard model for P(readmit by day t)",
1443
+ "formula": "P(t) = P_30d * (1 - exp(-(t/30) * k)) / (1 - exp(-k))",
1444
+ "parameters": {
1445
+ "k_base": 2.0,
1446
+ "k_adjustment": "k = k_base + 0.02 * (score - 30)"
1447
+ },
1448
+ "notes": [
1449
+ "k > 0 means hazard is front-loaded (larger k, as produced by higher scores, shifts readmissions earlier)",
1450
+ "k < 0 means hazard is back-loaded; as k approaches 0, P(t) becomes linear in t",
1451
+ "P_30d is from the logistic calibration model"
1452
+ ],
1453
+ "output_horizons": [
1454
+ 7,
1455
+ 14,
1456
+ 21,
1457
+ 30
1458
+ ]
1459
+ }
1460
+ }
1461
+ }
1462
+ }
Analysis_Readmission/config/scoring_rules_v3_20260207.json ADDED
@@ -0,0 +1,1462 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_meta": {
3
+ "version": "1.0",
4
+ "description": "Complete scoring rules for all 9 ONTOLOGY clusters. Each cluster has range-based or categorical scoring with evidence-based weights.",
5
+ "max_theoretical_scores": {
6
+ "DEMOGRAPHICS": 10,
7
+ "VITALS": 25,
8
+ "LABS": 30,
9
+ "PROBLEMS": 40,
10
+ "SYMPTOMS": 15,
11
+ "MEDICATIONS": 15,
12
+ "PROCEDURES": 15,
13
+ "UTILIZATION": 20,
14
+ "DISPOSITION": 15,
15
+ "INTERACTIONS": 30,
16
+ "TOTAL_THEORETICAL_MAX": 215
17
+ },
18
+ "calibration": {
19
+ "description": "Logistic transform: P = 1 / (1 + exp(-(alpha + beta * score)))",
20
+ "alpha": -2.3475,
21
+ "beta": 0.017,
22
+ "baseline_readmission_rate": 0.2069,
23
+ "notes": "Calibrated on 203 MIMIC-IV admissions (8 clusters, FHIR labels). AUC=0.5555."
24
+ },
25
+ "risk_categories": [
26
+ {
27
+ "name": "Low",
28
+ "score_min": 0,
29
+ "score_max": 19,
30
+ "probability_range": "5-12%",
31
+ "color": "green"
32
+ },
33
+ {
34
+ "name": "Medium",
35
+ "score_min": 20,
36
+ "score_max": 39,
37
+ "probability_range": "13-28%",
38
+ "color": "yellow"
39
+ },
40
+ {
41
+ "name": "High",
42
+ "score_min": 40,
43
+ "score_max": 59,
44
+ "probability_range": "29-53%",
45
+ "color": "orange"
46
+ },
47
+ {
48
+ "name": "Critical",
49
+ "score_min": 60,
50
+ "score_max": 999,
51
+ "probability_range": "54%+",
52
+ "color": "red"
53
+ }
54
+ ]
55
+ },
56
+ "DEMOGRAPHICS": {
57
+ "max_score": 10,
58
+ "keywords": {
59
+ "Age": {
60
+ "type": "range",
61
+ "ranges": [
62
+ {
63
+ "min": 0,
64
+ "max": 39,
65
+ "score": 0,
66
+ "label": "Young adult"
67
+ },
68
+ {
69
+ "min": 40,
70
+ "max": 54,
71
+ "score": 1,
72
+ "label": "Middle age"
73
+ },
74
+ {
75
+ "min": 55,
76
+ "max": 64,
77
+ "score": 2,
78
+ "label": "Pre-elderly"
79
+ },
80
+ {
81
+ "min": 65,
82
+ "max": 74,
83
+ "score": 4,
84
+ "label": "Young elderly"
85
+ },
86
+ {
87
+ "min": 75,
88
+ "max": 84,
89
+ "score": 6,
90
+ "label": "Old elderly"
91
+ },
92
+ {
93
+ "min": 85,
94
+ "max": 999,
95
+ "score": 8,
96
+ "label": "Very old"
97
+ }
98
+ ],
99
+ "missing_score": 2,
100
+ "evidence": "Age >65 is consistently associated with higher readmission (OR 1.3-1.8)"
101
+ },
102
+ "Sex": {
103
+ "type": "categorical",
104
+ "values": {
105
+ "male": 1,
106
+ "female": 0
107
+ },
108
+ "missing_score": 0,
109
+ "evidence": "Male sex associated with slightly higher 30-day readmission (OR ~1.1)"
110
+ }
111
+ }
112
+ },
113
+ "VITALS": {
114
+ "max_score": 25,
115
+ "keywords": {
116
+ "Heart Rate": {
117
+ "type": "range",
118
+ "unit": "bpm",
119
+ "ranges": [
120
+ {
121
+ "min": 0,
122
+ "max": 49,
123
+ "score": 3,
124
+ "label": "Severe bradycardia"
125
+ },
126
+ {
127
+ "min": 50,
128
+ "max": 59,
129
+ "score": 1,
130
+ "label": "Mild bradycardia"
131
+ },
132
+ {
133
+ "min": 60,
134
+ "max": 100,
135
+ "score": 0,
136
+ "label": "Normal"
137
+ },
138
+ {
139
+ "min": 101,
140
+ "max": 110,
141
+ "score": 1,
142
+ "label": "Mild tachycardia"
143
+ },
144
+ {
145
+ "min": 111,
146
+ "max": 130,
147
+ "score": 3,
148
+ "label": "Tachycardia"
149
+ },
150
+ {
151
+ "min": 131,
152
+ "max": 999,
153
+ "score": 5,
154
+ "label": "Severe tachycardia"
155
+ }
156
+ ],
157
+ "plausibility": {
158
+ "min": 30,
159
+ "max": 220
160
+ }
161
+ },
162
+ "Systolic BP": {
163
+ "type": "range",
164
+ "unit": "mmHg",
165
+ "ranges": [
166
+ {
167
+ "min": 0,
168
+ "max": 89,
169
+ "score": 5,
170
+ "label": "Hypotension"
171
+ },
172
+ {
173
+ "min": 90,
174
+ "max": 99,
175
+ "score": 3,
176
+ "label": "Borderline low"
177
+ },
178
+ {
179
+ "min": 100,
180
+ "max": 139,
181
+ "score": 0,
182
+ "label": "Normal"
183
+ },
184
+ {
185
+ "min": 140,
186
+ "max": 159,
187
+ "score": 1,
188
+ "label": "Stage 1 HTN"
189
+ },
190
+ {
191
+ "min": 160,
192
+ "max": 179,
193
+ "score": 2,
194
+ "label": "Stage 2 HTN"
195
+ },
196
+ {
197
+ "min": 180,
198
+ "max": 999,
199
+ "score": 4,
200
+ "label": "Hypertensive urgency"
201
+ }
202
+ ],
203
+ "plausibility": {
204
+ "min": 50,
205
+ "max": 260
206
+ }
207
+ },
208
+ "Diastolic BP": {
209
+ "type": "range",
210
+ "unit": "mmHg",
211
+ "ranges": [
212
+ {
213
+ "min": 0,
214
+ "max": 59,
215
+ "score": 2,
216
+ "label": "Low diastolic"
217
+ },
218
+ {
219
+ "min": 60,
220
+ "max": 89,
221
+ "score": 0,
222
+ "label": "Normal"
223
+ },
224
+ {
225
+ "min": 90,
226
+ "max": 99,
227
+ "score": 1,
228
+ "label": "Elevated"
229
+ },
230
+ {
231
+ "min": 100,
232
+ "max": 999,
233
+ "score": 3,
234
+ "label": "High diastolic"
235
+ }
236
+ ],
237
+ "plausibility": {
238
+ "min": 20,
239
+ "max": 160
240
+ }
241
+ },
242
+ "Respiratory Rate": {
243
+ "type": "range",
244
+ "unit": "breaths/min",
245
+ "ranges": [
246
+ {
247
+ "min": 0,
248
+ "max": 9,
249
+ "score": 4,
250
+ "label": "Bradypnea"
251
+ },
252
+ {
253
+ "min": 10,
254
+ "max": 11,
255
+ "score": 2,
256
+ "label": "Low normal"
257
+ },
258
+ {
259
+ "min": 12,
260
+ "max": 20,
261
+ "score": 0,
262
+ "label": "Normal"
263
+ },
264
+ {
265
+ "min": 21,
266
+ "max": 24,
267
+ "score": 2,
268
+ "label": "Mild tachypnea"
269
+ },
270
+ {
271
+ "min": 25,
272
+ "max": 30,
273
+ "score": 4,
274
+ "label": "Tachypnea"
275
+ },
276
+ {
277
+ "min": 31,
278
+ "max": 999,
279
+ "score": 6,
280
+ "label": "Severe tachypnea"
281
+ }
282
+ ],
283
+ "plausibility": {
284
+ "min": 5,
285
+ "max": 60
286
+ }
287
+ },
288
+ "Temperature": {
289
+ "type": "range",
290
+ "unit": "°F",
291
+ "ranges": [
292
+ {
293
+ "min": 0,
294
+ "max": 96.7,
295
+ "score": 3,
296
+ "label": "Hypothermia"
297
+ },
298
+ {
299
+ "min": 96.8,
300
+ "max": 99.5,
301
+ "score": 0,
302
+ "label": "Normal"
303
+ },
304
+ {
305
+ "min": 99.6,
306
+ "max": 100.3,
307
+ "score": 1,
308
+ "label": "Low-grade fever"
309
+ },
310
+ {
311
+ "min": 100.4,
312
+ "max": 101.9,
313
+ "score": 2,
314
+ "label": "Fever"
315
+ },
316
+ {
317
+ "min": 102.0,
318
+ "max": 999,
319
+ "score": 4,
320
+ "label": "High fever"
321
+ }
322
+ ],
323
+ "plausibility": {
324
+ "min": 90,
325
+ "max": 110
326
+ }
327
+ },
328
+ "SpO2": {
329
+ "type": "range",
330
+ "unit": "%",
331
+ "ranges": [
332
+ {
333
+ "min": 95,
334
+ "max": 100,
335
+ "score": 0,
336
+ "label": "Normal"
337
+ },
338
+ {
339
+ "min": 92,
340
+ "max": 94,
341
+ "score": 2,
342
+ "label": "Mild hypoxia"
343
+ },
344
+ {
345
+ "min": 88,
346
+ "max": 91,
347
+ "score": 4,
348
+ "label": "Moderate hypoxia"
349
+ },
350
+ {
351
+ "min": 0,
352
+ "max": 87,
353
+ "score": 6,
354
+ "label": "Severe hypoxia"
355
+ }
356
+ ],
357
+ "plausibility": {
358
+ "min": 50,
359
+ "max": 100
360
+ }
361
+ },
362
+ "Weight": {
363
+ "type": "no_direct_score",
364
+ "note": "Weight alone does not score, but used in interaction patterns (e.g., BMI, fluid overload)",
365
+ "plausibility": {
366
+ "min": 20,
367
+ "max": 300
368
+ }
369
+ }
370
+ }
371
+ },
372
+ "LABS": {
373
+ "max_score": 30,
374
+ "keywords": {
375
+ "Hemoglobin": {
376
+ "type": "range",
377
+ "unit": "g/dL",
378
+ "ranges": [
379
+ {
380
+ "min": 0,
381
+ "max": 6.9,
382
+ "score": 6,
383
+ "label": "Critical anemia"
384
+ },
385
+ {
386
+ "min": 7.0,
387
+ "max": 9.9,
388
+ "score": 4,
389
+ "label": "Moderate anemia"
390
+ },
391
+ {
392
+ "min": 10.0,
393
+ "max": 11.9,
394
+ "score": 2,
395
+ "label": "Mild anemia"
396
+ },
397
+ {
398
+ "min": 12.0,
399
+ "max": 17.0,
400
+ "score": 0,
401
+ "label": "Normal"
402
+ },
403
+ {
404
+ "min": 17.1,
405
+ "max": 999,
406
+ "score": 2,
407
+ "label": "Polycythemia"
408
+ }
409
+ ],
410
+ "plausibility": {
411
+ "min": 2,
412
+ "max": 25
413
+ }
414
+ },
415
+ "Hematocrit": {
416
+ "type": "range",
417
+ "unit": "%",
418
+ "ranges": [
419
+ {
420
+ "min": 0,
421
+ "max": 20.9,
422
+ "score": 5,
423
+ "label": "Critical low"
424
+ },
425
+ {
426
+ "min": 21.0,
427
+ "max": 29.9,
428
+ "score": 3,
429
+ "label": "Low"
430
+ },
431
+ {
432
+ "min": 30.0,
433
+ "max": 35.9,
434
+ "score": 1,
435
+ "label": "Mildly low"
436
+ },
437
+ {
438
+ "min": 36.0,
439
+ "max": 45.0,
440
+ "score": 0,
441
+ "label": "Normal"
442
+ },
443
+ {
444
+ "min": 45.1,
445
+ "max": 999,
446
+ "score": 1,
447
+ "label": "Elevated"
448
+ }
449
+ ],
450
+ "plausibility": {
451
+ "min": 5,
452
+ "max": 70
453
+ }
454
+ },
455
+ "WBC": {
456
+ "type": "range",
457
+ "unit": "K/uL",
458
+ "ranges": [
459
+ {
460
+ "min": 0,
461
+ "max": 3.9,
462
+ "score": 2,
463
+ "label": "Leukopenia"
464
+ },
465
+ {
466
+ "min": 4.0,
467
+ "max": 11.0,
468
+ "score": 0,
469
+ "label": "Normal"
470
+ },
471
+ {
472
+ "min": 11.1,
473
+ "max": 15.0,
474
+ "score": 1,
475
+ "label": "Mild leukocytosis"
476
+ },
477
+ {
478
+ "min": 15.1,
479
+ "max": 20.0,
480
+ "score": 3,
481
+ "label": "Moderate leukocytosis"
482
+ },
483
+ {
484
+ "min": 20.1,
485
+ "max": 999,
486
+ "score": 5,
487
+ "label": "Severe leukocytosis"
488
+ }
489
+ ],
490
+ "plausibility": {
491
+ "min": 0.1,
492
+ "max": 200
493
+ }
494
+ },
495
+ "Platelet": {
496
+ "type": "range",
497
+ "unit": "K/uL",
498
+ "ranges": [
499
+ {
500
+ "min": 0,
501
+ "max": 49,
502
+ "score": 5,
503
+ "label": "Severe thrombocytopenia"
504
+ },
505
+ {
506
+ "min": 50,
507
+ "max": 99,
508
+ "score": 3,
509
+ "label": "Moderate thrombocytopenia"
510
+ },
511
+ {
512
+ "min": 100,
513
+ "max": 149,
514
+ "score": 1,
515
+ "label": "Mild thrombocytopenia"
516
+ },
517
+ {
518
+ "min": 150,
519
+ "max": 400,
520
+ "score": 0,
521
+ "label": "Normal"
522
+ },
523
+ {
524
+ "min": 401,
525
+ "max": 999,
526
+ "score": 2,
527
+ "label": "Thrombocytosis"
528
+ }
529
+ ],
530
+ "plausibility": {
531
+ "min": 1,
532
+ "max": 2000
533
+ }
534
+ },
535
+ "Sodium": {
536
+ "type": "range",
537
+ "unit": "mEq/L",
538
+ "ranges": [
539
+ {
540
+ "min": 0,
541
+ "max": 129,
542
+ "score": 5,
543
+ "label": "Severe hyponatremia"
544
+ },
545
+ {
546
+ "min": 130,
547
+ "max": 134,
548
+ "score": 3,
549
+ "label": "Mild hyponatremia"
550
+ },
551
+ {
552
+ "min": 135,
553
+ "max": 145,
554
+ "score": 0,
555
+ "label": "Normal"
556
+ },
557
+ {
558
+ "min": 146,
559
+ "max": 150,
560
+ "score": 2,
561
+ "label": "Mild hypernatremia"
562
+ },
563
+ {
564
+ "min": 151,
565
+ "max": 999,
566
+ "score": 4,
567
+ "label": "Severe hypernatremia"
568
+ }
569
+ ],
570
+ "plausibility": {
571
+ "min": 100,
572
+ "max": 180
573
+ }
574
+ },
575
+ "Potassium": {
576
+ "type": "range",
577
+ "unit": "mEq/L",
578
+ "ranges": [
579
+ {
580
+ "min": 0,
581
+ "max": 2.9,
582
+ "score": 5,
583
+ "label": "Severe hypokalemia"
584
+ },
585
+ {
586
+ "min": 3.0,
587
+ "max": 3.4,
588
+ "score": 2,
589
+ "label": "Mild hypokalemia"
590
+ },
591
+ {
592
+ "min": 3.5,
593
+ "max": 5.0,
594
+ "score": 0,
595
+ "label": "Normal"
596
+ },
597
+ {
598
+ "min": 5.1,
599
+ "max": 5.5,
600
+ "score": 2,
601
+ "label": "Mild hyperkalemia"
602
+ },
603
+ {
604
+ "min": 5.6,
605
+ "max": 6.0,
606
+ "score": 4,
607
+ "label": "Moderate hyperkalemia"
608
+ },
609
+ {
610
+ "min": 6.1,
611
+ "max": 999,
612
+ "score": 6,
613
+ "label": "Severe hyperkalemia"
614
+ }
615
+ ],
616
+ "plausibility": {
617
+ "min": 1.5,
618
+ "max": 8.0
619
+ }
620
+ },
621
+ "Creatinine": {
622
+ "type": "range",
623
+ "unit": "mg/dL",
624
+ "ranges": [
625
+ {
626
+ "min": 0,
627
+ "max": 1.2,
628
+ "score": 0,
629
+ "label": "Normal"
630
+ },
631
+ {
632
+ "min": 1.3,
633
+ "max": 1.9,
634
+ "score": 2,
635
+ "label": "Mildly elevated"
636
+ },
637
+ {
638
+ "min": 2.0,
639
+ "max": 3.0,
640
+ "score": 4,
641
+ "label": "Moderate renal impairment"
642
+ },
643
+ {
644
+ "min": 3.1,
645
+ "max": 999,
646
+ "score": 6,
647
+ "label": "Severe renal impairment"
648
+ }
649
+ ],
650
+ "plausibility": {
651
+ "min": 0.1,
652
+ "max": 20
653
+ }
654
+ },
655
+ "BUN": {
656
+ "type": "range",
657
+ "unit": "mg/dL",
658
+ "ranges": [
659
+ {
660
+ "min": 0,
661
+ "max": 6,
662
+ "score": 1,
663
+ "label": "Low (malnutrition?)"
664
+ },
665
+ {
666
+ "min": 7,
667
+ "max": 20,
668
+ "score": 0,
669
+ "label": "Normal"
670
+ },
671
+ {
672
+ "min": 21,
673
+ "max": 40,
674
+ "score": 2,
675
+ "label": "Mildly elevated"
676
+ },
677
+ {
678
+ "min": 41,
679
+ "max": 60,
680
+ "score": 4,
681
+ "label": "Moderate azotemia"
682
+ },
683
+ {
684
+ "min": 61,
685
+ "max": 999,
686
+ "score": 6,
687
+ "label": "Severe azotemia"
688
+ }
689
+ ],
690
+ "plausibility": {
691
+ "min": 1,
692
+ "max": 200
693
+ }
694
+ },
695
+ "Glucose": {
696
+ "type": "range",
697
+ "unit": "mg/dL",
698
+ "ranges": [
699
+ {
700
+ "min": 0,
701
+ "max": 69,
702
+ "score": 4,
703
+ "label": "Hypoglycemia"
704
+ },
705
+ {
706
+ "min": 70,
707
+ "max": 140,
708
+ "score": 0,
709
+ "label": "Normal"
710
+ },
711
+ {
712
+ "min": 141,
713
+ "max": 200,
714
+ "score": 1,
715
+ "label": "Mild hyperglycemia"
716
+ },
717
+ {
718
+ "min": 201,
719
+ "max": 300,
720
+ "score": 2,
721
+ "label": "Moderate hyperglycemia"
722
+ },
723
+ {
724
+ "min": 301,
725
+ "max": 999,
726
+ "score": 4,
727
+ "label": "Severe hyperglycemia"
728
+ }
729
+ ],
730
+ "plausibility": {
731
+ "min": 20,
732
+ "max": 1000
733
+ }
734
+ },
735
+ "Bicarbonate": {
736
+ "type": "range",
737
+ "unit": "mEq/L",
738
+ "ranges": [
739
+ {
740
+ "min": 0,
741
+ "max": 17,
742
+ "score": 4,
743
+ "label": "Severe acidosis"
744
+ },
745
+ {
746
+ "min": 18,
747
+ "max": 21,
748
+ "score": 2,
749
+ "label": "Mild acidosis"
750
+ },
751
+ {
752
+ "min": 22,
753
+ "max": 28,
754
+ "score": 0,
755
+ "label": "Normal"
756
+ },
757
+ {
758
+ "min": 29,
759
+ "max": 32,
760
+ "score": 1,
761
+ "label": "Mild alkalosis"
762
+ },
763
+ {
764
+ "min": 33,
765
+ "max": 999,
766
+ "score": 3,
767
+ "label": "Severe alkalosis"
768
+ }
769
+ ],
770
+ "plausibility": {
771
+ "min": 5,
772
+ "max": 50
773
+ }
774
+ }
775
+ }
776
+ },
777
+ "PROBLEMS": {
778
+ "max_score": 40,
779
+ "scoring_method": "snomed_group_weighted",
780
+ "config": {
781
+ "group_mapping_file": "snomed_problem_groups.json",
782
+ "multimorbidity_bonus": {
783
+ "description": "For each active problem group beyond the 3rd, add +1 (capped at +5)",
784
+ "threshold": 3,
785
+ "per_extra_group": 1,
786
+ "cap": 5
787
+ },
788
+ "value_filter": {
789
+ "include_values": [
790
+ "chronic",
791
+ "acute",
792
+ "exist"
793
+ ],
794
+ "exclude_values": [
795
+ "not exist"
796
+ ]
797
+ },
798
+ "score_cap": 40
799
+ }
800
+ },
801
+ "SYMPTOMS": {
802
+ "max_score": 15,
803
+ "scoring_method": "urgency_group_weighted",
804
+ "config": {
805
+ "group_mapping_file": "symptom_urgency_groups.json",
806
+ "severity_multiplier": {
807
+ "severe": 1.5,
808
+ "yes": 1.0,
809
+ "no": 0.0
810
+ },
811
+ "active_symptom_count_bonus": {
812
+ "description": "Bonus for having many active symptoms at once",
813
+ "threshold": 3,
814
+ "bonus": 2
815
+ },
816
+ "score_cap": 15
817
+ }
818
+ },
819
+ "MEDICATIONS": {
820
+ "max_score": 15,
821
+ "keywords": {
822
+ "Medication Count": {
823
+ "type": "range",
824
+ "ranges": [
825
+ {
826
+ "min": 0,
827
+ "max": 4,
828
+ "score": 0,
829
+ "label": "Low"
830
+ },
831
+ {
832
+ "min": 5,
833
+ "max": 9,
834
+ "score": 1,
835
+ "label": "Moderate"
836
+ },
837
+ {
838
+ "min": 10,
839
+ "max": 14,
840
+ "score": 2,
841
+ "label": "High"
842
+ },
843
+ {
844
+ "min": 15,
845
+ "max": 999,
846
+ "score": 4,
847
+ "label": "Very high (polypharmacy)"
848
+ }
849
+ ]
850
+ },
851
+ "New Medications Count": {
852
+ "type": "range",
853
+ "ranges": [
854
+ {
855
+ "min": 0,
856
+ "max": 1,
857
+ "score": 0,
858
+ "label": "Minimal"
859
+ },
860
+ {
861
+ "min": 2,
862
+ "max": 2,
863
+ "score": 1,
864
+ "label": "Moderate"
865
+ },
866
+ {
867
+ "min": 3,
868
+ "max": 4,
869
+ "score": 2,
870
+ "label": "High (adherence risk)"
871
+ },
872
+ {
873
+ "min": 5,
874
+ "max": 999,
875
+ "score": 4,
876
+ "label": "Very high (adherence crisis)"
877
+ }
878
+ ]
879
+ },
880
+ "Polypharmacy": {
881
+ "type": "categorical",
882
+ "values": {
883
+ "yes": 3,
884
+ "no": 0
885
+ },
886
+ "evidence": "Polypharmacy (>=5 meds) associated with 1.5-2x readmission OR"
887
+ },
888
+ "Anticoagulation": {
889
+ "type": "categorical",
890
+ "values": {
891
+ "yes": 2,
892
+ "no": 0
893
+ },
894
+ "evidence": "Anticoagulation = bleeding risk + INR management complexity"
895
+ },
896
+ "Insulin Therapy": {
897
+ "type": "categorical",
898
+ "values": {
899
+ "yes": 2,
900
+ "no": 0
901
+ },
902
+ "evidence": "Insulin management at home = hypoglycemia risk"
903
+ },
904
+ "Opioid Therapy": {
905
+ "type": "categorical",
906
+ "values": {
907
+ "yes": 3,
908
+ "no": 0
909
+ },
910
+ "evidence": "Opioid use associated with falls, constipation, respiratory depression"
911
+ },
912
+ "Diuretic Therapy": {
913
+ "type": "categorical",
914
+ "values": {
915
+ "yes": 1,
916
+ "no": 0
917
+ },
918
+ "evidence": "Diuretics = electrolyte monitoring, volume management"
919
+ }
920
+ }
921
+ },
922
+ "PROCEDURES": {
923
+ "max_score": 15,
924
+ "keywords": {
925
+ "Any Procedure": {
926
+ "type": "categorical",
927
+ "values": {
928
+ "yes": 2,
929
+ "no": 0
930
+ },
931
+ "note": "Only scored if no specific procedure flags are set"
932
+ },
933
+ "Surgery": {
934
+ "type": "categorical",
935
+ "values": {
936
+ "yes": 4,
937
+ "no": 0
938
+ },
939
+ "evidence": "Surgical patients have 15-20% higher readmission rate"
940
+ },
941
+ "Dialysis": {
942
+ "type": "categorical",
943
+ "values": {
944
+ "started": 5,
945
+ "done": 5,
946
+ "decided": 3,
947
+ "cancelled": 1,
948
+ "no": 0
949
+ },
950
+ "evidence": "New dialysis initiation associated with very high readmission (OR ~2.5)"
951
+ },
952
+ "Mechanical Ventilation": {
953
+ "type": "mixed",
954
+ "categorical_values": {
955
+ "no": 0
956
+ },
957
+ "numeric_rule": "Any numeric value > 0 scores 5 points (prolonged ventilation = ICU-level complexity)",
958
+ "score_if_any_positive": 5,
959
+ "evidence": "Mechanical ventilation = post-ICU syndrome, deconditioning, respiratory fragility"
960
+ }
961
+ }
962
+ },
963
+ "UTILIZATION": {
964
+ "max_score": 20,
965
+ "keywords": {
966
+ "Prior Admissions 12mo": {
967
+ "type": "range",
968
+ "ranges": [
969
+ {
970
+ "min": 0,
971
+ "max": 0,
972
+ "score": 0,
973
+ "label": "No prior"
974
+ },
975
+ {
976
+ "min": 1,
977
+ "max": 1,
978
+ "score": 3,
979
+ "label": "One prior"
980
+ },
981
+ {
982
+ "min": 2,
983
+ "max": 3,
984
+ "score": 6,
985
+ "label": "Frequent"
986
+ },
987
+ {
988
+ "min": 4,
989
+ "max": 999,
990
+ "score": 10,
991
+ "label": "Super-utilizer"
992
+ }
993
+ ],
994
+ "evidence": "Prior admissions is the single strongest predictor (OR 2.0-3.5)"
995
+ },
996
+ "ED Visits 6mo": {
997
+ "type": "range",
998
+ "ranges": [
999
+ {
1000
+ "min": 0,
1001
+ "max": 0,
1002
+ "score": 0,
1003
+ "label": "None"
1004
+ },
1005
+ {
1006
+ "min": 1,
1007
+ "max": 1,
1008
+ "score": 2,
1009
+ "label": "One visit"
1010
+ },
1011
+ {
1012
+ "min": 2,
1013
+ "max": 3,
1014
+ "score": 4,
1015
+ "label": "Multiple"
1016
+ },
1017
+ {
1018
+ "min": 4,
1019
+ "max": 999,
1020
+ "score": 6,
1021
+ "label": "Frequent ED user"
1022
+ }
1023
+ ]
1024
+ },
1025
+ "Days Since Last Admission": {
1026
+ "type": "range",
1027
+ "ranges": [
1028
+ {
1029
+ "min": 0,
1030
+ "max": 30,
1031
+ "score": 4,
1032
+ "label": "Very recent (<30d)"
1033
+ },
1034
+ {
1035
+ "min": 31,
1036
+ "max": 90,
1037
+ "score": 2,
1038
+ "label": "Recent (31-90d)"
1039
+ },
1040
+ {
1041
+ "min": 91,
1042
+ "max": 365,
1043
+ "score": 1,
1044
+ "label": "Within year"
1045
+ },
1046
+ {
1047
+ "min": 366,
1048
+ "max": 999999,
1049
+ "score": 0,
1050
+ "label": "Not recent"
1051
+ }
1052
+ ]
1053
+ },
1054
+ "Current Length of Stay": {
1055
+ "type": "range",
1056
+ "ranges": [
1057
+ {
1058
+ "min": 0,
1059
+ "max": 2,
1060
+ "score": 2,
1061
+ "label": "Very short (possible premature discharge)"
1062
+ },
1063
+ {
1064
+ "min": 3,
1065
+ "max": 6,
1066
+ "score": 0,
1067
+ "label": "Typical"
1068
+ },
1069
+ {
1070
+ "min": 7,
1071
+ "max": 13,
1072
+ "score": 2,
1073
+ "label": "Extended"
1074
+ },
1075
+ {
1076
+ "min": 14,
1077
+ "max": 999,
1078
+ "score": 5,
1079
+ "label": "Prolonged (complex case)"
1080
+ }
1081
+ ],
1082
+ "evidence": "Both very short and very long LOS associated with higher readmission"
1083
+ }
1084
+ }
1085
+ },
1086
+ "DISPOSITION": {
1087
+ "max_score": 15,
1088
+ "keywords": {
1089
+ "Discharge Disposition": {
1090
+ "type": "categorical",
1091
+ "values": {
1092
+ "Home": 0,
1093
+ "Home with Services": 2,
1094
+ "Rehab": 4,
1095
+ "SNF": 5,
1096
+ "LTAC": 6,
1097
+ "Hospice": 7,
1098
+ "AMA": 8
1099
+ },
1100
+ "evidence": "AMA discharge has highest 30d readmission (OR ~3.0); SNF/LTAC also elevated"
1101
+ },
1102
+ "Mental Status": {
1103
+ "type": "categorical",
1104
+ "values": {
1105
+ "alert": 0,
1106
+ "oriented": 0,
1107
+ "confused": 4,
1108
+ "lethargic": 6
1109
+ },
1110
+ "evidence": "Altered mental status at discharge = post-delirium syndrome, medication errors, fall risk"
1111
+ }
1112
+ }
1113
+ },
1114
+ "INTERACTIONS": {
1115
+ "description": "Cross-cluster clinical pattern detection. Bonus points when synergistic patterns are present.",
1116
+ "patterns": [
1117
+ {
1118
+ "id": "sepsis_pattern",
1119
+ "name": "Sepsis / SIRS Pattern",
1120
+ "bonus": 10,
1121
+ "conditions": {
1122
+ "require_all": [
1123
+ {
1124
+ "cluster": "VITALS",
1125
+ "keyword": "Heart Rate",
1126
+ "operator": ">",
1127
+ "value": 100
1128
+ }
1129
+ ],
1130
+ "require_any_of": [
1131
+ {
1132
+ "cluster": "VITALS",
1133
+ "keyword": "Systolic BP",
1134
+ "operator": "<",
1135
+ "value": 100
1136
+ },
1137
+ {
1138
+ "cluster": "VITALS",
1139
+ "keyword": "Respiratory Rate",
1140
+ "operator": ">",
1141
+ "value": 22
1142
+ }
1143
+ ],
1144
+ "require_any_of_2": [
1145
+ {
1146
+ "cluster": "LABS",
1147
+ "keyword": "WBC",
1148
+ "operator": ">",
1149
+ "value": 12
1150
+ },
1151
+ {
1152
+ "cluster": "LABS",
1153
+ "keyword": "WBC",
1154
+ "operator": "<",
1155
+ "value": 4
1156
+ },
1157
+ {
1158
+ "cluster": "VITALS",
1159
+ "keyword": "Temperature",
1160
+ "operator": ">",
1161
+ "value": 100.4
1162
+ }
1163
+ ]
1164
+ },
1165
+ "evidence": "qSOFA + SIRS criteria. Sepsis at discharge = very high readmission risk."
1166
+ },
1167
+ {
1168
+ "id": "aki_pattern",
1169
+ "name": "Acute Kidney Injury Pattern",
1170
+ "bonus": 8,
1171
+ "conditions": {
1172
+ "require_all": [
1173
+ {
1174
+ "cluster": "LABS",
1175
+ "keyword": "Creatinine",
1176
+ "operator": ">",
1177
+ "value": 1.5
1178
+ },
1179
+ {
1180
+ "cluster": "LABS",
1181
+ "keyword": "BUN",
1182
+ "operator": ">",
1183
+ "value": 30
1184
+ }
1185
+ ],
1186
+ "require_any_of": [
1187
+ {
1188
+ "cluster": "LABS",
1189
+ "keyword": "Potassium",
1190
+ "operator": ">",
1191
+ "value": 5.0
1192
+ },
1193
+ {
1194
+ "cluster": "LABS",
1195
+ "keyword": "Sodium",
1196
+ "operator": "<",
1197
+ "value": 135
1198
+ },
1199
+ {
1200
+ "cluster": "LABS",
1201
+ "keyword": "Bicarbonate",
1202
+ "operator": "<",
1203
+ "value": 22
1204
+ }
1205
+ ]
1206
+ },
1207
+ "evidence": "AKI with electrolyte derangement = unstable renal function, readmission OR ~2.0"
1208
+ },
1209
+ {
1210
+ "id": "decompensated_hf",
1211
+ "name": "Decompensated Heart Failure Pattern",
1212
+ "bonus": 8,
1213
+ "conditions": {
1214
+ "require_problem_group": "heart_failure",
1215
+ "require_any_of": [
1216
+ {
1217
+ "cluster": "SYMPTOMS",
1218
+ "keyword_group": "edema_fluid"
1219
+ },
1220
+ {
1221
+ "cluster": "SYMPTOMS",
1222
+ "keyword_group": "respiratory_distress"
1223
+ },
1224
+ {
1225
+ "cluster": "LABS",
1226
+ "keyword": "BUN",
1227
+ "operator": ">",
1228
+ "value": 40
1229
+ }
1230
+ ]
1231
+ },
1232
+ "evidence": "CHF + fluid overload/dyspnea = decompensation, 25-30% 30d readmission"
1233
+ },
1234
+ {
1235
+ "id": "frailty_syndrome",
1236
+ "name": "Frailty Syndrome",
1237
+ "bonus": 6,
1238
+ "conditions": {
1239
+ "require_all": [
1240
+ {
1241
+ "cluster": "DEMOGRAPHICS",
1242
+ "keyword": "Age",
1243
+ "operator": ">",
1244
+ "value": 75
1245
+ }
1246
+ ],
1247
+ "require_count_ge": {
1248
+ "count": 2,
1249
+ "from": [
1250
+ {
1251
+ "type": "problem_groups_active_ge",
1252
+ "value": 3
1253
+ },
1254
+ {
1255
+ "cluster": "LABS",
1256
+ "keyword": "Hemoglobin",
1257
+ "operator": "<",
1258
+ "value": 10
1259
+ },
1260
+ {
1261
+ "cluster": "DISPOSITION",
1262
+ "keyword": "Mental Status",
1263
+ "value_in": [
1264
+ "confused",
1265
+ "lethargic"
1266
+ ]
1267
+ },
1268
+ {
1269
+ "cluster": "DISPOSITION",
1270
+ "keyword": "Discharge Disposition",
1271
+ "value_in": [
1272
+ "SNF",
1273
+ "LTAC",
1274
+ "Rehab"
1275
+ ]
1276
+ }
1277
+ ]
1278
+ }
1279
+ },
1280
+ "evidence": "Frailty = age + multimorbidity + functional decline → readmission OR ~1.8"
1281
+ },
1282
+ {
1283
+ "id": "unstable_discharge",
1284
+ "name": "Unstable Discharge",
1285
+ "bonus": 5,
1286
+ "conditions": {
1287
+ "require_any_of": [
1288
+ {
1289
+ "cluster": "DISPOSITION",
1290
+ "keyword": "Discharge Disposition",
1291
+ "value_in": [
1292
+ "AMA"
1293
+ ]
1294
+ },
1295
+ {
1296
+ "compound_and": [
1297
+ {
1298
+ "cluster": "DISPOSITION",
1299
+ "keyword": "Mental Status",
1300
+ "value_in": [
1301
+ "confused",
1302
+ "lethargic"
1303
+ ]
1304
+ },
1305
+ {
1306
+ "cluster": "DISPOSITION",
1307
+ "keyword": "Discharge Disposition",
1308
+ "value_in": [
1309
+ "Home"
1310
+ ]
1311
+ }
1312
+ ]
1313
+ }
1314
+ ]
1315
+ },
1316
+ "evidence": "AMA or confused-to-Home = highest readmission subgroup"
1317
+ },
1318
+ {
1319
+ "id": "respiratory_failure",
1320
+ "name": "Respiratory Failure Pattern",
1321
+ "bonus": 6,
1322
+ "conditions": {
1323
+ "require_all": [
1324
+ {
1325
+ "cluster": "VITALS",
1326
+ "keyword": "SpO2",
1327
+ "operator": "<",
1328
+ "value": 92
1329
+ }
1330
+ ],
1331
+ "require_any_of": [
1332
+ {
1333
+ "cluster": "VITALS",
1334
+ "keyword": "Respiratory Rate",
1335
+ "operator": ">",
1336
+ "value": 24
1337
+ },
1338
+ {
1339
+ "cluster": "SYMPTOMS",
1340
+ "keyword_group": "respiratory_distress"
1341
+ }
1342
+ ]
1343
+ },
1344
+ "evidence": "Hypoxia + tachypnea/dyspnea = respiratory failure, readmission OR ~2.0"
1345
+ },
1346
+ {
1347
+ "id": "metabolic_crisis",
1348
+ "name": "Metabolic Crisis (DKA/HHS)",
1349
+ "bonus": 6,
1350
+ "conditions": {
1351
+ "require_all": [
1352
+ {
1353
+ "cluster": "LABS",
1354
+ "keyword": "Glucose",
1355
+ "operator": ">",
1356
+ "value": 300
1357
+ }
1358
+ ],
1359
+ "require_any_of": [
1360
+ {
1361
+ "cluster": "LABS",
1362
+ "keyword": "Bicarbonate",
1363
+ "operator": "<",
1364
+ "value": 18
1365
+ },
1366
+ {
1367
+ "cluster": "LABS",
1368
+ "keyword": "Potassium",
1369
+ "operator": ">",
1370
+ "value": 5.5
1371
+ }
1372
+ ]
1373
+ },
1374
+ "evidence": "DKA/HHS at discharge = very high readmission, especially without insulin education"
1375
+ },
1376
+ {
1377
+ "id": "bleeding_risk",
1378
+ "name": "Active Bleeding Risk",
1379
+ "bonus": 6,
1380
+ "conditions": {
1381
+ "require_all": [
1382
+ {
1383
+ "cluster": "LABS",
1384
+ "keyword": "Hemoglobin",
1385
+ "operator": "<",
1386
+ "value": 8
1387
+ }
1388
+ ],
1389
+ "require_any_of": [
1390
+ {
1391
+ "cluster": "LABS",
1392
+ "keyword": "Platelet",
1393
+ "operator": "<",
1394
+ "value": 100
1395
+ },
1396
+ {
1397
+ "cluster": "MEDICATIONS",
1398
+ "keyword": "Anticoagulation",
1399
+ "value_in": [
1400
+ "yes"
1401
+ ]
1402
+ }
1403
+ ]
1404
+ },
1405
+ "evidence": "Severe anemia + thrombocytopenia/anticoagulation = high bleeding readmission risk"
1406
+ }
1407
+ ]
1408
+ },
1409
+ "DAYS_PREDICTION": {
1410
+ "description": "Maps composite risk score to estimated days-to-readmission.",
1411
+ "models": {
1412
+ "regression": {
1413
+ "formula": "max(1, D_max * exp(-gamma * score))",
1414
+ "parameters": {
1415
+ "D_max": 20,
1416
+ "gamma": 0.022
1417
+ },
1418
+ "expected_outputs": {
1419
+ "score_10": 16.1,
1420
+ "score_20": 12.9,
1421
+ "score_40": 8.3,
1422
+ "score_60": 5.3,
1423
+ "score_80": 3.4,
1424
+ "score_100": 2.2
1425
+ }
1426
+ },
1427
+ "buckets": {
1428
+ "urgent": {
1429
+ "label": "0-7 days",
1430
+ "condition": "estimated_days <= 7"
1431
+ },
1432
+ "near_term": {
1433
+ "label": "8-14 days",
1434
+ "condition": "7 < estimated_days <= 14"
1435
+ },
1436
+ "late": {
1437
+ "label": "15-30 days",
1438
+ "condition": "estimated_days > 14"
1439
+ }
1440
+ },
1441
+ "survival": {
1442
+ "description": "Exponential hazard model for P(readmit by day t)",
1443
+ "formula": "P(t) = P_30d * (1 - exp(-(t/30) * k)) / (1 - exp(-k))",
1444
+ "parameters": {
1445
+ "k_base": 2.0,
1446
+ "k_adjustment": "k = k_base + 0.02 * (score - 30)"
1447
+ },
1448
+ "notes": [
1449
+ "k > 0 means hazard is front-loaded; larger k (higher-risk patients) shifts readmissions earlier",
1450
+ "k < 0 means hazard is back-loaded; k near 0 approaches a uniform hazard over the 30 days",
1451
+ "P_30d is from the logistic calibration model"
1452
+ ],
1453
+ "output_horizons": [
1454
+ 7,
1455
+ 14,
1456
+ 21,
1457
+ 30
1458
+ ]
1459
+ }
1460
+ }
1461
+ }
1462
+ }
Analysis_Readmission/config/snomed_problem_groups.json ADDED
@@ -0,0 +1,1584 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_meta": {
3
+ "version": "1.0",
4
+ "description": "SNOMED-CT concept groups for PROBLEMS cluster. Maps free-text diagnoses to clinically meaningful groups with readmission risk weights.",
5
+ "notes": [
6
+ "Each group has a primary SNOMED-CT code, risk_weight (0-10), and synonyms for fuzzy matching.",
7
+ "risk_weight reflects evidence-based contribution to 30-day readmission risk.",
8
+ "charlson=true means the group is part of Charlson Comorbidity Index.",
9
+ "Synonyms are lowercase for case-insensitive matching."
10
+ ]
11
+ },
12
+ "groups": [
13
+ {
14
+ "id": "heart_failure",
15
+ "name": "Heart Failure",
16
+ "snomed_ct": "84114007",
17
+ "icd10_range": [
18
+ "I50"
19
+ ],
20
+ "risk_weight": 8,
21
+ "charlson": true,
22
+ "synonyms": [
23
+ "heart failure",
24
+ "congestive heart failure",
25
+ "chf",
26
+ "systolic heart failure",
27
+ "diastolic heart failure",
28
+ "hfref",
29
+ "hfpef",
30
+ "left ventricular failure",
31
+ "right heart failure",
32
+ "biventricular failure",
33
+ "cardiomyopathy",
34
+ "dilated cardiomyopathy",
35
+ "ischemic cardiomyopathy",
36
+ "nonischemic cardiomyopathy",
37
+ "decompensated heart failure",
38
+ "acute on chronic heart failure",
39
+ "nyha class",
40
+ "cardiac decompensation",
41
+ "reduced ejection fraction",
42
+ "preserved ejection fraction",
43
+ "diastolic dysfunction",
44
+ "lvef",
45
+ "cardiac failure",
46
+ "left ventricular hypertrophy",
47
+ "lvh",
48
+ "concentric hypertrophy",
49
+ "heart murmur",
50
+ "cardiac murmur"
51
+ ]
52
+ },
53
+ {
54
+ "id": "copd",
55
+ "name": "COPD / Chronic Lung Disease",
56
+ "snomed_ct": "13645005",
57
+ "icd10_range": [
58
+ "J44",
59
+ "J43",
60
+ "J42"
61
+ ],
62
+ "risk_weight": 5,
63
+ "charlson": true,
64
+ "synonyms": [
65
+ "copd",
66
+ "chronic obstructive pulmonary disease",
67
+ "emphysema",
68
+ "chronic bronchitis",
69
+ "copd exacerbation",
70
+ "acute exacerbation of copd",
71
+ "chronic lung disease",
72
+ "chronic respiratory failure",
73
+ "oxygen dependent",
74
+ "home oxygen"
75
+ ]
76
+ },
77
+ {
78
+ "id": "diabetes_uncomplicated",
79
+ "name": "Diabetes Mellitus (uncomplicated)",
80
+ "snomed_ct": "73211009",
81
+ "icd10_range": [
82
+ "E11",
83
+ "E10"
84
+ ],
85
+ "risk_weight": 3,
86
+ "charlson": true,
87
+ "synonyms": [
88
+ "diabetes",
89
+ "diabetes mellitus",
90
+ "diabetes mellitus type 2",
91
+ "diabetes mellitus type 1",
92
+ "type 2 diabetes",
93
+ "type 1 diabetes",
94
+ "dm",
95
+ "dm2",
96
+ "dm1",
97
+ "t2dm",
98
+ "t1dm",
99
+ "iddm",
100
+ "niddm",
101
+ "adult onset diabetes",
102
+ "juvenile diabetes",
103
+ "insulin dependent diabetes"
104
+ ]
105
+ },
106
+ {
107
+ "id": "diabetes_complicated",
108
+ "name": "Diabetes with Complications",
109
+ "snomed_ct": "368581000119106",
110
+ "icd10_range": [
111
+ "E11.2",
112
+ "E11.3",
113
+ "E11.4",
114
+ "E11.5",
115
+ "E11.6"
116
+ ],
117
+ "risk_weight": 5,
118
+ "charlson": true,
119
+ "synonyms": [
120
+ "diabetic nephropathy",
121
+ "diabetic neuropathy",
122
+ "diabetic retinopathy",
123
+ "diabetic foot",
124
+ "diabetic ketoacidosis",
125
+ "dka",
126
+ "diabetic ulcer",
127
+ "diabetic gastroparesis",
128
+ "diabetes with renal manifestations",
129
+ "diabetes with ophthalmic manifestations",
130
+ "hyperosmolar hyperglycemic state",
131
+ "hhs"
132
+ ]
133
+ },
134
+ {
135
+ "id": "ckd",
136
+ "name": "Chronic Kidney Disease",
137
+ "snomed_ct": "709044004",
138
+ "icd10_range": [
139
+ "N18"
140
+ ],
141
+ "risk_weight": 6,
142
+ "charlson": true,
143
+ "synonyms": [
144
+ "chronic kidney disease",
145
+ "ckd",
146
+ "chronic renal failure",
147
+ "chronic renal insufficiency",
148
+ "end stage renal disease",
149
+ "esrd",
150
+ "stage 3 ckd",
151
+ "stage 4 ckd",
152
+ "stage 5 ckd",
153
+ "renal failure",
154
+ "kidney failure",
155
+ "dialysis dependent",
156
+ "hemodialysis",
157
+ "peritoneal dialysis",
158
+ "renal transplant",
159
+ "nephropathy",
160
+ "nephrotic syndrome",
161
+ "end-stage renal disease"
162
+ ]
163
+ },
164
+ {
165
+ "id": "aki",
166
+ "name": "Acute Kidney Injury",
167
+ "snomed_ct": "14669001",
168
+ "icd10_range": [
169
+ "N17"
170
+ ],
171
+ "risk_weight": 5,
172
+ "charlson": false,
173
+ "synonyms": [
174
+ "acute kidney injury",
175
+ "aki",
176
+ "acute renal failure",
177
+ "acute renal insufficiency",
178
+ "acute tubular necrosis",
179
+ "atn",
180
+ "prerenal azotemia",
181
+ "contrast nephropathy"
182
+ ]
183
+ },
184
+ {
185
+ "id": "cancer_solid",
186
+ "name": "Cancer (solid tumor, non-metastatic)",
187
+ "snomed_ct": "363346000",
188
+ "icd10_range": [
189
+ "C00-C75"
190
+ ],
191
+ "risk_weight": 5,
192
+ "charlson": true,
193
+ "synonyms": [
194
+ "cancer",
195
+ "malignancy",
196
+ "malignant neoplasm",
197
+ "carcinoma",
198
+ "adenocarcinoma",
199
+ "squamous cell carcinoma",
200
+ "lung cancer",
201
+ "breast cancer",
202
+ "colon cancer",
203
+ "prostate cancer",
204
+ "bladder cancer",
205
+ "renal cell carcinoma",
206
+ "pancreatic cancer",
207
+ "hepatocellular carcinoma",
208
+ "ovarian cancer",
209
+ "cervical cancer",
210
+ "endometrial cancer",
211
+ "thyroid cancer",
212
+ "gastric cancer",
213
+ "esophageal cancer",
214
+ "melanoma",
215
+ "sarcoma",
216
+ "lymphoma",
217
+ "leukemia",
218
+ "non-hodgkin lymphoma",
219
+ "hodgkin lymphoma",
220
+ "multiple myeloma",
221
+ "myelodysplastic syndrome",
222
+ "myeloproliferative disorder",
223
+ "meningioma",
224
+ "glioma",
225
+ "brain tumor",
226
+ "astrocytoma",
227
+ "schwannoma",
228
+ "pheochromocytoma",
229
+ "benign tumor",
230
+ "benign neoplasm",
231
+ "polyp",
232
+ "papilloma"
233
+ ]
234
+ },
235
+ {
236
+ "id": "cancer_metastatic",
237
+ "name": "Metastatic Cancer",
238
+ "snomed_ct": "315004001",
239
+ "icd10_range": [
240
+ "C77-C80"
241
+ ],
242
+ "risk_weight": 8,
243
+ "charlson": true,
244
+ "synonyms": [
245
+ "metastatic",
246
+ "metastasis",
247
+ "metastases",
248
+ "stage iv cancer",
249
+ "stage 4 cancer",
250
+ "disseminated",
251
+ "advanced cancer",
252
+ "brain metastasis",
253
+ "liver metastasis",
254
+ "bone metastasis",
255
+ "lung metastasis",
256
+ "widespread disease",
257
+ "terminal cancer"
258
+ ]
259
+ },
260
+ {
261
+ "id": "liver_disease",
262
+ "name": "Liver Disease",
263
+ "snomed_ct": "235856003",
264
+ "icd10_range": [
265
+ "K70-K77"
266
+ ],
267
+ "risk_weight": 5,
268
+ "charlson": true,
269
+ "synonyms": [
270
+ "cirrhosis",
271
+ "liver cirrhosis",
272
+ "hepatic cirrhosis",
273
+ "liver failure",
274
+ "hepatic failure",
275
+ "hepatitis",
276
+ "hepatitis b",
277
+ "hepatitis c",
278
+ "alcoholic liver disease",
279
+ "nafld",
280
+ "nash",
281
+ "nonalcoholic fatty liver disease",
282
+ "nonalcoholic steatohepatitis",
283
+ "portal hypertension",
284
+ "esophageal varices",
285
+ "hepatic encephalopathy",
286
+ "ascites",
287
+ "hepatorenal syndrome",
288
+ "liver transplant",
289
+ "transaminitis",
290
+ "elevated liver enzymes",
291
+ "elevated transaminases",
292
+ "alt elevation",
293
+ "ast elevation"
294
+ ]
295
+ },
296
+ {
297
+ "id": "cva_stroke",
298
+ "name": "Cerebrovascular Disease / Stroke",
299
+ "snomed_ct": "62914000",
300
+ "icd10_range": [
301
+ "I60-I69"
302
+ ],
303
+ "risk_weight": 4,
304
+ "charlson": true,
305
+ "synonyms": [
306
+ "stroke",
307
+ "cerebrovascular accident",
308
+ "cva",
309
+ "ischemic stroke",
310
+ "hemorrhagic stroke",
311
+ "transient ischemic attack",
312
+ "tia",
313
+ "cerebral infarction",
314
+ "intracranial hemorrhage",
315
+ "subarachnoid hemorrhage",
316
+ "subdural hematoma",
317
+ "carotid stenosis",
318
+ "vertebrobasilar insufficiency",
319
+ "cerebrovascular disease",
320
+ "carotid artery stenosis",
321
+ "bell's palsy",
322
+ "facial palsy",
323
+ "cerebral aneurysm",
324
+ "intracranial aneurysm",
325
+ "aneurysm clipping"
326
+ ]
327
+ },
328
+ {
329
+ "id": "mi_ihd",
330
+ "name": "Myocardial Infarction / Ischemic Heart Disease",
331
+ "snomed_ct": "22298006",
332
+ "icd10_range": [
333
+ "I21",
334
+ "I25"
335
+ ],
336
+ "risk_weight": 5,
337
+ "charlson": true,
338
+ "synonyms": [
339
+ "myocardial infarction",
340
+ "mi",
341
+ "heart attack",
342
+ "stemi",
343
+ "nstemi",
344
+ "acute coronary syndrome",
345
+ "acs",
346
+ "coronary artery disease",
347
+ "cad",
348
+ "unstable angina",
349
+ "angina pectoris",
350
+ "angina",
351
+ "triple vessel disease",
352
+ "left main disease",
353
+ "coronary occlusion",
354
+ "coronary thrombosis",
355
+ "ischemic heart disease",
356
+ "ihd",
357
+ "chest pain",
358
+ "angina equivalent",
359
+ "troponin elevation"
360
+ ]
361
+ },
362
+ {
363
+ "id": "atrial_fibrillation",
364
+ "name": "Atrial Fibrillation / Arrhythmia",
365
+ "snomed_ct": "49436004",
366
+ "icd10_range": [
367
+ "I48"
368
+ ],
369
+ "risk_weight": 3,
370
+ "charlson": false,
371
+ "synonyms": [
372
+ "atrial fibrillation",
373
+ "afib",
374
+ "a-fib",
375
+ "atrial flutter",
376
+ "supraventricular tachycardia",
377
+ "svt",
378
+ "ventricular tachycardia",
379
+ "vtach",
380
+ "ventricular fibrillation",
381
+ "vfib",
382
+ "arrhythmia",
383
+ "cardiac arrhythmia",
384
+ "sick sinus syndrome",
385
+ "bradycardia",
386
+ "heart block",
387
+ "av block",
388
+ "bundle branch block",
389
+ "prolonged qt",
390
+ "wolff-parkinson-white",
391
+ "wpw",
392
+ "paroxysmal atrial fibrillation",
393
+ "atrial tachycardia",
394
+ "palpitations",
395
+ "tachyarrhythmia",
396
+ "pacing",
397
+ "pacemaker"
398
+ ]
399
+ },
400
+ {
401
+ "id": "pvd",
402
+ "name": "Peripheral Vascular Disease",
403
+ "snomed_ct": "400047006",
404
+ "icd10_range": [
405
+ "I73"
406
+ ],
407
+ "risk_weight": 3,
408
+ "charlson": true,
409
+ "synonyms": [
410
+ "peripheral vascular disease",
411
+ "pvd",
412
+ "peripheral artery disease",
413
+ "pad",
414
+ "claudication",
415
+ "intermittent claudication",
416
+ "critical limb ischemia",
417
+ "gangrene",
418
+ "aortic aneurysm",
419
+ "abdominal aortic aneurysm",
420
+ "aaa",
421
+ "thoracic aortic aneurysm",
422
+ "aortic dissection",
423
+ "varicose veins",
424
+ "venous insufficiency",
425
+ "chronic venous insufficiency"
426
+ ]
427
+ },
428
+ {
429
+ "id": "vte",
430
+ "name": "Venous Thromboembolism",
431
+ "snomed_ct": "111293003",
432
+ "icd10_range": [
433
+ "I26",
434
+ "I82"
435
+ ],
436
+ "risk_weight": 4,
437
+ "charlson": false,
438
+ "synonyms": [
439
+ "pulmonary embolism",
440
+ "pe",
441
+ "dvt",
442
+ "deep vein thrombosis",
443
+ "deep venous thrombosis",
444
+ "venous thromboembolism",
445
+ "vte",
446
+ "saddle embolus",
447
+ "submassive pe",
448
+ "massive pe",
449
+ "iliac vein thrombosis",
450
+ "portal vein thrombosis",
451
+ "thrombophilia",
452
+ "hypercoagulable state",
453
+ "antiphospholipid syndrome"
454
+ ]
455
+ },
456
+ {
457
+ "id": "hypertension",
458
+ "name": "Hypertension",
459
+ "snomed_ct": "38341003",
460
+ "icd10_range": [
461
+ "I10-I15"
462
+ ],
463
+ "risk_weight": 2,
464
+ "charlson": false,
465
+ "synonyms": [
466
+ "hypertension",
467
+ "htn",
468
+ "high blood pressure",
469
+ "essential hypertension",
470
+ "malignant hypertension",
471
+ "hypertensive emergency",
472
+ "hypertensive urgency",
473
+ "resistant hypertension",
474
+ "secondary hypertension",
475
+ "pulmonary hypertension",
476
+ "pulmonary arterial hypertension"
477
+ ]
478
+ },
479
+ {
480
+ "id": "valvular",
481
+ "name": "Valvular Heart Disease",
482
+ "snomed_ct": "368009",
483
+ "icd10_range": [
484
+ "I34-I37"
485
+ ],
486
+ "risk_weight": 3,
487
+ "charlson": false,
488
+ "synonyms": [
489
+ "aortic stenosis",
490
+ "aortic valve stenosis",
491
+ "aortic regurgitation",
492
+ "aortic insufficiency",
493
+ "mitral stenosis",
494
+ "mitral regurgitation",
495
+ "mitral valve prolapse",
496
+ "tricuspid regurgitation",
497
+ "valve replacement",
498
+ "prosthetic valve",
499
+ "bioprosthetic valve",
500
+ "mechanical valve",
501
+ "endocarditis",
502
+ "infective endocarditis"
503
+ ]
504
+ },
505
+ {
506
+ "id": "dementia",
507
+ "name": "Dementia / Cognitive Decline",
508
+ "snomed_ct": "52448006",
509
+ "icd10_range": [
510
+ "F00-F03",
511
+ "G30"
512
+ ],
513
+ "risk_weight": 4,
514
+ "charlson": true,
515
+ "synonyms": [
516
+ "dementia",
517
+ "alzheimer",
518
+ "alzheimer's disease",
519
+ "vascular dementia",
520
+ "lewy body dementia",
521
+ "frontotemporal dementia",
522
+ "cognitive decline",
523
+ "cognitive impairment",
524
+ "mild cognitive impairment",
525
+ "mci",
526
+ "memory loss",
527
+ "encephalopathy",
528
+ "delirium",
529
+ "sundowning"
530
+ ]
531
+ },
532
+ {
533
+ "id": "depression",
534
+ "name": "Depression / Mood Disorders",
535
+ "snomed_ct": "35489007",
536
+ "icd10_range": [
537
+ "F32",
538
+ "F33",
539
+ "F31"
540
+ ],
541
+ "risk_weight": 3,
542
+ "charlson": false,
543
+ "synonyms": [
544
+ "depression",
545
+ "major depressive disorder",
546
+ "mdd",
547
+ "bipolar disorder",
548
+ "bipolar",
549
+ "bipolar i",
550
+ "bipolar ii",
551
+ "dysthymia",
552
+ "persistent depressive disorder",
553
+ "mood disorder",
554
+ "adjustment disorder",
555
+ "postpartum depression",
556
+ "seasonal affective disorder",
557
+ "treatment resistant depression"
558
+ ]
559
+ },
560
+ {
561
+ "id": "psychosis",
562
+ "name": "Psychotic Disorders",
563
+ "snomed_ct": "69322001",
564
+ "icd10_range": [
565
+ "F20-F29"
566
+ ],
567
+ "risk_weight": 4,
568
+ "charlson": false,
569
+ "synonyms": [
570
+ "schizophrenia",
571
+ "schizoaffective disorder",
572
+ "psychosis",
573
+ "psychotic disorder",
574
+ "paranoid schizophrenia",
575
+ "catatonia",
576
+ "delusional disorder",
577
+ "brief psychotic disorder"
578
+ ]
579
+ },
580
+ {
581
+ "id": "anxiety",
582
+ "name": "Anxiety Disorders",
583
+ "snomed_ct": "197480006",
584
+ "icd10_range": [
585
+ "F40",
586
+ "F41"
587
+ ],
588
+ "risk_weight": 2,
589
+ "charlson": false,
590
+ "synonyms": [
591
+ "anxiety",
592
+ "generalized anxiety disorder",
593
+ "gad",
594
+ "panic disorder",
595
+ "panic attack",
596
+ "social anxiety",
597
+ "agoraphobia",
598
+ "phobia",
599
+ "ptsd",
600
+ "post-traumatic stress disorder",
601
+ "obsessive compulsive disorder",
602
+ "ocd",
603
+ "posttraumatic stress disorder",
604
+ "post traumatic stress disorder",
605
+ "adhd",
606
+ "attention deficit hyperactivity disorder",
607
+ "attention deficit disorder"
608
+ ]
609
+ },
610
+ {
611
+ "id": "substance_abuse",
612
+ "name": "Substance Abuse / Dependence",
613
+ "snomed_ct": "66214007",
614
+ "icd10_range": [
615
+ "F10-F19"
616
+ ],
617
+ "risk_weight": 5,
618
+ "charlson": false,
619
+ "synonyms": [
620
+ "alcohol abuse",
621
+ "alcohol dependence",
622
+ "alcoholism",
623
+ "alcohol withdrawal",
624
+ "alcohol use disorder",
625
+ "drug abuse",
626
+ "substance abuse",
627
+ "substance use disorder",
628
+ "opioid dependence",
629
+ "opioid use disorder",
630
+ "cocaine abuse",
631
+ "cocaine dependence",
632
+ "benzodiazepine dependence",
633
+ "polysubstance abuse",
634
+ "drug overdose",
635
+ "heroin abuse",
636
+ "methamphetamine abuse",
637
+ "cannabis use disorder",
638
+ "tobacco use disorder",
639
+ "nicotine dependence",
640
+ "intravenous drug use",
641
+ "ivdu",
642
+ "tobacco abuse",
643
+ "smoking",
644
+ "tobacco dependence",
645
+ "tobacco use"
646
+ ]
647
+ },
648
+ {
649
+ "id": "sepsis",
650
+ "name": "Sepsis / Severe Infection",
651
+ "snomed_ct": "91302008",
652
+ "icd10_range": [
653
+ "A40",
654
+ "A41",
655
+ "R65.2"
656
+ ],
657
+ "risk_weight": 6,
658
+ "charlson": false,
659
+ "synonyms": [
660
+ "sepsis",
661
+ "severe sepsis",
662
+ "septic shock",
663
+ "bacteremia",
664
+ "fungemia",
665
+ "urosepsis",
666
+ "septicemia",
667
+ "systemic inflammatory response",
668
+ "sirs",
669
+ "blood stream infection",
670
+ "bsi"
671
+ ]
672
+ },
673
+ {
674
+ "id": "pneumonia",
675
+ "name": "Pneumonia / Lower Respiratory Infection",
676
+ "snomed_ct": "233604007",
677
+ "icd10_range": [
678
+ "J12-J18"
679
+ ],
680
+ "risk_weight": 4,
681
+ "charlson": false,
682
+ "synonyms": [
683
+ "pneumonia",
684
+ "community acquired pneumonia",
685
+ "cap",
686
+ "hospital acquired pneumonia",
687
+ "hap",
688
+ "ventilator associated pneumonia",
689
+ "vap",
690
+ "aspiration pneumonia",
691
+ "aspiration",
692
+ "lung abscess",
693
+ "empyema",
694
+ "bronchopneumonia",
695
+ "lobar pneumonia",
696
+ "respiratory infection",
697
+ "lower respiratory tract infection",
698
+ "sinusitis",
699
+ "upper respiratory infection",
700
+ "bronchitis",
701
+ "acute bronchitis"
702
+ ]
703
+ },
704
+ {
705
+ "id": "uti",
706
+ "name": "Urinary Tract Infection",
707
+ "snomed_ct": "68566005",
708
+ "icd10_range": [
709
+ "N39.0"
710
+ ],
711
+ "risk_weight": 2,
712
+ "charlson": false,
713
+ "synonyms": [
714
+ "urinary tract infection",
715
+ "uti",
716
+ "pyelonephritis",
717
+ "cystitis",
718
+ "urosepsis",
719
+ "catheter associated uti",
720
+ "cauti"
721
+ ]
722
+ },
723
+ {
724
+ "id": "gi_bleed",
725
+ "name": "GI Hemorrhage",
726
+ "snomed_ct": "74474003",
727
+ "icd10_range": [
728
+ "K92.0",
729
+ "K92.1",
730
+ "K92.2"
731
+ ],
732
+ "risk_weight": 5,
733
+ "charlson": false,
734
+ "synonyms": [
735
+ "gastrointestinal hemorrhage",
736
+ "gi bleed",
737
+ "gi bleeding",
738
+ "upper gi bleed",
739
+ "lower gi bleed",
740
+ "melena",
741
+ "hematochezia",
742
+ "hematemesis",
743
+ "variceal bleeding",
744
+ "peptic ulcer bleeding",
745
+ "diverticular bleeding",
746
+ "rectal bleeding",
747
+ "gastrointestinal bleeding",
748
+ "gi hemorrhage"
749
+ ]
750
+ },
751
+ {
752
+ "id": "gi_disease",
753
+ "name": "GI Disease (non-hemorrhage)",
754
+ "snomed_ct": "119292006",
755
+ "icd10_range": [
756
+ "K00-K93"
757
+ ],
758
+ "risk_weight": 2,
759
+ "charlson": false,
760
+ "synonyms": [
761
+ "gastroesophageal reflux disease",
762
+ "gerd",
763
+ "peptic ulcer",
764
+ "gastric ulcer",
765
+ "duodenal ulcer",
766
+ "crohn's disease",
767
+ "crohn disease",
768
+ "ulcerative colitis",
769
+ "inflammatory bowel disease",
770
+ "ibd",
771
+ "diverticulitis",
772
+ "diverticulosis",
773
+ "pancreatitis",
774
+ "acute pancreatitis",
775
+ "chronic pancreatitis",
776
+ "cholecystitis",
777
+ "cholelithiasis",
778
+ "gallstones",
779
+ "cholangitis",
780
+ "bowel obstruction",
781
+ "small bowel obstruction",
782
+ "ileus",
783
+ "celiac disease",
784
+ "gastroparesis",
785
+ "hiatal hernia",
786
+ "esophagitis",
787
+ "irritable bowel syndrome",
788
+ "ibs",
789
+ "clostridium difficile",
790
+ "c diff",
791
+ "c. difficile",
792
+ "appendicitis",
793
+ "peritonitis",
794
+ "constipation",
795
+ "chronic constipation",
796
+ "gastritis",
797
+ "hemorrhoids",
798
+ "dysphagia",
799
+ "nausea",
800
+ "esophageal stricture",
801
+ "colon polyps",
802
+ "choledocholithiasis",
803
+ "common bile duct stone",
804
+ "helicobacter pylori",
805
+ "h pylori",
806
+ "diarrhea",
807
+ "vomiting",
808
+ "abdominal pain",
809
+ "colonic polyp",
810
+ "rectal polyp",
811
+ "barrett's esophagus",
812
+ "biliary colic",
813
+ "gastroenteritis",
814
+ "food poisoning",
815
+ "colitis"
816
+ ]
817
+ },
818
+ {
819
+ "id": "anemia",
820
+ "name": "Anemia (chronic)",
821
+ "snomed_ct": "271737000",
822
+ "icd10_range": [
823
+ "D50-D64"
824
+ ],
825
+ "risk_weight": 3,
826
+ "charlson": false,
827
+ "synonyms": [
828
+ "anemia",
829
+ "iron deficiency anemia",
830
+ "anemia of chronic disease",
831
+ "chronic anemia",
832
+ "megaloblastic anemia",
833
+ "b12 deficiency",
834
+ "folate deficiency",
835
+ "pancytopenia",
836
+ "aplastic anemia",
837
+ "hemolytic anemia",
838
+ "sickle cell disease",
839
+ "sickle cell anemia",
840
+ "thalassemia",
841
+ "myelodysplastic syndrome"
842
+ ]
843
+ },
844
+ {
845
+ "id": "coagulopathy",
846
+ "name": "Coagulopathy / Bleeding Disorder",
847
+ "snomed_ct": "234466008",
848
+ "icd10_range": [
849
+ "D65-D69"
850
+ ],
851
+ "risk_weight": 4,
852
+ "charlson": false,
853
+ "synonyms": [
854
+ "thrombocytopenia",
855
+ "coagulopathy",
856
+ "dic",
857
+ "disseminated intravascular coagulation",
858
+ "heparin induced thrombocytopenia",
859
+ "hit",
860
+ "immune thrombocytopenic purpura",
861
+ "itp",
862
+ "von willebrand disease",
863
+ "hemophilia",
864
+ "anticoagulant related bleeding"
865
+ ]
866
+ },
867
+ {
868
+ "id": "obesity",
869
+ "name": "Obesity",
870
+ "snomed_ct": "414916001",
871
+ "icd10_range": [
872
+ "E66"
873
+ ],
874
+ "risk_weight": 2,
875
+ "charlson": false,
876
+ "synonyms": [
877
+ "obesity",
878
+ "morbid obesity",
879
+ "severe obesity",
880
+ "obese",
881
+ "bmi over 30",
882
+ "bmi over 40",
883
+ "class iii obesity",
884
+ "bariatric"
885
+ ]
886
+ },
887
+ {
888
+ "id": "thyroid",
889
+ "name": "Thyroid Disorders",
890
+ "snomed_ct": "14304000",
891
+ "icd10_range": [
892
+ "E00-E07"
893
+ ],
894
+ "risk_weight": 1,
895
+ "charlson": false,
896
+ "synonyms": [
897
+ "hypothyroidism",
898
+ "hyperthyroidism",
899
+ "thyroid disease",
900
+ "hashimoto",
901
+ "graves disease",
902
+ "thyroiditis",
903
+ "thyroid nodule",
904
+ "thyroid cancer",
905
+ "myxedema",
906
+ "thyroid storm",
907
+ "hyperparathyroidism",
908
+ "hypoparathyroidism",
909
+ "parathyroid disease"
910
+ ]
911
+ },
912
+ {
913
+ "id": "asthma",
914
+ "name": "Asthma",
915
+ "snomed_ct": "195967001",
916
+ "icd10_range": [
917
+ "J45"
918
+ ],
919
+ "risk_weight": 2,
920
+ "charlson": false,
921
+ "synonyms": [
922
+ "asthma",
923
+ "asthma exacerbation",
924
+ "acute asthma",
925
+ "status asthmaticus",
926
+ "reactive airway disease",
927
+ "bronchospasm",
928
+ "exercise induced asthma",
929
+ "allergic asthma"
930
+ ]
931
+ },
932
+ {
933
+ "id": "osa",
934
+ "name": "Obstructive Sleep Apnea",
935
+ "snomed_ct": "78275009",
936
+ "icd10_range": [
937
+ "G47.33"
938
+ ],
939
+ "risk_weight": 2,
940
+ "charlson": false,
941
+ "synonyms": [
942
+ "obstructive sleep apnea",
943
+ "osa",
944
+ "sleep apnea",
945
+ "central sleep apnea",
946
+ "sleep disordered breathing",
947
+ "cpap dependent"
948
+ ]
949
+ },
950
+ {
951
+ "id": "seizure",
952
+ "name": "Seizure Disorder / Epilepsy",
953
+ "snomed_ct": "84757009",
954
+ "icd10_range": [
955
+ "G40"
956
+ ],
957
+ "risk_weight": 3,
958
+ "charlson": false,
959
+ "synonyms": [
960
+ "seizure disorder",
961
+ "epilepsy",
962
+ "seizure",
963
+ "status epilepticus",
964
+ "convulsion",
965
+ "tonic-clonic seizure",
966
+ "grand mal seizure",
967
+ "focal seizure",
968
+ "absence seizure",
969
+ "breakthrough seizure",
970
+ "vertigo",
971
+ "dizziness",
972
+ "syncope",
973
+ "presyncope",
974
+ "ataxia",
975
+ "cerebellar ataxia",
976
+ "gait instability"
977
+ ]
978
+ },
979
+ {
980
+ "id": "falls_fracture",
981
+ "name": "Falls / Fracture",
982
+ "snomed_ct": "217082002",
983
+ "icd10_range": [
984
+ "W00-W19",
985
+ "S72"
986
+ ],
987
+ "risk_weight": 3,
988
+ "charlson": false,
989
+ "synonyms": [
990
+ "fall",
991
+ "falls",
992
+ "mechanical fall",
993
+ "fracture",
994
+ "hip fracture",
995
+ "femur fracture",
996
+ "vertebral fracture",
997
+ "compression fracture",
998
+ "rib fracture",
999
+ "pelvic fracture",
1000
+ "pathologic fracture",
1001
+ "fragility fracture"
1002
+ ]
1003
+ },
1004
+ {
1005
+ "id": "wound_infection",
1006
+ "name": "Wound / Skin Infection",
1007
+ "snomed_ct": "128045006",
1008
+ "icd10_range": [
1009
+ "L00-L08"
1010
+ ],
1011
+ "risk_weight": 3,
1012
+ "charlson": false,
1013
+ "synonyms": [
1014
+ "cellulitis",
1015
+ "abscess",
1016
+ "wound infection",
1017
+ "surgical site infection",
1018
+ "ssi",
1019
+ "osteomyelitis",
1020
+ "necrotizing fasciitis",
1021
+ "pressure ulcer",
1022
+ "pressure injury",
1023
+ "decubitus",
1024
+ "diabetic foot infection",
1025
+ "skin infection",
1026
+ "mucositis",
1027
+ "oral mucositis",
1028
+ "stomatitis",
1029
+ "discitis",
1030
+ "vertebral discitis",
1031
+ "spinal infection"
1032
+ ]
1033
+ },
1034
+ {
1035
+ "id": "electrolyte",
1036
+ "name": "Electrolyte Disorders",
1037
+ "snomed_ct": "237840007",
1038
+ "icd10_range": [
1039
+ "E87"
1040
+ ],
1041
+ "risk_weight": 3,
1042
+ "charlson": false,
1043
+ "synonyms": [
1044
+ "hyponatremia",
1045
+ "hypernatremia",
1046
+ "hypokalemia",
1047
+ "hyperkalemia",
1048
+ "hypocalcemia",
1049
+ "hypercalcemia",
1050
+ "hypomagnesemia",
1051
+ "hypermagnesemia",
1052
+ "hypophosphatemia",
1053
+ "metabolic acidosis",
1054
+ "metabolic alkalosis",
1055
+ "electrolyte imbalance",
1056
+ "electrolyte abnormality",
1057
+ "hyperglycemia",
1058
+ "hypoglycemia",
1059
+ "lactic acidosis"
1060
+ ]
1061
+ },
1062
+ {
1063
+ "id": "malnutrition",
1064
+ "name": "Malnutrition / Failure to Thrive",
1065
+ "snomed_ct": "248325000",
1066
+ "icd10_range": [
1067
+ "E40-E46",
1068
+ "R62"
1069
+ ],
1070
+ "risk_weight": 4,
1071
+ "charlson": false,
1072
+ "synonyms": [
1073
+ "malnutrition",
1074
+ "protein calorie malnutrition",
1075
+ "cachexia",
1076
+ "failure to thrive",
1077
+ "kwashiorkor",
1078
+ "marasmus",
1079
+ "severe malnutrition",
1080
+ "nutritional deficiency",
1081
+ "sarcopenia",
1082
+ "wasting"
1083
+ ]
1084
+ },
1085
+ {
1086
+ "id": "connective_tissue",
1087
+ "name": "Connective Tissue / Autoimmune Disease",
1088
+ "snomed_ct": "105969002",
1089
+ "icd10_range": [
1090
+ "M30-M36"
1091
+ ],
1092
+ "risk_weight": 3,
1093
+ "charlson": true,
1094
+ "synonyms": [
1095
+ "rheumatoid arthritis",
1096
+ "lupus",
1097
+ "systemic lupus erythematosus",
1098
+ "sle",
1099
+ "scleroderma",
1100
+ "vasculitis",
1101
+ "polymyalgia rheumatica",
1102
+ "dermatomyositis",
1103
+ "polymyositis",
1104
+ "sjogren syndrome",
1105
+ "mixed connective tissue disease",
1106
+ "ankylosing spondylitis",
1107
+ "multiple sclerosis",
1108
+ "ms",
1109
+ "psoriasis",
1110
+ "psoriatic arthritis"
1111
+ ]
1112
+ },
1113
+ {
1114
+ "id": "hiv_aids",
1115
+ "name": "HIV / AIDS",
1116
+ "snomed_ct": "86406008",
1117
+ "icd10_range": [
1118
+ "B20-B24"
1119
+ ],
1120
+ "risk_weight": 4,
1121
+ "charlson": true,
1122
+ "synonyms": [
1123
+ "hiv",
1124
+ "aids",
1125
+ "human immunodeficiency virus",
1126
+ "acquired immunodeficiency syndrome",
1127
+ "hiv positive",
1128
+ "hiv infection"
1129
+ ]
1130
+ },
1131
+ {
1132
+ "id": "transplant",
1133
+ "name": "Organ Transplant",
1134
+ "snomed_ct": "77465005",
1135
+ "icd10_range": [
1136
+ "Z94"
1137
+ ],
1138
+ "risk_weight": 5,
1139
+ "charlson": false,
1140
+ "synonyms": [
1141
+ "transplant",
1142
+ "organ transplant",
1143
+ "kidney transplant",
1144
+ "liver transplant",
1145
+ "heart transplant",
1146
+ "lung transplant",
1147
+ "bone marrow transplant",
1148
+ "stem cell transplant",
1149
+ "graft versus host disease",
1150
+ "gvhd",
1151
+ "transplant rejection",
1152
+ "immunosuppression"
1153
+ ]
1154
+ },
1155
+ {
1156
+ "id": "hyperlipidemia",
1157
+ "name": "Hyperlipidemia",
1158
+ "snomed_ct": "55822004",
1159
+ "icd10_range": [
1160
+ "E78"
1161
+ ],
1162
+ "risk_weight": 1,
1163
+ "charlson": false,
1164
+ "synonyms": [
1165
+ "hyperlipidemia",
1166
+ "hypercholesterolemia",
1167
+ "dyslipidemia",
1168
+ "high cholesterol",
1169
+ "hypertriglyceridemia",
1170
+ "mixed hyperlipidemia"
1171
+ ]
1172
+ },
1173
+ {
1174
+ "id": "bph_urological",
1175
+ "name": "BPH / Urological",
1176
+ "snomed_ct": "266569009",
1177
+ "icd10_range": [
1178
+ "N40"
1179
+ ],
1180
+ "risk_weight": 1,
1181
+ "charlson": false,
1182
+ "synonyms": [
1183
+ "benign prostatic hyperplasia",
1184
+ "bph",
1185
+ "benign prostatic hypertrophy",
1186
+ "urinary retention",
1187
+ "urinary incontinence",
1188
+ "overactive bladder",
1189
+ "neurogenic bladder",
1190
+ "nephrolithiasis",
1191
+ "kidney stone",
1192
+ "renal calculus",
1193
+ "ureteral stone",
1194
+ "hydronephrosis",
1195
+ "hematuria",
1196
+ "gross hematuria",
1197
+ "microscopic hematuria",
1198
+ "uterine fibroids",
1199
+ "endometriosis",
1200
+ "ovarian cyst",
1201
+ "renal stone",
1202
+ "urolithiasis",
1203
+ "bladder stone"
1204
+ ]
1205
+ },
1206
+ {
1207
+ "id": "osteoarthritis",
1208
+ "name": "Osteoarthritis / Degenerative Joint",
1209
+ "snomed_ct": "396275006",
1210
+ "icd10_range": [
1211
+ "M15-M19"
1212
+ ],
1213
+ "risk_weight": 1,
1214
+ "charlson": false,
1215
+ "synonyms": [
1216
+ "osteoarthritis",
1217
+ "degenerative joint disease",
1218
+ "djd",
1219
+ "joint replacement",
1220
+ "knee replacement",
1221
+ "hip replacement",
1222
+ "total knee arthroplasty",
1223
+ "total hip arthroplasty",
1224
+ "spinal stenosis",
1225
+ "cervical spondylosis",
1226
+ "lumbar spondylosis",
1227
+ "degenerative disc disease",
1228
+ "herniated disc",
1229
+ "scoliosis",
1230
+ "kyphosis",
1231
+ "spinal deformity",
1232
+ "plantar fasciitis",
1233
+ "tendinitis",
1234
+ "bursitis",
1235
+ "rotator cuff"
1236
+ ]
1237
+ },
1238
+ {
1239
+ "id": "osteoporosis",
1240
+ "name": "Osteoporosis",
1241
+ "snomed_ct": "64859006",
1242
+ "icd10_range": [
1243
+ "M80-M81"
1244
+ ],
1245
+ "risk_weight": 2,
1246
+ "charlson": false,
1247
+ "synonyms": [
1248
+ "osteoporosis",
1249
+ "osteopenia",
1250
+ "low bone density",
1251
+ "bone loss",
1252
+ "vitamin d deficiency"
1253
+ ]
1254
+ },
1255
+ {
1256
+ "id": "gout",
1257
+ "name": "Gout / Crystal Arthropathy",
1258
+ "snomed_ct": "90560007",
1259
+ "icd10_range": [
1260
+ "M10"
1261
+ ],
1262
+ "risk_weight": 1,
1263
+ "charlson": false,
1264
+ "synonyms": [
1265
+ "gout",
1266
+ "gouty arthritis",
1267
+ "gout flare",
1268
+ "pseudogout",
1269
+ "calcium pyrophosphate",
1270
+ "crystal arthropathy",
1271
+ "hyperuricemia"
1272
+ ]
1273
+ },
1274
+ {
1275
+ "id": "migraine",
1276
+ "name": "Migraine / Headache Disorders",
1277
+ "snomed_ct": "37796009",
1278
+ "icd10_range": [
1279
+ "G43"
1280
+ ],
1281
+ "risk_weight": 1,
1282
+ "charlson": false,
1283
+ "synonyms": [
1284
+ "migraine",
1285
+ "migraine with aura",
1286
+ "migraine without aura",
1287
+ "chronic migraine",
1288
+ "tension headache",
1289
+ "cluster headache"
1290
+ ]
1291
+ },
1292
+ {
1293
+ "id": "glaucoma",
1294
+ "name": "Glaucoma / Eye Disease",
1295
+ "snomed_ct": "23986001",
1296
+ "icd10_range": [
1297
+ "H40"
1298
+ ],
1299
+ "risk_weight": 1,
1300
+ "charlson": false,
1301
+ "synonyms": [
1302
+ "glaucoma",
1303
+ "open angle glaucoma",
1304
+ "angle closure glaucoma",
1305
+ "macular degeneration",
1306
+ "cataracts",
1307
+ "diabetic retinopathy",
1308
+ "retinal detachment",
1309
+ "blindness",
1310
+ "vision loss",
1311
+ "visual impairment",
1312
+ "macular hole"
1313
+ ]
1314
+ },
1315
+ {
1316
+ "id": "peripheral_neuropathy",
1317
+ "name": "Peripheral Neuropathy",
1318
+ "snomed_ct": "302226006",
1319
+ "icd10_range": [
1320
+ "G60-G64"
1321
+ ],
1322
+ "risk_weight": 2,
1323
+ "charlson": false,
1324
+ "synonyms": [
1325
+ "peripheral neuropathy",
1326
+ "neuropathy",
1327
+ "polyneuropathy",
1328
+ "mononeuropathy",
1329
+ "carpal tunnel",
1330
+ "radiculopathy",
1331
+ "sciatica",
1332
+ "nerve entrapment",
1333
+ "foot drop",
1334
+ "peroneal neuropathy",
1335
+ "hearing loss",
1336
+ "sensorineural hearing loss",
1337
+ "hard of hearing",
1338
+ "hearing impairment",
1339
+ "deafness",
1340
+ "tinnitus"
1341
+ ]
1342
+ },
1343
+ {
1344
+ "id": "pleural_respiratory",
1345
+ "name": "Pleural / Pulmonary Effusion",
1346
+ "snomed_ct": "60046008",
1347
+ "icd10_range": [
1348
+ "J90",
1349
+ "J91"
1350
+ ],
1351
+ "risk_weight": 3,
1352
+ "charlson": false,
1353
+ "synonyms": [
1354
+ "pleural effusion",
1355
+ "pulmonary edema",
1356
+ "pulmonary effusion",
1357
+ "hydrothorax",
1358
+ "empyema",
1359
+ "hemothorax",
1360
+ "pneumothorax",
1361
+ "pleurisy",
1362
+ "pulmonary fibrosis",
1363
+ "interstitial lung disease",
1364
+ "bronchiectasis",
1365
+ "atelectasis",
1366
+ "pulmonary nodule",
1367
+ "lung nodule",
1368
+ "pericardial effusion",
1369
+ "pericarditis",
1370
+ "pericardial tamponade"
1371
+ ]
1372
+ },
1373
+ {
1374
+ "id": "hypotension_shock",
1375
+ "name": "Hypotension / Shock",
1376
+ "snomed_ct": "45007003",
1377
+ "icd10_range": [
1378
+ "I95",
1379
+ "R57"
1380
+ ],
1381
+ "risk_weight": 4,
1382
+ "charlson": false,
1383
+ "synonyms": [
1384
+ "hypotension",
1385
+ "orthostatic hypotension",
1386
+ "postural hypotension",
1387
+ "shock",
1388
+ "cardiogenic shock",
1389
+ "hypovolemic shock",
1390
+ "distributive shock",
1391
+ "hemorrhagic shock",
1392
+ "vasopressor dependent",
1393
+ "vasovagal",
1394
+ "neurocardiogenic syncope"
1395
+ ]
1396
+ },
1397
+ {
1398
+ "id": "chronic_pain",
1399
+ "name": "Chronic Pain Syndrome",
1400
+ "snomed_ct": "82423001",
1401
+ "icd10_range": [
1402
+ "G89"
1403
+ ],
1404
+ "risk_weight": 2,
1405
+ "charlson": false,
1406
+ "synonyms": [
1407
+ "chronic pain",
1408
+ "chronic back pain",
1409
+ "chronic low back pain",
1410
+ "fibromyalgia",
1411
+ "complex regional pain syndrome",
1412
+ "crps",
1413
+ "neuropathic pain",
1414
+ "chronic headache",
1415
+ "chronic neck pain",
1416
+ "back pain",
1417
+ "low back pain",
1418
+ "neck pain",
1419
+ "joint pain"
1420
+ ]
1421
+ },
1422
+ {
1423
+ "id": "hernia",
1424
+ "name": "Hernia",
1425
+ "snomed_ct": "414403008",
1426
+ "icd10_range": [
1427
+ "K40-K46"
1428
+ ],
1429
+ "risk_weight": 1,
1430
+ "charlson": false,
1431
+ "synonyms": [
1432
+ "hernia",
1433
+ "inguinal hernia",
1434
+ "umbilical hernia",
1435
+ "incisional hernia",
1436
+ "ventral hernia",
1437
+ "hiatal hernia",
1438
+ "femoral hernia",
1439
+ "paraesophageal hernia"
1440
+ ]
1441
+ },
1442
+ {
1443
+ "id": "hematologic_abnormality",
1444
+ "name": "Hematologic Abnormalities",
1445
+ "snomed_ct": "414022008",
1446
+ "icd10_range": [
1447
+ "D70-D77"
1448
+ ],
1449
+ "risk_weight": 2,
1450
+ "charlson": false,
1451
+ "synonyms": [
1452
+ "leukocytosis",
1453
+ "lymphadenopathy",
1454
+ "neutropenia",
1455
+ "agranulocytosis",
1456
+ "lymphopenia",
1457
+ "eosinophilia",
1458
+ "splenomegaly",
1459
+ "lymphoma",
1460
+ "monoclonal gammopathy",
1461
+ "polycythemia",
1462
+ "thrombocytosis",
1463
+ "elevated wbc"
1464
+ ]
1465
+ },
1466
+ {
1467
+ "id": "sleep_insomnia",
1468
+ "name": "Sleep / Insomnia Disorders",
1469
+ "snomed_ct": "193462001",
1470
+ "icd10_range": [
1471
+ "G47"
1472
+ ],
1473
+ "risk_weight": 1,
1474
+ "charlson": false,
1475
+ "synonyms": [
1476
+ "insomnia",
1477
+ "sleep disorder",
1478
+ "restless leg syndrome",
1479
+ "narcolepsy",
1480
+ "parasomnia"
1481
+ ]
1482
+ },
1483
+ {
1484
+ "id": "allergy_immunology",
1485
+ "name": "Allergy / Immunological",
1486
+ "snomed_ct": "419076005",
1487
+ "icd10_range": [
1488
+ "J30",
1489
+ "T78"
1490
+ ],
1491
+ "risk_weight": 1,
1492
+ "charlson": false,
1493
+ "synonyms": [
1494
+ "allergic rhinitis",
1495
+ "allergy",
1496
+ "allergies",
1497
+ "drug allergy",
1498
+ "food allergy",
1499
+ "anaphylaxis",
1500
+ "angioedema",
1501
+ "urticaria",
1502
+ "eczema",
1503
+ "dermatitis",
1504
+ "atopic dermatitis",
1505
+ "contact dermatitis",
1506
+ "psoriasis",
1507
+ "hirsutism",
1508
+ "alopecia areata"
1509
+ ]
1510
+ },
1511
+ {
1512
+ "id": "fever_infection",
1513
+ "name": "Fever / Systemic Infection",
1514
+ "snomed_ct": "386661006",
1515
+ "icd10_range": [
1516
+ "R50"
1517
+ ],
1518
+ "risk_weight": 2,
1519
+ "charlson": false,
1520
+ "synonyms": [
1521
+ "fever",
1522
+ "febrile",
1523
+ "pyrexia",
1524
+ "chills",
1525
+ "rigors",
1526
+ "night sweats",
1527
+ "infection",
1528
+ "viral infection",
1529
+ "bacterial infection",
1530
+ "mumps",
1531
+ "measles",
1532
+ "chickenpox",
1533
+ "shingles",
1534
+ "herpes zoster",
1535
+ "herpes simplex"
1536
+ ]
1537
+ },
1538
+ {
1539
+ "id": "pregnancy_ob",
1540
+ "name": "Pregnancy / Obstetric",
1541
+ "snomed_ct": "77386006",
1542
+ "icd10_range": [
1543
+ "O00-O99"
1544
+ ],
1545
+ "risk_weight": 1,
1546
+ "charlson": false,
1547
+ "synonyms": [
1548
+ "pregnancy",
1549
+ "pregnant",
1550
+ "preeclampsia",
1551
+ "eclampsia",
1552
+ "gestational diabetes",
1553
+ "preterm labor",
1554
+ "cesarean section",
1555
+ "postpartum",
1556
+ "ectopic pregnancy",
1557
+ "miscarriage"
1558
+ ]
1559
+ },
1560
+ {
1561
+ "id": "skin_dermatologic",
1562
+ "name": "Dermatologic / Skin Conditions",
1563
+ "snomed_ct": "95320005",
1564
+ "icd10_range": [
1565
+ "L00-L99"
1566
+ ],
1567
+ "risk_weight": 1,
1568
+ "charlson": false,
1569
+ "synonyms": [
1570
+ "rash",
1571
+ "skin lesion",
1572
+ "pruritus",
1573
+ "alopecia",
1574
+ "acne",
1575
+ "wound",
1576
+ "laceration",
1577
+ "burn",
1578
+ "skin graft",
1579
+ "basal cell carcinoma",
1580
+ "squamous cell carcinoma skin"
1581
+ ]
1582
+ }
1583
+ ]
1584
+ }
Analysis_Readmission/config/symptom_urgency_groups.json ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_meta": {
3
+ "version": "1.0",
4
+ "description": "SNOMED-CT symptom urgency groups for SYMPTOMS cluster. Maps free-text symptoms to clinical urgency categories with readmission risk weights.",
5
+ "notes": [
6
+ "Each group represents a clinical urgency category.",
7
+ "risk_weight (0-5) reflects urgency/impact on readmission.",
8
+ "severity_multiplier: severe=1.5, yes=1.0, no=0.0",
9
+ "Synonyms are lowercase for case-insensitive matching."
10
+ ]
11
+ },
12
+ "groups": [
13
+ {
14
+ "id": "respiratory_distress",
15
+ "name": "Respiratory Distress",
16
+ "snomed_ct": "267036007",
17
+ "risk_weight": 4,
18
+ "synonyms": [
19
+ "dyspnea", "shortness of breath", "sob",
20
+ "difficulty breathing", "respiratory distress",
21
+ "air hunger", "orthopnea",
22
+ "paroxysmal nocturnal dyspnea", "pnd",
23
+ "breathlessness", "tachypnea",
24
+ "labored breathing", "respiratory failure"
25
+ ]
26
+ },
27
+ {
28
+ "id": "cardiac_symptoms",
29
+ "name": "Cardiac Symptoms",
30
+ "snomed_ct": "29857009",
31
+ "risk_weight": 4,
32
+ "synonyms": [
33
+ "chest pain", "chest tightness", "chest pressure",
34
+ "angina", "palpitations", "irregular heartbeat",
35
+ "racing heart", "tachycardia",
36
+ "bradycardia", "heart racing",
37
+ "substernal chest pain", "precordial pain"
38
+ ]
39
+ },
40
+ {
41
+ "id": "neurological_symptoms",
42
+ "name": "Neurological Symptoms",
43
+ "snomed_ct": "102957003",
44
+ "risk_weight": 4,
45
+ "synonyms": [
46
+ "confusion", "altered mental status",
47
+ "disorientation", "lethargy", "obtunded",
48
+ "unresponsive", "syncope", "loss of consciousness",
49
+ "seizure", "convulsion", "tremor",
50
+ "aphasia", "dysarthria", "slurred speech",
51
+ "weakness", "hemiparesis", "hemiplegia",
52
+ "numbness", "tingling", "paresthesia",
53
+ "visual changes", "blurred vision", "blurry vision", "diplopia",
54
+ "facial droop", "delirium", "photophobia",
55
+ "vertigo", "ataxia", "gait instability"
56
+ ]
57
+ },
58
+ {
59
+ "id": "gi_symptoms",
60
+ "name": "GI Symptoms",
61
+ "snomed_ct": "422587007",
62
+ "risk_weight": 2,
63
+ "synonyms": [
64
+ "nausea", "vomiting", "emesis",
65
+ "abdominal pain", "abdominal distension",
66
+ "bloating", "diarrhea", "constipation",
67
+ "melena", "hematochezia", "hematemesis",
68
+ "blood in stool", "rectal bleeding",
69
+ "dysphagia", "difficulty swallowing",
70
+ "anorexia", "loss of appetite",
71
+ "early satiety", "heartburn"
72
+ ]
73
+ },
74
+ {
75
+ "id": "pain",
76
+ "name": "Pain (significant)",
77
+ "snomed_ct": "22253000",
78
+ "risk_weight": 2,
79
+ "synonyms": [
80
+ "pain", "severe pain", "acute pain",
81
+ "chronic pain", "back pain", "flank pain",
82
+ "headache", "migraine",
83
+ "joint pain", "arthralgia", "myalgia",
84
+ "bone pain", "neck pain",
85
+ "pleuritic pain", "pleurisy"
86
+ ]
87
+ },
88
+ {
89
+ "id": "fever_infection",
90
+ "name": "Fever / Infection Signs",
91
+ "snomed_ct": "386661006",
92
+ "risk_weight": 3,
93
+ "synonyms": [
94
+ "fever", "febrile", "chills", "rigors",
95
+ "night sweats", "diaphoresis", "sweats",
96
+ "malaise", "body aches",
97
+ "purulent drainage", "wound drainage"
98
+ ]
99
+ },
100
+ {
101
+ "id": "edema_fluid",
102
+ "name": "Edema / Fluid Overload",
103
+ "snomed_ct": "267038008",
104
+ "risk_weight": 3,
105
+ "synonyms": [
106
+ "edema", "swelling", "peripheral edema",
107
+ "lower extremity edema", "pitting edema",
108
+ "anasarca", "ascites", "fluid overload",
109
+ "weight gain", "pulmonary edema",
110
+ "pleural effusion"
111
+ ]
112
+ },
113
+ {
114
+ "id": "bleeding",
115
+ "name": "Bleeding / Hemorrhage",
116
+ "snomed_ct": "131148009",
117
+ "risk_weight": 4,
118
+ "synonyms": [
119
+ "bleeding", "hemorrhage",
120
+ "epistaxis", "hemoptysis",
121
+ "hematuria", "bruising", "petechiae",
122
+ "ecchymosis", "purpura"
123
+ ]
124
+ },
125
+ {
126
+ "id": "constitutional",
127
+ "name": "Constitutional Symptoms",
128
+ "snomed_ct": "84229001",
129
+ "risk_weight": 2,
130
+ "synonyms": [
131
+ "fatigue", "weakness", "generalized weakness",
132
+ "malaise", "lethargy", "drowsiness",
133
+ "weight loss", "unintentional weight loss",
134
+ "failure to thrive", "deconditioning",
135
+ "functional decline", "decreased appetite",
136
+ "insomnia", "sleep disturbance"
137
+ ]
138
+ },
139
+ {
140
+ "id": "cough_respiratory",
141
+ "name": "Cough / Upper Respiratory",
142
+ "snomed_ct": "49727002",
143
+ "risk_weight": 2,
144
+ "synonyms": [
145
+ "cough", "productive cough", "dry cough",
146
+ "hemoptysis", "wheezing", "stridor",
147
+ "sore throat", "hoarseness",
148
+ "nasal congestion", "rhinorrhea",
149
+ "sputum production",
150
+ "crackles", "rales", "rhonchi",
151
+ "hypoxia", "desaturation"
152
+ ]
153
+ },
154
+ {
155
+ "id": "skin_symptoms",
156
+ "name": "Skin Symptoms",
157
+ "snomed_ct": "95320005",
158
+ "risk_weight": 1,
159
+ "synonyms": [
160
+ "rash", "skin rash", "pruritus", "itching",
161
+ "jaundice", "pallor", "cyanosis",
162
+ "erythema", "urticaria", "hives",
163
+ "wound", "skin lesion", "skin breakdown"
164
+ ]
165
+ },
166
+ {
167
+ "id": "psychiatric_symptoms",
168
+ "name": "Psychiatric Symptoms",
169
+ "snomed_ct": "74732009",
170
+ "risk_weight": 3,
171
+ "synonyms": [
172
+ "anxiety", "agitation", "restlessness",
173
+ "hallucinations", "delusions", "paranoia",
174
+ "suicidal ideation", "self harm",
175
+ "depression", "depressed mood",
176
+ "mania", "hypomania",
177
+ "insomnia", "psychomotor agitation",
178
+ "psychomotor retardation"
179
+ ]
180
+ },
181
+ {
182
+ "id": "dizziness",
183
+ "name": "Dizziness / Presyncope",
184
+ "snomed_ct": "404640003",
185
+ "risk_weight": 2,
186
+ "synonyms": [
187
+ "dizziness", "lightheadedness",
188
+ "presyncope", "near syncope",
189
+ "unsteadiness", "disequilibrium",
190
+ "postural hypotension"
191
+ ]
192
+ },
193
+ {
194
+ "id": "urinary_symptoms",
195
+ "name": "Urinary Symptoms",
196
+ "snomed_ct": "249274008",
197
+ "risk_weight": 1,
198
+ "synonyms": [
199
+ "dysuria", "frequency", "urgency",
200
+ "urinary retention", "incontinence",
201
+ "hematuria", "oliguria", "anuria",
202
+ "polyuria", "nocturia"
203
+ ]
204
+ }
205
+ ]
206
+ }
Analysis_Readmission/readmission_risk_engine.py ADDED
@@ -0,0 +1,1209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Rule-based 30-day readmission risk classification engine.
3
+
4
+ Reference implementation of the algorithm described in ALGORITHM_DESIGN.md.
5
+
6
+ Input: TOON lines (CLUSTER|Keyword|Value|Timestamp)
7
+ Output: Risk classification + days-to-readmission prediction
8
+
9
+ Usage:
10
+ # From TOON string
11
+ engine = ReadmissionRiskEngine()
12
+ result = engine.score_from_toon(toon_text)
13
+ print(result)
14
+
15
+ # From TOON file
16
+ result = engine.score_from_file("path/to/extraction.txt")
17
+
18
+ # From JSONL training data
19
+ results = engine.score_from_jsonl("dspy_fine_tuning/data/trainset_full.jsonl")
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ import json
25
+ import math
26
+ import re
27
+ from dataclasses import dataclass, field
28
+ from pathlib import Path
29
+ from typing import Any, Dict, List, Optional, Tuple, Union
30
+
31
+
32
+ # ---------------------------------------------------------------------------
33
+ # Data classes
34
+ # ---------------------------------------------------------------------------
35
+
36
@dataclass
class ParsedFact:
    """A single fact parsed from one TOON line (CLUSTER|Keyword|Value|Timestamp)."""

    # First TOON field.
    cluster: str
    # Second TOON field.
    keyword: str
    # Third TOON field: a float when ``is_numeric`` is true, else the raw text.
    value: Union[float, str]
    # Fourth TOON field.
    timestamp: str
    # True when ``value`` holds a parsed number.
    is_numeric: bool
    # Result of the plausibility range check; defaults to passing.
    plausibility_ok: bool = True
44
+
45
+
46
@dataclass
class ClusterScore:
    """Score produced for one cluster, with the reasons behind it."""

    # Name of the cluster that was scored.
    cluster: str
    # Points awarded to the cluster.
    score: int
    # Nominal maximum for the cluster as supplied by the scorer.
    max_score: int
    # Human-readable descriptions of every contribution to ``score``.
    contributing_factors: List[str] = field(default_factory=list)
52
+
53
+
54
@dataclass
class InteractionResult:
    """A triggered interaction pattern and the bonus it contributes."""

    # Identifier of the interaction pattern.
    pattern_id: str
    # Display name of the interaction pattern.
    pattern_name: str
    # Bonus points attached to the pattern.
    bonus: int
    # Free-text explanation of the pattern.
    description: str
60
+
61
+
62
@dataclass
class SurvivalCurve:
    """Cumulative readmission probability, P(readmit by day t), per horizon."""

    # Maps horizon in days to probability,
    # e.g. {7: 0.05, 14: 0.12, 21: 0.18, 30: 0.23}.
    horizons: Dict[int, float]
66
+
67
+
68
@dataclass
class RiskResult:
    """Complete output of one scoring run: scores, risk class, timing, rationale."""

    # --- Scores ---
    composite_score: int
    cluster_scores: Dict[str, ClusterScore]
    interaction_bonus: int
    interactions_triggered: List[InteractionResult]

    # --- Risk classification ---
    probability: float
    risk_category: str  # Low / Medium / High / Critical
    risk_color: str

    # --- Days-to-readmission prediction ---
    estimated_days: float
    days_bucket: str  # "0-7 days" / "8-14 days" / "15-30 days"
    survival_curve: SurvivalCurve

    # --- Explainability ---
    risk_factors: List[str]
    protective_factors: List[str]
    missing_clusters: List[str]
    data_completeness: float
    confidence: str  # high / medium / low

    # --- Raw parsing statistics ---
    n_facts_parsed: int
    n_facts_dropped: int
96
+
97
+
98
+ # ---------------------------------------------------------------------------
99
+ # Constants
100
+ # ---------------------------------------------------------------------------
101
+
102
# Cluster names accepted as the first field of a TOON line.
VALID_CLUSTERS = {
    "DEMOGRAPHICS",
    "VITALS",
    "LABS",
    "PROBLEMS",
    "SYMPTOMS",
    "MEDICATIONS",
    "PROCEDURES",
    "UTILIZATION",
    "DISPOSITION",
}

# Clusters whose values must parse as numbers.
NUMERIC_CLUSTERS = {
    "VITALS",
    "LABS",
    "UTILIZATION",
}

# Clusters in which each (cluster, keyword) pair is kept only once.
OBJECTIVE_CLUSTERS = {
    "DEMOGRAPHICS",
    "VITALS",
    "LABS",
    "UTILIZATION",
    "DISPOSITION",
}
110
+
111
+
112
+ # ---------------------------------------------------------------------------
113
+ # Engine
114
+ # ---------------------------------------------------------------------------
115
+
116
+ class ReadmissionRiskEngine:
117
+ """Main entry point for readmission risk scoring."""
118
+
119
def __init__(self, config_dir: Optional[Path] = None):
    """Load the scoring configuration and cache the derived parameters.

    Args:
        config_dir: Directory holding ``scoring_rules.json``,
            ``snomed_problem_groups.json`` and ``symptom_urgency_groups.json``.
            Defaults to the ``config`` directory next to this module. A plain
            string or other path-like value is accepted as well.
    """
    if config_dir is None:
        config_dir = Path(__file__).parent / "config"
    # Coerce to Path so that a str argument still works with the
    # ``self._config_dir / filename`` join performed in _load_json.
    self._config_dir = Path(config_dir)
    self._scoring_rules = self._load_json("scoring_rules.json")
    self._problem_groups = self._load_json("snomed_problem_groups.json")["groups"]
    self._symptom_groups = self._load_json("symptom_urgency_groups.json")["groups"]

    # Build lookup indexes (normalized synonym -> group id).
    self._problem_synonym_index = self._build_synonym_index(self._problem_groups)
    self._symptom_synonym_index = self._build_synonym_index(self._symptom_groups)

    # Calibration parameters
    cal = self._scoring_rules["_meta"]["calibration"]
    self._alpha = cal["alpha"]
    self._beta = cal["beta"]

    # Days prediction parameters
    days_cfg = self._scoring_rules["DAYS_PREDICTION"]["models"]
    reg = days_cfg["regression"]["parameters"]
    self._d_max = reg["D_max"]
    self._gamma = reg["gamma"]

    surv = days_cfg["survival"]["parameters"]
    self._k_base = surv["k_base"]
144
+
145
+ # -- Loading helpers ----------------------------------------------------
146
+
147
+ def _load_json(self, filename: str) -> Dict[str, Any]:
148
+ p = self._config_dir / filename
149
+ return json.loads(p.read_text(encoding="utf-8"))
150
+
151
+ @staticmethod
152
+ def _build_synonym_index(groups: List[Dict]) -> Dict[str, str]:
153
+ """Map lowercase synonym → group id."""
154
+ idx: Dict[str, str] = {}
155
+ for g in groups:
156
+ gid = g["id"]
157
+ for syn in g.get("synonyms", []):
158
+ key = syn.strip().lower()
159
+ if key not in idx:
160
+ idx[key] = gid
161
+ return idx
162
+
163
+ def _match_to_group(
164
+ self,
165
+ keyword: str,
166
+ synonym_index: Dict[str, str],
167
+ groups: List[Dict],
168
+ ) -> Optional[Dict]:
169
+ """Smart matching: exact > word-boundary substring > raw substring.
170
+
171
+ Avoids false matches like 'tia' in 'essential' by preferring
172
+ word-boundary matches and longer synonyms.
173
+ """
174
+ kw_lower = keyword.strip().lower()
175
+
176
+ # 1) Exact match (full keyword == synonym)
177
+ gid = synonym_index.get(kw_lower)
178
+ if gid:
179
+ return self._group_by_id(groups, gid)
180
+
181
+ # Tokenize keyword into words for word-boundary matching
182
+ kw_words = set(re.split(r"[\s,;/\-()]+", kw_lower))
183
+
184
+ # 2) Word-boundary match: synonym is a whole word within the keyword
185
+ # OR keyword starts/ends with the synonym as a distinct token
186
+ best_wb_match: Optional[str] = None
187
+ best_wb_len = 0
188
+
189
+ # 3) Raw substring match (fallback, requires min 4 chars to avoid noise)
190
+ best_sub_match: Optional[str] = None
191
+ best_sub_len = 0
192
+
193
+ for syn, gid in synonym_index.items():
194
+ if syn not in kw_lower:
195
+ continue
196
+
197
+ # Check if it's a word-boundary match
198
+ is_word_match = (
199
+ syn in kw_words # exact word token
200
+ or kw_lower.startswith(syn + " ")
201
+ or kw_lower.endswith(" " + syn)
202
+ or (" " + syn + " ") in kw_lower
203
+ )
204
+
205
+ if is_word_match and len(syn) > best_wb_len:
206
+ best_wb_match = gid
207
+ best_wb_len = len(syn)
208
+ elif not is_word_match and len(syn) >= 4 and len(syn) > best_sub_len:
209
+ # Only use raw substring for synonyms >= 4 chars
210
+ best_sub_match = gid
211
+ best_sub_len = len(syn)
212
+
213
+ # Prefer word-boundary matches over raw substring
214
+ chosen = best_wb_match or best_sub_match
215
+ if chosen:
216
+ return self._group_by_id(groups, chosen)
217
+
218
+ return None
219
+
220
+ # -- Layer 1: Parser & Normalizer ----------------------------------------
221
+
222
+ @staticmethod
223
+ def _try_parse_float(value: str) -> Optional[float]:
224
+ """Best-effort numeric parse.
225
+
226
+ Stage2 should emit numeric-only values for numeric fields, but in practice
227
+ we sometimes see light decoration like '3 days'. For scoring purposes we
228
+ accept the first numeric token, but we avoid parsing ratios like '120/80'.
229
+ """
230
+ s = (value or "").strip()
231
+ if not s:
232
+ return None
233
+ # Avoid BP-style ratios and similar formats.
234
+ if "/" in s:
235
+ return None
236
+ # Fast path: pure float
237
+ try:
238
+ return float(s)
239
+ except Exception:
240
+ pass
241
+ # Fallback: extract first numeric token
242
+ m = re.search(r"[-+]?\d+(?:\.\d+)?", s)
243
+ if not m:
244
+ return None
245
+ try:
246
+ return float(m.group(0))
247
+ except Exception:
248
+ return None
249
+
250
+ @staticmethod
251
+ def _split_semantic_items(value: str, *, limit: int = 20) -> List[str]:
252
+ """Split a semicolon/comma/newline separated list into normalized items."""
253
+ raw = (value or "").strip()
254
+ if not raw:
255
+ return []
256
+ parts: List[str] = []
257
+ for seg in re.split(r"[;\n]+", raw):
258
+ seg = seg.strip()
259
+ if not seg:
260
+ continue
261
+ for item in seg.split(","):
262
+ it = " ".join(item.strip().split())
263
+ if not it:
264
+ continue
265
+ parts.append(it.strip(" -"))
266
+ if len(parts) >= limit:
267
+ break
268
+ if len(parts) >= limit:
269
+ break
270
+ # Dedup while preserving order.
271
+ out: List[str] = []
272
+ seen: set[str] = set()
273
+ for it in parts:
274
+ k = it.casefold()
275
+ if k in seen:
276
+ continue
277
+ seen.add(k)
278
+ out.append(it)
279
+ return out
280
+
281
+ @staticmethod
282
+ def _strip_prefix(keyword: str, prefixes: List[str]) -> str:
283
+ k = (keyword or "").strip()
284
+ k_cf = k.casefold()
285
+ for p in prefixes:
286
+ p_cf = p.casefold()
287
+ if k_cf.startswith(p_cf):
288
+ k = k[len(p) :].strip()
289
+ k_cf = k.casefold()
290
+ return k
291
+
292
+ @staticmethod
293
+ def _normalize_discharge_disposition(value: str) -> str:
294
+ """Normalize common discharge disposition variants to the scoring allowlist."""
295
+ v = (value or "").strip()
296
+ v_cf = v.casefold()
297
+ if not v:
298
+ return v
299
+ # Canonical allowlist (scoring_rules.json): Home, Home with Services, Rehab, SNF, LTAC, Hospice, AMA
300
+ if v_cf in {"home with service", "home w service", "home with svc", "home w/ service"}:
301
+ return "Home with Services"
302
+ if v_cf in {"home with services", "home w services", "home w/ services", "home health", "home health care"}:
303
+ return "Home with Services"
304
+ if v_cf in {"hospice residence", "hospice care"}:
305
+ return "Hospice"
306
+ return v
307
+
308
+ @staticmethod
309
+ def _normalize_mental_status(value: str) -> str:
310
+ v = (value or "").strip()
311
+ v_cf = v.casefold()
312
+ if not v:
313
+ return v
314
+ if "alert" in v_cf and "orient" in v_cf:
315
+ return "alert"
316
+ if v_cf in {"a&o", "ao", "a/ox3", "a/ox4"}:
317
+ return "alert"
318
+ return v
319
+
320
def parse_toon(self, toon_text: str) -> Tuple[Dict[str, List[ParsedFact]], int, int]:
    """Parse TOON text (CLUSTER|Keyword|Value|Timestamp per line) into facts.

    Blank lines and lines starting with '#' are ignored. A line is dropped
    (and counted) when it does not have exactly four '|' separated fields,
    names an unknown cluster, fails a required numeric parse, or repeats a
    (cluster, keyword) pair already seen in an objective cluster.

    Returns:
        ``(facts_by_cluster, n_parsed, n_dropped)``.
    """
    problem_prefixes = [
        "PMH:", "PMH/Comorbidities:", "Discharge Dx:", "Working Dx:",
        "Complication:", "Complications:",
    ]
    symptom_prefixes = ["ADM:", "DC:"]
    acute_keys = {"discharge dx", "working dx", "complication", "complications"}
    chronic_keys = {"pmh/comorbidities", "pmh", "comorbidities", "past medical history"}

    facts: Dict[str, List[ParsedFact]] = {}
    parsed_count = 0
    dropped_count = 0
    objective_seen: set = set()

    for raw_line in toon_text.strip().splitlines():
        line = raw_line.strip()
        if line == "" or line.startswith("#"):
            continue

        fields = line.split("|")
        if len(fields) != 4:
            dropped_count += 1
            continue
        cluster, keyword, value, timestamp = [piece.strip() for piece in fields]

        if cluster not in VALID_CLUSTERS:
            dropped_count += 1
            continue

        # Strip common semantic prefixes embedded in the keyword.
        if cluster == "PROBLEMS":
            keyword = self._strip_prefix(keyword, problem_prefixes)
        elif cluster == "SYMPTOMS":
            keyword = self._strip_prefix(keyword, symptom_prefixes)

        # Expand Stage2 aggregate lines such as
        #   PROBLEMS|Discharge Dx|CHF; COPD|Discharge
        # into one fact per listed diagnosis, so the scorer stays robust
        # when the model does not emit one line per diagnosis.
        if cluster == "PROBLEMS":
            folded_kw = keyword.strip().casefold()
            if folded_kw in acute_keys:
                expansion = ("acute", "Discharge")
            elif folded_kw in chronic_keys:
                expansion = ("chronic", "Past")
            else:
                expansion = None
            items = self._split_semantic_items(value) if expansion else []
            if items:
                status, when = expansion
                bucket = facts.setdefault("PROBLEMS", [])
                for item in items:
                    bucket.append(
                        ParsedFact(
                            cluster="PROBLEMS",
                            keyword=item,
                            value=status,
                            timestamp=when,
                            is_numeric=False,
                            plausibility_ok=True,
                        )
                    )
                parsed_count += len(items)
                continue

        # Numeric parsing:
        # - Numeric clusters and "range" keywords MUST parse (else drop).
        # - "mixed" keywords parse when possible and otherwise stay text
        #   (e.g. MEDICATIONS Medication Count, PROCEDURES ventilation days).
        rule = self._scoring_rules.get(cluster, {}).get("keywords", {}).get(keyword, {})
        rule_type = rule.get("type") if isinstance(rule, dict) else None
        numeric_required = cluster in NUMERIC_CLUSTERS or rule_type == "range"
        numeric_optional = rule_type == "mixed"

        parsed_value: Union[float, str] = value
        is_numeric = False
        if numeric_required or numeric_optional:
            number = self._try_parse_float(value)
            if number is not None:
                parsed_value = number
                is_numeric = True
            elif numeric_required:
                dropped_count += 1
                continue

        # Plausibility is only checked for numeric values.
        plausibility_ok = (
            self._check_plausibility(cluster, keyword, parsed_value)
            if is_numeric
            else True
        )

        # Dedup for objective clusters: the first occurrence of a
        # (cluster, keyword) pair wins; later repeats are counted as dropped.
        if cluster in OBJECTIVE_CLUSTERS:
            dedup_key = (cluster, keyword)
            if dedup_key in objective_seen:
                dropped_count += 1
                continue
            objective_seen.add(dedup_key)

        facts.setdefault(cluster, []).append(
            ParsedFact(
                cluster=cluster,
                keyword=keyword,
                value=parsed_value,
                timestamp=timestamp,
                is_numeric=is_numeric,
                plausibility_ok=plausibility_ok,
            )
        )
        parsed_count += 1

    return facts, parsed_count, dropped_count
445
+
446
+ def _check_plausibility(self, cluster: str, keyword: str, value: float) -> bool:
447
+ cluster_rules = self._scoring_rules.get(cluster, {}).get("keywords", {})
448
+ kw_rules = cluster_rules.get(keyword, {})
449
+ plaus = kw_rules.get("plausibility")
450
+ if plaus:
451
+ return plaus["min"] <= value <= plaus["max"]
452
+ return True
453
+
454
+ # -- Layer 2: Concept Mapper --------------------------------------------
455
+
456
def map_problem_to_group(self, keyword: str) -> Optional[Dict]:
    """Resolve a PROBLEMS keyword to its concept group, or ``None`` if unmatched."""
    return self._match_to_group(
        keyword=keyword,
        synonym_index=self._problem_synonym_index,
        groups=self._problem_groups,
    )
459
+
460
def map_symptom_to_group(self, keyword: str) -> Optional[Dict]:
    """Resolve a SYMPTOMS keyword to its urgency group, or ``None`` if unmatched."""
    return self._match_to_group(
        keyword=keyword,
        synonym_index=self._symptom_synonym_index,
        groups=self._symptom_groups,
    )
463
+
464
+ @staticmethod
465
+ def _group_by_id(groups: List[Dict], gid: str) -> Optional[Dict]:
466
+ for g in groups:
467
+ if g["id"] == gid:
468
+ return g
469
+ return None
470
+
471
+ # -- Layer 3: Cluster Scorers -------------------------------------------
472
+
473
+ def _score_range_keyword(self, rules: Dict, value: float) -> Tuple[int, str]:
474
+ """Score a numeric value using range rules. Returns (score, label)."""
475
+ for r in rules.get("ranges", []):
476
+ if r["min"] <= value <= r["max"]:
477
+ return r["score"], r.get("label", "")
478
+ return 0, ""
479
+
480
def score_demographics(self, facts: List[ParsedFact]) -> ClusterScore:
    """Score the DEMOGRAPHICS cluster from Age and Sex facts.

    Numeric ``Age`` facts are scored through the Age range rules; ``Sex``
    facts are looked up (lower-cased) in the categorical table. When no
    numeric age is present, the Age rule's ``missing_score`` (default 2)
    is charged instead.

    Args:
        facts: Parsed facts belonging to the DEMOGRAPHICS cluster.

    Returns:
        ClusterScore for "DEMOGRAPHICS" with the total clamped to the
        declared maximum of 10.
    """
    max_score = 10
    rules = self._scoring_rules["DEMOGRAPHICS"]["keywords"]
    score = 0
    factors: List[str] = []

    age_found = False
    for f in facts:
        if f.keyword == "Age" and f.is_numeric:
            age_found = True
            pts, label = self._score_range_keyword(rules["Age"], f.value)
            score += pts
            if pts > 0:
                factors.append(f"Age {int(f.value)} ({label}, +{pts})")
        elif f.keyword == "Sex":
            val = str(f.value).lower()
            pts = rules["Sex"]["values"].get(val, 0)
            score += pts
            if pts > 0:
                factors.append(f"Sex={val} (+{pts})")

    if not age_found:
        # Unknown age is charged a default rather than treated as zero risk.
        default = rules["Age"].get("missing_score", 2)
        score += default
        factors.append(f"Age missing (default +{default})")

    # Clamp so the reported score can never exceed the declared maximum,
    # matching the capped cluster scorers (problems, medications, ...).
    return ClusterScore("DEMOGRAPHICS", min(score, max_score), max_score, factors)
506
+
507
def score_vitals(self, facts: List[ParsedFact]) -> ClusterScore:
    """Score the VITALS cluster from numeric, plausible readings.

    Facts that are non-numeric, flagged implausible, have no rule, or whose
    rule type is ``"no_direct_score"`` are ignored. Every remaining value
    is scored through its range rules.

    Args:
        facts: Parsed facts belonging to the VITALS cluster.

    Returns:
        ClusterScore for "VITALS" with the total clamped to the declared
        maximum of 25.
    """
    max_score = 25
    rules = self._scoring_rules["VITALS"]["keywords"]
    score = 0
    factors: List[str] = []

    for f in facts:
        if not f.is_numeric or not f.plausibility_ok:
            continue
        kw_rules = rules.get(f.keyword)
        if not kw_rules or kw_rules.get("type") == "no_direct_score":
            continue
        pts, label = self._score_range_keyword(kw_rules, f.value)
        score += pts
        if pts > 0:
            factors.append(f"{f.keyword}={f.value} ({label}, +{pts})")

    # Clamp so the reported score can never exceed the declared maximum,
    # matching the capped cluster scorers (utilization, disposition, ...).
    return ClusterScore("VITALS", min(score, max_score), max_score, factors)
524
+
525
def score_labs(self, facts: List[ParsedFact]) -> ClusterScore:
    """Score the LABS cluster from numeric, plausible results.

    Facts that are non-numeric, flagged implausible, or have no rule are
    ignored. Every remaining value is scored through its range rules.

    Args:
        facts: Parsed facts belonging to the LABS cluster.

    Returns:
        ClusterScore for "LABS" with the total clamped to the declared
        maximum of 30.
    """
    max_score = 30
    rules = self._scoring_rules["LABS"]["keywords"]
    score = 0
    factors: List[str] = []

    for f in facts:
        if not f.is_numeric or not f.plausibility_ok:
            continue
        kw_rules = rules.get(f.keyword)
        if not kw_rules:
            continue
        pts, label = self._score_range_keyword(kw_rules, f.value)
        score += pts
        if pts > 0:
            factors.append(f"{f.keyword}={f.value} ({label}, +{pts})")

    # Clamp so the reported score can never exceed the declared maximum,
    # matching the capped cluster scorers (utilization, disposition, ...).
    return ClusterScore("LABS", min(score, max_score), max_score, factors)
542
+
543
def score_problems(self, facts: List[ParsedFact]) -> ClusterScore:
    """Score the PROBLEMS cluster by concept group.

    Only facts whose value is "chronic", "acute" or "exist" count. Each
    fact is mapped to a concept group and a group contributes its highest
    seen ``risk_weight`` once. More than three active groups add a
    multimorbidity bonus of one point per extra group (at most 5). The
    total is capped at 40.
    """
    countable_states = {"chronic", "acute", "exist"}
    weight_by_group: Dict[str, int] = {}  # group_id -> max weight
    notes: List[str] = []

    for fact in facts:
        if str(fact.value).lower().strip() not in countable_states:
            continue
        concept = self.map_problem_to_group(fact.keyword)
        if not concept:
            continue
        group_id = concept["id"]
        weight = concept["risk_weight"]
        if group_id in weight_by_group and not weight > weight_by_group[group_id]:
            continue
        weight_by_group[group_id] = weight
        notes.append(f"{fact.keyword} → {concept['name']} (weight {weight})")

    subtotal = sum(weight_by_group.values())

    # Multimorbidity bonus
    group_count = len(weight_by_group)
    extra = 0
    if group_count > 3:
        extra = min(group_count - 3, 5)
        notes.append(f"Multimorbidity: {group_count} groups (+{extra})")

    return ClusterScore("PROBLEMS", min(subtotal + extra, 40), 40, notes)
574
+
575
def score_symptoms(self, facts: List[ParsedFact]) -> ClusterScore:
    """Score the SYMPTOMS cluster by urgency group and severity.

    A fact's value selects a severity multiplier ("severe" 1.5, "yes" 1.0;
    "no" and unknown values are skipped). Each mapped urgency group
    contributes its highest ``risk_weight * multiplier`` once. More than
    three active symptoms (mapped or not) add a flat +2.

    Args:
        facts: Parsed facts belonging to the SYMPTOMS cluster.

    Returns:
        ClusterScore for "SYMPTOMS"; the total is rounded half-up to an
        integer and capped at 15.
    """
    max_score = 15
    sev_mult = {"severe": 1.5, "yes": 1.0, "no": 0.0}
    factors: List[str] = []
    active_groups: Dict[str, float] = {}
    active_count = 0

    for f in facts:
        val = str(f.value).lower().strip()
        mult = sev_mult.get(val, 0.0)
        if mult == 0.0:
            continue

        active_count += 1
        group = self.map_symptom_to_group(f.keyword)
        if group:
            gid = group["id"]
            w = group["risk_weight"] * mult
            if gid not in active_groups or w > active_groups[gid]:
                active_groups[gid] = w
                factors.append(f"{f.keyword}={val} → {group['name']} (+{w:.1f})")

    base_score = sum(active_groups.values())

    # Active symptom count bonus
    bonus = 0
    if active_count > 3:
        bonus = 2
        factors.append(f"Active symptoms: {active_count} (>3, +2)")

    # Built-in round() is half-to-even in Python 3 (4.5 -> 4 but 5.5 -> 6).
    # The 1.5 severity multiplier makes x.5 totals common, so round half-up
    # to keep equal increments from producing parity-dependent scores.
    rounded = int(math.floor(base_score + bonus + 0.5))
    return ClusterScore("SYMPTOMS", min(rounded, max_score), max_score, factors)
607
+
608
def score_medications(self, facts: List[ParsedFact]) -> ClusterScore:
    """Score the MEDICATIONS cluster.

    Keywords with a ``"range"`` rule are scored from numeric values and
    ``"categorical"`` rules from the lower-cased value. If the recorded
    Medication Count is at least 5 and no contributing factor mentions
    "Polypharmacy", a derived +3 is added. The total is capped at 15.
    """
    keyword_rules = self._scoring_rules["MEDICATIONS"]["keywords"]
    total = 0
    notes: List[str] = []
    med_count: Optional[float] = None

    for fact in facts:
        rule = keyword_rules.get(fact.keyword)
        if not rule:
            continue

        rule_type = rule["type"]
        if rule_type == "range" and fact.is_numeric:
            points, band_label = self._score_range_keyword(rule, fact.value)
            total += points
            if fact.keyword == "Medication Count":
                med_count = fact.value
            if points > 0:
                notes.append(f"{fact.keyword}={fact.value} ({band_label}, +{points})")
        elif rule_type == "categorical":
            category = str(fact.value).lower().strip()
            points = rule["values"].get(category, 0)
            total += points
            if points > 0:
                notes.append(f"{fact.keyword}={category} (+{points})")

    # Derived polypharmacy: if med_count >= 5 and Polypharmacy not already scored
    already_flagged = any("Polypharmacy" in note for note in notes)
    if med_count is not None and med_count >= 5 and not already_flagged:
        total += 3
        notes.append(f"Derived Polypharmacy (Med Count={int(med_count)} >=5, +3)")

    return ClusterScore("MEDICATIONS", min(total, 15), 15, notes)
641
+
642
def score_procedures(self, facts: List[ParsedFact]) -> ClusterScore:
    """Score the PROCEDURES cluster.

    Specific procedures (Mechanical Ventilation, Dialysis, Surgery) are
    scored first. The generic "Any Procedure" fact is only used as a
    fallback when none of the specific ones scored.

    Mechanical Ventilation is mixed-type: a numeric value counts only when
    it is greater than zero, a textual value counts unless it is "no".

    Args:
        facts: Parsed facts belonging to the PROCEDURES cluster.

    Returns:
        ClusterScore for "PROCEDURES", capped at 15.
    """
    rules = self._scoring_rules["PROCEDURES"]["keywords"]
    score = 0
    factors: List[str] = []
    specific_scored = False

    for f in facts:
        kw_rules = rules.get(f.keyword)
        if not kw_rules:
            continue

        if f.keyword == "Mechanical Ventilation":
            # Decide by type first: a numeric 0 must not fall through to the
            # textual check, where "0.0" != "no" would count as ventilated.
            if f.is_numeric:
                ventilated = f.value > 0
                shown = f"{f.value} days"
            else:
                ventilated = str(f.value).lower().strip() != "no"
                shown = f"{f.value}"
            if ventilated:
                pts = kw_rules["score_if_any_positive"]
                score += pts
                factors.append(f"Mechanical Ventilation={shown} (+{pts})")
                specific_scored = True

        elif f.keyword in ("Dialysis", "Surgery"):
            val = str(f.value).lower().strip()
            pts = kw_rules["values"].get(val, 0)
            score += pts
            if pts > 0:
                factors.append(f"{f.keyword}={val} (+{pts})")
                specific_scored = True

    # Fallback: Any Procedure (only the first such fact is considered)
    if not specific_scored:
        generic_values = rules.get("Any Procedure", {}).get("values", {})
        for f in facts:
            if f.keyword == "Any Procedure":
                val = str(f.value).lower().strip()
                pts = generic_values.get(val, 0)
                score += pts
                if pts > 0:
                    factors.append(f"Any Procedure={val} (generic fallback, +{pts})")
                break

    return ClusterScore("PROCEDURES", min(score, 15), 15, factors)
696
+
697
def score_utilization(self, facts: List[ParsedFact]) -> ClusterScore:
    """Score the UTILIZATION cluster.

    Only numeric facts with a configured rule are scored, each through its
    range bands. The total is capped at 20.
    """
    keyword_rules = self._scoring_rules["UTILIZATION"]["keywords"]
    total = 0
    notes: List[str] = []

    for fact in facts:
        band_rules = keyword_rules.get(fact.keyword) if fact.is_numeric else None
        if not band_rules:
            continue
        points, band_label = self._score_range_keyword(band_rules, fact.value)
        total += points
        if points > 0:
            notes.append(f"{fact.keyword}={fact.value} ({band_label}, +{points})")

    return ClusterScore("UTILIZATION", min(total, 20), 20, notes)
714
+
715
def score_disposition(self, facts: List[ParsedFact]) -> ClusterScore:
    """Score the DISPOSITION cluster.

    Discharge Disposition and Mental Status values are passed through their
    normalizers first; other keywords use the stripped raw value. Points
    come from the rule's value table, trying the exact value and then its
    lower-cased form. The total is capped at 15.
    """
    keyword_rules = self._scoring_rules["DISPOSITION"]["keywords"]
    total = 0
    notes: List[str] = []

    for fact in facts:
        rule = keyword_rules.get(fact.keyword)
        if not rule:
            continue

        shown = str(fact.value).strip()
        if fact.keyword == "Discharge Disposition":
            shown = self._normalize_discharge_disposition(shown)
        elif fact.keyword == "Mental Status":
            shown = self._normalize_mental_status(shown)

        # Try exact match first, then case-insensitive
        table = rule["values"]
        lowered_points = table.get(shown.lower(), 0)
        points = table.get(shown, lowered_points)
        total += points
        if points > 0:
            notes.append(f"{fact.keyword}={shown} (+{points})")

    return ClusterScore("DISPOSITION", min(total, 15), 15, notes)
736
+
737
+ # -- Layer 4: Pattern Detector ------------------------------------------
738
+
739
def detect_interactions(
    self,
    facts: Dict[str, List[ParsedFact]],
    cluster_scores: Dict[str, ClusterScore],
) -> List[InteractionResult]:
    """Detect cross-cluster clinical patterns.

    Patterns checked, in order: sepsis/SIRS, acute kidney injury,
    decompensated heart failure, frailty syndrome, unstable discharge,
    respiratory failure, metabolic crisis and bleeding risk.

    Numeric readings flagged implausible by the parser are treated as
    missing, as the vitals/labs cluster scorers do, so a rejected value
    cannot trigger a pattern.

    Args:
        facts: Parsed facts keyed by cluster name.
        cluster_scores: Per-cluster scores; accepted for interface
            compatibility and not read by the current patterns.

    Returns:
        One InteractionResult per triggered pattern, in the order above.
    """
    results: List[InteractionResult] = []

    def get_val(cluster: str, keyword: str) -> Optional[float]:
        """Return the first plausible numeric value for cluster/keyword, else None."""
        for f in facts.get(cluster, []):
            if f.keyword == keyword and f.is_numeric and f.plausibility_ok:
                return f.value
        return None

    def get_str(cluster: str, keyword: str) -> Optional[str]:
        """Return the first value for cluster/keyword, lower-cased and stripped, else None."""
        for f in facts.get(cluster, []):
            if f.keyword == keyword:
                return str(f.value).lower().strip()
        return None

    # Resolve active concept groups once instead of re-mapping every fact
    # for each individual pattern query.
    symptom_group_ids = set()
    for f in facts.get("SYMPTOMS", []):
        if str(f.value).lower().strip() in ("yes", "severe"):
            g = self.map_symptom_to_group(f.keyword)
            if g:
                symptom_group_ids.add(g["id"])

    problem_group_ids = set()
    for f in facts.get("PROBLEMS", []):
        if str(f.value).lower().strip() in ("chronic", "acute", "exist"):
            g = self.map_problem_to_group(f.keyword)
            if g:
                problem_group_ids.add(g["id"])

    def has_symptom_group(group_id: str) -> bool:
        return group_id in symptom_group_ids

    def has_problem_group(group_id: str) -> bool:
        return group_id in problem_group_ids

    # --- Sepsis Pattern ---
    hr = get_val("VITALS", "Heart Rate")
    sbp = get_val("VITALS", "Systolic BP")
    rr = get_val("VITALS", "Respiratory Rate")
    wbc = get_val("LABS", "WBC")
    temp = get_val("VITALS", "Temperature")

    if hr is not None and hr > 100:
        has_hemodynamic = (sbp is not None and sbp < 100) or (rr is not None and rr > 22)
        has_infection = (
            (wbc is not None and (wbc > 12 or wbc < 4))
            # NOTE(review): threshold looks like Fahrenheit - confirm the unit
            # of Temperature facts.
            or (temp is not None and temp > 100.4)
        )
        if has_hemodynamic and has_infection:
            results.append(InteractionResult(
                "sepsis_pattern", "Sepsis / SIRS Pattern", 10,
                f"HR={hr}, SBP={sbp}, RR={rr}, WBC={wbc}, Temp={temp}",
            ))

    # --- AKI Pattern ---
    cr = get_val("LABS", "Creatinine")
    bun = get_val("LABS", "BUN")
    k = get_val("LABS", "Potassium")
    na = get_val("LABS", "Sodium")
    bicarb = get_val("LABS", "Bicarbonate")

    if cr is not None and cr > 1.5 and bun is not None and bun > 30:
        has_electrolyte = (
            (k is not None and k > 5.0)
            or (na is not None and na < 135)
            or (bicarb is not None and bicarb < 22)
        )
        if has_electrolyte:
            results.append(InteractionResult(
                "aki_pattern", "Acute Kidney Injury Pattern", 8,
                f"Cr={cr}, BUN={bun}, K={k}, Na={na}, Bicarb={bicarb}",
            ))

    # --- Decompensated HF ---
    if has_problem_group("heart_failure"):
        has_decomp_sign = (
            has_symptom_group("edema_fluid")
            or has_symptom_group("respiratory_distress")
            or (bun is not None and bun > 40)
        )
        if has_decomp_sign:
            results.append(InteractionResult(
                "decompensated_hf", "Decompensated Heart Failure", 8,
                "Heart failure + fluid overload/dyspnea/elevated BUN",
            ))

    # --- Frailty Syndrome ---
    age = get_val("DEMOGRAPHICS", "Age")
    hgb = get_val("LABS", "Hemoglobin")
    mental = get_str("DISPOSITION", "Mental Status")
    disp = get_str("DISPOSITION", "Discharge Disposition")
    n_problem_groups = len(problem_group_ids)

    if age is not None and age > 75:
        # Frailty needs at least two of the four supporting signs.
        frailty_count = 0
        if n_problem_groups >= 3:
            frailty_count += 1
        if hgb is not None and hgb < 10:
            frailty_count += 1
        if mental in ("confused", "lethargic"):
            frailty_count += 1
        if disp in ("snf", "ltac", "rehab"):
            frailty_count += 1
        if frailty_count >= 2:
            results.append(InteractionResult(
                "frailty_syndrome", "Frailty Syndrome", 6,
                f"Age={age}, problems={n_problem_groups}, Hgb={hgb}, mental={mental}, disp={disp}",
            ))

    # --- Unstable Discharge ---
    if disp == "ama":
        results.append(InteractionResult(
            "unstable_discharge", "Unstable Discharge (AMA)", 5,
            "Discharge Against Medical Advice",
        ))
    elif mental in ("confused", "lethargic") and disp in ("home", None):
        # A missing disposition is treated like a discharge home here.
        results.append(InteractionResult(
            "unstable_discharge", "Unstable Discharge (altered + Home)", 5,
            f"Mental={mental}, Disposition={disp}",
        ))

    # --- Respiratory Failure ---
    spo2 = get_val("VITALS", "SpO2")
    if spo2 is not None and spo2 < 92:
        has_resp = (rr is not None and rr > 24) or has_symptom_group("respiratory_distress")
        if has_resp:
            results.append(InteractionResult(
                "respiratory_failure", "Respiratory Failure Pattern", 6,
                f"SpO2={spo2}, RR={rr}",
            ))

    # --- Metabolic Crisis ---
    glucose = get_val("LABS", "Glucose")
    if glucose is not None and glucose > 300:
        has_metabolic = (
            (bicarb is not None and bicarb < 18)
            or (k is not None and k > 5.5)
        )
        if has_metabolic:
            results.append(InteractionResult(
                "metabolic_crisis", "Metabolic Crisis (DKA/HHS)", 6,
                f"Glucose={glucose}, Bicarb={bicarb}, K={k}",
            ))

    # --- Bleeding Risk ---
    plt = get_val("LABS", "Platelet")
    anticoag = get_str("MEDICATIONS", "Anticoagulation")
    if hgb is not None and hgb < 8:
        has_bleed_risk = (
            (plt is not None and plt < 100)
            or anticoag == "yes"
        )
        if has_bleed_risk:
            results.append(InteractionResult(
                "bleeding_risk", "Active Bleeding Risk", 6,
                f"Hgb={hgb}, Plt={plt}, Anticoag={anticoag}",
            ))

    return results
907
+
908
+ # -- Layer 5: Risk Aggregator -------------------------------------------
909
+
910
+ def _logistic(self, score: int) -> float:
911
+ """Convert composite score to probability via logistic function."""
912
+ z = self._alpha + self._beta * score
913
+ return 1.0 / (1.0 + math.exp(-z))
914
+
915
+ def _classify_risk(self, score: int) -> Tuple[str, str]:
916
+ """Return (category, color) for a given composite score."""
917
+ for cat in self._scoring_rules["_meta"]["risk_categories"]:
918
+ if cat["score_min"] <= score <= cat["score_max"]:
919
+ return cat["name"], cat["color"]
920
+ return "Critical", "red"
921
+
922
+ # -- Layer 6: Days Predictor --------------------------------------------
923
+
924
+ def _predict_days(self, score: int) -> float:
925
+ """Estimate days to readmission (point estimate)."""
926
+ return max(1.0, self._d_max * math.exp(-self._gamma * score))
927
+
928
+ def _predict_bucket(self, estimated_days: float) -> str:
929
+ if estimated_days <= 7:
930
+ return "0-7 days"
931
+ elif estimated_days <= 14:
932
+ return "8-14 days"
933
+ else:
934
+ return "15-30 days"
935
+
936
def _predict_survival(self, score: int, p_30d: float) -> SurvivalCurve:
    """Compute P(readmit by day t) for t in 7, 14, 21 and 30.

    The 30-day probability is distributed over time with the factor
    ``(1 - exp(-(t/30) * k)) / (1 - exp(-k))``, where
    ``k = _k_base + 0.02 * (score - 30)`` floored at 0.5. Each horizon is
    clamped to [0, 1] and rounded to four decimals.
    """
    # floor to avoid degenerate cases
    shape = max(0.5, self._k_base + 0.02 * (score - 30))

    normaliser = 1.0 - math.exp(-shape)
    if abs(normaliser) < 1e-9:
        normaliser = 1e-9

    def cumulative(day: int) -> float:
        fraction = (1.0 - math.exp(-(day / 30.0) * shape)) / normaliser
        return round(min(max(p_30d * fraction, 0.0), 1.0), 4)

    horizons: Dict[int, float] = {day: cumulative(day) for day in (7, 14, 21, 30)}
    return SurvivalCurve(horizons=horizons)
952
+
953
+ # -- Main Scoring Pipeline -----------------------------------------------
954
+
955
def score(self, facts: Dict[str, List[ParsedFact]], n_parsed: int = 0, n_dropped: int = 0) -> RiskResult:
    """Run the full scoring pipeline on parsed facts.

    Scores each cluster, detects cross-cluster patterns, aggregates the
    composite score into a probability and risk category, predicts timing,
    and collects explainability and data-completeness information.

    Args:
        facts: Parsed facts keyed by cluster name.
        n_parsed: Number of facts parsed, passed through to the result.
        n_dropped: Number of facts dropped, passed through to the result.
    """
    # Layer 3: Cluster scores (insertion order drives risk-factor order)
    scorers = (
        ("DEMOGRAPHICS", self.score_demographics),
        ("VITALS", self.score_vitals),
        ("LABS", self.score_labs),
        ("PROBLEMS", self.score_problems),
        ("SYMPTOMS", self.score_symptoms),
        ("MEDICATIONS", self.score_medications),
        ("PROCEDURES", self.score_procedures),
        ("UTILIZATION", self.score_utilization),
        ("DISPOSITION", self.score_disposition),
    )
    cluster_scores: Dict[str, ClusterScore] = {}
    for cluster_name, scorer in scorers:
        cluster_scores[cluster_name] = scorer(facts.get(cluster_name, []))

    # Layer 4: Interaction detection
    interactions = self.detect_interactions(facts, cluster_scores)
    interaction_bonus = sum(hit.bonus for hit in interactions)

    # Layer 5: Aggregate
    composite = sum(entry.score for entry in cluster_scores.values()) + interaction_bonus
    probability = self._logistic(composite)
    category, color = self._classify_risk(composite)

    # Layer 6: Days prediction
    est_days = self._predict_days(composite)
    bucket = self._predict_bucket(est_days)
    survival = self._predict_survival(composite, probability)

    # Explainability: cluster factors first, detected patterns last
    risk_factors: List[str] = [
        factor
        for entry in cluster_scores.values()
        for factor in entry.contributing_factors
    ]

    # Identify protective factors (normal values in important clusters)
    protective_factors: List[str] = [
        f"Normal {name.lower()} at discharge"
        for name in ("VITALS", "LABS")
        if cluster_scores[name].score == 0 and facts.get(name)
    ]
    if cluster_scores["DISPOSITION"].score == 0 and facts.get("DISPOSITION"):
        protective_factors.append("Stable disposition (Home, alert)")

    risk_factors.extend(
        f"[PATTERN] {hit.pattern_name} (+{hit.bonus})" for hit in interactions
    )

    # Missing data
    missing_clusters = [name for name in VALID_CLUSTERS if not facts.get(name)]
    completeness = 1.0 - len(missing_clusters) / len(VALID_CLUSTERS)

    if completeness >= 0.7:
        confidence = "high"
    elif completeness >= 0.5:
        confidence = "medium"
    else:
        confidence = "low"

    return RiskResult(
        composite_score=composite,
        cluster_scores=cluster_scores,
        interaction_bonus=interaction_bonus,
        interactions_triggered=interactions,
        probability=round(probability, 4),
        risk_category=category,
        risk_color=color,
        estimated_days=round(est_days, 1),
        days_bucket=bucket,
        survival_curve=survival,
        risk_factors=risk_factors,
        protective_factors=protective_factors,
        missing_clusters=sorted(missing_clusters),
        data_completeness=round(completeness, 2),
        confidence=confidence,
        n_facts_parsed=n_parsed,
        n_facts_dropped=n_dropped,
    )
1031
+
1032
+ # -- Convenience Methods ------------------------------------------------
1033
+
1034
def score_from_toon(self, toon_text: str) -> RiskResult:
    """Parse raw TOON text and score the resulting facts."""
    parsed = self.parse_toon(toon_text)
    facts, n_parsed, n_dropped = parsed
    return self.score(facts, n_parsed, n_dropped)
1038
+
1039
def score_from_file(self, path: Union[str, Path]) -> RiskResult:
    """Read a UTF-8 TOON text file and score its contents."""
    toon_text = Path(path).read_text(encoding="utf-8")
    return self.score_from_toon(toon_text)
1043
+
1044
def score_from_jsonl(self, path: Union[str, Path], limit: int = 0) -> List[Tuple[str, RiskResult]]:
    """Score all entries in a JSONL file (trainset_full format).

    Each non-blank line must be a JSON object. Its ``"completion"`` field
    is scored as TOON text, and entries with an empty or missing completion
    are skipped. The id is ``"hadm_id"`` when present, otherwise
    ``"row_<line index>"``.

    Args:
        path: Location of the JSONL file (read as UTF-8).
        limit: If non-zero, stop after this many physical lines.

    Returns:
        List of (hadm_id, RiskResult).

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    results: List[Tuple[str, RiskResult]] = []
    with Path(path).open("r", encoding="utf-8") as f:
        for i, line in enumerate(f):
            if limit and i >= limit:
                break
            # Blank lines (typically a trailing newline at end of file) are
            # not records and would make json.loads raise.
            if not line.strip():
                continue
            obj = json.loads(line)
            hadm_id = str(obj.get("hadm_id", f"row_{i}"))
            completion = obj.get("completion", "")
            if completion:
                results.append((hadm_id, self.score_from_toon(completion)))
    return results
1062
+
1063
+
1064
+ # ---------------------------------------------------------------------------
1065
+ # Pretty-printing
1066
+ # ---------------------------------------------------------------------------
1067
+
1068
def format_result(result: RiskResult, hadm_id: str = "") -> str:
    """Render a RiskResult as a plain-text, human-readable report.

    Sections appear in a fixed order: summary, days-to-readmission
    prediction, cluster scores, then (only when non-empty) risk factors,
    protective factors, detected patterns and missing data, followed by
    the parsed/dropped fact counts. Lines are joined with newlines.
    """
    cluster_order = (
        "DEMOGRAPHICS", "VITALS", "LABS", "PROBLEMS", "SYMPTOMS",
        "MEDICATIONS", "PROCEDURES", "UTILIZATION", "DISPOSITION",
    )

    title = "=== Readmission Risk Report"
    if hadm_id:
        title += f" (hadm_id: {hadm_id})"
    title += " ==="
    lines: List[str] = [title, ""]

    # Summary
    lines += [
        f"RISK: {result.risk_category} ({result.risk_color})",
        f"Probability of 30-day readmission: {result.probability:.1%}",
        f"Composite score: {result.composite_score}",
        f"Confidence: {result.confidence} (data completeness: {result.data_completeness:.0%})",
        "",
    ]

    # Days prediction
    lines += [
        "--- Days-to-Readmission Prediction ---",
        f"Point estimate: ~{result.estimated_days:.0f} days",
        f"Bucket: {result.days_bucket}",
        "Survival curve:",
    ]
    for day, prob in sorted(result.survival_curve.horizons.items()):
        lines.append(f" P(readmit by day {day:2d}): {prob:.1%}")
    lines.append("")

    # Cluster breakdown
    lines.append("--- Cluster Scores ---")
    for cluster in cluster_order:
        entry = result.cluster_scores.get(cluster)
        if entry:
            lines.append(f" {cluster}: {entry.score}/{entry.max_score}")
    lines += [
        f" INTERACTIONS: +{result.interaction_bonus}",
        f" TOTAL: {result.composite_score}",
        "",
    ]

    # Risk factors
    if result.risk_factors:
        lines.append("--- Risk Factors ---")
        lines.extend(f" - {item}" for item in result.risk_factors)
        lines.append("")

    # Protective factors
    if result.protective_factors:
        lines.append("--- Protective Factors ---")
        lines.extend(f" + {item}" for item in result.protective_factors)
        lines.append("")

    # Triggered patterns
    if result.interactions_triggered:
        lines.append("--- Clinical Patterns Detected ---")
        for hit in result.interactions_triggered:
            lines.append(f" [{hit.pattern_id}] {hit.pattern_name}: +{hit.bonus} pts")
            lines.append(f" Evidence: {hit.description}")
        lines.append("")

    # Missing data
    if result.missing_clusters:
        lines.append(f"--- Missing Data ({len(result.missing_clusters)} clusters) ---")
        lines.extend(f" ? {name}" for name in result.missing_clusters)
        lines.append("")

    lines.append(f"Facts parsed: {result.n_facts_parsed}, dropped: {result.n_facts_dropped}")

    return "\n".join(lines)
1137
+
1138
+
1139
+ # ---------------------------------------------------------------------------
1140
+ # CLI
1141
+ # ---------------------------------------------------------------------------
1142
+
1143
def main():
    """Command-line entry point for the readmission risk engine.

    Sub-commands:
        file    score a single TOON text file and print its report.
        jsonl   score the entries of a JSONL file; ``--limit`` bounds the
                number of lines read and ``--summary`` prints aggregate
                statistics instead of per-patient reports.
        inline  read TOON text from stdin and print its report.

    With no sub-command, the argparse help text is printed.
    """
    import argparse

    ap = argparse.ArgumentParser(description="Rule-based 30-day readmission risk engine")
    sub = ap.add_subparsers(dest="cmd")

    # Score a single TOON file
    p_file = sub.add_parser("file", help="Score a single TOON file")
    p_file.add_argument("path", help="Path to TOON text file")

    # Score from JSONL
    p_jsonl = sub.add_parser("jsonl", help="Score all entries in a JSONL file")
    p_jsonl.add_argument("path", help="Path to JSONL file")
    p_jsonl.add_argument("--limit", type=int, default=0, help="Limit number of entries")
    p_jsonl.add_argument("--summary", action="store_true", help="Show summary statistics only")

    # Score from inline TOON text
    sub.add_parser("inline", help="Score inline TOON text (pipe to stdin)")

    args = ap.parse_args()
    engine = ReadmissionRiskEngine()

    if args.cmd == "file":
        result = engine.score_from_file(args.path)
        print(format_result(result))

    elif args.cmd == "jsonl":
        results = engine.score_from_jsonl(args.path, limit=args.limit)

        if args.summary:
            print(f"=== Summary ({len(results)} patients) ===")
            if not results:
                # Mean/min/max are undefined for an empty sample; bail out
                # instead of dividing by zero or calling min() on nothing.
                print("No scorable entries found.")
                return

            scores = [r.composite_score for _, r in results]
            probs = [r.probability for _, r in results]
            categories = {}
            for _, r in results:
                categories[r.risk_category] = categories.get(r.risk_category, 0) + 1

            print(f"Score: mean={sum(scores)/len(scores):.1f}, "
                  f"min={min(scores)}, max={max(scores)}, "
                  f"median={sorted(scores)[len(scores)//2]}")
            print(f"P(readmit): mean={sum(probs)/len(probs):.1%}")
            print("Risk categories:")
            for cat in ["Low", "Medium", "High", "Critical"]:
                n = categories.get(cat, 0)
                pct = n / len(results) * 100
                print(f" {cat}: {n} ({pct:.0f}%)")

            days = [r.estimated_days for _, r in results]
            print(f"Days estimate: mean={sum(days)/len(days):.1f}, "
                  f"min={min(days):.1f}, max={max(days):.1f}")
        else:
            for hadm_id, result in results:
                print(format_result(result, hadm_id))
                print("\n" + "=" * 60 + "\n")

    elif args.cmd == "inline":
        import sys
        toon_text = sys.stdin.read()
        result = engine.score_from_toon(toon_text)
        print(format_result(result))

    else:
        ap.print_help()


if __name__ == "__main__":
    main()
README.md ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: MedGemma StructCore Demo
3
+ emoji: 🩺
4
+ colorFrom: blue
5
+ colorTo: teal
6
+ sdk: gradio
7
+ python_version: "3.10"
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # MedGemma StructCore Demo (HF Spaces Zero)
13
+
14
+ This directory contains deployment assets for Hugging Face Spaces Zero.
15
+
16
+ ## What is included
17
+
18
+ - `app.py`: Space entrypoint for the StructCore demo UI.
19
+ - `requirements.txt`: minimal dependencies for this demo.
20
+
21
+ ## Recommended deployment flow
22
+
23
+ Use the packaging script from the repository root:
24
+
25
+ ```bash
26
+ bash scripts/prepare_hf_zero_challenge_space.sh
27
+ ```
28
+
29
+ It creates a ready-to-push bundle in:
30
+
31
+ ```text
32
+ .dist/hf_zero_challenge_demo_space/
33
+ ```
34
+
35
+ Then push that bundle to your HF Space repository.
36
+
37
+ ## Model repository (two-stage)
38
+
39
+ Target model repo:
40
+
41
+ - `https://huggingface.co/DocUA/medgemma-1.5-4b-it-gguf-q5-k-m-two-stage`
42
+
43
+ Upload/update Stage1 and Stage2 artifacts from this project repo:
44
+
45
+ ```bash
46
+ python3 scripts/hf_upload_two_stage_models.py \
47
+ --repo-id DocUA/medgemma-1.5-4b-it-gguf-q5-k-m-two-stage \
48
+ --stage1-file /absolute/path/to/stage1.gguf \
49
+ --stage2-file /absolute/path/to/stage2.gguf \
50
+ --stage1-path-in-repo stage1/medgemma-stage1-q5_k_m.gguf \
51
+ --stage2-path-in-repo stage2/medgemma-stage2-q5_k_m.gguf
52
+ ```
53
+
54
+ Requires `HF_TOKEN` with write access to the model repo.
55
+
56
+ ## Space runtime configuration
57
+
58
+ Set these variables/secrets in the HF Space settings:
59
+
60
+ - `STRUCTCORE_BACKEND_MODE=pipeline` (or `mock` as safe default)
61
+ - `STRUCTCORE_STAGE1_URL=<your_openai_compat_stage1_url>`
62
+ - `STRUCTCORE_STAGE1_MODEL=<model_alias_from_stage1_/v1/models>`
63
+ - `STRUCTCORE_STAGE2_URL=<your_openai_compat_stage2_url>`
64
+ - `STRUCTCORE_STAGE2_MODEL=<model_alias_from_stage2_/v1/models>`
65
+
66
+ Important:
67
+
68
+ - The Space itself does not automatically serve GGUF files from the model repo.
69
+ - The GGUF files in the HF model repo are the source-of-truth artifacts.
70
+ - Actual inference in `pipeline` mode requires reachable OpenAI-compatible endpoints running those artifacts.
app.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+
5
+ from apps.challenge_demo.app_challenge import build_demo
6
+
7
+
8
# Module-level handle to the Gradio app, built at import time.
demo = build_demo()


if __name__ == "__main__":
    # Listen on the port given by the PORT environment variable (7860 if unset).
    listen_port = int(os.getenv("PORT", "7860"))
    demo.launch(server_name="0.0.0.0", server_port=listen_port, show_error=True)
17
+
apps/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Application packages."""
apps/challenge_demo/README.md ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # MedGemma StructCore Demo App
2
+
3
+ This is the implementation-focused demo app for:
4
+
5
+ **MedGemma StructCore: Local-First Clinical Structuring Engine for EHR**
6
+
7
+ ## Run
8
+
9
+ ```bash
10
+ python3 apps/challenge_demo/app_challenge.py
11
+ ```
12
+
13
+ Open: `http://localhost:7863`
14
+
15
+ ## Deploy to Hugging Face Spaces Zero
16
+
17
+ Prepare a minimal Space bundle:
18
+
19
+ ```bash
20
+ bash scripts/prepare_hf_zero_challenge_space.sh
21
+ ```
22
+
23
+ Bundle output:
24
+
25
+ ```text
26
+ .dist/hf_zero_challenge_demo_space/
27
+ ```
28
+
29
+ Push that directory to your HF Space repository. The bundle includes:
30
+
31
+ - Space entrypoint `app.py`
32
+ - minimal `requirements.txt`
33
+ - demo code (`apps/challenge_demo`)
34
+ - parser/risk dependencies (`kvt_utils.py`, `Analysis_Readmission/readmission_risk_engine.py`, config JSONs)
35
+
36
+ Note: in HF Space, default mode should remain `mock`. `pipeline` mode requires external Stage1/Stage2 servers reachable from the Space.
37
+
38
+ ### Two-stage model artifacts on HF
39
+
40
+ Model repo (source-of-truth artifacts):
41
+
42
+ - `https://huggingface.co/DocUA/medgemma-1.5-4b-it-gguf-q5-k-m-two-stage`
43
+
44
+ Upload/update artifacts:
45
+
46
+ ```bash
47
+ python3 scripts/hf_upload_two_stage_models.py \
48
+ --repo-id DocUA/medgemma-1.5-4b-it-gguf-q5-k-m-two-stage \
49
+ --stage1-file /absolute/path/to/stage1.gguf \
50
+ --stage2-file /absolute/path/to/stage2.gguf
51
+ ```
52
+
53
+ Space should be configured via env vars:
54
+
55
+ - `STRUCTCORE_STAGE1_URL`, `STRUCTCORE_STAGE1_MODEL`
56
+ - `STRUCTCORE_STAGE2_URL`, `STRUCTCORE_STAGE2_MODEL`
57
+ - optional: `STRUCTCORE_BACKEND_MODE=mock|pipeline`
58
+
59
+ ## Modes
60
+
61
+ - `mock`:
62
+ - offline deterministic extraction (fast, no model server required),
63
+ - useful for demo recording and UI development.
64
+
65
+ - `pipeline`:
66
+ - runs real Stage1/Stage2 using existing runners,
67
+ - requires reachable OpenAI-compatible model servers (local, or external when running in an HF Space).
68
+
69
+ If pipeline mode fails and fallback is enabled, the app falls back to mock mode.
70
+
71
+ ## Architecture
72
+
73
+ - `app_challenge.py`: Gradio UI and orchestration glue.
74
+ - `services/structcore_service.py`: execution modes, normalization, risk scoring.
75
+ - `services/case_library.py`: synthetic demo cases.
76
+ - `services/evidence_service.py`: claim/evidence board data.
77
+ - `config/evidence_claims.json`: status-labeled claims.
78
+ - `data/synthetic_cases.json`: synthetic note samples.
79
+
80
+ ## Notes
81
+
82
+ - This demo is extraction-first.
83
+ - Readmission risk is presented as a downstream use case.
84
+ - Public demos should use synthetic notes only.
apps/challenge_demo/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """MedGemma StructCore challenge demo app."""
apps/challenge_demo/app_challenge.py ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import sys
6
+ from pathlib import Path
7
+ from typing import Dict, List, Tuple
8
+
9
+ # Allow running as a script: `python apps/challenge_demo/app_challenge.py`
10
+ if __package__ in {None, ""}:
11
+ repo_root = Path(__file__).resolve().parents[2]
12
+ if str(repo_root) not in sys.path:
13
+ sys.path.insert(0, str(repo_root))
14
+
15
+ import gradio as gr
16
+ import pandas as pd
17
+
18
+ from apps.challenge_demo.services.case_library import get_case, load_cases
19
+ from apps.challenge_demo.services.evidence_service import load_evidence_rows
20
+ from apps.challenge_demo.services.structcore_service import (
21
+ StructCoreConfig,
22
+ lines_to_rows,
23
+ result_to_debug_json,
24
+ run_structcore,
25
+ )
26
+
27
+
28
def _default_case_id() -> str:
    """Return the id of the first loaded case, or ``"custom"`` when none load."""
    for first_case in load_cases():
        return first_case.id
    return "custom"
31
+
32
+
33
def _case_choices() -> List[Tuple[str, str]]:
    """Build ``(label, value)`` pairs for the case dropdown.

    Each loaded case is labelled ``"<title> (<id>)"`` with its id as the
    value; a trailing ``("Custom note", "custom")`` entry is always added.
    """
    choices = [(f"{case.title} ({case.id})", case.id) for case in load_cases()]
    choices.append(("Custom note", "custom"))
    return choices
39
+
40
+
41
def _on_case_change(case_id: str) -> Tuple[str, str]:
    """Return ``(note_text, description_markdown)`` for the selected case.

    An empty id or ``"custom"`` clears the note and shows the manual-mode
    hint; an id that ``get_case`` cannot resolve yields "Case not found.".
    """
    if case_id and case_id != "custom":
        case = get_case(case_id)
        if case is not None:
            return case.text, f"**{case.title}**\n\n{case.description}"
        return "", "Case not found."
    return "", "Manual mode: paste your own note text."
48
+
49
+
50
def _format_status(
    note_id: str,
    backend_mode: str,
    duration_sec: float,
    gate_summary: Dict,
    warnings: List[str],
    error: str | None,
) -> str:
    """Render the run-status panel as a Markdown bullet list.

    Reads ``parse_success``, ``clusters_present`` and ``output_lines`` from
    ``gate_summary`` (missing keys fall back to "no", "none" and 0).
    Warnings and the error line are appended only when present.
    """
    parse_flag = "yes" if gate_summary.get("parse_success") else "no"
    cluster_names = gate_summary.get("clusters_present") or []
    cluster_text = ", ".join(cluster_names) or "none"
    line_count = gate_summary.get("output_lines", 0)

    report = [
        "### Run Status",
        f"- Note ID: `{note_id}`",
        f"- Backend mode: `{backend_mode}`",
        f"- Parse success: `{parse_flag}`",
        f"- Output lines: `{line_count}`",
        f"- Clusters: `{cluster_text}`",
        f"- Duration (sec): `{duration_sec}`",
    ]
    if warnings:
        report.append("- Warnings:")
        for warning in warnings:
            report.append(f" - {warning}")
    if error:
        report.append(f"- Error: `{error}`")
    return "\n".join(report)
70
+
71
+
72
def _format_risk_summary(risk: Dict | None) -> Tuple[str, str]:
    """Return ``(markdown_summary, json_payload)`` for the risk tab.

    A missing or empty ``risk`` mapping yields a placeholder message and
    ``"{}"``. Otherwise the headline fields are listed, followed by at most
    the first five entries of ``risk_factors``, and the full mapping is
    dumped as indented JSON.
    """
    if not risk:
        return "No risk output available for this run.", "{}"

    summary = [
        "### Readmission Risk Summary",
        f"- Category: `{risk.get('risk_category')}`",
        f"- Probability: `{risk.get('probability')}`",
        f"- Composite score: `{risk.get('composite_score')}`",
        f"- Data completeness: `{risk.get('data_completeness')}`",
    ]
    risk_factors = risk.get("risk_factors") or []
    if risk_factors:
        summary.append("- Top risk factors:")
        summary.extend(f" - {factor}" for factor in risk_factors[:5])

    payload = json.dumps(risk, ensure_ascii=False, indent=2)
    return "\n".join(summary), payload
95
+
96
+
97
def _run_demo(
    case_id: str,
    note_text: str,
    backend_mode: str,
    stage1_url: str,
    stage1_model: str,
    stage2_url: str,
    stage2_model: str,
    fallback_to_mock: bool,
) -> Tuple[str, str, str, pd.DataFrame, str, str, str, str]:
    """Run StructCore for the current UI inputs and format every output panel.

    Returns, in order: status Markdown, Stage1 summary, Stage2 raw output,
    the normalized facts table, the gate summary as JSON, risk Markdown,
    risk JSON, and the full debug JSON.
    """

    def _clean(value: str, default: str = "") -> str:
        # UI fields may arrive as None or padded with whitespace.
        return (value or default).strip()

    effective_case_id = case_id or "custom"
    note = _clean(note_text)

    # An emptied textbox falls back to the text of the selected bundled case.
    if not note and effective_case_id != "custom":
        selected = get_case(effective_case_id)
        if selected is not None:
            note = selected.text

    cfg = StructCoreConfig(
        backend_mode=_clean(backend_mode, "mock"),
        stage1_url=_clean(stage1_url),
        stage1_model=_clean(stage1_model),
        stage2_url=_clean(stage2_url),
        stage2_model=_clean(stage2_model),
        fallback_to_mock_on_error=bool(fallback_to_mock),
    )
    result = run_structcore(note, effective_case_id, cfg)

    status_md = _format_status(
        note_id=result.note_id,
        backend_mode=result.backend_mode,
        duration_sec=result.duration_sec,
        gate_summary=result.gate_summary,
        warnings=result.warnings,
        error=result.error,
    )

    facts_table = pd.DataFrame(
        lines_to_rows(result.normalized_lines),
        columns=["CLUSTER", "Keyword", "Value", "Timestamp"],
    )
    risk_md, risk_json = _format_risk_summary(result.risk)
    gate_json = json.dumps(result.gate_summary, ensure_ascii=False, indent=2)

    return (
        status_md,
        result.stage1_summary,
        result.stage2_raw,
        facts_table,
        gate_json,
        risk_md,
        risk_json,
        result_to_debug_json(result),
    )
150
+
151
+
152
def build_demo() -> gr.Blocks:
    """Assemble the Gradio Blocks UI for the StructCore demo.

    The UI has four tabs (case input, StructCore inspector, risk view,
    evidence board) plus a debug accordion. Changing the case dropdown
    refreshes the note text and description; the run button calls
    ``_run_demo`` and fills every output widget.

    The initial backend mode is read from ``STRUCTCORE_BACKEND_MODE``. The
    value is trimmed and lower-cased, and anything other than ``mock`` or
    ``pipeline`` falls back to ``mock`` so that the radio's default is
    always one of its own choices.
    """
    cfg_defaults = StructCoreConfig()
    case_choices = _case_choices()
    default_case_id = _default_case_id()
    initial_case = get_case(default_case_id)
    initial_text = initial_case.text if initial_case else ""
    initial_desc = f"**{initial_case.title}**\n\n{initial_case.description}" if initial_case else "Manual mode"

    # Validate the env-provided mode instead of handing an arbitrary string
    # to the Radio component as its value.
    backend_choices = ["mock", "pipeline"]
    initial_backend_mode = os.getenv("STRUCTCORE_BACKEND_MODE", "mock").strip().lower()
    if initial_backend_mode not in backend_choices:
        initial_backend_mode = "mock"

    evidence_df = pd.DataFrame(load_evidence_rows(), columns=["Claim ID", "Claim", "Metric", "Status", "Artifact"])

    with gr.Blocks(title="MedGemma StructCore Demo") as demo:
        gr.Markdown(
            """
# MedGemma StructCore Demo

**MedGemma StructCore: Local-First Clinical Structuring Engine for EHR**

This demo is extraction-first: free-text EHR -> structured KVT4 facts -> optional downstream readmission risk view.
"""
        )

        with gr.Tab("1) Case Input"):
            case_id = gr.Dropdown(label="Synthetic case", choices=case_choices, value=default_case_id)
            case_desc = gr.Markdown(initial_desc)
            note_text = gr.Textbox(label="Clinical note text", lines=14, value=initial_text)

            with gr.Row():
                backend_mode = gr.Radio(
                    label="Backend mode",
                    choices=backend_choices,
                    value=initial_backend_mode,
                    info="mock = offline deterministic demo, pipeline = Stage1/Stage2 runners with local model servers",
                )
                fallback_to_mock = gr.Checkbox(
                    label="Fallback to mock if pipeline fails",
                    value=True,
                )

            with gr.Accordion("Pipeline settings", open=False):
                stage1_url = gr.Textbox(label="Stage1 URL", value=cfg_defaults.stage1_url)
                stage1_model = gr.Textbox(label="Stage1 model", value=cfg_defaults.stage1_model)
                stage2_url = gr.Textbox(label="Stage2 URL", value=cfg_defaults.stage2_url)
                stage2_model = gr.Textbox(label="Stage2 model", value=cfg_defaults.stage2_model)

            run_btn = gr.Button("Run StructCore", variant="primary")
            status_md = gr.Markdown()

        with gr.Tab("2) StructCore Inspector"):
            stage1_summary = gr.Textbox(label="Stage1 summary", lines=14)
            stage2_raw = gr.Textbox(label="Stage2 raw output", lines=14)
            normalized_df = gr.Dataframe(
                label="Normalized KVT4 facts",
                headers=["CLUSTER", "Keyword", "Value", "Timestamp"],
                datatype=["str", "str", "str", "str"],
                row_count=8,
            )
            gate_json = gr.Textbox(label="Quality gate summary", lines=10)

        with gr.Tab("3) Risk View"):
            risk_md = gr.Markdown()
            risk_json = gr.Textbox(label="Risk payload (JSON)", lines=18)

        with gr.Tab("4) Evidence Board"):
            gr.Markdown("All claims should be interpreted with explicit status labels.")
            gr.Dataframe(
                value=evidence_df,
                headers=["Claim ID", "Claim", "Metric", "Status", "Artifact"],
                datatype=["str", "str", "str", "str", "str"],
                interactive=False,
                wrap=True,
                row_count=len(evidence_df),
                label="Evidence claims",
            )

        with gr.Accordion("Debug JSON", open=False):
            debug_json = gr.Textbox(label="Full run payload", lines=18)

        case_id.change(fn=_on_case_change, inputs=[case_id], outputs=[note_text, case_desc])

        run_btn.click(
            fn=_run_demo,
            inputs=[
                case_id,
                note_text,
                backend_mode,
                stage1_url,
                stage1_model,
                stage2_url,
                stage2_model,
                fallback_to_mock,
            ],
            outputs=[
                status_md,
                stage1_summary,
                stage2_raw,
                normalized_df,
                gate_json,
                risk_md,
                risk_json,
                debug_json,
            ],
        )

    return demo
256
+
257
+
258
def main() -> None:
    """Build the demo and serve it on 0.0.0.0:7863.

    ``ssr_mode=False`` is tried first; if ``launch`` rejects that keyword
    with a ``TypeError`` mentioning ``ssr_mode``, the launch is retried
    without it. Any other ``TypeError`` propagates.
    """
    demo = build_demo()
    launch_kwargs = dict(server_name="0.0.0.0", server_port=7863, show_error=True)
    try:
        demo.launch(ssr_mode=False, **launch_kwargs)
    except TypeError as exc:
        # Older gradio versions do not support ssr_mode.
        if "ssr_mode" in str(exc):
            demo.launch(**launch_kwargs)
        else:
            raise
272
+
273
+
274
+ if __name__ == "__main__":
275
+ main()
apps/challenge_demo/config/evidence_claims.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "claim_id": "C01",
4
+ "claim": "Stage2 KVT4 format stability is high",
5
+ "metric": "99.74% valid format",
6
+ "status": "Verified",
7
+ "artifact": "Analysis_Challenge/Spec_Challenge.md"
8
+ },
9
+ {
10
+ "claim_id": "C02",
11
+ "claim": "Stage1 structured output parse is stable",
12
+ "metric": "98%+ parse rate (test50)",
13
+ "status": "Verified",
14
+ "artifact": "Analysis_Challenge/Spec_Challenge.md"
15
+ },
16
+ {
17
+ "claim_id": "C03",
18
+ "claim": "Track B rule-engine benchmark",
19
+ "metric": "AUROC 0.6024 [0.5882, 0.6167]",
20
+ "status": "Verified",
21
+ "artifact": "results/benchmark/20260207_trackB_hosp_v3_a1_ruleengine_labs50k/metrics_summary.json"
22
+ },
23
+ {
24
+ "claim_id": "C04",
25
+ "claim": "SGR v4.1 notes-based uplift on independent proxy set",
26
+ "metric": "AUROC 0.6462 vs 0.511 baseline",
27
+ "status": "Preliminary",
28
+ "artifact": "results/benchmark/20260209_212920_sgr_v41_clean_test50new/"
29
+ },
30
+ {
31
+ "claim_id": "C05",
32
+ "claim": "Production extraction path is DSPy-free",
33
+ "metric": "OpenAI-compatible runtime path",
34
+ "status": "Verified",
35
+ "artifact": "Docs/DSPY_VIABILITY_DECISION.md"
36
+ },
37
+ {
38
+ "claim_id": "C06",
39
+ "claim": "Corrected larger-N SGR replication",
40
+ "metric": "N=200 corrected run",
41
+ "status": "Planned",
42
+ "artifact": "results/benchmark/<future_run_id>/"
43
+ }
44
+ ]
apps/challenge_demo/data/synthetic_cases.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "id": "low_risk_followup",
4
+ "title": "Low Risk Follow-up",
5
+ "description": "Stable patient with mild abnormalities and home discharge.",
6
+ "text": "45-year-old female admitted for observation with atypical chest discomfort. Heart rate 78, blood pressure 122/76, respiratory rate 16, temperature 36.8, SpO2 98%. Labs: hemoglobin 13.2, hematocrit 39.8, WBC 7.4, platelet 240, sodium 139, potassium 4.2, creatinine 0.8, BUN 14, glucose 102, bicarbonate 24. One prior admission in 12 months. Discharge disposition: home. Alert and oriented at discharge."
7
+ },
8
+ {
9
+ "id": "moderate_risk_multimorbidity",
10
+ "title": "Moderate Risk Multimorbidity",
11
+ "description": "Older patient with chronic disease burden and moderate physiologic stress.",
12
+ "text": "68-year-old male with diabetes and hypertension admitted for dyspnea. HR 96, BP 148/88, RR 20, Temp 37.4, O2 sat 93%. Labs notable for hemoglobin 11.4, WBC 11.9, sodium 134, potassium 4.9, creatinine 1.5, BUN 31, glucose 182, bicarbonate 20. Two ED visits in last 6 months, two prior admissions in 12 months, current length of stay 6 days. Insulin therapy continued. Discharge to home with support services."
13
+ },
14
+ {
15
+ "id": "high_risk_complex",
16
+ "title": "High Risk Complex",
17
+ "description": "Complex discharge with severe derangements and high utilization.",
18
+ "text": "75-year-old male with CHF, CKD, COPD and atrial fibrillation admitted with worsening shortness of breath. Heart rate 118, blood pressure 168/98, respiratory rate 28, temperature 38.2, SpO2 88%, weight 92. Labs: hemoglobin 9.8, hematocrit 30.1, WBC 15.7, platelet 420, sodium 129, potassium 5.6, creatinine 2.3, BUN 48, glucose 236, bicarbonate 17. Four ED visits in six months, three prior admissions in 12 months, days since last admission 18, length of stay 13 days. On diuretic therapy, anticoagulation and opioid therapy. Mechanical ventilation required during stay. Discharge disposition skilled nursing facility. Intermittently confused at discharge."
19
+ }
20
+ ]
apps/challenge_demo/hf_zero/README.md ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: MedGemma StructCore Demo
3
+ emoji: 🩺
4
+ colorFrom: blue
5
+ colorTo: teal
6
+ sdk: gradio
7
+ python_version: "3.10"
8
+ app_file: app.py
9
+ pinned: false
10
+ ---
11
+
12
+ # MedGemma StructCore Demo (HF Spaces Zero)
13
+
14
+ This directory contains deployment assets for Hugging Face Spaces Zero.
15
+
16
+ ## What is included
17
+
18
+ - `app.py`: Space entrypoint for the StructCore demo UI.
19
+ - `requirements.txt`: minimal dependencies for this demo.
20
+
21
+ ## Recommended deployment flow
22
+
23
+ Use the packaging script from the repository root:
24
+
25
+ ```bash
26
+ bash scripts/prepare_hf_zero_challenge_space.sh
27
+ ```
28
+
29
+ It creates a ready-to-push bundle in:
30
+
31
+ ```text
32
+ .dist/hf_zero_challenge_demo_space/
33
+ ```
34
+
35
+ Then push that bundle to your HF Space repository.
36
+
37
+ ## Model repository (two-stage)
38
+
39
+ Target model repo:
40
+
41
+ - `https://huggingface.co/DocUA/medgemma-1.5-4b-it-gguf-q5-k-m-two-stage`
42
+
43
+ Upload/update Stage1 and Stage2 artifacts from this project repo:
44
+
45
+ ```bash
46
+ python3 scripts/hf_upload_two_stage_models.py \
47
+ --repo-id DocUA/medgemma-1.5-4b-it-gguf-q5-k-m-two-stage \
48
+ --stage1-file /absolute/path/to/stage1.gguf \
49
+ --stage2-file /absolute/path/to/stage2.gguf \
50
+ --stage1-path-in-repo stage1/medgemma-stage1-q5_k_m.gguf \
51
+ --stage2-path-in-repo stage2/medgemma-stage2-q5_k_m.gguf
52
+ ```
53
+
54
+ Requires `HF_TOKEN` with write access to the model repo.
55
+
56
+ ## Space runtime configuration
57
+
58
+ Set these variables/secrets in the HF Space settings:
59
+
60
+ - `STRUCTCORE_BACKEND_MODE=pipeline` (or `mock` as safe default)
61
+ - `STRUCTCORE_STAGE1_URL=<your_openai_compat_stage1_url>`
62
+ - `STRUCTCORE_STAGE1_MODEL=<model_alias_from_stage1_/v1/models>`
63
+ - `STRUCTCORE_STAGE2_URL=<your_openai_compat_stage2_url>`
64
+ - `STRUCTCORE_STAGE2_MODEL=<model_alias_from_stage2_/v1/models>`
65
+
66
+ Important:
67
+
68
+ - The Space itself does not automatically serve GGUF files from the model repo.
69
+ - The GGUF files in the HF model repo are the source-of-truth artifacts.
70
+ - Actual inference in `pipeline` mode requires reachable OpenAI-compatible endpoints running those artifacts.
apps/challenge_demo/hf_zero/app.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import os
4
+
5
+ from apps.challenge_demo.app_challenge import build_demo
6
+
7
+
8
+ demo = build_demo()
9
+
10
+
11
+ if __name__ == "__main__":
12
+ demo.launch(
13
+ server_name="0.0.0.0",
14
+ server_port=int(os.getenv("PORT", "7860")),
15
+ show_error=True,
16
+ )
17
+
apps/challenge_demo/hf_zero/requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio>=4.44,<6
2
+ pandas>=2.0,<3
apps/challenge_demo/services/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ """Service layer for StructCore demo."""
apps/challenge_demo/services/case_library.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from dataclasses import dataclass
5
+ from pathlib import Path
6
+ from typing import List
7
+
8
+
9
+ DATA_PATH = Path(__file__).resolve().parents[1] / "data" / "synthetic_cases.json"
10
+
11
+
12
@dataclass(frozen=True)
class SyntheticCase:
    """Immutable record for one bundled demo case."""

    # Identifier used as the dropdown value and for lookups.
    id: str
    # Short human-readable name shown in the UI.
    title: str
    # One-line description shown under the dropdown.
    description: str
    # The clinical note text itself.
    text: str
18
+
19
+
20
def load_cases() -> List[SyntheticCase]:
    """Load the bundled synthetic cases from ``DATA_PATH``.

    Each JSON object becomes a ``SyntheticCase`` with whitespace-trimmed
    string fields. Entries are dropped when they are not JSON objects or
    when ``id``, ``title`` or ``text`` is empty. A JSON ``null`` field counts
    as empty rather than becoming the literal string ``"None"``.

    Raises whatever ``Path.read_text`` / ``json.loads`` raise when the file
    is missing or is not valid JSON.
    """

    def _field(row: dict, key: str) -> str:
        value = row.get(key)
        # str(None) would yield "None" and slip past the emptiness filter.
        return "" if value is None else str(value).strip()

    raw = json.loads(DATA_PATH.read_text(encoding="utf-8"))
    cases: List[SyntheticCase] = []
    for row in raw:
        if not isinstance(row, dict):
            continue
        case = SyntheticCase(
            id=_field(row, "id"),
            title=_field(row, "title"),
            description=_field(row, "description"),
            text=_field(row, "text"),
        )
        if case.id and case.title and case.text:
            cases.append(case)
    return cases
33
+
34
+
35
def get_case(case_id: str) -> SyntheticCase | None:
    """Return the loaded case whose id equals ``case_id`` (trimmed), else ``None``.

    A missing, empty or whitespace-only id returns ``None`` without loading
    the case file.
    """
    wanted = (case_id or "").strip()
    if wanted:
        return next((case for case in load_cases() if case.id == wanted), None)
    return None
apps/challenge_demo/services/evidence_service.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Dict, List
6
+
7
+
8
+ EVIDENCE_PATH = Path(__file__).resolve().parents[1] / "config" / "evidence_claims.json"
9
+
10
+
11
def load_evidence_rows() -> List[Dict[str, str]]:
    """Load the evidence claims from ``EVIDENCE_PATH`` as table rows.

    Each JSON object is mapped to a dict keyed by the display headers
    ("Claim ID", "Claim", "Metric", "Status", "Artifact") with
    whitespace-trimmed string values. Missing or JSON ``null`` fields
    become ``""`` rather than the literal string ``"None"``; entries that
    are not JSON objects are skipped.

    Raises whatever ``Path.read_text`` / ``json.loads`` raise when the file
    is missing or is not valid JSON.
    """
    # (display header, JSON key) pairs, in table column order.
    columns = (
        ("Claim ID", "claim_id"),
        ("Claim", "claim"),
        ("Metric", "metric"),
        ("Status", "status"),
        ("Artifact", "artifact"),
    )

    def _cell(value: object) -> str:
        return "" if value is None else str(value).strip()

    data = json.loads(EVIDENCE_PATH.read_text(encoding="utf-8"))
    rows: List[Dict[str, str]] = []
    for item in data:
        if not isinstance(item, dict):
            continue
        rows.append({header: _cell(item.get(key)) for header, key in columns})
    return rows
apps/challenge_demo/services/structcore_service.py ADDED
@@ -0,0 +1,494 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import re
6
+ import subprocess
7
+ import sys
8
+ import tempfile
9
+ import time
10
+ from dataclasses import asdict, dataclass, field
11
+ from pathlib import Path
12
+ from typing import Any, Dict, List, Optional, Tuple
13
+
14
+ from Analysis_Readmission.readmission_risk_engine import ReadmissionRiskEngine
15
+ from kvt_utils import extract_kvt_fact_lines, normalize_readmission_kvt4_lines
16
+
17
+
18
+ REPO_ROOT = Path(__file__).resolve().parents[3]
19
+ PIPELINE_SCRIPT = REPO_ROOT / "scripts" / "run_two_stage_structured_pipeline.py"
20
+
21
+ VALID_CLUSTERS = {
22
+ "DEMOGRAPHICS",
23
+ "VITALS",
24
+ "LABS",
25
+ "DISPOSITION",
26
+ "MEDICATIONS",
27
+ "PROCEDURES",
28
+ "UTILIZATION",
29
+ "PROBLEMS",
30
+ "SYMPTOMS",
31
+ }
32
+
33
+
34
@dataclass
class StructCoreConfig:
    """Settings for one StructCore run.

    The URL and model defaults are resolved from environment variables once,
    when this class body is executed: the ``STRUCTCORE_*`` variable wins,
    then the ``OPENAI_COMPAT_*`` one, then the hard-coded literal.
    """

    # Which backend to run: "mock" or "pipeline".
    backend_mode: str = "mock"  # mock | pipeline
    # Interpreter used to launch the pipeline script.
    python_executable: str = sys.executable

    # --- Stage1 ---
    stage1_url: str = os.getenv(
        "STRUCTCORE_STAGE1_URL",
        os.getenv("OPENAI_COMPAT_URL", "http://127.0.0.1:1245"),
    )
    stage1_model: str = os.getenv(
        "STRUCTCORE_STAGE1_MODEL",
        os.getenv("OPENAI_COMPAT_MODEL_STAGE1", "medgemma-base-q5_k_m"),
    )
    stage1_profile: str = "sgr_v2"
    stage1_max_tokens: int = 768
    stage1_temperature: float = 0.0

    # --- Stage2 ---
    stage2_url: str = os.getenv(
        "STRUCTCORE_STAGE2_URL",
        os.getenv("OPENAI_COMPAT_URL", "http://127.0.0.1:1246"),
    )
    stage2_model: str = os.getenv(
        "STRUCTCORE_STAGE2_MODEL",
        os.getenv("OPENAI_COMPAT_MODEL_STAGE2", "medgemma-ft-lora-adapters-q5_k_m"),
    )
    stage2_scope: str = "all"
    stage2_output_mode: str = "lines"
    stage2_max_tokens: int = 768
    stage2_temperature: float = 0.0

    # When True, a failing pipeline run is replaced by a mock run.
    fallback_to_mock_on_error: bool = True
53
+
54
+
55
@dataclass
class StructCoreResult:
    """Everything produced by one StructCore run, as shown in the UI."""

    # Backend that actually produced the result (may note a fallback).
    backend_mode: str
    # Identifier of the note/case that was processed.
    note_id: str
    # Stage1 summary text ("" when unavailable).
    stage1_summary: str
    # Stage2 output text before normalization.
    stage2_raw: str
    # Fact lines extracted from the Stage2 output.
    stage2_lines: List[str]
    # Fact lines after normalization.
    normalized_lines: List[str]
    # Statistics returned by the normalizer.
    normalization_stats: Dict[str, Any]
    # Quality-gate summary for the normalized lines.
    gate_summary: Dict[str, Any]
    # Risk-engine output as a plain dict, or None when not scored.
    risk: Optional[Dict[str, Any]]
    # Human-readable warnings; a fresh list per instance.
    warnings: List[str] = field(default_factory=list)
    # Error description, or None on success.
    error: Optional[str] = None
    # Wall-clock duration of the run in seconds.
    duration_sec: float = 0.0
69
+
70
+
71
+ _ENGINE: Optional[ReadmissionRiskEngine] = None
72
+
73
+
74
def _get_engine() -> ReadmissionRiskEngine:
    """Return the module-wide risk engine, creating it on first use."""
    global _ENGINE
    engine = _ENGINE
    if engine is None:
        engine = _ENGINE = ReadmissionRiskEngine()
    return engine
79
+
80
+
81
def run_structcore(note_text: str, note_id: str, cfg: StructCoreConfig) -> StructCoreResult:
    """Run StructCore on one note using the backend selected in ``cfg``.

    * Empty or whitespace-only input returns a failed result with error
      ``"empty_input"``.
    * ``cfg.backend_mode == "pipeline"`` runs the pipeline backend. If it
      raises, the outcome is either a mock run labelled
      ``"mock (pipeline fallback)"`` with a leading warning (fallback
      enabled) or a failed result carrying ``"pipeline_error: ..."``.
    * Any other backend mode runs the mock backend.
    """

    def _failed(mode: str, reason: str, warnings: List[str], error: str) -> StructCoreResult:
        # Shared shape of a result that carries no extracted content.
        return StructCoreResult(
            backend_mode=mode,
            note_id=note_id,
            stage1_summary="",
            stage2_raw="",
            stage2_lines=[],
            normalized_lines=[],
            normalization_stats={},
            gate_summary={"parse_success": False, "reason": reason},
            risk=None,
            warnings=warnings,
            error=error,
            duration_sec=0.0,
        )

    text = (note_text or "").strip()
    if not text:
        return _failed(cfg.backend_mode, "empty_input", ["Input note is empty."], "empty_input")

    if cfg.backend_mode != "pipeline":
        return _run_mock_backend(text, note_id)

    try:
        return _run_pipeline_backend(text, note_id, cfg)
    except Exception as exc:  # noqa: BLE001
        if cfg.fallback_to_mock_on_error:
            fallback = _run_mock_backend(text, note_id)
            fallback.backend_mode = "mock (pipeline fallback)"
            fallback.warnings.insert(0, f"Pipeline backend failed, fallback enabled: {exc}")
            return fallback
        return _failed("pipeline", "pipeline_error", [], f"pipeline_error: {exc}")
124
+
125
+
126
def _run_pipeline_backend(note_text: str, note_id: str, cfg: StructCoreConfig) -> StructCoreResult:
    """Run Stage1 then Stage2 via ``PIPELINE_SCRIPT`` and post-process the output.

    The note is written into a temporary one-document cohort directory, the
    script is invoked once per stage, and the resulting ``stage1.md``,
    ``stage2_raw.txt`` and ``stage2_facts.txt`` are read back. Facts are
    taken from ``stage2_facts.txt`` when it has content, otherwise from the
    raw Stage2 output, then normalized, scored and gate-summarized.

    As in the mock backend, a warning is attached when normalization leaves
    no facts, so an empty pipeline result is not silent.

    Raises:
        RuntimeError: propagated from ``_run_cmd`` when a stage exits non-zero.
    """
    start = time.perf_counter()
    # Fixed placeholder id used to name the temporary cohort/output folders.
    hadm_id = 990001

    with tempfile.TemporaryDirectory(prefix="structcore_demo_") as tmp_dir_str:
        tmp_dir = Path(tmp_dir_str)
        cohort_root = tmp_dir / "cohort"
        out_dir = tmp_dir / "out"

        hadm_dir = cohort_root / str(hadm_id)
        hadm_dir.mkdir(parents=True, exist_ok=True)
        (hadm_dir / f"ehr_{hadm_id}.txt").write_text(note_text, encoding="utf-8")

        # Arguments shared by both stage invocations.
        common_args = [
            cfg.python_executable,
            str(PIPELINE_SCRIPT),
            "--cohort-root",
            str(cohort_root),
            "--out-dir",
            str(out_dir),
            "--hadm-ids",
            str(hadm_id),
            "--num-docs",
            "1",
            "--allow-missing-gt",
        ]

        stage1_cmd = common_args + [
            "stage1",
            "--url",
            cfg.stage1_url,
            "--model",
            cfg.stage1_model,
            "--profile",
            cfg.stage1_profile,
            "--max-tokens",
            str(int(cfg.stage1_max_tokens)),
            "--temperature",
            str(float(cfg.stage1_temperature)),
            "--overwrite-stage1",
        ]

        stage2_cmd = common_args + [
            "stage2",
            "--url",
            cfg.stage2_url,
            "--model",
            cfg.stage2_model,
            "--scope",
            cfg.stage2_scope,
            "--output-mode",
            cfg.stage2_output_mode,
            "--max-tokens",
            str(int(cfg.stage2_max_tokens)),
            "--temperature",
            str(float(cfg.stage2_temperature)),
            "--overwrite-stage2",
        ]

        _run_cmd(stage1_cmd)
        _run_cmd(stage2_cmd)

        # Read the outputs before the temporary directory is removed.
        per_dir = out_dir / str(hadm_id)
        stage1_summary = _read_optional(per_dir / "stage1.md")
        stage2_raw = _read_optional(per_dir / "stage2_raw.txt")
        stage2_lines_text = _read_optional(per_dir / "stage2_facts.txt")

    raw_lines = extract_kvt_fact_lines(stage2_lines_text if stage2_lines_text.strip() else stage2_raw)

    normalized_lines, normalization_stats = normalize_readmission_kvt4_lines(raw_lines)
    risk = _score_risk(normalized_lines)
    gate_summary = _build_gate_summary(normalized_lines, normalization_stats)

    warnings: List[str] = []
    if not normalized_lines:
        warnings.append("No valid KVT4 facts after normalization.")

    return StructCoreResult(
        backend_mode="pipeline",
        note_id=note_id,
        stage1_summary=stage1_summary,
        stage2_raw=stage2_raw,
        stage2_lines=raw_lines,
        normalized_lines=normalized_lines,
        normalization_stats=normalization_stats,
        gate_summary=gate_summary,
        risk=risk,
        warnings=warnings,
        error=None,
        duration_sec=round(time.perf_counter() - start, 3),
    )
221
+
222
+
223
def _run_cmd(cmd: List[str]) -> None:
    """Run ``cmd`` from ``REPO_ROOT`` and raise if it exits non-zero.

    Output is captured as text. On failure the ``RuntimeError`` message is
    the stripped stderr if non-empty, otherwise the stripped stdout,
    otherwise a generic message containing the exit code.
    """
    completed = subprocess.run(
        cmd,
        cwd=str(REPO_ROOT),
        capture_output=True,
        text=True,
        check=False,
    )
    if completed.returncode == 0:
        return

    for stream in (completed.stderr, completed.stdout):
        detail = (stream or "").strip()
        if detail:
            raise RuntimeError(detail)
    raise RuntimeError(f"Command failed with exit code {completed.returncode}")
236
+
237
+
238
def _read_optional(path: Path) -> str:
    """Return the UTF-8 text of ``path``, or ``""`` when it does not exist.

    Undecodable bytes are replaced rather than raising.
    """
    return path.read_text(encoding="utf-8", errors="replace") if path.exists() else ""
242
+
243
+
244
def _run_mock_backend(note_text: str, note_id: str) -> StructCoreResult:
    """Produce a result without any model server, using the heuristic extractor.

    The heuristic fact lines double as the "Stage2" output, and a Stage1-like
    summary is rendered from them. The lines are then normalized, scored and
    gate-summarized; a warning is added when normalization leaves no facts.
    """
    started = time.perf_counter()

    extracted = _heuristic_extract_kvt(note_text)
    raw_text = "\n".join(extracted)
    summary = _render_stage1_like_summary(extracted)

    normalized, stats = normalize_readmission_kvt4_lines(extracted)
    risk = _score_risk(normalized)
    gate = _build_gate_summary(normalized, stats)

    notices: List[str] = [] if normalized else ["No valid KVT4 facts after normalization."]

    elapsed = round(time.perf_counter() - started, 3)
    return StructCoreResult(
        backend_mode="mock",
        note_id=note_id,
        stage1_summary=summary,
        stage2_raw=raw_text,
        stage2_lines=extracted,
        normalized_lines=normalized,
        normalization_stats=stats,
        gate_summary=gate,
        risk=risk,
        warnings=notices,
        error=None,
        duration_sec=elapsed,
    )
273
+
274
+
275
def _score_risk(normalized_lines: List[str]) -> Optional[Dict[str, Any]]:
    """Score the normalized fact lines with the shared risk engine.

    Returns ``None`` when there are no lines; otherwise the engine result
    for the newline-joined lines, converted to a plain dict with ``asdict``.
    """
    if normalized_lines:
        joined = "\n".join(normalized_lines)
        return asdict(_get_engine().score_from_toon(joined))
    return None
281
+
282
+
283
def _build_gate_summary(lines: List[str], stats: Dict[str, Any]) -> Dict[str, Any]:
    """Summarize the normalized lines for the quality-gate panel.

    Clusters are the upper-cased first fields of lines that split into
    exactly four ``|``-separated parts, listed once each in first-seen
    order. The three statistics are copied from ``stats`` when it is a
    dict; otherwise they default to 0 / ``None`` / ``None``.
    """
    first_seen: Dict[str, None] = {}
    for line in lines:
        fields = line.split("|")
        if len(fields) == 4:
            name = fields[0].strip().upper()
            if name:
                # A dict keeps insertion order and ignores repeats.
                first_seen.setdefault(name, None)
    clusters = list(first_seen)

    if isinstance(stats, dict):
        duplicates = int(stats.get("duplicates_after_dedup", 0))
        keyword_rate = stats.get("canonical_keyword_rate_strict")
        numeric_rate = stats.get("numeric_only_rate_vitals_labs")
    else:
        duplicates, keyword_rate, numeric_rate = 0, None, None

    return {
        "parse_success": bool(lines),
        "output_lines": len(lines),
        "clusters_present": clusters,
        "all_clusters_valid": all(name in VALID_CLUSTERS for name in clusters),
        "duplicates_after_dedup": duplicates,
        "canonical_keyword_rate_strict": keyword_rate,
        "numeric_only_rate_vitals_labs": numeric_rate,
    }
304
+
305
+
306
def _render_stage1_like_summary(lines: List[str]) -> str:
    """Render fact lines as a Markdown summary grouped by cluster.

    Only lines with exactly four ``|``-separated fields are used. Each
    cluster in the fixed order below that has facts gets a ``## CLUSTER``
    heading followed by ``- key=value (timestamp)`` bullets; clusters
    outside that order are not rendered. Sections are separated by a blank
    line and the result is stripped.
    """
    section_order = (
        "DEMOGRAPHICS",
        "VITALS",
        "LABS",
        "DISPOSITION",
        "MEDICATIONS",
        "PROCEDURES",
        "UTILIZATION",
        "PROBLEMS",
        "SYMPTOMS",
    )

    bullets_by_cluster: Dict[str, List[str]] = {}
    for raw in lines:
        fields = raw.split("|")
        if len(fields) != 4:
            continue
        cluster, key, value, ts = (part.strip() for part in fields)
        bullets_by_cluster.setdefault(cluster.upper(), []).append(f"- {key}={value} ({ts})")

    sections = [
        "\n".join([f"## {name}", *bullets_by_cluster[name]])
        for name in section_order
        if bullets_by_cluster.get(name)
    ]
    return "\n\n".join(sections).strip()
338
+
339
+
340
+ def _heuristic_extract_kvt(note_text: str) -> List[str]:
341
+ text = note_text or ""
342
+ lowered = text.lower()
343
+ lines: List[str] = []
344
+ seen = set()
345
+
346
+ def add(cluster: str, keyword: str, value: str, timestamp: str) -> None:
347
+ key = (cluster, keyword)
348
+ if key in seen:
349
+ return
350
+ seen.add(key)
351
+ lines.append(f"{cluster}|{keyword}|{value}|{timestamp}")
352
+
353
+ def m1(pattern: str) -> Optional[str]:
354
+ m = re.search(pattern, text, flags=re.IGNORECASE)
355
+ return m.group(1) if m else None
356
+
357
+ age = m1(r"\b(\d{1,3})\s*(?:y/o|yo|year-old|years old)\b")
358
+ if age:
359
+ add("DEMOGRAPHICS", "Age", age, "Admission")
360
+
361
+ if re.search(r"\bfemale\b", lowered):
362
+ add("DEMOGRAPHICS", "Sex", "female", "Admission")
363
+ elif re.search(r"\bmale\b", lowered):
364
+ add("DEMOGRAPHICS", "Sex", "male", "Admission")
365
+
366
+ hr = m1(r"(?:heart\s*rate|\bhr\b|pulse)\s*[:=]?\s*(\d{2,3}(?:\.\d+)?)")
367
+ if hr:
368
+ add("VITALS", "Heart Rate", hr, "Admission")
369
+
370
+ bp = re.search(r"(?:blood\s*pressure|\bbp\b)\s*[:=]?\s*(\d{2,3})\s*/\s*(\d{2,3})", text, flags=re.IGNORECASE)
371
+ if bp:
372
+ add("VITALS", "Systolic BP", bp.group(1), "Admission")
373
+ add("VITALS", "Diastolic BP", bp.group(2), "Admission")
374
+
375
+ rr = m1(r"(?:respiratory\s*rate|\brr\b|\bresp\b)\s*[:=]?\s*(\d{1,2}(?:\.\d+)?)")
376
+ if rr:
377
+ add("VITALS", "Respiratory Rate", rr, "Admission")
378
+
379
+ temp = m1(r"(?:temperature|\btemp\b)\s*[:=]?\s*(\d{2}(?:\.\d+)?)")
380
+ if temp:
381
+ add("VITALS", "Temperature", temp, "Admission")
382
+
383
+ spo2 = m1(r"(?:spo2|o2\s*sat|oxygen\s*saturation)\s*[:=]?\s*(\d{2,3}(?:\.\d+)?)\s*%?")
384
+ if spo2:
385
+ add("VITALS", "SpO2", spo2, "Admission")
386
+
387
+ weight = m1(r"\bweight\s*[:=]?\s*(\d{2,3}(?:\.\d+)?)")
388
+ if weight:
389
+ add("VITALS", "Weight", weight, "Admission")
390
+
391
+ lab_patterns = [
392
+ ("Hemoglobin", r"(?:hemoglobin|\bhgb\b)\s*[:=]?\s*(\d{1,2}(?:\.\d+)?)"),
393
+ ("Hematocrit", r"(?:hematocrit|\bhct\b)\s*[:=]?\s*(\d{1,2}(?:\.\d+)?)"),
394
+ ("WBC", r"\bwbc\b\s*[:=]?\s*(\d{1,2}(?:\.\d+)?)"),
395
+ ("Platelet", r"(?:platelet|\bplt\b)\s*[:=]?\s*(\d{2,4}(?:\.\d+)?)"),
396
+ ("Sodium", r"(?:sodium|\bna\b)\s*[:=]?\s*(\d{2,3}(?:\.\d+)?)"),
397
+ ("Potassium", r"(?:potassium|\bk\b)\s*[:=]?\s*(\d(?:\.\d+)?)"),
398
+ ("Creatinine", r"(?:creatinine|\bcr\b)\s*[:=]?\s*(\d(?:\.\d+)?)"),
399
+ ("BUN", r"\bbun\b\s*[:=]?\s*(\d{1,3}(?:\.\d+)?)"),
400
+ ("Glucose", r"\bglucose\b\s*[:=]?\s*(\d{2,3}(?:\.\d+)?)"),
401
+ ("Bicarbonate", r"(?:bicarbonate|\bhco3\b|bicarb)\s*[:=]?\s*(\d{1,2}(?:\.\d+)?)"),
402
+ ]
403
+ for keyword, pattern in lab_patterns:
404
+ val = m1(pattern)
405
+ if val:
406
+ add("LABS", keyword, val, "Admission")
407
+
408
+ prior_adm = m1(r"(\d+)\s*(?:prior|previous)\s*admissions?\s*(?:in|within)?\s*12\s*months")
409
+ if prior_adm:
410
+ add("UTILIZATION", "Prior Admissions 12mo", prior_adm, "Past")
411
+
412
+ ed_visits = m1(r"(\d+)\s*(?:ed|er|emergency)\s*visits?\s*(?:in|within)?\s*(?:last\s*)?6\s*months")
413
+ if ed_visits:
414
+ add("UTILIZATION", "ED Visits 6mo", ed_visits, "Past")
415
+
416
+ days_last = m1(r"days\s*since\s*last\s*admission\s*[:=]?\s*(\d+)")
417
+ if days_last:
418
+ add("UTILIZATION", "Days Since Last Admission", days_last, "Past")
419
+
420
+ los = m1(r"(?:length\s*of\s*stay|\blos\b)\s*[:=]?\s*(\d+)")
421
+ if los:
422
+ add("UTILIZATION", "Current Length of Stay", los, "Admission")
423
+
424
+ if "skilled nursing" in lowered or "snf" in lowered:
425
+ add("DISPOSITION", "Discharge Disposition", "Skilled Nursing Facility", "Discharge")
426
+ elif "home" in lowered:
427
+ add("DISPOSITION", "Discharge Disposition", "Home", "Discharge")
428
+
429
+ if re.search(r"confus|disorient", lowered):
430
+ add("DISPOSITION", "Mental Status", "Altered", "Discharge")
431
+ elif re.search(r"alert and oriented|a&o", lowered):
432
+ add("DISPOSITION", "Mental Status", "Normal", "Discharge")
433
+
434
+ if re.search(r"warfarin|apixaban|rivaroxaban|heparin|anticoag", lowered):
435
+ add("MEDICATIONS", "Anticoagulation", "yes", "Discharge")
436
+ if re.search(r"insulin", lowered):
437
+ add("MEDICATIONS", "Insulin Therapy", "yes", "Discharge")
438
+ if re.search(r"opioid|morphine|oxycodone|hydromorphone|fentanyl", lowered):
439
+ add("MEDICATIONS", "Opioid Therapy", "yes", "Discharge")
440
+ if re.search(r"diuretic|furosemide|torsemide|bumetanide", lowered):
441
+ add("MEDICATIONS", "Diuretic Therapy", "yes", "Discharge")
442
+
443
+ if re.search(r"mechanical ventilation|intubat", lowered):
444
+ add("PROCEDURES", "Mechanical Ventilation", "yes", "Admission")
445
+ if re.search(r"dialysis", lowered):
446
+ add("PROCEDURES", "Dialysis", "yes", "Admission")
447
+ if re.search(r"surgery|operative|operation", lowered):
448
+ add("PROCEDURES", "Surgery", "yes", "Admission")
449
+
450
+ problem_terms = {
451
+ "heart failure": "Heart Failure",
452
+ "chf": "Heart Failure",
453
+ "ckd": "Chronic Kidney Disease",
454
+ "copd": "COPD",
455
+ "atrial fibrillation": "Atrial Fibrillation",
456
+ "diabetes": "Diabetes Mellitus",
457
+ "hypertension": "Hypertension",
458
+ }
459
+ for token, label in problem_terms.items():
460
+ if token in lowered:
461
+ add("PROBLEMS", label, "chronic", "Past")
462
+
463
+ symptom_terms = {
464
+ "shortness of breath": "Dyspnea",
465
+ "dyspnea": "Dyspnea",
466
+ "chest pain": "Chest Pain",
467
+ "fever": "Fever",
468
+ }
469
+ for token, label in symptom_terms.items():
470
+ if token in lowered:
471
+ add("SYMPTOMS", label, "present", "Admission")
472
+
473
+ return lines
474
+
475
+
476
def lines_to_rows(lines: List[str]) -> List[Dict[str, str]]:
    """Turn pipe-delimited fact lines into table rows.

    Each line is split on "|". Lines that do not yield exactly four fields are
    silently dropped; the remaining fields are whitespace-stripped and stored
    under the keys "CLUSTER", "Keyword", "Value" and "Timestamp" (in that order).

    Args:
        lines: Fact lines of the form ``CLUSTER|Keyword|Value|Timestamp``.

    Returns:
        One dict per well-formed line, in input order.
    """
    column_names = ("CLUSTER", "Keyword", "Value", "Timestamp")
    split_lines = (raw.split("|") for raw in lines)
    return [
        {name: cell.strip() for name, cell in zip(column_names, cells)}
        for cells in split_lines
        if len(cells) == len(column_names)
    ]
491
+
492
+
493
def result_to_debug_json(result: StructCoreResult) -> str:
    """Render *result* as an indented JSON string for debugging.

    The result is first converted with ``asdict`` (presumably
    ``dataclasses.asdict`` -- confirm against this file's imports) and then
    dumped with two-space indentation, keeping non-ASCII characters unescaped.
    """
    payload = asdict(result)
    return json.dumps(payload, indent=2, ensure_ascii=False)
kvt_utils.py ADDED
@@ -0,0 +1,1141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ DSPy-free utilities for KVT4 parsing and normalization.
3
+
4
+ This module contains all parsing, normalization, and validation logic
5
+ that does NOT depend on DSPy. It can be used in production pipelines
6
+ without importing the DSPy framework.
7
+
8
+ Extracted from dspy_integration.py as part of Phase 0 decomposition.
9
+ """
10
+
11
+ import ast
12
+ import json
13
+ import os
14
+ import re
15
+ from typing import List, Optional
16
+
17
+ # =============================================================================
18
+ # REGEX PATTERNS
19
+ # =============================================================================
20
+
21
# Matches a single internal placeholder token such as "<unused95>".
_MEDGEMMA_INTERNAL_TOKEN_RE = re.compile(r"<unused\d+>")
# Matches a whole line that starts (after optional whitespace and an optional
# "<unusedN>" token with trailing word characters) with the word "thought".
_MEDGEMMA_THOUGHT_LINE_RE = re.compile(r"^\s*(<unused\d+>\w*\s*)?thought\b.*$", re.IGNORECASE)
# Captures the text between « and » guillemets (group 1).
_DSPY_QUOTED_FACT_RE = re.compile(r"«([^»]+)»")
# Matches one complete flat fact object whose keys appear in the fixed order
# cluster, keyword, value, timestamp. Keys and values may use single or double
# quotes; the four values are captured as groups 1-4. Values containing a quote
# character are not matched because of the [^"']+ classes.
_PARTIAL_JSON_FACT_RE = re.compile(
    r"""\{\s*["']cluster["']\s*:\s*["']([^"']+)["']\s*,\s*"""
    r"""["']keyword["']\s*:\s*["']([^"']+)["']\s*,\s*"""
    r"""["']value["']\s*:\s*["']([^"']+)["']\s*,\s*"""
    r"""["']timestamp["']\s*:\s*["']([^"']+)["']\s*\}""",
    re.IGNORECASE,
)
# Matches the opening of a grouped cluster array, e.g. `"LABS": [`, and exposes
# the cluster name as the named group "cluster".
_PARTIAL_GROUPED_CLUSTER_BLOCK_RE = re.compile(
    r'"(?P<cluster>DEMOGRAPHICS|VITALS|LABS|PROBLEMS|SYMPTOMS|MEDICATIONS|PROCEDURES|UTILIZATION|DISPOSITION)"\s*:\s*\[',
    re.IGNORECASE,
)
# Matches one complete {"K": ..., "V": ..., "T": ...} item (keys in that order).
# "V" may be a double-quoted string, a number, or true/false; the raw token is
# exposed as group "v", with "k" and "t" holding the keyword and timestamp.
_PARTIAL_GROUPED_ITEM_RE = re.compile(
    r"""\{\s*["']K["']\s*:\s*["'](?P<k>[^"']+)["']\s*,\s*"""
    r"""["']V["']\s*:\s*(?P<v>"[^"]*"|-?\d+(?:\.\d+)?|true|false)\s*,\s*"""
    r"""["']T["']\s*:\s*["'](?P<t>[^"']+)["']\s*\}""",
    re.IGNORECASE,
)
41
+
42
+
43
+ # =============================================================================
44
+ # CANONICAL KEYWORDS (MVP)
45
+ # =============================================================================
46
+
47
# Canonical keyword spellings for the VITALS cluster.
CANONICAL_VITALS = [
    "Heart Rate",
    "Systolic BP",
    "Diastolic BP",
    "Respiratory Rate",
    "Temperature",
    "SpO2",
    "Weight",
]

# Canonical keyword spellings for the LABS cluster.
CANONICAL_LABS = [
    "Hemoglobin",
    "Hematocrit",
    "WBC",
    "Platelet",
    "Sodium",
    "Potassium",
    "Creatinine",
    "BUN",
    "Glucose",
    "Bicarbonate",
]

# Canonical keyword spellings for the DEMOGRAPHICS cluster.
CANONICAL_DEMOGRAPHICS = [
    "Age",
    "Sex",
]

# Fixed keyword vocabulary per cluster. PROBLEMS and SYMPTOMS have no entry
# here (presumably because their keywords are free-form -- confirm with the
# prompt ontology).
STRICT_KEYWORDS_READMISSION: dict[str, set[str]] = {
    "DEMOGRAPHICS": set(CANONICAL_DEMOGRAPHICS),
    "VITALS": set(CANONICAL_VITALS),
    "LABS": set(CANONICAL_LABS),
    "MEDICATIONS": {
        "Medication Count",
        "New Medications Count",
        "Polypharmacy",
        "Anticoagulation",
        "Insulin Therapy",
        "Opioid Therapy",
        "Diuretic Therapy",
    },
    "PROCEDURES": {
        "Any Procedure",
        "Surgery",
        "Dialysis",
        "Mechanical Ventilation",
    },
    "UTILIZATION": {
        "Prior Admissions 12mo",
        "ED Visits 6mo",
        "Days Since Last Admission",
        "Current Length of Stay",
    },
    "DISPOSITION": {
        "Discharge Disposition",
        "Mental Status",
    },
}
# The nine cluster names accepted by the grouped-JSON and heading-based parsers.
READMISSION_CLUSTERS = {
    "DEMOGRAPHICS",
    "VITALS",
    "LABS",
    "PROBLEMS",
    "SYMPTOMS",
    "MEDICATIONS",
    "PROCEDURES",
    "UTILIZATION",
    "DISPOSITION",
}
116
+
117
+
118
+ # =============================================================================
119
+ # OUTPUT PARSING HELPERS
120
+ # =============================================================================
121
+
122
def strip_medgemma_internal_tokens(text: str) -> str:
    """Strip MedGemma placeholder tokens and "thinking" chatter from *text*.

    Only the token itself (e.g. "<unused95>") is deleted; characters that
    directly follow it are kept, so "<unused95>DEMOGRAPHICS|..." still yields
    the "DEMOGRAPHICS|..." fact.

    After token removal the text is scanned line by line. A line containing a
    planning marker ("thought", "the user wants", "input:", ...) is dropped and
    opens a skip region. The region closes on the first later line that looks
    like payload: at least three "|" characters, or a JSON-ish fragment.

    Args:
        text: Raw model output; falsy input yields "".

    Returns:
        The surviving lines joined with "\\n".
    """
    if not text:
        return ""

    thinking_markers = (
        "thought",
        "the user wants",
        "here's my plan",
        "input:",
        "output:",
        "constraints:",
    )

    def _starts_payload(candidate: str) -> bool:
        # KVT4 lines carry three or more pipes.
        if candidate.count("|") >= 3:
            return True
        # JSON-like fragments: short-key items, a "facts" wrapper, an opening
        # brace, or a fenced json block.
        if '{"K"' in candidate or '"facts"' in candidate:
            return True
        return candidate.lstrip().startswith("{") or candidate.strip().startswith("```json")

    kept = []
    skipping = False
    for raw in _MEDGEMMA_INTERNAL_TOKEN_RE.sub("", text).splitlines():
        folded = raw.lower().strip()

        # A marker line is never kept and (re)opens the skip region.
        if any(marker in folded for marker in thinking_markers):
            skipping = True
            continue

        # The first payload-looking line ends the skip region and is kept.
        if skipping and _starts_payload(raw):
            skipping = False

        if skipping or _MEDGEMMA_THOUGHT_LINE_RE.match(raw):
            continue

        kept.append(raw)

    return "\n".join(kept)
171
+
172
+
173
+ def _looks_like_kvt_fact(line: str) -> bool:
174
+ """Validate if a line looks like a valid KVT4 fact.
175
+
176
+ Expects either:
177
+ - 4-part: CLUSTER|Keyword|Value|Timestamp (preferred)
178
+ - 3-part: Keyword|Value|Timestamp (legacy)
179
+ """
180
+ if not line:
181
+ return False
182
+ s = line.strip()
183
+ if len(s) < 5 or len(s) > 400:
184
+ return False
185
+
186
+ pipe_count = s.count("|")
187
+ if pipe_count not in (2, 3):
188
+ return False
189
+ parts = [p.strip() for p in s.split("|")]
190
+ if len(parts) not in (3, 4):
191
+ return False
192
+
193
+ # Keep parser permissive by default (unit tests expect 3-part legacy facts too).
194
+ allow_kvt3 = str(os.getenv("ALLOW_KVT3", "1")).strip() == "1"
195
+ if len(parts) == 3 and not allow_kvt3:
196
+ return False
197
+ parts_lower = [p.lower() for p in parts]
198
+
199
+ # Filter common headers / schema lines.
200
+ if parts_lower == ["k", "v", "t"]:
201
+ return False
202
+ if parts_lower == ["category", "keyword", "value", "timestamp"]:
203
+ return False
204
+ if (
205
+ len(parts_lower) == 4
206
+ and parts_lower[0].startswith("category")
207
+ and parts_lower[1] == "keyword"
208
+ and parts_lower[2] == "value"
209
+ and parts_lower[3].startswith("timestamp")
210
+ ):
211
+ return False
212
+ if "format" in parts_lower[0] and parts_lower[1:3] == ["keyword", "value"]:
213
+ return False
214
+ if parts_lower[0].startswith(("format", "output format")) and "timestamp" in parts_lower[-1]:
215
+ return False
216
+
217
+ # Filter instruction lines
218
+ if any(marker in parts_lower[1] for marker in ['any diagnosis', 'any symptom', 'any procedure', 'value:']):
219
+ return False
220
+ if '(' in parts[1] and ')' in parts[1]:
221
+ return False
222
+
223
+ # Length heuristics to avoid capturing prose with incidental pipes.
224
+ if len(parts[0]) > 80 or len(parts[1]) > 80 or len(parts[2]) > 200:
225
+ return False
226
+ if len(parts) == 4 and len(parts[3]) > 40:
227
+ return False
228
+
229
+ # Word-count heuristics: KVT lines are short phrases, not full sentences.
230
+ w0 = len(parts[0].split())
231
+ w1 = len(parts[1].split())
232
+ w2 = len(parts[2].split())
233
+ if w0 > 8 or w1 > 8 or w2 > 14:
234
+ return False
235
+ if len(parts) == 4 and len(parts[3].split()) > 4:
236
+ return False
237
+ return all(parts)
238
+
239
+
240
+ def _normalize_kvt_fact(line: str) -> str:
241
+ """Normalize a KVT fact line by stripping whitespace and quotes."""
242
+ parts = [p.strip().strip("«»\"'") for p in line.strip().split("|")]
243
+ return "|".join(parts)
244
+
245
+
246
+ def _map_category_to_cluster(category: str) -> str:
247
+ """Map category aliases to canonical cluster names."""
248
+ c = (category or "").strip().lower()
249
+ if not c:
250
+ return ""
251
+ mapping = {
252
+ "vitals": "VITALS",
253
+ "vital": "VITALS",
254
+ "labs": "LABS",
255
+ "lab": "LABS",
256
+ "demographics": "DEMOGRAPHICS",
257
+ "demo": "DEMOGRAPHICS",
258
+ "conditions": "PROBLEMS",
259
+ "condition": "PROBLEMS",
260
+ "problems": "PROBLEMS",
261
+ "problem": "PROBLEMS",
262
+ "symptoms": "SYMPTOMS",
263
+ "symptom": "SYMPTOMS",
264
+ "medications": "MEDICATIONS",
265
+ "medication": "MEDICATIONS",
266
+ "procedures": "PROCEDURES",
267
+ "procedure": "PROCEDURES",
268
+ "utilization": "UTILIZATION",
269
+ "disposition": "DISPOSITION",
270
+ }
271
+ return mapping.get(c, category.strip())
272
+
273
+
274
+ def _infer_cluster_from_keyword(keyword: str) -> str:
275
+ """Infer cluster from keyword using canonical lists."""
276
+ k = (keyword or "").strip()
277
+ if not k:
278
+ return ""
279
+ if k in CANONICAL_VITALS:
280
+ return "VITALS"
281
+ if k in CANONICAL_LABS:
282
+ return "LABS"
283
+ if k in CANONICAL_DEMOGRAPHICS:
284
+ return "DEMOGRAPHICS"
285
+ # Minimal readmission-fixed keywords
286
+ if k in {"Prior Admissions 12mo", "ED Visits 6mo", "Days Since Last Admission", "Current Length of Stay"}:
287
+ return "UTILIZATION"
288
+ if k in {"Discharge Disposition", "Mental Status"}:
289
+ return "DISPOSITION"
290
+ if k in {"Any Procedure", "Surgery", "Dialysis", "Mechanical Ventilation"}:
291
+ return "PROCEDURES"
292
+ if k in {"Medication Count", "New Medications Count", "Polypharmacy", "Anticoagulation", "Insulin Therapy", "Opioid Therapy", "Diuretic Therapy"}:
293
+ return "MEDICATIONS"
294
+ return ""
295
+
296
+
297
+ def _kvt4_from_fact_dict(d: dict) -> Optional[str]:
298
+ """Convert structured fact dict into CLUSTER|Keyword|Value|Timestamp."""
299
+ if not isinstance(d, dict):
300
+ return None
301
+
302
+ def _first_present(*keys: str):
303
+ for key in keys:
304
+ if key in d and d[key] is not None:
305
+ return d[key]
306
+ return None
307
+
308
+ # Accept multiple key spellings
309
+ cluster = _first_present("cluster", "Cluster", "C", "category", "Category")
310
+ keyword = _first_present("keyword", "Keyword", "K")
311
+ value = _first_present("value", "Value", "V")
312
+ timestamp = _first_present("timestamp", "Timestamp", "T")
313
+
314
+ keyword_s = str(keyword).strip() if keyword is not None else ""
315
+ value_s = str(value).strip() if value is not None else ""
316
+ timestamp_s = str(timestamp).strip() if timestamp is not None else ""
317
+
318
+ cluster_s = str(cluster).strip() if cluster is not None else ""
319
+ cluster_s = _map_category_to_cluster(cluster_s)
320
+
321
+ if not cluster_s:
322
+ cluster_s = _infer_cluster_from_keyword(keyword_s)
323
+ if not cluster_s:
324
+ cluster_s = "UNKNOWN"
325
+
326
+ if not keyword_s or not value_s:
327
+ return None
328
+ if not timestamp_s:
329
+ timestamp_s = "Unknown"
330
+
331
+ return f"{cluster_s}|{keyword_s}|{value_s}|{timestamp_s}"
332
+
333
+
334
+ def _fact_dict_has_explicit_cluster(d: dict) -> bool:
335
+ if not isinstance(d, dict):
336
+ return False
337
+ for key in ("cluster", "Cluster", "CLUSTER", "C", "category", "Category"):
338
+ v = d.get(key)
339
+ if v is not None and str(v).strip():
340
+ return True
341
+ return False
342
+
343
+
344
def extract_kvt_fact_lines(text: str) -> List[str]:
    """
    Extract candidate K|V|T / Category|K|V|T lines from arbitrary model output.

    Handles common formats:
    - Plain pipe-delimited lines
    - DSPy-rendered lists like: [1] «Vitals|Temperature|37.2°C|20240110»
    - JSON objects/lists containing "facts"
    - Grouped JSON objects like {"LABS": [{"K": ..., "V": ..., "T": ...}]}
    - Python literal lists of strings
    - Truncated JSON, narrative markdown blocks and cluster headings followed
      by inline {"K","V","T"} items (heuristic recovery)

    Structured parsing (JSON / Python literal) runs first. When it yields any
    fact, including facts from a grouped JSON object, the result is returned
    immediately and the heuristic recovery passes are skipped, because they
    would re-extract the same facts with different value formatting.

    Args:
        text: Raw model output. Falsy input yields [].

    Returns:
        Normalized fact lines that pass ``_looks_like_kvt_fact``, de-duplicated
        with first-occurrence order preserved.
    """
    if not text:
        return []

    cleaned = strip_medgemma_internal_tokens(text).strip()
    if not cleaned:
        return []

    candidates: List[str] = []
    structured_extracted = False

    def _dedupe_preserve_order(items: List[str]) -> List[str]:
        """Drop repeated facts, keeping the first occurrence of each."""
        return list(dict.fromkeys(items))

    def add_fact(s: str) -> None:
        """Normalize *s* and keep it only if it passes the KVT shape checks."""
        s2 = _normalize_kvt_fact(s)
        if _looks_like_kvt_fact(s2):
            candidates.append(s2)

    def _unquote_value_token(v_tok: str) -> str:
        """Turn a raw JSON value token (string, number, true/false) into fact text."""
        if v_tok.startswith('"') and v_tok.endswith('"') and len(v_tok) >= 2:
            return v_tok[1:-1]
        if v_tok.casefold() in {"true", "false"}:
            return v_tok.casefold()
        return v_tok

    def _fact_dict_to_kvt4(d: dict) -> Optional[str]:
        """Convert common structured fact dicts into CLUSTER|Keyword|Value|Timestamp."""
        if not isinstance(d, dict):
            return None

        def _first_present(*keys: str):
            for key in keys:
                if key in d and d[key] is not None:
                    return d[key]
            return None

        # Accept multiple key spellings (legacy + short keys).
        cluster = _first_present("cluster", "Cluster", "CLUSTER", "C", "category", "Category")
        keyword = _first_present("keyword", "Keyword", "KEYWORD", "K")
        value = _first_present("value", "Value", "VALUE", "V")
        timestamp = _first_present("timestamp", "Timestamp", "TIMESTAMP", "T")

        keyword_s = str(keyword).strip() if keyword is not None else ""
        value_s = str(value).strip() if value is not None else ""
        timestamp_s = str(timestamp).strip() if timestamp is not None else ""
        # Drop prompt-template placeholders that are not real facts.
        if keyword_s.casefold() in {"keyword", "k"} and value_s.casefold() in {"value", "v"}:
            if timestamp_s.casefold() in {"timestamp", "t", "unknown", "admission", "discharge", "past"}:
                return None

        cluster_s = str(cluster).strip() if cluster is not None else ""
        # If we got a "category" like "vitals/labs", map it into prompt-style clusters.
        cluster_s = _map_category_to_cluster(cluster_s)

        if not cluster_s:
            cluster_s = _infer_cluster_from_keyword(keyword_s)
        if not cluster_s:
            cluster_s = "UNKNOWN"

        if not keyword_s or not value_s:
            return None
        if not timestamp_s:
            timestamp_s = "Unknown"

        return f"{cluster_s}|{keyword_s}|{value_s}|{timestamp_s}"

    def _kvt4_lines_from_grouped_obj(obj: dict) -> List[str]:
        """Convert grouped JSON object into KVT4 lines.

        Supported layout:
        {
            "LABS":[{"K":"Creatinine","V":1.2,"T":"Discharge"}],
            "PROBLEMS":[{"K":"Hypertension","V":"chronic","T":"Past"}]
        }
        """
        if not isinstance(obj, dict):
            return []

        out_lines: List[str] = []
        for raw_cluster, raw_entries in obj.items():
            cluster_norm = _map_category_to_cluster(str(raw_cluster).strip())
            cluster_upper = cluster_norm.upper()
            if cluster_upper not in READMISSION_CLUSTERS:
                continue

            if isinstance(raw_entries, list):
                entries = [it for it in raw_entries if isinstance(it, dict)]
            elif isinstance(raw_entries, dict):
                entries = [raw_entries]
            else:
                continue

            for ent in entries:
                keyword = ent["K"] if "K" in ent else ent.get("keyword", ent.get("Keyword"))
                value = ent["V"] if "V" in ent else ent.get("value", ent.get("Value"))
                timestamp = ent["T"] if "T" in ent else ent.get("timestamp", ent.get("Timestamp"))
                ln = _fact_dict_to_kvt4(
                    {
                        "cluster": cluster_upper,
                        "keyword": keyword,
                        "value": value,
                        "timestamp": timestamp,
                    }
                )
                if ln:
                    out_lines.append(ln)
        return out_lines

    def _add_fact_items(items: list, skip_clusterless_dicts: bool) -> None:
        """Add every string / dict element of a parsed list as a fact."""
        for it in items:
            if isinstance(it, str):
                add_fact(it)
            elif isinstance(it, dict):
                # Avoid duplicate UNKNOWN facts when a grouped JSON object is
                # also parsed via its inner array substring (cluster context lost).
                if skip_clusterless_dicts and not _fact_dict_has_explicit_cluster(it):
                    continue
                ln = _fact_dict_to_kvt4(it)
                if ln:
                    add_fact(ln)

    def _collect_from_parsed(obj, is_derived_array: bool) -> bool:
        """Add facts found in a parsed object.

        Returns True when *obj* was a grouped payload, in which case the caller
        should not try further parsers on the same substring.
        """
        if isinstance(obj, dict):
            facts = obj.get("facts")
            if isinstance(facts, list):
                _add_fact_items(facts, skip_clusterless_dicts=False)
                return False
            grouped_lines = _kvt4_lines_from_grouped_obj(obj)
            if grouped_lines:
                for ln in grouped_lines:
                    add_fact(ln)
                return True
            # Sometimes the whole object is a single fact dict.
            ln = _fact_dict_to_kvt4(obj)
            if ln:
                add_fact(ln)
        elif isinstance(obj, list):
            _add_fact_items(obj, skip_clusterless_dicts=is_derived_array)
        return False

    def _flush_narrative(cur: dict) -> None:
        """Emit the accumulated narrative fact once cluster, keyword and value are set."""
        if cur.get("cluster") and cur.get("keyword") and cur.get("value"):
            ts = cur.get("timestamp", "Unknown")
            add_fact(f"{cur['cluster']}|{cur['keyword']}|{cur['value']}|{ts}")

    # 1) JSON / Python list attempts (whole string + best-effort substrings)
    substrings: List[str] = [cleaned]
    first_obj = cleaned.find("{")
    last_obj = cleaned.rfind("}")
    if first_obj != -1 and last_obj != -1 and last_obj > first_obj:
        substrings.append(cleaned[first_obj : last_obj + 1])
    first_arr = cleaned.find("[")
    last_arr = cleaned.rfind("]")
    if first_arr != -1 and last_arr != -1 and last_arr > first_arr:
        substrings.append(cleaned[first_arr : last_arr + 1])

    cleaned_strip = cleaned.strip()
    for s in dict.fromkeys(substrings):
        s_strip = s.strip()
        if not s_strip:
            continue
        is_derived_array_substring = (
            s_strip.startswith("[") and s_strip.endswith("]") and s_strip != cleaned_strip
        )
        for parse in (json.loads, ast.literal_eval):
            before = len(candidates)
            try:
                was_grouped = _collect_from_parsed(parse(s_strip), is_derived_array_substring)
            except Exception:
                # Deliberately best-effort: model output is arbitrary text and
                # most substrings are not valid JSON / Python literals.
                continue
            if len(candidates) > before:
                # Also reached for grouped payloads, so the heuristic passes
                # below do not re-extract a payload that parsed cleanly.
                structured_extracted = True
            if was_grouped:
                break

    # If we already extracted structured facts, do not run heuristic recovery
    # branches below (they may introduce noisy duplicates on valid JSON payloads).
    if structured_extracted and candidates:
        return _dedupe_preserve_order(candidates)

    # 1b) Partial/truncated JSON recovery:
    # If the model output is cut mid-stream, json.loads fails even when many
    # complete fact objects were already emitted. Recover those complete objects.
    for m in _PARTIAL_JSON_FACT_RE.finditer(cleaned):
        c, k, v, t = [x.strip() for x in m.groups()]
        if c and k and v and t:
            add_fact(f"{c}|{k}|{v}|{t}")

    # 1c) Partial/truncated grouped JSON recovery:
    # Recover complete {"K","V","T"} entries within each cluster block even when
    # root JSON is truncated and json.loads fails.
    cluster_hits = list(_PARTIAL_GROUPED_CLUSTER_BLOCK_RE.finditer(cleaned))
    for idx, hit in enumerate(cluster_hits):
        cluster = str(hit.group("cluster") or "").strip().upper()
        # A block runs from this cluster's "[" to the next cluster key (or EOF).
        block_start = hit.end()
        block_end = cluster_hits[idx + 1].start() if idx + 1 < len(cluster_hits) else len(cleaned)
        block = cleaned[block_start:block_end]
        for item in _PARTIAL_GROUPED_ITEM_RE.finditer(block):
            k = str(item.group("k") or "").strip()
            t = str(item.group("t") or "").strip()
            v_tok = str(item.group("v") or "").strip()
            if not k or not t:
                continue
            v = _unquote_value_token(v_tok)
            if v:
                add_fact(f"{cluster}|{k}|{v}|{t}")

    # 2) Extract between DSPy quotes «...»
    for m in _DSPY_QUOTED_FACT_RE.finditer(cleaned):
        inner = m.group(1).strip()
        if "|" in inner:
            add_fact(inner)

    # 2b) Narrative markdown recovery.
    # Some small models emit facts as multi-line markdown blocks:
    #   **CLUSTER:** DEMOGRAPHICS
    #   **Keyword:** Sex
    #   **Value:** male
    #   **Timestamp:** Admission
    # Recover these into KVT4 lines.
    _narrative_kv_re = re.compile(
        r"\*{0,2}(cluster|keyword|value|timestamp)\s*:?\s*\*{0,2}\s*(.+)",
        re.IGNORECASE,
    )
    cur: dict = {}
    for line in cleaned.splitlines():
        m = _narrative_kv_re.match(line.strip())
        if not m:
            continue
        field = m.group(1).strip().lower()
        val = m.group(2).strip().strip("*").strip()
        if field == "cluster":
            _flush_narrative(cur)
            cur = {"cluster": val}
        elif field == "keyword":
            # Flush previous fact within the same cluster before starting a new keyword
            _flush_narrative(cur)
            cur = {"cluster": cur.get("cluster", ""), "keyword": val}
        elif field in ("value", "timestamp"):
            cur[field] = val
    # flush last accumulated fact
    _flush_narrative(cur)

    # 2c) Cluster-heading + inline JSON item recovery.
    # Some models emit planning text like:
    #   * **VITALS:**
    #     ... -> {"K":"Heart Rate","V":54,"T":"Admission"}
    # Recover such entries by tracking the current cluster heading.
    heading_re = re.compile(r"\*{0,2}\s*([A-Z][A-Z ]{2,})\s*:\s*\*{0,2}\s*$")
    cluster_inline_re = re.compile(
        r"\b(DEMOGRAPHICS|VITALS|LABS|PROBLEMS|SYMPTOMS|MEDICATIONS|PROCEDURES|UTILIZATION|DISPOSITION)\b",
        re.IGNORECASE,
    )
    item_re = re.compile(
        r'\{\s*"K"\s*:\s*"(?P<k>[^"]+)"\s*,\s*"V"\s*:\s*(?P<v>"[^"]*"|-?\d+(?:\.\d+)?|true|false)\s*,\s*"T"\s*:\s*"(?P<t>Past|Admission|Discharge|Unknown)"\s*\}',
        re.IGNORECASE,
    )
    cur_cluster = ""
    for raw_line in cleaned.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        # Avoid cluster-bleed on compact one-line JSON objects:
        # grouped payloads should be handled by structured parsing above.
        if line.startswith("{") or line.startswith("["):
            continue

        # Accept headings like "**VITALS:**", "VITALS:", "* **VITALS:**"
        norm = re.sub(r"^[*•\-\s]+", "", line)
        norm = norm.strip("* ").strip()
        hm = heading_re.match(norm)
        if hm:
            c_up = str(_map_category_to_cluster(hm.group(1).strip().upper())).strip().upper()
            if c_up in READMISSION_CLUSTERS:
                cur_cluster = c_up
            continue

        # Inline headings like:
        # "- **VITALS:** ... -> {\"K\":\"Heart Rate\",...}"
        cm = cluster_inline_re.search(norm)
        if cm:
            c_up = str(_map_category_to_cluster(cm.group(1).strip().upper())).strip().upper()
            if c_up in READMISSION_CLUSTERS:
                cur_cluster = c_up

        if not cur_cluster:
            continue
        for m in item_re.finditer(line):
            k = str(m.group("k") or "").strip()
            v_tok = str(m.group("v") or "").strip()
            t = str(m.group("t") or "").strip()
            if not k or not t:
                continue
            v = _unquote_value_token(v_tok)
            if v:
                add_fact(f"{cur_cluster}|{k}|{v}|{t}")

    # 3) Line-by-line heuristics (bullets / numbering / quoted JSON fragments)
    for line in cleaned.splitlines():
        s = line.strip()
        if not s:
            continue
        s = re.sub(r"^\[\d+\]\s*", "", s)
        s = re.sub(r"^[-*•]\s*", "", s)
        s = s.strip().strip("«»\"'")
        s = s.rstrip(",")
        if "|" in s:
            add_fact(s)

    # De-duplicate while preserving order
    return _dedupe_preserve_order(candidates)
761
+
762
def normalize_readmission_kvt4_lines(lines: List[str]) -> tuple[List[str], dict]:
    """Normalize KVT4 lines into canonical READMISSION_MVP form.

    Each input line is expected to look like ``CLUSTER|Keyword|Value|Timestamp``.
    Entries that are not strings, do not contain exactly three ``|`` separators,
    or have an empty cluster, keyword or value are ignored.

    Goals:
    - Boost strict-format usability by deterministic canonicalization
    - Reduce drift (Blood Pressure -> Systolic BP + Diastolic BP,
      Oxygen Saturation -> SpO2, etc.)
    - Enforce numeric-only values for VITALS/LABS (+ known numeric fields)
    - Enforce at most one line per (CLUSTER, Keyword) via timestamp-priority dedupe

    Environment variables (read on every call):
    - ``MEDGEMMA_TIMESTAMP_PRIORITY``: comma-separated timestamps, best first
      (default ``Discharge,Admission,Past,Unknown``). Timestamps that are not
      listed rank last.
    - ``MEDGEMMA_ALLOWED_CLUSTERS``: optional comma-separated cluster whitelist;
      lines from other clusters are dropped.
    - ``MEDGEMMA_TIMESTAMP_FILL_UNKNOWN``: when ``1``/``true``/``yes`` (default
      ``1``), ``Unknown`` timestamps are replaced by a cluster-based default.

    Args:
        lines: Raw KVT4 lines; ``None`` or an empty list yields no output.

    Returns:
        ``(normalized_lines, stats)`` where ``normalized_lines`` is sorted by
        (cluster, keyword) and ``stats`` holds the counters ``input_lines``,
        ``parsed_kvt4``, ``dropped_placeholders``, ``dropped_noncanonical``,
        ``dropped_by_allowed_clusters``, ``expanded_bp``, ``dedup_dropped``,
        ``output_lines``, ``duplicates_after_dedup`` and the rates
        ``canonical_keyword_rate_strict`` / ``numeric_only_rate_vitals_labs``
        (both ``1.0`` when there is nothing to measure).
    """

    number_re = re.compile(r"-?\d+(?:\.\d+)?")
    bp_re = re.compile(r"(\d+(?:\.\d+)?)\s*/\s*(\d+(?:\.\d+)?)")
    # Case-folded lookup so "admission" / "DISCHARGE" keep their meaning
    # instead of being downgraded to "Unknown".
    canonical_ts = {
        name.casefold(): name for name in ("Admission", "Discharge", "Past", "Unknown")
    }

    def _parse_line(line: str) -> Optional[tuple[str, str, str, str]]:
        """Split one KVT4 line into (cluster, keyword, value, timestamp)."""
        if not isinstance(line, str):
            return None
        s = line.strip()
        if s.count("|") != 3:
            return None
        c, k, v, t = (p.strip() for p in s.split("|"))
        if not c or not k or not v:
            return None
        return c, k, v, t or "Unknown"

    def _normalize_timestamp(t: str) -> str:
        """Map a timestamp to its canonical spelling; anything else is Unknown."""
        return canonical_ts.get((t or "").strip().casefold(), "Unknown")

    def _fill_unknown_timestamp(cluster: str, value: str) -> str:
        """Best-effort timestamp fill for strict-eval stability.

        Policy is ontology-driven (not note-section heuristics):
        - DISPOSITION: Discharge
        - UTILIZATION: Past
        - PROBLEMS: Discharge if the value is "acute", otherwise Past
        - every other cluster: Admission
        """
        c = (cluster or "").strip().upper()
        if c == "DISPOSITION":
            return "Discharge"
        if c == "UTILIZATION":
            return "Past"
        if c == "PROBLEMS":
            return "Discharge" if (value or "").strip().lower() == "acute" else "Past"
        return "Admission"

    def _first_number(value: str) -> Optional[str]:
        """Return the first (optionally signed, optionally decimal) number in value."""
        m = number_re.search(value or "")
        return m.group(0) if m else None

    def _yes_no(low: str) -> Optional[str]:
        """Collapse common boolean spellings to "yes"/"no"; None if unrecognised."""
        if low in {"yes", "y", "true", "1"}:
            return "yes"
        if low in {"no", "n", "false", "0"}:
            return "no"
        return None

    def _canonical_disposition(low: str) -> Optional[str]:
        """Map a free-text discharge disposition onto the prompt enums."""
        if "home with" in low or "home w" in low or "services" in low:
            return "Home with Services"
        if low == "home" or low.startswith("home "):
            return "Home"
        if "snf" in low or "skilled nursing" in low:
            return "SNF"
        if "rehab" in low:
            return "Rehab"
        if "ltac" in low:
            return "LTAC"
        if "hospice" in low:
            return "Hospice"
        if "ama" in low or "against medical advice" in low:
            return "AMA"
        return None

    def _canonical_mental_status(low: str) -> Optional[str]:
        """Map a free-text mental status onto its canonical label."""
        for needle, label in (
            ("confus", "confused"),
            ("letharg", "lethargic"),
            ("alert", "alert"),
            ("orient", "oriented"),
        ):
            if needle in low:
                return label
        return None

    def _canonical_value(cluster: str, keyword: str, value: str) -> Optional[str]:
        """Return the canonical value for a field, or None if it must be dropped.

        Keywords without a dedicated rule inside DEMOGRAPHICS, MEDICATIONS,
        PROCEDURES and DISPOSITION, and all fields of unlisted clusters, pass
        through unchanged.
        """
        low = value.strip().lower()

        if cluster == "DEMOGRAPHICS":
            if keyword == "Sex":
                return sex_alias.get(low)
            if keyword == "Age":
                return _first_number(value)
            return value

        if cluster in {"VITALS", "LABS", "UTILIZATION"}:
            return _first_number(value)

        if cluster == "MEDICATIONS":
            if keyword in {"Medication Count", "New Medications Count"}:
                return _first_number(value)
            if keyword in {
                "Polypharmacy",
                "Anticoagulation",
                "Insulin Therapy",
                "Opioid Therapy",
                "Diuretic Therapy",
            }:
                return _yes_no(low)
            return value

        if cluster == "PROCEDURES":
            if keyword in {"Any Procedure", "Surgery"}:
                return _yes_no(low)
            if keyword == "Dialysis":
                return low if low in {"decided", "started", "done", "cancelled", "no"} else None
            if keyword == "Mechanical Ventilation":
                # Either an explicit "no" or a numeric value.
                return "no" if low == "no" else _first_number(value)
            return value

        if cluster == "DISPOSITION":
            if keyword == "Discharge Disposition":
                return _canonical_disposition(low)
            if keyword == "Mental Status":
                return _canonical_mental_status(low)
            return value

        if cluster == "PROBLEMS":
            vv = re.sub(r"\s+", " ", low)
            if vv in {"chronic", "acute", "exist", "not exist"}:
                return vv
            if vv in {"past", "history", "historical", "pmh", "chronic condition", "chronic disease"}:
                return "chronic"
            if vv in {"discharge", "discharged", "active", "current"}:
                return "acute"
            if vv in {"present", "yes", "true", "1", "positive", "confirmed", "exists"}:
                return "exist"
            if vv in {"no", "none", "false", "0", "absent", "negative", "not present", "ruled out"}:
                return "not exist"
            return None

        if cluster == "SYMPTOMS":
            vv = re.sub(r"\s+", " ", low)
            if vv in {"yes", "no", "severe"}:
                return vv
            if vv in {"present", "positive", "true", "1", "y", "symptomatic"}:
                return "yes"
            if vv in {"none", "absent", "negative", "false", "0", "n", "denied", "denies"}:
                return "no"
            if "severe" in vv or vv in {"marked", "significant"}:
                return "severe"
            return None

        return value

    # Keyword aliases (strict clusters). Lookups are case-sensitive.
    vital_alias = {
        "HR": "Heart Rate",
        "Pulse": "Heart Rate",
        "Temp": "Temperature",
        "O2 Sat": "SpO2",
        "Oxygen Saturation": "SpO2",
        "SpO2": "SpO2",
        "Resp": "Respiratory Rate",
        "RR": "Respiratory Rate",
        "Blood Pressure": "Blood Pressure",  # special-case splitter
        "BP": "Blood Pressure",
        "Systolic": "Systolic BP",
        "Diastolic": "Diastolic BP",
        "SBP": "Systolic BP",
        "DBP": "Diastolic BP",
    }
    lab_alias = {
        "Hgb": "Hemoglobin",
        "Hct": "Hematocrit",
        "Plt": "Platelet",
        "Platelets": "Platelet",
        "Na": "Sodium",
        "K": "Potassium",
        "Cr": "Creatinine",
        "HCO3": "Bicarbonate",
        "Bicarb": "Bicarbonate",
        "WBC": "WBC",
        "BUN": "BUN",
    }
    sex_alias = {"m": "male", "male": "male", "f": "female", "female": "female"}

    # Dedupe priority can be configured per mode.
    # For full readmission feature set we generally care about discharge/most-recent.
    ts_priority = [
        s.strip()
        for s in os.getenv("MEDGEMMA_TIMESTAMP_PRIORITY", "Discharge,Admission,Past,Unknown").split(",")
        if s.strip()
    ]
    ts_rank = {t: i for i, t in enumerate(ts_priority)}

    stats = {
        "input_lines": len(lines or []),
        "parsed_kvt4": 0,
        "dropped_placeholders": 0,
        "dropped_noncanonical": 0,
        "dropped_by_allowed_clusters": 0,
        "expanded_bp": 0,
        "dedup_dropped": 0,
        "output_lines": 0,
        "canonical_keyword_rate_strict": None,
        "numeric_only_rate_vitals_labs": None,
        "duplicates_after_dedup": 0,
    }

    allowed_clusters_env = os.getenv("MEDGEMMA_ALLOWED_CLUSTERS", "").strip()
    allowed_clusters = None
    if allowed_clusters_env:
        allowed_clusters = {c.strip().upper() for c in allowed_clusters_env.split(",") if c.strip()}

    fill_unknown = os.getenv("MEDGEMMA_TIMESTAMP_FILL_UNKNOWN", "1").strip().lower() in {"1", "true", "yes"}

    # First pass: normalize + expand BP
    normalized_candidates: List[tuple[str, str, str, str]] = []
    for line in lines or []:
        parsed = _parse_line(line)
        if not parsed:
            continue
        c, k_norm, v_norm, t = parsed
        stats["parsed_kvt4"] += 1

        c_up = c.upper()
        if allowed_clusters is not None and c_up not in allowed_clusters:
            stats["dropped_by_allowed_clusters"] += 1
            continue
        t_norm = _normalize_timestamp(t)

        # Drop obvious placeholders. The match is case-sensitive: lowercase
        # "none" is not dropped here and is mapped by the PROBLEMS/SYMPTOMS rules.
        if v_norm in {"___", "__", "_", "N/A", "NA", "null", "None"}:
            stats["dropped_placeholders"] += 1
            continue

        if c_up == "VITALS":
            k_norm = vital_alias.get(k_norm, k_norm)
        elif c_up == "LABS":
            k_norm = lab_alias.get(k_norm, k_norm)

        if c_up == "VITALS" and k_norm == "Blood Pressure":
            # Expand 120/80 -> Systolic BP + Diastolic BP
            m = bp_re.search(v_norm)
            if not m:
                stats["dropped_noncanonical"] += 1
                continue
            # Apply the same Unknown-timestamp fill as every other VITALS line,
            # otherwise the expanded pair would be the only one left "Unknown".
            bp_ts = t_norm
            if fill_unknown and bp_ts == "Unknown":
                bp_ts = _fill_unknown_timestamp("VITALS", v_norm)
            normalized_candidates.append(("VITALS", "Systolic BP", m.group(1), bp_ts))
            normalized_candidates.append(("VITALS", "Diastolic BP", m.group(2), bp_ts))
            stats["expanded_bp"] += 1
            continue

        # Cluster-specific value normalization
        canon = _canonical_value(c_up, k_norm, v_norm)
        if canon is None:
            stats["dropped_noncanonical"] += 1
            continue
        v_norm = canon

        # Drop non-canonical keywords for strict clusters (objective ones).
        if c_up in STRICT_KEYWORDS_READMISSION and k_norm not in STRICT_KEYWORDS_READMISSION[c_up]:
            stats["dropped_noncanonical"] += 1
            continue

        if fill_unknown and t_norm == "Unknown":
            t_norm = _fill_unknown_timestamp(c_up, v_norm)

        normalized_candidates.append((c_up, k_norm, v_norm, t_norm))

    # Second pass: dedupe by (CLUSTER, Keyword) using timestamp priority.
    best: dict[tuple[str, str], tuple[str, str, str, str]] = {}
    for rec in normalized_candidates:
        key = (rec[0], rec[1])
        prev = best.get(key)
        if prev is None:
            best[key] = rec
            continue
        # Exactly one of the two competing lines is discarded, whichever wins.
        stats["dedup_dropped"] += 1
        if ts_rank.get(rec[3], 999) < ts_rank.get(prev[3], 999):
            best[key] = rec

    kept = sorted(best.values(), key=lambda rec: (rec[0], rec[1]))
    out_lines = ["|".join(rec) for rec in kept]

    # Metrics: canonical + numeric-only compliance for VITALS/LABS.
    strict_total = 0
    strict_ok = 0
    vitlab_total = 0
    vitlab_numeric = 0
    key_counts: dict[tuple[str, str], int] = {}
    for c, k, v, _t in kept:
        key_counts[(c, k)] = key_counts.get((c, k), 0) + 1
        if c in STRICT_KEYWORDS_READMISSION:
            strict_total += 1
            if k in STRICT_KEYWORDS_READMISSION[c]:
                strict_ok += 1
        if c in {"VITALS", "LABS"}:
            vitlab_total += 1
            if number_re.fullmatch(v.strip()):
                vitlab_numeric += 1

    stats["duplicates_after_dedup"] = sum(1 for cnt in key_counts.values() if cnt > 1)
    stats["output_lines"] = len(out_lines)
    stats["canonical_keyword_rate_strict"] = (strict_ok / strict_total) if strict_total else 1.0
    stats["numeric_only_rate_vitals_labs"] = (vitlab_numeric / vitlab_total) if vitlab_total else 1.0

    return out_lines, stats
1122
+
1123
+
1124
def _normalize_mode(mode: Optional[str]) -> str:
    """Normalize a mode string to its canonical name.

    Matching ignores case, surrounding whitespace, and treats ``-`` as ``_``.

    Args:
        mode: User-supplied mode name or alias; may be ``None`` or empty.

    Returns:
        ``"CCDE_ADMISSION"`` for ``CCDE`` / ``CCDE_ADMISSION``;
        ``"READMISSION_TABULAR"`` for ``TABULAR`` / ``READMISSION_TABULAR`` /
        ``MVP_TABULAR`` / ``TOON_TABULAR``;
        ``"READMISSION_STRUCTURED"`` for ``STRUCTURED`` /
        ``READMISSION_STRUCTURED`` / ``PYDANTIC`` / ``STRUCTURED_OUTPUT``;
        ``"READMISSION_DISCHARGE"`` for anything else, including a missing mode.
    """
    if not mode:
        return "READMISSION_DISCHARGE"
    # Strip first so values such as " tabular\n" are still recognised
    # instead of silently falling back to the default mode.
    token = mode.strip().upper().replace("-", "_")
    if token in {"CCDE", "CCDE_ADMISSION"}:
        return "CCDE_ADMISSION"
    if token in {"TABULAR", "READMISSION_TABULAR", "MVP_TABULAR", "TOON_TABULAR"}:
        return "READMISSION_TABULAR"
    if token in {"STRUCTURED", "READMISSION_STRUCTURED", "PYDANTIC", "STRUCTURED_OUTPUT"}:
        return "READMISSION_STRUCTURED"
    return "READMISSION_DISCHARGE"
1137
+
1138
+
1139
+ # =============================================================================
1140
+ # CUSTOM DSPy ADAPTER FOR MEDGEMMA (Local Transformers)
1141
+ # =============================================================================
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ gradio>=4.44,<6
2
+ pandas>=2.0,<3
scripts/run_two_stage_structured_pipeline.py ADDED
The diff for this file is too large to render. See raw diff