Vrda committed on
Commit
2bcf854
·
verified ·
1 Parent(s): 61b6ca2

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +426 -0
app.py ADDED
@@ -0,0 +1,426 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Internal Medicine Discharge Letter Error-Check — Streamlit App
3
+ Prospective study: AI-assisted error detection in ED discharge letters
4
+ """
5
+
6
+ import streamlit as st
7
+ import time
8
+ import json
9
+ from datetime import datetime
10
+ from pathlib import Path
11
+ from backend import run_error_check
12
+
13
FEEDBACK_FILE = Path(__file__).parent / "feedback_data.json"

# -------------------------------------------------------------------------
# Feedback persistence
# -------------------------------------------------------------------------

def save_feedback(entry: dict) -> int:
    """Append *entry* to the JSON feedback log and return the new entry count.

    The log at ``FEEDBACK_FILE`` is a single JSON array. A missing or empty
    file is treated as an empty log.

    Args:
        entry: JSON-serialisable feedback record.

    Returns:
        Number of entries stored in the log after the append.

    Raises:
        json.JSONDecodeError: The existing log is not valid JSON. The file is
            left untouched so it can be recovered manually.
        TypeError: *entry* cannot be serialised. The file is left untouched.
    """
    try:
        raw = FEEDBACK_FILE.read_text(encoding="utf-8")
    except FileNotFoundError:
        raw = ""
    data = json.loads(raw) if raw.strip() else []
    data.append(entry)

    # Serialise before touching the disk: opening the log with "w" first would
    # truncate it, so a serialisation error would wipe every stored entry.
    payload = json.dumps(data, ensure_ascii=False, indent=2)

    # Write next to the target and swap it in, so an interrupted write never
    # leaves a half-written log behind.
    tmp_path = FEEDBACK_FILE.with_name(FEEDBACK_FILE.name + ".tmp")
    tmp_path.write_text(payload, encoding="utf-8")
    tmp_path.replace(FEEDBACK_FILE)
    return len(data)
29
+
30
+
31
+ # -------------------------------------------------------------------------
32
+ # Page config & CSS
33
+ # -------------------------------------------------------------------------
34
+
35
+ st.set_page_config(
36
+ page_title="IM Error-Check",
37
+ page_icon="\U0001FA7A",
38
+ layout="wide",
39
+ )
40
+
41
+ st.markdown("""
42
+ <style>
43
+ .error-card {
44
+ background: #fff5f5; border-left: 4px solid #e53e3e;
45
+ border-radius: 8px; padding: 0.8rem 1rem; margin: 0.5rem 0;
46
+ }
47
+ .suggestion-card {
48
+ background: #f0fff4; border-left: 4px solid #38a169;
49
+ border-radius: 8px; padding: 0.8rem 1rem; margin: 0.5rem 0;
50
+ }
51
+ .model-header-a {
52
+ background: #ebf8ff; border-left: 4px solid #3182ce;
53
+ border-radius: 8px; padding: 0.6rem 1rem; margin-bottom: 0.5rem;
54
+ }
55
+ .model-header-b {
56
+ background: #faf5ff; border-left: 4px solid #805ad5;
57
+ border-radius: 8px; padding: 0.6rem 1rem; margin-bottom: 0.5rem;
58
+ }
59
+ .severity-critical { color: #c53030; font-weight: bold; }
60
+ .severity-high { color: #dd6b20; font-weight: bold; }
61
+ .severity-medium { color: #d69e2e; }
62
+ .severity-low { color: #38a169; }
63
+ .category-badge {
64
+ display: inline-block; background: #edf2f7; color: #4a5568;
65
+ padding: 2px 8px; border-radius: 12px; font-size: 0.8em; margin-right: 4px;
66
+ }
67
+ </style>
68
+ """, unsafe_allow_html=True)
69
+
70
+
71
+ SAMPLE = """Adresa: VUKOVARSKA 45, SPLIT
72
+ Datum dolaska: 10.03.2026. 14:22
73
+ Datum rođenja: 15.05.1958.
74
+ Datum otpusta: 10.03.2026. 18:45
75
+
76
+ Trijažna kategorija: 3
77
+
78
+ Dijagnoze
79
+ I21.0 Akutni transmuralni infarkt miokarda prednje stijenke
80
+
81
+ Podaci s trijaže
82
+ Trijaž.kat:3; Puls:92/min; RR:155/95 mmHg; SpO2:94%; Tax: 36.8C; GCS:15;
83
+
84
+ Razlog dolaska
85
+ Bolovi u prsištu od jutros, stezajućeg karaktera s propagacijom u lijevu ruku. Trajanje > 30 min. Uzeo 2x NTG sprej bez učinka.
86
+
87
+ Anamneza
88
+ Osobna: arterijska hipertenzija, DM tip 2, dislipidemija. Terapija: Ramipril 5mg, Metformin 1000mg 2x1, Atorvastatin 20mg.
89
+
90
+ Status
91
+ Pri svijesti, blijed, znojav. Auskultatorno: srčana akcija ritmična, tonovi tiši, bez šumova. Pluća: bazalno obostrano oslabljen šum disanja.
92
+
93
+ Laboratorij
94
+ Troponin I: 2.8 ng/mL (ref <0.04), CK-MB: 45 U/L, L: 12.3, CRP: 8.5
95
+ Na: 138, K: 4.2, Kreatinin: 128 umol/L (eGFR 52), GUK: 14.2 mmol/L
96
+
97
+ EKG: ST elevacija V1-V4, recipročne promjene II, III, aVF
98
+
99
+ Terapija
100
+ Aspirin 300mg stat, zatim 100mg 1x1
101
+ Klopidogrel 300mg stat, zatim 75mg 1x1
102
+ Heparin 5000 IU i.v. bolus
103
+ Morphin 4mg i.v.
104
+ Metformin 1000mg nastaviti 2x1
105
+ Atorvastatin 40mg 1x1
106
+
107
+ Zaključak
108
+ Pacijent s akutnim STEMI prednje stijenke. Transportiran u Kath lab.
109
+ Preporučen kontrolni pregled za 14 dana."""
110
+
111
+ # -------------------------------------------------------------------------
112
+ # Session state
113
+ # -------------------------------------------------------------------------
114
+
115
# Seed every session-state slot the app reads, without overwriting values
# that already exist from an earlier run of the script.
_SESSION_DEFAULTS = {
    "input_text": "",
    "result": None,
    "elapsed": 0,
    "run_analysis": False,
    "physician_id": "",
}
for state_key, initial_value in _SESSION_DEFAULTS.items():
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
124
+
125
+
126
def load_sample():
    """Put the built-in sample letter into the ``input_text`` session slot."""
    st.session_state["input_text"] = SAMPLE
128
+
129
+
130
def trigger_analysis():
    """Set the ``run_analysis`` session flag so the script starts an analysis."""
    st.session_state["run_analysis"] = True
132
+
133
+
134
+ # -------------------------------------------------------------------------
135
+ # Header
136
+ # -------------------------------------------------------------------------
137
+
138
+ st.title("\U0001FA7A Internal Medicine — Discharge Letter Error-Check")
139
+ st.markdown("*AI-assisted error detection for Internal Medicine Emergency Department*")
140
+ st.warning(
141
+ "\u26A0\uFE0F **RESEARCH TOOL**: AI-generated findings require physician verification. "
142
+ "Do not use as sole basis for clinical decisions."
143
+ )
144
+
145
+ # Sidebar
146
with st.sidebar:
    st.header("About")
    st.markdown(
        "Compares **Qwen 3 32B** and **Llama 4 Scout** for detecting errors "
        "in discharge letters."
    )
    st.markdown("---")
    st.markdown("**Steps:** Paste letter \u2192 Analyze \u2192 Review \u2192 Rate")
    st.markdown("---")
    st.text_input(
        "Physician ID (anonymous):",
        placeholder="e.g. Physician A",
        key="physician_id",
    )
    if FEEDBACK_FILE.exists():
        # The counter is informational only: an unreadable log must not take
        # the whole page down, so report the problem and carry on.
        try:
            with open(FEEDBACK_FILE, "r", encoding="utf-8") as f:
                stored = json.load(f)
        except (json.JSONDecodeError, OSError):
            stored = None
        if isinstance(stored, list):
            st.metric("Cases collected", len(stored))
        else:
            st.warning("Feedback log could not be read.")
164
+
165
+ # -------------------------------------------------------------------------
166
+ # Input
167
+ # -------------------------------------------------------------------------
168
+
169
+ st.header("Discharge Letter Input")
170
+ st.button("Load Sample Case", on_click=load_sample)
171
+
172
+ st.text_area(
173
+ "Paste discharge letter (Croatian):",
174
+ height=220,
175
+ placeholder="Zalijepite otpusno pismo ovdje...",
176
+ key="input_text",
177
+ )
178
+
179
+ st.button("Analyze", type="primary", on_click=trigger_analysis)
180
+
181
+ # -------------------------------------------------------------------------
182
+ # Run analysis
183
+ # -------------------------------------------------------------------------
184
+
185
if st.session_state.run_analysis:
    # Always consume the request. If the flag survived an empty-input click,
    # the analysis would start by itself on a later rerun, as soon as any
    # text appeared in the box.
    st.session_state.run_analysis = False
    letter_text = st.session_state.input_text

    if not letter_text.strip():
        st.warning("Please paste a discharge letter before running the analysis.")
    else:
        # The feedback widgets are keyed, so their values live in session
        # state. Drop them so the ratings entered for the previous letter do
        # not pre-fill the form for this one.
        stale_keys = [
            state_key
            for state_key in st.session_state.keys()
            if state_key.startswith(("model_a_", "model_b_"))
            or state_key in ("missed_errors", "fb_comments")
        ]
        for state_key in stale_keys:
            del st.session_state[state_key]

        with st.spinner("Running error-check with both AI models (15-45 seconds)..."):
            start = time.time()
            st.session_state.result = run_error_check(letter_text)
            st.session_state.elapsed = time.time() - start
        # Rerun so the results section renders from the stored result.
        st.rerun()
192
+
193
+
194
+ # -------------------------------------------------------------------------
195
+ # Helper: render a model's output
196
+ # -------------------------------------------------------------------------
197
+
198
# Display text for the severity values a model may attach to an error.
SEVERITY_LABELS = {
    "critical": "\U0001F534 Critical",
    "high": "\U0001F7E0 High",
    "medium": "\U0001F7E1 Medium",
    "low": "\U0001F7E2 Low",
}

# Display text for error / suggestion category identifiers.
CATEGORY_LABELS = {
    "medication_error": "Medication",
    "diagnostic_error": "Diagnostic",
    "dosing_error": "Dosing",
    "documentation_error": "Documentation",
    "lab_interpretation_error": "Lab Interpretation",
    "contraindication": "Contraindication",
    "omission": "Omission",
    "other": "Other",
    "documentation_quality": "Documentation Quality",
    "clinical_workflow": "Clinical Workflow",
    "patient_safety": "Patient Safety",
    "completeness": "Completeness",
}


def _esc(value) -> str:
    """Return *value* as text that is safe to place inside an HTML fragment."""
    # Local import keeps this block independent of the file's import header.
    from html import escape

    return escape(str(value))


def render_model_output(result, header_class: str):
    """Render one model's findings (summary, error cards, suggestion cards).

    Args:
        result: Object exposing ``success``, ``error_message``,
            ``latency_seconds``, ``summary``, ``errors`` and ``suggestions``.
            Each error exposes ``severity``, ``category``, ``description``
            and ``quote``; each suggestion exposes ``category`` and
            ``description``.
        header_class: Not used by the body; kept so existing callers keep
            working.
    """
    if not result.success:
        st.error(f"Model error: {result.error_message}")
        return

    st.caption(f"Response time: {result.latency_seconds}s")

    if result.summary:
        st.markdown(f"**Summary:** {result.summary}")

    # Errors
    if result.errors:
        for i, err in enumerate(result.errors, 1):
            # Everything that comes from the model is escaped: the cards are
            # rendered with unsafe_allow_html, and clinical text routinely
            # contains "<" / ">" (e.g. "eGFR < 30"), which would otherwise be
            # parsed as markup.
            sev = _esc(SEVERITY_LABELS.get(err.severity, err.severity))
            cat = _esc(CATEGORY_LABELS.get(err.category, err.category))
            # Built outside the main f-string: backslashes inside a
            # replacement field are a SyntaxError before Python 3.12.
            quote_html = (
                f'<br><em>Quote: "{_esc(err.quote)}"</em>' if err.quote else ""
            )
            st.markdown(
                f'<div class="error-card">'
                f"<strong>Error {i}</strong> &mdash; {sev} &nbsp;"
                f'<span class="category-badge">{cat}</span><br>'
                f"{_esc(err.description)}"
                f"{quote_html}"
                f"</div>",
                unsafe_allow_html=True,
            )
    else:
        st.info("No errors identified.")

    # Suggestions
    if result.suggestions:
        for i, sug in enumerate(result.suggestions, 1):
            cat = _esc(CATEGORY_LABELS.get(sug.category, sug.category))
            st.markdown(
                f'<div class="suggestion-card">'
                f"<strong>Suggestion {i}</strong> &nbsp;"
                f'<span class="category-badge">{cat}</span><br>'
                f"{_esc(sug.description)}"
                f"</div>",
                unsafe_allow_html=True,
            )
260
+
261
+
262
+ # -------------------------------------------------------------------------
263
+ # Display results
264
+ # -------------------------------------------------------------------------
265
+
266
# Results view and physician feedback form; shown once an analysis result
# is stored in session state.
if st.session_state.result:
    r = st.session_state.result

    st.markdown("---")
    st.header("Analysis Results")
    st.success(
        f"Completed in {st.session_state.elapsed:.1f}s "
        f"(translation: {r.translation_latency}s, "
        f"Model A: {r.model_a_result.latency_seconds}s, "
        f"Model B: {r.model_b_result.latency_seconds}s)"
    )

    with st.expander("English Translation"):
        st.markdown(r.translated_text)

    st.subheader("Model Comparison")

    col_a, col_b = st.columns(2, gap="large")

    with col_a:
        st.markdown(
            '<div class="model-header-a"><h4 style="color:#3182ce; margin:0">'
            "Qwen 3 32B</h4></div>",
            unsafe_allow_html=True,
        )
        render_model_output(r.model_a_result, "model-header-a")

    with col_b:
        st.markdown(
            '<div class="model-header-b"><h4 style="color:#805ad5; margin:0">'
            "Llama 4 Scout</h4></div>",
            unsafe_allow_html=True,
        )
        render_model_output(r.model_b_result, "model-header-b")

    # -----------------------------------------------------------------
    # Feedback
    # -----------------------------------------------------------------

    st.markdown("---")
    st.subheader("Physician Feedback (Research)")
    st.markdown(
        "*Rate each model's output. Your feedback is essential for evaluating "
        "AI error-detection performance.*"
    )

    VALIDITY_OPTIONS = ["Valid", "Partially Valid", "Invalid"]
    RATING_OPTIONS = ["1 - Poor", "2 - Fair", "3 - Good", "4 - Very Good", "5 - Excellent"]
    # The safety scale runs from "no concern" to "critical risk", so showing
    # the quality wording ("5 - Excellent") for it is misleading. Only the
    # displayed text changes: the stored value is still the RATING_OPTIONS
    # entry, which keeps new records comparable with ones already collected.
    SAFETY_DISPLAY = dict(
        zip(RATING_OPTIONS, ["1 - No concern", "2", "3", "4", "5 - Critical risk"])
    )

    feedback_data = {}

    for model_key, model_label, res in [
        ("model_a", "Qwen 3 32B", r.model_a_result),
        ("model_b", "Llama 4 Scout", r.model_b_result),
    ]:
        st.markdown(f"#### {model_label}")

        error_ratings = []
        if res.success and res.errors:
            st.markdown("**Rate each error:**")
            for i, err in enumerate(res.errors):
                # Show at most 120 characters of the description next to the
                # validity selector.
                preview = err.description[:120]
                ellipsis = "..." if len(err.description) > 120 else ""
                c1, c2 = st.columns([3, 1])
                with c1:
                    st.markdown(f"*Error {i+1}:* {preview}{ellipsis}")
                with c2:
                    validity = st.selectbox(
                        "Validity",
                        VALIDITY_OPTIONS,
                        key=f"{model_key}_err_{i}_validity",
                        label_visibility="collapsed",
                    )
                # NOTE(review): placed below the two columns; confirm against
                # the original layout.
                cat_correct = st.checkbox(
                    f"Category correct ({CATEGORY_LABELS.get(err.category, err.category)})?",
                    value=True,
                    key=f"{model_key}_err_{i}_cat",
                )
                error_ratings.append({
                    "error_text": err.description,
                    "model_category": err.category,
                    "model_severity": err.severity,
                    "validity": validity.lower().replace(" ", "_"),
                    "category_correct": cat_correct,
                })
        elif res.success:
            st.info("Model found no errors — rate the overall output below.")

        suggestions_useful = st.select_slider(
            "**Suggestions usefulness:**",
            options=RATING_OPTIONS,
            value="3 - Good",
            key=f"{model_key}_sug_useful",
        )
        overall_usefulness = st.select_slider(
            "**Overall usefulness:**",
            options=RATING_OPTIONS,
            value="3 - Good",
            key=f"{model_key}_overall",
        )
        safety_severity = st.select_slider(
            "**Safety concern severity** (1=no concern, 5=critical risk):",
            options=RATING_OPTIONS,
            value="1 - Poor",
            format_func=SAFETY_DISPLAY.get,
            key=f"{model_key}_safety",
        )

        feedback_data[model_key] = {
            "errors": error_ratings,
            "suggestions_useful": suggestions_useful,
            "overall_usefulness": overall_usefulness,
            "safety_concern_severity": safety_severity,
        }

        # NOTE(review): separator assumed to follow each model's form;
        # confirm against the original layout.
        st.markdown("---")

    # Missed errors
    st.markdown("#### Missed Errors")
    missed_errors = st.text_area(
        "Did either model miss errors that should have been found? Describe them here:",
        placeholder="e.g. Both models missed that Metformin is contraindicated with eGFR < 30...",
        key="missed_errors",
        height=80,
    )

    # General comments
    comments = st.text_area(
        "Additional comments (optional):",
        placeholder="Any other observations about the models' performance?",
        key="fb_comments",
        height=80,
    )

    if st.button("Submit Feedback", type="secondary"):
        if not st.session_state.physician_id.strip():
            st.warning("Please enter a Physician ID in the sidebar before submitting.")
        else:
            entry = {
                "timestamp": datetime.now().isoformat(),
                "physician_id": st.session_state.physician_id.strip(),
                "clinical_input": st.session_state.input_text,
                "translation": r.translated_text,
                "model_a_output": r.model_a_result.raw_response,
                "model_b_output": r.model_b_result.raw_response,
                "model_a_latency": r.model_a_result.latency_seconds,
                "model_b_latency": r.model_b_result.latency_seconds,
                "translation_latency": r.translation_latency,
                "total_latency": round(st.session_state.elapsed, 2),
                "ratings": feedback_data,
                "missed_errors": missed_errors,
                "comments": comments,
            }
            count = save_feedback(entry)
            st.success(f"Feedback saved! (Total entries: {count})")
            st.balloons()
421
+
422
+ st.markdown("---")
423
+ st.caption(
424
+ "Internal Medicine Error-Check | Prospective Research Study 2026 | "
425
+ "Requires physician verification"
426
+ )