Spaces:

Vrda
/

im-error-check

Running

App Files Files Community

Vrda committed 13 days ago

Commit

2bcf854

verified ·

1 Parent(s): 61b6ca2

Upload app.py with huggingface_hub

Browse files

Files changed (1) hide show

app.py +426 -0

app.py ADDED Viewed

	@@ -0,0 +1,426 @@

+"""
+Internal Medicine Discharge Letter Error-Check — Streamlit App
+Prospective study: AI-assisted error detection in ED discharge letters
+"""
+import streamlit as st
+import time
+import json
+from datetime import datetime
+from pathlib import Path
+from backend import run_error_check
+FEEDBACK_FILE = Path(__file__).parent / "feedback_data.json"
+# -------------------------------------------------------------------------
+# Feedback persistence
+# -------------------------------------------------------------------------
+def save_feedback(entry: dict) -> int:
+    if FEEDBACK_FILE.exists():
+        with open(FEEDBACK_FILE, "r", encoding="utf-8") as f:
+            data = json.load(f)
+    else:
+        data = []
+    data.append(entry)
+    with open(FEEDBACK_FILE, "w", encoding="utf-8") as f:
+        json.dump(data, f, ensure_ascii=False, indent=2)
+    return len(data)
+# -------------------------------------------------------------------------
+# Page config & CSS
+# -------------------------------------------------------------------------
+st.set_page_config(
+    page_title="IM Error-Check",
+    page_icon="\U0001FA7A",
+    layout="wide",
+)
+st.markdown("""
+<style>
+    .error-card {
+        background: #fff5f5; border-left: 4px solid #e53e3e;
+        border-radius: 8px; padding: 0.8rem 1rem; margin: 0.5rem 0;
+    }
+    .suggestion-card {
+        background: #f0fff4; border-left: 4px solid #38a169;
+        border-radius: 8px; padding: 0.8rem 1rem; margin: 0.5rem 0;
+    }
+    .model-header-a {
+        background: #ebf8ff; border-left: 4px solid #3182ce;
+        border-radius: 8px; padding: 0.6rem 1rem; margin-bottom: 0.5rem;
+    }
+    .model-header-b {
+        background: #faf5ff; border-left: 4px solid #805ad5;
+        border-radius: 8px; padding: 0.6rem 1rem; margin-bottom: 0.5rem;
+    }
+    .severity-critical { color: #c53030; font-weight: bold; }
+    .severity-high { color: #dd6b20; font-weight: bold; }
+    .severity-medium { color: #d69e2e; }
+    .severity-low { color: #38a169; }
+    .category-badge {
+        display: inline-block; background: #edf2f7; color: #4a5568;
+        padding: 2px 8px; border-radius: 12px; font-size: 0.8em; margin-right: 4px;
+    }
+</style>
+""", unsafe_allow_html=True)
+# Sample discharge letter (Croatian) that load_sample() copies into the input
+# text area. Plain text: section headings each followed by their content.
+# NOTE(review): looks like a synthetic demo case — confirm it contains no real
+# patient data before publishing.
+SAMPLE = """Adresa: VUKOVARSKA 45, SPLIT
+Datum dolaska: 10.03.2026. 14:22
+Datum rođenja: 15.05.1958.
+Datum otpusta: 10.03.2026. 18:45
+Trijažna kategorija: 3
+Dijagnoze
+I21.0 Akutni transmuralni infarkt miokarda prednje stijenke
+Podaci s trijaže
+Trijaž.kat:3; Puls:92/min; RR:155/95 mmHg; SpO2:94%; Tax: 36.8C; GCS:15;
+Razlog dolaska
+Bolovi u prsištu od jutros, stezajućeg karaktera s propagacijom u lijevu ruku. Trajanje > 30 min. Uzeo 2x NTG sprej bez učinka.
+Anamneza
+Osobna: arterijska hipertenzija, DM tip 2, dislipidemija. Terapija: Ramipril 5mg, Metformin 1000mg 2x1, Atorvastatin 20mg.
+Status
+Pri svijesti, blijed, znojav. Auskultatorno: srčana akcija ritmična, tonovi tiši, bez šumova. Pluća: bazalno obostrano oslabljen šum disanja.
+Laboratorij
+Troponin I: 2.8 ng/mL (ref <0.04), CK-MB: 45 U/L, L: 12.3, CRP: 8.5
+Na: 138, K: 4.2, Kreatinin: 128 umol/L (eGFR 52), GUK: 14.2 mmol/L
+EKG: ST elevacija V1-V4, recipročne promjene II, III, aVF
+Terapija
+Aspirin 300mg stat, zatim 100mg 1x1
+Klopidogrel 300mg stat, zatim 75mg 1x1
+Heparin 5000 IU i.v. bolus
+Morphin 4mg i.v.
+Metformin 1000mg nastaviti 2x1
+Atorvastatin 40mg 1x1
+Zaključak
+Pacijent s akutnim STEMI prednje stijenke. Transportiran u Kath lab.
+Preporučen kontrolni pregled za 14 dana."""
+# -------------------------------------------------------------------------
+# Session state
+# -------------------------------------------------------------------------
+for key, default in [
+    ("input_text", ""),
+    ("result", None),
+    ("elapsed", 0),
+    ("run_analysis", False),
+    ("physician_id", ""),
+]:
+    if key not in st.session_state:
+        st.session_state[key] = default
+def load_sample():
+    st.session_state.input_text = SAMPLE
+def trigger_analysis():
+    st.session_state.run_analysis = True
+# -------------------------------------------------------------------------
+# Header
+# -------------------------------------------------------------------------
+st.title("\U0001FA7A Internal Medicine — Discharge Letter Error-Check")
+st.markdown("*AI-assisted error detection for Internal Medicine Emergency Department*")
+st.warning(
+    "\u26A0\uFE0F **RESEARCH TOOL**: AI-generated findings require physician verification. "
+    "Do not use as sole basis for clinical decisions."
+)
+# Sidebar
+with st.sidebar:
+    st.header("About")
+    st.markdown(
+        "Compares **Qwen 3 32B** and **Llama 4 Scout** for detecting errors "
+        "in discharge letters."
+    )
+    st.markdown("---")
+    st.markdown("**Steps:** Paste letter \u2192 Analyze \u2192 Review \u2192 Rate")
+    st.markdown("---")
+    st.text_input(
+        "Physician ID (anonymous):",
+        placeholder="e.g. Physician A",
+        key="physician_id",
+    )
+    if FEEDBACK_FILE.exists():
+        with open(FEEDBACK_FILE, "r", encoding="utf-8") as f:
+            count = len(json.load(f))
+        st.metric("Cases collected", count)
+# -------------------------------------------------------------------------
+# Input
+# -------------------------------------------------------------------------
+st.header("Discharge Letter Input")
+st.button("Load Sample Case", on_click=load_sample)
+st.text_area(
+    "Paste discharge letter (Croatian):",
+    height=220,
+    placeholder="Zalijepite otpusno pismo ovdje...",
+    key="input_text",
+)
+st.button("Analyze", type="primary", on_click=trigger_analysis)
+# -------------------------------------------------------------------------
+# Run analysis
+# -------------------------------------------------------------------------
+if st.session_state.run_analysis and st.session_state.input_text.strip():
+    st.session_state.run_analysis = False
+    with st.spinner("Running error-check with both AI models (15-45 seconds)..."):
+        start = time.time()
+        st.session_state.result = run_error_check(st.session_state.input_text)
+        st.session_state.elapsed = time.time() - start
+    st.rerun()
+# -------------------------------------------------------------------------
+# Helper: render a model's output
+# -------------------------------------------------------------------------
+# Display text for the severity values found on model errors. Values missing
+# from this mapping are shown as-is (callers use .get(value, value)).
+SEVERITY_LABELS = {
+    "critical": "\U0001F534 Critical",
+    "high": "\U0001F7E0 High",
+    "medium": "\U0001F7E1 Medium",
+    "low": "\U0001F7E2 Low",
+}
+# Display text for the category values found on model errors and suggestions.
+# Values missing from this mapping are likewise shown as-is.
+CATEGORY_LABELS = {
+    "medication_error": "Medication",
+    "diagnostic_error": "Diagnostic",
+    "dosing_error": "Dosing",
+    "documentation_error": "Documentation",
+    "lab_interpretation_error": "Lab Interpretation",
+    "contraindication": "Contraindication",
+    "omission": "Omission",
+    "other": "Other",
+    "documentation_quality": "Documentation Quality",
+    "clinical_workflow": "Clinical Workflow",
+    "patient_safety": "Patient Safety",
+    "completeness": "Completeness",
+}
+def render_model_output(result, header_class: str):
+    if not result.success:
+        st.error(f"Model error: {result.error_message}")
+        return
+    st.caption(f"Response time: {result.latency_seconds}s")
+    if result.summary:
+        st.markdown(f"**Summary:** {result.summary}")
+    # Errors
+    if result.errors:
+        for i, err in enumerate(result.errors, 1):
+            sev = SEVERITY_LABELS.get(err.severity, err.severity)
+            cat = CATEGORY_LABELS.get(err.category, err.category)
+            st.markdown(
+                f'<div class="error-card">'
+                f"<strong>Error {i}</strong> &mdash; {sev} &nbsp;"
+                f'<span class="category-badge">{cat}</span><br>'
+                f"{err.description}"
+                f"{'<br><em>Quote: \"' + err.quote + '\"</em>' if err.quote else ''}"
+                f"</div>",
+                unsafe_allow_html=True,
+            )
+    else:
+        st.info("No errors identified.")
+    # Suggestions
+    if result.suggestions:
+        for i, sug in enumerate(result.suggestions, 1):
+            cat = CATEGORY_LABELS.get(sug.category, sug.category)
+            st.markdown(
+                f'<div class="suggestion-card">'
+                f"<strong>Suggestion {i}</strong> &nbsp;"
+                f'<span class="category-badge">{cat}</span><br>'
+                f"{sug.description}"
+                f"</div>",
+                unsafe_allow_html=True,
+            )
+# -------------------------------------------------------------------------
+# Display results
+# -------------------------------------------------------------------------
+if st.session_state.result:
+    r = st.session_state.result
+    st.markdown("---")
+    st.header("Analysis Results")
+    st.success(
+        f"Completed in {st.session_state.elapsed:.1f}s "
+        f"(translation: {r.translation_latency}s, "
+        f"Model A: {r.model_a_result.latency_seconds}s, "
+        f"Model B: {r.model_b_result.latency_seconds}s)"
+    )
+    with st.expander("English Translation"):
+        st.markdown(r.translated_text)
+    st.subheader("Model Comparison")
+    col_a, col_b = st.columns(2, gap="large")
+    with col_a:
+        st.markdown(
+            '<div class="model-header-a"><h4 style="color:#3182ce; margin:0">'
+            "Qwen 3 32B</h4></div>",
+            unsafe_allow_html=True,
+        )
+        render_model_output(r.model_a_result, "model-header-a")
+    with col_b:
+        st.markdown(
+            '<div class="model-header-b"><h4 style="color:#805ad5; margin:0">'
+            "Llama 4 Scout</h4></div>",
+            unsafe_allow_html=True,
+        )
+        render_model_output(r.model_b_result, "model-header-b")
+    # -----------------------------------------------------------------
+    # Feedback
+    # -----------------------------------------------------------------
+    st.markdown("---")
+    st.subheader("Physician Feedback (Research)")
+    st.markdown(
+        "*Rate each model's output. Your feedback is essential for evaluating "
+        "AI error-detection performance.*"
+    )
+    VALIDITY_OPTIONS = ["Valid", "Partially Valid", "Invalid"]
+    RATING_OPTIONS = ["1 - Poor", "2 - Fair", "3 - Good", "4 - Very Good", "5 - Excellent"]
+    feedback_data = {}
+    for model_key, model_label, res in [
+        ("model_a", "Qwen 3 32B", r.model_a_result),
+        ("model_b", "Llama 4 Scout", r.model_b_result),
+    ]:
+        st.markdown(f"#### {model_label}")
+        error_ratings = []
+        if res.success and res.errors:
+            st.markdown("**Rate each error:**")
+            for i, err in enumerate(res.errors):
+                c1, c2 = st.columns([3, 1])
+                with c1:
+                    st.markdown(
+                        f"*Error {i+1}:* {err.description[:120]}{'...' if len(err.description) > 120 else ''}"
+                    )
+                with c2:
+                    validity = st.selectbox(
+                        f"Validity",
+                        VALIDITY_OPTIONS,
+                        key=f"{model_key}_err_{i}_validity",
+                        label_visibility="collapsed",
+                    )
+                cat_correct = st.checkbox(
+                    f"Category correct ({CATEGORY_LABELS.get(err.category, err.category)})?",
+                    value=True,
+                    key=f"{model_key}_err_{i}_cat",
+                )
+                error_ratings.append({
+                    "error_text": err.description,
+                    "model_category": err.category,
+                    "model_severity": err.severity,
+                    "validity": validity.lower().replace(" ", "_"),
+                    "category_correct": cat_correct,
+                })
+        elif res.success:
+            st.info("Model found no errors — rate the overall output below.")
+        suggestions_useful = st.select_slider(
+            f"**Suggestions usefulness:**",
+            options=RATING_OPTIONS,
+            value="3 - Good",
+            key=f"{model_key}_sug_useful",
+        )
+        overall_usefulness = st.select_slider(
+            f"**Overall usefulness:**",
+            options=RATING_OPTIONS,
+            value="3 - Good",
+            key=f"{model_key}_overall",
+        )
+        safety_severity = st.select_slider(
+            f"**Safety concern severity** (1=no concern, 5=critical risk):",
+            options=RATING_OPTIONS,
+            value="1 - Poor",
+            key=f"{model_key}_safety",
+        )
+        feedback_data[model_key] = {
+            "errors": error_ratings,
+            "suggestions_useful": suggestions_useful,
+            "overall_usefulness": overall_usefulness,
+            "safety_concern_severity": safety_severity,
+        }
+        st.markdown("---")
+    # Missed errors
+    st.markdown("#### Missed Errors")
+    missed_errors = st.text_area(
+        "Did either model miss errors that should have been found? Describe them here:",
+        placeholder="e.g. Both models missed that Metformin is contraindicated with eGFR < 30...",
+        key="missed_errors",
+        height=80,
+    )
+    # General comments
+    comments = st.text_area(
+        "Additional comments (optional):",
+        placeholder="Any other observations about the models' performance?",
+        key="fb_comments",
+        height=80,
+    )
+    if st.button("Submit Feedback", type="secondary"):
+        if not st.session_state.physician_id.strip():
+            st.warning("Please enter a Physician ID in the sidebar before submitting.")
+        else:
+            entry = {
+                "timestamp": datetime.now().isoformat(),
+                "physician_id": st.session_state.physician_id.strip(),
+                "clinical_input": st.session_state.input_text,
+                "translation": r.translated_text,
+                "model_a_output": r.model_a_result.raw_response,
+                "model_b_output": r.model_b_result.raw_response,
+                "model_a_latency": r.model_a_result.latency_seconds,
+                "model_b_latency": r.model_b_result.latency_seconds,
+                "translation_latency": r.translation_latency,
+                "total_latency": round(st.session_state.elapsed, 2),
+                "ratings": feedback_data,
+                "missed_errors": missed_errors,
+                "comments": comments,
+            }
+            count = save_feedback(entry)
+            st.success(f"Feedback saved! (Total entries: {count})")
+            st.balloons()
+st.markdown("---")
+st.caption(
+    "Internal Medicine Error-Check | Prospective Research Study 2026 | "
+    "Requires physician verification"
+)