Preetham22 committed
Commit 42e56c5 · Parent(s): 6803326

Add demo UI, token attention rollout & top5 table; clean ignores
.gitignore CHANGED
@@ -1,43 +1,45 @@
- # Ignore Data files for tracking
- data/
- checkpoints/
  __pycache__/
  *.py[cod]
  .coverage

- # Weights & Biases
- wandb/
- wandb/*
- wandb_logs/
-
- # Optuna study databases
- *.db
-
- # Checkpoints / logs
- checkpoints/
  logs/
  *.pt
- .ipynb_checkpoints
  *.ckpt

- # Model files
- *.pth

- # Environment files
- .env

- # logs
- *.log
- *.tmp

- # --- EXCEPTIONS (ALLOW) ---
  !data/dummy_images/
  !data/dummy_images/COVID/*.png
  !data/dummy_images/NORMAL/*.png
  !data/dummy_images/VIRAL PNEUMONIA/*.png
-
- # Allow test CSV for CI
- !data/test_emr_records.csv
-
- # Mac system files
- .DS_Store

+ # --- Python ---
  __pycache__/
  *.py[cod]
+ *.pyo
+ *.so
+ *.dylib
+ .venv/
+ .env
  .coverage
+ .ipynb_checkpoints/

+ # --- Data & Artifacts ---
+ data/
+ results/
  logs/
+ checkpoints/
+ *.log
+ *.tmp
+ *.db
+
+ # Models / weights
  *.pt
+ *.pth
  *.ckpt

+ # W&B & experiment outputs
+ wandb/
+ wandb_logs/

+ # Predictions / exports
+ predictions_*.csv
+ app/demo/uploads/
+ app/demo/exports/

+ # OS junk
+ .DS_Store

+ # --- Exceptions (allow small fixtures/samples) ---
  !data/dummy_images/
  !data/dummy_images/COVID/*.png
  !data/dummy_images/NORMAL/*.png
  !data/dummy_images/VIRAL PNEUMONIA/*.png
+ !sample_data/**
+ !tests/**
+ !config/config.yaml.example
app/demo/demo.py CHANGED
@@ -1,8 +1,10 @@
  import os
  import sys
  import time
  import gradio as gr
  import pandas as pd
  from pathlib import Path

  # Adds root directory to sys.path
@@ -14,91 +16,808 @@ from app.utils.inference_utils import load_model, predict
  # Initial default values
  DEFAULT_MODE = "multimodal"
  MODEL_PATHS = {
-     "text": "medi_llm_state_dict_text.pth",
-     "image": "medi_llm_state_dict_image.pth",
-     "multimodal": "medi_llm_state_dict_multimodal.pth"
  }

  model_cache = {}
- prediction_log = []


- def classify(mode, emr_text, image):
      if mode not in model_cache:
          model_cache[mode] = load_model(mode, MODEL_PATHS[mode])
      model = model_cache[mode]
-     pred_text, cam_image, token_attn = predict(model, mode, emr_text=emr_text, image=image)

      # Save image to file if uploaded
-     img_rel_path = None
-     img_abs_path = None
-     if image is not None:
-         timestamp = time.strftime("%Y%m%d_%H%M%S")
-         img_rel_path = f"app/demo/uploads/xray_{timestamp}.png"
          img_abs_path = os.path.abspath(img_rel_path)
          os.makedirs(os.path.dirname(img_abs_path), exist_ok=True)
-         image.Save(img_abs_path)

      # Append to log
-     prediction_log.append({
          "mode": mode,
-         "emr": emr_text,
-         "image_path": img_rel_path,  # logged as relative path
-         "prediction": pred_text
-     })

-     return pred_text, cam_image, token_attn


- def export_csv(filename):
      if not filename.strip():
          timestamp = time.strftime("%Y%m%d_%H%M%S")
-         filename = f"demo_{timestamp}.csv"
      elif not filename.endswith(".csv"):
          filename += ".csv"

-     csv_path = os.path.abspath(os.path.join("app/demo/exports", filename))
      os.makedirs(os.path.dirname(csv_path), exist_ok=True)

-     df = pd.DataFrame(prediction_log)
      df.to_csv(csv_path, index=False)

-     return csv_path


- with gr.Blocks(theme=gr.themes.Glass(), css=".centered {text-align: center;}") as demo:
      # Centered title and subtitle
      gr.Markdown("<h2 class='centered'>🩺 Medi-LLM: Clinical Triage Assistant 🩻</h2>")
      gr.Markdown("<p class='centered'>Upload a chest X-ray and/or enter EMR text to get a triage level prediction.</p>")

-     # Mode selection
-     with gr.Row():
-         mode = gr.Radio(["text", "image", "multimodal"], value=DEFAULT_MODE, label="Select Input Mode")

      # Input: EMR text and/or image
      with gr.Row():
-         emr_text = gr.Textbox(lines=6, label="EMR Text", placeholder="Enter clinical notes here...")
-         image = gr.Image(type="pil", label="Chest X-ray")

      with gr.Row():
-         submit_btn = gr.Button("Run Inference")

-     result = gr.Textbox(label="Prediction")

-     submit_btn.click(fn=classify, inputs=[mode, emr_text, image], outputs=result)

      # CSV Export UI
      gr.Markdown("### 📁 Export Prediction Log")

-     with gr.Row():
-         filename_input = gr.Textbox(label="CSV filename (optional)", placeholder="e.g., my_predictions.csv")
-         download_btn = gr.Button("Export CSV")
-         csv_output = gr.File(label="Download Link")

      download_btn.click(
          fn=export_csv,
-         inputs=[filename_input],
          outputs=[csv_output]
      )

  if __name__ == "__main__":
  import os
  import sys
  import time
+ import shutil
  import gradio as gr
  import pandas as pd
+ from PIL import Image
  from pathlib import Path

  # Adds root directory to sys.path

  # Initial default values
  DEFAULT_MODE = "multimodal"
  MODEL_PATHS = {
+     "text": ROOT_DIR / "medi_llm_state_dict_text.pth",
+     "image": ROOT_DIR / "medi_llm_state_dict_image.pth",
+     "multimodal": ROOT_DIR / "medi_llm_state_dict_multimodal.pth"
  }

  model_cache = {}
+ prediction_log_user = []
+ prediction_log_doctor = []


+ def classify(role, mode, normalize_mode, emr_text, image, use_rollout):
+     grad_cam_path = "N/A"
+     token_attn_path = "N/A"
+
+     # Control output visibility
+     show_tabs = (role == "Doctor")
+     show_gradcam = (role == "Doctor" and mode in ["image", "multimodal"])
+     show_attention = (role == "Doctor" and mode in ["text", "multimodal"])
+
+     # ✅ Skip inference if no input is provided for the selected mode
+     text_missing = mode in ["text", "multimodal"] and (not emr_text or not emr_text.strip())
+     image_missing = mode in ["image", "multimodal"] and image is None
+     if (mode == "text" and text_missing) or (mode == "image" and image_missing) or (mode == "multimodal" and text_missing and image_missing):
+         count = len(prediction_log_doctor) if role == "Doctor" else len(prediction_log_user)
+         return (
+             gr.Textbox(value="⚠️ Please enter EMR text or upload an image to run inference."),
+             gr.Image(visible=False),
+             gr.HighlightedText(visible=False),
+             gr.HTML(value="", visible=False),
+             gr.Label(visible=False),
+             gr.Tabs(visible=False),
+             gr.Textbox(value=f"Predictions: {count}", interactive=False),
+             gr.JSON(value={}, visible=True)  # JSON visible, but empty
+         )
+
+     # Image size guard + load
+     if image is not None:
+         image_path = Path(image)
+         image_size = image_path.stat().st_size
+         # Enforce 5MB limit (5 * 1024 * 1024 bytes)
+         if image_size > 5 * 1024 * 1024:
+             count = len(prediction_log_doctor) if role == "Doctor" else len(prediction_log_user)
+             return (
+                 gr.Textbox(value="❌ Image exceeds 5MB size limit."),
+                 gr.Image(visible=False),
+                 gr.HighlightedText(visible=False),
+                 gr.HTML(value="", visible=False),
+                 gr.Label(visible=False),
+                 gr.Tabs(visible=False),  # Hide insights tab on error
+                 gr.Textbox(value=f"Predictions: {count}", interactive=False),
+                 gr.JSON(value={}, visible=True)
+             )
+         image = Image.open(image).convert("RGB")
+
+     # Model caching
      if mode not in model_cache:
          model_cache[mode] = load_model(mode, MODEL_PATHS[mode])
      model = model_cache[mode]
+
+     # Run prediction
+     try:
+         print("🧪 classify() passing normalize_mode:", normalize_mode, "| use_rollout:", use_rollout)
+         pred_text, cam_image, token_attn, confidence, probs, top5 = predict(
+             model,
+             mode,
+             emr_text=emr_text,
+             image=image,
+             normalize_mode=normalize_mode,
+             need_token_vis=show_attention,
+             use_rollout=use_rollout,
+         )
+
+         top5 = top5 or []
+     except ValueError as e:
+         print(f"⚠️ Inference failed: {e}")
+         count = len(prediction_log_doctor) if role == "Doctor" else len(prediction_log_user)
+         return (
+             gr.Textbox(value=f"❌ {str(e)}"),
+             gr.Image(visible=False),
+             gr.HighlightedText(visible=False),
+             gr.HTML(value="", visible=False),
+             gr.Label(visible=False),
+             gr.Tabs(visible=False),
+             gr.Textbox(value=f"Predictions: {count}", interactive=False),
+             gr.JSON(value={}, visible=True)
+         )
+
+     # Class probabilities (ensure there are always 3)
+     flat_probs = probs[0] if isinstance(probs[0], list) else probs
+     if len(flat_probs) != 3:
+         class_probs = {"low": 0.0, "medium": 0.0, "high": 0.0}
+     else:
+         class_probs = {label: round(prob, 3) for label, prob in zip(["low", "medium", "high"], flat_probs)}
+
+     # Save uploads (relative path in logs)
+     timestamp = time.strftime("%Y%m%d_%H%M%S")
+     img_rel_path = f"app/demo/uploads/xray_{timestamp}.png" if image else "N/A"

      # Save image to file if uploaded
+     if image:
          img_abs_path = os.path.abspath(img_rel_path)
          os.makedirs(os.path.dirname(img_abs_path), exist_ok=True)
+         image.save(img_abs_path)
+
+     # Save Grad-CAM if Doctor and mode uses image
+     if cam_image and role == "Doctor" and mode in ["image", "multimodal"]:
+         cam_rel_path = f"app/demo/exports/{role.lower()}/gradcam/gradcam_{pred_text}_{timestamp}.png"
+         cam_abs_path = os.path.abspath(cam_rel_path)
+         os.makedirs(os.path.dirname(cam_abs_path), exist_ok=True)
+         cam_image.save(cam_abs_path)
+         grad_cam_path = cam_rel_path
+
+     # Save token attention if Doctor and mode uses text
+     if token_attn and role == "Doctor" and mode in ["text", "multimodal"]:
+         attn_rel_path = f"app/demo/exports/{role.lower()}/tokenattention/token_attn_{pred_text}_{timestamp}.txt"
+         attn_abs_path = os.path.abspath(attn_rel_path)
+         os.makedirs(os.path.dirname(attn_abs_path), exist_ok=True)
+         with open(attn_abs_path, "w") as f:
+             f.write(f"Normalization Mode: {normalize_mode}\n")
+             f.write(f"Use Rollout: {use_rollout}\n")
+             f.write("Token Attention (word | score):\n")
+             f.write(str(token_attn) + "\n\n")
+             f.write("Top 5 tokens (token | % contribution):\n")
+             if top5:
+                 for tok, pct in top5:
+                     f.write(f"{tok}\t{pct:.2f}%\n")
+             else:
+                 f.write("(none)\n")
+         token_attn_path = attn_rel_path

      # Append to log
+     log_entry = {
          "mode": mode,
+         "normalize_mode": normalize_mode,
+         "use_rollout": bool(use_rollout),
+         "emr_text": emr_text or "N/A",
+         "image_path": img_rel_path if mode in ["image", "multimodal"] else "N/A",  # logged as relative path
+         "prediction": pred_text,
+         "confidence": round(confidence, 3),
+         "grad_cam_path": grad_cam_path if role == "Doctor" else "N/A",
+         "token_attention_path": token_attn_path if role == "Doctor" else "N/A",
+         "top5_tokens": "; ".join([f"{tok}:{pct:.1f}%" for tok, pct in (top5 or [])])
+     }
+
+     if role == "Doctor":
+         prediction_log_doctor.append(log_entry)
+         count = len(prediction_log_doctor)
+     else:
+         prediction_log_user.append(log_entry)
+         count = len(prediction_log_user)
+
+     glow_class = f"prediction-{pred_text.lower()}"  # 'high', 'medium', 'low'
+
+     return (
+         gr.Textbox(value=pred_text, elem_classes=[glow_class]),
+         gr.Image(value=cam_image, visible=show_gradcam),
+         gr.HighlightedText(value=token_attn, visible=show_attention),
+         render_top5_html(top5),
+         gr.Label(value=f"{confidence:.2f}", visible=True),
+         gr.Tabs(visible=show_tabs),
+         gr.Textbox(value=f"Predictions: {count}", interactive=False),
+         gr.JSON(value=class_probs, visible=True)
+     )

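+ # Note: classify() always returns the same 8-tuple, in the same order as the
+ # outputs list wired to submit_btn.click() further down: result_box, gradcam_img,
+ # token_attention, top5_html, confidence_label, insights_tab,
+ # prediction_count_box, class_probs_json. Keep the two in sync.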
+ def render_inputs(mode):
+     is_text = mode in ["text", "multimodal"]
+     is_image = mode in ["image", "multimodal"]
+
+     emr_text = gr.Textbox(
+         visible=is_text,
+         lines=6,
+         label="EMR Text",
+         placeholder="Enter clinical notes here...",
+         elem_id="emr_textbox"
+     )
+
+     image = gr.Image(
+         visible=is_image,
+         type="filepath",
+         label="Chest X-ray",
+         image_mode="RGB",
+         show_label=True,
+         height=224,
+         elem_id="xray_image"
+     )
+
+     max_note = gr.HTML(
+         "<p style='font-size: 0.9em; color: #a9b1d6;'>Maximum file size: 5MB</p>",
+         visible=is_image
+     )
+
+     return emr_text, image, max_note
+
+
+ def render_top5_html(top5):
+     """
+     top5: list[(token: str, pct: float)] where pct is 0..100.
+     Returns a gr.update with an HTML table colored by contribution (continuous gradient).
+     """
+     if not top5:
+         return gr.update(value="", visible=False)
+
+     def _lerp(a, b, t):  # linear interpolation
+         return a + (b - a) * t
+
+     def _rgb_to_hex(rgb):  # (r, g, b) -> "#rrggbb"
+         r, g, b = (max(0, min(255, int(round(x)))) for x in rgb)
+         return f"#{r:02x}{g:02x}{b:02x}"
+
+     def _interp_color(stops, t):
+         """
+         stops: list[(pos, (r, g, b))], pos in [0, 1], sorted.
+         t in [0, 1] -> interpolate between the nearest stops.
+         """
+         t = max(0.0, min(1.0, float(t)))
+         for i in range(len(stops) - 1):
+             p0, c0 = stops[i]
+             p1, c1 = stops[i + 1]
+             if t <= p1:
+                 # local interpolation factor
+                 if p1 == p0:
+                     w = 0.0
+                 else:
+                     w = (t - p0) / (p1 - p0)
+                 return (
+                     _lerp(c0[0], c1[0], w),
+                     _lerp(c0[1], c1[1], w),
+                     _lerp(c0[2], c1[2], w),
+                 )
+         return stops[-1][-1]
+
+     def _text_color_for_bg(rgb):
+         # YIQ luma for contrast; threshold ~128
+         r, g, b = rgb
+         yiq = (r * 299 + g * 587 + b * 114) / 1000.0
+         return "#000000" if yiq >= 128 else "#ffffff"
+
+     # --- gradient (low->high): green -> chartreuse -> orange -> red ---
+     # tweak the mid stops to taste
+     color_stops = [
+         (0.00, (27, 67, 50)),    # deep green
+         (0.40, (128, 170, 30)),  # chartreuse-ish
+         (0.70, (255, 165, 0)),   # orange
+         (1.00, (208, 0, 0)),     # red
+     ]
+
+     # Normalize to [0, 1] over the 5 items so colors spread even if skewed
+     vals = [pct for _, pct in top5]
+     vmin, vmax = min(vals), max(vals)
+     if vmax - vmin < 1e-9:
+         norms = [0.5] * len(vals)  # all equal -> neutral middle color
+     else:
+         norms = [(v - vmin) / (vmax - vmin) for v in vals]
+
+     # Build rows
+     row_html = []
+     for (tok, pct), t in zip(top5, norms):
+         rgb = _interp_color(color_stops, t)
+         bg = _rgb_to_hex(rgb)
+         fg = _text_color_for_bg(rgb)
+         row_html.append(
+             f"<tr style='background:{bg}; color:{fg};'>"
+             f"<td style='padding:10px 12px; border-bottom:1px solid rgba(255,255,255,0.06);'>{tok}</td>"
+             f"<td style='padding:10px 12px; text-align:right; border-bottom:1px solid rgba(255,255,255,0.06);'>{pct:.1f}%</td>"
+             "</tr>"
+         )
+
+     table = (
+         "<div class='top5-box' style='margin-top:10px;'>"
+         "<h4 style='margin:0 0 8px; color:#e5e7eb;'>Top 5 tokens (by contribution)</h4>"
+         "<table class='top5-table' style='width:100%; border-collapse:collapse;"
+         " background:#11131a; border:1px solid #2a2f3a; border-radius:10px; overflow:hidden;'>"
+         "<thead>"
+         "<tr style='background:#0f1320; color:#cbd5e1;'>"
+         "<th style='text-align:left; padding:10px 12px; font-weight:600;'>Token</th>"
+         "<th style='text-align:right; padding:10px 12px; font-weight:600;'>Contribution</th>"
+         "</tr>"
+         "</thead>"
+         f"<tbody>{''.join(row_html)}</tbody>"
+         "</table>"
+         "</div>"
+     )
+
+     return gr.update(value=table, visible=True)
+
+
+ def export_csv(filename, role):
+     log = prediction_log_doctor if role == "Doctor" else prediction_log_user
+     if not log:
+         # Hide the download link and show a warning; prevents empty exports
+         return None, gr.update(visible=False), gr.update(value="⚠️ No predictions to export.", visible=True)

      if not filename.strip():
          timestamp = time.strftime("%Y%m%d_%H%M%S")
+         filename = f"{role.lower()}_predictions_{timestamp}.csv"
      elif not filename.endswith(".csv"):
          filename += ".csv"

+     csv_path = os.path.abspath(os.path.join(f"app/demo/exports/{role.lower()}", filename))
      os.makedirs(os.path.dirname(csv_path), exist_ok=True)

+     df = pd.DataFrame(log)
+     if role == "Doctor":
+         columns = [
+             "mode", "normalize_mode", "use_rollout", "emr_text", "image_path",
+             "prediction", "confidence",
+             "grad_cam_path", "token_attention_path",
+             "top5_tokens"
+         ]
+     else:
+         columns = ["mode", "emr_text", "image_path", "prediction", "confidence"]
+
+     df = df[columns]
      df.to_csv(csv_path, index=False)

+     return (
+         csv_path,  # path string -> goes into csv_output (gr.File)
+         csv_path,  # same path string again -> reused by blink_box_effect()
+         gr.update(value=f"✅ Exported to: {csv_path}", visible=True)  # status string -> goes into export_status_box
+     )
+
+
+ def safe_delete_dir(path):
+     try:
+         if os.path.exists(path) and os.path.isdir(path):
+             shutil.rmtree(path)
+     except Exception as e:
+         print(f"⚠️ Failed to delete {path}: {e}")
+
+
+ def clear_logs(role):
+     # Step 1: Delete logged image files
+     log = prediction_log_doctor if role == "Doctor" else prediction_log_user
+     for entry in log:
+         # Delete X-ray image if it exists and is not "N/A"
+         if entry["image_path"] != "N/A":
+             image_file_path = ROOT_DIR / Path(entry["image_path"])
+             if image_file_path.exists():
+                 try:
+                     image_file_path.unlink()
+                 except Exception as e:
+                     print(f"⚠️ Failed to delete image: {image_file_path}: {e}")
+
+         # Delete Grad-CAM
+         if role == "Doctor" and entry.get("grad_cam_path") not in [None, "N/A"]:
+             grad_path = ROOT_DIR / Path(entry["grad_cam_path"])
+             if grad_path.exists():
+                 try:
+                     grad_path.unlink()
+                 except Exception as e:
+                     print(f"⚠️ Failed to delete Grad-CAM: {grad_path}: {e}")
+
+         # Delete token attention
+         if role == "Doctor" and entry.get("token_attention_path") not in [None, "N/A"]:
+             attn_path = ROOT_DIR / Path(entry["token_attention_path"])
+             if attn_path.exists():
+                 try:
+                     attn_path.unlink()
+                 except Exception as e:
+                     print(f"⚠️ Failed to delete token attention: {attn_path}: {e}")
+
+     # Step 2: Delete folders safely
+     if role == "Doctor":
+         safe_delete_dir(ROOT_DIR / "app/demo/uploads")
+         safe_delete_dir(ROOT_DIR / "app/demo/exports/doctor/gradcam")
+         safe_delete_dir(ROOT_DIR / "app/demo/exports/doctor/tokenattention")
+         safe_delete_dir(ROOT_DIR / "app/demo/exports/doctor")
+     else:
+         safe_delete_dir(ROOT_DIR / "app/demo/exports/user")
+         safe_delete_dir(ROOT_DIR / "app/demo/uploads")

+     # Step 3: Clear in-memory logs
+     if role == "Doctor":
+         prediction_log_doctor.clear()
+     else:
+         prediction_log_user.clear()
+
+     return gr.Textbox(value="Predictions: 0", interactive=False)
+
+
+ # Confirm before clearing logs
+ def confirm_clear():
+     return gr.Textbox(
+         value="⚠️ Are you sure you want to clear the logs? Click again to confirm.",
+         visible=True,
+         interactive=False,
+         label=""
+     )
+
+
+ def clear_confirmed(role):
+     cleared = clear_logs(role)
+     return (
+         cleared,
+         gr.Textbox(value="✅ Logs cleared successfully!", visible=True),
+         gr.update(value=None, visible=False),  # csv_output
+         gr.update(interactive=True)            # filename_input
+     )

+
+ def reset_confirm_box():
+     return gr.Textbox(value="", visible=False)
+
+
+ def disable_filename_input():
+     return gr.Textbox(interactive=False)
+
+
+ def show_loading_msg():
+     return gr.update(value="⏳ Running inference...", visible=True)
+
+
+ def blink_box_effect(path):
+     # return the file component with a blinking class
+     return gr.File(value=path, elem_classes=["download_box", "blink-csv"], visible=True, interactive=True)
+
+
+ def update_role_state(r):
+     # hide insights + token box when switching to User
+     tabs_vis = (r == "Doctor")
+     return (
+         r,                            # role_state
+         gr.update(visible=tabs_vis),  # normalize_mode_column
+         gr.update(visible=tabs_vis),  # insights_tab
+         gr.update(visible=False),     # token_attention
+         gr.update(visible=False),     # gradcam_img
+         gr.update(visible=tabs_vis),  # use_rollout
+         gr.update(visible=False),     # top5_html
+     )
+
+
+ def rerun_if_done(ran, role, mode, normalize_mode, emr_text, image, use_rollout):
+     if not ran or role != "Doctor":
+         return (
+             gr.Textbox(visible=False),
+             gr.Image(visible=False),
+             gr.HighlightedText(visible=False),
+             gr.HTML(visible=False),
+             gr.Label(visible=False),
+             gr.Tabs(visible=False),
+             gr.Textbox(value="", interactive=False),
+             gr.JSON(value={}, visible=True)
+         )
+     # Re-run classify() if inference has already happened once
+     return classify(role, mode, normalize_mode, emr_text, image, use_rollout)
+
+
+ def inject_tooltips():
+     return gr.HTML(
+         """
+         <script>
+         const observer = new MutationObserver(() => {
+             document.querySelectorAll(".token-attn-box .token").forEach(token => {
+                 const text = token.innerText;
+                 const pipeIndex = text.indexOf("|");
+                 if (pipeIndex > -1) {
+                     const display = text.slice(0, pipeIndex).trim();
+                     const tooltip = text.slice(pipeIndex + 1).trim();
+                     token.innerText = display;
+                     token.setAttribute("data-tooltip", tooltip);
+                 }
+             });
+         });
+         observer.observe(document.body, { childList: true, subtree: true });
+         </script>
+         """
+     )
+
+
+ def reset_ui():
+     is_text = DEFAULT_MODE in ["text", "multimodal"]
+     is_image = DEFAULT_MODE in ["image", "multimodal"]
+
+     return (
+         # Inputs (text/image areas)
+         gr.update(value="", visible=is_text),     # emr_text
+         gr.update(value=None, visible=is_image),  # image
+         gr.update(visible=is_image),              # max_file_note
+
+         # Prediction/result area
+         gr.update(value="", visible=True),     # result_box
+         gr.update(value=None, visible=False),  # gradcam_img
+         gr.update(value=None, visible=False),  # token_attention
+         gr.update(value="", visible=False),    # top5_html
+         gr.update(value="", visible=False),    # confidence_label
+         gr.update(visible=False),              # insights_tab
+         gr.update(value={}, visible=True),     # class_probs_json
+
+         # Role/mode controls + states
+         "User",                         # role_state
+         DEFAULT_MODE,                   # mode_state
+         "visual",                       # normalize_mode_state
+         gr.update(value="User"),        # role (radio)
+         gr.update(value=DEFAULT_MODE),  # mode (radio)
+         gr.update(value="visual"),      # normalize_mode (radio)
+         gr.update(visible=False),       # normalize_mode_column (hide in User)
+         gr.update(visible=False),       # use_rollout
+         False,                          # rollout_state
+
+         # Loading + inference state
+         gr.update(value="", visible=False),  # loading_msg
+         False,                               # inference_done
+         gr.update(value="", visible=False)   # export_status_box
+     )
+
+
+ # --- Gradio UI ---
+ style_path = Path(__file__).resolve().parent / "style.css"
+ with open(style_path, "r") as f:
+     custom_css = f.read()
+
+ with gr.Blocks(css=custom_css) as demo:
      # Centered title and subtitle
      gr.Markdown("<h2 class='centered'>🩺 Medi-LLM: Clinical Triage Assistant 🩻</h2>")
      gr.Markdown("<p class='centered'>Upload a chest X-ray and/or enter EMR text to get a triage level prediction.</p>")
+     gr.HTML(
+         """
+         <div class='welcome-banner' style="background-color: #24283b; border-left: 4px solid #7aa2f7; padding: 16px; border-radius: 8px; margin-bottom: 16px;">
+             <h3 style="margin-top: 0; color: #c0caf5;">👋 Welcome to Medi-LLM</h3>
+             <p style="color: #a9b1d6; line-height: 1.6;">
+                 This AI assistant helps triage patients using <strong>EMR text</strong> and <strong>chest X-rays</strong>.<br>
+                 📝 Enter EMR notes, 📷 upload a chest X-ray, or use both for a multimodal diagnosis.<br>
+                 👩‍⚕️ Select <strong>Doctor</strong> mode to view insights like Grad-CAM heatmaps and token-level attention.<br>
+                 💾 Save your results for later by exporting them to a CSV file.
+             </p>
+         </div>
+         """
+     )

+     # Hidden state
+     role_state = gr.State(value="User")
+     mode_state = gr.State(value=DEFAULT_MODE)
+     rollout_state = gr.State(value=False)
+     normalize_mode_state = gr.State(value="visual")
+     inference_done = gr.State(value=False)
+
+     # Role and Mode selection
+     with gr.Row(equal_height=True):
+         with gr.Column():
+             role = gr.Radio(["User", "Doctor"], value="User", label="Select Role", info="Doctors see insights like Grad-CAM and token attention", elem_id="role_selector")
+             mode = gr.Radio(["text", "image", "multimodal"], value=DEFAULT_MODE, label="Select Input Mode", info="Choose the diagnosis input type", elem_id="mode_selector")
+         with gr.Column(visible=False) as normalize_mode_column:
+             normalize_mode = gr.Radio(
+                 ["visual", "probabilistic"],
+                 value="visual",
+                 label="Attention Normalization",
+                 info="Softmax sums to 1 (probabilistic). Visual uses gamma-boosted scaling for color clarity."
+             )
+             use_rollout = gr.Checkbox(
+                 label="Use attention rollout (CLS -> inputs)",
+                 value=False,
+                 info="Includes residuals and multiplies attention across layers. Slower but often more faithful."
+             )
+
+     normalize_mode.change(
+         fn=lambda val: val,
+         inputs=[normalize_mode],
+         outputs=[normalize_mode_state]
+     )
+
+     use_rollout.change(
+         fn=lambda v: v,
+         inputs=[use_rollout],
+         outputs=[rollout_state]
+     )

      # Input: EMR text and/or image
      with gr.Row():
+         with gr.Column(scale=3, elem_id="text_col") as text_col:
+             emr_text, image, max_file_note = render_inputs(DEFAULT_MODE)
+
+     # Submit button
+     with gr.Row():
+         submit_btn = gr.Button(
+             "🔍 Run Inference",
+             elem_id="inference_btn"
+         )
+         reset_btn = gr.Button(
+             "↩️ Reset",
+             elem_id="reset_btn"
+         )
+
+     # Outputs
+     with gr.Column(elem_classes=["output-box"]):
+         result_box = gr.Textbox(label="🧪 Triage Prediction", interactive=False)
+         confidence_label = gr.Label(label="📊 Confidence", visible=False)
+         prediction_count_box = gr.Textbox(value="Predictions: 0", interactive=False, label="🧮 Count", elem_id="prediction_count_box")
+         insights_tab = gr.Tabs(visible=False)
+         class_probs_json = gr.JSON(label="🔍 Class Probabilities", visible=True, elem_classes=["json-box"])
+         with insights_tab:
+             with gr.Tab("📷 Grad-CAM"):
+                 gradcam_img = gr.Image(visible=False, elem_classes=["gr-image-box"])
+             with gr.Tab("🔬 Token Attention"):
+                 token_attention = gr.HighlightedText(
+                     visible=False,
+                     show_legend=False,
+                     color_map={
+                         "0.0": "#7aa2f7",   # blue
+                         "0.25": "#80deea",  # cyan
+                         "0.5": "#fbc02d",   # yellow
+                         "0.75": "#ff8a65",  # orange
+                         "1.0": "#f7768e",   # red
+                     },
+                     elem_classes=["token-attn-box"]
+                 )
+                 top5_html = gr.HTML(value="", visible=False)
+
+         inject_tooltips()
+
+         gr.HTML("""
+         <div class="attention-legend">
+             <div style="display: flex; align-items: center; gap: 8px;">
+                 <span style="font-size: 14px; color: #c0caf5;">0.0</span>
+                 <div class="attention-gradient-bar"></div>
+                 <span style="font-size: 14px; color: #c0caf5;">1.0</span>
+             </div>
+         </div>
+         """)

      with gr.Row():
+         loading_msg = gr.Markdown(value="", visible=False, elem_classes=["loading-msg"])

+     # Bind inference
+     submit_btn.click(
+         fn=show_loading_msg,
+         outputs=[loading_msg]
+     ).then(
+         fn=classify,
+         inputs=[role_state, mode_state, normalize_mode_state, emr_text, image, rollout_state],
+         outputs=[
+             result_box,
+             gradcam_img,
+             token_attention,
+             top5_html,
+             confidence_label,
+             insights_tab,
+             prediction_count_box,
+             class_probs_json,
+         ]
+     ).then(
+         fn=lambda: gr.update(value="", visible=False),
+         outputs=[loading_msg]
+     ).then(
+         fn=lambda: True,
+         outputs=[inference_done]
+     )

+     # Input updates
+     mode.change(
+         fn=lambda m: (*render_inputs(m), m),
+         inputs=[mode],
+         outputs=[emr_text, image, max_file_note, mode_state]
+     )
+
+     role.change(
+         fn=update_role_state,
+         inputs=[role],
+         outputs=[role_state, normalize_mode_column, insights_tab, token_attention, gradcam_img, use_rollout, top5_html]
+     )
+
+     normalize_mode.change(
+         fn=rerun_if_done,
+         inputs=[inference_done, role_state, mode_state, normalize_mode, emr_text, image, rollout_state],
+         outputs=[
+             result_box,
+             gradcam_img,
+             token_attention,
+             top5_html,
+             confidence_label,
+             insights_tab,
+             prediction_count_box,
+             class_probs_json,
+         ]
+     )
+
+     use_rollout.change(
+         fn=rerun_if_done,
+         inputs=[inference_done, role_state, mode_state, normalize_mode, emr_text, image, rollout_state],
+         outputs=[
+             result_box,
+             gradcam_img,
+             token_attention,
+             top5_html,
+             confidence_label,
+             insights_tab,
+             prediction_count_box,
+             class_probs_json
+         ]
+     )

      # CSV Export UI
      gr.Markdown("### 📁 Export Prediction Log")

+     with gr.Row(equal_height=True):
+         with gr.Column(scale=3):
+             filename_input = gr.Textbox(
+                 label="CSV filename (optional)",
+                 placeholder="e.g., triage_results.csv",
+                 info="Set a filename or leave blank for auto-naming",
+                 elem_id="csv_filename"
+             )
+
+             export_status_box = gr.Textbox(
+                 value="",
+                 visible=False,
+                 interactive=False,
+                 label="",
+                 elem_id="export_status"
+             )
+
+         with gr.Column(scale=4):
+             gr.Markdown(
+                 "📑 **Summary**\n\nDownload your triage results for clinical review or research.",
+                 elem_classes="centered"
+             )
+             with gr.Row():
+                 with gr.Column(scale=1, min_width=200):
+                     download_btn = gr.Button("💾 Export CSV", elem_id="export_button")
+                 with gr.Column(scale=1, min_width=200):
+                     clear_btn = gr.Button("🗑️ Clear Logs", elem_id="clear_button")
+                     confirm_clear_btn = gr.Button("✅ Confirm Clear", visible=False, elem_id="confirm_button")
+                     confirm_box = gr.Textbox(label="Status", interactive=False, visible=False, elem_id="confirm_box")
+
+         with gr.Column(scale=3):
+             csv_output = gr.File(label="📂 Download Link", elem_id="download_box")

      download_btn.click(
          fn=export_csv,
+         inputs=[filename_input, role_state],
+         outputs=[
+             csv_output,
+             csv_output,
+             export_status_box
+         ]
+     ).then(
+         fn=blink_box_effect,
+         inputs=[csv_output],
          outputs=[csv_output]
+     ).then(
+         fn=disable_filename_input,
+         outputs=[filename_input]
+     )
+
+     clear_btn.click(
+         fn=lambda: (
+             confirm_clear(),
+             gr.Button(visible=True),
+         ),
+         outputs=[confirm_box, confirm_clear_btn]
+     )
+
+     confirm_clear_btn.click(
+         fn=clear_confirmed,
+         inputs=[role_state],
+         outputs=[
+             prediction_count_box,  # reset prediction count
+             confirm_box,           # show success message
+             csv_output,            # hide CSV output file
+             filename_input         # re-enable input box
+         ]
+     ).then(
+         fn=lambda: gr.update(visible=False),  # hide confirm button
+         outputs=[confirm_clear_btn]
+     ).then(
+         fn=reset_confirm_box,
+         outputs=[confirm_box]
+     )
+
+     # Reset UI
+     reset_btn.click(
+         fn=reset_ui,
+         outputs=[
+             emr_text,               # 1
+             image,                  # 2
+             max_file_note,          # 3
+             result_box,             # 4
+             gradcam_img,            # 5
+             token_attention,        # 6
+             top5_html,              # 7
+             confidence_label,       # 8
+             insights_tab,           # 9
+             class_probs_json,       # 10
+             role_state,             # 11
+             mode_state,             # 12
+             normalize_mode_state,   # 13
+             role,                   # 14 (radio)
+             mode,                   # 15 (radio)
+             normalize_mode,         # 16 (radio)
+             normalize_mode_column,  # 17 (column visibility)
+             use_rollout,            # 18
+             rollout_state,          # 19
+             loading_msg,            # 20
+             inference_done,         # 21
+             export_status_box       # 22
+         ]
      )

  if __name__ == "__main__":
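For reference, a self-contained sketch of the normalization and color math that render_top5_html() applies to each row (the token names and percentages below are hypothetical):

    # Illustration only: min-max normalize the five percentages, then map each
    # through the same piecewise-linear green->red gradient used above.
    def lerp(a, b, t):
        return a + (b - a) * t

    def interp_color(stops, t):
        t = max(0.0, min(1.0, t))
        for (p0, c0), (p1, c1) in zip(stops, stops[1:]):
            if t <= p1:
                w = 0.0 if p1 == p0 else (t - p0) / (p1 - p0)
                return tuple(lerp(a, b, w) for a, b in zip(c0, c1))
        return stops[-1][1]

    stops = [(0.00, (27, 67, 50)), (0.40, (128, 170, 30)),
             (0.70, (255, 165, 0)), (1.00, (208, 0, 0))]
    top5 = [("fever", 31.2), ("cough", 24.8), ("hypoxia", 18.9),
            ("infiltrate", 14.6), ("dyspnea", 10.5)]  # hypothetical tokens

    vmin = min(p for _, p in top5)
    vmax = max(p for _, p in top5)
    for tok, pct in top5:
        t = (pct - vmin) / (vmax - vmin)  # highest token -> red, lowest -> green
        r, g, b = (int(round(x)) for x in interp_color(stops, t))
        print(f"{tok:12s} {pct:5.1f}%  #{r:02x}{g:02x}{b:02x}")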
app/demo/style.css ADDED
@@ -0,0 +1,379 @@
+ /* === Base Layout === */
+ body {
+     background-color: #1a1b26 !important;
+     color: #c0caf5 !important;
+     font-family: 'Fira Code', monospace;
+ }
+
+ /* === Welcome Banner Hover Glow === */
+ .welcome-banner:hover {
+     box-shadow: 0 0 12px 3px #7aa2f7 !important;
+     transition: 0.3s ease-in-out;
+     cursor: pointer;
+ }
+
+ /* === Text Inputs Focus & Hover === */
+ #emr_textbox textarea:hover,
+ #emr_textbox textarea:focus {
+     border: 1px solid #7aa2f7 !important;
+     box-shadow: 0 0 6px 2px #7aa2f7 !important;
+ }
+
+ /* === Image Upload Hover & Focus === */
+ #xray_image:hover,
+ #xray_image:focus {
+     border: 1px solid #9ece6a !important;
+     box-shadow: 0 0 6px 2px #9ece6a !important;
+ }
+
+ /* === Grad-CAM Image Hover & Focus === */
+ .gr-image-box:hover,
+ .gr-image-box:focus {
+     border: 1px solid #f7768e !important;
+     box-shadow: 0 0 6px 2px #f7768e !important;
+ }
+
+ /* === Token Attention Hover & Focus Enhancements === */
+ .token-attn-box:hover,
+ .token-attn-box:focus {
+     border: 1px solid #bb9af7 !important;
+     box-shadow: 0 0 6px 2px #bb9af7 !important;
+ }
+
+ .token-attn-box .token {
+     transition: background-color 0.3s ease-in-out, box-shadow 0.3s ease-in-out, color 0.3s ease-in-out;
+     padding: 4px 8px;
+     border-radius: 4px;
+     font-weight: 500;
+     margin: 2px;
+     display: inline-block;
+     position: relative;
+     cursor: help;
+ }
+
+ /* Custom tooltip on hover using the data-tooltip attribute */
+ /* === Tooltip decoding for attention === */
+ .token-attn-box .token::after {
+     content: attr(data-tooltip);
+     position: absolute;
+     background: #1e1e2e;
+     color: #c0caf5;
+     padding: 4px 8px;
+     border-radius: 4px;
+     top: -30px;
+     left: 0;
+     white-space: nowrap;
+     font-size: 0.85em;
+     box-shadow: 0 0 6px rgba(0, 0, 0, 0.5);
+     z-index: 10;
+     opacity: 0;
+     pointer-events: none;
+     transition: opacity 0.2s ease-in-out;
+ }
+
+ .token-attn-box .token:hover::after {
+     opacity: 1;
+ }
+
+ /* === Tooltip arrow for custom data-tooltip === */
+ .token-attn-box .token[data-tooltip]:hover::before {
+     content: "";
+     position: absolute;
+     top: -12px;
+     left: 50%;
+     transform: translateX(-50%);
+     border-left: 6px solid transparent;
+     border-right: 6px solid transparent;
+     border-bottom: 6px solid #1e1e2e; /* Match tooltip background */
+     z-index: 9;
+ }
+
+ /* Hover and active styles */
+ .token-attn-box .token:hover {
+     outline: 2px solid #bb9af7 !important;
+     box-shadow: 0 0 8px 2px #bb9af7 !important;
+     cursor: pointer;
+ }
+
+ /* === Highlight top-attention token with glow === */
+ .token-attn-box .token[style*="rgba(247, 118, 142, 1)"] {
+     box-shadow: 0 0 10px 5px rgba(247, 118, 142, 0.85);
+     border-radius: 6px;
+     font-weight: 600;
+ }
+
+ /* === Attention-based text color tinting for stronger contrast === */
+ .token-attn-box .token[style*="rgba(255, 138, 101"],
+ .token-attn-box .token[style*="rgba(255, 138, 101, 1)"] {
+     color: #ff8a65;
+ }
+
+ .token-attn-box .token[style*="rgba(251, 192, 45"],
+ .token-attn-box .token[style*="rgba(251, 192, 45, 1)"] {
+     color: #fbc02d;
+ }
+
+ .token-attn-box .token[style*="rgba(128, 222, 234"],
+ .token-attn-box .token[style*="rgba(128, 222, 234, 1)"] {
+     color: #80deea;
+ }
+
+ .token-attn-box .token[style*="rgba(122, 162, 247"],
+ .token-attn-box .token[style*="rgba(122, 162, 247, 1)"] {
+     color: #7aa2f7;
+ }
+
+ .token-attn-box .token[style*="rgba(247, 118, 142"],
+ .token-attn-box .token[style*="rgba(247, 118, 142, 1)"] {
+     color: #f7768e;
+ }
+
+ /* === Token Attention Gradient Bar === */
+ .attention-gradient-bar {
+     flex-grow: 1;
+     height: 14px;
+     border-radius: 8px;
+     margin-top: 8px;
+     background: linear-gradient(
+         to right,
+         #7aa2f7 0%,
+         #80deea 25%,
+         #fbc02d 50%,
+         #ff8a65 75%,
+         #f7768e 100%
+     );
+     box-shadow: 0 0 3px rgba(0,0,0,0.4) inset;
+ }
+
+ /* === Top5 tokens box === */
+ .top5-box .top5-table {
+     box-shadow: 0 6px 16px rgba(0,0,0,0.25);
+     border-radius: 10px;
+ }
+ .top5-box h4 { letter-spacing: .2px; }
+
+ /* === Triage Prediction Box Glow (based on class) === */
+ .prediction-high {
+     border: 2px solid #f7768e !important;
+     box-shadow: 0 0 8px 3px #f7768e !important;
+ }
+
+ .prediction-medium {
+     border: 2px solid #e0af68 !important;
+     box-shadow: 0 0 8px 3px #e0af68 !important;
+ }
+
+ .prediction-low {
+     border: 2px solid #e0af68 !important;
+     box-shadow: 0 0 8px 3px #e0af68 !important;
+ }
+
+ /* === Basic Radio Button Styling (Role + Mode) === */
+ #role_selector label,
+ #mode_selector label {
+     display: block;
+     margin: 6px 0;
+     padding: 8px 12px;
+     border-radius: 6px;
+     border: 1px solid #3b4261;
+     background-color: #1f2335;
+     color: #c0caf5;
+     font-weight: 500;
+     transition: all 0.2s ease-in-out;
+     cursor: pointer;
+ }
+
+ /* Hover and Focus Glow */
+ #role_selector label:hover,
+ #role_selector input:focus + label,
+ #mode_selector label:hover,
+ #mode_selector input:focus + label {
+     border: 1px solid #a0cfff !important;
+     box-shadow: 0 0 6px 2px #a0cfff !important;
+ }
+
+ /* Selected Option */
+ #role_selector input:checked + label,
+ #mode_selector input:checked + label {
+     background-color: #3d59a1 !important;
+     border: 1px solid #7aa2f7 !important;
+     color: white !important;
+     box-shadow: 0 0 6px 2px #7aa2f7 !important;
+ }
+
+ /* Optional: Ensure radio circles are visible */
+ #role_selector input,
+ #mode_selector input {
+     margin-right: 8px;
+     transform: scale(1.1);
+ }
+
+ /* === Buttons === */
+ .gr-button {
+     border-radius: 8px !important;
+     font-weight: 500;
+ }
+
+ /* === Primary/Secondary Buttons via IDs === */
+ /* Inference (blue) */
+ #inference_btn {
+     background-color: #7aa2f7 !important;
+     color: #ffffff !important;
+ }
+ #inference_btn:hover {
+     background-color: #409eff !important;
+     transform: translateY(-1px);
+     box-shadow: 0 0 10px rgba(122,162,255,0.55) !important;
+ }
+
+ /* Reset (red/coral) */
+ #reset_btn {
+     background-color: #f7768e !important;
+     color: #ffffff !important;
+ }
+ #reset_btn:hover {
+     background-color: #ff5c7a !important;
+     transform: translateY(-1px);
+     box-shadow: 0 0 12px rgba(255,92,122,0.75) !important;
+ }
+
+ /* Export (blue, same as inference) */
+ #export_button {
+     background-color: #7aa2f7 !important;
+     color: #ffffff !important;
+ }
+ #export_button:hover {
+     background-color: #409eff !important;
+     transform: translateY(-1px);
+     box-shadow: 0 0 10px rgba(122,162,255,0.45) !important;
+ }
+
+ /* Clear Logs (red, same as reset) */
+ #clear_button {
+     background-color: #f7768e !important;
+     color: #ffffff !important;
+ }
+ #clear_button:hover {
+     background-color: #ff5c7a !important;
+     transform: translateY(-1px);
+     box-shadow: 0 0 12px rgba(255,92,122,0.75) !important;
+ }
+
+ /* Confirm Clear (yellow base, GREEN glow on hover) */
+ #confirm_button {
+     background-color: #e0af68 !important;
+     color: #ffffff !important;
+     border-radius: 8px !important;
+     padding: 10px 14px !important;
+     font-weight: 600 !important;
+     border: none !important;
+     cursor: pointer !important;
+ }
+
+ #confirm_button:hover {
+     background-color: #d9a147 !important;
+     box-shadow: 0 0 10px 3px rgba(158,206,106,0.9) !important; /* green glow */
+     border: 1px solid #9ece6a !important;
+ }
+
+ /* === Tab Panels === */
+ .gr-tabitem {
+     background-color: #1f2335 !important;
+     color: #c0caf5 !important;
+ }
+
+ /* === Markdown (centered) === */
+ .centered {
+     text-align: center;
+ }
+
+ /* === Loading message === */
+ .loading-msg {
+     text-align: center;
+     color: #7aa2f7;
+     font-weight: bold;
+     font-size: 1.1em;
+ }
+
+ /* === Hover & Focus Glow for CSV filename input === */
+ #csv_filename input:hover,
+ #csv_filename textarea:hover,
+ #csv_filename input:focus,
+ #csv_filename textarea:focus {
+     border: 1px solid #7aa2f7 !important;
+     box-shadow: 0 0 6px 2px #7aa2f7 !important;
+     transition: 0.3s ease-in-out;
+ }
+
+ /* === Blinking effect for CSV download box === */
+ @keyframes blink-box {
+     0% { box-shadow: 0 0 6px 2px #7aa2f7; }
+     50% { box-shadow: 0 0 12px 4px #7aa2f7; }
+     100% { box-shadow: 0 0 6px 2px #7aa2f7; }
+ }
+
+ .blink-csv {
+     animation: blink-box 1.5s ease-in-out 3;
+     border-radius: 8px;
+     border: 1px solid #7aa2f7 !important;
+ }
+
+ /* === Prediction Count Box === */
+ #prediction_count_box {
+     font-size: 1em;
+     padding: 10px;
+     border-radius: 6px;
+     background-color: #1f2335;
+     color: #c0caf5;
+     border: 1px solid #7aa2f7;
+     transition: border-color 0.3s, box-shadow 0.3s;
+ }
+
+ #prediction_count_box:hover,
+ #prediction_count_box:focus {
+     border: 1px solid #a0cfff !important;
+     box-shadow: 0 0 6px 2px #7aa2f7 !important;
+ }
+
+ /* === Clear Logs Confirmation Box === */
+ #confirm_box {
+     font-size: 0.95em;
+     padding: 10px;
+     border-radius: 6px;
+     background-color: #1f2335;
+     color: #c0caf5;
+     border: 1px solid #e0af68;
+     transition: border-color 0.3s, box-shadow 0.3s;
+ }
+
+ #confirm_box:hover,
+ #confirm_box:focus {
+     border: 1px solid #e0af68 !important;
+     box-shadow: 0 0 6px 2px #e0af68 !important;
+ }
+
+ /* Export status message styling */
+ #export_status {
+     color: #9ece6a; /* Greenish success color */
+     font-weight: bold;
+     padding: 8px 12px;
+     border: 1px solid #9ece6a;
+     background-color: #1a1b26; /* Match the dark background */
+     border-radius: 6px;
+     margin-top: 8px;
+     transition: opacity 0.5s ease;
+ }
+
+ /* Optional fade-out animation (if using JS, or if Gradio later supports it natively) */
+ #export_status.fade-out {
+     opacity: 0;
+ }
+
+ /* === Class-level Probabilities === */
+ .json-box {
+     background-color: #1e222e;
+     padding: 12px;
+     border-radius: 8px;
+     border: 1px solid #7aa2f7;
+ }
app/utils/attention_utils.py DELETED
@@ -1,20 +0,0 @@
- def extract_token_attention(model, tokenizer, input_ids, attention_mask):
-     if hasattr(model.text_encoder, 'bert'):
-         try:
-             outputs = model.text_encoder.bert(
-                 input_ids=input_ids,
-                 attention_mask=attention_mask,
-                 output_attentions=True
-             )
-             last_attn = outputs.attentions[-1]  # (B, H, S, S), final layer
-             weights = last_attn.mean(dim=1)[0, 0, :]  # mean over heads for batch item 0; CLS row, i.e. CLS attention to every token
-
-             weights = weights.detach().cpu().numpy()
-             weights = (weights - weights.min()) / (weights.max() - weights.min() + 1e-8)
-
-             tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
-             return [(tok, float(round(weights[i], 3))) for i, tok in enumerate(tokens)]
-
-         except Exception as e:
-             print("Attention extraction failed:", e)
-             return None
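The helper deleted above read CLS attention from the final layer only (mean over heads). Its replacement, attention_rollout() in app/utils/inference_utils.py below, adds a residual term and multiplies row-normalized attention across layers. A toy sketch of the difference, using random attention tensors:

    import torch

    # Toy attentions: 3 layers, batch 1, 2 heads, sequence length 4.
    attns = [torch.softmax(torch.randn(1, 2, 4, 4), dim=-1) for _ in range(3)]

    # Deleted approach: final layer only, mean over heads, CLS row.
    last_layer_cls = attns[-1].mean(dim=1)[0, 0, :]     # [S]

    # Rollout-style: add residual, row-normalize, multiply across layers.
    S = attns[0].shape[-1]
    eye = torch.eye(S)
    A = None
    for a in attns:
        a = a.mean(dim=1)[0] + 0.5 * eye                # [S, S] with residual
        a = a / a.sum(dim=-1, keepdim=True)
        A = a if A is None else A @ a
    rollout_cls = A[0, :]                               # CLS row after rollout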
app/utils/gradcam_utils.py CHANGED
@@ -16,7 +16,7 @@ def register_hooks(model):
      layer = model.image_encoder.layer4
      fwd_handle = layer.register_forward_hook(forward_hook)
-     bwd_handle = layer.register_backward_hook(backward_hook)
+     bwd_handle = layer.register_full_backward_hook(backward_hook)

      return activations, gradients, fwd_handle, bwd_handle

@@ -25,16 +25,24 @@ def generate_gradcam(image_pil, activations, gradients):
      grads = gradients["value"]
      acts = activations["value"]

+     # Weight each activation channel by its pooled gradient
      pooled_grads = torch.mean(grads, dim=[0, 2, 3])
      for i in range(acts.shape[1]):
          acts[:, i, :, :] *= pooled_grads[i]

-     heatmap = torch.mean(acts, dim=1).squeeze().cpu().numpy()
+     # Normalize heatmap
+     heatmap = torch.mean(acts, dim=1).squeeze().detach().cpu().numpy()
      heatmap = np.maximum(heatmap, 0)
-     heatmap /= heatmap.max()
+     heatmap /= heatmap.max() + 1e-8

-     heatmap = Image.fromarray(np.uint8(255 * heatmap)).resize((224, 224)).convert("L")
-     image_np = np.array(image_pil.resize((224, 224)).convert("RGB"))
-     overlay = np.uint8(0.6 * image_np + 0.4 * plt.cm.jet(heatmap / 255.0)[:, :, :3] * 255)
+     # Convert to image and overlay
+     heatmap_resized = Image.fromarray(np.uint8(255 * heatmap)).resize((224, 224))
+     heatmap_array = np.array(heatmap_resized)
+     colormap = plt.cm.jet(heatmap_array / 255.0)[..., :3]  # shape (H, W, 3), RGB

-     return Image.fromarray(overlay.astype(np.uint8))
+     # Combine with original image
+     image_np = np.array(image_pil.resize((224, 224)).convert("RGB")) / 255.0
+     overlay = (0.6 * image_np + 0.4 * colormap) * 255
+     overlay = overlay.astype(np.uint8)
+
+     return Image.fromarray(overlay)
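For context, a minimal sketch of how these helpers are driven, mirroring the predict() flow in app/utils/inference_utils.py below (the model, PIL image, and input tensor are assumed to exist):

    # Grad-CAM needs gradients, so no torch.no_grad() around this.
    activations, gradients, fwd_handle, bwd_handle = register_hooks(model)
    model.zero_grad()
    logits = model(input_ids=None, attention_mask=None, image=img_tensor)["logits"]
    pred = logits.argmax(dim=1).item()
    logits[0, pred].backward(retain_graph=True)   # fills gradients["value"]
    cam_image = generate_gradcam(image_pil, activations, gradients)
    fwd_handle.remove()
    bwd_handle.remove()                           # always detach the hooks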
app/utils/inference_utils.py CHANGED
@@ -1,6 +1,7 @@
  import sys
  import torch
  import yaml
  from pathlib import Path
  from transformers import AutoTokenizer
  from torchvision import transforms
@@ -9,6 +10,8 @@ ROOT_DIR = Path(__file__).resolve().parent.parent.parent
  sys.path.append(str(ROOT_DIR))

  from src.multimodal_model import MediLLMModel

  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

@@ -33,32 +36,222 @@ def load_model(mode, model_path, config_path=str(Path("config/config.yaml").resolve())):
          dropout=config["dropout"],
          hidden_dim=config["hidden_dim"]
      )
-     model.load_state_dict(torch.load(model_path, map_location=DEVICE))
      model.to(DEVICE)
      model.eval()
      return model


- def predict(model, mode, emr_text=None, image=None):
-     with torch.no_grad():
-         input_ids = attention_mask = img_tensor = None
-
-         if mode in ["text", "multimodal"] and emr_text:
-             text_tokens = tokenizer(
-                 emr_text,
-                 return_tensors="pt",
-                 truncation=True,
-                 padding="max_length",
-                 max_length=128,
              )
-             input_ids = text_tokens["input_ids"].to(DEVICE)
-             attention_mask = text_tokens["attention_mask"].to(DEVICE)

-         if mode in ["image", "multimodal"] and image:
-             img_tensor = image_transform(image).unsqueeze(0).to(DEVICE)

-         output = model(input_ids=input_ids, attention_mask=attention_mask, image=img_tensor)
-         pred = torch.argmax(output, dim=1).item()
-         confidence = torch.softmax(output, dim=1).squeeze()[pred].item()

-         return f"{inv_map[pred]} (Confidence: {confidence:.2f})"
  import sys
  import torch
  import yaml
+ import numpy as np
  from pathlib import Path
  from transformers import AutoTokenizer
  from torchvision import transforms

  sys.path.append(str(ROOT_DIR))

  from src.multimodal_model import MediLLMModel
+ from app.utils.gradcam_utils import register_hooks, generate_gradcam
+

  DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

          dropout=config["dropout"],
          hidden_dim=config["hidden_dim"]
      )
+     state = torch.load(model_path, map_location=DEVICE)
+     model.load_state_dict(state)
      model.to(DEVICE)
      model.eval()
      return model


+ def attention_rollout(attentions, last_k=4, residual_alpha=0.5):
+     """
+     attentions: tuple/list of per-layer attention tensors; each is (B, H, S, S)
+     last_k: only roll back through the last k layers (keeps contrast)
+     residual_alpha: how much identity to add before normalizing (preserves token self-info)
+     returns: [B, S, S] rollout matrix, or None if the input is invalid
+     """
+     if attentions is None:
+         return None
+     if isinstance(attentions, (list, tuple)) and len(attentions) == 0:
+         return None
+
+     first = attentions[0]
+     if first is None or first.ndim != 4:
+         return None  # expect [B, H, S, S]
+
+     B, H, S, _ = first.shape
+     eye = torch.eye(S, device=first.device).unsqueeze(0).expand(B, S, S)  # [B, S, S]
+
+     L = len(attentions)
+     if last_k is None:
+         last_k = L
+     if last_k <= 0:
+         # No layers selected -> return identity (no propagation)
+         return eye.clone()
+
+     start = max(0, L - last_k)
+     A = None
+     for layer in range(start, L):
+         a = attentions[layer]
+         if a is None or a.ndim != 4 or a.shape[0] != B or a.shape[-1] != S:
+             # Skip malformed layer
+             continue
+         a = a.mean(dim=1)  # [B, S, S] (average over heads)
+         a = a + float(residual_alpha) * eye
+         a = a / (a.sum(dim=-1, keepdim=True) + 1e-12)  # row-normalize
+         A = a if A is None else torch.bmm(A, a)
+
+     # If we never multiplied (e.g. every layer was skipped), fall back to identity
+     return A if A is not None else eye.clone()  # [B, S, S]
+
+
+ def merge_wordpieces(tokens, scores):
+     merged_tokens, merged_scores = [], []
+     cur_tok, cur_scores = "", []
+     for t, s in zip(tokens, scores):
+         if t.startswith("##"):
+             cur_tok += t[2:]
+             cur_scores.append(s)
+         else:
+             if cur_tok:
+                 merged_tokens.append(cur_tok)
+                 merged_scores.append(sum(cur_scores) / max(1, len(cur_scores)))
+             cur_tok, cur_scores = t, [s]
+     if cur_tok:
+         merged_tokens.append(cur_tok)
+         merged_scores.append(sum(cur_scores) / max(1, len(cur_scores)))
+     return merged_tokens, merged_scores
+
+ def _normalize_for_display_wordlevel(attn_scores, normalize_mode="visual", temperature=0.30):
+     """
+     Convert raw *word-level* token scores into:
+       - probabilistic mode: probabilities that sum to 1.0 (100%), with labels like "0.237 | 23.7% (contrib)"
+       - visual mode: min-max + gamma scaling (contrast, not sum-to-100), with labels like "0.68 | visual score"
+
+     Returns:
+       attn_final: np.ndarray of floats in [0, 1] for the color scale
+       labels: list[str] per token (tooltip text; the first number stays up front for the color_map bucketing)
+     """
+     attn_array = np.array(attn_scores, dtype=float)
+
+     if normalize_mode == "probabilistic":
+         # ---- percentage view that sums to 100% ----
+         attn_array = np.maximum(attn_array, 0.0)
+         if attn_array.max() > 0:
+             attn_array = attn_array / (attn_array.max() + 1e-12)  # scale to [0, 1] for stability
+         # sharpen (lower temperature => peakier)
+         attn_array = np.power(attn_array + 1e-12, 1.0 / max(1e-6, float(temperature)))
+         prob = attn_array / (attn_array.sum() + 1e-12)
+         percent = prob * 100.0
+
+         # keep prob (0..1) for the color scale; label with % contribution
+         labels = [f"{prob[i]:.3f} | {percent[i]:.1f}% (contrib)" for i in range(len(prob))]
+         return prob, labels
+     else:
+         # ---- visual: min-max + gamma (contrast, not sum-to-100) ----
+         if attn_array.max() > attn_array.min():
+             attn_array0 = (attn_array - attn_array.min()) / (attn_array.max() - attn_array.min() + 1e-8)
+             attn_array0 = np.clip(np.power(attn_array0, 0.75), 0.1, 1.0)
+         else:
+             attn_array0 = np.zeros_like(attn_array)
+         labels = [f"{attn_array0[i]:.2f} | visual score" for i in range(len(attn_array0))]
+         return attn_array0, labels
+
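+ # Worked example (probabilistic, temperature=0.30): raw word scores
+ # [0.1, 0.2, 0.4] are first scaled by the max to [0.25, 0.5, 1.0], then
+ # sharpened via x**(1/0.30) to roughly [0.0098, 0.099, 1.0]; after
+ # normalization the displayed contributions are about 0.9%, 8.9%, 90.2%.
+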
142
+ def predict(
+     model,
+     mode,
+     emr_text=None,
+     image=None,
+     normalize_mode="visual",
+     need_token_vis=False,
+     use_rollout=False
+ ):
+     """
+     normalize_mode: "visual" (min-max + gamma boost) or "probabilistic" (sharpened contributions summing to 100%)
+     need_token_vis: request/compute token-level attentions (Doctor mode + text/multimodal)
+     use_rollout: use attention rollout across layers instead of last-layer CLS attention
+     """
+     input_ids = attention_mask = img_tensor = None
+     cam_image = None
+     highlighted_tokens = None
+     top5 = []
+
+     if mode in ["text", "multimodal"] and emr_text:
+         text_tokens = tokenizer(
+             emr_text,
+             return_tensors="pt",
+             truncation=True,
+             padding="max_length",
+             max_length=128,
+         )
+         input_ids = text_tokens["input_ids"].to(DEVICE)
+         attention_mask = text_tokens["attention_mask"].to(DEVICE)
+
+     if mode in ["image", "multimodal"] and image is not None:
+         img_tensor = image_transform(image).unsqueeze(0).to(DEVICE)
+
+     # Only register hooks for Grad-CAM when an image is involved
+     if mode in ["image", "multimodal"]:
+         activations, gradients, fwd_handle, bwd_handle = register_hooks(model)
+         model.zero_grad()
+
+     # === Forward ===
+     # Only enable attentions when we plan to visualize them
+     outputs = model(
+         input_ids=input_ids,
+         attention_mask=attention_mask,
+         image=img_tensor,
+         output_attentions=bool(need_token_vis and (mode in ["text", "multimodal"])),
+         return_raw_attentions=bool(use_rollout and need_token_vis)
+     )
+
+     logits = outputs["logits"]
+     if logits.numel() == 0:
+         raise ValueError("Model returned empty logits. Check input format.")
+
+     probs = torch.softmax(logits, dim=1)
+     pred = torch.argmax(probs, dim=1).item()
+     confidence = probs.squeeze()[pred].item()
+
+     # === Grad-CAM ===
+     if mode in ["image", "multimodal"]:
+         # Backpropagate from the predicted logit only (gradients are needed just for Grad-CAM)
+         logits[0, pred].backward(retain_graph=True)
+         cam_image = generate_gradcam(image, activations, gradients)
+         fwd_handle.remove()
+         bwd_handle.remove()
+
+     # === Token-level attention ===
+     if need_token_vis and (mode in ["text", "multimodal"]):
+         token_attn_scores = None
+
+         if use_rollout and outputs.get("raw_attentions") is not None:
+             # Partial rollout over the last few layers;
+             # roll: [B, S, S]; roll[b, 0, :] is CLS-to-all-tokens for batch item b
+             roll = attention_rollout(outputs["raw_attentions"], last_k=4, residual_alpha=0.5)  # [B, S, S]
+             if roll is not None:
+                 token_attn_scores = roll[0, 0].detach().cpu().numpy().tolist()  # CLS row
+         elif outputs.get("token_attentions") is not None:
+             token_attn_scores = outputs["token_attentions"].squeeze().tolist()
+
+         if token_attn_scores is not None:
+             # Filter out specials/padding and align scores to wordpieces
+             ids = input_ids[0].tolist()
+             amask = attention_mask[0].tolist() if attention_mask is not None else [1] * len(ids)
+             wp_all = tokenizer.convert_ids_to_tokens(ids, skip_special_tokens=False)
+             special_ids = set(tokenizer.all_special_ids)
+             keep_idx = [i for i, (tid, m) in enumerate(zip(ids, amask)) if (tid not in special_ids) and (m == 1)]
+             wp_tokens = [wp_all[i] for i in keep_idx]
+             wp_scores = [token_attn_scores[i] if i < len(token_attn_scores) else 0.0 for i in keep_idx]
+
+             # Merge wordpieces into words
+             word_tokens, attn_scores = merge_wordpieces(wp_tokens, wp_scores)
+
+             # Build Top-5 (probabilistic normalization for ranking)
+             _probs_for_rank, _ = _normalize_for_display_wordlevel(
+                 attn_scores, normalize_mode="probabilistic", temperature=0.30
+             )
+             pairs = list(zip(word_tokens, _probs_for_rank))
+             pairs.sort(key=lambda x: x[1], reverse=True)
+             top5 = [(tok, float(p * 100.0)) for tok, p in pairs[:5]]
+
+             # Final display (probabilistic or visual)
+             attn_final, labels = _normalize_for_display_wordlevel(
+                 attn_scores,
+                 normalize_mode=normalize_mode,
+                 temperature=0.30,
              )
 
+             highlighted_tokens = [(tok, labels[i]) for i, tok in enumerate(word_tokens)]
 
+     print("🧪 Normalization Mode Received:", normalize_mode)
+     if highlighted_tokens:
+         print("🟣 Highlighted tokens sample:", highlighted_tokens[:5])
+     else:
+         print("🟣 No highlighted tokens (no text, or attentions unavailable).")
 
+     return inv_map[pred], cam_image, highlighted_tokens, confidence, probs.tolist(), top5
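A hedged end-to-end sketch of the updated call (checkpoint name as used in app/utils/test.py below; the X-ray path is hypothetical):

from PIL import Image

model = load_model("multimodal", "medi_llm_state_dict_multimodal.pth")
xray = Image.open("data/dummy_images/COVID/sample.png").convert("RGB")  # hypothetical sample

label, cam, highlighted, conf, all_probs, top5 = predict(
    model,
    "multimodal",
    emr_text="Fever, dry cough, and low SpO2 for three days.",
    image=xray,
    normalize_mode="probabilistic",
    need_token_vis=True,
    use_rollout=True,
)
print(label, f"{conf:.2%}", top5)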
app/utils/test.py ADDED
@@ -0,0 +1,26 @@
+ import sys
+ import torch
+ from pathlib import Path
+ from transformers import AutoTokenizer
+
+ ROOT_DIR = Path(__file__).resolve().parent.parent.parent  # repo root, three parents up from app/utils/test.py
+ sys.path.append(str(ROOT_DIR))
+
+ from app.utils.inference_utils import load_model
+ from app.utils.attention_utils import extract_token_attention
+
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ tokenizer = AutoTokenizer.from_pretrained("emilyalsentzer/Bio_ClinicalBERT")
+
+ # Load the multimodal checkpoint
+ model = load_model("multimodal", "medi_llm_state_dict_multimodal.pth")
+
+ # Test input
+ text = "Patient-A reports shortness of breath and low oxygen levels."
+ tokens = tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
+ input_ids = tokens["input_ids"].to(DEVICE)
+ mask = tokens["attention_mask"].to(DEVICE)
+
+ # Extract and print token attention as a quick sanity check
+ attention = extract_token_attention(model, tokenizer, input_ids, mask)
+ print(attention)
config/config.yaml.example ADDED
@@ -0,0 +1,18 @@
+ text:
+   lr: 1.8711332079056742e-05
+   dropout: 0.33274218952802376
+   hidden_dim: 512
+   batch_size: 8
+   epochs: 5
+ image:
+   lr: 9.99473327273459e-05
+   dropout: 0.4451972461446767
+   hidden_dim: 256
+   batch_size: 4
+   epochs: 5
+ multimodal:
+   lr: 3.7443867882936816e-05
+   dropout: 0.29940046032586376
+   hidden_dim: 512
+   batch_size: 4
+   epochs: 5
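A minimal sketch of consuming this file (assumes PyYAML and that the example is copied to config/config.yaml; the read path is an assumption, not confirmed by the repo):

import yaml

with open("config/config.yaml") as f:  # assumed location after copying the .example file
    cfg = yaml.safe_load(f)

mm = cfg["multimodal"]
print(mm["lr"], mm["hidden_dim"], mm["batch_size"], mm["epochs"])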
sample_data/.DS_Store ADDED
Binary file (6.15 kB).
 
src/multimodal_model.py CHANGED
@@ -86,39 +86,51 @@ class MediLLMModel(nn.Module):
             nn.Linear(hidden_dim, num_classes),  # Final Classification output
         )
 
-    def forward(self, input_ids=None, attention_mask=None, image=None):
+    def forward(self, input_ids=None, attention_mask=None, image=None, output_attentions=False, return_raw_attentions=False):
         # input_ids shape: [batch, seq_length]
         # attention_mask: mask to ignore padding, same shape as input_ids
         # image: [batch, 3, 224, 224]
         # Text features
-        if self.mode == "text":
+        if self.mode in ["text", "multimodal"]:
             text_outputs = self.text_encoder(
-                input_ids=input_ids, attention_mask=attention_mask
+                input_ids=input_ids,
+                attention_mask=attention_mask,
+                output_attentions=output_attentions,
             )
             # feed tokenized text into the BERT Model which returns a
             # dictionary with last_hidden_state: [batch_size, seq_len,
             # hidden_size], pooler_output: [batch_size, hidden_size]
             # (CLS embeddings), hidden_states: List of tensors,
             # attentions(weights): List of Tensors
-            features = text_outputs.last_hidden_state[
-                :, 0, :
-            ]  # CLS token, return CLS tokens from all batches, position 0,
+            last_hidden = text_outputs.last_hidden_state  # CLS token sits at position 0 of every sequence;
             # a batch of 3 sentences has 3 CLS tokens
+            cls_embedding = last_hidden[:, 0, :]  # CLS tokens of all batches [batch, hidden_dim]
 
+            # Real token attention using last-layer CLS attention weights
+            # attentions = List[12 tensors] -> each [batch, heads, seq_len, seq_len]
+            token_attn_scores = None
+            raw_attentions = None
+            if output_attentions:
+                attention_maps = text_outputs.attentions
+                last_layer_attn = attention_maps[-1]  # [batch, heads, seq_len, seq_len]
+                avg_attn = last_layer_attn.mean(dim=1)  # Average across heads -> [batch, seq_len, seq_len]
+                token_attn_scores = avg_attn[:, 0, :]  # CLS attends to all tokens -> [batch, seq_len]
+                if return_raw_attentions:
+                    raw_attentions = attention_maps
+        else:
+            cls_embedding = None
+            token_attn_scores = None
+            raw_attentions = None
 
-        # Image features
-        elif self.mode == "image":
-            features = self.image_encoder(
-                image
-            )  # pass the image through ResNet, returns a [batch, 2048] tensor
-
+        # Image features
+        if self.mode == "image":
+            features = self.image_encoder(image)  # pass the image through ResNet, returns a [batch, 2048] tensor
+        elif self.mode == "text":
+            features = cls_embedding
         else:  # multimodal
-            text_outputs = self.text_encoder(
-                input_ids=input_ids, attention_mask=attention_mask
-            )
-            text_feat = text_outputs.last_hidden_state[:, 0, :]  # CLS token
             image_feat = self.image_encoder(image)
             features = torch.cat(
-                (text_feat, image_feat), dim=1
+                (cls_embedding, image_feat), dim=1
             )  # Concatenates text and image features along feature dimension
             # [CLS vector from BERT] + [ResNet image vector]
             # -> [batch_size, 2816]
@@ -143,4 +155,9 @@ class MediLLMModel(nn.Module):
         # return self.classifier(fused)
 
         # Return logits for each class, later apply softmax during evaluation
-        return self.classifier(features)
+        logits = self.classifier(features)
+        return {
+            "logits": logits,
+            "token_attentions": token_attn_scores,  # [batch, seq_len] or None
+            "raw_attentions": raw_attentions if return_raw_attentions else None,
+        }
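A hedged smoke check of the new dict-returning forward (dummy tensors; the MediLLMModel constructor arguments are assumed, since they are not shown in this hunk):

import torch

model = MediLLMModel(mode="multimodal")  # constructor signature assumed; see src/multimodal_model.py
model.eval()
with torch.no_grad():
    out = model(
        input_ids=torch.randint(0, 100, (2, 128)),
        attention_mask=torch.ones(2, 128, dtype=torch.long),
        image=torch.randn(2, 3, 224, 224),
        output_attentions=True,
    )
assert out["logits"].shape[0] == 2
assert out["token_attentions"].shape == (2, 128)  # CLS-to-token weights
assert out["raw_attentions"] is None  # only populated when return_raw_attentions=True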
tests/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (151 Bytes).
 
tests/__pycache__/test_dummy.cpython-310-pytest-8.4.1.pyc ADDED
Binary file (746 Bytes).
 
tests/__pycache__/test_generate_emr_csv.cpython-310-pytest-8.4.1.pyc ADDED
Binary file (15.8 kB).
 
tests/__pycache__/test_multimodal_model.cpython-310-pytest-8.4.1.pyc ADDED
Binary file (4.79 kB).
 
tests/__pycache__/test_triage_dataset.cpython-310-pytest-8.4.1.pyc ADDED
Binary file (4.47 kB).