Krishwall committed on
Commit
f52734b
·
verified ·
1 Parent(s): 4d1d01a

Upload 6 files

Browse files
Files changed (6) hide show
  1. .env.example +0 -0
  2. .gitignore +48 -0
  3. README.md +0 -19
  4. app.py +178 -0
  5. create_patient_index.py +104 -0
  6. requirements.txt +0 -3
.env.example ADDED
File without changes
.gitignore ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Raw image formats
2
+ *.raw
3
+ *.dng
4
+ *.cr2
5
+ *.cr3
6
+ *.nef
7
+ *.arw
8
+ *.rw2
9
+ *.orf
10
+ *.srw
11
+ *.x3f
12
+ *.raf
13
+ *.dcr
14
+ *.k25
15
+ *.kdc
16
+ *.mrw
17
+
18
+ # Medical imaging formats (common in clinical AI)
19
+ *.dcm
20
+ *.dicom
21
+ *.nii
22
+ *.nii.gz
23
+ *.mha
24
+ *.mhd
25
+
26
+ # Compressed raw formats
27
+ *.tiff
28
+ *.tif
29
+
30
+ # Processed image formats that might be large
31
+ *.png
32
+ *.jpg
33
+ *.jpeg
34
+ *.bmp
35
+ *.gif
36
+ *.webp
37
+
38
+ # Model checkpoints
39
+ *.pt
40
+ *.pth
41
+ *.ckpt
42
+ *.model
43
+ *.h5
44
+ *.pb
45
+ *.onnx
46
+
47
+ # Checkpoint directories
48
+ checkpoints/
README.md CHANGED
@@ -1,19 +0,0 @@
1
- ---
2
- title: ChestX-Ray Diagnosis
3
- emoji: 🚀
4
- colorFrom: red
5
- colorTo: red
6
- sdk: docker
7
- app_port: 8501
8
- tags:
9
- - streamlit
10
- pinned: false
11
- short_description: This demo showcases a multimodal deep learning system that c
12
- ---
13
-
14
- # Welcome to Streamlit!
15
-
16
- Edit `/src/streamlit_app.py` to customize this app to your heart's desire. :heart:
17
-
18
- If you have any questions, checkout our [documentation](https://docs.streamlit.io) and [community
19
- forums](https://discuss.streamlit.io).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import torch
3
+ import torch.nn.functional as F
4
+ from PIL import Image
5
+
6
+ from demo.utils.load_model import load_fusion_model
7
+ from demo.utils.grad_cam import GradCAM, overlay_cam
8
+ from demo.utils.saliency import (
9
+ compute_text_saliency,
10
+ merge_wordpieces,
11
+ filter_tokens,
12
+ highlight_text,
13
+ )
14
+
15
+ # --------------------------------------------------
16
+ # Page configuration
17
+ # --------------------------------------------------
18
+ st.set_page_config(
19
+ page_title="Multimodal Clinical AI",
20
+ layout="wide",
21
+ initial_sidebar_state="collapsed"
22
+ )
23
+
24
+ # --------------------------------------------------
25
+ # Header
26
+ # --------------------------------------------------
27
+ st.markdown(
28
+ """
29
+ <h2 style="margin-bottom:0">Multimodal Clinical Decision Support</h2>
30
+ <p style="color:gray; margin-top:4px">
31
+ Chest X-ray + Radiology Text → Ranked Diagnoses with Explainability
32
+ </p>
33
+ """,
34
+ unsafe_allow_html=True
35
+ )
36
+
37
+ st.divider()
38
+
39
# --------------------------------------------------
# Load model (cached)
# --------------------------------------------------
_CHECKPOINT_PATH = "checkpoints/fusion_model/fusion_layer4_tuned.pt"


@st.cache_resource
def load_all():
    """Load the fusion model bundle once; Streamlit reuses it across reruns."""
    bundle = load_fusion_model(_CHECKPOINT_PATH)
    return bundle


model, tokenizer, image_transform, LABELS, device = load_all()
49
+
50
+ # --------------------------------------------------
51
+ # Input Section
52
+ # --------------------------------------------------
53
+ col1, col2 = st.columns(2)
54
+
55
+ with col1:
56
+ st.subheader("Chest X-ray")
57
+ uploaded_image = st.file_uploader(
58
+ "Upload Chest X-ray",
59
+ type=["png", "jpg", "jpeg"],
60
+ label_visibility="collapsed"
61
+ )
62
+
63
+ with col2:
64
+ st.subheader("Radiology Findings")
65
+ findings = st.text_area(
66
+ "Enter findings",
67
+ height=180,
68
+ placeholder="e.g. Enlarged cardiac silhouette with pulmonary congestion...",
69
+ label_visibility="collapsed"
70
+ )
71
+
72
+ st.markdown("<br>", unsafe_allow_html=True)
73
+ analyze = st.button("Analyze Case", use_container_width=True)
74
+ st.markdown("<br>", unsafe_allow_html=True)
75
+
76
# --------------------------------------------------
# Inference + Explainability
# --------------------------------------------------
# Runs only when the button was clicked AND both inputs are present.
if analyze and uploaded_image and findings:

    # ---- Preprocess inputs ----
    # Convert to RGB so grayscale uploads still yield 3 channels.
    image = Image.open(uploaded_image).convert("RGB")
    image_tensor = image_transform(image).unsqueeze(0).to(device)  # add batch dim

    enc = tokenizer(
        findings,
        padding="max_length",
        truncation=True,
        max_length=256,
        return_tensors="pt"
    )
    input_ids = enc["input_ids"].to(device)
    attention_mask = enc["attention_mask"].to(device)

    # ---- Forward pass ----
    # No gradients needed for ranking; the Grad-CAM / saliency helpers
    # below run their own gradient-enabled passes internally.
    with torch.no_grad():
        logits = model(image_tensor, input_ids, attention_mask)
        probs = F.softmax(logits, dim=1)

    # Top-2 classes drive the primary/secondary diagnosis panels.
    top2_prob, top2_idx = torch.topk(probs, k=2, dim=1)
    primary_idx = top2_idx[0, 0].item()
    secondary_idx = top2_idx[0, 1].item()

    # --------------------------------------------------
    # Diagnosis Output
    # --------------------------------------------------
    col1, col2 = st.columns(2)

    with col1:
        st.markdown("### 🩺 Primary Diagnosis")
        st.success(
            f"{LABELS[primary_idx]} \nConfidence: {top2_prob[0,0]:.2f}"
        )

    with col2:
        st.markdown("### 🔍 Secondary Diagnosis")
        st.info(
            f"{LABELS[secondary_idx]} \nConfidence: {top2_prob[0,1]:.2f}"
        )

    # --------------------------------------------------
    # Explainability
    # --------------------------------------------------
    st.divider()
    st.markdown("## Explainability")

    col1, col2 = st.columns(2)

    # ---- Grad-CAM ----
    with col1:
        st.markdown("#### Image Evidence (Grad-CAM)")

        # Hook the image encoder's `layer4` feature maps (ResNet-style
        # backbone assumed — confirm against load_fusion_model).
        gradcam = GradCAM(model, model.image_encoder.layer4)
        cam = gradcam.generate(
            image_tensor,
            input_ids,
            attention_mask,
            class_idx=primary_idx
        )

        overlay = overlay_cam(image_tensor, cam)
        # use_container_width replaces the deprecated use_column_width
        # and matches the Analyze button's sizing above.
        st.image(
            overlay,
            use_container_width=True,
            caption="Regions influencing the primary diagnosis"
        )

    # ---- Text Saliency ----
    with col2:
        st.markdown("#### Text Evidence (Important Terms)")

        saliency, attn_mask = compute_text_saliency(
            model,
            input_ids,
            attention_mask,
            target_class=primary_idx
        )

        tokens = tokenizer.convert_ids_to_tokens(input_ids[0])

        # Drop padding/special tokens before display.
        tokens, scores = filter_tokens(tokens, saliency, attn_mask)

        # Merge wordpieces back into whole words.
        tokens, scores = merge_wordpieces(tokens, scores)

        # Render saliency-weighted highlighting as inline HTML.
        html_text = highlight_text(tokens, scores)
        st.markdown(html_text, unsafe_allow_html=True)

elif analyze:
    # The button was clicked with one or both inputs missing — tell the
    # user explicitly instead of silently doing nothing.
    st.warning("Please upload a chest X-ray and enter the radiology findings.")

# --------------------------------------------------
# Footer / Disclaimer
# --------------------------------------------------
st.divider()
st.caption(
    "⚠️ For educational and research purposes only. "
    "Not intended for clinical use."
)
create_patient_index.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from datasets import load_dataset
2
+ from pathlib import Path
3
+ import pandas as pd
4
+ import re
5
+ from PIL import Image
6
+
7
+ # -----------------------------
8
+ # CONFIG
9
+ # -----------------------------
10
+ DATASET_NAME = "itsanmolgupta/mimic-cxr-dataset" # change this
11
+ SPLIT = "train"
12
+
13
+ IMAGE_DIR = Path("data/raw/images")
14
+ OUTPUT_CSV = Path("data/metadata/patient_index.csv")
15
+
16
+ IMAGE_DIR.mkdir(parents=True, exist_ok=True)
17
+ OUTPUT_CSV.parent.mkdir(parents=True, exist_ok=True)
18
+
19
# -----------------------------
# LABEL DEFINITIONS
# -----------------------------
# Keyword triggers per diagnostic class, matched as substrings of the
# normalized impression text.
LABEL_KEYWORDS = {
    "PNEUMOTHORAX": ["pneumothorax"],
    "PNEUMONIA": ["pneumonia", "consolidation", "airspace disease"],
    "EDEMA": ["pulmonary edema", "vascular congestion"],
    "EFFUSION": ["pleural effusion"],
    "CARDIOMEGALY": ["cardiomegaly", "enlarged heart"],
    "NORMAL": [
        "no acute cardiopulmonary",
        "no acute abnormality",
        "no acute disease",
        "normal chest",
        "unremarkable",
    ],
}

# Tie-break order: the first class (scanning top to bottom) with a
# keyword hit wins, so pathology outranks NORMAL.
PRIORITY = [
    "PNEUMOTHORAX",
    "PNEUMONIA",
    "EDEMA",
    "EFFUSION",
    "CARDIOMEGALY",
    "NORMAL",
]


def assign_label(impression: str) -> str:
    """Map a free-text impression to a single diagnosis label.

    The impression is lower-cased and punctuation is blanked out, then the
    classes in PRIORITY are scanned in order and the first one with a
    keyword match is returned. Non-string input or an impression matching
    no keyword yields "OTHER".
    """
    if not isinstance(impression, str):
        return "OTHER"

    # Normalize: lower-case, then replace punctuation with spaces so
    # plain substring matching works.
    normalized = re.sub(r"[^\w\s]", " ", impression.lower())

    matched = next(
        (
            label
            for label in PRIORITY
            if any(keyword in normalized for keyword in LABEL_KEYWORDS[label])
        ),
        "OTHER",
    )
    return matched
60
+
61
+
62
# -----------------------------
# MAIN PIPELINE
# -----------------------------
def main():
    """Build the patient index CSV from the Hugging Face dataset.

    Streams the configured split, saves each image under IMAGE_DIR
    (skipping files that already exist, so reruns are cheap), assigns a
    weak keyword label from the impression text, and writes one CSV row
    per usable sample to OUTPUT_CSV.
    """
    print("📥 Loading Hugging Face dataset...")
    dataset = load_dataset(DATASET_NAME, split=SPLIT)

    records = []

    for idx, sample in enumerate(dataset):
        image = sample["image"]
        findings = sample["findings"]
        impression = sample["impression"]

        # Skip incomplete samples — every modality is required downstream.
        if image is None or findings is None or impression is None:
            continue

        # Save image locally (important for PyTorch Dataset later)
        image_path = IMAGE_DIR / f"img_{idx}.png"
        if not image_path.exists():
            image.save(image_path)

        label = assign_label(impression)

        records.append({
            "image_path": str(image_path),
            "findings": findings,
            "impression": impression,
            "label": label
        })

        # Skip idx == 0 so we don't print a misleading
        # "Processed 0 samples..." on the very first iteration.
        if idx and idx % 1000 == 0:
            print(f"Processed {idx} samples...")

    # Guard the empty case: df["label"] on an empty DataFrame would raise
    # KeyError, and writing an empty index is not useful.
    if not records:
        print("\n⚠️ No usable samples found — patient_index.csv not written.")
        return

    df = pd.DataFrame(records)
    df.to_csv(OUTPUT_CSV, index=False)

    print("\n✅ patient_index.csv created")
    print(df["label"].value_counts())
101
+
102
+
103
+ if __name__ == "__main__":
104
+ main()
requirements.txt CHANGED
@@ -1,3 +0,0 @@
1
- altair
2
- pandas
3
- streamlit