Spaces:

willwim
/

Team3_Mod4

Sleeping

App Files Files Community

willwim committed on May 13

Commit

842a7db

verified ·

1 Parent(s): 22b54b9

Update app.py

Browse files

Files changed (1) hide show

app.py +187 -73

app.py CHANGED Viewed

@@ -1,133 +1,247 @@
 import gradio as gr
 import shap
 import numpy as np
-import scipy as sp
 import torch
 import transformers
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, AutoModelForTokenClassification
 import matplotlib.pyplot as plt
 import sys
 import csv
 import os
 HF_TOKEN = os.getenv("hf_token")
 csv.field_size_limit(sys.maxsize)
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
-# Load models and tokenizer
-tokenizer = AutoTokenizer.from_pretrained("willwim/adr_SJM_Notebook-Copy_for_T3", token=HF_TOKEN)
-model = AutoModelForSequenceClassification.from_pretrained("willwim/adr_SJM_Notebook-Copy_for_T3", token=HF_TOKEN).to(device)
-# Build a pipeline object for predictions
-pred = transformers.pipeline("text-classification", model=model, tokenizer=tokenizer, top_k=None, device=device)
-# SHAP explainer
 explainer = shap.Explainer(pred)
-# NER pipeline
 ner_tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
 ner_model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")
-ner_pipe = pipeline("ner", model=ner_model, tokenizer=ner_tokenizer, aggregation_strategy="simple")
 def adr_predict(x):
     text_input = str(x).lower()
-    encoded_input = tokenizer(text_input, return_tensors='pt').to(device)
     output = model(**encoded_input)
     scores = torch.softmax(output.logits, dim=-1)[0].detach().cpu().numpy()
     try:
         shap_values = explainer([text_input])
-        local_plot = shap.plots.text(shap_values[0], display=False)
     except Exception as e:
-        local_plot = f"<p style='color:red;'>SHAP explanation error: {e}</p>"
     try:
         res = ner_pipe(text_input)
         entity_colors = {
-            'Severity': '#ffcccb',
-            'Sign_symptom': '#bcf5bc',
-            'Medication': '#cfe2f3',
-            'Age': '#fff2cc',
-            'Sex':'#fff2cc',
-            'Diagnostic_procedure':'#eeeeee',
-            'Biological_structure':'#d9d9d9'
         }
-        # FIX: Added inline "color: black;" to force all un-highlighted text to be black
-        htext = "<div style='line-height: 2.0; font-size: 1.1em; color: black;'>"
         prev_end = 0
-        res = sorted(res, key=lambda x: x['start'])
         for entity in res:
-            start, end = entity['start'], entity['end']
-            word = text_input[start:end]
-            color = entity_colors.get(entity['entity_group'], '#f3f3f3')
-            htext += f"{text_input[prev_end:start]}"
-            # Highlighted text is also explicitly set to black
-            htext += f"<mark style='background-color:{color}; color: black; padding: 2px 4px; border-radius: 4px; font-weight: 500;'>{word} <small style='opacity: 0.7;'>[{entity['entity_group']}]</small></mark>"
             prev_end = end
         htext += text_input[prev_end:] + "</div>"
-    except:
-        htext = "<p style='color: black;'>NER processing error.</p>"
-    label_output = {"Severe Reaction": float(scores[1]), "Non-severe Reaction": float(scores[0])}
-    return label_output, local_plot, htext
-# FIX: Added !important tags to ensure Gradio's dark mode doesn't override the white background and black text
 custom_css = """
 .gradio-container { font-family: 'Inter', system-ui, sans-serif; }
 .main-header { text-align: center; margin-bottom: 2rem; }
-.output-box { border-radius: 8px; border: 1px solid #e0e0e0; padding: 15px; background: white !important; color: black !important; }
 footer { visibility: hidden; }
 """
-with gr.Blocks(title="ADR Detector") as demo:
     with gr.Column(elem_classes="main-header"):
         gr.Markdown("# Adverse Drug Reaction (ADR) Detector")
-        gr.Markdown("Analyze clinical text for potential medication-related severity and key medical entities.")
     with gr.Row():
         with gr.Column(scale=1):
             gr.Markdown("### Input")
             prob1 = gr.Textbox(
-                label="Clinical Observations",
-                lines=4,
                 placeholder="Example: Patient experienced acute kidney injury after taking Ibuprofen...",
-                elem_id="input-text"
             )
             submit_btn = gr.Button("Run Analysis", variant="primary")
             gr.Markdown("### Examples")
             gr.Examples(
                 examples=[
-                    ["A 35 year-old male had severe headache after taking Aspirin. The lab results were normal."],
-                    ["A 35 year-old female had minor pain in upper abdomen after taking Acetaminophen."]
                 ],
-                inputs=[prob1]
             )
         with gr.Column(scale=1):
             gr.Markdown("### Classification")
             label = gr.Label(label="Severity Probability")
-            # --- TABS REMOVED HERE ---
-            # Both components are now stacked sequentially in the column
             gr.Markdown("### Medical Entities")
-            htext = gr.HTML(label="NER Mapping", elem_classes="output-box")
             gr.Markdown("### Model Logic (SHAP)")
-            local_plot = gr.HTML(label='Feature Importance', elem_classes="output-box")
     gr.Markdown("---")
-    gr.Markdown("Disclaimer: This tool is for research purposes only and does not constitute medical advice.")
     submit_btn.click(
         fn=adr_predict,
         inputs=[prob1],
-        outputs=[label, local_plot, htext]
     )
-demo.launch(css=custom_css, theme=gr.themes.Soft())

 import gradio as gr
 import shap
 import numpy as np
 import torch
 import transformers
 from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer, AutoModelForTokenClassification
+import matplotlib
+matplotlib.use("Agg")
 import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+import io
+import base64
 import sys
 import csv
 import os
 HF_TOKEN = os.getenv("hf_token")
 csv.field_size_limit(sys.maxsize)
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
+# ── Load classification model ──────────────────────────────────────────────────
+# Tokenizer and classifier weights are fetched from the same Hub repository;
+# HF_TOKEN (read from the "hf_token" environment variable) is passed as the
+# access token for both downloads.
+tokenizer = AutoTokenizer.from_pretrained(
+    "willwim/adr_SJM_Notebook-Copy_for_T3", token=HF_TOKEN
+)
+# The model is moved to `device` ("cuda:0" when CUDA is available, else "cpu").
+model = AutoModelForSequenceClassification.from_pretrained(
+    "willwim/adr_SJM_Notebook-Copy_for_T3", token=HF_TOKEN
+).to(device)
+# Text-classification pipeline around the same model/tokenizer pair. It is only
+# used as the callable handed to SHAP; adr_predict computes the displayed
+# probabilities by calling `model` directly.
+# NOTE(review): top_k=None presumably makes the pipeline return a score for
+# every label instead of only the best one — confirm against the
+# transformers pipeline documentation.
+pred = transformers.pipeline(
+    "text-classification", model=model, tokenizer=tokenizer,
+    top_k=None, device=device
+)
+# SHAP explainer built directly on the pipeline; adr_predict calls it with a
+# one-element list containing the raw input string.
 explainer = shap.Explainer(pred)
+# ── Load NER model ─────────────────────────────────────────────────────────────
+# No access token and no `device` argument are passed for the NER model.
 ner_tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
 ner_model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")
+# adr_predict reads the "start", "end" and "entity_group" keys of each result
+# produced by this pipeline to build the highlighted HTML.
+ner_pipe = pipeline(
+    "ner", model=ner_model, tokenizer=ner_tokenizer,
+    aggregation_strategy="simple"
+)
+# ── Custom SHAP bar-chart renderer ─────────────────────────────────────────────
+def render_shap_bar_chart(shap_values, class_idx: int = 1) -> str:
+    """
+    Builds a horizontal bar chart (red = pushes toward ADR, teal = pushes away)
+    that mirrors the style shown in the reference screenshot.
+    Returns an <img> HTML tag with an embedded base64 PNG.
+    """
+    # shap_values is a shap.Explanation object for a single sample
+    # .values shape: (n_tokens, n_classes)  or  (n_tokens,) when binary
+    values = shap_values.values          # (n_tokens, n_classes)
+    tokens = shap_values.data            # list/array of token strings
+    if values.ndim == 2:
+        sv = values[:, class_idx]        # SHAP values for "Severe Reaction"
+    else:
+        sv = values
+    # Sort by absolute magnitude and keep top-N for readability
+    TOP_N = 20
+    order = np.argsort(np.abs(sv))[::-1][:TOP_N]
+    sv_top = sv[order]
+    tok_top = np.array(tokens)[order]
+    # Re-sort so the chart reads top-to-bottom by value (positive on top)
+    plot_order = np.argsort(sv_top)
+    sv_plot = sv_top[plot_order]
+    tok_plot = tok_top[plot_order]
+    colors = ["#e05c5c" if v > 0 else "#3dbdb0" for v in sv_plot]
+    fig_height = max(4, len(sv_plot) * 0.38)
+    fig, ax = plt.subplots(figsize=(8, fig_height), facecolor="white")
+    ax.set_facecolor("white")
+    y_pos = np.arange(len(sv_plot))
+    bars = ax.barh(y_pos, sv_plot, color=colors, height=0.6, edgecolor="none")
+    # Zero line
+    ax.axvline(0, color="#333333", linewidth=0.9, zorder=3)
+    ax.set_yticks(y_pos)
+    ax.set_yticklabels(tok_plot, fontsize=10, color="#222222")
+    ax.set_xlabel("SHAP Value — impact on ADR prediction", fontsize=10, color="#444444")
+    ax.set_title(
+        "Token-Level Feature Importance\n"
+        "■ Red = pushes toward ADR    ■ Teal = pushes away",
+        fontsize=11, color="#222222", pad=10
+    )
+    # Legend patches
+    red_patch  = mpatches.Patch(color="#e05c5c", label="Pushes toward ADR")
+    teal_patch = mpatches.Patch(color="#3dbdb0", label="Pushes away from ADR")
+    ax.legend(handles=[red_patch, teal_patch], fontsize=9,
+              loc="lower right", framealpha=0.7)
+    ax.spines["top"].set_visible(False)
+    ax.spines["right"].set_visible(False)
+    ax.spines["left"].set_visible(False)
+    ax.tick_params(axis="y", length=0)
+    ax.tick_params(axis="x", colors="#555555")
+    ax.xaxis.label.set_color("#555555")
+    plt.tight_layout()
+    buf = io.BytesIO()
+    fig.savefig(buf, format="png", dpi=130, bbox_inches="tight",
+                facecolor="white")
+    plt.close(fig)
+    buf.seek(0)
+    b64 = base64.b64encode(buf.read()).decode("utf-8")
+    return (
+        f"<div style='background:white; padding:12px; border-radius:8px;'>"
+        f"<img src='data:image/png;base64,{b64}' "
+        f"style='width:100%; max-width:760px; display:block; margin:auto;' />"
+        f"</div>"
+    )
+# ── Main prediction function ───────────────────────────────────────────────────
 def adr_predict(x):
     text_input = str(x).lower()
+    encoded_input = tokenizer(text_input, return_tensors="pt").to(device)
     output = model(**encoded_input)
     scores = torch.softmax(output.logits, dim=-1)[0].detach().cpu().numpy()
+    # ── SHAP (bar chart) ──────────────────────────────────────────────────────
     try:
         shap_values = explainer([text_input])
+        shap_html = render_shap_bar_chart(shap_values[0], class_idx=1)
     except Exception as e:
+        shap_html = f"<p style='color:red;'>SHAP explanation error: {e}</p>"
+    # ── NER ───────────────────────────────────────────────────────────────────
     try:
         res = ner_pipe(text_input)
         entity_colors = {
+            "Severity":             "#ffcccb",
+            "Sign_symptom":         "#bcf5bc",
+            "Medication":           "#cfe2f3",
+            "Age":                  "#fff2cc",
+            "Sex":                  "#fff2cc",
+            "Diagnostic_procedure": "#eeeeee",
+            "Biological_structure": "#d9d9d9",
         }
+        htext = "<div style='line-height:2.0; font-size:1.1em; color:black;'>"
         prev_end = 0
+        res = sorted(res, key=lambda e: e["start"])
         for entity in res:
+            start, end = entity["start"], entity["end"]
+            word  = text_input[start:end]
+            color = entity_colors.get(entity["entity_group"], "#f3f3f3")
+            htext += text_input[prev_end:start]
+            htext += (
+                f"<mark style='background-color:{color}; color:black; "
+                f"padding:2px 4px; border-radius:4px; font-weight:500;'>"
+                f"{word} "
+                f"<small style='opacity:0.7;'>[{entity['entity_group']}]</small>"
+                f"</mark>"
+            )
             prev_end = end
         htext += text_input[prev_end:] + "</div>"
+    except Exception:
+        htext = "<p style='color:black;'>NER processing error.</p>"
+    label_output = {
+        "Severe Reaction":     float(scores[1]),
+        "Non-severe Reaction": float(scores[0]),
+    }
+    return label_output, shap_html, htext
+# ── UI ─────────────────────────────────────────────────────────────────────────
+# Page-level CSS. `.output-box` is applied to both HTML output components
+# below; its `!important` rules are intended to keep the boxes white with black
+# text even when the active Gradio theme (e.g. dark mode) would restyle them.
+# The last rule hides the page footer.
 custom_css = """
 .gradio-container { font-family: 'Inter', system-ui, sans-serif; }
 .main-header { text-align: center; margin-bottom: 2rem; }
+.output-box {
+    border-radius: 8px;
+    border: 1px solid #e0e0e0;
+    padding: 15px;
+    background: white !important;
+    color: black !important;
+}
 footer { visibility: hidden; }
 """
+# Two-column layout: input and examples on the left, the three outputs stacked
+# on the right. The CSS and theme are supplied when the Blocks app is built.
+with gr.Blocks(title="ADR Detector", css=custom_css, theme=gr.themes.Soft()) as demo:
     with gr.Column(elem_classes="main-header"):
         gr.Markdown("# Adverse Drug Reaction (ADR) Detector")
+        gr.Markdown(
+            "Analyze clinical text for potential medication-related severity "
+            "and key medical entities."
+        )
     with gr.Row():
+        # ── Left column: input ────────────────────────────────────────────────
         with gr.Column(scale=1):
             gr.Markdown("### Input")
             prob1 = gr.Textbox(
+                label="Clinical Observations",
+                lines=4,
                 placeholder="Example: Patient experienced acute kidney injury after taking Ibuprofen...",
+                elem_id="input-text",
             )
             submit_btn = gr.Button("Run Analysis", variant="primary")
             gr.Markdown("### Examples")
+            # Sample sentences wired to the textbox through inputs=[prob1].
             gr.Examples(
                 examples=[
+                    ["A 35 year-old male had severe headache after taking Aspirin. "
+                     "The lab results were normal."],
+                    ["A 35 year-old female had minor pain in upper abdomen after "
+                     "taking Acetaminophen."],
                 ],
+                inputs=[prob1],
             )
+        # ── Right column: outputs ─────────────────────────────────────────────
         with gr.Column(scale=1):
             gr.Markdown("### Classification")
             label = gr.Label(label="Severity Probability")
             gr.Markdown("### Medical Entities")
+            htext_out = gr.HTML(label="NER Mapping", elem_classes="output-box")
             gr.Markdown("### Model Logic (SHAP)")
+            shap_out = gr.HTML(label="Feature Importance", elem_classes="output-box")
     gr.Markdown("---")
+    gr.Markdown(
+        "Disclaimer: This tool is for research purposes only and does not "
+        "constitute medical advice."
+    )
+    # The outputs list follows the order of adr_predict's return tuple
+    # (label dict, SHAP HTML, NER HTML), not the on-screen order, where the
+    # NER box is placed above the SHAP box.
     submit_btn.click(
         fn=adr_predict,
         inputs=[prob1],
+        outputs=[label, shap_out, htext_out],
     )
+# Start the app with default launch settings.
+demo.launch()