Spaces:

willwim
/

Team3_Mod4

Sleeping

App Files Files Community

willwim committed on May 13

Commit

fd765d2

verified ·

1 Parent(s): 842a7db

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -46

app.py CHANGED Viewed

@@ -13,11 +13,11 @@ import base64
 import sys
 import csv
 import os
 HF_TOKEN = os.getenv("hf_token")
 csv.field_size_limit(sys.maxsize)
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 # ── Load classification model ──────────────────────────────────────────────────
 tokenizer = AutoTokenizer.from_pretrained(
     "willwim/adr_SJM_Notebook-Copy_for_T3", token=HF_TOKEN
@@ -25,14 +25,14 @@ tokenizer = AutoTokenizer.from_pretrained(
 model = AutoModelForSequenceClassification.from_pretrained(
     "willwim/adr_SJM_Notebook-Copy_for_T3", token=HF_TOKEN
 ).to(device)
 pred = transformers.pipeline(
     "text-classification", model=model, tokenizer=tokenizer,
     top_k=None, device=device
 )
 explainer = shap.Explainer(pred)
 # ── Load NER model ─────────────────────────────────────────────────────────────
 ner_tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
 ner_model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")
@@ -40,8 +40,8 @@ ner_pipe = pipeline(
     "ner", model=ner_model, tokenizer=ner_tokenizer,
     aggregation_strategy="simple"
 )
 # ── Custom SHAP bar-chart renderer ─────────────────────────────────────────────
 def render_shap_bar_chart(shap_values, class_idx: int = 1) -> str:
     """
@@ -53,59 +53,63 @@ def render_shap_bar_chart(shap_values, class_idx: int = 1) -> str:
     # .values shape: (n_tokens, n_classes)  or  (n_tokens,) when binary
     values = shap_values.values          # (n_tokens, n_classes)
     tokens = shap_values.data            # list/array of token strings
     if values.ndim == 2:
         sv = values[:, class_idx]        # SHAP values for "Severe Reaction"
     else:
         sv = values
     # Sort by absolute magnitude and keep top-N for readability
     TOP_N = 20
     order = np.argsort(np.abs(sv))[::-1][:TOP_N]
     sv_top = sv[order]
     tok_top = np.array(tokens)[order]
     # Re-sort so the chart reads top-to-bottom by value (positive on top)
     plot_order = np.argsort(sv_top)
     sv_plot = sv_top[plot_order]
     tok_plot = tok_top[plot_order]
-    colors = ["#e05c5c" if v > 0 else "#3dbdb0" for v in sv_plot]
     fig_height = max(4, len(sv_plot) * 0.38)
     fig, ax = plt.subplots(figsize=(8, fig_height), facecolor="white")
     ax.set_facecolor("white")
     y_pos = np.arange(len(sv_plot))
     bars = ax.barh(y_pos, sv_plot, color=colors, height=0.6, edgecolor="none")
     # Zero line
     ax.axvline(0, color="#333333", linewidth=0.9, zorder=3)
     ax.set_yticks(y_pos)
     ax.set_yticklabels(tok_plot, fontsize=10, color="#222222")
     ax.set_xlabel("SHAP Value — impact on ADR prediction", fontsize=10, color="#444444")
     ax.set_title(
-        "Token-Level Feature Importance\n"
-        "■ Red = pushes toward ADR    ■ Teal = pushes away",
-        fontsize=11, color="#222222", pad=10
     )
-    # Legend patches
-    red_patch  = mpatches.Patch(color="#e05c5c", label="Pushes toward ADR")
-    teal_patch = mpatches.Patch(color="#3dbdb0", label="Pushes away from ADR")
-    ax.legend(handles=[red_patch, teal_patch], fontsize=9,
               loc="lower right", framealpha=0.7)
     ax.spines["top"].set_visible(False)
     ax.spines["right"].set_visible(False)
     ax.spines["left"].set_visible(False)
     ax.tick_params(axis="y", length=0)
     ax.tick_params(axis="x", colors="#555555")
     ax.xaxis.label.set_color("#555555")
     plt.tight_layout()
     buf = io.BytesIO()
     fig.savefig(buf, format="png", dpi=130, bbox_inches="tight",
                 facecolor="white")
@@ -118,22 +122,22 @@ def render_shap_bar_chart(shap_values, class_idx: int = 1) -> str:
         f"style='width:100%; max-width:760px; display:block; margin:auto;' />"
         f"</div>"
     )
 # ── Main prediction function ──────────────────────────────────────────────────
 def adr_predict(x):
     text_input = str(x).lower()
     encoded_input = tokenizer(text_input, return_tensors="pt").to(device)
     output = model(**encoded_input)
     scores = torch.softmax(output.logits, dim=-1)[0].detach().cpu().numpy()
     # ── SHAP (bar chart) ──────────────────────────────────────────────────────
     try:
         shap_values = explainer([text_input])
         shap_html = render_shap_bar_chart(shap_values[0], class_idx=1)
     except Exception as e:
         shap_html = f"<p style='color:red;'>SHAP explanation error: {e}</p>"
     # ── NER ───────────────────────────────────────────────────────────────────
     try:
         res = ner_pipe(text_input)
@@ -146,7 +150,7 @@ def adr_predict(x):
             "Diagnostic_procedure": "#eeeeee",
             "Biological_structure": "#d9d9d9",
         }
         htext = "<div style='line-height:2.0; font-size:1.1em; color:black;'>"
         prev_end = 0
         res = sorted(res, key=lambda e: e["start"])
@@ -166,15 +170,15 @@ def adr_predict(x):
         htext += text_input[prev_end:] + "</div>"
     except Exception:
         htext = "<p style='color:black;'>NER processing error.</p>"
     label_output = {
         "Severe Reaction":     float(scores[1]),
         "Non-severe Reaction": float(scores[0]),
     }
     return label_output, shap_html, htext
 # ── UI ─────────────────────────────────────────────────────────────────────────
 custom_css = """
 .gradio-container { font-family: 'Inter', system-ui, sans-serif; }
@@ -188,16 +192,16 @@ custom_css = """
 }
 footer { visibility: hidden; }
 """
 with gr.Blocks(title="ADR Detector", css=custom_css, theme=gr.themes.Soft()) as demo:
     with gr.Column(elem_classes="main-header"):
         gr.Markdown("# Adverse Drug Reaction (ADR) Detector")
         gr.Markdown(
             "Analyze clinical text for potential medication-related severity "
             "and key medical entities."
         )
     with gr.Row():
         # ── Left column: input ────────────────────────────────────────────────
         with gr.Column(scale=1):
@@ -209,7 +213,7 @@ with gr.Blocks(title="ADR Detector", css=custom_css, theme=gr.themes.Soft()) as
                 elem_id="input-text",
             )
             submit_btn = gr.Button("Run Analysis", variant="primary")
             gr.Markdown("### Examples")
             gr.Examples(
                 examples=[
@@ -220,28 +224,28 @@ with gr.Blocks(title="ADR Detector", css=custom_css, theme=gr.themes.Soft()) as
                 ],
                 inputs=[prob1],
             )
         # ── Right column: outputs ─────────────────────────────────────────────
         with gr.Column(scale=1):
             gr.Markdown("### Classification")
             label = gr.Label(label="Severity Probability")
             gr.Markdown("### Medical Entities")
             htext_out = gr.HTML(label="NER Mapping", elem_classes="output-box")
             gr.Markdown("### Model Logic (SHAP)")
             shap_out = gr.HTML(label="Feature Importance", elem_classes="output-box")
     gr.Markdown("---")
     gr.Markdown(
         "Disclaimer: This tool is for research purposes only and does not "
         "constitute medical advice."
     )
     submit_btn.click(
         fn=adr_predict,
         inputs=[prob1],
         outputs=[label, shap_out, htext_out],
     )
 demo.launch()

 import sys
 import csv
 import os
 HF_TOKEN = os.getenv("hf_token")
 csv.field_size_limit(sys.maxsize)
 device = "cuda:0" if torch.cuda.is_available() else "cpu"
 # ── Load classification model ──────────────────────────────────────────────────
 tokenizer = AutoTokenizer.from_pretrained(
     "willwim/adr_SJM_Notebook-Copy_for_T3", token=HF_TOKEN
 model = AutoModelForSequenceClassification.from_pretrained(
     "willwim/adr_SJM_Notebook-Copy_for_T3", token=HF_TOKEN
 ).to(device)
 pred = transformers.pipeline(
     "text-classification", model=model, tokenizer=tokenizer,
     top_k=None, device=device
 )
 explainer = shap.Explainer(pred)
 # ── Load NER model ─────────────────────────────────────────────────────────────
 ner_tokenizer = AutoTokenizer.from_pretrained("d4data/biomedical-ner-all")
 ner_model = AutoModelForTokenClassification.from_pretrained("d4data/biomedical-ner-all")
     "ner", model=ner_model, tokenizer=ner_tokenizer,
     aggregation_strategy="simple"
 )
 # ── Custom SHAP bar-chart renderer ─────────────────────────────────────────────
 def render_shap_bar_chart(shap_values, class_idx: int = 1) -> str:
     """
     # .values shape: (n_tokens, n_classes)  or  (n_tokens,) when binary
     values = shap_values.values          # (n_tokens, n_classes)
     tokens = shap_values.data            # list/array of token strings
     if values.ndim == 2:
         sv = values[:, class_idx]        # SHAP values for "Severe Reaction"
     else:
         sv = values
     # Sort by absolute magnitude and keep top-N for readability
     TOP_N = 20
     order = np.argsort(np.abs(sv))[::-1][:TOP_N]
     sv_top = sv[order]
     tok_top = np.array(tokens)[order]
     # Re-sort so the chart reads top-to-bottom by value (positive on top)
     plot_order = np.argsort(sv_top)
     sv_plot = sv_top[plot_order]
     tok_plot = tok_top[plot_order]
+    COLOR_POSITIVE = "#cc1111"   # bold red  — increases severe ADR probability
+    COLOR_NEGATIVE = "#1a6fcc"   # strong blue — decreases severe ADR probability
+    colors = [COLOR_POSITIVE if v > 0 else COLOR_NEGATIVE for v in sv_plot]
     fig_height = max(4, len(sv_plot) * 0.38)
     fig, ax = plt.subplots(figsize=(8, fig_height), facecolor="white")
     ax.set_facecolor("white")
     y_pos = np.arange(len(sv_plot))
     bars = ax.barh(y_pos, sv_plot, color=colors, height=0.6, edgecolor="none")
     # Zero line
     ax.axvline(0, color="#333333", linewidth=0.9, zorder=3)
     ax.set_yticks(y_pos)
     ax.set_yticklabels(tok_plot, fontsize=10, color="#222222")
     ax.set_xlabel("SHAP Value — impact on ADR prediction", fontsize=10, color="#444444")
     ax.set_title(
+        "Token-Feature Importance: Words Driving Prediction",
+        fontsize=12, fontweight="bold", color="#222222", pad=12
     )
+    # Legend patches — colors match the bars exactly
+    red_patch  = mpatches.Patch(color=COLOR_POSITIVE,
+                                label="Increases severe ADR probability")
+    blue_patch = mpatches.Patch(color=COLOR_NEGATIVE,
+                                label="Decreases severe ADR probability")
+    ax.legend(handles=[red_patch, blue_patch], fontsize=9,
               loc="lower right", framealpha=0.7)
     ax.spines["top"].set_visible(False)
     ax.spines["right"].set_visible(False)
     ax.spines["left"].set_visible(False)
     ax.tick_params(axis="y", length=0)
     ax.tick_params(axis="x", colors="#555555")
     ax.xaxis.label.set_color("#555555")
     plt.tight_layout()
     buf = io.BytesIO()
     fig.savefig(buf, format="png", dpi=130, bbox_inches="tight",
                 facecolor="white")
         f"style='width:100%; max-width:760px; display:block; margin:auto;' />"
         f"</div>"
     )
 # ── Main prediction function ──────────────────────────────────────────────────
 def adr_predict(x):
     text_input = str(x).lower()
     encoded_input = tokenizer(text_input, return_tensors="pt").to(device)
     output = model(**encoded_input)
     scores = torch.softmax(output.logits, dim=-1)[0].detach().cpu().numpy()
     # ── SHAP (bar chart) ──────────────────────────────────────────────────────
     try:
         shap_values = explainer([text_input])
         shap_html = render_shap_bar_chart(shap_values[0], class_idx=1)
     except Exception as e:
         shap_html = f"<p style='color:red;'>SHAP explanation error: {e}</p>"
     # ── NER ───────────────────────────────────────────────────────────────────
     try:
         res = ner_pipe(text_input)
             "Diagnostic_procedure": "#eeeeee",
             "Biological_structure": "#d9d9d9",
         }
         htext = "<div style='line-height:2.0; font-size:1.1em; color:black;'>"
         prev_end = 0
         res = sorted(res, key=lambda e: e["start"])
         htext += text_input[prev_end:] + "</div>"
     except Exception:
         htext = "<p style='color:black;'>NER processing error.</p>"
     label_output = {
         "Severe Reaction":     float(scores[1]),
         "Non-severe Reaction": float(scores[0]),
     }
     return label_output, shap_html, htext
 # ── UI ─────────────────────────────────────────────────────────────────────────
 custom_css = """
 .gradio-container { font-family: 'Inter', system-ui, sans-serif; }
 }
 footer { visibility: hidden; }
 """
 with gr.Blocks(title="ADR Detector", css=custom_css, theme=gr.themes.Soft()) as demo:
     with gr.Column(elem_classes="main-header"):
         gr.Markdown("# Adverse Drug Reaction (ADR) Detector")
         gr.Markdown(
             "Analyze clinical text for potential medication-related severity "
             "and key medical entities."
         )
     with gr.Row():
         # ── Left column: input ────────────────────────────────────────────────
         with gr.Column(scale=1):
                 elem_id="input-text",
             )
             submit_btn = gr.Button("Run Analysis", variant="primary")
             gr.Markdown("### Examples")
             gr.Examples(
                 examples=[
                 ],
                 inputs=[prob1],
             )
         # ── Right column: outputs ─────────────────────────────────────────────
         with gr.Column(scale=1):
             gr.Markdown("### Classification")
             label = gr.Label(label="Severity Probability")
             gr.Markdown("### Medical Entities")
             htext_out = gr.HTML(label="NER Mapping", elem_classes="output-box")
             gr.Markdown("### Model Logic (SHAP)")
             shap_out = gr.HTML(label="Feature Importance", elem_classes="output-box")
     gr.Markdown("---")
     gr.Markdown(
         "Disclaimer: This tool is for research purposes only and does not "
         "constitute medical advice."
     )
     submit_btn.click(
         fn=adr_predict,
         inputs=[prob1],
         outputs=[label, shap_out, htext_out],
     )
 demo.launch()