Spaces:
Sleeping
Sleeping
Initial deploy
Browse files- README.md +11 -6
- app.py +155 -0
- requirements.txt +5 -0
README.md
CHANGED
|
@@ -1,12 +1,17 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
-
colorTo:
|
| 6 |
sdk: gradio
|
| 7 |
-
sdk_version:
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
|
|
|
| 10 |
---
|
| 11 |
|
| 12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: SHAP Text Explainer
|
| 3 |
+
emoji: "\U0001F50E"
|
| 4 |
+
colorFrom: indigo
|
| 5 |
+
colorTo: blue
|
| 6 |
sdk: gradio
|
| 7 |
+
sdk_version: "5.29.0"
|
| 8 |
app_file: app.py
|
| 9 |
pinned: false
|
| 10 |
+
license: mit
|
| 11 |
---
|
| 12 |
|
| 13 |
+
# SHAP Text Explainer
|
| 14 |
+
|
| 15 |
+
See which words push a text classifier toward positive or negative.
|
| 16 |
+
|
| 17 |
+
**Course**: 215 AI Safety ch8 — Explainability (NLP)
|
app.py
ADDED
|
@@ -0,0 +1,155 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
SHAP Text Explainer — Word-level attribution for text classification
Course: 215 AI Safety ch8
"""

import numpy as np
import torch
import gradio as gr
from transformers import pipeline

# SST-2 fine-tuned DistilBERT sentiment checkpoint.
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"

# NOTE(review): `return_all_scores=True` is deprecated in recent
# transformers releases in favor of `top_k=None`, but `top_k` also changes
# the output nesting for single-string inputs, so the legacy flag is kept
# deliberately — callers below rely on the `classifier(text)[0]` shape.
classifier = pipeline(
    "sentiment-analysis",
    model=MODEL_NAME,
    return_all_scores=True,
)

# Label set emitted by the SST-2 head, in index order.
LABEL_NAMES = ["NEGATIVE", "POSITIVE"]
def simple_word_attribution(text: str):
    """
    Explain a sentiment prediction with word-level leave-one-out (LOO)
    attribution.

    Each whitespace-separated word is removed in turn and the drop in the
    predicted class's score is taken as that word's attribution. Faster and
    more reliable than full SHAP on CPU.

    Parameters
    ----------
    text : str
        Input sentence; tokenized by ``str.split`` (whitespace).

    Returns
    -------
    tuple[str, str, dict]
        ``(highlighted HTML + legend, markdown report, label -> score dict)``.
        All-empty values are returned for blank input.
    """
    import html  # local stdlib import: only needed for escaping words below

    if not text.strip():
        return "", "", {}

    # Baseline prediction on the full text.
    base_result = classifier(text)[0]
    base_scores = {r["label"]: r["score"] for r in base_result}
    pred_label = max(base_scores, key=base_scores.get)
    pred_score = base_scores[pred_label]

    words = text.split()
    if len(words) == 0:
        return "", "", {}

    # LOO attribution: how much the predicted class's score drops when
    # word i is removed (positive = the word supports the prediction).
    attributions = []
    for i in range(len(words)):
        masked = " ".join(words[:i] + words[i + 1 :])
        if not masked.strip():
            # Removing the only word leaves nothing to classify.
            attributions.append(0.0)
            continue
        result = classifier(masked)[0]
        masked_scores = {r["label"]: r["score"] for r in result}
        diff = base_scores[pred_label] - masked_scores[pred_label]
        attributions.append(diff)

    # Normalize for display; guard against division by zero when every
    # attribution is exactly 0.
    max_abs = max(abs(a) for a in attributions) if attributions else 1.0
    if max_abs == 0:
        max_abs = 1.0

    # Build highlighted HTML. Words are HTML-escaped so user input cannot
    # inject markup into the rendered view (fixes a latent injection /
    # rendering bug; previously raw words were interpolated). Also removed
    # an unused `intensity` computation that appeared in both branches.
    html_parts = []
    for word, attr in zip(words, attributions):
        norm_attr = attr / max_abs  # -1 to 1
        if norm_attr > 0:
            # Pushes toward prediction (red = positive contribution)
            bg = f"rgba(239, 68, 68, {abs(norm_attr) * 0.6})"
        else:
            # Pushes against prediction (blue = negative contribution)
            bg = f"rgba(59, 130, 246, {abs(norm_attr) * 0.6})"
        html_parts.append(
            f'<span style="background:{bg};padding:2px 4px;margin:1px;'
            f'border-radius:3px;display:inline-block;">{html.escape(word)}</span>'
        )

    highlighted_html = (
        '<div style="font-size:16px;line-height:2;padding:10px;">'
        + " ".join(html_parts)
        + "</div>"
    )

    # Legend explaining the color coding.
    legend = (
        '<div style="margin-top:10px;font-size:13px;">'
        '<span style="background:rgba(239,68,68,0.5);padding:2px 8px;border-radius:3px;">Red</span>'
        f" = pushes toward {pred_label} "
        '<span style="background:rgba(59,130,246,0.5);padding:2px 8px;border-radius:3px;">Blue</span>'
        f" = pushes against {pred_label} "
        "(intensity = strength)"
        "</div>"
    )

    # Markdown report: prediction summary plus per-label scores.
    pred_info = (
        f"**Prediction: {pred_label}** ({pred_score:.1%})\n\n"
        f"| Label | Score |\n|---|---|\n"
    )
    for r in base_result:
        pred_info += f"| {r['label']} | {r['score']:.1%} |\n"

    # Attribution table, strongest absolute effects first (top 15 words).
    pred_info += "\n**Word attributions (leave-one-out):**\n\n"
    pred_info += "| Word | Attribution | Effect |\n|---|---|---|\n"
    sorted_attr = sorted(
        zip(words, attributions), key=lambda x: abs(x[1]), reverse=True
    )
    for word, attr in sorted_attr[:15]:
        direction = "supports" if attr > 0 else "opposes"
        pred_info += f"| {word} | {attr:+.4f} | {direction} |\n"

    return highlighted_html + legend, pred_info, base_scores
|
| 110 |
+
|
| 111 |
+
|
def _explain_for_ui(text):
    # Adapter for the click handler: the UI shows only the first two
    # outputs (highlighted HTML and the markdown report); the raw score
    # dict is dropped.
    highlighted_html, details, _scores = simple_word_attribution(text)
    return highlighted_html, details


# Gradio UI: input column (textbox + button + note) on the left,
# explanation outputs on the right.
with gr.Blocks(title="SHAP Text Explainer") as demo:
    gr.Markdown(
        "# SHAP Text Explainer\n"
        "Enter text to see which words contribute most to the sentiment prediction.\n"
        "Uses leave-one-out attribution (similar to SHAP) for word-level explanations.\n"
        "*Course: 215 AI Safety ch8 — Explainability*"
    )

    with gr.Row():
        with gr.Column():
            text_input = gr.Textbox(
                label="Input Text",
                placeholder="Enter a sentence to analyze...",
                lines=3,
            )
            explain_btn = gr.Button("Explain", variant="primary")
            gr.Markdown(
                "*Note: Each word is removed one at a time to measure its impact. "
                "This takes a few seconds for longer texts.*"
            )

        with gr.Column():
            highlighted = gr.HTML(label="Word Attribution")
            details_md = gr.Markdown()

    explain_btn.click(_explain_for_ui, [text_input], [highlighted, details_md])

    gr.Examples(
        examples=[
            "This movie was absolutely fantastic! The acting was superb and the plot kept me engaged throughout.",
            "The food was terrible and the service was even worse. I will never go back to this restaurant.",
            "The product works okay but nothing special. It does what it says but I expected more for the price.",
            "I love how this book combines beautiful writing with deep philosophical insights.",
            "The flight was delayed by 3 hours and the airline offered no compensation or explanation.",
        ],
        inputs=[text_input],
    )

if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=5.0.0
|
| 2 |
+
transformers>=4.30.0
|
| 3 |
+
torch>=2.0.0
|
| 4 |
+
shap>=0.42.0
|
| 5 |
+
numpy
|