"""
MindWatch — Explainability Module

Word-importance scoring and plain-text explanations for model predictions.
The current implementation is lexicon-based: a lightweight stand-in for
SHAP-based word importance and attention visualization.
"""
import numpy as np
from typing import List, Dict, Tuple
from utils.preprocessing import preprocess_text, tokenize
# Distress-indicative lexicon (described by the author as research-backed;
# no source is cited in this file).
#
# Maps a distress category to the set of lowercase words that signal it.
# Key order matters to callers that stop at the first category containing a
# word: "overwhelmed" is listed under both "anxiety" and "stress", so a
# first-match lookup reports it as "anxiety".
DISTRESS_LEXICON = {
    "depression": {
        "alone", "broken", "burden", "crying", "dark", "dead", "dying",
        "empty", "exhausted", "failure", "guilty", "hate", "heavy",
        "hopeless", "lost", "meaningless", "miserable", "nothing", "numb",
        "pointless", "sad", "suffering", "tired", "trapped", "useless",
        "worthless",
    },
    "anxiety": {
        "afraid", "apprehensive", "catastrophe", "dread", "fear", "nervous",
        "obsessing", "overthinking", "overwhelmed", "panic", "paranoid",
        "phobia", "racing", "restless", "scared", "shaking", "tense",
        "terrified", "trembling", "uneasy", "worried",
    },
    "stress": {
        "breaking", "burnout", "chaos", "cracking", "deadline", "demanding",
        "drowning", "exhausting", "frantic", "frustrating", "hectic",
        "impossible", "overwhelmed", "overworked", "pressure", "snapped",
        "stressed", "struggling", "swamped",
    },
}

# Intensity modifiers: words that amplify whatever follows them.
INTENSIFIERS = {
    "absolutely", "completely", "extremely", "so", "totally", "utterly", "very",
}

# Negation words. Note that "nothing" is also a "depression" lexicon entry.
NEGATORS = {
    "barely", "cannot", "hardly", "never", "no", "nobody", "none", "not",
    "nothing",
}
def compute_word_importance(
    text: str,
    predicted_label: str,
    probabilities: Dict[str, float],
) -> List[Tuple[str, float]]:
    """
    Score every token of ``text`` with a lexicon-driven heuristic.

    This is a lightweight alternative to full SHAP, intended for the demo.
    The text is run through ``preprocess_text`` and ``tokenize``; each
    resulting token then accumulates the following weights:

    * +0.8 if it is in the predicted label's lexicon, otherwise +0.4 if it
      is in any category of ``DISTRESS_LEXICON``
    * +0.5 if it is a negator, +0.3 if it is an intensifier
    * +0.15 if it is a first-person pronoun (self-focus)
    * +0.35 if it is absolutist language
    * +0.3 if it is a lexicon word directly preceded by an intensifier

    The sum is multiplied by the predicted label's probability (0.5 when
    the label is absent from ``probabilities``), rounded to 3 decimals, and
    finally divided by the largest score so the top token scores 1.0.

    Args:
        text: Raw input text.
        predicted_label: Label whose lexicon counts as the primary match.
        probabilities: Mapping of label to predicted probability.

    Returns:
        List of (word, importance_score) tuples, one per token (duplicates
        included), sorted by descending score. Tokens with equal scores
        keep their order of appearance. Empty if tokenization yields
        nothing.
    """
    tokens = tokenize(preprocess_text(text))
    if not tokens:
        return []

    confidence = probabilities.get(predicted_label, 0.5)
    label_lexicon = DISTRESS_LEXICON.get(predicted_label, set())
    any_lexicon = set().union(*DISTRESS_LEXICON.values())

    self_references = {"i", "me", "my", "myself"}
    absolutes = {"always", "never", "everything", "nothing", "completely"}

    weighted = []
    previous = None  # token immediately before the current one, if any
    for token in tokens:
        is_distress_word = token in any_lexicon
        raw = 0.0

        # Primary category match is the strongest signal; any other
        # lexicon hit counts for half as much.
        if token in label_lexicon:
            raw += 0.8
        elif is_distress_word:
            raw += 0.4

        # Negation / intensifier words are informative in themselves.
        if token in NEGATORS:
            raw += 0.5
        if token in INTENSIFIERS:
            raw += 0.3

        # First-person pronouns (self-focus).
        if token in self_references:
            raw += 0.15

        # Absolutist language.
        if token in absolutes:
            raw += 0.35

        # Context bonus: a distress word right after an intensifier.
        if previous in INTENSIFIERS and is_distress_word:
            raw += 0.3

        # NOTE(review): every token is scaled by the same factor, so the
        # max-normalization below cancels it whenever it is positive; it
        # only changes the outcome when the confidence is 0.
        raw *= confidence
        weighted.append((token, round(raw, 3)))
        previous = token

    # Normalize against the best score; skipped when nothing scored.
    peak = max((value for _, value in weighted), default=1.0)
    if peak > 0:
        weighted = [(token, round(value / peak, 3)) for token, value in weighted]

    # Stable sort: ties stay in order of appearance.
    return sorted(weighted, key=lambda pair: pair[1], reverse=True)
def get_important_words(
    text: str,
    predicted_label: str,
    probabilities: Dict[str, float],
    top_k: int = 8,
) -> List[Dict]:
    """
    Get top-k important words with their scores and categories.

    Words come from ``compute_word_importance`` in descending score order.
    Repeated words, words with a non-positive score, and single-character
    tokens (e.g. "i") are skipped.

    Each word is labelled with the first ``DISTRESS_LEXICON`` category that
    contains it, or "neutral" if none does. Membership in ``NEGATORS``
    overrides that with "negation", and membership in ``INTENSIFIERS``
    overrides both with "intensifier".

    Args:
        text: Raw input text.
        predicted_label: Label predicted for the text.
        probabilities: Mapping of label to predicted probability.
        top_k: Maximum number of words to return. Zero or a negative value
            yields an empty list.

    Returns:
        Up to ``top_k`` dicts with keys "word", "score" and "category".
    """
    # The limit is checked only after an append below, so without this
    # guard a non-positive top_k would still return one entry.
    if top_k <= 0:
        return []

    word_scores = compute_word_importance(text, predicted_label, probabilities)

    results = []
    seen = set()
    for word, score in word_scores:
        if word in seen or score <= 0 or len(word) < 2:
            continue
        seen.add(word)

        category = "neutral"
        for cat, lexicon in DISTRESS_LEXICON.items():
            if word in lexicon:
                category = cat
                break
        # Modifier roles take precedence over the lexicon category.
        if word in NEGATORS:
            category = "negation"
        if word in INTENSIFIERS:
            category = "intensifier"

        results.append({
            "word": word,
            "score": score,
            "category": category,
        })
        if len(results) >= top_k:
            break
    return results
def format_explanation(
    text: str,
    predicted_label: str,
    probabilities: Dict[str, float],
) -> str:
    """
    Render a prediction and its key indicator words as plain text.

    The first line gives the title-cased label and its probability as a
    percentage (0.0 when the label is absent from ``probabilities``).
    If ``get_important_words`` finds nothing, a single "no indicators"
    line follows. Otherwise a "Key indicators:" section lists one row per
    word with its category, a bar of up to ten blocks proportional to the
    score, and the score to two decimals.

    Args:
        text: Raw input text.
        predicted_label: Label predicted for the text.
        probabilities: Mapping of label to predicted probability.

    Returns:
        The multi-line explanation string.
    """
    def render_row(entry: Dict) -> str:
        # One block character per full tenth of the score.
        bar = "█" * int(entry["score"] * 10)
        return f" • \"{entry['word']}\" [{entry['category']}] {bar} {entry['score']:.2f}"

    indicators = get_important_words(text, predicted_label, probabilities)
    confidence = probabilities.get(predicted_label, 0.0)
    header = f"Prediction: {predicted_label.title()} (confidence: {confidence:.1%})"

    if not indicators:
        return header + "\nNo strong distress indicators found in the text."

    rows = [render_row(entry) for entry in indicators]
    return "\n".join([header, "", "Key indicators:"] + rows)
if __name__ == "__main__":
    # Manual smoke demo: explain one hard-coded sentence, then dump the raw
    # word entries behind that explanation.
    demo_label = "depression"
    demo_text = "I feel completely exhausted and nothing seems to work anymore."
    demo_probs = {"depression": 0.72, "anxiety": 0.12, "stress": 0.11, "normal": 0.05}

    print(format_explanation(demo_text, demo_label, demo_probs))
    print()
    print("Important words:")
    for entry in get_important_words(demo_text, demo_label, demo_probs):
        print(f" {entry}")