MohitRajput45 committed on
Commit
9bb8a57
·
verified ·
1 Parent(s): 68035ef

Update src/explainability/shap_explainer.py

Browse files
Files changed (1) hide show
  1. src/explainability/shap_explainer.py +55 -57
src/explainability/shap_explainer.py CHANGED
@@ -14,21 +14,20 @@ class MindGuardSHAPExplainer:
14
  def __init__(self):
15
  print("🔍 Initializing MindGuard SHAP Explainability Engine...")
16
 
17
- # --- STRICT ARCHITECTURE PATHING ---
18
- # 1. Locate the current script (src/explainability/shap_explainer.py)
19
- self.script_dir = os.path.dirname(os.path.abspath(__file__))
20
 
21
- # 2. Traverse up TWO directories to hit the MINDGUARD_AI_PROJECT root
22
- self.project_root = os.path.abspath(os.path.join(self.script_dir, "../../"))
23
 
24
- # 3. Define the exact path to where we saved the trained brain in Day 3
25
- self.model_path = os.path.join(self.project_root, "artifacts", "xlmr_weights", "final_mindguard_model")
26
-
27
- # 4. Define where the visual HTML reports will be saved
28
- self.artifacts_dir = os.path.join(self.project_root, "artifacts")
29
-
30
- # --- THE FIX 1: The Translation Dictionary ---
31
- # We must define the 35 English emotions so SHAP doesn't output "LABEL_X"
32
  self.emotion_map = {
33
  0: 'Anxiety', 1: 'Bipolar', 2: 'Depression', 3: 'Normal',
34
  4: 'Personality disorder', 5: 'Stress', 6: 'Suicidal', 7: 'admiration',
@@ -41,73 +40,72 @@ class MindGuardSHAPExplainer:
41
  32: 'remorse', 33: 'sadness', 34: 'surprise'
42
  }
43
 
44
- print(f"Loading Core Brain from: {self.model_path}...")
45
 
46
- # --- LOAD THE AI CORE ---
47
- # Initialize the tokenizer (translates text to numbers) and the model (the brain)
48
- self.tokenizer = XLMRobertaTokenizer.from_pretrained(self.model_path)
49
- self.model = XLMRobertaForSequenceClassification.from_pretrained(self.model_path)
50
-
51
- # --- THE FIX 2: Inject the Dictionary into the Model's Brain ---
52
- # This permanently forces the Hugging Face model to speak English instead of Math
53
- self.model.config.id2label = self.emotion_map
54
- self.model.config.label2id = {v: k for k, v in self.emotion_map.items()}
 
 
 
 
 
55
 
56
- # Use Hugging Face's pipeline to wrap the model for easy SHAP integration
57
- # Set device to 0 if a GPU is detected, otherwise fallback to CPU (-1)
58
- self.device = 0 if torch.cuda.is_available() else -1
59
- self.classifier = pipeline(
60
- "text-classification",
61
- model=self.model,
62
- tokenizer=self.tokenizer,
63
- device=self.device,
64
- top_k=None # top_k=None forces the AI to output scores for ALL 35 emotions, not just the top guess
65
- )
66
-
67
- # --- WARM UP SHAP ---
68
- print("⚙️ Warming up Game Theory Math (SHAP)...")
69
- # Pass our classifier pipeline into the SHAP Explainer engine
70
- self.explainer = shap.Explainer(self.classifier)
71
- print("✅ SHAP Explainer ready!")
 
 
 
72
 
73
  def generate_visual_report(self, text):
74
  """
75
- Takes a raw string of text, runs it through the model,
76
- calculates SHAP values, and outputs an interactive HTML file.
77
  """
78
  print(f"\n🧠 Analyzing: '{text}'")
79
 
80
  # 1. Run the Game Theory calculations
81
- # This isolates the impact of every single word on the final prediction
82
  shap_values = self.explainer([text])
83
 
84
- # 2. Define the exact save location for the HTML report
85
  html_path = os.path.join(self.artifacts_dir, "shap_report.html")
86
 
87
- # --- THE FIX 3: Targeted Slicing ---
88
- # Instead of drawing 35 overlapping arrows, find the emotion the AI was MOST confident in.
89
  best_class_index = shap_values[0].values.sum(axis=0).argmax()
90
 
91
- # 3. Generate the visualization ONLY for the winning emotion
92
- # display=False ensures it generates the raw HTML instead of trying to open a Jupyter widget
93
  shap_html = shap.plots.text(shap_values[0, :, best_class_index], display=False)
94
 
95
- # 4. Save the HTML string to a physical file in the artifacts folder
96
  with open(html_path, "w", encoding="utf-8") as f:
97
  f.write(shap_html)
98
 
99
- print(f"✅ Diagnostic Complete!")
100
- print(f"Visual Report saved to: {html_path}")
101
- print("Go to your 'artifacts' folder and open 'shap_report.html' in your browser.")
102
 
103
  # --- EXECUTION BLOCK ---
104
- # This block only runs if this specific file is executed directly from the terminal
105
  if __name__ == "__main__":
106
- # Instantiate our explainer class
107
  explainer = MindGuardSHAPExplainer()
108
-
109
- # Define a test patient input
110
  sample_text = "I have a massive presentation tomorrow and my chest is tight."
111
-
112
- # Generate the explanation report
113
  explainer.generate_visual_report(sample_text)
 
14
  def __init__(self):
15
  print("🔍 Initializing MindGuard SHAP Explainability Engine...")
16
 
17
+ # --- HUB ARCHITECTURE PATHING ---
18
+ # 1. Point to your Model Hub ID (NOT a local path)
19
+ self.model_id = "MohitRajput45/mindguard-xlmr"
20
 
21
+ # 2. Use the exact path to where the weights live inside that Hub repo
22
+ self.hf_subfolder = "artifacts/xlmr_weights/final_mindguard_model"
23
 
24
+ # 3. Define where the visual HTML reports will be saved (relative to root)
25
+ # Resolved against the current working directory (typically /app on Hugging Face Spaces).
26
+ self.artifacts_dir = os.path.join(os.getcwd(), "artifacts")
27
+ if not os.path.exists(self.artifacts_dir):
28
+ os.makedirs(self.artifacts_dir)
29
+
30
+ # --- THE TRANSLATION DICTIONARY ---
 
31
  self.emotion_map = {
32
  0: 'Anxiety', 1: 'Bipolar', 2: 'Depression', 3: 'Normal',
33
  4: 'Personality disorder', 5: 'Stress', 6: 'Suicidal', 7: 'admiration',
 
40
  32: 'remorse', 33: 'sadness', 34: 'surprise'
41
  }
42
 
43
+ print(f"Loading Core Brain from Hub: {self.model_id}...")
44
 
45
+ # --- LOAD THE AI CORE FROM HUB ---
46
+ try:
47
+ self.tokenizer = XLMRobertaTokenizer.from_pretrained(
48
+ self.model_id,
49
+ subfolder=self.hf_subfolder
50
+ )
51
+ self.model = XLMRobertaForSequenceClassification.from_pretrained(
52
+ self.model_id,
53
+ subfolder=self.hf_subfolder
54
+ )
55
+
56
+ # Inject the Dictionary into the Model's Brain
57
+ self.model.config.id2label = self.emotion_map
58
+ self.model.config.label2id = {v: k for k, v in self.emotion_map.items()}
59
 
60
+ # Set device: 0 for GPU, -1 for CPU
61
+ self.device = 0 if torch.cuda.is_available() else -1
62
+
63
+ # Hugging Face pipeline for SHAP integration
64
+ self.classifier = pipeline(
65
+ "text-classification",
66
+ model=self.model,
67
+ tokenizer=self.tokenizer,
68
+ device=self.device,
69
+ top_k=None # Get scores for all 35 emotions
70
+ )
71
+
72
+ print("⚙️ Warming up Game Theory Math (SHAP)...")
73
+ self.explainer = shap.Explainer(self.classifier)
74
+ print("✅ SHAP Explainer ready!")
75
+
76
+ except Exception as e:
77
+ print(f"❌ SHAP Initialization failed: {e}")
78
+ raise e
79
 
80
  def generate_visual_report(self, text):
81
  """
82
+ Takes raw text, calculates SHAP values, and outputs an interactive HTML file.
 
83
  """
84
  print(f"\n🧠 Analyzing: '{text}'")
85
 
86
  # 1. Run the Game Theory calculations
 
87
  shap_values = self.explainer([text])
88
 
89
+ # 2. Define save location
90
  html_path = os.path.join(self.artifacts_dir, "shap_report.html")
91
 
92
+ # 3. Targeted Slicing: Pick the emotion with the largest total SHAP attribution to visualize
93
+ # shap_values[0].values has shape (tokens, 35); summing over axis 0 (tokens) gives one total per class, and argmax selects the top class.
94
  best_class_index = shap_values[0].values.sum(axis=0).argmax()
95
 
96
+ # 4. Generate visualization for the winning emotion
97
+ # Note: best_class_index must be passed as the index for the 3rd dimension
98
  shap_html = shap.plots.text(shap_values[0, :, best_class_index], display=False)
99
 
100
+ # 5. Save the HTML
101
  with open(html_path, "w", encoding="utf-8") as f:
102
  f.write(shap_html)
103
 
104
+ print(f"✅ Diagnostic Complete! Visual Report saved to: {html_path}")
105
+ return html_path
 
106
 
107
  # --- EXECUTION BLOCK ---
 
108
  if __name__ == "__main__":
 
109
  explainer = MindGuardSHAPExplainer()
 
 
110
  sample_text = "I have a massive presentation tomorrow and my chest is tight."
 
 
111
  explainer.generate_visual_report(sample_text)