File size: 4,719 Bytes
af09308
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9bb8a57
 
 
af09308
9bb8a57
 
af09308
9bb8a57
 
 
 
 
 
 
af09308
 
 
 
 
 
 
 
 
 
 
 
9bb8a57
af09308
9bb8a57
 
 
 
 
 
 
 
 
 
 
 
 
 
af09308
9bb8a57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af09308
 
 
9bb8a57
af09308
 
 
 
 
 
9bb8a57
af09308
 
9bb8a57
 
af09308
 
9bb8a57
 
af09308
 
9bb8a57
af09308
 
 
9bb8a57
 
af09308
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# src/explainability/shap_explainer.py

import os
import torch
import shap
from transformers import pipeline, XLMRobertaTokenizer, XLMRobertaForSequenceClassification

class MindGuardSHAPExplainer:
    """
    Explainable AI (XAI) component of MindGuard.

    Wraps a fine-tuned XLM-RoBERTa emotion classifier (loaded from the
    Hugging Face Hub) with a SHAP text explainer, so each prediction can be
    attributed to the exact input tokens that drove it. Reports are written
    as interactive HTML files under ``<cwd>/artifacts``.
    """
    def __init__(self):
        """Load the tokenizer/model from the Hub and warm up the SHAP explainer.

        Raises:
            Exception: any failure during model download, pipeline
                construction, or SHAP initialization is logged and re-raised.
        """
        print("🔍 Initializing MindGuard SHAP Explainability Engine...")

        # --- HUB ARCHITECTURE PATHING ---
        # 1. Hub repository ID (NOT a local path).
        self.model_id = "MohitRajput45/mindguard-xlmr"

        # 2. Subfolder inside that Hub repo where the weights live.
        self.hf_subfolder = "artifacts/xlmr_weights/final_mindguard_model"

        # 3. Where the visual HTML reports will be saved (relative to CWD).
        # On Hugging Face Spaces, /app is the root.
        self.artifacts_dir = os.path.join(os.getcwd(), "artifacts")
        # exist_ok=True avoids the check-then-create race of the previous
        # os.path.exists() + os.makedirs() pattern.
        os.makedirs(self.artifacts_dir, exist_ok=True)

        # --- THE TRANSLATION DICTIONARY ---
        # Maps class index -> human-readable emotion label (35 classes total).
        self.emotion_map = {
            0: 'Anxiety', 1: 'Bipolar', 2: 'Depression', 3: 'Normal',
            4: 'Personality disorder', 5: 'Stress', 6: 'Suicidal', 7: 'admiration',
            8: 'amusement', 9: 'anger', 10: 'annoyance', 11: 'approval',
            12: 'caring', 13: 'confusion', 14: 'curiosity', 15: 'desire',
            16: 'disappointment', 17: 'disapproval', 18: 'disgust', 19: 'embarrassment',
            20: 'excitement', 21: 'fear', 22: 'gratitude', 23: 'grief',
            24: 'joy', 25: 'love', 26: 'nervousness', 27: 'neutral',
            28: 'optimism', 29: 'pride', 30: 'realization', 31: 'relief',
            32: 'remorse', 33: 'sadness', 34: 'surprise'
        }

        print(f"Loading Core Brain from Hub: {self.model_id}...")

        # --- LOAD THE AI CORE FROM HUB ---
        try:
            self.tokenizer = XLMRobertaTokenizer.from_pretrained(
                self.model_id,
                subfolder=self.hf_subfolder
            )
            self.model = XLMRobertaForSequenceClassification.from_pretrained(
                self.model_id,
                subfolder=self.hf_subfolder
            )

            # Inject the label map so pipeline outputs carry emotion names
            # instead of generic "LABEL_0"-style identifiers.
            self.model.config.id2label = self.emotion_map
            self.model.config.label2id = {v: k for k, v in self.emotion_map.items()}

            # Device index for the HF pipeline: 0 = first GPU, -1 = CPU.
            self.device = 0 if torch.cuda.is_available() else -1

            # top_k=None returns scores for all 35 emotions, which SHAP
            # needs to attribute every output class.
            self.classifier = pipeline(
                "text-classification",
                model=self.model,
                tokenizer=self.tokenizer,
                device=self.device,
                top_k=None # Get scores for all 35 emotions
            )

            print("⚙️ Warming up Game Theory Math (SHAP)...")
            self.explainer = shap.Explainer(self.classifier)
            print("✅ SHAP Explainer ready!")

        except Exception as e:
            print(f"❌ SHAP Initialization failed: {e}")
            # Bare `raise` preserves the original traceback (unlike `raise e`).
            raise

    def generate_visual_report(self, text):
        """
        Takes raw text, calculates SHAP values, and outputs an interactive HTML file.

        Args:
            text (str): the raw input sentence/paragraph to explain.

        Returns:
            str: absolute path of the saved HTML report.
        """
        print(f"\n🧠 Analyzing: '{text}'")

        # 1. Run the Game Theory calculations.
        shap_values = self.explainer([text])

        # 2. Define save location.
        html_path = os.path.join(self.artifacts_dir, "shap_report.html")

        # 3. Find the model's top class. The model output per class is
        #    base_values + sum of SHAP values across tokens; summing the SHAP
        #    values alone (as before) selects the class most *boosted* by this
        #    text, not the class the model is actually most confident in.
        explanation = shap_values[0]  # (tokens, 35)
        best_class_index = (
            explanation.base_values + explanation.values.sum(axis=0)
        ).argmax()

        # 4. Generate visualization for the winning emotion.
        # Note: best_class_index selects along the class (3rd) dimension.
        shap_html = shap.plots.text(shap_values[0, :, best_class_index], display=False)

        # 5. Save the HTML.
        with open(html_path, "w", encoding="utf-8") as f:
            f.write(shap_html)

        print(f"✅ Diagnostic Complete! Visual Report saved to: {html_path}")
        return html_path

# --- EXECUTION BLOCK ---
if __name__ == "__main__":
    # Smoke test: build the engine once and explain a single anxious sentence.
    engine = MindGuardSHAPExplainer()
    demo_text = "I have a massive presentation tomorrow and my chest is tight."
    engine.generate_visual_report(demo_text)