# MindGuard-AI / src/explainability/shap_explainer.py
# (Hugging Face Hub page metadata, kept for provenance:
#  uploaded by MohitRajput45, commit 9bb8a57 verified,
#  "Update src/explainability/shap_explainer.py")
# src/explainability/shap_explainer.py
import os
import torch
import shap
from transformers import pipeline, XLMRobertaTokenizer, XLMRobertaForSequenceClassification
class MindGuardSHAPExplainer:
    """
    Explainable AI (XAI) component of MindGuard.

    Uses SHAP values (SHapley Additive exPlanations, grounded in
    cooperative game theory) to attribute each prediction of the
    XLM-RoBERTa emotion classifier to the individual input words,
    showing exactly which tokens drove the model toward an emotion.
    """

    def __init__(self):
        """Load the classifier from the Hugging Face Hub and warm up SHAP.

        Downloads the tokenizer and fine-tuned XLM-R weights from the Hub,
        wraps them in a ``text-classification`` pipeline, and builds a
        ``shap.Explainer`` around it.

        Raises:
            Exception: any failure during model download, pipeline
                construction, or SHAP initialization is logged and
                re-raised unchanged.
        """
        print("🔍 Initializing MindGuard SHAP Explainability Engine...")
        # --- HUB ARCHITECTURE PATHING ---
        # 1. Model Hub repo id (NOT a local path).
        self.model_id = "MohitRajput45/mindguard-xlmr"
        # 2. Path inside that Hub repo where the weights live.
        self.hf_subfolder = "artifacts/xlmr_weights/final_mindguard_model"
        # 3. Where the visual HTML reports are saved (relative to CWD;
        #    on Hugging Face Spaces the app root is /app).
        self.artifacts_dir = os.path.join(os.getcwd(), "artifacts")
        # exist_ok=True replaces the race-prone exists()/makedirs() pair:
        # no crash if another process creates the directory in between.
        os.makedirs(self.artifacts_dir, exist_ok=True)
        # --- THE TRANSLATION DICTIONARY ---
        # Class index -> human-readable label (35 classes total).
        self.emotion_map = {
            0: 'Anxiety', 1: 'Bipolar', 2: 'Depression', 3: 'Normal',
            4: 'Personality disorder', 5: 'Stress', 6: 'Suicidal', 7: 'admiration',
            8: 'amusement', 9: 'anger', 10: 'annoyance', 11: 'approval',
            12: 'caring', 13: 'confusion', 14: 'curiosity', 15: 'desire',
            16: 'disappointment', 17: 'disapproval', 18: 'disgust', 19: 'embarrassment',
            20: 'excitement', 21: 'fear', 22: 'gratitude', 23: 'grief',
            24: 'joy', 25: 'love', 26: 'nervousness', 27: 'neutral',
            28: 'optimism', 29: 'pride', 30: 'realization', 31: 'relief',
            32: 'remorse', 33: 'sadness', 34: 'surprise'
        }
        print(f"Loading Core Brain from Hub: {self.model_id}...")
        # --- LOAD THE AI CORE FROM HUB ---
        try:
            self.tokenizer = XLMRobertaTokenizer.from_pretrained(
                self.model_id,
                subfolder=self.hf_subfolder
            )
            self.model = XLMRobertaForSequenceClassification.from_pretrained(
                self.model_id,
                subfolder=self.hf_subfolder
            )
            # Inject the label dictionary into the model config so the
            # pipeline emits readable emotion names instead of LABEL_n.
            self.model.config.id2label = self.emotion_map
            self.model.config.label2id = {v: k for k, v in self.emotion_map.items()}
            # Device index for the HF pipeline: 0 = first GPU, -1 = CPU.
            self.device = 0 if torch.cuda.is_available() else -1
            # Hugging Face pipeline for SHAP integration.
            self.classifier = pipeline(
                "text-classification",
                model=self.model,
                tokenizer=self.tokenizer,
                device=self.device,
                top_k=None  # Get scores for all 35 emotions
            )
            print("⚙️ Warming up Game Theory Math (SHAP)...")
            self.explainer = shap.Explainer(self.classifier)
            print("✅ SHAP Explainer ready!")
        except Exception as e:
            print(f"❌ SHAP Initialization failed: {e}")
            # Bare raise preserves the original traceback; `raise e`
            # would restart it from this line.
            raise

    def generate_visual_report(self, text):
        """Compute SHAP values for *text* and write an interactive HTML report.

        Args:
            text (str): raw input text to explain.

        Returns:
            str: absolute path of the generated ``shap_report.html``.
        """
        print(f"\n🧠 Analyzing: '{text}'")
        # 1. Run the Game Theory calculations.
        shap_values = self.explainer([text])
        # 2. Define save location.
        html_path = os.path.join(self.artifacts_dir, "shap_report.html")
        # 3. Targeted slicing: shap_values[0].values is (tokens, 35);
        #    summing over tokens picks the class the text pushes hardest
        #    toward, which we visualize below.
        best_class_index = int(shap_values[0].values.sum(axis=0).argmax())
        # 4. Generate the visualization for the winning emotion
        #    (the class index selects the 3rd dimension of the slice).
        shap_html = shap.plots.text(shap_values[0, :, best_class_index], display=False)
        # 5. Save the HTML.
        with open(html_path, "w", encoding="utf-8") as f:
            f.write(shap_html)
        print(f"✅ Diagnostic Complete! Visual Report saved to: {html_path}")
        return html_path
# --- EXECUTION BLOCK ---
if __name__ == "__main__":
    # Smoke-test the explainer end to end on a single anxious sentence.
    engine = MindGuardSHAPExplainer()
    demo_sentence = "I have a massive presentation tomorrow and my chest is tight."
    engine.generate_visual_report(demo_sentence)