Spaces:

Gagan0141
/

MindSync_AI

Sleeping

App Files Files Community

MindSync_AI / enhanced_rag_system.py

Gagan0141

Update enhanced_rag_system.py

77e64dd verified about 1 month ago

raw

history blame contribute delete

7.61 kB

	"""
	enhanced_rag_system.py
	Complete RAG knowledge base that combines JSON files + conversational AI
	Optimized for AI Therapist with emotional support
	"""

	import json
	import os
	import numpy as np
	from sentence_transformers import SentenceTransformer

	class EnhancedRAGSystem:
	    """Semantic-search knowledge base built from per-emotion JSON files.

	    Every ``*.json`` file found in ``rag_directory`` is loaded into
	    ``knowledge_base``. The ``user_input`` text of each entry is embedded
	    with the ``all-MiniLM-L6-v2`` sentence-transformer model and stored in a
	    FAISS ``IndexFlatL2`` so the closest stored entry can be retrieved for a
	    new user message.
	    """

	    def __init__(self, rag_directory="rag_knowledges"):
	        """Load the knowledge files and build the search index.

	        Args:
	            rag_directory: Folder containing the ``*.json`` knowledge files.
	        """
	        self.rag_dir = rag_directory
	        self.knowledge_base = []
	        self.embedder = None
	        self.index = None

	        # Load all knowledge
	        self.load_all_knowledge()
	        self.build_index()

	    def load_all_knowledge(self):
	        """Load all JSON files from the knowledge folder into ``knowledge_base``.

	        Each file is read as a list of objects; the ``user_input``,
	        ``bot_response`` and ``bot_followup`` keys are copied (missing keys
	        default to an empty string) and the file name without its extension
	        is stored as ``emotion_category``. A file that cannot be read or
	        parsed is reported and skipped as a whole, so it never contributes
	        partial entries.
	        """
	        if not os.path.isdir(self.rag_dir):
	            print(f"Warning: {self.rag_dir} folder not found!")
	            return

	        # Sorted so that entry order (and therefore index positions) does not
	        # depend on the file system's directory listing order.
	        for file in sorted(os.listdir(self.rag_dir)):
	            if not file.endswith('.json'):
	                continue

	            filepath = os.path.join(self.rag_dir, file)
	            # Emotion category comes from the file name; splitext only drops
	            # the trailing extension, never a ".json" inside the name.
	            emotion_category = os.path.splitext(file)[0]

	            try:
	                with open(filepath, 'r', encoding='utf-8') as f:
	                    data = json.load(f)

	                # Collect first, commit afterwards: a malformed item aborts
	                # the whole file instead of leaving half of it loaded.
	                entries = []
	                for item in data:
	                    bot_response = item.get('bot_response', '')
	                    bot_followup = item.get('bot_followup', '')
	                    entries.append({
	                        'user_input': item.get('user_input', ''),
	                        'bot_response': bot_response,
	                        'bot_followup': bot_followup,
	                        'emotion_category': emotion_category,
	                        'combined_response': f"{bot_response} {bot_followup}"
	                    })

	                self.knowledge_base.extend(entries)
	                print(f"✅ Loaded {len(entries)} entries from {file}")
	            except (OSError, ValueError, AttributeError, TypeError) as e:
	                # Best effort: unreadable file, invalid JSON/encoding, or a
	                # payload that is not a list of objects.
	                print(f"❌ Error loading {file}: {e}")

	    def build_index(self):
	        """Build the FAISS index used for semantic search.

	        Does nothing (apart from a message) when no knowledge was loaded.
	        ``self.embedder`` and ``self.index`` are only assigned once the whole
	        build succeeded, so a failure leaves both as they were.
	        """
	        if not self.knowledge_base:
	            print("No knowledge base loaded!")
	            return

	        try:
	            import faiss

	            # Initialize embedder
	            embedder = SentenceTransformer('all-MiniLM-L6-v2')

	            # Create embeddings for all user inputs
	            user_inputs = [item['user_input'] for item in self.knowledge_base]
	            embeddings = embedder.encode(user_inputs, convert_to_numpy=True)
	            # FAISS works on contiguous float32 matrices
	            embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)

	            # Build FAISS index
	            dimension = embeddings.shape[1]
	            index = faiss.IndexFlatL2(dimension)
	            index.add(embeddings)

	            self.embedder = embedder
	            self.index = index

	            print(f"✅ Built FAISS index with {len(self.knowledge_base)} entries")
	        except Exception as e:
	            # Deliberate best effort: without an index, retrieve_response
	            # returns None and callers use their fallback.
	            print(f"❌ Error building index: {e}")

	    def retrieve_response(self, query, emotion=None, top_k=3):
	        """
	        Retrieve best response from RAG knowledge base

	        Args:
	            query: User's question/input
	            emotion: Detected emotion (optional). Neighbours whose
	                emotion category contains this text (case-insensitive) are
	                preferred over the others.
	            top_k: Number of top results to consider (twice as many
	                neighbours are fetched so the emotion preference has
	                something to choose from)

	        Returns:
	            dict with ``response``, ``followup``, ``combined``,
	            ``emotion_category``, ``distance`` and ``confidence``, or None
	            when no index is available, nothing matched, or the lookup
	            failed.
	        """
	        if self.index is None or self.embedder is None:
	            return None

	        try:
	            # Never ask for more neighbours than there are entries; FAISS
	            # pads missing results with index -1.
	            k = min(top_k * 2, len(self.knowledge_base))
	            if k <= 0:
	                return None

	            # Encode query
	            query_embedding = self.embedder.encode([query], convert_to_numpy=True)
	            query_embedding = np.ascontiguousarray(query_embedding, dtype=np.float32)

	            # Search in FAISS index (results arrive nearest first)
	            distances, indices = self.index.search(query_embedding, k)

	            wanted = emotion.lower() if emotion else None

	            # Stable partition: emotion matches first, each group keeping the
	            # nearest-first order returned by the search.
	            preferred = []
	            others = []
	            for dist, idx in zip(distances[0], indices[0]):
	                idx = int(idx)
	                # Skip padding (-1) and anything outside the knowledge base;
	                # a negative index would otherwise wrap to the last entry.
	                if not 0 <= idx < len(self.knowledge_base):
	                    continue

	                item = self.knowledge_base[idx]
	                if wanted and wanted in item['emotion_category'].lower():
	                    preferred.append((dist, item))
	                else:
	                    others.append((dist, item))

	            candidates = preferred + others

	            # Get best match
	            if candidates:
	                best_distance, best = candidates[0]

	                return {
	                    'response': best['bot_response'],
	                    'followup': best['bot_followup'],
	                    'combined': best['combined_response'],
	                    'emotion_category': best['emotion_category'],
	                    'distance': float(best_distance),
	                    'confidence': self._calculate_confidence(best_distance)
	                }

	        except Exception as e:
	            print(f"Error retrieving response: {e}")

	        return None

	    def _calculate_confidence(self, distance):
	        """Calculate confidence score from distance (0-1).

	        Args:
	            distance: Distance reported by the index for the chosen entry.

	        Returns:
	            A plain Python float clamped to the range [0.0, 1.0].
	        """
	        # Lower distance = higher confidence, mapped linearly over 0..2.
	        # NOTE(review): IndexFlatL2 is documented to report squared L2
	        # distances; the 0..2 scale is kept unchanged so existing confidence
	        # thresholds keep their meaning - confirm it fits the embeddings.
	        distance = float(distance)
	        return max(0.0, min(1.0, 1.0 - distance / 2))


	# ==================== INTEGRATION WITH MAIN APP ====================

	def get_enhanced_response(user_input, emotion, rag_system, min_confidence=0.6):
	    """
	    Main function to get response - tries RAG first, then LLM fallback

	    Args:
	        user_input: User's message
	        emotion: Detected emotion
	        rag_system: EnhancedRAGSystem instance
	        min_confidence: A RAG match is used only when its confidence is
	            strictly greater than this value (default 0.6)

	    Returns:
	        Chatbot response string: the combined RAG response for a good
	        match, otherwise whatever ``generate_with_hf`` returns for the
	        support prompt.
	    """
	    # Try RAG knowledge base first
	    rag_result = rag_system.retrieve_response(user_input, emotion, top_k=3)

	    if rag_result and rag_result["confidence"] > min_confidence:  # Good match
	        return rag_result["combined"]

	    # Fallback to the hosted LLM. Imported here so the RAG-only path works
	    # even when hf_llm (or its dependencies) is unavailable.
	    from hf_llm import generate_with_hf

	    prompt = f"""
	You are an empathetic mental health support assistant.
	User emotion: {emotion}
	User message: {user_input}

	Respond calmly, safely, and supportively.
	Avoid giving medical diagnoses.
	"""

	    # NOTE(review): assumes generate_with_hf accepts the prompt as its only
	    # required argument and returns the reply text - confirm against hf_llm.
	    return generate_with_hf(prompt)



	# ==================== USAGE EXAMPLE ====================

	if __name__ == "__main__":
	    # Smoke test: build the knowledge base from the default folder and run a
	    # handful of (message, detected emotion) pairs through the responder.
	    rag_system = EnhancedRAGSystem(rag_directory="rag_knowledges")

	    sample_inputs = (
	        ("I passed my exam today!", "joy"),
	        ("I'm feeling really sad and lonely", "sadness"),
	        ("I got promoted at work", "happiness"),
	        ("Hey, what's up?", "neutral"),
	        ("I'm so stressed about my exams", "anxiety"),
	        ("I came from school and got hurt through bus", "sadness"),
	    )

	    banner = "=" * 80
	    print("\n" + banner)
	    print("TESTING ENHANCED RAG SYSTEM")
	    print(banner + "\n")

	    for message, detected_emotion in sample_inputs:
	        print(f"USER ({detected_emotion}): {message}")

	        # RAG answer when confident enough, otherwise the fallback reply
	        reply = get_enhanced_response(message, detected_emotion, rag_system)

	        # Only the first 200 characters of the reply are shown
	        print(f"BOT: {reply[:200]}...")
	        print("-" * 80 + "\n")