# MindSync_AI / enhanced_rag_system.py
# Gagan0141's picture
# Update enhanced_rag_system.py
# 77e64dd verified
"""
enhanced_rag_system.py
Complete RAG knowledge base that combines JSON files + conversational AI
Optimized for AI Therapist with emotional support
"""
import json
import os
import numpy as np
from sentence_transformers import SentenceTransformer
class EnhancedRAGSystem:
    """Semantic lookup over a folder of JSON conversation files.

    Each ``*.json`` file in the knowledge folder is read as a list of records
    carrying ``user_input``, ``bot_response`` and ``bot_followup`` keys. The
    file name without its extension is stored as the record's
    ``emotion_category``. The ``user_input`` texts are embedded with a
    SentenceTransformer model and placed in a FAISS ``IndexFlatL2`` index,
    which ``retrieve_response`` searches.
    """

    def __init__(self, rag_directory="rag_knowledges"):
        """Load every knowledge file from *rag_directory* and build the index.

        Args:
            rag_directory: Folder containing the ``*.json`` knowledge files.
        """
        self.rag_dir = rag_directory
        self.knowledge_base = []
        # Both stay None until build_index() succeeds
        self.embedder = None
        self.index = None
        # Load all knowledge
        self.load_all_knowledge()
        self.build_index()

    def load_all_knowledge(self):
        """Load all JSON files from the knowledge folder into ``knowledge_base``.

        A missing folder, an unreadable file or a file whose top level is not
        a list is reported with ``print`` and skipped. List items that are not
        JSON objects are ignored.
        """
        # isdir (not exists): a plain file with this name cannot be listed
        if not os.path.isdir(self.rag_dir):
            print(f"Warning: {self.rag_dir} folder not found!")
            return

        # os.listdir order is arbitrary; sorting keeps entry positions stable
        # from one run to the next
        for file in sorted(os.listdir(self.rag_dir)):
            if not file.endswith('.json'):
                continue

            filepath = os.path.join(self.rag_dir, file)
            # Emotion category comes from the file name; splitext removes only
            # the trailing extension, never a ".json" inside the name
            emotion_category = os.path.splitext(file)[0]

            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except (OSError, ValueError) as e:
                # ValueError covers both invalid JSON and undecodable bytes
                print(f"❌ Error loading {file}: {e}")
                continue

            if not isinstance(data, list):
                print(f"❌ Error loading {file}: expected a JSON list of entries")
                continue

            loaded = 0
            for item in data:
                if not isinstance(item, dict):
                    continue
                # "or ''" turns JSON null into an empty string so that the
                # embedder only ever receives text
                user_input = item.get('user_input') or ''
                bot_response = item.get('bot_response') or ''
                bot_followup = item.get('bot_followup') or ''
                self.knowledge_base.append({
                    'user_input': user_input,
                    'bot_response': bot_response,
                    'bot_followup': bot_followup,
                    'emotion_category': emotion_category,
                    'combined_response': f"{bot_response} {bot_followup}"
                })
                loaded += 1
            print(f"✅ Loaded {loaded} entries from {file}")

    def build_index(self):
        """Build the FAISS index used for semantic search.

        Embeds the ``user_input`` of every knowledge entry and stores the
        vectors in an ``IndexFlatL2``. Any failure is printed and leaves
        ``self.index`` as ``None``, which makes ``retrieve_response`` return
        ``None``.
        """
        if not self.knowledge_base:
            print("No knowledge base loaded!")
            return

        try:
            import faiss

            # Initialize embedder
            embedder = SentenceTransformer('all-MiniLM-L6-v2')

            # Create embeddings for all user inputs
            user_inputs = [item['user_input'] for item in self.knowledge_base]
            embeddings = embedder.encode(user_inputs, convert_to_numpy=True)

            # Build FAISS index
            dimension = embeddings.shape[1]
            index = faiss.IndexFlatL2(dimension)
            index.add(embeddings)
        except Exception as e:
            print(f"❌ Error building index: {e}")
            return

        # Publish only after every step succeeded, so a half-built index is
        # never queried
        self.embedder = embedder
        self.index = index
        print(f"✅ Built FAISS index with {len(self.knowledge_base)} entries")

    def retrieve_response(self, query, emotion=None, top_k=3):
        """
        Retrieve best response from RAG knowledge base

        Args:
            query: User's question/input
            emotion: Detected emotion (optional). Entries whose category
                contains this text (case-insensitive) are preferred over
                closer entries from other categories.
            top_k: Number of top results to consider; twice as many
                neighbours are requested from the index.

        Returns:
            dict with ``response``, ``followup``, ``combined``,
            ``emotion_category``, ``distance`` and ``confidence`` keys, or
            None when no index is available, nothing was found, or an error
            occurred (the error is printed).
        """
        if self.index is None or self.embedder is None:
            return None

        try:
            # Encode query
            query_embedding = self.embedder.encode([query], convert_to_numpy=True)

            # Search in FAISS index; fetch extra so the emotion preference
            # has alternatives to pick from
            distances, indices = self.index.search(query_embedding, top_k * 2)

            wanted = emotion.lower() if emotion else None
            entry_count = len(self.knowledge_base)
            preferred = []
            others = []
            for dist, idx in zip(distances[0], indices[0]):
                # FAISS reports missing neighbours as index -1; without the
                # lower bound that would wrap around to the last entry
                if not 0 <= idx < entry_count:
                    continue
                item = self.knowledge_base[idx]
                if wanted and wanted in item['emotion_category'].lower():
                    preferred.append((dist, item))
                else:
                    others.append((dist, item))

            # Appending keeps each group in the order the index returned it,
            # so the nearest emotion match comes first
            candidates = preferred + others

            # Get best match
            if candidates:
                best_distance, best = candidates[0]
                return {
                    'response': best['bot_response'],
                    'followup': best['bot_followup'],
                    'combined': best['combined_response'],
                    'emotion_category': best['emotion_category'],
                    'distance': float(best_distance),
                    'confidence': self._calculate_confidence(best_distance)
                }
        except Exception as e:
            # Best-effort lookup: callers treat None as "no answer"
            print(f"Error retrieving response: {e}")

        return None

    def _calculate_confidence(self, distance):
        """Calculate confidence score from distance (0-1).

        Lower distance means higher confidence: 0 maps to 1.0 and anything
        at or above 2 maps to 0.0.

        Returns:
            A plain Python float clamped to the range [0, 1].
        """
        # NOTE(review): the scale assumes distances mostly fall in [0, 2];
        # IndexFlatL2 reports squared L2 distances - confirm against the
        # embedding model in use.
        return float(max(0.0, min(1.0, 1.0 - distance / 2)))
# ==================== INTEGRATION WITH MAIN APP ====================
def get_enhanced_response(user_input, emotion, rag_system):
    """
    Main function to get response - tries RAG first, then fallback

    Args:
        user_input: User's message
        emotion: Detected emotion
        rag_system: EnhancedRAGSystem instance

    Returns:
        The stored combined response when the knowledge base has a match
        with confidence above 0.6; otherwise whatever ``generate_with_hf``
        returns for the support prompt (presumably the reply text - confirm
        against hf_llm).

    Note:
        This module defines ``get_enhanced_response`` again further down;
        that later definition is the one bound to the name after import.
    """
    # Try RAG knowledge base first
    rag_result = rag_system.retrieve_response(user_input, emotion, top_k=3)
    if rag_result and rag_result['confidence'] > 0.6:  # Good match
        # Use RAG response
        return rag_result['combined']

    # No confident match: ask the language model rather than falling off the
    # end of the function and returning None. Imported here so the knowledge
    # base path does not need hf_llm.
    from hf_llm import generate_with_hf

    prompt = (
        "\n"
        "You are an empathetic mental health support assistant.\n"
        f"User emotion: {emotion}\n"
        f"User message: {user_input}\n"
        "Respond calmly, safely, and supportively.\n"
        "Avoid giving medical diagnoses.\n"
    )
    return generate_with_hf(prompt)
from hf_llm import generate_with_hf
def get_enhanced_response(user_input, emotion, rag_system):
    """Answer *user_input* from the knowledge base, else from the language model.

    Args:
        user_input: User's message.
        emotion: Detected emotion; passed to the retrieval step and quoted in
            the fallback prompt.
        rag_system: Object exposing ``retrieve_response(query, emotion, top_k)``.

    Returns:
        The stored combined response when retrieval reports a confidence
        above 0.6; otherwise whatever ``generate_with_hf`` returns for the
        support prompt (presumably the reply text - confirm against hf_llm).
    """
    rag_result = rag_system.retrieve_response(user_input, emotion, top_k=3)
    if rag_result and rag_result["confidence"] > 0.6:
        return rag_result["combined"]

    # Same text as a triple-quoted block with a leading and trailing newline
    prompt = (
        "\n"
        "You are an empathetic mental health support assistant.\n"
        f"User emotion: {emotion}\n"
        f"User message: {user_input}\n"
        "Respond calmly, safely, and supportively.\n"
        "Avoid giving medical diagnoses.\n"
    )
    # generate_with_hf is the name this module imports from hf_llm; the
    # previous call target, generate_response, is not defined in this file
    return generate_with_hf(prompt)
# ==================== USAGE EXAMPLE ====================
if __name__ == "__main__":
    # Build the knowledge base and its index from the default folder
    rag = EnhancedRAGSystem(rag_directory="rag_knowledges")

    # (message, detected emotion) pairs used as a manual smoke test
    sample_conversations = (
        ("I passed my exam today!", "joy"),
        ("I'm feeling really sad and lonely", "sadness"),
        ("I got promoted at work", "happiness"),
        ("Hey, what's up?", "neutral"),
        ("I'm so stressed about my exams", "anxiety"),
        ("I came from school and got hurt through bus", "sadness"),
    )

    banner = "=" * 80
    divider = "-" * 80

    print("\n" + banner)
    print("TESTING ENHANCED RAG SYSTEM")
    print(banner + "\n")

    for message, mood in sample_conversations:
        print(f"USER ({mood}): {message}")
        reply = get_enhanced_response(message, mood, rag)
        # Only the first 200 characters of the reply are shown
        print(f"BOT: {reply[:200]}...")
        print(divider + "\n")