File size: 7,606 Bytes
77e64dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e7afd1c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
"""
enhanced_rag_system.py
Complete RAG knowledge base that combines JSON files + conversational AI
Optimized for AI Therapist with emotional support
"""

import json
import os
import numpy as np
from sentence_transformers import SentenceTransformer

class EnhancedRAGSystem:
    def __init__(self, rag_directory="rag_knowledges"):
        self.rag_dir = rag_directory
        self.knowledge_base = []
        self.embedder = None
        self.index = None
        
        # Load all knowledge
        self.load_all_knowledge()
        self.build_index()
    
    def load_all_knowledge(self):
        """Load all JSON files from rag_knowledges folder"""
        if not os.path.exists(self.rag_dir):
            print(f"Warning: {self.rag_dir} folder not found!")
            return
        
        for file in os.listdir(self.rag_dir):
            if file.endswith('.json'):
                filepath = os.path.join(self.rag_dir, file)
                try:
                    with open(filepath, 'r', encoding='utf-8') as f:
                        data = json.load(f)
                        
                        # Add emotion category from filename
                        emotion_category = file.replace('.json', '')
                        
                        for item in data:
                            self.knowledge_base.append({
                                'user_input': item.get('user_input', ''),
                                'bot_response': item.get('bot_response', ''),
                                'bot_followup': item.get('bot_followup', ''),
                                'emotion_category': emotion_category,
                                'combined_response': f"{item.get('bot_response', '')} {item.get('bot_followup', '')}"
                            })
                    
                    print(f"✅ Loaded {len(data)} entries from {file}")
                except Exception as e:
                    print(f"❌ Error loading {file}: {e}")
    
    def build_index(self):
        """Build FAISS index for semantic search"""
        if not self.knowledge_base:
            print("No knowledge base loaded!")
            return
        
        try:
            import faiss
            
            # Initialize embedder
            self.embedder = SentenceTransformer('all-MiniLM-L6-v2')
            
            # Create embeddings for all user inputs
            user_inputs = [item['user_input'] for item in self.knowledge_base]
            embeddings = self.embedder.encode(user_inputs, convert_to_numpy=True)
            
            # Build FAISS index
            dimension = embeddings.shape[1]
            self.index = faiss.IndexFlatL2(dimension)
            self.index.add(embeddings)
            
            print(f"✅ Built FAISS index with {len(self.knowledge_base)} entries")
        except Exception as e:
            print(f"❌ Error building index: {e}")
    
    def retrieve_response(self, query, emotion=None, top_k=3):
        """
        Retrieve best response from RAG knowledge base
        
        Args:
            query: User's question/input
            emotion: Detected emotion (optional, for filtering)
            top_k: Number of top results to consider
        
        Returns:
            dict with response and metadata
        """
        if not self.index or not self.embedder:
            return None
        
        try:
            # Encode query
            query_embedding = self.embedder.encode([query], convert_to_numpy=True)
            
            # Search in FAISS index
            distances, indices = self.index.search(query_embedding, top_k * 2)  # Get more to filter
            
            # Filter by emotion if provided
            candidates = []
            for dist, idx in zip(distances[0], indices[0]):
                if idx < len(self.knowledge_base):
                    item = self.knowledge_base[idx]
                    
                    # If emotion matches category, prioritize it
                    if emotion and emotion.lower() in item['emotion_category'].lower():
                        candidates.insert(0, {
                            'distance': dist,
                            'item': item
                        })
                    else:
                        candidates.append({
                            'distance': dist,
                            'item': item
                        })
            
            # Get best match
            if candidates:
                best = candidates[0]['item']
                
                return {
                    'response': best['bot_response'],
                    'followup': best['bot_followup'],
                    'combined': best['combined_response'],
                    'emotion_category': best['emotion_category'],
                    'distance': float(candidates[0]['distance']),
                    'confidence': self._calculate_confidence(candidates[0]['distance'])
                }
            
        except Exception as e:
            print(f"Error retrieving response: {e}")
        
        return None
    
    def _calculate_confidence(self, distance):
        """Calculate confidence score from distance (0-1)"""
        # Lower distance = higher confidence
        # Typical distances range from 0 to 2
        confidence = max(0, min(1, 1 - (distance / 2)))
        return confidence


# ==================== INTEGRATION WITH MAIN APP ====================

def get_enhanced_response(user_input, emotion, rag_system):
    """
    Main function to get response - tries RAG first, then fallback
    
    Args:
        user_input: User's message
        emotion: Detected emotion
        rag_system: EnhancedRAGSystem instance
    
    Returns:
        Chatbot response string
    """
    
    # Try RAG knowledge base first
    rag_result = rag_system.retrieve_response(user_input, emotion, top_k=3)
    
    if rag_result and rag_result['confidence'] > 0.6:  # Good match
        # Use RAG response
        return rag_result['combined']
    
    # Fallback to contextual responses (from chatbot_responses.py)
    from hf_llm import generate_with_hf

    def get_enhanced_response(user_input, emotion, rag_system):
        rag_result = rag_system.retrieve_response(user_input, emotion, top_k=3)

        if rag_result and rag_result["confidence"] > 0.6:
            return rag_result["combined"]

        prompt = f"""
    You are an empathetic mental health support assistant.
    User emotion: {emotion}
    User message: {user_input}

    Respond calmly, safely, and supportively.
    Avoid giving medical diagnoses.
    """

        return generate_response(prompt)



# ==================== USAGE EXAMPLE ====================

if __name__ == "__main__":
    # Initialize RAG system
    rag = EnhancedRAGSystem(rag_directory="rag_knowledges")
    
    # Test queries
    test_queries = [
        ("I passed my exam today!", "joy"),
        ("I'm feeling really sad and lonely", "sadness"),
        ("I got promoted at work", "happiness"),
        ("Hey, what's up?", "neutral"),
        ("I'm so stressed about my exams", "anxiety"),
        ("I came from school and got hurt through bus", "sadness")
    ]
    
    print("\n" + "="*80)
    print("TESTING ENHANCED RAG SYSTEM")
    print("="*80 + "\n")
    
    for query, emotion in test_queries:
        print(f"USER ({emotion}): {query}")
        
        # Get response
        response = get_enhanced_response(query, emotion, rag)
        
        print(f"BOT: {response[:200]}...")
        print("-" * 80 + "\n")