Gagan0141 committed on
Commit
77e64dd
·
verified ·
1 Parent(s): 3f0c152

Update enhanced_rag_system.py

Browse files
Files changed (1) hide show
  1. enhanced_rag_system.py +213 -213
enhanced_rag_system.py CHANGED
@@ -1,214 +1,214 @@
1
- """
2
- enhanced_rag_system.py
3
- Complete RAG knowledge base that combines JSON files + conversational AI
4
- Optimized for AI Therapist with emotional support
5
- """
6
-
7
- import json
8
- import os
9
- import numpy as np
10
- from sentence_transformers import SentenceTransformer
11
-
12
class EnhancedRAGSystem:
    """Semantic retrieval over a folder of emotion-tagged JSON dialogue files.

    Every ``*.json`` file in ``rag_directory`` is read as a list of objects
    with ``user_input``, ``bot_response`` and ``bot_followup`` keys. The file
    name without its extension becomes the entry's ``emotion_category``.
    The ``user_input`` texts are embedded with a SentenceTransformer model and
    stored in a FAISS ``IndexFlatL2`` so the closest stored input can be found
    for a new query.
    """

    def __init__(self, rag_directory="rag_knowledges"):
        """Load every knowledge file and build the search index.

        Args:
            rag_directory: Folder that contains the ``*.json`` knowledge files.
        """
        self.rag_dir = rag_directory
        self.knowledge_base = []
        self.embedder = None
        self.index = None

        # Load all knowledge
        self.load_all_knowledge()
        self.build_index()

    def load_all_knowledge(self):
        """Load all JSON files from the knowledge folder into ``knowledge_base``.

        A missing folder or an unreadable file is reported on stdout and
        skipped; loading never raises. Items that are not JSON objects are
        skipped individually so one bad item does not discard the rest of
        its file.
        """
        if not os.path.isdir(self.rag_dir):
            print(f"Warning: {self.rag_dir} folder not found!")
            return

        # sorted() keeps knowledge-base order (and therefore index ids)
        # reproducible; os.listdir() order is arbitrary.
        for filename in sorted(os.listdir(self.rag_dir)):
            if not filename.endswith('.json'):
                continue

            filepath = os.path.join(self.rag_dir, filename)
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                if not isinstance(data, list):
                    raise ValueError("expected a JSON list of entries")
            except Exception as e:
                print(f"❌ Error loading {filename}: {e}")
                continue

            # Emotion category comes from the file name; splitext() removes
            # only the trailing extension, unlike str.replace().
            emotion_category = os.path.splitext(filename)[0]

            loaded = 0
            for item in data:
                if not isinstance(item, dict):
                    continue
                bot_response = item.get('bot_response', '')
                bot_followup = item.get('bot_followup', '')
                self.knowledge_base.append({
                    'user_input': item.get('user_input', ''),
                    'bot_response': bot_response,
                    'bot_followup': bot_followup,
                    'emotion_category': emotion_category,
                    'combined_response': f"{bot_response} {bot_followup}",
                })
                loaded += 1

            print(f"✅ Loaded {loaded} entries from {filename}")

    def build_index(self):
        """Build the FAISS index used for semantic search.

        Does nothing except print a notice when the knowledge base is empty.
        Any failure (missing dependency, model download error, ...) is printed
        and leaves both ``self.index`` and ``self.embedder`` as ``None``.
        """
        if not self.knowledge_base:
            print("No knowledge base loaded!")
            return

        try:
            # Imported here so the class can be used (without retrieval)
            # when faiss is not installed.
            import faiss

            embedder = SentenceTransformer('all-MiniLM-L6-v2')

            # Only the user inputs are embedded; responses are looked up by id.
            user_inputs = [item['user_input'] for item in self.knowledge_base]
            embeddings = embedder.encode(user_inputs, convert_to_numpy=True)

            index = faiss.IndexFlatL2(embeddings.shape[1])
            index.add(embeddings)
        except Exception as e:
            print(f"❌ Error building index: {e}")
            return

        # Publish both together so a failed build never leaves a half-set state.
        self.embedder = embedder
        self.index = index
        print(f"✅ Built FAISS index with {len(self.knowledge_base)} entries")

    def retrieve_response(self, query, emotion=None, top_k=3):
        """Retrieve the best stored response for ``query``.

        Args:
            query: User's question/input.
            emotion: Detected emotion (optional). Hits whose category contains
                this text (case-insensitive) are ranked ahead of other hits.
            top_k: Number of top results to consider; ``top_k * 2`` neighbours
                are fetched so there is room for the emotion preference.

        Returns:
            A dict with ``response``, ``followup``, ``combined``,
            ``emotion_category``, ``distance`` and ``confidence`` keys, or
            ``None`` when the index is unavailable, nothing was found, or an
            error occurred (the error is printed).
        """
        if self.index is None or self.embedder is None:
            return None

        try:
            query_embedding = self.embedder.encode([query], convert_to_numpy=True)
            distances, indices = self.index.search(query_embedding, top_k * 2)

            wanted = emotion.lower() if emotion else None
            total = len(self.knowledge_base)

            # Two buckets, each kept in the nearest-first order the search
            # returned, so the closest emotion match wins rather than the
            # farthest one.
            matching, others = [], []
            for dist, idx in zip(distances[0], indices[0]):
                # FAISS pads missing results with id -1; without the lower
                # bound that would index the last knowledge-base entry.
                if not 0 <= idx < total:
                    continue
                item = self.knowledge_base[idx]
                if wanted and wanted in item['emotion_category'].lower():
                    matching.append((dist, item))
                else:
                    others.append((dist, item))

            candidates = matching + others
            if candidates:
                distance, best = candidates[0]
                return {
                    'response': best['bot_response'],
                    'followup': best['bot_followup'],
                    'combined': best['combined_response'],
                    'emotion_category': best['emotion_category'],
                    'distance': float(distance),
                    'confidence': self._calculate_confidence(distance),
                }

        except Exception as e:
            print(f"Error retrieving response: {e}")

        return None

    def _calculate_confidence(self, distance):
        """Map a search distance to a confidence score clamped to [0, 1].

        Lower distance means higher confidence: 0 maps to 1.0 and anything
        at or above 2 maps to 0.0.
        """
        # NOTE(review): IndexFlatL2 reports squared L2 distances; if the
        # embeddings are unit-normalised those span 0..4 rather than 0..2 --
        # confirm the divisor against real data before tuning thresholds.
        return float(max(0.0, min(1.0, 1 - (distance / 2))))
141
-
142
-
143
- # ==================== INTEGRATION WITH MAIN APP ====================
144
-
145
def get_enhanced_response(user_input, emotion, rag_system, confidence_threshold=0.6):
    """Return a chatbot reply: RAG knowledge base first, LLM as fallback.

    This single definition replaces two same-named definitions; the first one
    returned ``None`` on a weak match and was shadowed by the second.

    Args:
        user_input: User's message.
        emotion: Detected emotion label, passed to retrieval and the prompt.
        rag_system: Object exposing ``retrieve_response(query, emotion, top_k)``
            (an ``EnhancedRAGSystem`` instance).
        confidence_threshold: A retrieved answer is used only when its
            ``confidence`` is strictly greater than this value.

    Returns:
        The stored combined response on a confident match, otherwise whatever
        ``ollama_llm.generate_response`` returns for the support prompt.
    """
    rag_result = rag_system.retrieve_response(user_input, emotion, top_k=3)

    if rag_result and rag_result["confidence"] > confidence_threshold:
        return rag_result["combined"]

    # Imported lazily so the retrieval-only path still works when the LLM
    # backend module is not installed.
    from ollama_llm import generate_response

    prompt = (
        "\n"
        "You are an empathetic mental health support assistant.\n"
        f"User emotion: {emotion}\n"
        f"User message: {user_input}\n"
        "\n"
        "Respond calmly, safely, and supportively.\n"
        "Avoid giving medical diagnoses.\n"
    )

    return generate_response(prompt)
184
-
185
-
186
-
187
- # ==================== USAGE EXAMPLE ====================
188
-
189
if __name__ == "__main__":
    # Manual smoke test: build the RAG system from the default folder and
    # print the reply produced for a handful of (message, emotion) pairs.
    demo_rag = EnhancedRAGSystem(rag_directory="rag_knowledges")

    sample_turns = (
        ("I passed my exam today!", "joy"),
        ("I'm feeling really sad and lonely", "sadness"),
        ("I got promoted at work", "happiness"),
        ("Hey, what's up?", "neutral"),
        ("I'm so stressed about my exams", "anxiety"),
        ("I came from school and got hurt through bus", "sadness"),
    )

    banner = "=" * 80
    divider = "-" * 80

    print(f"\n{banner}")
    print("TESTING ENHANCED RAG SYSTEM")
    print(f"{banner}\n")

    for message, mood in sample_turns:
        print(f"USER ({mood}): {message}")

        reply = get_enhanced_response(message, mood, demo_rag)

        # Only the first 200 characters are shown to keep the output compact.
        print(f"BOT: {reply[:200]}...")
        print(f"{divider}\n")
 
1
+ """
2
+ enhanced_rag_system.py
3
+ Complete RAG knowledge base that combines JSON files + conversational AI
4
+ Optimized for AI Therapist with emotional support
5
+ """
6
+
7
+ import json
8
+ import os
9
+ import numpy as np
10
+ from sentence_transformers import SentenceTransformer
11
+
12
class EnhancedRAGSystem:
    """Semantic retrieval over a folder of emotion-tagged JSON dialogue files.

    Every ``*.json`` file in ``rag_directory`` is read as a list of objects
    with ``user_input``, ``bot_response`` and ``bot_followup`` keys. The file
    name without its extension becomes the entry's ``emotion_category``.
    The ``user_input`` texts are embedded with a SentenceTransformer model and
    stored in a FAISS ``IndexFlatL2`` so the closest stored input can be found
    for a new query.
    """

    def __init__(self, rag_directory="rag_knowledges"):
        """Load every knowledge file and build the search index.

        Args:
            rag_directory: Folder that contains the ``*.json`` knowledge files.
        """
        self.rag_dir = rag_directory
        self.knowledge_base = []
        self.embedder = None
        self.index = None

        # Load all knowledge
        self.load_all_knowledge()
        self.build_index()

    def load_all_knowledge(self):
        """Load all JSON files from the knowledge folder into ``knowledge_base``.

        A missing folder or an unreadable file is reported on stdout and
        skipped; loading never raises. Items that are not JSON objects are
        skipped individually so one bad item does not discard the rest of
        its file.
        """
        if not os.path.isdir(self.rag_dir):
            print(f"Warning: {self.rag_dir} folder not found!")
            return

        # sorted() keeps knowledge-base order (and therefore index ids)
        # reproducible; os.listdir() order is arbitrary.
        for filename in sorted(os.listdir(self.rag_dir)):
            if not filename.endswith('.json'):
                continue

            filepath = os.path.join(self.rag_dir, filename)
            try:
                with open(filepath, 'r', encoding='utf-8') as f:
                    data = json.load(f)
                if not isinstance(data, list):
                    raise ValueError("expected a JSON list of entries")
            except Exception as e:
                print(f"❌ Error loading {filename}: {e}")
                continue

            # Emotion category comes from the file name; splitext() removes
            # only the trailing extension, unlike str.replace().
            emotion_category = os.path.splitext(filename)[0]

            loaded = 0
            for item in data:
                if not isinstance(item, dict):
                    continue
                bot_response = item.get('bot_response', '')
                bot_followup = item.get('bot_followup', '')
                self.knowledge_base.append({
                    'user_input': item.get('user_input', ''),
                    'bot_response': bot_response,
                    'bot_followup': bot_followup,
                    'emotion_category': emotion_category,
                    'combined_response': f"{bot_response} {bot_followup}",
                })
                loaded += 1

            print(f"✅ Loaded {loaded} entries from {filename}")

    def build_index(self):
        """Build the FAISS index used for semantic search.

        Does nothing except print a notice when the knowledge base is empty.
        Any failure (missing dependency, model download error, ...) is printed
        and leaves both ``self.index`` and ``self.embedder`` as ``None``.
        """
        if not self.knowledge_base:
            print("No knowledge base loaded!")
            return

        try:
            # Imported here so the class can be used (without retrieval)
            # when faiss is not installed.
            import faiss

            embedder = SentenceTransformer('all-MiniLM-L6-v2')

            # Only the user inputs are embedded; responses are looked up by id.
            user_inputs = [item['user_input'] for item in self.knowledge_base]
            embeddings = embedder.encode(user_inputs, convert_to_numpy=True)

            index = faiss.IndexFlatL2(embeddings.shape[1])
            index.add(embeddings)
        except Exception as e:
            print(f"❌ Error building index: {e}")
            return

        # Publish both together so a failed build never leaves a half-set state.
        self.embedder = embedder
        self.index = index
        print(f"✅ Built FAISS index with {len(self.knowledge_base)} entries")

    def retrieve_response(self, query, emotion=None, top_k=3):
        """Retrieve the best stored response for ``query``.

        Args:
            query: User's question/input.
            emotion: Detected emotion (optional). Hits whose category contains
                this text (case-insensitive) are ranked ahead of other hits.
            top_k: Number of top results to consider; ``top_k * 2`` neighbours
                are fetched so there is room for the emotion preference.

        Returns:
            A dict with ``response``, ``followup``, ``combined``,
            ``emotion_category``, ``distance`` and ``confidence`` keys, or
            ``None`` when the index is unavailable, nothing was found, or an
            error occurred (the error is printed).
        """
        if self.index is None or self.embedder is None:
            return None

        try:
            query_embedding = self.embedder.encode([query], convert_to_numpy=True)
            distances, indices = self.index.search(query_embedding, top_k * 2)

            wanted = emotion.lower() if emotion else None
            total = len(self.knowledge_base)

            # Two buckets, each kept in the nearest-first order the search
            # returned, so the closest emotion match wins rather than the
            # farthest one.
            matching, others = [], []
            for dist, idx in zip(distances[0], indices[0]):
                # FAISS pads missing results with id -1; without the lower
                # bound that would index the last knowledge-base entry.
                if not 0 <= idx < total:
                    continue
                item = self.knowledge_base[idx]
                if wanted and wanted in item['emotion_category'].lower():
                    matching.append((dist, item))
                else:
                    others.append((dist, item))

            candidates = matching + others
            if candidates:
                distance, best = candidates[0]
                return {
                    'response': best['bot_response'],
                    'followup': best['bot_followup'],
                    'combined': best['combined_response'],
                    'emotion_category': best['emotion_category'],
                    'distance': float(distance),
                    'confidence': self._calculate_confidence(distance),
                }

        except Exception as e:
            print(f"Error retrieving response: {e}")

        return None

    def _calculate_confidence(self, distance):
        """Map a search distance to a confidence score clamped to [0, 1].

        Lower distance means higher confidence: 0 maps to 1.0 and anything
        at or above 2 maps to 0.0.
        """
        # NOTE(review): IndexFlatL2 reports squared L2 distances; if the
        # embeddings are unit-normalised those span 0..4 rather than 0..2 --
        # confirm the divisor against real data before tuning thresholds.
        return float(max(0.0, min(1.0, 1 - (distance / 2))))
141
+
142
+
143
+ # ==================== INTEGRATION WITH MAIN APP ====================
144
+
145
def get_enhanced_response(user_input, emotion, rag_system, confidence_threshold=0.6):
    """Return a chatbot reply: RAG knowledge base first, LLM as fallback.

    This single definition replaces two same-named definitions; the first one
    returned ``None`` on a weak match and was shadowed by the second. It also
    calls the function that is actually imported: the previous code imported
    ``generate_with_hf`` but called the undefined name ``generate_response``,
    so every fallback raised ``NameError``.

    Args:
        user_input: User's message.
        emotion: Detected emotion label, passed to retrieval and the prompt.
        rag_system: Object exposing ``retrieve_response(query, emotion, top_k)``
            (an ``EnhancedRAGSystem`` instance).
        confidence_threshold: A retrieved answer is used only when its
            ``confidence`` is strictly greater than this value.

    Returns:
        The stored combined response on a confident match, otherwise whatever
        ``hf_llm.generate_with_hf`` returns for the support prompt.
    """
    rag_result = rag_system.retrieve_response(user_input, emotion, top_k=3)

    if rag_result and rag_result["confidence"] > confidence_threshold:
        return rag_result["combined"]

    # Imported lazily so the retrieval-only path still works when the LLM
    # backend module is not installed.
    # NOTE(review): assumes generate_with_hf accepts the prompt as its single
    # positional argument, like the generate_response call it replaces --
    # confirm against hf_llm.
    from hf_llm import generate_with_hf

    prompt = (
        "\n"
        "You are an empathetic mental health support assistant.\n"
        f"User emotion: {emotion}\n"
        f"User message: {user_input}\n"
        "\n"
        "Respond calmly, safely, and supportively.\n"
        "Avoid giving medical diagnoses.\n"
    )

    return generate_with_hf(prompt)
184
+
185
+
186
+
187
+ # ==================== USAGE EXAMPLE ====================
188
+
189
if __name__ == "__main__":
    # Manual smoke test: build the RAG system from the default folder and
    # print the reply produced for a handful of (message, emotion) pairs.
    demo_rag = EnhancedRAGSystem(rag_directory="rag_knowledges")

    sample_turns = (
        ("I passed my exam today!", "joy"),
        ("I'm feeling really sad and lonely", "sadness"),
        ("I got promoted at work", "happiness"),
        ("Hey, what's up?", "neutral"),
        ("I'm so stressed about my exams", "anxiety"),
        ("I came from school and got hurt through bus", "sadness"),
    )

    banner = "=" * 80
    divider = "-" * 80

    print(f"\n{banner}")
    print("TESTING ENHANCED RAG SYSTEM")
    print(f"{banner}\n")

    for message, mood in sample_turns:
        print(f"USER ({mood}): {message}")

        reply = get_enhanced_response(message, mood, demo_rag)

        # Only the first 200 characters are shown to keep the output compact.
        print(f"BOT: {reply[:200]}...")
        print(f"{divider}\n")