Sebunya commited on
Commit
ddbf2de
·
verified ·
1 Parent(s): e1eb7f3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +381 -212
app.py CHANGED
@@ -1,12 +1,13 @@
1
  import uuid
2
  import os
3
- import shutil
4
  import gradio as gr
5
  import pandas as pd
 
 
 
 
6
  import chromadb
7
  from langchain_chroma import Chroma
8
- from langchain_google_genai import GoogleGenerativeAIEmbeddings # <--- NEW LIBRARY
9
- import google.generativeai as genai
10
  import gspread
11
  from google.oauth2.service_account import Credentials
12
  from langgraph.checkpoint.sqlite import SqliteSaver
@@ -14,9 +15,10 @@ import sqlite3
14
  import json
15
  from datetime import datetime
16
  import re
17
- from typing import Tuple
18
  import time
19
  from contextlib import contextmanager
 
20
  import logging
21
  import traceback
22
  import sys
@@ -34,6 +36,7 @@ def log_exception(exc_type, exc_value, exc_traceback):
34
  logging.critical("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))
35
 
36
  sys.excepthook = log_exception
 
37
 
38
  # ===== Time Tracking Class =====
39
  class PipelineTimer:
@@ -41,295 +44,461 @@ class PipelineTimer:
41
  self.reset()
42
 
43
  def reset(self):
 
44
  self.start_time = time.time()
45
  self.step_times = {}
 
46
  self.current_step = None
47
 
48
  @contextmanager
49
  def time_step(self, step_name: str):
 
50
  step_start = time.time()
51
  self.current_step = step_name
52
  try:
53
  yield
54
  finally:
55
  step_end = time.time()
56
- self.step_times[step_name] = round((step_end - step_start) * 1000, 2)
57
  self.current_step = None
58
 
59
  def get_total_time(self):
 
60
  return round((time.time() - self.start_time) * 1000, 2)
61
 
62
  def get_timing_summary(self):
 
 
63
  return {
64
- 'total_time_ms': self.get_total_time(),
65
- 'step_times': self.step_times
 
66
  }
67
 
 
68
  timer = PipelineTimer()
69
 
70
  # === Configuration ===
71
- api_key = os.environ.get("GEMINI_API_KEY")
72
- if not api_key:
73
- raise ValueError("GEMINI_API_KEY not found in environment variables")
74
-
75
- genai.configure(api_key=api_key)
76
  llm_model_name = "models/gemma-3-4b-it"
77
  collection_name = "xeno_collection"
78
- persist_directory = "/tmp/xeno_db"
79
 
80
  # === Google Sheets Setup ===
81
- sheets_available = False
82
- response_sheet = None
83
- timing_sheet = None
 
 
 
 
 
84
 
85
- def setup_google_sheets():
86
- global sheets_available, response_sheet, timing_sheet
87
- try:
88
- credentials_json = os.environ.get("GOOGLE_SHEETS_CREDENTIALS")
89
- if not credentials_json:
90
- if os.path.exists("credentials.json"):
91
- creds = Credentials.from_service_account_file("credentials.json", scopes=["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"])
92
- else:
93
- return
94
- else:
95
- creds = Credentials.from_service_account_info(json.loads(credentials_json), scopes=["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"])
96
-
97
- client_gspread = gspread.authorize(creds)
98
- spreadsheet = client_gspread.open("Response_Log")
99
- response_sheet = spreadsheet.sheet1
100
- try:
101
- timing_sheet = spreadsheet.worksheet("Timing_Log")
102
- except:
103
- timing_sheet = spreadsheet.add_worksheet(title="Timing_Log", rows="1000", cols="15")
104
- timing_sheet.append_row(["Timestamp", "Session_ID", "Question", "Total_Time_MS", "Error", "Notes"])
105
- sheets_available = True
106
- print("Google Sheets connected.")
107
- except Exception as e:
108
- print(f"Google Sheets logging disabled: {e}")
109
 
110
- setup_google_sheets()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
 
112
- def log_response(question, answer, source_ids, session_id):
113
- if not sheets_available: return
 
 
 
 
 
 
 
 
 
114
  try:
115
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
116
- # Truncate content to prevent API limits
117
- response_sheet.append_row([timestamp, session_id, question[:500], answer[:1000], source_ids])
118
  except Exception as e:
119
- print(f"Sheet Log Error: {e}")
 
 
120
 
121
  def log_timing_data(question, session_id, timing_summary, error_step=None, notes=None):
122
- if not sheets_available: return
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  try:
124
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
125
- # FIX: Truncate notes to prevent 50,000 char limit crash
126
- safe_notes = str(notes)[:2000] if notes else ""
127
- row = [
128
- timestamp, session_id, question[:100],
129
- timing_summary['total_time_ms'],
130
- error_step or "",
131
- safe_notes
132
- ]
133
  timing_sheet.append_row(row)
 
134
  except Exception as e:
135
- print(f"Timing Log Error: {e}")
 
 
 
136
 
137
- # === LangGraph Memory ===
138
  conn = sqlite3.connect("xeno_memory.db", check_same_thread=False)
139
  memory = SqliteSaver(conn=conn)
140
 
141
  def update_memory(config, user_message, assistant_message):
142
- full_checkpoint = memory.get(config) or {}
143
- messages = full_checkpoint.get("channel_values", {}).get("messages", [])
144
- messages.append({"role": "user", "content": user_message})
145
- messages.append({"role": "assistant", "content": assistant_message})
146
- checkpoint = {
147
- "v": 1, "id": str(uuid.uuid4()), "ts": datetime.now().isoformat(),
148
- "channel_values": {"messages": messages}, "channel_versions": {}, "versions_seen": {}
149
- }
150
- memory.put(config, checkpoint, {}, {})
 
 
 
 
 
 
 
 
 
151
 
152
  def retrieve_memory(config):
153
- full_checkpoint = memory.get(config) or {}
154
- return full_checkpoint.get("channel_values", {}).get("messages", [])
 
 
155
 
156
- # === Intent Classification ===
157
  class IntentClassifier:
158
  def __init__(self):
159
  self.intent_patterns = {
160
  'greeting': {
161
- 'patterns': [r'\b(hi|hello|hey|good morning)\b'],
162
- 'responses': ["Hello! How can I help you with XENO today?"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  }
164
  }
165
 
166
  def classify_intent(self, message: str) -> Tuple[str, str]:
167
- # FIX: If message is > 5 words, assume it's a query even if it says "Hello"
168
- if len(message.split()) > 5:
169
- return 'query', ''
170
-
171
- for intent_name, data in self.intent_patterns.items():
172
- for pattern in data['patterns']:
173
- if re.search(pattern, message.lower()):
174
- return intent_name, data['responses'][0]
 
 
175
  return 'query', ''
 
 
 
 
176
 
177
  intent_classifier = IntentClassifier()
178
 
179
- # === Knowledge Base & ChromaDB ===
180
- # FIX: Use Official LangChain Google Embeddings
181
- embeddings = GoogleGenerativeAIEmbeddings(
182
- model="models/embedding-001",
183
- google_api_key=api_key,
184
- task_type="retrieval_document"
185
- )
186
 
187
- # Load JSON Data
188
- try:
189
- df_kb = pd.read_json("XENO_Uganda_KnowledgeBase_Advisory.json")
190
- df_kb.dropna(subset=['Content'], inplace=True)
191
-
192
  documents, metadatas, ids = [], [], []
193
- for item in df_kb.to_dict('records'):
194
  documents.append(f"Question: {item['Question']}\nAnswer: {item['Content']}")
195
- metadatas.append({"question": item["Question"], "content": item["Content"], "id": str(item["ID"])})
196
- ids.append(str(item["ID"]))
197
- except Exception as e:
198
- print(f"Error loading JSON: {e}")
199
- documents, metadatas, ids = [], [], []
 
 
 
 
 
 
 
 
 
200
 
201
- # Setup Chroma
202
- # Reset DB if it exists to prevent format conflicts from previous run
203
- if os.path.exists(persist_directory):
204
  try:
205
- # We try to load it. If it fails, we delete and recreate.
206
- client = chromadb.PersistentClient(path=persist_directory)
207
  collection = client.get_collection(name=collection_name)
 
208
  except:
209
- print("Database corrupted or format mismatch. Rebuilding...")
210
- shutil.rmtree(persist_directory, ignore_errors=True)
211
- client = chromadb.PersistentClient(path=persist_directory)
212
  collection = client.create_collection(name=collection_name)
213
- if documents:
214
- # Batch add to avoid limits
215
- batch_size = 100
216
- for i in range(0, len(documents), batch_size):
217
- collection.add(
218
- documents=documents[i:i+batch_size],
219
- metadatas=metadatas[i:i+batch_size],
220
- ids=ids[i:i+batch_size]
221
- )
222
- else:
223
- client = chromadb.PersistentClient(path=persist_directory)
224
- collection = client.get_or_create_collection(name=collection_name)
225
- if collection.count() == 0 and documents:
226
- print("Populating new database...")
227
- batch_size = 100
228
- for i in range(0, len(documents), batch_size):
229
- collection.add(
230
- documents=documents[i:i+batch_size],
231
- metadatas=metadatas[i:i+batch_size],
232
- ids=ids[i:i+batch_size]
233
- )
234
 
235
- # LangChain Wrapper
236
- vector_store = Chroma(
237
- client=client,
238
- collection_name=collection_name,
239
- embedding_function=embeddings # Using the official wrapper now
240
- )
241
 
242
- # === LLM Response ===
243
- def generate_response(context, question, history):
244
- model = genai.GenerativeModel(llm_model_name)
245
- history_text = "\n".join([f"{m['role']}: {m['content']}" for m in history[-5:]])
246
-
247
- prompt = f"""You are XENO Support. Answer based on Context.
248
-
249
- CONTEXT:
250
- {context}
251
-
252
- HISTORY:
253
- {history_text}
254
-
255
- USER: {question}
256
- """
257
-
258
- try:
259
- return model.generate_content(prompt).text.strip()
260
- except Exception as e:
261
- return "I'm having trouble connecting to the server. Please try again."
 
 
 
 
 
 
 
 
262
 
263
- # === Main Logic ===
264
- def process_message(message, history, session_id):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  timer.reset()
266
- config = {"configurable": {"thread_id": str(session_id)}}
267
  notes = []
268
 
269
  try:
270
- # 1. Intent
271
- intent, response = intent_classifier.classify_intent(message)
 
 
 
 
 
 
 
 
 
 
 
272
  if intent != 'query':
273
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
 
275
- # 2. RAG
276
- with timer.time_step("rag_search"):
277
- # Use k=4 for better breadth
278
- results = vector_store.similarity_search_with_score(message, k=4)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
279
 
280
- # 3. Context Builder
281
- context_parts = []
282
- valid_docs = 0
283
- source_ids = []
284
 
285
- for doc, score in results:
286
- # Chroma score: Lower is closer distance (better)
287
- # Typically 0.0 to 1.5 range for these embeddings
288
- if score < 1.2: # Threshold
289
- context_parts.append(f"Q: {doc.metadata['question']}\nA: {doc.metadata['content']}")
290
- source_ids.append(doc.metadata['id'])
291
- valid_docs += 1
 
 
292
 
293
- if valid_docs == 0:
294
- context = "No relevant context found."
295
- notes.append("No relevant docs")
296
- else:
297
- context = "\n---\n".join(context_parts)
298
-
299
- # 4. Generate
300
- history_msgs = retrieve_memory(config)
301
- with timer.time_step("llm_gen"):
302
- answer = generate_response(context, message, history_msgs)
303
 
304
- # 5. Save & Log
305
- update_memory(config, message, answer)
306
- log_response(message, answer, str(source_ids), session_id)
307
- log_timing_data(message, session_id, timer.get_timing_summary(), notes=";".join(notes))
308
 
309
- return answer
 
 
 
 
 
 
 
 
 
 
310
 
311
- except Exception as e:
312
- # Log the truncated error to avoid Sheets crash
313
- err_msg = str(e)[:500]
314
- print(f"Error: {e}")
315
- traceback.print_exc()
316
- log_timing_data(message, session_id, timer.get_timing_summary(), error_step="Pipeline", notes=err_msg)
317
- return "I apologize, I'm encountering a technical issue. Please contact support directly."
318
-
319
- # === UI ===
320
- def chat_wrapper(msg, hist, sess_id):
321
- if not sess_id: sess_id = str(uuid.uuid4())
322
- resp = process_message(msg, hist, sess_id)
323
- hist.append((msg, resp))
324
- return "", hist
325
-
326
- with gr.Blocks(theme=gr.themes.Soft()) as demo:
327
- gr.Markdown("# XENO Assistant")
328
- sess = gr.Textbox(value=lambda: str(uuid.uuid4()), visible=False)
329
- chat = gr.Chatbot(height=500)
330
- inp = gr.Textbox(placeholder="Ask a question...")
331
 
332
- inp.submit(chat_wrapper, [inp, chat, sess], [inp, chat])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
 
334
  if __name__ == "__main__":
335
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
1
  import uuid
2
  import os
 
3
  import gradio as gr
4
  import pandas as pd
5
+ import torch
6
+ import numpy as np
7
+ from sentence_transformers import util
8
+ import google.generativeai as genai
9
  import chromadb
10
  from langchain_chroma import Chroma
 
 
11
  import gspread
12
  from google.oauth2.service_account import Credentials
13
  from langgraph.checkpoint.sqlite import SqliteSaver
 
15
  import json
16
  from datetime import datetime
17
  import re
18
+ from typing import Dict, List, Tuple
19
  import time
20
  from contextlib import contextmanager
21
+
22
  import logging
23
  import traceback
24
  import sys
 
36
  logging.critical("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))
37
 
38
  sys.excepthook = log_exception
39
+ logging.info("App started successfully.")
40
 
41
  # ===== Time Tracking Class =====
42
  class PipelineTimer:
 
44
  self.reset()
45
 
46
  def reset(self):
47
+ """Reset all timing data for a new request"""
48
  self.start_time = time.time()
49
  self.step_times = {}
50
+ self.step_start = None
51
  self.current_step = None
52
 
53
  @contextmanager
54
  def time_step(self, step_name: str):
55
+ """Context manager to time a specific step"""
56
  step_start = time.time()
57
  self.current_step = step_name
58
  try:
59
  yield
60
  finally:
61
  step_end = time.time()
62
+ self.step_times[step_name] = round((step_end - step_start) * 1000, 2) # Convert to milliseconds
63
  self.current_step = None
64
 
65
  def get_total_time(self):
66
+ """Get total elapsed time since reset"""
67
  return round((time.time() - self.start_time) * 1000, 2)
68
 
69
  def get_timing_summary(self):
70
+ """Get a summary of all timing data"""
71
+ total_time = self.get_total_time()
72
  return {
73
+ 'total_time_ms': total_time,
74
+ 'step_times': self.step_times,
75
+ 'timestamp': datetime.now().isoformat()
76
  }
77
 
78
+ # Initialize global timer
79
  timer = PipelineTimer()
80
 
81
  # === Configuration ===
82
+ genai.configure(api_key=os.environ["GEMINI_API_KEY"])
83
+ embedding_model = "models/embedding-001"
 
 
 
84
  llm_model_name = "models/gemma-3-4b-it"
85
  collection_name = "xeno_collection"
 
86
 
87
  # === Google Sheets Setup ===
88
+ def get_google_sheets_credentials():
89
+ credentials_json = os.environ.get("GOOGLE_SHEETS_CREDENTIALS")
90
+ if not credentials_json:
91
+ raise ValueError("GOOGLE_SHEETS_CREDENTIALS environment variable not set.")
92
+ credentials_dict = json.loads(credentials_json)
93
+ scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
94
+ creds = Credentials.from_service_account_info(credentials_dict, scopes=scope)
95
+ return creds
96
 
97
+ client_gspread = gspread.authorize(get_google_sheets_credentials())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
+ # Open the Google Sheet and get both sheets
100
+ spreadsheet = client_gspread.open("Response_Log")
101
+ response_sheet = spreadsheet.sheet1 # Main response log
102
+ try:
103
+ timing_sheet = spreadsheet.worksheet("Timing_Log")
104
+ except:
105
+ # Create timing sheet if it doesn't exist
106
+ timing_sheet = spreadsheet.add_worksheet(title="Timing_Log", rows="1000", cols="15")
107
+ # Add headers
108
+ headers = [
109
+ "Timestamp", "Session_ID", "Question", "Total_Time_MS",
110
+ "Intent_Classification_MS", "Memory_Retrieval_MS", "RAG_Retrieval_MS",
111
+ "Embedding_Generation_MS", "Similarity_Calculation_MS", "Context_Processing_MS",
112
+ "LLM_Generation_MS", "Memory_Update_MS", "Logging_MS", "Error_Step", "Notes"
113
+ ]
114
+ timing_sheet.append_row(headers)
115
 
116
+ def log_response(question, answer, source_ids, knowledge_pairs, session_id):
117
+ """Original response logging function"""
118
+ timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
119
+ knowledge_question_1 = knowledge_pairs[0][0] if len(knowledge_pairs) > 0 else "N/A"
120
+ knowledge_answer_1 = knowledge_pairs[0][1] if len(knowledge_pairs) > 0 else "N/A"
121
+ knowledge_question_2 = knowledge_pairs[1][0] if len(knowledge_pairs) > 1 else "N/A"
122
+ knowledge_answer_2 = knowledge_pairs[1][1] if len(knowledge_pairs) > 1 else "N/A"
123
+ row = [
124
+ timestamp, session_id, question, answer, source_ids,
125
+ knowledge_question_1, knowledge_answer_1, knowledge_question_2, knowledge_answer_2
126
+ ]
127
  try:
128
+ response_sheet.append_row(row)
129
+ print(f"Logged response: {question} | Source IDs: {source_ids}")
 
130
  except Exception as e:
131
+ print(f"Failed to log to Google Sheet: {e}")
132
+ with open("/tmp/response_log.txt", "a") as f:
133
+ f.write(f"{timestamp},{question},{answer},{source_ids},{knowledge_question_1},{knowledge_answer_1},{knowledge_question_2},{knowledge_answer_2}\n")
134
 
135
  def log_timing_data(question, session_id, timing_summary, error_step=None, notes=None):
136
+ """Log timing data to the timing sheet"""
137
+ timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
138
+ step_times = timing_summary['step_times']
139
+
140
+ row = [
141
+ timestamp,
142
+ session_id,
143
+ question[:100] + "..." if len(question) > 100 else question, # Truncate long questions
144
+ timing_summary['total_time_ms'],
145
+ step_times.get('intent_classification', 0),
146
+ step_times.get('memory_retrieval', 0),
147
+ step_times.get('rag_retrieval', 0),
148
+ step_times.get('embedding_generation', 0),
149
+ step_times.get('similarity_calculation', 0),
150
+ step_times.get('context_processing', 0),
151
+ step_times.get('llm_generation', 0),
152
+ step_times.get('memory_update', 0),
153
+ step_times.get('response_logging', 0),
154
+ error_step or "",
155
+ notes or ""
156
+ ]
157
+
158
  try:
 
 
 
 
 
 
 
 
 
159
  timing_sheet.append_row(row)
160
+ print(f"Logged timing data: Total {timing_summary['total_time_ms']}ms")
161
  except Exception as e:
162
+ print(f"Failed to log timing data: {e}")
163
+ # Fallback to local file
164
+ with open("/tmp/timing_log.txt", "a") as f:
165
+ f.write(f"{timestamp},{session_id},{question},{timing_summary}\n")
166
 
167
+ # === LangGraph Memory Setup ===
168
  conn = sqlite3.connect("xeno_memory.db", check_same_thread=False)
169
  memory = SqliteSaver(conn=conn)
170
 
171
  def update_memory(config, user_message, assistant_message):
172
+ """Update memory with timing"""
173
+ with timer.time_step("memory_update"):
174
+ full_checkpoint = memory.get(config) or {}
175
+ messages = full_checkpoint.get("channel_values", {}).get("messages", [])
176
+
177
+ messages.append({"role": "user", "content": user_message})
178
+ messages.append({"role": "assistant", "content": assistant_message})
179
+
180
+ checkpoint_to_save = {
181
+ "v": 1,
182
+ "id": str(uuid.uuid4()),
183
+ "ts": datetime.now().isoformat(),
184
+ "channel_values": {"messages": messages},
185
+ "channel_versions": {},
186
+ "versions_seen": {},
187
+ }
188
+
189
+ memory.put(config, checkpoint_to_save, {}, {})
190
 
191
  def retrieve_memory(config):
192
+ """Retrieve memory with timing"""
193
+ with timer.time_step("memory_retrieval"):
194
+ full_checkpoint = memory.get(config) or {}
195
+ return full_checkpoint.get("channel_values", {}).get("messages", [])
196
 
197
+ # === Intent Classification System ===
198
  class IntentClassifier:
199
  def __init__(self):
200
  self.intent_patterns = {
201
  'greeting': {
202
+ 'patterns': [
203
+ r'\b(hi|hello|hey|good morning|good afternoon|good evening|greetings)\b',
204
+ r'^(hi|hello|hey)[\s!.]*$',
205
+ r'\b(how are you|how do you do)\b'
206
+ ],
207
+ 'responses': [
208
+ "Hello! I'm XENO Assistant. How can I help you with XENO financial services today?",
209
+ "Hi there! I'm here to assist you with any questions about XENO services. What can I help you with?",
210
+ "Good day! Welcome to XENO Support. How may I assist you today?"
211
+ ]
212
+ },
213
+ 'thanks': {
214
+ 'patterns': [
215
+ r'\b(thank you|thanks|thank u|thx|appreciate|grateful)\b',
216
+ r'^(thanks|thank you)[\s!.]*$',
217
+ r'\b(much appreciated|thanks a lot|thank you so much)\b'
218
+ ],
219
+ 'responses': [
220
+ "You're welcome! Is there anything else I can help you with regarding XENO services?",
221
+ "Happy to help! Feel free to ask if you have any other questions about XENO.",
222
+ "Glad I could assist you! Let me know if you need help with anything else."
223
+ ]
224
+ },
225
+ 'goodbye': {
226
+ 'patterns': [
227
+ r'\b(bye|goodbye|see you|farewell|take care|have a good day)\b',
228
+ r'^(bye|goodbye)[\s!.]*$',
229
+ r'\b(talk to you later|see you later|until next time)\b'
230
+ ],
231
+ 'responses': [
232
+ "Goodbye! Thank you for using XENO services. Have a great day!",
233
+ "Take care! Feel free to return anytime you need help with XENO services.",
234
+ "Have a wonderful day! Don't hesitate to reach out if you need assistance with XENO."
235
+ ]
236
  }
237
  }
238
 
239
  def classify_intent(self, message: str) -> Tuple[str, str]:
240
+ """Classify intent with timing"""
241
+ message_lower = message.lower().strip()
242
+
243
+ for intent_name, intent_data in self.intent_patterns.items():
244
+ for pattern in intent_data['patterns']:
245
+ if re.search(pattern, message_lower, re.IGNORECASE):
246
+ import random
247
+ response = random.choice(intent_data['responses'])
248
+ return intent_name, response
249
+
250
  return 'query', ''
251
+
252
+ def is_simple_intent(self, intent: str) -> bool:
253
+ simple_intents = ['greeting', 'thanks']
254
+ return intent in simple_intents
255
 
256
  intent_classifier = IntentClassifier()
257
 
258
+ # === Load and Clean Knowledge Base ===
259
+ df_kb = pd.read_json("XENO_Uganda_KnowledgeBase_Advisory.json")
260
+ df_kb.dropna(subset=['Content'], inplace=True)
 
 
 
 
261
 
262
+ def prepare_documents(data):
 
 
 
 
263
  documents, metadatas, ids = [], [], []
264
+ for item in data:
265
  documents.append(f"Question: {item['Question']}\nAnswer: {item['Content']}")
266
+ metadatas.append({
267
+ "question": item["Question"],
268
+ "content": item["Content"],
269
+ "section": item.get("Section", ""),
270
+ "source": item.get("Source", ""),
271
+ "owner": item.get("Owner", ""),
272
+ "tag": item.get("Tag", ""),
273
+ "id": item["ID"]
274
+ })
275
+ ids.append(item["ID"])
276
+ return documents, metadatas, ids
277
+
278
+ xeno_data_list = df_kb.to_dict('records')
279
+ documents, metadatas, ids = prepare_documents(xeno_data_list)
280
 
281
+ # === Setup ChromaDB ===
282
+ try:
283
+ client = chromadb.PersistentClient(path="/tmp/xeno_db")
284
  try:
 
 
285
  collection = client.get_collection(name=collection_name)
286
+ print(f"Loaded existing ChromaDB collection: {collection_name}")
287
  except:
288
+ print(f"Creating new ChromaDB collection: {collection_name}")
 
 
289
  collection = client.create_collection(name=collection_name)
290
+ collection.add(documents=documents, metadatas=metadatas, ids=ids)
291
+ except Exception as e:
292
+ print(f"Failed to initialize ChromaDB: {e}")
293
+ raise
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
 
295
+ vector_store = Chroma(client=client, collection_name=collection_name)
296
+ retriever = vector_store.as_retriever(search_type="similarity", search_kwargs={"k": 4})
 
 
 
 
297
 
298
+ # === Prompt System ===
299
+ SYSTEM_PROMPT = """You are a friendly XENO Support Assistant, an AI-powered helpful and professional customer service representative.
300
+ Use only the information provided in the knowledge base context to answer user queries.
301
+ Do not hallucinate. If context doesn't contain relevant info, say so in a calm polite manner by saying I'm sorry, I can't assist with that.
302
+ Only use context that is clearly relevant to the user's question.
303
+ For greetings like "hi" or "hello", respond politely without using the context.
304
+ remember previous conversations."""
305
+
306
+ # === Context Processing ===
307
+ def process_context(results, cosine_scores, max_results=2):
308
+ """Process context with timing"""
309
+ with timer.time_step("context_processing"):
310
+ sorted_indices = np.argsort(cosine_scores)[::-1][:max_results]
311
+ formatted_context = ""
312
+ source_ids = []
313
+ knowledge_pairs = []
314
+ for i, idx in enumerate(sorted_indices, 1):
315
+ result = results[idx]
316
+ score = cosine_scores[idx]
317
+ question = result.metadata.get('question', 'N/A')
318
+ answer = result.metadata.get('content', 'N/A')
319
+ formatted_context += f"Knowledge Entry {i}:\n"
320
+ formatted_context += f"Q: {question}\n"
321
+ formatted_context += f"A: {answer}\n"
322
+ formatted_context += "-" * 40 + "\n"
323
+ source_ids.append(result.metadata.get('id', 'N/A'))
324
+ knowledge_pairs.append((question, answer))
325
+ return formatted_context, source_ids, knowledge_pairs
326
 
327
+ # === LLM Generation ===
328
+ def generate_xeno_response(context, question, chat_history):
329
+ """Generate response with timing"""
330
+ with timer.time_step("llm_generation"):
331
+ model = genai.GenerativeModel(llm_model_name)
332
+ formatted_history = "\n".join(
333
+ [f"{msg['role'].capitalize()}: {msg['content']}" for msg in chat_history]
334
+ ) if chat_history else "None"
335
+
336
+ prompt = f"{SYSTEM_PROMPT}\n### HISTORY ###\n{formatted_history}\n### CONTEXT ###\n{context}\n### QUESTION ###\n{question}"
337
+
338
+ response = model.generate_content(prompt)
339
+ return response.text.strip()
340
+
341
+ # === Main Interface Logic ===
342
+ def get_context_and_answer(message, history, session_id="default"):
343
+ """Main pipeline with comprehensive timing"""
344
+ # Reset timer for new request
345
  timer.reset()
346
+ error_step = None
347
  notes = []
348
 
349
  try:
350
+ config = {"configurable": {"thread_id": str(session_id), "checkpoint_ns": ""}}
351
+
352
+ # Step 1: Intent Classification
353
+ with timer.time_step("intent_classification"):
354
+ intent, direct_response = intent_classifier.classify_intent(message)
355
+
356
+ # Step 2: Memory Retrieval
357
+ chat_history = retrieve_memory(config)
358
+
359
+ answer = ""
360
+ source_ids = "N/A"
361
+ knowledge_pairs = []
362
+
363
  if intent != 'query':
364
+ answer = direct_response
365
+ notes.append(f"Simple intent: {intent}")
366
+ else:
367
+ if len(message.strip()) < 3:
368
+ answer = "I'd be happy to help! Could you please provide more details about what you'd like to know?"
369
+ notes.append("Message too short")
370
+ else:
371
+ try:
372
+ # Step 3: RAG Retrieval
373
+ with timer.time_step("rag_retrieval"):
374
+ queried_results = retriever.invoke(message)
375
+
376
+ # Step 4: Embedding Generation
377
+ with timer.time_step("embedding_generation"):
378
+ query_embedding = genai.embed_content(
379
+ model=embedding_model,
380
+ content=message,
381
+ task_type="retrieval_query"
382
+ )['embedding']
383
+
384
+ doc_embeddings = [
385
+ genai.embed_content(
386
+ model=embedding_model,
387
+ content=doc.page_content,
388
+ task_type="retrieval_document"
389
+ )['embedding']
390
+ for doc in queried_results
391
+ ]
392
+
393
+ # Step 5: Similarity Calculation
394
+ with timer.time_step("similarity_calculation"):
395
+ cosine_scores = util.cos_sim(
396
+ torch.tensor(query_embedding).float(),
397
+ torch.tensor(doc_embeddings).float()
398
+ )[0].tolist()
399
+ max_score = max(cosine_scores)
400
 
401
+ if max_score < 0.4:
402
+ answer = "I'm sorry, I couldn't find specific information for your question. Could you try rephrasing it, or contact XENO support directly?"
403
+ notes.append(f"Low similarity score: {max_score:.3f}")
404
+ else:
405
+ # Step 6: Context Processing (timed within function)
406
+ context, source_ids_list, knowledge_pairs = process_context(queried_results, cosine_scores)
407
+
408
+ # Step 7: LLM Generation (timed within function)
409
+ answer = generate_xeno_response(context, message, chat_history)
410
+ source_ids = ", ".join(source_ids_list)
411
+ notes.append(f"Max similarity: {max_score:.3f}")
412
+
413
+ except Exception as e:
414
+ error_step = timer.current_step or "rag_processing"
415
+ print(f"Error during RAG processing: {e}")
416
+ answer = "I apologize, but I'm having a technical issue. Please try again shortly or contact XENO support."
417
+ notes.append(f"Error: {str(e)}")
418
+
419
+ # Step 8: Memory Update (timed within function)
420
+ update_memory(config, message, answer)
421
 
422
+ # Step 9: Response Logging
423
+ with timer.time_step("response_logging"):
424
+ log_response(message, answer, source_ids, knowledge_pairs, session_id)
 
425
 
426
+ # Log timing data
427
+ timing_summary = timer.get_timing_summary()
428
+ log_timing_data(
429
+ message,
430
+ session_id,
431
+ timing_summary,
432
+ error_step=error_step,
433
+ notes="; ".join(notes) if notes else None
434
+ )
435
 
436
+ return answer
 
 
 
 
 
 
 
 
 
437
 
438
+ except Exception as e:
439
+ error_step = timer.current_step or "main_pipeline"
440
+ logging.error(f"Error in main pipeline: {e}")
441
+ logging.error(traceback.format_exc())
442
 
443
+ # Still log timing data even on error
444
+ timing_summary = timer.get_timing_summary()
445
+ log_timing_data(
446
+ message,
447
+ session_id,
448
+ timing_summary,
449
+ error_step=error_step,
450
+ notes=f"Pipeline error: {str(e)}"
451
+ )
452
+
453
+ return "I apologize, but I encountered an error processing your request. Please try again."
454
 
455
+ # === Enhanced Gradio UI ===
456
+ def respond(message, history, session_id):
457
+ """Gradio's main response function"""
458
+ if not session_id:
459
+ session_id = str(uuid.uuid4())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
460
 
461
+ bot_response = get_context_and_answer(message, history, session_id)
462
+ history.append([message, bot_response])
463
+
464
+ return "", history
465
+
466
+ def create_interface():
467
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
468
+ gr.Markdown("""
469
+ # ASKXENO
470
+ **Welcome to XENO AI Support!**
471
+
472
+ I can help you with questions about XENO financial services including:
473
+ - Account management and setup
474
+ - Transaction processes and fees
475
+ - Platform features and troubleshooting
476
+ - General service information
477
+
478
+ *Simply type your question below to get started!*
479
+ """)
480
+
481
+ session_id_box = gr.Textbox(label="Session ID", value=str(uuid.uuid4()), interactive=True)
482
+
483
+ chatbot = gr.Chatbot(
484
+ label="XENO Assistant",
485
+ bubble_full_width=False,
486
+ height=500
487
+ )
488
+
489
+ with gr.Row():
490
+ msg = gr.Textbox(
491
+ label="Your Message",
492
+ placeholder="Type your question here...",
493
+ scale=3,
494
+ )
495
+ send_button = gr.Button("Send", variant="primary", scale=1)
496
+
497
+ send_button.click(respond, [msg, chatbot, session_id_box], [msg, chatbot])
498
+ msg.submit(respond, [msg, chatbot, session_id_box], [msg, chatbot])
499
+
500
+ return demo
501
 
502
  if __name__ == "__main__":
503
+ iface = create_interface()
504
+ iface.launch(share=False, server_name="0.0.0.0", server_port=7860, ssr_mode=False)