Sebunya mukiibi committed on
Commit
2ce8410
·
verified ·
1 Parent(s): 19b5af3

Adding time stamps in code (#15)

Browse files

- Adding time stamps in code (b26e198650fca727c7af1df3674954354dd7bd95)


Co-authored-by: MUKIIBI ROGERS <mukiibi@users.noreply.huggingface.co>

Files changed (1) hide show
  1. app.py +269 -141
app.py CHANGED
@@ -16,6 +16,8 @@ import json
16
  from datetime import datetime
17
  import re
18
  from typing import Dict, List, Tuple
 
 
19
 
20
  import logging
21
  import traceback
@@ -23,31 +25,58 @@ import sys
23
 
24
  # ===== Configure Logging =====
25
  logging.basicConfig(
26
- filename="app.log", # All logs will be saved here
27
- level=logging.INFO, # Change to DEBUG for more detail
28
  format="%(asctime)s - %(levelname)s - %(message)s"
29
  )
30
 
31
- # ===== Capture Uncaught Exceptions =====
32
  def log_exception(exc_type, exc_value, exc_traceback):
33
  if issubclass(exc_type, KeyboardInterrupt):
34
- return # Don't log keyboard interrupts
35
  logging.critical("Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback))
36
 
37
  sys.excepthook = log_exception
38
-
39
- # ===== Optional: Log that the app started =====
40
  logging.info("App started successfully.")
41
 
42
- # ===== Example: Use logging inside try/except =====
43
- def example_function():
44
- try:
45
- result = 1 / 0 # Intentional error
46
- except Exception as e:
47
- logging.error("Error in example_function: %s", e)
48
- logging.error(traceback.format_exc())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
- logging.info("Test log entry to check logger")
 
51
 
52
  # === Configuration ===
53
  genai.configure(api_key=os.environ["GEMINI_API_KEY"])
@@ -55,7 +84,7 @@ embedding_model = "models/embedding-001"
55
  llm_model_name = "models/gemma-3-4b-it"
56
  collection_name = "xeno_collection"
57
 
58
- # === Google Sheets Setup for Hugging Face ===
59
  def get_google_sheets_credentials():
60
  credentials_json = os.environ.get("GOOGLE_SHEETS_CREDENTIALS")
61
  if not credentials_json:
@@ -65,72 +94,109 @@ def get_google_sheets_credentials():
65
  creds = Credentials.from_service_account_info(credentials_dict, scopes=scope)
66
  return creds
67
 
68
- # Authenticate with Google Sheets
69
  client_gspread = gspread.authorize(get_google_sheets_credentials())
70
 
71
- # Open the Google Sheet
72
- sheet = client_gspread.open("Response_Log").sheet1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
 
74
  def log_response(question, answer, source_ids, knowledge_pairs, session_id):
75
- """
76
- Log a question, answer, source IDs, and knowledge base question-answer pairs to the Google Sheet.
77
-
78
- Args:
79
- question (str): The question asked by the user.
80
- answer (str): The answer provided by the model.
81
- source_ids (str): Comma-separated list of source IDs used.
82
- knowledge_pairs (list): List of tuples containing (question, answer) from the knowledge base.
83
- """
84
  timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
85
  knowledge_question_1 = knowledge_pairs[0][0] if len(knowledge_pairs) > 0 else "N/A"
86
  knowledge_answer_1 = knowledge_pairs[0][1] if len(knowledge_pairs) > 0 else "N/A"
87
  knowledge_question_2 = knowledge_pairs[1][0] if len(knowledge_pairs) > 1 else "N/A"
88
  knowledge_answer_2 = knowledge_pairs[1][1] if len(knowledge_pairs) > 1 else "N/A"
89
  row = [
90
- timestamp,
91
- session_id,
92
- question,
93
- answer,
94
- source_ids,
95
- knowledge_question_1,
96
- knowledge_answer_1,
97
- knowledge_question_2,
98
- knowledge_answer_2
99
  ]
100
  try:
101
- sheet.append_row(row)
102
- print(f"Logged: {question} | Source IDs: {source_ids}")
103
  except Exception as e:
104
  print(f"Failed to log to Google Sheet: {e}")
105
  with open("/tmp/response_log.txt", "a") as f:
106
  f.write(f"{timestamp},{question},{answer},{source_ids},{knowledge_question_1},{knowledge_answer_1},{knowledge_question_2},{knowledge_answer_2}\n")
107
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  # === LangGraph Memory Setup ===
109
  conn = sqlite3.connect("xeno_memory.db", check_same_thread=False)
110
  memory = SqliteSaver(conn=conn)
111
 
112
  def update_memory(config, user_message, assistant_message):
113
- full_checkpoint = memory.get(config) or {}
114
- messages = full_checkpoint.get("channel_values", {}).get("messages", [])
115
-
116
- messages.append({"role": "user", "content": user_message})
117
- messages.append({"role": "assistant", "content": assistant_message})
118
-
119
- checkpoint_to_save = {
120
- "v": 1,
121
- "id": str(uuid.uuid4()),
122
- "ts": datetime.now().isoformat(),
123
- "channel_values": {"messages": messages},
124
- "channel_versions": {},
125
- "versions_seen": {},
126
- }
127
-
128
- memory.put(config, checkpoint_to_save, {}, {})
129
-
 
 
 
 
 
 
 
 
130
  # === Intent Classification System ===
131
  class IntentClassifier:
132
  def __init__(self):
133
- # Define intent patterns and responses
134
  self.intent_patterns = {
135
  'greeting': {
136
  'patterns': [
@@ -171,10 +237,7 @@ class IntentClassifier:
171
  }
172
 
173
  def classify_intent(self, message: str) -> Tuple[str, str]:
174
- """
175
- Classify the intent of a message and return appropriate response if it's a simple intent.
176
- Returns: (intent_name, response) - response is empty string if intent requires RAG
177
- """
178
  message_lower = message.lower().strip()
179
 
180
  for intent_name, intent_data in self.intent_patterns.items():
@@ -187,11 +250,9 @@ class IntentClassifier:
187
  return 'query', ''
188
 
189
  def is_simple_intent(self, intent: str) -> bool:
190
- """Check if intent can be handled without RAG"""
191
  simple_intents = ['greeting', 'thanks']
192
  return intent in simple_intents
193
 
194
- # Initialize intent classifier
195
  intent_classifier = IntentClassifier()
196
 
197
  # === Load and Clean Knowledge Base ===
@@ -239,98 +300,169 @@ SYSTEM_PROMPT = """You are a friendly XENO Support Assistant, an AI-powered help
239
  Use only the information provided in the knowledge base context to answer user queries.
240
  Do not hallucinate. If context doesn't contain relevant info, say so in a calm polite manner by saying I'm sorry, I can't assist with that.
241
  Only use context that is clearly relevant to the user's question.
242
- For greetings like hi or hello, respond politely without using the context.
243
  remember previous conversations."""
244
 
245
  # === Context Processing ===
246
  def process_context(results, cosine_scores, max_results=2):
247
- sorted_indices = np.argsort(cosine_scores)[::-1][:max_results]
248
- formatted_context = ""
249
- source_ids = []
250
- knowledge_pairs = []
251
- for i, idx in enumerate(sorted_indices, 1):
252
- result = results[idx]
253
- score = cosine_scores[idx]
254
- question = result.metadata.get('question', 'N/A')
255
- answer = result.metadata.get('content', 'N/A')
256
- formatted_context += f"Knowledge Entry {i}:\n"
257
- formatted_context += f"Q: {question}\n"
258
- formatted_context += f"A: {answer}\n"
259
- formatted_context += "-" * 40 + "\n"
260
- source_ids.append(result.metadata.get('id', 'N/A'))
261
- knowledge_pairs.append((question, answer))
262
- return formatted_context, source_ids, knowledge_pairs
263
-
264
- # === LLM Generation (Refactored) ===
 
 
265
  def generate_xeno_response(context, question, chat_history):
266
- """Generates a response but does NOT handle memory."""
267
- model = genai.GenerativeModel(llm_model_name)
268
- formatted_history = "\n".join(
269
- [f"{msg['role'].capitalize()}: {msg['content']}" for msg in chat_history]
270
- ) if chat_history else "None"
271
-
272
- prompt = f"{SYSTEM_PROMPT}\n### HISTORY ###\n{formatted_history}\n### CONTEXT ###\n{context}\n### QUESTION ###\n{question}"
273
-
274
- response = model.generate_content(prompt)
275
- return response.text.strip()
276
-
277
 
278
- # === Main Interface Logic (Refactored) ===
279
  def get_context_and_answer(message, history, session_id="default"):
280
- """
281
- Handles intent classification, RAG, and memory updates in one place.
282
- """
283
- config = {"configurable": {"thread_id": str(session_id), "checkpoint_ns": ""}}
284
-
285
- full_checkpoint = memory.get(config) or {}
286
- chat_history = full_checkpoint.get("channel_values", {}).get("messages", [])
287
- intent, direct_response = intent_classifier.classify_intent(message)
288
-
289
- answer = ""
290
- source_ids = "N/A"
291
- knowledge_pairs = []
292
-
293
- if intent != 'query':
294
- answer = direct_response
295
- else:
296
- if len(message.strip()) < 3:
297
- answer = "I'd be happy to help! Could you please provide more details about what you'd like to know?"
298
- else:
299
- try:
300
- queried_results = retriever.invoke(message)
301
- query_embedding = genai.embed_content(model=embedding_model, content=message, task_type="retrieval_query")['embedding']
302
-
303
- doc_embeddings = [genai.embed_content(model=embedding_model, content=doc.page_content, task_type="retrieval_document")['embedding'] for doc in queried_results]
304
-
305
- cosine_scores = util.cos_sim(torch.tensor(query_embedding).float(), torch.tensor(doc_embeddings).float())[0].tolist()
306
-
307
- if max(cosine_scores) < 0.4:
308
- answer = "I'm sorry, I couldn't find specific information for your question. Could you try rephrasing it, or contact XENO support directly?"
309
- else:
310
- context, source_ids_list, knowledge_pairs = process_context(queried_results, cosine_scores)
311
- answer = generate_xeno_response(context, message, chat_history)
312
- source_ids = ", ".join(source_ids_list)
313
-
314
- except Exception as e:
315
- print(f"Error during RAG processing: {e}")
316
- answer = "I apologize, but I'm having a technical issue. Please try again shortly or contact XENO support."
317
-
318
- update_memory(config, message, answer)
319
- log_response(message, answer, source_ids, knowledge_pairs, session_id)
320
 
321
- return answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
 
323
  # === Enhanced Gradio UI ===
324
  def respond(message, history, session_id):
325
- """Gradio's main response function."""
326
  if not session_id:
327
  session_id = str(uuid.uuid4())
328
 
329
  bot_response = get_context_and_answer(message, history, session_id)
330
-
331
  history.append([message, bot_response])
332
 
333
  return "", history
 
334
  def create_interface():
335
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
336
  gr.Markdown("""
@@ -362,10 +494,6 @@ def create_interface():
362
  )
363
  send_button = gr.Button("Send", variant="primary", scale=1)
364
 
365
- def submit_message(message, chat_history, session_id):
366
- new_msg, new_hist = respond(message, chat_history, session_id)
367
- return "", new_hist
368
-
369
  send_button.click(respond, [msg, chatbot, session_id_box], [msg, chatbot])
370
  msg.submit(respond, [msg, chatbot, session_id_box], [msg, chatbot])
371
 
@@ -373,4 +501,4 @@ def create_interface():
373
 
374
  if __name__ == "__main__":
375
  iface = create_interface()
376
- iface.launch(share=False, server_name="0.0.0.0", server_port=7860, ssr_mode=False)
 
16
  from datetime import datetime
17
  import re
18
  from typing import Dict, List, Tuple
19
+ import time
20
+ from contextlib import contextmanager
21
 
22
  import logging
23
  import traceback
 
25
 
26
  # ===== Configure Logging =====
27
  logging.basicConfig(
28
+ filename="app.log",
29
+ level=logging.INFO,
30
  format="%(asctime)s - %(levelname)s - %(message)s"
31
  )
32
 
 
33
def log_exception(exc_type, exc_value, exc_traceback):
    """Record an uncaught exception at CRITICAL level.

    Intended to be installed as ``sys.excepthook``. Keyboard interrupts are
    not written to the log; they are passed to the interpreter's default hook
    so Ctrl-C is still reported on stderr instead of disappearing silently.

    Args:
        exc_type: Class of the exception being reported.
        exc_value: The exception instance.
        exc_traceback: Traceback object associated with the exception.
    """
    if issubclass(exc_type, KeyboardInterrupt):
        # Keep the default behaviour for Ctrl-C; only real failures are logged.
        sys.__excepthook__(exc_type, exc_value, exc_traceback)
        return
    logging.critical(
        "Uncaught exception", exc_info=(exc_type, exc_value, exc_traceback)
    )
37
 
38
  sys.excepthook = log_exception
 
 
39
  logging.info("App started successfully.")
40
 
41
+ # ===== Time Tracking Class =====
42
class PipelineTimer:
    """Collect per-step and total timings, in milliseconds, for one request.

    Call ``reset()`` at the start of a request, wrap each stage in
    ``time_step(name)``, then read ``get_timing_summary()``.

    Attributes:
        start_time: Epoch seconds (``time.time()``) captured by the last
            ``reset()``; informational only.
        step_times: Mapping of step name to duration in milliseconds.
        step_start: Always ``None``; kept for backward compatibility.
        current_step: Name of the step currently running, or of the step
            that most recently failed; ``None`` otherwise.

    NOTE(review): a single instance holds the state of one request at a
    time; if requests are served concurrently through one shared instance
    their timings will interleave. Confirm how the app serves requests.
    """

    def __init__(self):
        self.reset()

    def reset(self):
        """Reset all timing data for a new request."""
        self.start_time = time.time()
        # Durations are measured on a monotonic clock so that system clock
        # adjustments cannot produce negative or inflated values.
        self._started = time.perf_counter()
        self.step_times = {}
        self.step_start = None
        self.current_step = None

    @contextmanager
    def time_step(self, step_name: str):
        """Time the enclosed block and store it under ``step_name``.

        The duration is recorded whether or not the block raises. When the
        block raises, ``current_step`` is deliberately left set to
        ``step_name`` so an ``except`` handler further up can tell which
        step failed; it is cleared only after a successful step.
        """
        began = time.perf_counter()
        self.current_step = step_name
        succeeded = False
        try:
            yield
            succeeded = True
        finally:
            elapsed_ms = (time.perf_counter() - began) * 1000
            self.step_times[step_name] = round(elapsed_ms, 2)
            if succeeded:
                self.current_step = None

    def get_total_time(self):
        """Return milliseconds elapsed since the last ``reset()``."""
        return round((time.perf_counter() - self._started) * 1000, 2)

    def get_timing_summary(self):
        """Return a snapshot of the timings collected so far.

        Returns:
            dict with ``total_time_ms``, ``step_times`` (a copy, so later
            steps do not alter a summary already handed out) and an ISO
            ``timestamp`` taken at call time.
        """
        return {
            'total_time_ms': self.get_total_time(),
            'step_times': dict(self.step_times),
            'timestamp': datetime.now().isoformat(),
        }
77
 
78
+ # Initialize global timer
79
+ timer = PipelineTimer()
80
 
81
  # === Configuration ===
82
  genai.configure(api_key=os.environ["GEMINI_API_KEY"])
 
84
  llm_model_name = "models/gemma-3-4b-it"
85
  collection_name = "xeno_collection"
86
 
87
+ # === Google Sheets Setup ===
88
  def get_google_sheets_credentials():
89
  credentials_json = os.environ.get("GOOGLE_SHEETS_CREDENTIALS")
90
  if not credentials_json:
 
94
  creds = Credentials.from_service_account_info(credentials_dict, scopes=scope)
95
  return creds
96
 
 
97
  client_gspread = gspread.authorize(get_google_sheets_credentials())
98
 
99
+ # Open the Google Sheet and get both sheets
100
spreadsheet = client_gspread.open("Response_Log")
response_sheet = spreadsheet.sheet1  # Main response log
try:
    timing_sheet = spreadsheet.worksheet("Timing_Log")
except gspread.WorksheetNotFound:
    # Only a missing worksheet should lead to creating one. Any other failure
    # (auth, network, quota) now propagates instead of being mistaken for
    # "worksheet does not exist".
    timing_sheet = spreadsheet.add_worksheet(title="Timing_Log", rows=1000, cols=15)
    # Header row; column order matches the row built by log_timing_data.
    headers = [
        "Timestamp", "Session_ID", "Question", "Total_Time_MS",
        "Intent_Classification_MS", "Memory_Retrieval_MS", "RAG_Retrieval_MS",
        "Embedding_Generation_MS", "Similarity_Calculation_MS", "Context_Processing_MS",
        "LLM_Generation_MS", "Memory_Update_MS", "Logging_MS", "Error_Step", "Notes",
    ]
    timing_sheet.append_row(headers)
115
 
116
def log_response(question, answer, source_ids, knowledge_pairs, session_id):
    """Append one question/answer record to the response sheet.

    If the sheet write fails, the same row is appended to
    ``/tmp/response_log.txt`` as a CSV record. This function never raises
    for logging problems; failures are reported and swallowed.

    Args:
        question: The user's message.
        answer: The reply that was sent back.
        source_ids: Source identifiers used for the answer.
        knowledge_pairs: Sequence of ``(question, answer)`` pairs from the
            knowledge base; only the first two are recorded and missing
            ones are written as "N/A".
        session_id: Identifier of the conversation.
    """
    import csv  # local import: needed only for the fallback file

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Pad to exactly two pairs so the row always has the same width.
    placeholder = ("N/A", "N/A")
    first, second = (list(knowledge_pairs[:2]) + [placeholder, placeholder])[:2]

    row = [
        timestamp, session_id, question, answer, source_ids,
        first[0], first[1], second[0], second[1],
    ]
    try:
        response_sheet.append_row(row)
        print(f"Logged response: {question} | Source IDs: {source_ids}")
    except Exception as e:
        print(f"Failed to log to Google Sheet: {e}")
        # Fallback mirrors the sheet row (including session_id). The csv
        # writer quotes commas and newlines so free text stays in one field.
        try:
            with open("/tmp/response_log.txt", "a", newline="", encoding="utf-8") as f:
                csv.writer(f).writerow(row)
        except OSError as file_error:
            # Logging is best-effort and must not break the chat reply.
            logging.error("Could not write response log fallback file: %s", file_error)
134
 
135
def log_timing_data(question, session_id, timing_summary, error_step=None, notes=None):
    """Append one timing record to the timing sheet.

    If the sheet write fails, the same row is appended to
    ``/tmp/timing_log.txt`` as a CSV record. Logging problems are reported
    and swallowed so this can be called safely from error handlers.

    Args:
        question: The user's message; only the first 100 characters are
            recorded, followed by "..." when truncated.
        session_id: Identifier of the conversation.
        timing_summary: Mapping with ``total_time_ms`` and ``step_times``
            (step name -> milliseconds). Steps that did not run are
            recorded as 0.
        error_step: Name of the step that failed, if any.
        notes: Free-text remarks about the request, if any.
    """
    import csv  # local import: needed only for the fallback file

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    step_times = timing_summary['step_times']

    # Column order of the per-step durations in the sheet.
    step_columns = (
        'intent_classification',
        'memory_retrieval',
        'rag_retrieval',
        'embedding_generation',
        'similarity_calculation',
        'context_processing',
        'llm_generation',
        'memory_update',
        'response_logging',
    )
    short_question = question[:100] + "..." if len(question) > 100 else question

    row = [timestamp, session_id, short_question, timing_summary['total_time_ms']]
    row.extend(step_times.get(step, 0) for step in step_columns)
    row.extend([error_step or "", notes or ""])

    try:
        timing_sheet.append_row(row)
        print(f"Logged timing data: Total {timing_summary['total_time_ms']}ms")
    except Exception as e:
        print(f"Failed to log timing data: {e}")
        # Fallback to a local file using the same columns as the sheet, so
        # error_step and notes are not lost and commas stay inside fields.
        try:
            with open("/tmp/timing_log.txt", "a", newline="", encoding="utf-8") as f:
                csv.writer(f).writerow(row)
        except OSError as file_error:
            logging.error("Could not write timing log fallback file: %s", file_error)
166
+
167
  # === LangGraph Memory Setup ===
168
  conn = sqlite3.connect("xeno_memory.db", check_same_thread=False)
169
  memory = SqliteSaver(conn=conn)
170
 
171
def update_memory(config, user_message, assistant_message):
    """Add the latest exchange to the stored conversation and save it.

    Reads the checkpoint for ``config`` from ``memory``, appends the user
    and assistant messages to its message list, and writes a new checkpoint
    with a fresh id and timestamp. The whole operation is timed as the
    "memory_update" step.
    """
    with timer.time_step("memory_update"):
        stored = memory.get(config) or {}
        conversation = stored.get("channel_values", {}).get("messages", [])

        conversation.extend(
            (
                {"role": "user", "content": user_message},
                {"role": "assistant", "content": assistant_message},
            )
        )

        memory.put(
            config,
            {
                "v": 1,
                "id": str(uuid.uuid4()),
                "ts": datetime.now().isoformat(),
                "channel_values": {"messages": conversation},
                "channel_versions": {},
                "versions_seen": {},
            },
            {},
            {},
        )
190
+
191
def retrieve_memory(config):
    """Return the stored message list for ``config``, or an empty list.

    The lookup is timed as the "memory_retrieval" step.
    """
    with timer.time_step("memory_retrieval"):
        checkpoint = memory.get(config)
        if not checkpoint:
            return []
        channel_values = checkpoint.get("channel_values", {})
        return channel_values.get("messages", [])
196
+
197
  # === Intent Classification System ===
198
  class IntentClassifier:
199
  def __init__(self):
 
200
  self.intent_patterns = {
201
  'greeting': {
202
  'patterns': [
 
237
  }
238
 
239
  def classify_intent(self, message: str) -> Tuple[str, str]:
240
+ """Classify intent with timing"""
 
 
 
241
  message_lower = message.lower().strip()
242
 
243
  for intent_name, intent_data in self.intent_patterns.items():
 
250
  return 'query', ''
251
 
252
  def is_simple_intent(self, intent: str) -> bool:
 
253
  simple_intents = ['greeting', 'thanks']
254
  return intent in simple_intents
255
 
 
256
  intent_classifier = IntentClassifier()
257
 
258
  # === Load and Clean Knowledge Base ===
 
300
  Use only the information provided in the knowledge base context to answer user queries.
301
  Do not hallucinate. If context doesn't contain relevant info, say so in a calm polite manner by saying I'm sorry, I can't assist with that.
302
  Only use context that is clearly relevant to the user's question.
303
+ For greetings like "hi" or "hello", respond politely without using the context.
304
  remember previous conversations."""
305
 
306
  # === Context Processing ===
307
def process_context(results, cosine_scores, max_results=2):
    """Build the prompt context from the best-scoring retrieved entries.

    The whole function is timed as the "context_processing" step.

    Args:
        results: Retrieved documents; each must expose a ``metadata``
            mapping, from which 'question', 'content' and 'id' are read.
        cosine_scores: One similarity score per entry in ``results``.
        max_results: Maximum number of entries to include.

    Returns:
        Tuple ``(formatted_context, source_ids, knowledge_pairs)``: the text
        block for the prompt, the ids of the entries used (as strings, so
        callers can join them) and their ``(question, answer)`` pairs, all
        ordered from highest to lowest score.
    """
    with timer.time_step("context_processing"):
        # Indices of the highest scores first, limited to max_results.
        top_indices = np.argsort(cosine_scores)[::-1][:max_results]

        entries = []
        source_ids = []
        knowledge_pairs = []
        for position, idx in enumerate(top_indices, 1):
            metadata = results[int(idx)].metadata
            question = metadata.get('question', 'N/A')
            answer = metadata.get('content', 'N/A')
            entries.append(
                f"Knowledge Entry {position}:\n"
                f"Q: {question}\n"
                f"A: {answer}\n"
                + "-" * 40 + "\n"
            )
            # Ids are coerced to str because the caller joins them with ", ".
            source_ids.append(str(metadata.get('id', 'N/A')))
            knowledge_pairs.append((question, answer))

        return "".join(entries), source_ids, knowledge_pairs
326
+
327
+ # === LLM Generation ===
328
def generate_xeno_response(context, question, chat_history):
    """Ask the configured model for an answer and return its stripped text.

    The prompt is the system prompt followed by the chat history (or "None"
    when there is none), the knowledge context and the question. The call is
    timed as the "llm_generation" step.
    """
    with timer.time_step("llm_generation"):
        model = genai.GenerativeModel(llm_model_name)

        if chat_history:
            turns = []
            for msg in chat_history:
                turns.append(f"{msg['role'].capitalize()}: {msg['content']}")
            formatted_history = "\n".join(turns)
        else:
            formatted_history = "None"

        prompt = f"{SYSTEM_PROMPT}\n### HISTORY ###\n{formatted_history}\n### CONTEXT ###\n{context}\n### QUESTION ###\n{question}"

        reply = model.generate_content(prompt)
        return reply.text.strip()
340
 
341
+ # === Main Interface Logic ===
342
def get_context_and_answer(message, history, session_id="default"):
    """Answer one user message and record what happened.

    Pipeline: classify intent, load chat memory, then either reply directly
    (non-query intents, or messages shorter than 3 characters) or run
    retrieval, embedding, similarity scoring and, when the best score is at
    least 0.4, context building and LLM generation. Afterwards the exchange
    is stored in memory and both the response and the timings are logged.

    Args:
        message: The user's message.
        history: Chat history passed in by the UI; not used here, the
            conversation is read from ``memory`` instead.
        session_id: Conversation identifier, used as the memory thread id.

    Returns:
        The reply text. Failures are converted into an apology message
        rather than raised.
    """
    # Reset timer for new request
    timer.reset()
    error_step = None
    notes = []
    # Name of the step about to run. The timer may already have cleared its
    # own current_step by the time an except block executes, so the step is
    # tracked here as well to keep error_step meaningful.
    active_step = "main_pipeline"

    try:
        config = {"configurable": {"thread_id": str(session_id), "checkpoint_ns": ""}}

        # Step 1: Intent Classification
        active_step = "intent_classification"
        with timer.time_step("intent_classification"):
            intent, direct_response = intent_classifier.classify_intent(message)

        # Step 2: Memory Retrieval (timed within function)
        active_step = "memory_retrieval"
        chat_history = retrieve_memory(config)

        active_step = "main_pipeline"
        answer = ""
        source_ids = "N/A"
        knowledge_pairs = []

        if intent != 'query':
            answer = direct_response
            notes.append(f"Simple intent: {intent}")
        elif len(message.strip()) < 3:
            answer = "I'd be happy to help! Could you please provide more details about what you'd like to know?"
            notes.append("Message too short")
        else:
            try:
                # Step 3: RAG Retrieval
                active_step = "rag_retrieval"
                with timer.time_step("rag_retrieval"):
                    queried_results = retriever.invoke(message)

                if not queried_results:
                    # Nothing retrieved: answer as "not found" instead of
                    # failing later on max() of an empty score list.
                    answer = "I'm sorry, I couldn't find specific information for your question. Could you try rephrasing it, or contact XENO support directly?"
                    notes.append("No documents retrieved")
                else:
                    # Step 4: Embedding Generation
                    active_step = "embedding_generation"
                    with timer.time_step("embedding_generation"):
                        query_embedding = genai.embed_content(
                            model=embedding_model,
                            content=message,
                            task_type="retrieval_query"
                        )['embedding']

                        doc_embeddings = [
                            genai.embed_content(
                                model=embedding_model,
                                content=doc.page_content,
                                task_type="retrieval_document"
                            )['embedding']
                            for doc in queried_results
                        ]

                    # Step 5: Similarity Calculation
                    active_step = "similarity_calculation"
                    with timer.time_step("similarity_calculation"):
                        cosine_scores = util.cos_sim(
                            torch.tensor(query_embedding).float(),
                            torch.tensor(doc_embeddings).float()
                        )[0].tolist()
                        max_score = max(cosine_scores)

                    if max_score < 0.4:
                        answer = "I'm sorry, I couldn't find specific information for your question. Could you try rephrasing it, or contact XENO support directly?"
                        notes.append(f"Low similarity score: {max_score:.3f}")
                    else:
                        # Step 6: Context Processing (timed within function)
                        active_step = "context_processing"
                        context, source_ids_list, knowledge_pairs = process_context(queried_results, cosine_scores)

                        # Step 7: LLM Generation (timed within function)
                        active_step = "llm_generation"
                        answer = generate_xeno_response(context, message, chat_history)
                        source_ids = ", ".join(source_ids_list)
                        notes.append(f"Max similarity: {max_score:.3f}")

            except Exception as e:
                error_step = timer.current_step or active_step
                print(f"Error during RAG processing: {e}")
                logging.exception("Error during RAG processing (step: %s)", error_step)
                answer = "I apologize, but I'm having a technical issue. Please try again shortly or contact XENO support."
                notes.append(f"Error: {str(e)}")

        # Step 8: Memory Update (timed within function)
        active_step = "memory_update"
        update_memory(config, message, answer)

        # Step 9: Response Logging
        active_step = "response_logging"
        with timer.time_step("response_logging"):
            log_response(message, answer, source_ids, knowledge_pairs, session_id)

        # Log timing data
        active_step = "timing_logging"
        timing_summary = timer.get_timing_summary()
        log_timing_data(
            message,
            session_id,
            timing_summary,
            error_step=error_step,
            notes="; ".join(notes) if notes else None
        )

        return answer

    except Exception as e:
        error_step = timer.current_step or active_step
        logging.error(f"Error in main pipeline: {e}")
        logging.error(traceback.format_exc())

        # Still log timing data even on error, but never let a logging
        # failure replace the apology the user should receive.
        try:
            timing_summary = timer.get_timing_summary()
            log_timing_data(
                message,
                session_id,
                timing_summary,
                error_step=error_step,
                notes=f"Pipeline error: {str(e)}"
            )
        except Exception:
            logging.error("Failed to record timing data: %s", traceback.format_exc())

        return "I apologize, but I encountered an error processing your request. Please try again."
454
 
455
  # === Enhanced Gradio UI ===
456
def respond(message, history, session_id):
    """Handle one chat turn for the Gradio UI.

    Generates a random session id when none is supplied, obtains the reply,
    appends ``[message, reply]`` to ``history`` and returns an empty string
    (to clear the input box) together with the updated history.
    """
    session_id = session_id or str(uuid.uuid4())
    reply = get_context_and_answer(message, history, session_id)
    history.append([message, reply])
    return "", history

465
+
466
  def create_interface():
467
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
468
  gr.Markdown("""
 
494
  )
495
  send_button = gr.Button("Send", variant="primary", scale=1)
496
 
 
 
 
 
497
  send_button.click(respond, [msg, chatbot, session_id_box], [msg, chatbot])
498
  msg.submit(respond, [msg, chatbot, session_id_box], [msg, chatbot])
499
 
 
501
 
502
if __name__ == "__main__":
    # Build the Gradio app and serve it on every interface, port 7860,
    # without a public share link and with server-side rendering disabled.
    launch_options = dict(
        share=False,
        server_name="0.0.0.0",
        server_port=7860,
        ssr_mode=False,
    )
    iface = create_interface()
    iface.launch(**launch_options)