wahab5763 committed on
Commit
e4652f2
·
verified ·
1 Parent(s): 872e099

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +79 -63
app.py CHANGED
@@ -54,6 +54,8 @@ if "candidates_message_shown" not in st.session_state:
54
  st.session_state.candidates_message_shown = False
55
  if "vector_db_message_shown" not in st.session_state:
56
  st.session_state.vector_db_message_shown = False
 
 
57
 
58
def count_tokens(text):
    """Return a rough token count for *text*, counted as whitespace-separated words."""
    words = text.split()
    return len(words)
@@ -75,6 +77,7 @@ def reset_session_state():
75
  st.session_state.messages = []
76
  st.session_state.candidates_message_shown = False
77
  st.session_state.vector_db_message_shown = False
 
78
  for filename in ["token.json", "data_chunks.pkl", "embeddings.pkl", "vector_store.index", "vector_database.pkl"]:
79
  if os.path.exists(filename):
80
  os.remove(filename)
@@ -203,10 +206,10 @@ def create_chunks_from_gmail(service, label):
203
  progress_bar.progress(min((idx + 1) / total, 1.0))
204
  st.session_state.data_chunks.extend(data_chunks)
205
  if not st.session_state.vector_db_message_shown:
206
- st.success(f"βœ… Data chunks created successfully from {label}! Total emails processed for this label: {len(data_chunks)}")
207
  st.session_state.vector_db_message_shown = True
208
  except Exception as e:
209
- st.error(f"❌ Error creating chunks from Gmail for label {label}: {e}")
210
 
211
  # -------------------------------
212
  # Cached model loaders for efficiency
@@ -245,7 +248,9 @@ def embed_emails(email_chunks):
245
  index = faiss.IndexFlatIP(dimension)
246
  index.add(embeddings)
247
  st.session_state.vector_store = index
248
- st.success("βœ… Data embedding and vector store created successfully!")
 
 
249
  except Exception as e:
250
  st.error(f"❌ Error during embedding: {e}")
251
 
@@ -288,44 +293,44 @@ def process_candidate_emails(query, similarity_threshold):
288
  st.error("❌ Please process your email data or load a saved vector database first.")
289
  return
290
 
291
- with st.spinner('πŸ”„ Processing your query...'):
292
- try:
293
- embed_model, device = get_embed_model()
294
- processed_query = preprocess_query(query)
295
- query_embedding = embed_model.encode(
296
- [processed_query],
297
- convert_to_numpy=True,
298
- show_progress_bar=False,
299
- device=device
300
- )
301
- faiss.normalize_L2(query_embedding)
302
-
303
- # Perform search
304
- distances, indices = st.session_state.vector_store.search(query_embedding, TOP_K)
305
- candidates = []
306
- for idx, sim in zip(indices[0], distances[0]):
307
- # Include candidate only if similarity meets the threshold
308
- if sim >= similarity_threshold:
309
- candidates.append((st.session_state.data_chunks[idx], sim))
310
- if not candidates:
311
- st.warning("⚠️ No matching embeddings found for your query with the selected threshold.")
312
- return
313
-
314
- # Build the context string by concatenating all matching email texts using HTML breaks.
315
- context_str = ""
316
- for candidate, sim in candidates:
317
- context_str += combine_email_text(candidate) + "<br><br>"
318
-
319
- # Optionally limit context size.
320
- MAX_CONTEXT_TOKENS = 500
321
- context_tokens = context_str.split()
322
- if len(context_tokens) > MAX_CONTEXT_TOKENS:
323
- context_str = " ".join(context_tokens[:MAX_CONTEXT_TOKENS])
324
-
325
- st.session_state.candidate_context = context_str
326
- st.session_state.raw_candidates = candidates
327
- except Exception as e:
328
- st.error(f"❌ An error occurred during processing: {e}")
329
 
330
  def call_llm_api(query):
331
  """
@@ -356,23 +361,22 @@ def call_llm_api(query):
356
  "Content-Type": "application/json"
357
  }
358
 
359
- with st.spinner("πŸ”„ Fetching AI response..."):
 
 
 
 
 
 
 
360
  try:
361
- response = requests.post(url, headers=headers, json=payload)
362
- response.raise_for_status() # Raises stored HTTPError, if one occurred.
363
- response_json = response.json()
364
- generated_text = response_json["choices"][0]["message"]["content"]
365
- # Append AI response to chat messages
366
- st.session_state.messages.append({"role": "assistant", "content": generated_text})
367
- except requests.exceptions.HTTPError as http_err:
368
- try:
369
- error_info = response.json().get("error", {})
370
- error_message = error_info.get("message", "An unknown error occurred.")
371
- st.error(f"❌ HTTP error occurred: {error_message}")
372
- except ValueError:
373
- st.error(f"❌ HTTP error occurred: {response.status_code} - {response.text}")
374
- except Exception as err:
375
- st.error(f"❌ An unexpected error occurred: {err}")
376
 
377
  def handle_user_query():
378
  st.header("πŸ’¬ Let's Chat with Your Emails")
@@ -395,15 +399,27 @@ def handle_user_query():
395
  if user_input:
396
  # Append user message to chat
397
  st.session_state.messages.append({"role": "user", "content": user_input})
398
-
 
 
 
 
 
 
 
 
 
 
 
 
399
  # Process the query
400
  process_candidate_emails(user_input, similarity_threshold)
401
-
 
402
  if st.session_state.candidate_context:
403
- # Send the query to the LLM API
404
  call_llm_api(user_input)
405
-
406
- # Display chat messages
407
  for msg in st.session_state.messages:
408
  if msg["role"] == "user":
409
  with st.chat_message("user"):
@@ -411,7 +427,7 @@ def handle_user_query():
411
  elif msg["role"] == "assistant":
412
  with st.chat_message("assistant"):
413
  st.markdown(msg["content"])
414
-
415
  # Display matching email chunks in an expander
416
  if st.session_state.raw_candidates:
417
  with st.expander("πŸ”Ž Matching Email Chunks:", expanded=False):
 
54
  st.session_state.candidates_message_shown = False
55
  if "vector_db_message_shown" not in st.session_state:
56
  st.session_state.vector_db_message_shown = False
57
+ if "pending_query" not in st.session_state:
58
+ st.session_state.pending_query = False
59
 
60
  def count_tokens(text):
61
  return len(text.split())
 
77
  st.session_state.messages = []
78
  st.session_state.candidates_message_shown = False
79
  st.session_state.vector_db_message_shown = False
80
+ st.session_state.pending_query = False
81
  for filename in ["token.json", "data_chunks.pkl", "embeddings.pkl", "vector_store.index", "vector_database.pkl"]:
82
  if os.path.exists(filename):
83
  os.remove(filename)
 
206
  progress_bar.progress(min((idx + 1) / total, 1.0))
207
  st.session_state.data_chunks.extend(data_chunks)
208
  if not st.session_state.vector_db_message_shown:
209
+ st.success(f"πŸ“ Vector database loaded successfully from upload! Total emails processed for label '{label}': {len(data_chunks)}")
210
  st.session_state.vector_db_message_shown = True
211
  except Exception as e:
212
+ st.error(f"❌ Error creating chunks from Gmail for label '{label}': {e}")
213
 
214
  # -------------------------------
215
  # Cached model loaders for efficiency
 
248
  index = faiss.IndexFlatIP(dimension)
249
  index.add(embeddings)
250
  st.session_state.vector_store = index
251
+ if not st.session_state.candidates_message_shown:
252
+ st.success("βœ… Data embedding and vector store created successfully!")
253
+ st.session_state.candidates_message_shown = True
254
  except Exception as e:
255
  st.error(f"❌ Error during embedding: {e}")
256
 
 
293
  st.error("❌ Please process your email data or load a saved vector database first.")
294
  return
295
 
296
+ try:
297
+ embed_model, device = get_embed_model()
298
+ processed_query = preprocess_query(query)
299
+ query_embedding = embed_model.encode(
300
+ [processed_query],
301
+ convert_to_numpy=True,
302
+ show_progress_bar=False,
303
+ device=device
304
+ )
305
+ faiss.normalize_L2(query_embedding)
306
+
307
+ # Perform search
308
+ distances, indices = st.session_state.vector_store.search(query_embedding, TOP_K)
309
+ candidates = []
310
+ for idx, sim in zip(indices[0], distances[0]):
311
+ # Include candidate only if similarity meets the threshold
312
+ if sim >= similarity_threshold:
313
+ candidates.append((st.session_state.data_chunks[idx], sim))
314
+ if not candidates:
315
+ # Append warning message as assistant message
316
+ st.session_state.messages.append({"role": "assistant", "content": "⚠️ No matching embeddings found for your query with the selected threshold."})
317
+ return
318
+
319
+ # Build the context string by concatenating all matching email texts using HTML breaks.
320
+ context_str = ""
321
+ for candidate, sim in candidates:
322
+ context_str += combine_email_text(candidate) + "<br><br>"
323
+
324
+ # Optionally limit context size.
325
+ MAX_CONTEXT_TOKENS = 500
326
+ context_tokens = context_str.split()
327
+ if len(context_tokens) > MAX_CONTEXT_TOKENS:
328
+ context_str = " ".join(context_tokens[:MAX_CONTEXT_TOKENS])
329
+
330
+ st.session_state.candidate_context = context_str
331
+ st.session_state.raw_candidates = candidates
332
+ except Exception as e:
333
+ st.error(f"❌ An error occurred during processing: {e}")
334
 
335
  def call_llm_api(query):
336
  """
 
361
  "Content-Type": "application/json"
362
  }
363
 
364
+ try:
365
+ response = requests.post(url, headers=headers, json=payload)
366
+ response.raise_for_status() # Raises stored HTTPError, if one occurred.
367
+ response_json = response.json()
368
+ generated_text = response_json["choices"][0]["message"]["content"]
369
+ # Append AI response to chat messages
370
+ st.session_state.messages.append({"role": "assistant", "content": generated_text})
371
+ except requests.exceptions.HTTPError as http_err:
372
  try:
373
+ error_info = response.json().get("error", {})
374
+ error_message = error_info.get("message", "An unknown error occurred.")
375
+ st.session_state.messages.append({"role": "assistant", "content": f"❌ HTTP error occurred: {error_message}"})
376
+ except ValueError:
377
+ st.session_state.messages.append({"role": "assistant", "content": f"❌ HTTP error occurred: {response.status_code} - {response.text}"})
378
+ except Exception as err:
379
+ st.session_state.messages.append({"role": "assistant", "content": f"❌ An unexpected error occurred: {err}"})
 
 
 
 
 
 
 
 
380
 
381
  def handle_user_query():
382
  st.header("πŸ’¬ Let's Chat with Your Emails")
 
399
  if user_input:
400
  # Append user message to chat
401
  st.session_state.messages.append({"role": "user", "content": user_input})
402
+
403
+ # Append assistant "thinking" message
404
+ st.session_state.messages.append({"role": "assistant", "content": "πŸ’­ Processing your query..."})
405
+
406
+ # Display chat messages
407
+ for msg in st.session_state.messages:
408
+ if msg["role"] == "user":
409
+ with st.chat_message("user"):
410
+ st.markdown(msg["content"])
411
+ elif msg["role"] == "assistant":
412
+ with st.chat_message("assistant"):
413
+ st.markdown(msg["content"])
414
+
415
  # Process the query
416
  process_candidate_emails(user_input, similarity_threshold)
417
+
418
+ # If there's a candidate context, call the LLM API
419
  if st.session_state.candidate_context:
 
420
  call_llm_api(user_input)
421
+
422
+ # Display chat messages again with updated AI response
423
  for msg in st.session_state.messages:
424
  if msg["role"] == "user":
425
  with st.chat_message("user"):
 
427
  elif msg["role"] == "assistant":
428
  with st.chat_message("assistant"):
429
  st.markdown(msg["content"])
430
+
431
  # Display matching email chunks in an expander
432
  if st.session_state.raw_candidates:
433
  with st.expander("πŸ”Ž Matching Email Chunks:", expanded=False):