Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -54,6 +54,8 @@ if "candidates_message_shown" not in st.session_state:
|
|
| 54 |
st.session_state.candidates_message_shown = False
|
| 55 |
if "vector_db_message_shown" not in st.session_state:
|
| 56 |
st.session_state.vector_db_message_shown = False
|
|
|
|
|
|
|
| 57 |
|
| 58 |
def count_tokens(text):
|
| 59 |
return len(text.split())
|
|
@@ -75,6 +77,7 @@ def reset_session_state():
|
|
| 75 |
st.session_state.messages = []
|
| 76 |
st.session_state.candidates_message_shown = False
|
| 77 |
st.session_state.vector_db_message_shown = False
|
|
|
|
| 78 |
for filename in ["token.json", "data_chunks.pkl", "embeddings.pkl", "vector_store.index", "vector_database.pkl"]:
|
| 79 |
if os.path.exists(filename):
|
| 80 |
os.remove(filename)
|
|
@@ -203,10 +206,10 @@ def create_chunks_from_gmail(service, label):
|
|
| 203 |
progress_bar.progress(min((idx + 1) / total, 1.0))
|
| 204 |
st.session_state.data_chunks.extend(data_chunks)
|
| 205 |
if not st.session_state.vector_db_message_shown:
|
| 206 |
-
st.success(f"
|
| 207 |
st.session_state.vector_db_message_shown = True
|
| 208 |
except Exception as e:
|
| 209 |
-
st.error(f"❌ Error creating chunks from Gmail for label {label}: {e}")
|
| 210 |
|
| 211 |
# -------------------------------
|
| 212 |
# Cached model loaders for efficiency
|
|
@@ -245,7 +248,9 @@ def embed_emails(email_chunks):
|
|
| 245 |
index = faiss.IndexFlatIP(dimension)
|
| 246 |
index.add(embeddings)
|
| 247 |
st.session_state.vector_store = index
|
| 248 |
-
st.
|
|
|
|
|
|
|
| 249 |
except Exception as e:
|
| 250 |
st.error(f"❌ Error during embedding: {e}")
|
| 251 |
|
|
@@ -288,44 +293,44 @@ def process_candidate_emails(query, similarity_threshold):
|
|
| 288 |
st.error("❌ Please process your email data or load a saved vector database first.")
|
| 289 |
return
|
| 290 |
|
| 291 |
-
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
|
| 314 |
-
|
| 315 |
-
|
| 316 |
-
|
| 317 |
-
|
| 318 |
-
|
| 319 |
-
|
| 320 |
-
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
|
| 324 |
-
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
|
| 328 |
-
|
| 329 |
|
| 330 |
def call_llm_api(query):
|
| 331 |
"""
|
|
@@ -356,23 +361,22 @@ def call_llm_api(query):
|
|
| 356 |
"Content-Type": "application/json"
|
| 357 |
}
|
| 358 |
|
| 359 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 360 |
try:
|
| 361 |
-
|
| 362 |
-
|
| 363 |
-
|
| 364 |
-
|
| 365 |
-
|
| 366 |
-
|
| 367 |
-
|
| 368 |
-
try:
|
| 369 |
-
error_info = response.json().get("error", {})
|
| 370 |
-
error_message = error_info.get("message", "An unknown error occurred.")
|
| 371 |
-
st.error(f"❌ HTTP error occurred: {error_message}")
|
| 372 |
-
except ValueError:
|
| 373 |
-
st.error(f"❌ HTTP error occurred: {response.status_code} - {response.text}")
|
| 374 |
-
except Exception as err:
|
| 375 |
-
st.error(f"❌ An unexpected error occurred: {err}")
|
| 376 |
|
| 377 |
def handle_user_query():
|
| 378 |
st.header("💬 Let's Chat with Your Emails")
|
|
@@ -395,15 +399,27 @@ def handle_user_query():
|
|
| 395 |
if user_input:
|
| 396 |
# Append user message to chat
|
| 397 |
st.session_state.messages.append({"role": "user", "content": user_input})
|
| 398 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 399 |
# Process the query
|
| 400 |
process_candidate_emails(user_input, similarity_threshold)
|
| 401 |
-
|
|
|
|
| 402 |
if st.session_state.candidate_context:
|
| 403 |
-
# Send the query to the LLM API
|
| 404 |
call_llm_api(user_input)
|
| 405 |
-
|
| 406 |
-
# Display chat messages
|
| 407 |
for msg in st.session_state.messages:
|
| 408 |
if msg["role"] == "user":
|
| 409 |
with st.chat_message("user"):
|
|
@@ -411,7 +427,7 @@ def handle_user_query():
|
|
| 411 |
elif msg["role"] == "assistant":
|
| 412 |
with st.chat_message("assistant"):
|
| 413 |
st.markdown(msg["content"])
|
| 414 |
-
|
| 415 |
# Display matching email chunks in an expander
|
| 416 |
if st.session_state.raw_candidates:
|
| 417 |
with st.expander("π Matching Email Chunks:", expanded=False):
|
|
|
|
| 54 |
st.session_state.candidates_message_shown = False
|
| 55 |
if "vector_db_message_shown" not in st.session_state:
|
| 56 |
st.session_state.vector_db_message_shown = False
|
| 57 |
+
if "pending_query" not in st.session_state:
|
| 58 |
+
st.session_state.pending_query = False
|
| 59 |
|
| 60 |
def count_tokens(text):
|
| 61 |
return len(text.split())
|
|
|
|
| 77 |
st.session_state.messages = []
|
| 78 |
st.session_state.candidates_message_shown = False
|
| 79 |
st.session_state.vector_db_message_shown = False
|
| 80 |
+
st.session_state.pending_query = False
|
| 81 |
for filename in ["token.json", "data_chunks.pkl", "embeddings.pkl", "vector_store.index", "vector_database.pkl"]:
|
| 82 |
if os.path.exists(filename):
|
| 83 |
os.remove(filename)
|
|
|
|
| 206 |
progress_bar.progress(min((idx + 1) / total, 1.0))
|
| 207 |
st.session_state.data_chunks.extend(data_chunks)
|
| 208 |
if not st.session_state.vector_db_message_shown:
|
| 209 |
+
st.success(f"π Vector database loaded successfully from upload! Total emails processed for label '{label}': {len(data_chunks)}")
|
| 210 |
st.session_state.vector_db_message_shown = True
|
| 211 |
except Exception as e:
|
| 212 |
+
st.error(f"❌ Error creating chunks from Gmail for label '{label}': {e}")
|
| 213 |
|
| 214 |
# -------------------------------
|
| 215 |
# Cached model loaders for efficiency
|
|
|
|
| 248 |
index = faiss.IndexFlatIP(dimension)
|
| 249 |
index.add(embeddings)
|
| 250 |
st.session_state.vector_store = index
|
| 251 |
+
if not st.session_state.candidates_message_shown:
|
| 252 |
+
st.success("✅ Data embedding and vector store created successfully!")
|
| 253 |
+
st.session_state.candidates_message_shown = True
|
| 254 |
except Exception as e:
|
| 255 |
st.error(f"❌ Error during embedding: {e}")
|
| 256 |
|
|
|
|
| 293 |
st.error("❌ Please process your email data or load a saved vector database first.")
|
| 294 |
return
|
| 295 |
|
| 296 |
+
try:
|
| 297 |
+
embed_model, device = get_embed_model()
|
| 298 |
+
processed_query = preprocess_query(query)
|
| 299 |
+
query_embedding = embed_model.encode(
|
| 300 |
+
[processed_query],
|
| 301 |
+
convert_to_numpy=True,
|
| 302 |
+
show_progress_bar=False,
|
| 303 |
+
device=device
|
| 304 |
+
)
|
| 305 |
+
faiss.normalize_L2(query_embedding)
|
| 306 |
+
|
| 307 |
+
# Perform search
|
| 308 |
+
distances, indices = st.session_state.vector_store.search(query_embedding, TOP_K)
|
| 309 |
+
candidates = []
|
| 310 |
+
for idx, sim in zip(indices[0], distances[0]):
|
| 311 |
+
# Include candidate only if similarity meets the threshold
|
| 312 |
+
if sim >= similarity_threshold:
|
| 313 |
+
candidates.append((st.session_state.data_chunks[idx], sim))
|
| 314 |
+
if not candidates:
|
| 315 |
+
# Append warning message as assistant message
|
| 316 |
+
st.session_state.messages.append({"role": "assistant", "content": "⚠️ No matching embeddings found for your query with the selected threshold."})
|
| 317 |
+
return
|
| 318 |
+
|
| 319 |
+
# Build the context string by concatenating all matching email texts using HTML breaks.
|
| 320 |
+
context_str = ""
|
| 321 |
+
for candidate, sim in candidates:
|
| 322 |
+
context_str += combine_email_text(candidate) + "<br><br>"
|
| 323 |
+
|
| 324 |
+
# Optionally limit context size.
|
| 325 |
+
MAX_CONTEXT_TOKENS = 500
|
| 326 |
+
context_tokens = context_str.split()
|
| 327 |
+
if len(context_tokens) > MAX_CONTEXT_TOKENS:
|
| 328 |
+
context_str = " ".join(context_tokens[:MAX_CONTEXT_TOKENS])
|
| 329 |
+
|
| 330 |
+
st.session_state.candidate_context = context_str
|
| 331 |
+
st.session_state.raw_candidates = candidates
|
| 332 |
+
except Exception as e:
|
| 333 |
+
st.error(f"❌ An error occurred during processing: {e}")
|
| 334 |
|
| 335 |
def call_llm_api(query):
|
| 336 |
"""
|
|
|
|
| 361 |
"Content-Type": "application/json"
|
| 362 |
}
|
| 363 |
|
| 364 |
+
try:
|
| 365 |
+
response = requests.post(url, headers=headers, json=payload)
|
| 366 |
+
response.raise_for_status() # Raises stored HTTPError, if one occurred.
|
| 367 |
+
response_json = response.json()
|
| 368 |
+
generated_text = response_json["choices"][0]["message"]["content"]
|
| 369 |
+
# Append AI response to chat messages
|
| 370 |
+
st.session_state.messages.append({"role": "assistant", "content": generated_text})
|
| 371 |
+
except requests.exceptions.HTTPError as http_err:
|
| 372 |
try:
|
| 373 |
+
error_info = response.json().get("error", {})
|
| 374 |
+
error_message = error_info.get("message", "An unknown error occurred.")
|
| 375 |
+
st.session_state.messages.append({"role": "assistant", "content": f"❌ HTTP error occurred: {error_message}"})
|
| 376 |
+
except ValueError:
|
| 377 |
+
st.session_state.messages.append({"role": "assistant", "content": f"❌ HTTP error occurred: {response.status_code} - {response.text}"})
|
| 378 |
+
except Exception as err:
|
| 379 |
+
st.session_state.messages.append({"role": "assistant", "content": f"❌ An unexpected error occurred: {err}"})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
|
| 381 |
def handle_user_query():
|
| 382 |
st.header("💬 Let's Chat with Your Emails")
|
|
|
|
| 399 |
if user_input:
|
| 400 |
# Append user message to chat
|
| 401 |
st.session_state.messages.append({"role": "user", "content": user_input})
|
| 402 |
+
|
| 403 |
+
# Append assistant "thinking" message
|
| 404 |
+
st.session_state.messages.append({"role": "assistant", "content": "π Processing your query..."})
|
| 405 |
+
|
| 406 |
+
# Display chat messages
|
| 407 |
+
for msg in st.session_state.messages:
|
| 408 |
+
if msg["role"] == "user":
|
| 409 |
+
with st.chat_message("user"):
|
| 410 |
+
st.markdown(msg["content"])
|
| 411 |
+
elif msg["role"] == "assistant":
|
| 412 |
+
with st.chat_message("assistant"):
|
| 413 |
+
st.markdown(msg["content"])
|
| 414 |
+
|
| 415 |
# Process the query
|
| 416 |
process_candidate_emails(user_input, similarity_threshold)
|
| 417 |
+
|
| 418 |
+
# If there's a candidate context, call the LLM API
|
| 419 |
if st.session_state.candidate_context:
|
|
|
|
| 420 |
call_llm_api(user_input)
|
| 421 |
+
|
| 422 |
+
# Display chat messages again with updated AI response
|
| 423 |
for msg in st.session_state.messages:
|
| 424 |
if msg["role"] == "user":
|
| 425 |
with st.chat_message("user"):
|
|
|
|
| 427 |
elif msg["role"] == "assistant":
|
| 428 |
with st.chat_message("assistant"):
|
| 429 |
st.markdown(msg["content"])
|
| 430 |
+
|
| 431 |
# Display matching email chunks in an expander
|
| 432 |
if st.session_state.raw_candidates:
|
| 433 |
with st.expander("π Matching Email Chunks:", expanded=False):
|