Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,10 +22,7 @@ import sys
|
|
| 22 |
st.set_page_config(page_title="📥 Email Chat Application", layout="wide")
|
| 23 |
st.title("💬 Turn Emails into Conversations—Effortless Chat with Your Inbox! 📩")
|
| 24 |
|
| 25 |
-
#
|
| 26 |
-
# 2. Gmail Authentication Configuration
|
| 27 |
-
# ===============================
|
| 28 |
-
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
|
| 29 |
if "authenticated" not in st.session_state:
|
| 30 |
st.session_state.authenticated = False
|
| 31 |
if "creds" not in st.session_state:
|
|
@@ -49,11 +46,15 @@ if "candidate_context" not in st.session_state:
|
|
| 49 |
if "raw_candidates" not in st.session_state:
|
| 50 |
st.session_state.raw_candidates = None
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
def count_tokens(text):
|
| 53 |
return len(text.split())
|
| 54 |
|
| 55 |
# ===============================
|
| 56 |
-
#
|
| 57 |
# ===============================
|
| 58 |
def reset_session_state():
|
| 59 |
st.session_state.authenticated = False
|
|
@@ -66,6 +67,9 @@ def reset_session_state():
|
|
| 66 |
st.session_state.vector_store = None
|
| 67 |
st.session_state.candidate_context = None
|
| 68 |
st.session_state.raw_candidates = None
|
|
|
|
|
|
|
|
|
|
| 69 |
for filename in ["token.json", "data_chunks.pkl", "embeddings.pkl", "vector_store.index", "vector_database.pkl"]:
|
| 70 |
if os.path.exists(filename):
|
| 71 |
os.remove(filename)
|
|
@@ -79,7 +83,9 @@ def authenticate_gmail(credentials_file):
|
|
| 79 |
if creds and creds.valid:
|
| 80 |
st.session_state.creds = creds
|
| 81 |
st.session_state.authenticated = True
|
| 82 |
-
st.
|
|
|
|
|
|
|
| 83 |
return creds
|
| 84 |
except Exception as e:
|
| 85 |
st.error(f"❌ Invalid token.json file: {e}")
|
|
@@ -89,7 +95,9 @@ def authenticate_gmail(credentials_file):
|
|
| 89 |
creds.refresh(Request())
|
| 90 |
st.session_state.creds = creds
|
| 91 |
st.session_state.authenticated = True
|
| 92 |
-
st.
|
|
|
|
|
|
|
| 93 |
with open('token.json', 'w') as token_file:
|
| 94 |
token_file.write(creds.to_json())
|
| 95 |
return creds
|
|
@@ -107,14 +115,16 @@ def submit_auth_code():
|
|
| 107 |
st.session_state.flow.fetch_token(code=st.session_state.auth_code)
|
| 108 |
st.session_state.creds = st.session_state.flow.credentials
|
| 109 |
st.session_state.authenticated = True
|
|
|
|
|
|
|
|
|
|
| 110 |
with open('token.json', 'w') as token_file:
|
| 111 |
token_file.write(st.session_state.creds.to_json())
|
| 112 |
-
st.success("✅ Authentication successful!")
|
| 113 |
except Exception as e:
|
| 114 |
st.error(f"❌ Error during authentication: {e}")
|
| 115 |
|
| 116 |
# ===============================
|
| 117 |
-
#
|
| 118 |
# ===============================
|
| 119 |
def extract_email_body(payload):
|
| 120 |
if 'body' in payload and 'data' in payload['body'] and payload['body']['data']:
|
|
@@ -187,7 +197,9 @@ def create_chunks_from_gmail(service, label):
|
|
| 187 |
data_chunks.append(email_dict)
|
| 188 |
progress_bar.progress(min((idx + 1) / total, 1.0))
|
| 189 |
st.session_state.data_chunks.extend(data_chunks)
|
| 190 |
-
st.
|
|
|
|
|
|
|
| 191 |
except Exception as e:
|
| 192 |
st.error(f"❌ Error creating chunks from Gmail for label {label}: {e}")
|
| 193 |
|
|
@@ -251,7 +263,7 @@ def save_vector_database():
|
|
| 251 |
st.error(f"❌ Error saving vector database: {e}")
|
| 252 |
|
| 253 |
# ===============================
|
| 254 |
-
#
|
| 255 |
# ===============================
|
| 256 |
def preprocess_query(query):
|
| 257 |
return query.lower().strip()
|
|
@@ -283,9 +295,6 @@ def process_candidate_emails(query, similarity_threshold):
|
|
| 283 |
)
|
| 284 |
faiss.normalize_L2(query_embedding)
|
| 285 |
|
| 286 |
-
# Debug: Verify the type of vector_store
|
| 287 |
-
# st.write(f"Vector Store Type: {type(st.session_state.vector_store)}")
|
| 288 |
-
|
| 289 |
# Perform search
|
| 290 |
distances, indices = st.session_state.vector_store.search(query_embedding, TOP_K)
|
| 291 |
candidates = []
|
|
@@ -310,7 +319,8 @@ def process_candidate_emails(query, similarity_threshold):
|
|
| 310 |
|
| 311 |
st.session_state.candidate_context = context_str
|
| 312 |
st.session_state.raw_candidates = candidates
|
| 313 |
-
|
|
|
|
| 314 |
except Exception as e:
|
| 315 |
st.error(f"❌ An error occurred during processing: {e}")
|
| 316 |
|
|
@@ -349,8 +359,8 @@ def call_llm_api(query):
|
|
| 349 |
response.raise_for_status() # Raises stored HTTPError, if one occurred.
|
| 350 |
response_json = response.json()
|
| 351 |
generated_text = response_json["choices"][0]["message"]["content"]
|
| 352 |
-
|
| 353 |
-
st.
|
| 354 |
except requests.exceptions.HTTPError as http_err:
|
| 355 |
try:
|
| 356 |
error_info = response.json().get("error", {})
|
|
@@ -376,37 +386,36 @@ def handle_user_query():
|
|
| 376 |
key='similarity_threshold'
|
| 377 |
)
|
| 378 |
|
| 379 |
-
#
|
| 380 |
-
|
| 381 |
-
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
|
| 385 |
-
|
| 386 |
-
|
| 387 |
-
|
| 388 |
-
|
| 389 |
-
|
| 390 |
-
|
| 391 |
-
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
| 397 |
-
|
| 398 |
-
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
| 402 |
-
|
| 403 |
-
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
st.text_input("Enter your query:", key="query_input", on_change=query_callback)
|
| 407 |
|
| 408 |
# ===============================
|
| 409 |
-
#
|
| 410 |
# ===============================
|
| 411 |
def main():
|
| 412 |
st.sidebar.header("🔒 Gmail Authentication")
|
|
@@ -430,7 +439,9 @@ def main():
|
|
| 430 |
st.session_state.vector_store = vector_db.get("vector_store")
|
| 431 |
st.session_state.embeddings = vector_db.get("embeddings")
|
| 432 |
st.session_state.data_chunks = vector_db.get("data_chunks")
|
| 433 |
-
st.
|
|
|
|
|
|
|
| 434 |
except Exception as e:
|
| 435 |
st.error(f"❌ Error loading vector database: {e}")
|
| 436 |
elif data_management_option == "Authenticate and Create New Data":
|
|
|
|
| 22 |
st.set_page_config(page_title="📥 Email Chat Application", layout="wide")
|
| 23 |
st.title("💬 Turn Emails into Conversations—Effortless Chat with Your Inbox! 📩")
|
| 24 |
|
| 25 |
+
# Initialize session state variables
|
|
|
|
|
|
|
|
|
|
| 26 |
if "authenticated" not in st.session_state:
|
| 27 |
st.session_state.authenticated = False
|
| 28 |
if "creds" not in st.session_state:
|
|
|
|
| 46 |
if "raw_candidates" not in st.session_state:
|
| 47 |
st.session_state.raw_candidates = None
|
| 48 |
|
| 49 |
+
# Initialize chat messages
|
| 50 |
+
if "messages" not in st.session_state:
|
| 51 |
+
st.session_state.messages = []
|
| 52 |
+
|
| 53 |
def count_tokens(text):
|
| 54 |
return len(text.split())
|
| 55 |
|
| 56 |
# ===============================
|
| 57 |
+
# 2. Gmail Authentication Functions
|
| 58 |
# ===============================
|
| 59 |
def reset_session_state():
|
| 60 |
st.session_state.authenticated = False
|
|
|
|
| 67 |
st.session_state.vector_store = None
|
| 68 |
st.session_state.candidate_context = None
|
| 69 |
st.session_state.raw_candidates = None
|
| 70 |
+
st.session_state.messages = []
|
| 71 |
+
st.session_state.candidates_message_shown = False
|
| 72 |
+
st.session_state.vector_db_message_shown = False
|
| 73 |
for filename in ["token.json", "data_chunks.pkl", "embeddings.pkl", "vector_store.index", "vector_database.pkl"]:
|
| 74 |
if os.path.exists(filename):
|
| 75 |
os.remove(filename)
|
|
|
|
| 83 |
if creds and creds.valid:
|
| 84 |
st.session_state.creds = creds
|
| 85 |
st.session_state.authenticated = True
|
| 86 |
+
if not st.session_state.get('candidates_message_shown', False):
|
| 87 |
+
st.success("✅ Authentication successful!")
|
| 88 |
+
st.session_state.candidates_message_shown = True
|
| 89 |
return creds
|
| 90 |
except Exception as e:
|
| 91 |
st.error(f"❌ Invalid token.json file: {e}")
|
|
|
|
| 95 |
creds.refresh(Request())
|
| 96 |
st.session_state.creds = creds
|
| 97 |
st.session_state.authenticated = True
|
| 98 |
+
if not st.session_state.get('candidates_message_shown', False):
|
| 99 |
+
st.success("✅ Authentication successful!")
|
| 100 |
+
st.session_state.candidates_message_shown = True
|
| 101 |
with open('token.json', 'w') as token_file:
|
| 102 |
token_file.write(creds.to_json())
|
| 103 |
return creds
|
|
|
|
| 115 |
st.session_state.flow.fetch_token(code=st.session_state.auth_code)
|
| 116 |
st.session_state.creds = st.session_state.flow.credentials
|
| 117 |
st.session_state.authenticated = True
|
| 118 |
+
if not st.session_state.get('candidates_message_shown', False):
|
| 119 |
+
st.success("✅ Authentication successful!")
|
| 120 |
+
st.session_state.candidates_message_shown = True
|
| 121 |
with open('token.json', 'w') as token_file:
|
| 122 |
token_file.write(st.session_state.creds.to_json())
|
|
|
|
| 123 |
except Exception as e:
|
| 124 |
st.error(f"❌ Error during authentication: {e}")
|
| 125 |
|
| 126 |
# ===============================
|
| 127 |
+
# 3. Email Data Extraction, Embedding and Vector Store Functions
|
| 128 |
# ===============================
|
| 129 |
def extract_email_body(payload):
|
| 130 |
if 'body' in payload and 'data' in payload['body'] and payload['body']['data']:
|
|
|
|
| 197 |
data_chunks.append(email_dict)
|
| 198 |
progress_bar.progress(min((idx + 1) / total, 1.0))
|
| 199 |
st.session_state.data_chunks.extend(data_chunks)
|
| 200 |
+
if not st.session_state.get('vector_db_message_shown', False):
|
| 201 |
+
st.success(f"✅ Data chunks created successfully from {label}! Total emails processed for this label: {len(data_chunks)}")
|
| 202 |
+
st.session_state.vector_db_message_shown = True
|
| 203 |
except Exception as e:
|
| 204 |
st.error(f"❌ Error creating chunks from Gmail for label {label}: {e}")
|
| 205 |
|
|
|
|
| 263 |
st.error(f"❌ Error saving vector database: {e}")
|
| 264 |
|
| 265 |
# ===============================
|
| 266 |
+
# 4. Handling User Queries (User-Controlled Threshold)
|
| 267 |
# ===============================
|
| 268 |
def preprocess_query(query):
|
| 269 |
return query.lower().strip()
|
|
|
|
| 295 |
)
|
| 296 |
faiss.normalize_L2(query_embedding)
|
| 297 |
|
|
|
|
|
|
|
|
|
|
| 298 |
# Perform search
|
| 299 |
distances, indices = st.session_state.vector_store.search(query_embedding, TOP_K)
|
| 300 |
candidates = []
|
|
|
|
| 319 |
|
| 320 |
st.session_state.candidate_context = context_str
|
| 321 |
st.session_state.raw_candidates = candidates
|
| 322 |
+
# Append success message to chat messages
|
| 323 |
+
st.session_state.messages.append({"role": "system", "content": "✅ Candidates retrieved and context built!"})
|
| 324 |
except Exception as e:
|
| 325 |
st.error(f"❌ An error occurred during processing: {e}")
|
| 326 |
|
|
|
|
| 359 |
response.raise_for_status() # Raises stored HTTPError, if one occurred.
|
| 360 |
response_json = response.json()
|
| 361 |
generated_text = response_json["choices"][0]["message"]["content"]
|
| 362 |
+
# Append AI response to chat messages
|
| 363 |
+
st.session_state.messages.append({"role": "assistant", "content": generated_text})
|
| 364 |
except requests.exceptions.HTTPError as http_err:
|
| 365 |
try:
|
| 366 |
error_info = response.json().get("error", {})
|
|
|
|
| 386 |
key='similarity_threshold'
|
| 387 |
)
|
| 388 |
|
| 389 |
+
# Chat input for user queries
|
| 390 |
+
user_input = st.chat_input("Enter your query:")
|
| 391 |
+
|
| 392 |
+
if user_input:
|
| 393 |
+
# Append user message to chat
|
| 394 |
+
st.session_state.messages.append({"role": "user", "content": user_input})
|
| 395 |
+
|
| 396 |
+
# Process the query
|
| 397 |
+
process_candidate_emails(user_input, similarity_threshold)
|
| 398 |
+
|
| 399 |
+
if st.session_state.candidate_context:
|
| 400 |
+
# Send the query to the LLM API
|
| 401 |
+
call_llm_api(user_input)
|
| 402 |
+
|
| 403 |
+
# Clearing the input field is handled automatically by st.chat_input
|
| 404 |
+
|
| 405 |
+
# Display chat messages
|
| 406 |
+
for msg in st.session_state.messages:
|
| 407 |
+
if msg["role"] == "user":
|
| 408 |
+
with st.chat_message("user"):
|
| 409 |
+
st.markdown(msg["content"])
|
| 410 |
+
elif msg["role"] == "assistant":
|
| 411 |
+
with st.chat_message("assistant"):
|
| 412 |
+
st.markdown(msg["content"])
|
| 413 |
+
elif msg["role"] == "system":
|
| 414 |
+
with st.chat_message("system"):
|
| 415 |
+
st.markdown(msg["content"])
|
|
|
|
| 416 |
|
| 417 |
# ===============================
|
| 418 |
+
# 5. Main Application Logic
|
| 419 |
# ===============================
|
| 420 |
def main():
|
| 421 |
st.sidebar.header("🔒 Gmail Authentication")
|
|
|
|
| 439 |
st.session_state.vector_store = vector_db.get("vector_store")
|
| 440 |
st.session_state.embeddings = vector_db.get("embeddings")
|
| 441 |
st.session_state.data_chunks = vector_db.get("data_chunks")
|
| 442 |
+
if not st.session_state.get('vector_db_message_shown', False):
|
| 443 |
+
st.success("📁 Vector database loaded successfully from upload!")
|
| 444 |
+
st.session_state.vector_db_message_shown = True
|
| 445 |
except Exception as e:
|
| 446 |
st.error(f"❌ Error loading vector database: {e}")
|
| 447 |
elif data_management_option == "Authenticate and Create New Data":
|