Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -12,9 +12,6 @@ from pyngrok import ngrok
|
|
| 12 |
from googleapiclient.discovery import build
|
| 13 |
from google_auth_oauthlib.flow import InstalledAppFlow
|
| 14 |
from google.auth.transport.requests import Request
|
| 15 |
-
import subprocess
|
| 16 |
-
import time
|
| 17 |
-
import sys
|
| 18 |
|
| 19 |
# ===============================
|
| 20 |
# 1. Streamlit App Configuration
|
|
@@ -22,7 +19,9 @@ import sys
|
|
| 22 |
st.set_page_config(page_title="📥 Email Chat Application", layout="wide")
|
| 23 |
st.title("💬 Turn Emails into Conversations—Effortless Chat with Your Inbox! 📩")
|
| 24 |
|
| 25 |
-
#
|
|
|
|
|
|
|
| 26 |
if "authenticated" not in st.session_state:
|
| 27 |
st.session_state.authenticated = False
|
| 28 |
if "creds" not in st.session_state:
|
|
@@ -50,11 +49,17 @@ if "raw_candidates" not in st.session_state:
|
|
| 50 |
if "messages" not in st.session_state:
|
| 51 |
st.session_state.messages = []
|
| 52 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
def count_tokens(text):
|
| 54 |
return len(text.split())
|
| 55 |
|
| 56 |
# ===============================
|
| 57 |
-
#
|
| 58 |
# ===============================
|
| 59 |
def reset_session_state():
|
| 60 |
st.session_state.authenticated = False
|
|
@@ -83,7 +88,7 @@ def authenticate_gmail(credentials_file):
|
|
| 83 |
if creds and creds.valid:
|
| 84 |
st.session_state.creds = creds
|
| 85 |
st.session_state.authenticated = True
|
| 86 |
-
if not st.session_state.
|
| 87 |
st.success("✅ Authentication successful!")
|
| 88 |
st.session_state.candidates_message_shown = True
|
| 89 |
return creds
|
|
@@ -95,7 +100,7 @@ def authenticate_gmail(credentials_file):
|
|
| 95 |
creds.refresh(Request())
|
| 96 |
st.session_state.creds = creds
|
| 97 |
st.session_state.authenticated = True
|
| 98 |
-
if not st.session_state.
|
| 99 |
st.success("✅ Authentication successful!")
|
| 100 |
st.session_state.candidates_message_shown = True
|
| 101 |
with open('token.json', 'w') as token_file:
|
|
@@ -115,7 +120,7 @@ def submit_auth_code():
|
|
| 115 |
st.session_state.flow.fetch_token(code=st.session_state.auth_code)
|
| 116 |
st.session_state.creds = st.session_state.flow.credentials
|
| 117 |
st.session_state.authenticated = True
|
| 118 |
-
if not st.session_state.
|
| 119 |
st.success("✅ Authentication successful!")
|
| 120 |
st.session_state.candidates_message_shown = True
|
| 121 |
with open('token.json', 'w') as token_file:
|
|
@@ -124,7 +129,7 @@ def submit_auth_code():
|
|
| 124 |
st.error(f"❌ Error during authentication: {e}")
|
| 125 |
|
| 126 |
# ===============================
|
| 127 |
-
#
|
| 128 |
# ===============================
|
| 129 |
def extract_email_body(payload):
|
| 130 |
if 'body' in payload and 'data' in payload['body'] and payload['body']['data']:
|
|
@@ -197,7 +202,7 @@ def create_chunks_from_gmail(service, label):
|
|
| 197 |
data_chunks.append(email_dict)
|
| 198 |
progress_bar.progress(min((idx + 1) / total, 1.0))
|
| 199 |
st.session_state.data_chunks.extend(data_chunks)
|
| 200 |
-
if not st.session_state.
|
| 201 |
st.success(f"✅ Data chunks created successfully from {label}! Total emails processed for this label: {len(data_chunks)}")
|
| 202 |
st.session_state.vector_db_message_shown = True
|
| 203 |
except Exception as e:
|
|
@@ -263,7 +268,7 @@ def save_vector_database():
|
|
| 263 |
st.error(f"❌ Error saving vector database: {e}")
|
| 264 |
|
| 265 |
# ===============================
|
| 266 |
-
#
|
| 267 |
# ===============================
|
| 268 |
def preprocess_query(query):
|
| 269 |
return query.lower().strip()
|
|
@@ -294,7 +299,7 @@ def process_candidate_emails(query, similarity_threshold):
|
|
| 294 |
device=device
|
| 295 |
)
|
| 296 |
faiss.normalize_L2(query_embedding)
|
| 297 |
-
|
| 298 |
# Perform search
|
| 299 |
distances, indices = st.session_state.vector_store.search(query_embedding, TOP_K)
|
| 300 |
candidates = []
|
|
@@ -319,8 +324,6 @@ def process_candidate_emails(query, similarity_threshold):
|
|
| 319 |
|
| 320 |
st.session_state.candidate_context = context_str
|
| 321 |
st.session_state.raw_candidates = candidates
|
| 322 |
-
# Append success message to chat messages
|
| 323 |
-
st.session_state.messages.append({"role": "system", "content": "✅ Candidates retrieved and context built!"})
|
| 324 |
except Exception as e:
|
| 325 |
st.error(f"❌ An error occurred during processing: {e}")
|
| 326 |
|
|
@@ -342,7 +345,7 @@ def call_llm_api(query):
|
|
| 342 |
payload = {
|
| 343 |
"model": "llama-3.3-70b-versatile", # Adjust model as needed.
|
| 344 |
"messages": [
|
| 345 |
-
{"role": "system", "content": f"Use the following context
|
| 346 |
{"role": "user", "content": query}
|
| 347 |
]
|
| 348 |
}
|
|
@@ -400,24 +403,37 @@ def handle_user_query():
|
|
| 400 |
# Send the query to the LLM API
|
| 401 |
call_llm_api(user_input)
|
| 402 |
|
| 403 |
-
#
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
st.
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
|
| 413 |
-
|
| 414 |
-
with st.
|
| 415 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 416 |
|
| 417 |
# ===============================
|
| 418 |
-
#
|
| 419 |
# ===============================
|
| 420 |
def main():
|
|
|
|
| 421 |
st.sidebar.header("🔒 Gmail Authentication")
|
| 422 |
credentials_file = st.sidebar.file_uploader("📁 Upload credentials.json", type=["json"])
|
| 423 |
|
|
@@ -439,7 +455,7 @@ def main():
|
|
| 439 |
st.session_state.vector_store = vector_db.get("vector_store")
|
| 440 |
st.session_state.embeddings = vector_db.get("embeddings")
|
| 441 |
st.session_state.data_chunks = vector_db.get("data_chunks")
|
| 442 |
-
if not st.session_state.
|
| 443 |
st.success("📁 Vector database loaded successfully from upload!")
|
| 444 |
st.session_state.vector_db_message_shown = True
|
| 445 |
except Exception as e:
|
|
|
|
| 12 |
from googleapiclient.discovery import build
|
| 13 |
from google_auth_oauthlib.flow import InstalledAppFlow
|
| 14 |
from google.auth.transport.requests import Request
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
# ===============================
|
| 17 |
# 1. Streamlit App Configuration
|
|
|
|
| 19 |
st.set_page_config(page_title="📥 Email Chat Application", layout="wide")
|
| 20 |
st.title("💬 Turn Emails into Conversations—Effortless Chat with Your Inbox! 📩")
|
| 21 |
|
| 22 |
+
# ===============================
|
| 23 |
+
# 2. Initialize Session State Variables
|
| 24 |
+
# ===============================
|
| 25 |
if "authenticated" not in st.session_state:
|
| 26 |
st.session_state.authenticated = False
|
| 27 |
if "creds" not in st.session_state:
|
|
|
|
| 49 |
if "messages" not in st.session_state:
|
| 50 |
st.session_state.messages = []
|
| 51 |
|
| 52 |
+
# Flags to ensure success messages are shown only once
|
| 53 |
+
if "candidates_message_shown" not in st.session_state:
|
| 54 |
+
st.session_state.candidates_message_shown = False
|
| 55 |
+
if "vector_db_message_shown" not in st.session_state:
|
| 56 |
+
st.session_state.vector_db_message_shown = False
|
| 57 |
+
|
| 58 |
def count_tokens(text):
|
| 59 |
return len(text.split())
|
| 60 |
|
| 61 |
# ===============================
|
| 62 |
+
# 3. Gmail Authentication Functions
|
| 63 |
# ===============================
|
| 64 |
def reset_session_state():
|
| 65 |
st.session_state.authenticated = False
|
|
|
|
| 88 |
if creds and creds.valid:
|
| 89 |
st.session_state.creds = creds
|
| 90 |
st.session_state.authenticated = True
|
| 91 |
+
if not st.session_state.candidates_message_shown:
|
| 92 |
st.success("✅ Authentication successful!")
|
| 93 |
st.session_state.candidates_message_shown = True
|
| 94 |
return creds
|
|
|
|
| 100 |
creds.refresh(Request())
|
| 101 |
st.session_state.creds = creds
|
| 102 |
st.session_state.authenticated = True
|
| 103 |
+
if not st.session_state.candidates_message_shown:
|
| 104 |
st.success("✅ Authentication successful!")
|
| 105 |
st.session_state.candidates_message_shown = True
|
| 106 |
with open('token.json', 'w') as token_file:
|
|
|
|
| 120 |
st.session_state.flow.fetch_token(code=st.session_state.auth_code)
|
| 121 |
st.session_state.creds = st.session_state.flow.credentials
|
| 122 |
st.session_state.authenticated = True
|
| 123 |
+
if not st.session_state.candidates_message_shown:
|
| 124 |
st.success("✅ Authentication successful!")
|
| 125 |
st.session_state.candidates_message_shown = True
|
| 126 |
with open('token.json', 'w') as token_file:
|
|
|
|
| 129 |
st.error(f"❌ Error during authentication: {e}")
|
| 130 |
|
| 131 |
# ===============================
|
| 132 |
+
# 4. Email Data Extraction, Embedding and Vector Store Functions
|
| 133 |
# ===============================
|
| 134 |
def extract_email_body(payload):
|
| 135 |
if 'body' in payload and 'data' in payload['body'] and payload['body']['data']:
|
|
|
|
| 202 |
data_chunks.append(email_dict)
|
| 203 |
progress_bar.progress(min((idx + 1) / total, 1.0))
|
| 204 |
st.session_state.data_chunks.extend(data_chunks)
|
| 205 |
+
if not st.session_state.vector_db_message_shown:
|
| 206 |
st.success(f"✅ Data chunks created successfully from {label}! Total emails processed for this label: {len(data_chunks)}")
|
| 207 |
st.session_state.vector_db_message_shown = True
|
| 208 |
except Exception as e:
|
|
|
|
| 268 |
st.error(f"❌ Error saving vector database: {e}")
|
| 269 |
|
| 270 |
# ===============================
|
| 271 |
+
# 5. Handling User Queries (User-Controlled Threshold)
|
| 272 |
# ===============================
|
| 273 |
def preprocess_query(query):
|
| 274 |
return query.lower().strip()
|
|
|
|
| 299 |
device=device
|
| 300 |
)
|
| 301 |
faiss.normalize_L2(query_embedding)
|
| 302 |
+
|
| 303 |
# Perform search
|
| 304 |
distances, indices = st.session_state.vector_store.search(query_embedding, TOP_K)
|
| 305 |
candidates = []
|
|
|
|
| 324 |
|
| 325 |
st.session_state.candidate_context = context_str
|
| 326 |
st.session_state.raw_candidates = candidates
|
|
|
|
|
|
|
| 327 |
except Exception as e:
|
| 328 |
st.error(f"❌ An error occurred during processing: {e}")
|
| 329 |
|
|
|
|
| 345 |
payload = {
|
| 346 |
"model": "llama-3.3-70b-versatile", # Adjust model as needed.
|
| 347 |
"messages": [
|
| 348 |
+
{"role": "system", "content": f"Use the following context:\n{st.session_state.candidate_context}"},
|
| 349 |
{"role": "user", "content": query}
|
| 350 |
]
|
| 351 |
}
|
|
|
|
| 403 |
# Send the query to the LLM API
|
| 404 |
call_llm_api(user_input)
|
| 405 |
|
| 406 |
+
# Display chat messages
|
| 407 |
+
for msg in st.session_state.messages:
|
| 408 |
+
if msg["role"] == "user":
|
| 409 |
+
with st.chat_message("user"):
|
| 410 |
+
st.markdown(msg["content"])
|
| 411 |
+
elif msg["role"] == "assistant":
|
| 412 |
+
with st.chat_message("assistant"):
|
| 413 |
+
st.markdown(msg["content"])
|
| 414 |
+
|
| 415 |
+
# Display matching email chunks in an expander
|
| 416 |
+
if st.session_state.raw_candidates:
|
| 417 |
+
with st.expander("🔎 Matching Email Chunks:", expanded=False):
|
| 418 |
+
for candidate, sim in st.session_state.raw_candidates:
|
| 419 |
+
# Get a snippet (first 150 characters) of the body instead of full body content.
|
| 420 |
+
body = candidate.get('body', 'No Content')
|
| 421 |
+
snippet = (body[:150] + "...") if len(body) > 150 else body
|
| 422 |
+
st.markdown(
|
| 423 |
+
f"**From:** {candidate.get('sender','Unknown')} <br>"
|
| 424 |
+
f"**To:** {candidate.get('to','Unknown')} <br>"
|
| 425 |
+
f"**Date:** {candidate.get('date','Unknown')} <br>"
|
| 426 |
+
f"**Subject:** {candidate.get('subject','No Subject')} <br>"
|
| 427 |
+
f"**Body Snippet:** {snippet} <br>"
|
| 428 |
+
f"**Similarity:** {sim:.4f}",
|
| 429 |
+
unsafe_allow_html=True
|
| 430 |
+
)
|
| 431 |
|
| 432 |
# ===============================
|
| 433 |
+
# 6. Main Application Logic
|
| 434 |
# ===============================
|
| 435 |
def main():
|
| 436 |
+
SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
|
| 437 |
st.sidebar.header("🔒 Gmail Authentication")
|
| 438 |
credentials_file = st.sidebar.file_uploader("📁 Upload credentials.json", type=["json"])
|
| 439 |
|
|
|
|
| 455 |
st.session_state.vector_store = vector_db.get("vector_store")
|
| 456 |
st.session_state.embeddings = vector_db.get("embeddings")
|
| 457 |
st.session_state.data_chunks = vector_db.get("data_chunks")
|
| 458 |
+
if not st.session_state.vector_db_message_shown:
|
| 459 |
st.success("📁 Vector database loaded successfully from upload!")
|
| 460 |
st.session_state.vector_db_message_shown = True
|
| 461 |
except Exception as e:
|