Spaces:

wahab5763
/

EmalDataExtractor

Sleeping

App Files Files Community

wahab5763 committed on Jan 24, 2025

Commit

872e099

verified ·

1 Parent(s): 71416c7

Update app.py

Browse files

Files changed (1) hide show

app.py +46 -30

app.py CHANGED Viewed

@@ -12,9 +12,6 @@ from pyngrok import ngrok
 from googleapiclient.discovery import build
 from google_auth_oauthlib.flow import InstalledAppFlow
 from google.auth.transport.requests import Request
-import subprocess
-import time
-import sys
 # ===============================
 # 1. Streamlit App Configuration
@@ -22,7 +19,9 @@ import sys
 st.set_page_config(page_title="📥 Email Chat Application", layout="wide")
 st.title("💬 Turn Emails into Conversations—Effortless Chat with Your Inbox! 📩")
-# Initialize session state variables
 if "authenticated" not in st.session_state:
     st.session_state.authenticated = False
 if "creds" not in st.session_state:
@@ -50,11 +49,17 @@ if "raw_candidates" not in st.session_state:
 if "messages" not in st.session_state:
     st.session_state.messages = []
 def count_tokens(text):
     """Return the number of whitespace-separated words in ``text``.

     This is a simple word count used as a token estimate; it does not
     invoke any model tokenizer.
     """
     words = text.split()
     return len(words)
 # ===============================
-# 2. Gmail Authentication Functions
 # ===============================
 def reset_session_state():
     st.session_state.authenticated = False
@@ -83,7 +88,7 @@ def authenticate_gmail(credentials_file):
             if creds and creds.valid:
                 st.session_state.creds = creds
                 st.session_state.authenticated = True
-                if not st.session_state.get('candidates_message_shown', False):
                     st.success("✅ Authentication successful!")
                     st.session_state.candidates_message_shown = True
                 return creds
@@ -95,7 +100,7 @@ def authenticate_gmail(credentials_file):
             creds.refresh(Request())
             st.session_state.creds = creds
             st.session_state.authenticated = True
-            if not st.session_state.get('candidates_message_shown', False):
                 st.success("✅ Authentication successful!")
                 st.session_state.candidates_message_shown = True
             with open('token.json', 'w') as token_file:
@@ -115,7 +120,7 @@ def submit_auth_code():
         st.session_state.flow.fetch_token(code=st.session_state.auth_code)
         st.session_state.creds = st.session_state.flow.credentials
         st.session_state.authenticated = True
-        if not st.session_state.get('candidates_message_shown', False):
             st.success("✅ Authentication successful!")
             st.session_state.candidates_message_shown = True
         with open('token.json', 'w') as token_file:
@@ -124,7 +129,7 @@ def submit_auth_code():
         st.error(f"❌ Error during authentication: {e}")
 # ===============================
-# 3. Email Data Extraction, Embedding and Vector Store Functions
 # ===============================
 def extract_email_body(payload):
     if 'body' in payload and 'data' in payload['body'] and payload['body']['data']:
@@ -197,7 +202,7 @@ def create_chunks_from_gmail(service, label):
             data_chunks.append(email_dict)
             progress_bar.progress(min((idx + 1) / total, 1.0))
         st.session_state.data_chunks.extend(data_chunks)
-        if not st.session_state.get('vector_db_message_shown', False):
             st.success(f"✅ Data chunks created successfully from {label}! Total emails processed for this label: {len(data_chunks)}")
             st.session_state.vector_db_message_shown = True
     except Exception as e:
@@ -263,7 +268,7 @@ def save_vector_database():
         st.error(f"❌ Error saving vector database: {e}")
 # ===============================
-# 4. Handling User Queries (User-Controlled Threshold)
 # ===============================
 def preprocess_query(query):
     """Normalize a query: lowercase it, then trim surrounding whitespace."""
     lowered = query.lower()
     return lowered.strip()
@@ -294,7 +299,7 @@ def process_candidate_emails(query, similarity_threshold):
                 device=device
             )
             faiss.normalize_L2(query_embedding)
             # Perform search
             distances, indices = st.session_state.vector_store.search(query_embedding, TOP_K)
             candidates = []
@@ -319,8 +324,6 @@ def process_candidate_emails(query, similarity_threshold):
             st.session_state.candidate_context = context_str
             st.session_state.raw_candidates = candidates
-            # Append success message to chat messages
-            st.session_state.messages.append({"role": "system", "content": "✅ Candidates retrieved and context built!"})
         except Exception as e:
             st.error(f"❌ An error occurred during processing: {e}")
@@ -342,7 +345,7 @@ def call_llm_api(query):
     payload = {
         "model": "llama-3.3-70b-versatile",  # Adjust model as needed.
         "messages": [
-            {"role": "system", "content": f"Use the following context:<br>{st.session_state.candidate_context}"},
             {"role": "user", "content": query}
         ]
     }
@@ -400,24 +403,37 @@ def handle_user_query():
             # Send the query to the LLM API
             call_llm_api(user_input)
-        # Clear the input field is handled automatically by st.chat_input
-    # Display chat messages
-    for msg in st.session_state.messages:
-        if msg["role"] == "user":
-            with st.chat_message("user"):
-                st.markdown(msg["content"])
-        elif msg["role"] == "assistant":
-            with st.chat_message("assistant"):
-                st.markdown(msg["content"])
-        elif msg["role"] == "system":
-            with st.chat_message("system"):
-                st.markdown(msg["content"])
 # ===============================
-# 5. Main Application Logic
 # ===============================
 def main():
     st.sidebar.header("🔒 Gmail Authentication")
     credentials_file = st.sidebar.file_uploader("📁 Upload credentials.json", type=["json"])
@@ -439,7 +455,7 @@ def main():
                 st.session_state.vector_store = vector_db.get("vector_store")
                 st.session_state.embeddings = vector_db.get("embeddings")
                 st.session_state.data_chunks = vector_db.get("data_chunks")
-                if not st.session_state.get('vector_db_message_shown', False):
                     st.success("📁 Vector database loaded successfully from upload!")
                     st.session_state.vector_db_message_shown = True
             except Exception as e:

 from googleapiclient.discovery import build
 from google_auth_oauthlib.flow import InstalledAppFlow
 from google.auth.transport.requests import Request
 # ===============================
 # 1. Streamlit App Configuration
 st.set_page_config(page_title="📥 Email Chat Application", layout="wide")
 st.title("💬 Turn Emails into Conversations—Effortless Chat with Your Inbox! 📩")
+# ===============================
+# 2. Initialize Session State Variables
+# ===============================
 if "authenticated" not in st.session_state:
     st.session_state.authenticated = False
 if "creds" not in st.session_state:
 if "messages" not in st.session_state:
     st.session_state.messages = []
+# Flags to ensure success messages are shown only once
+if "candidates_message_shown" not in st.session_state:
+    st.session_state.candidates_message_shown = False
+if "vector_db_message_shown" not in st.session_state:
+    st.session_state.vector_db_message_shown = False
 def count_tokens(text):
     """Return the number of whitespace-separated words in ``text``.

     This is a simple word count used as a token estimate; it does not
     invoke any model tokenizer.
     """
     words = text.split()
     return len(words)
 # ===============================
+# 3. Gmail Authentication Functions
 # ===============================
 def reset_session_state():
     st.session_state.authenticated = False
             if creds and creds.valid:
                 st.session_state.creds = creds
                 st.session_state.authenticated = True
+                if not st.session_state.candidates_message_shown:
                     st.success("✅ Authentication successful!")
                     st.session_state.candidates_message_shown = True
                 return creds
             creds.refresh(Request())
             st.session_state.creds = creds
             st.session_state.authenticated = True
+            if not st.session_state.candidates_message_shown:
                 st.success("✅ Authentication successful!")
                 st.session_state.candidates_message_shown = True
             with open('token.json', 'w') as token_file:
         st.session_state.flow.fetch_token(code=st.session_state.auth_code)
         st.session_state.creds = st.session_state.flow.credentials
         st.session_state.authenticated = True
+        if not st.session_state.candidates_message_shown:
             st.success("✅ Authentication successful!")
             st.session_state.candidates_message_shown = True
         with open('token.json', 'w') as token_file:
         st.error(f"❌ Error during authentication: {e}")
 # ===============================
+# 4. Email Data Extraction, Embedding and Vector Store Functions
 # ===============================
 def extract_email_body(payload):
     if 'body' in payload and 'data' in payload['body'] and payload['body']['data']:
             data_chunks.append(email_dict)
             progress_bar.progress(min((idx + 1) / total, 1.0))
         st.session_state.data_chunks.extend(data_chunks)
+        if not st.session_state.vector_db_message_shown:
             st.success(f"✅ Data chunks created successfully from {label}! Total emails processed for this label: {len(data_chunks)}")
             st.session_state.vector_db_message_shown = True
     except Exception as e:
         st.error(f"❌ Error saving vector database: {e}")
 # ===============================
+# 5. Handling User Queries (User-Controlled Threshold)
 # ===============================
 def preprocess_query(query):
     """Normalize a query: lowercase it, then trim surrounding whitespace."""
     lowered = query.lower()
     return lowered.strip()
                 device=device
             )
             faiss.normalize_L2(query_embedding)
             # Perform search
             distances, indices = st.session_state.vector_store.search(query_embedding, TOP_K)
             candidates = []
             st.session_state.candidate_context = context_str
             st.session_state.raw_candidates = candidates
         except Exception as e:
             st.error(f"❌ An error occurred during processing: {e}")
     payload = {
         "model": "llama-3.3-70b-versatile",  # Adjust model as needed.
         "messages": [
+            {"role": "system", "content": f"Use the following context:\n{st.session_state.candidate_context}"},
             {"role": "user", "content": query}
         ]
     }
             # Send the query to the LLM API
             call_llm_api(user_input)
+        # Display chat messages
+        for msg in st.session_state.messages:
+            if msg["role"] == "user":
+                with st.chat_message("user"):
+                    st.markdown(msg["content"])
+            elif msg["role"] == "assistant":
+                with st.chat_message("assistant"):
+                    st.markdown(msg["content"])
+        # Display matching email chunks in an expander
+        if st.session_state.raw_candidates:
+            with st.expander("🔎 Matching Email Chunks:", expanded=False):
+                for candidate, sim in st.session_state.raw_candidates:
+                    # Get a snippet (first 150 characters) of the body instead of full body content.
+                    body = candidate.get('body', 'No Content')
+                    snippet = (body[:150] + "...") if len(body) > 150 else body
+                    st.markdown(
+                        f"**From:** {candidate.get('sender','Unknown')}  <br>"
+                        f"**To:** {candidate.get('to','Unknown')}  <br>"
+                        f"**Date:** {candidate.get('date','Unknown')}  <br>"
+                        f"**Subject:** {candidate.get('subject','No Subject')}  <br>"
+                        f"**Body Snippet:** {snippet}  <br>"
+                        f"**Similarity:** {sim:.4f}",
+                        unsafe_allow_html=True
+                    )
 # ===============================
+# 6. Main Application Logic
 # ===============================
 def main():
+    SCOPES = ['https://www.googleapis.com/auth/gmail.readonly']
     st.sidebar.header("🔒 Gmail Authentication")
     credentials_file = st.sidebar.file_uploader("📁 Upload credentials.json", type=["json"])
                 st.session_state.vector_store = vector_db.get("vector_store")
                 st.session_state.embeddings = vector_db.get("embeddings")
                 st.session_state.data_chunks = vector_db.get("data_chunks")
+                if not st.session_state.vector_db_message_shown:
                     st.success("📁 Vector database loaded successfully from upload!")
                     st.session_state.vector_db_message_shown = True
             except Exception as e: