Spaces:

wahab5763
/

EmalDataExtractor

Sleeping

App Files Files Community

wahab5763 committed on Jan 24, 2025

Commit

e4652f2

verified ·

1 Parent(s): 872e099

Update app.py

Browse files

Files changed (1) hide show

app.py +79 -63

app.py CHANGED Viewed

@@ -54,6 +54,8 @@ if "candidates_message_shown" not in st.session_state:
     st.session_state.candidates_message_shown = False
 if "vector_db_message_shown" not in st.session_state:
     st.session_state.vector_db_message_shown = False
 def count_tokens(text):
     return len(text.split())
@@ -75,6 +77,7 @@ def reset_session_state():
     st.session_state.messages = []
     st.session_state.candidates_message_shown = False
     st.session_state.vector_db_message_shown = False
     for filename in ["token.json", "data_chunks.pkl", "embeddings.pkl", "vector_store.index", "vector_database.pkl"]:
         if os.path.exists(filename):
             os.remove(filename)
@@ -203,10 +206,10 @@ def create_chunks_from_gmail(service, label):
             progress_bar.progress(min((idx + 1) / total, 1.0))
         st.session_state.data_chunks.extend(data_chunks)
         if not st.session_state.vector_db_message_shown:
-            st.success(f"✅ Data chunks created successfully from {label}! Total emails processed for this label: {len(data_chunks)}")
             st.session_state.vector_db_message_shown = True
     except Exception as e:
-        st.error(f"❌ Error creating chunks from Gmail for label {label}: {e}")
 # -------------------------------
 # Cached model loaders for efficiency
@@ -245,7 +248,9 @@ def embed_emails(email_chunks):
             index = faiss.IndexFlatIP(dimension)
             index.add(embeddings)
             st.session_state.vector_store = index
-            st.success("✅ Data embedding and vector store created successfully!")
         except Exception as e:
             st.error(f"❌ Error during embedding: {e}")
@@ -288,44 +293,44 @@ def process_candidate_emails(query, similarity_threshold):
         st.error("❌ Please process your email data or load a saved vector database first.")
         return
-    with st.spinner('🔄 Processing your query...'):
-        try:
-            embed_model, device = get_embed_model()
-            processed_query = preprocess_query(query)
-            query_embedding = embed_model.encode(
-                [processed_query],
-                convert_to_numpy=True,
-                show_progress_bar=False,
-                device=device
-            )
-            faiss.normalize_L2(query_embedding)
-            # Perform search
-            distances, indices = st.session_state.vector_store.search(query_embedding, TOP_K)
-            candidates = []
-            for idx, sim in zip(indices[0], distances[0]):
-                # Include candidate only if similarity meets the threshold
-                if sim >= similarity_threshold:
-                    candidates.append((st.session_state.data_chunks[idx], sim))
-            if not candidates:
-                st.warning("⚠️ No matching embeddings found for your query with the selected threshold.")
-                return
-            # Build the context string by concatenating all matching email texts using HTML breaks.
-            context_str = ""
-            for candidate, sim in candidates:
-                context_str += combine_email_text(candidate) + "<br><br>"
-            # Optionally limit context size.
-            MAX_CONTEXT_TOKENS = 500
-            context_tokens = context_str.split()
-            if len(context_tokens) > MAX_CONTEXT_TOKENS:
-                context_str = " ".join(context_tokens[:MAX_CONTEXT_TOKENS])
-            st.session_state.candidate_context = context_str
-            st.session_state.raw_candidates = candidates
-        except Exception as e:
-            st.error(f"❌ An error occurred during processing: {e}")
 def call_llm_api(query):
     """
@@ -356,23 +361,22 @@ def call_llm_api(query):
         "Content-Type": "application/json"
     }
-    with st.spinner("🔄 Fetching AI response..."):
         try:
-            response = requests.post(url, headers=headers, json=payload)
-            response.raise_for_status()  # Raises stored HTTPError, if one occurred.
-            response_json = response.json()
-            generated_text = response_json["choices"][0]["message"]["content"]
-            # Append AI response to chat messages
-            st.session_state.messages.append({"role": "assistant", "content": generated_text})
-        except requests.exceptions.HTTPError as http_err:
-            try:
-                error_info = response.json().get("error", {})
-                error_message = error_info.get("message", "An unknown error occurred.")
-                st.error(f"❌ HTTP error occurred: {error_message}")
-            except ValueError:
-                st.error(f"❌ HTTP error occurred: {response.status_code} - {response.text}")
-        except Exception as err:
-            st.error(f"❌ An unexpected error occurred: {err}")
 def handle_user_query():
     st.header("💬 Let's Chat with Your Emails")
@@ -395,15 +399,27 @@ def handle_user_query():
     if user_input:
         # Append user message to chat
         st.session_state.messages.append({"role": "user", "content": user_input})
         # Process the query
         process_candidate_emails(user_input, similarity_threshold)
         if st.session_state.candidate_context:
-            # Send the query to the LLM API
             call_llm_api(user_input)
-        # Display chat messages
         for msg in st.session_state.messages:
             if msg["role"] == "user":
                 with st.chat_message("user"):
@@ -411,7 +427,7 @@ def handle_user_query():
             elif msg["role"] == "assistant":
                 with st.chat_message("assistant"):
                     st.markdown(msg["content"])
         # Display matching email chunks in an expander
         if st.session_state.raw_candidates:
             with st.expander("🔎 Matching Email Chunks:", expanded=False):

     st.session_state.candidates_message_shown = False
 if "vector_db_message_shown" not in st.session_state:
     st.session_state.vector_db_message_shown = False
+if "pending_query" not in st.session_state:
+    st.session_state.pending_query = False
 def count_tokens(text):
     return len(text.split())
     st.session_state.messages = []
     st.session_state.candidates_message_shown = False
     st.session_state.vector_db_message_shown = False
+    st.session_state.pending_query = False
     for filename in ["token.json", "data_chunks.pkl", "embeddings.pkl", "vector_store.index", "vector_database.pkl"]:
         if os.path.exists(filename):
             os.remove(filename)
             progress_bar.progress(min((idx + 1) / total, 1.0))
         st.session_state.data_chunks.extend(data_chunks)
         if not st.session_state.vector_db_message_shown:
+            st.success(f"📁 Vector database loaded successfully from upload! Total emails processed for label '{label}': {len(data_chunks)}")
             st.session_state.vector_db_message_shown = True
     except Exception as e:
+        st.error(f"❌ Error creating chunks from Gmail for label '{label}': {e}")
 # -------------------------------
 # Cached model loaders for efficiency
             index = faiss.IndexFlatIP(dimension)
             index.add(embeddings)
             st.session_state.vector_store = index
+            if not st.session_state.candidates_message_shown:
+                st.success("✅ Data embedding and vector store created successfully!")
+                st.session_state.candidates_message_shown = True
         except Exception as e:
             st.error(f"❌ Error during embedding: {e}")
         st.error("❌ Please process your email data or load a saved vector database first.")
         return
+    try:
+        embed_model, device = get_embed_model()
+        processed_query = preprocess_query(query)
+        query_embedding = embed_model.encode(
+            [processed_query],
+            convert_to_numpy=True,
+            show_progress_bar=False,
+            device=device
+        )
+        faiss.normalize_L2(query_embedding)
+        # Perform search
+        distances, indices = st.session_state.vector_store.search(query_embedding, TOP_K)
+        candidates = []
+        for idx, sim in zip(indices[0], distances[0]):
+            # Include candidate only if similarity meets the threshold
+            if sim >= similarity_threshold:
+                candidates.append((st.session_state.data_chunks[idx], sim))
+        if not candidates:
+            # Append warning message as assistant message
+            st.session_state.messages.append({"role": "assistant", "content": "⚠️ No matching embeddings found for your query with the selected threshold."})
+            return
+        # Build the context string by concatenating all matching email texts using HTML breaks.
+        context_str = ""
+        for candidate, sim in candidates:
+            context_str += combine_email_text(candidate) + "<br><br>"
+        # Optionally limit context size.
+        MAX_CONTEXT_TOKENS = 500
+        context_tokens = context_str.split()
+        if len(context_tokens) > MAX_CONTEXT_TOKENS:
+            context_str = " ".join(context_tokens[:MAX_CONTEXT_TOKENS])
+        st.session_state.candidate_context = context_str
+        st.session_state.raw_candidates = candidates
+    except Exception as e:
+        st.error(f"❌ An error occurred during processing: {e}")
 def call_llm_api(query):
     """
         "Content-Type": "application/json"
     }
+    try:
+        response = requests.post(url, headers=headers, json=payload)
+        response.raise_for_status()  # Raises stored HTTPError, if one occurred.
+        response_json = response.json()
+        generated_text = response_json["choices"][0]["message"]["content"]
+        # Append AI response to chat messages
+        st.session_state.messages.append({"role": "assistant", "content": generated_text})
+    except requests.exceptions.HTTPError as http_err:
         try:
+            error_info = response.json().get("error", {})
+            error_message = error_info.get("message", "An unknown error occurred.")
+            st.session_state.messages.append({"role": "assistant", "content": f"❌ HTTP error occurred: {error_message}"})
+        except ValueError:
+            st.session_state.messages.append({"role": "assistant", "content": f"❌ HTTP error occurred: {response.status_code} - {response.text}"})
+    except Exception as err:
+        st.session_state.messages.append({"role": "assistant", "content": f"❌ An unexpected error occurred: {err}"})
 def handle_user_query():
     st.header("💬 Let's Chat with Your Emails")
     if user_input:
         # Append user message to chat
         st.session_state.messages.append({"role": "user", "content": user_input})
+        # Append assistant "thinking" message
+        st.session_state.messages.append({"role": "assistant", "content": "💭 Processing your query..."})
+        # Display chat messages
+        for msg in st.session_state.messages:
+            if msg["role"] == "user":
+                with st.chat_message("user"):
+                    st.markdown(msg["content"])
+            elif msg["role"] == "assistant":
+                with st.chat_message("assistant"):
+                    st.markdown(msg["content"])
         # Process the query
         process_candidate_emails(user_input, similarity_threshold)
+        # If there's a candidate context, call the LLM API
         if st.session_state.candidate_context:
             call_llm_api(user_input)
+        # Display chat messages again with updated AI response
         for msg in st.session_state.messages:
             if msg["role"] == "user":
                 with st.chat_message("user"):
             elif msg["role"] == "assistant":
                 with st.chat_message("assistant"):
                     st.markdown(msg["content"])
         # Display matching email chunks in an expander
         if st.session_state.raw_candidates:
             with st.expander("🔎 Matching Email Chunks:", expanded=False):