Spaces:

wahab5763
/

EmalDataExtractor

Sleeping

App Files Community

wahab5763 committed on Jan 24, 2025

Commit

81ba234

verified ·

1 Parent(s): 4d8bd8a

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -31

app.py CHANGED Viewed

@@ -242,7 +242,7 @@ def save_vector_database():
         }
         db_data = pickle.dumps(vector_db)
         st.download_button(
-            label="Download Vector Database",
             data=db_data,
             file_name="vector_database.pkl",
             mime="application/octet-stream"
@@ -284,7 +284,7 @@ def process_candidate_emails(query, similarity_threshold):
             faiss.normalize_L2(query_embedding)
             # Debug: Verify the type of vector_store
-            st.write(f"Vector Store Type: {type(st.session_state.vector_store)}")
             # Perform search
             distances, indices = st.session_state.vector_store.search(query_embedding, TOP_K)
@@ -294,7 +294,7 @@ def process_candidate_emails(query, similarity_threshold):
                 if sim >= similarity_threshold:
                     candidates.append((st.session_state.data_chunks[idx], sim))
             if not candidates:
-                st.write("⚠️ No matching embeddings found for your query with the selected threshold.")
                 return
             # Build the context string by concatenating all matching email texts using HTML breaks.
@@ -364,11 +364,8 @@ def call_llm_api(query):
 def handle_user_query():
     st.header("💬 Let's Chat with Your Emails")
-    # Checkbox to show/hide the threshold slider
-    show_threshold = st.checkbox("Adjust Similarity Threshold")
-    # Slider, shown only if 'show_threshold' is True
-    if show_threshold:
         similarity_threshold = st.slider(
             "Select Similarity Threshold",
             min_value=0.0,
@@ -378,37 +375,34 @@ def handle_user_query():
             help="Adjust the similarity threshold to control the relevance of retrieved emails. Higher values yield more relevant results.",
             key='similarity_threshold'
         )
-    else:
-        # Set a default threshold if the slider is not shown
-        if 'similarity_threshold' not in st.session_state:
-            st.session_state.similarity_threshold = 0.3
-        similarity_threshold = st.session_state.similarity_threshold
-    # Callback function to process the query
     def query_callback():
         query = st.session_state.query_input
         if not query.strip():
             return
         process_candidate_emails(query, similarity_threshold)
         if st.session_state.raw_candidates:
-            st.subheader("🔎 Matching Email Chunks:")
-            for candidate, sim in st.session_state.raw_candidates:
-                # Get a snippet (first 150 characters) of the body instead of full body content.
-                body = candidate.get('body', 'No Content')
-                snippet = (body[:150] + "...") if len(body) > 150 else body
-                st.markdown(
-                    f"**From:** {candidate.get('sender','Unknown')}  <br>"
-                    f"**To:** {candidate.get('to','Unknown')}  <br>"
-                    f"**Date:** {candidate.get('date','Unknown')}  <br>"
-                    f"**Subject:** {candidate.get('subject','No Subject')}  <br>"
-                    f"**Body Snippet:** {snippet}  <br>"
-                    f"**Similarity:** {sim:.4f}",
-                    unsafe_allow_html=True
-                )
             # Then send the query along with the context to the LLM API.
             call_llm_api(query)
-    # Text input with callback on change (when Enter is pressed)
     st.text_input("Enter your query:", key="query_input", on_change=query_callback)
 # ===============================
@@ -430,7 +424,7 @@ def main():
             # Check file size; if larger than 200MB, show a warning and then continue.
             file_size_mb = uploaded_db.size / (1024 * 1024)
             if file_size_mb > 200:
-                st.warning("The uploaded file is larger than 200MB. It may take longer to load, but processing will continue.")
             try:
                 vector_db = pickle.load(uploaded_db)
                 st.session_state.vector_store = vector_db.get("vector_store")
@@ -473,7 +467,7 @@ def main():
             if st.session_state.data_chunks:
                 embed_emails(st.session_state.data_chunks)
         if st.session_state.vector_store is not None:
-            with st.expander("💾 Download Data", expanded=True):
                 save_vector_database()
     if st.session_state.vector_store is not None:

         }
         db_data = pickle.dumps(vector_db)
         st.download_button(
+            label="💾 Download Vector Database",
             data=db_data,
             file_name="vector_database.pkl",
             mime="application/octet-stream"
             faiss.normalize_L2(query_embedding)
             # Debug: Verify the type of vector_store
+            # st.write(f"Vector Store Type: {type(st.session_state.vector_store)}")
             # Perform search
             distances, indices = st.session_state.vector_store.search(query_embedding, TOP_K)
                 if sim >= similarity_threshold:
                     candidates.append((st.session_state.data_chunks[idx], sim))
             if not candidates:
+                st.warning("⚠️ No matching embeddings found for your query with the selected threshold.")
                 return
             # Build the context string by concatenating all matching email texts using HTML breaks.
 def handle_user_query():
     st.header("💬 Let's Chat with Your Emails")
+    # Expander for threshold selection
+    with st.expander("🔧 Adjust Similarity Threshold", expanded=False):
         similarity_threshold = st.slider(
             "Select Similarity Threshold",
             min_value=0.0,
             help="Adjust the similarity threshold to control the relevance of retrieved emails. Higher values yield more relevant results.",
             key='similarity_threshold'
         )
+    # Text input with callback on change (when Enter is pressed)
     def query_callback():
         query = st.session_state.query_input
         if not query.strip():
+            st.warning("⚠️ Please enter a valid query.")
             return
         process_candidate_emails(query, similarity_threshold)
         if st.session_state.raw_candidates:
+            with st.expander("🔎 Matching Email Chunks:", expanded=False):
+                for candidate, sim in st.session_state.raw_candidates:
+                    # Get a snippet (first 150 characters) of the body instead of full body content.
+                    body = candidate.get('body', 'No Content')
+                    snippet = (body[:150] + "...") if len(body) > 150 else body
+                    st.markdown(
+                        f"**From:** {candidate.get('sender','Unknown')}  <br>"
+                        f"**To:** {candidate.get('to','Unknown')}  <br>"
+                        f"**Date:** {candidate.get('date','Unknown')}  <br>"
+                        f"**Subject:** {candidate.get('subject','No Subject')}  <br>"
+                        f"**Body Snippet:** {snippet}  <br>"
+                        f"**Similarity:** {sim:.4f}",
+                        unsafe_allow_html=True
+                    )
             # Then send the query along with the context to the LLM API.
             call_llm_api(query)
+        # Clear the input field after processing
+        st.session_state.query_input = ""
     st.text_input("Enter your query:", key="query_input", on_change=query_callback)
 # ===============================
             # Check file size; if larger than 200MB, show a warning and then continue.
             file_size_mb = uploaded_db.size / (1024 * 1024)
             if file_size_mb > 200:
+                st.warning("⚠️ The uploaded file is larger than 200MB. It may take longer to load, but processing will continue.")
             try:
                 vector_db = pickle.load(uploaded_db)
                 st.session_state.vector_store = vector_db.get("vector_store")
             if st.session_state.data_chunks:
                 embed_emails(st.session_state.data_chunks)
         if st.session_state.vector_store is not None:
+            with st.expander("💾 Download Data", expanded=False):
                 save_vector_database()
     if st.session_state.vector_store is not None: