Spaces:

samiee2213
/

DataScribe

Sleeping

App Files Community

samiee2213 committed on Nov 14, 2024

Commit

cfde529

verified ·

1 Parent(s): e404aab

Update app.py

Browse files

Files changed (1) hide show

app.py +234 -66

app.py CHANGED Viewed

@@ -24,19 +24,29 @@ import numpy as np
 import gspread
 from dotenv import load_dotenv
 warnings.filterwarnings("ignore", category=DeprecationWarning)
 scopes = ["https://www.googleapis.com/auth/spreadsheets"]
 creds = Credentials.from_service_account_file("credentials.json", scopes=scopes)
 client = gspread.authorize(creds)
 #environment
 load_dotenv()
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 SERPER_API_KEY = os.getenv("SERPER_API_KEY")
-llm = ChatGroq(model="llama-3.1-70b-versatile")
 # Initialize Google Serper API wrapper
 search = GoogleSerperAPIWrapper(serp_api_key=SERPER_API_KEY)
 # Create the system and human messages for dynamic query processing
 system_message_content = """
@@ -74,6 +84,7 @@ def perform_web_search(query, max_retries=3, delay=2):
             time.sleep(delay)
     st.error(f"Failed to perform web search for query '{query}' after {max_retries} retries.")
     return "NaN"
 def update_google_sheet(sheet_id, range_name, data):
     try:
         # Define the Google Sheets API scope
@@ -117,6 +128,7 @@ def get_llm_response(entity, query, web_results):
         return cleaned_info
     except Exception as e:
         return "NaN"
 # Retry logic for multiple web searches if necessary
 def refine_answer_with_searches(entity, query, max_retries=3):
     search_results = perform_web_search(query.format(entity=entity))
@@ -329,54 +341,203 @@ elif selected == "Extract Information":
         column_selection = st.session_state["column_selection"]
         entities_column = st.session_state["data"][column_selection]
-        st.write("### Selected Entity Column:")
-        st.dataframe(entities_column)
-        if st.button("Start Extraction"):
-            st.write("Data extraction is in progress. This may take a few moments.")
-            # Custom styled progress bar
-            progress_bar = st.progress(0)
-            try:
-                results = []
-                for i, selected_entity in enumerate(entities_column):
-                    user_query = st.session_state["query_template"].replace("{entity}", str(selected_entity))
-                    final_answer, search_results = refine_answer_with_searches(selected_entity, user_query)
-                    results.append({
-                        "Entity": selected_entity,
-                        "Extracted Information": final_answer,
-                        "Search Results": search_results
-                    })
-                    # Update progress bar with a smooth and cute animation
-                    progress_bar.progress(int((i + 1) / len(entities_column) * 100))
-                st.session_state["results"] = results
-                st.write("### Extracted Information")
-                for result in results:
-                    st.write(f"**Entity:** {result['Entity']}")
-                    st.write(f"**Extracted Information:** {result['Extracted Information']}")
-                st.write("### Web Results:")
-                for result in results:
-                    st.write(result["Search Results"])
-            except Exception as e:
-                st.error(f"An error occurred while extracting information: {e}")
     else:
         st.warning("Please upload your data and define the query template.")
 elif selected == "View & Download":
     st.header("View & Download Results")
-    if "results" in st.session_state:
         results_df = pd.DataFrame(st.session_state["results"])
         st.write("### Results Preview")
-        # Display results with some background color for the relevant columns
-        st.dataframe(results_df.style.applymap(lambda val: 'background-color: #d3f4ff' if isinstance(val, str) else '', subset=["Extracted Information", "Search Results"]))
         download_option = st.selectbox(
             "Select data to download:",
             ["All Results", "Extracted Information", "Web Results"]
@@ -396,36 +557,43 @@ elif selected == "View & Download":
             mime="text/csv"
         )
-        # To ensure the inputs and button are persistent, store their values in session_state
-        if 'sheet_id' not in st.session_state:
-            st.session_state.sheet_id = ''
-        if 'range_name' not in st.session_state:
-            st.session_state.range_name = ''
-        sheet_id = st.text_input("Enter Google Sheet ID", value=st.session_state.sheet_id)
-        range_name = st.text_input("Enter Range (e.g., 'Sheet1!A1')", value=st.session_state.range_name)
-        if sheet_id and range_name:
-            st.session_state.sheet_id = sheet_id
-            st.session_state.range_name = range_name
-            # Define data_to_update to update the Google Sheet
-            data_to_update = [results_df.columns.tolist()] + results_df.values.tolist()
-            # Update Google Sheets button
-            if st.button("Update Google Sheet"):
-                try:
-                    if '!' not in range_name:
-                        st.error("Invalid range format. Please use the format 'SheetName!Range'.")
-                    else:
-                        sheet_name, cell_range = range_name.split('!', 1)
-                        sheet = client.open_by_key(sheet_id).worksheet(sheet_name)
-                        sheet.clear()  # Clear the existing data before updating
-                        sheet.update(f"{cell_range}", data_to_update)  # Update the data to the specified range
-                        st.success("Data updated in the Google Sheet!")
-                except Exception as e:
-                    st.error(f"Error updating Google Sheet: {e}")
-        else:
-            st.warning("Please enter both the Sheet ID and Range name before updating.")
     else:
         st.warning("No results available to view. Please run the extraction process.")

 import gspread
 from dotenv import load_dotenv
 warnings.filterwarnings("ignore", category=DeprecationWarning)
+#google sheet
 scopes = ["https://www.googleapis.com/auth/spreadsheets"]
 creds = Credentials.from_service_account_file("credentials.json", scopes=scopes)
 client = gspread.authorize(creds)
 #environment
 load_dotenv()
 GROQ_API_KEY = os.getenv("GROQ_API_KEY")
 SERPER_API_KEY = os.getenv("SERPER_API_KEY")
+#session state variables
+if "results" not in st.session_state:
+    st.session_state["results"] = []
 # Initialize Google Serper API wrapper
 search = GoogleSerperAPIWrapper(serp_api_key=SERPER_API_KEY)
+llm = ChatGroq(model="llama-3.1-70b-versatile")
 # Create the system and human messages for dynamic query processing
 system_message_content = """
             time.sleep(delay)
     st.error(f"Failed to perform web search for query '{query}' after {max_retries} retries.")
     return "NaN"
 def update_google_sheet(sheet_id, range_name, data):
     try:
         # Define the Google Sheets API scope
         return cleaned_info
     except Exception as e:
         return "NaN"
 # Retry logic for multiple web searches if necessary
 def refine_answer_with_searches(entity, query, max_retries=3):
     search_results = perform_web_search(query.format(entity=entity))
         column_selection = st.session_state["column_selection"]
         entities_column = st.session_state["data"][column_selection]
+        col1, col2 = st.columns([2, 1])
+        with col1:
+            st.write("### Selected Entity Column:")
+            st.dataframe(entities_column, use_container_width=True)
+        with col2:
+            start_button = st.button("Start Extraction", type="primary", use_container_width=True)
+        results_container = st.empty()
+        if start_button:
+            with st.spinner("Extracting information..."):
+                progress_bar = st.progress(0)
+                progress_text = st.empty()
+                try:
+                    results = []
+                    for i, selected_entity in enumerate(entities_column):
+                        user_query = st.session_state["query_template"].replace("{entity}", str(selected_entity))
+                        final_answer, search_results = refine_answer_with_searches(selected_entity, user_query)
+                        results.append({
+                            "Entity": selected_entity,
+                            "Extracted Information": final_answer,
+                            "Search Results": search_results
+                        })
+                        progress = (i + 1) / len(entities_column)
+                        progress_bar.progress(progress)
+                        progress_text.text(f"Processing {i+1}/{len(entities_column)} entities...")
+                    st.session_state["results"] = results
+                    progress_bar.empty()
+                    progress_text.empty()
+                    st.success("Extraction completed successfully!")
+                except Exception as e:
+                    st.error(f"An error occurred during extraction: {str(e)}")
+                    st.session_state.pop("results", None)
+        if "results" in st.session_state and st.session_state["results"]:
+            with results_container:
+                results = st.session_state["results"]
+                search_query = st.text_input("🔍 Search results", "")
+                tab1, tab2 = st.tabs(["Compact View", "Detailed View"])
+                with tab1:
+                    found_results = False
+                    for result in results:
+                        if search_query.lower() in str(result["Entity"]).lower() or \
+                           search_query.lower() in str(result["Extracted Information"]).lower():
+                            found_results = True
+                            with st.expander(f"📋 {result['Entity']}", expanded=False):
+                                st.markdown("#### Extracted Information")
+                                st.write(result["Extracted Information"])
+                    if not found_results and search_query:
+                        st.info("No results found for your search.")
+                with tab2:
+                    found_results = False
+                    for i, result in enumerate(results):
+                        if search_query.lower() in str(result["Entity"]).lower() or \
+                           search_query.lower() in str(result["Extracted Information"]).lower():
+                            found_results = True
+                            st.markdown(f"### Entity {i+1}: {result['Entity']}")
+                            col1, col2 = st.columns(2)
+                            with col1:
+                                st.markdown("#### 📝 Extracted Information")
+                                st.info(result["Extracted Information"])
+                            with col2:
+                                st.markdown("#### 🔍 Search Results")
+                                st.warning(result["Search Results"])
+                            st.divider()
+                    if not found_results and search_query:
+                        st.info("No results found for your search.")
     else:
         st.warning("Please upload your data and define the query template.")
+elif selected == "Extract Information":
+    st.header("Extract Information")
+    if "query_template" in st.session_state and "data" in st.session_state:
+        st.write("### Using Query Template:")
+        st.code(st.session_state["query_template"])
+        column_selection = st.session_state["column_selection"]
+        entities_column = st.session_state["data"][column_selection]
+        col1, col2 = st.columns([2, 1])
+        with col1:
+            st.write("### Selected Entity Column:")
+            st.dataframe(entities_column, use_container_width=True)
+        with col2:
+            start_button = st.button("Start Extraction", type="primary", use_container_width=True)
+        results_container = st.empty()
+        if start_button:
+            with st.spinner("Extracting information..."):
+                progress_bar = st.progress(0)
+                progress_text = st.empty()
+                try:
+                    results = []
+                    for i, selected_entity in enumerate(entities_column):
+                        user_query = st.session_state["query_template"].replace("{entity}", str(selected_entity))
+                        final_answer, search_results = refine_answer_with_searches(selected_entity, user_query)
+                        results.append({
+                            "Entity": selected_entity,
+                            "Extracted Information": final_answer,
+                            "Search Results": search_results
+                        })
+                        progress = (i + 1) / len(entities_column)
+                        progress_bar.progress(progress)
+                        progress_text.text(f"Processing {i+1}/{len(entities_column)} entities...")
+                    st.session_state["results"] = results
+                    progress_bar.empty()
+                    progress_text.empty()
+                    st.success("Extraction completed successfully!")
+                except Exception as e:
+                    st.error(f"An error occurred during extraction: {str(e)}")
+                    st.session_state.pop("results", None)
+        if "results" in st.session_state and st.session_state["results"]:
+            with results_container:
+                results = st.session_state["results"]
+                search_query = st.text_input("🔍 Search results", "")
+                tab1, tab2 = st.tabs(["Compact View", "Detailed View"])
+                with tab1:
+                    found_results = False
+                    for result in results:
+                        if search_query.lower() in str(result["Entity"]).lower() or \
+                           search_query.lower() in str(result["Extracted Information"]).lower():
+                            found_results = True
+                            with st.expander(f"📋 {result['Entity']}", expanded=False):
+                                st.markdown("#### Extracted Information")
+                                st.write(result["Extracted Information"])
+                    if not found_results and search_query:
+                        st.info("No results found for your search.")
+                with tab2:
+                    found_results = False
+                    for i, result in enumerate(results):
+                        if search_query.lower() in str(result["Entity"]).lower() or \
+                           search_query.lower() in str(result["Extracted Information"]).lower():
+                            found_results = True
+                            st.markdown(f"### Entity {i+1}: {result['Entity']}")
+                            col1, col2 = st.columns(2)
+                            with col1:
+                                st.markdown("#### 📝 Extracted Information")
+                                st.info(result["Extracted Information"])
+                            with col2:
+                                st.markdown("#### 🔍 Search Results")
+                                st.warning(result["Search Results"])
+                            st.divider()
+                    if not found_results and search_query:
+                        st.info("No results found for your search.")
+    else:
+        st.warning("Please upload your data and define the query template.")
 elif selected == "View & Download":
     st.header("View & Download Results")
+    if "results" in st.session_state and st.session_state["results"]:
         results_df = pd.DataFrame(st.session_state["results"])
         st.write("### Results Preview")
+        # Display the results preview
+        if "Extracted Information" in results_df.columns and "Search Results" in results_df.columns:
+            st.dataframe(results_df.style.map(lambda val: 'background-color: #d3f4ff' if isinstance(val, str) else '', subset=["Extracted Information", "Search Results"]))
+        else:
+            st.warning("Required columns are missing in results data.")
+        # Download options
         download_option = st.selectbox(
             "Select data to download:",
             ["All Results", "Extracted Information", "Web Results"]
             mime="text/csv"
         )
+        # Option to update Google Sheets
+        update_option = st.selectbox(
+            "Do you want to update Google Sheets?",
+            ["No", "Yes"]
+        )
+        if update_option == "Yes":
+            if 'sheet_id' not in st.session_state:
+                st.session_state.sheet_id = ''
+            if 'range_name' not in st.session_state:
+                st.session_state.range_name = ''
+            # Input fields for Google Sheets ID and Range
+            sheet_id = st.text_input("Enter Google Sheet ID", value=st.session_state.sheet_id)
+            range_name = st.text_input("Enter Range (e.g., 'Sheet1!A1')", value=st.session_state.range_name)
+            if sheet_id and range_name:
+                st.session_state.sheet_id = sheet_id
+                st.session_state.range_name = range_name
+                # Prepare data for update
+                data_to_update = [results_df.columns.tolist()] + results_df.values.tolist()
+                # Update Google Sheets button
+                if st.button("Update Google Sheet"):
+                    try:
+                        if '!' not in range_name:
+                            st.error("Invalid range format. Please use the format 'SheetName!Range'.")
+                        else:
+                            sheet_name, cell_range = range_name.split('!', 1)
+                            sheet = client.open_by_key(sheet_id).worksheet(sheet_name)
+                            sheet.clear()
+                            sheet.update(f"{cell_range}", data_to_update)
+                            st.success("Data updated in the Google Sheet!")
+                    except Exception as e:
+                        st.error(f"Error updating Google Sheet: {e}")
+            else:
+                st.warning("Please enter both the Sheet ID and Range name before updating.")
     else:
         st.warning("No results available to view. Please run the extraction process.")