Spaces:

Muthuraja18
/

query_analysis

Sleeping

App Files Files Community

Update app.py

#18

by Muthuraja18 - opened Jan 27, 2025

base: refs/heads/main

←

from: refs/pr/18

Discussion Files changed

+46

-99

Files changed (1) hide show

app.py +46 -99

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os
 import pyaudio
 import pandas as pd
 from sentence_transformers import SentenceTransformer
@@ -16,7 +16,7 @@ from google.oauth2.service_account import Credentials
 # Set up paths
 csv_file_path = "context.csv"  # Path to your CSV file
-output_csv_path = "context.csv"  # Path to save query results
 # Google Sheets setup
 SCOPE = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
@@ -38,15 +38,12 @@ sheet = initialize_google_sheets()
 # Function to safely load the CSV dataset
 def load_csv_safely(file_path):
     try:
-        # Attempt to read with error handling for bad lines
         df = pd.read_csv(file_path, on_bad_lines='skip')  # Skips malformed lines
-        # Check if the required columns exist
         required_columns = ['question', 'product', 'price', 'features', 'ratings', 'discount']
         for column in required_columns:
             if column not in df.columns:
                 raise Exception(f"CSV does not contain the required column: '{column}'. Please check your CSV.")
-        # If 'Timestamp' column doesn't exist, create it as NaT or empty
         if 'Timestamp' not in df.columns:
             df['Timestamp'] = pd.NaT  # Set it to NaT (Not a Time) initially
@@ -63,37 +60,34 @@ embedding_model = SentenceTransformer('all-MiniLM-L6-v2')  # Pre-trained sentenc
 # Function to filter data by date
 def filter_data_by_date(data, date_filter):
     """Return the rows of ``data`` whose ``Timestamp`` lies in the chosen window.

     Args:
         data: DataFrame with a ``Timestamp`` column. Values may be datetimes
             or their string form (for example after a CSV round trip).
         date_filter: ``"Today"`` keeps rows from local midnight onwards,
             ``"One Week"`` keeps rows from the last seven days; any other
             value returns ``data`` unfiltered.

     Returns:
         The filtered DataFrame. ``data`` itself is not modified.
     """
     if date_filter == "Today":
         start_date = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
     elif date_filter == "One Week":
         start_date = datetime.now() - timedelta(weeks=1)
     else:
         return data
     # Timestamps read back from CSV are plain strings; comparing those with a
     # datetime raises TypeError, so parse them first. Values that cannot be
     # parsed become NaT, which never satisfies ">=" and is therefore dropped.
     timestamps = pd.to_datetime(data['Timestamp'], errors='coerce')
     return data[timestamps >= start_date]
 # Function to recognize speech using SpeechRecognition and PyAudio in chunks
 def listen_to_speech():
     recognizer = sr.Recognizer()
-    # Initialize PyAudio microphone stream
     with sr.Microphone() as source:
         recognizer.adjust_for_ambient_noise(source)
-        st.write("Listening...")  # Optional: Add a message to indicate listening state
         try:
-            # Listen for the audio input
-            audio = recognizer.listen(source, timeout=5, phrase_time_limit=10)  # Listen for up to 10 seconds
-            st.write("Recognizing...")  # Optional: Add a message for recognition process
-            # Use Google's speech recognition to convert audio to text
             text = recognizer.recognize_google(audio)
             st.write(f"Recognized: {text}")
-            return text  # Return the text detected from the audio
         except sr.UnknownValueError:
-            st.error("Sorry, I could not understand the audio.")  # Handle case when the audio is unclear
             return None
         except sr.RequestError:
-            st.error("Could not request results from Google Speech Recognition service.")  # Handle network issues
             return None
         except Exception as e:
             st.error(f"An error occurred: {e}")
@@ -110,30 +104,22 @@ def respond_to_greeting():
 # Function to extract the product name from the query
 def extract_product_name(query):
     """Return the first dataset product whose name occurs in ``query``.

     Matching is a case-insensitive substring test against the ``product``
     column of the module-level ``dataset``.

     Args:
         query: Text of the user's request.

     Returns:
         The matching product name exactly as stored in the dataset, or
         ``None`` when nothing matches or the dataset is ``None``.
     """
     # Same guard as find_answer/recommend_products: indexing None would raise.
     if dataset is None:
         return None
     query_text = query.lower()
     # Drop missing names rather than substituting a placeholder, so a filler
     # value can never be returned as though it were a real product.
     for product in dataset['product'].dropna().astype(str):
         # A blank name is a substring of almost every query; skip it.
         if product.strip() and product.lower() in query_text:
             return product
     return None
-# Function to find the best matching answer using embeddings (Retrieve part of RAG)
 def find_answer(query):
     if dataset is None:
         return "Dataset not loaded properly."
-    # Compute the embedding of the query
     query_embedding = embedding_model.encode([query])
-    # Compute embeddings for all the dataset questions
     dataset_embeddings = embedding_model.encode(dataset['question'].tolist())
-    # Find the closest match using cosine similarity
     similarities = cosine_similarity(query_embedding, dataset_embeddings)
-    # Get the index of the most similar question
     closest_idx = np.argmax(similarities)
-    # Retrieve the product info associated with the closest question
     closest_question = dataset.iloc[closest_idx]
     product_name = closest_question['product']
     price = closest_question['price']
@@ -141,14 +127,11 @@ def find_answer(query):
     ratings = closest_question['ratings']
     discount = closest_question['discount']
-    # Ensure 'Timestamp' column exists before appending
     if 'Timestamp' not in closest_question.index:
         closest_question['Timestamp'] = datetime.now()
-    # Save the query and response to CSV
     save_query_to_csv(query, product_name, price, features, ratings, discount)
-    # Return specific info based on query
     if "price" in query.lower():
         return f"The price of {product_name} is {price}"
     elif "features" in query.lower():
@@ -167,55 +150,37 @@ def save_query_to_csv(query, product_name, price, features, ratings, discount):
         'features': features,
         'ratings': ratings,
         'discount': discount,
-        'Timestamp': datetime.now()  # Ensure the timestamp is correct
     }
     new_entry_df = pd.DataFrame([new_entry])
-    # Append to CSV (ensure header is only added for the first entry)
     new_entry_df.to_csv(output_csv_path, mode='a', header=not os.path.exists(output_csv_path), index=False)
 # Function for sentiment analysis using TextBlob with emojis
 def analyze_sentiment_with_emoji(text):
     """Classify the sentiment of ``text`` and pair it with an emoji.

     Args:
         text: The text to analyse. Non-string values (for example NaN from an
             empty CSV cell) are reported as neutral.

     Returns:
         A ``(label, polarity, emoji)`` tuple where ``label`` is ``"Positive"``,
         ``"Negative"`` or ``"Neutral"`` and ``polarity`` is TextBlob's
         polarity score (-1 to 1).
     """
     # TextBlob raises TypeError for non-string input; the dashboard applies
     # this function to a whole CSV column, so treat such cells as neutral.
     if not isinstance(text, str):
         return "Neutral", 0.0, "😐"
     sentiment_score = TextBlob(text).sentiment.polarity
     if sentiment_score > 0:
         return "Positive", sentiment_score, "😊"
     if sentiment_score < 0:
         return "Negative", sentiment_score, "😞"
     return "Neutral", sentiment_score, "😐"
-# Function to provide product recommendations (only product names) based on the query
 def recommend_products(query):
     if dataset is None:
         return "Dataset not loaded properly."
-    # Ensure all product names are strings and handle missing data
     dataset['product'] = dataset['product'].fillna('Unknown').astype(str)
-    # Compute the embedding of the query
     query_embedding = embedding_model.encode([query])
-    # Compute embeddings for all the dataset product names
     dataset_embeddings = embedding_model.encode(dataset['product'].tolist())
-    # Find the closest match using cosine similarity
     similarities = cosine_similarity(query_embedding, dataset_embeddings)
-    # Get the indices of the top 3 recommendations
-    top_indices = np.argsort(similarities[0])[-3:][::-1]  # Get top 3 recommendations
-    # Return at least 3 recommendations
     recommendations = []
     for idx in top_indices:
         product = dataset.iloc[idx]
@@ -225,9 +190,8 @@ def recommend_products(query):
             'features': product['features'],
             'ratings': product['ratings'],
             'discount': product['discount']
-        })  # Append product details
-    # If there are less than 3 recommendations, pad with default responses
     while len(recommendations) < 3:
         recommendations.append({
             'product': 'No recommendation available',
@@ -236,24 +200,21 @@ def recommend_products(query):
             'ratings': 'N/A',
             'discount': 'N/A'
         })
     return recommendations
 # Function to handle the entire continuous interaction loop
 def continuous_interaction():
     st.title("Speech Recognition with Product Queries")
     if st.button("Start Speech Recognition"):
-        while True:  # Loop for continuous listening
             user_input = listen_to_speech()
             if user_input:
-                # Check if the user is greeting
                 if is_greeting(user_input):
                     respond_to_greeting()
-                    continue  # Skip the rest of the code and just greet
-                # Extract product name if mentioned
                 product_name = extract_product_name(user_input)
                 if product_name:
-                    # If the user asks for a product like "iPhone price", respond with product details
                     st.write(f"Let me check the details for {product_name}:")
                     product_details = dataset[dataset['product'].str.lower() == product_name.lower()]
                     if not product_details.empty:
@@ -266,53 +227,43 @@ def continuous_interaction():
                     else:
                         st.write("Sorry, I couldn't find the product you're asking for.")
                 else:
-                    # If no specific product is mentioned, perform normal question answering
                     answer = find_answer(user_input)
                     st.write(f"Answer: {answer}")
-                # Sentiment Analysis with Emoji
                 sentiment, sentiment_score, emoji = analyze_sentiment_with_emoji(user_input)
                 st.write(f"Sentiment: {sentiment} (Score: {sentiment_score}) {emoji}")
-                # Product Recommendations based on query
                 st.write("Here are some product recommendations based on your query: ")
                 recommendations = recommend_products(user_input)
-                if recommendations:
-                    for idx, rec in enumerate(recommendations, 1):
-                        st.write(f"**Recommendation {idx}:**")
-                        st.write(f"**Product**: {rec['product']}")
-                        st.write(f"**Price**: {rec['price']}")
-                        st.write(f"**Features**: {rec['features']}")
-                        st.write(f"**Ratings**: {rec['ratings']}")
-                        st.write(f"**Discount**: {rec['discount']}%")
-                        st.write("---")  # Separator between recommendations
-                # Handle objections if any
-                st.write("Do you like the recommendation or should I try again?")
 # Dashboard function with time filtering
 def display_dashboard():
     st.title("Product Dashboard")
     st.write("Welcome to the product query dashboard!")
-    # Sidebar time filter
     time_filter = st.sidebar.selectbox(
         "Select time period",
         ["All Time", "Today", "One Week"]
     )
-    query_results_df = pd.read_csv(output_csv_path, on_bad_lines='skip')  # Load query results from 'context.csv'
-    # Check if 'Timestamp' column exists
     if 'Timestamp' not in query_results_df.columns:
-        query_results_df['Timestamp'] = pd.to_datetime('now')  # Add current timestamp if column is missing
-    # Filter data based on time selection
     query_results_df = filter_data_by_date(query_results_df, time_filter)
     st.subheader(f"Recent Queries Summary ({time_filter})")
-    st.write(query_results_df.tail(10))  # Show the last 10 queries
     sentiment_counts = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[0]).value_counts()
     st.subheader(f"Sentiment Analysis Distribution ({time_filter})")
     st.write(sentiment_counts)
@@ -325,9 +276,6 @@ def display_dashboard():
     )
     st.plotly_chart(sentiment_fig)
-    # Ensure 'Timestamp' is properly converted to datetime
-    query_results_df['Timestamp'] = pd.to_datetime(query_results_df['Timestamp'], errors='coerce')
     query_results_df['sentiment_score'] = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[1])
     sentiment_time_fig = px.line(
@@ -350,7 +298,6 @@ def display_dashboard():
     )
     st.plotly_chart(product_popularity_fig)
-    # Most recommended products
     recommended_products = query_results_df['product'].value_counts()
     st.subheader(f"Most Recommended Products ({time_filter})")
     st.write(recommended_products)
@@ -363,11 +310,11 @@ def display_dashboard():
     )
     st.plotly_chart(recommended_products_fig)
-# Main code logic for running the application
-if __name__ == "__main__":
-    choice = st.sidebar.selectbox("Select Mode", ["Dashboard", "Speech Recognition"])
-    if choice == "Dashboard":
-        display_dashboard()  # Display dashboard if selected
-    else:
-        continuous_interaction()  # Speech recognition interaction

+import os
 import pyaudio
 import pandas as pd
 from sentence_transformers import SentenceTransformer
 # Set up paths
 csv_file_path = "context.csv"  # Path to your CSV file
+output_csv_path = "contents.csv"  # Path to save query results
 # Google Sheets setup
 SCOPE = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
 # Function to safely load the CSV dataset
 def load_csv_safely(file_path):
     try:
         df = pd.read_csv(file_path, on_bad_lines='skip')  # Skips malformed lines
         required_columns = ['question', 'product', 'price', 'features', 'ratings', 'discount']
         for column in required_columns:
             if column not in df.columns:
                 raise Exception(f"CSV does not contain the required column: '{column}'. Please check your CSV.")
         if 'Timestamp' not in df.columns:
             df['Timestamp'] = pd.NaT  # Set it to NaT (Not a Time) initially
 # Function to filter data by date
 def filter_data_by_date(data, date_filter):
     """Parse ``Timestamp`` and keep only the rows inside the chosen window.

     Args:
         data: DataFrame with a ``Timestamp`` column holding datetimes or
             their string form.
         date_filter: ``"Today"`` keeps rows from local midnight onwards,
             ``"One Week"`` keeps rows from the last seven days; any other
             value keeps every row.

     Returns:
         A new DataFrame whose ``Timestamp`` column has been converted with
         ``pd.to_datetime`` (unparseable values become ``NaT``), restricted to
         the requested window. The caller's ``data`` is left untouched.
     """
     # Build a new frame instead of assigning into ``data``: writing the parsed
     # column back would silently change the caller's object.
     parsed = data.assign(Timestamp=pd.to_datetime(data['Timestamp'], errors='coerce'))
     if date_filter == "Today":
         start_date = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
     elif date_filter == "One Week":
         start_date = datetime.now() - timedelta(weeks=1)
     else:
         return parsed
     # NaT never satisfies ">=", so rows with unparseable timestamps drop out.
     return parsed[parsed['Timestamp'] >= start_date]
 # Function to recognize speech using SpeechRecognition and PyAudio in chunks
 def listen_to_speech():
     recognizer = sr.Recognizer()
     with sr.Microphone() as source:
         recognizer.adjust_for_ambient_noise(source)
+        st.write("Listening...")
         try:
+            audio = recognizer.listen(source, timeout=5, phrase_time_limit=10)
+            st.write("Recognizing...")
             text = recognizer.recognize_google(audio)
             st.write(f"Recognized: {text}")
+            return text
         except sr.UnknownValueError:
+            st.error("Sorry, I could not understand the audio.")
             return None
         except sr.RequestError:
+            st.error("Could not request results from Google Speech Recognition service.")
             return None
         except Exception as e:
             st.error(f"An error occurred: {e}")
 # Function to extract the product name from the query
 def extract_product_name(query):
     """Find the first product from the dataset that is mentioned in ``query``.

     The ``product`` column of the module-level ``dataset`` is scanned in row
     order using a case-insensitive substring test.

     Args:
         query: Text of the user's request.

     Returns:
         The product name as stored in the dataset, or ``None`` if no name
         occurs in the query or the dataset is ``None``.
     """
     # find_answer and recommend_products both guard against a None dataset;
     # without the same guard here the lookup below raises TypeError.
     if dataset is None:
         return None
     lowered_query = query.lower()
     # Missing names are dropped, not replaced by a placeholder string, so the
     # placeholder cannot be reported as a product.
     names = dataset['product'].dropna().astype(str)
     for name in names:
         if not name.strip():
             # An empty or whitespace-only name would match nearly any query.
             continue
         if name.lower() in lowered_query:
             return name
     return None
+# Function to find the best matching answer using embeddings
 def find_answer(query):
     if dataset is None:
         return "Dataset not loaded properly."
     query_embedding = embedding_model.encode([query])
     dataset_embeddings = embedding_model.encode(dataset['question'].tolist())
     similarities = cosine_similarity(query_embedding, dataset_embeddings)
     closest_idx = np.argmax(similarities)
     closest_question = dataset.iloc[closest_idx]
     product_name = closest_question['product']
     price = closest_question['price']
     ratings = closest_question['ratings']
     discount = closest_question['discount']
     if 'Timestamp' not in closest_question.index:
         closest_question['Timestamp'] = datetime.now()
     save_query_to_csv(query, product_name, price, features, ratings, discount)
     if "price" in query.lower():
         return f"The price of {product_name} is {price}"
     elif "features" in query.lower():
         'features': features,
         'ratings': ratings,
         'discount': discount,
+        'Timestamp': datetime.now()
     }
     new_entry_df = pd.DataFrame([new_entry])
     new_entry_df.to_csv(output_csv_path, mode='a', header=not os.path.exists(output_csv_path), index=False)
 # Function for sentiment analysis using TextBlob with emojis
 def analyze_sentiment_with_emoji(text):
     """Return a sentiment label, polarity score and emoji for ``text``.

     Args:
         text: The text to analyse. Anything that is not a string (such as NaN
             from an empty CSV cell) is reported as neutral.

     Returns:
         ``(sentiment, sentiment_score, emoji)``: the label is ``"Positive"``
         for a polarity above zero, ``"Negative"`` below zero and
         ``"Neutral"`` otherwise.
     """
     # TextBlob raises TypeError when handed a non-string; the dashboard maps
     # this function over a CSV column, so fall back to neutral instead.
     if not isinstance(text, str):
         return "Neutral", 0.0, "😐"
     blob = TextBlob(text)
     sentiment_score = blob.sentiment.polarity
     if sentiment_score > 0:
         sentiment, emoji = "Positive", "😊"
     elif sentiment_score < 0:
         sentiment, emoji = "Negative", "😞"
     else:
         sentiment, emoji = "Neutral", "😐"
     return sentiment, sentiment_score, emoji
+# Function to provide product recommendations based on the query
 def recommend_products(query):
     if dataset is None:
         return "Dataset not loaded properly."
     dataset['product'] = dataset['product'].fillna('Unknown').astype(str)
     query_embedding = embedding_model.encode([query])
     dataset_embeddings = embedding_model.encode(dataset['product'].tolist())
     similarities = cosine_similarity(query_embedding, dataset_embeddings)
+    top_indices = np.argsort(similarities[0])[-3:][::-1]
     recommendations = []
     for idx in top_indices:
         product = dataset.iloc[idx]
             'features': product['features'],
             'ratings': product['ratings'],
             'discount': product['discount']
+        })
     while len(recommendations) < 3:
         recommendations.append({
             'product': 'No recommendation available',
             'ratings': 'N/A',
             'discount': 'N/A'
         })
     return recommendations
 # Function to handle the entire continuous interaction loop
 def continuous_interaction():
     st.title("Speech Recognition with Product Queries")
     if st.button("Start Speech Recognition"):
+        while True:
             user_input = listen_to_speech()
             if user_input:
                 if is_greeting(user_input):
                     respond_to_greeting()
+                    continue
                 product_name = extract_product_name(user_input)
                 if product_name:
                     st.write(f"Let me check the details for {product_name}:")
                     product_details = dataset[dataset['product'].str.lower() == product_name.lower()]
                     if not product_details.empty:
                     else:
                         st.write("Sorry, I couldn't find the product you're asking for.")
                 else:
                     answer = find_answer(user_input)
                     st.write(f"Answer: {answer}")
                 sentiment, sentiment_score, emoji = analyze_sentiment_with_emoji(user_input)
                 st.write(f"Sentiment: {sentiment} (Score: {sentiment_score}) {emoji}")
                 st.write("Here are some product recommendations based on your query: ")
                 recommendations = recommend_products(user_input)
+                for idx, rec in enumerate(recommendations, 1):
+                    st.write(f"Recommendation {idx}:")
+                    st.write(f"Product: {rec['product']}")
+                    st.write(f"Price: {rec['price']}")
+                    st.write(f"Features: {rec['features']}")
+                    st.write(f"Ratings: {rec['ratings']}")
+                    st.write(f"Discount: {rec['discount']}%")
+                    st.write("---")
 # Dashboard function with time filtering
 def display_dashboard():
     st.title("Product Dashboard")
     st.write("Welcome to the product query dashboard!")
     time_filter = st.sidebar.selectbox(
         "Select time period",
         ["All Time", "Today", "One Week"]
     )
+    query_results_df = pd.read_csv(output_csv_path, on_bad_lines='skip')
     if 'Timestamp' not in query_results_df.columns:
+        query_results_df['Timestamp'] = pd.to_datetime('now')
     query_results_df = filter_data_by_date(query_results_df, time_filter)
     st.subheader(f"Recent Queries Summary ({time_filter})")
+    st.write(query_results_df.tail(10))
     sentiment_counts = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[0]).value_counts()
     st.subheader(f"Sentiment Analysis Distribution ({time_filter})")
     st.write(sentiment_counts)
     )
     st.plotly_chart(sentiment_fig)
     query_results_df['sentiment_score'] = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[1])
     sentiment_time_fig = px.line(
     )
     st.plotly_chart(product_popularity_fig)
     recommended_products = query_results_df['product'].value_counts()
     st.subheader(f"Most Recommended Products ({time_filter})")
     st.write(recommended_products)
     )
     st.plotly_chart(recommended_products_fig)
+# Main code to run the app
if __name__ == '__main__':
    # Each sidebar label maps to the function that renders that page.
    pages = {
        "Speech Recognition": continuous_interaction,
        "Dashboard": display_dashboard,
    }
    mode = st.sidebar.radio("Select Mode", ("Speech Recognition", "Dashboard"))
    render_page = pages.get(mode)
    if render_page is not None:
        render_page()