Files changed (1) hide show
  1. app.py +46 -99
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import os
2
  import pyaudio
3
  import pandas as pd
4
  from sentence_transformers import SentenceTransformer
@@ -16,7 +16,7 @@ from google.oauth2.service_account import Credentials
16
 
17
  # Set up paths
18
  csv_file_path = "context.csv" # Path to your CSV file
19
- output_csv_path = "context.csv" # Path to save query results
20
 
21
  # Google Sheets setup
22
  SCOPE = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
@@ -38,15 +38,12 @@ sheet = initialize_google_sheets()
38
  # Function to safely load the CSV dataset
39
  def load_csv_safely(file_path):
40
  try:
41
- # Attempt to read with error handling for bad lines
42
  df = pd.read_csv(file_path, on_bad_lines='skip') # Skips malformed lines
43
- # Check if the required columns exist
44
  required_columns = ['question', 'product', 'price', 'features', 'ratings', 'discount']
45
  for column in required_columns:
46
  if column not in df.columns:
47
  raise Exception(f"CSV does not contain the required column: '{column}'. Please check your CSV.")
48
 
49
- # If 'Timestamp' column doesn't exist, create it as NaT or empty
50
  if 'Timestamp' not in df.columns:
51
  df['Timestamp'] = pd.NaT # Set it to NaT (Not a Time) initially
52
 
@@ -63,37 +60,34 @@ embedding_model = SentenceTransformer('all-MiniLM-L6-v2') # Pre-trained sentenc
63
 
64
  # Function to filter data by date
65
def filter_data_by_date(data, date_filter):
    """Return the rows of *data* whose ``Timestamp`` lies in the chosen period.

    Args:
        data: DataFrame with a ``Timestamp`` column. Values may be strings
            (as produced by ``pd.read_csv``) or datetimes.
        date_filter: ``"Today"`` keeps rows from local midnight onwards,
            ``"One Week"`` keeps rows from the last seven days, and any
            other value keeps every row.

    Returns:
        A new DataFrame; the caller's frame is left untouched. Its
        ``Timestamp`` column is converted to datetimes.
    """
    # Work on a copy so the conversion below never leaks into the caller's frame.
    filtered = data.copy()
    # Timestamps read back from CSV are plain strings; comparing them with a
    # datetime raises TypeError. Unparseable values become NaT, which compares
    # False against any cutoff and is therefore dropped by the period filters.
    filtered['Timestamp'] = pd.to_datetime(filtered['Timestamp'], errors='coerce')

    now = datetime.now()
    if date_filter == "Today":
        start_date = now.replace(hour=0, minute=0, second=0, microsecond=0)
    elif date_filter == "One Week":
        start_date = now - timedelta(weeks=1)
    else:
        # No recognised period selected: return everything.
        return filtered

    return filtered[filtered['Timestamp'] >= start_date]
73
 
74
  # Function to recognize speech using SpeechRecognition and PyAudio in chunks
75
  def listen_to_speech():
76
  recognizer = sr.Recognizer()
77
-
78
- # Initialize PyAudio microphone stream
79
  with sr.Microphone() as source:
80
  recognizer.adjust_for_ambient_noise(source)
81
- st.write("Listening...") # Optional: Add a message to indicate listening state
82
 
83
  try:
84
- # Listen for the audio input
85
- audio = recognizer.listen(source, timeout=5, phrase_time_limit=10) # Listen for up to 10 seconds
86
- st.write("Recognizing...") # Optional: Add a message for recognition process
87
-
88
- # Use Google's speech recognition to convert audio to text
89
  text = recognizer.recognize_google(audio)
90
  st.write(f"Recognized: {text}")
91
- return text # Return the text detected from the audio
92
  except sr.UnknownValueError:
93
- st.error("Sorry, I could not understand the audio.") # Handle case when the audio is unclear
94
  return None
95
  except sr.RequestError:
96
- st.error("Could not request results from Google Speech Recognition service.") # Handle network issues
97
  return None
98
  except Exception as e:
99
  st.error(f"An error occurred: {e}")
@@ -110,30 +104,22 @@ def respond_to_greeting():
110
 
111
  # Function to extract the product name from the query
112
def extract_product_name(query):
    """Return the first product from ``dataset`` whose name occurs in *query*.

    Matching is a case-insensitive substring test against the ``product``
    column of the module-level ``dataset``.

    Args:
        query: The user's utterance as text.

    Returns:
        The product name exactly as stored in ``dataset`` (so callers can
        look the row up again), or ``None`` when nothing matches, the query
        is empty, or the dataset is not loaded.
    """
    # Mirror the guard used by the other dataset-backed helpers in this file.
    if dataset is None or not query:
        return None

    lowered_query = query.lower()  # loop-invariant, so computed once
    # Missing names are dropped rather than replaced by a placeholder: a
    # placeholder such as 'Unknown' would match any query containing that word.
    for product in dataset['product'].dropna().astype(str):
        name = product.strip()
        # A blank name is a substring of every query; never treat it as a hit.
        if name and name.lower() in lowered_query:
            return product
    return None
118
 
119
- # Function to find the best matching answer using embeddings (Retrieve part of RAG)
120
  def find_answer(query):
121
  if dataset is None:
122
  return "Dataset not loaded properly."
123
-
124
- # Compute the embedding of the query
125
  query_embedding = embedding_model.encode([query])
126
-
127
- # Compute embeddings for all the dataset questions
128
  dataset_embeddings = embedding_model.encode(dataset['question'].tolist())
129
-
130
- # Find the closest match using cosine similarity
131
  similarities = cosine_similarity(query_embedding, dataset_embeddings)
132
-
133
- # Get the index of the most similar question
134
  closest_idx = np.argmax(similarities)
135
-
136
- # Retrieve the product info associated with the closest question
137
  closest_question = dataset.iloc[closest_idx]
138
  product_name = closest_question['product']
139
  price = closest_question['price']
@@ -141,14 +127,11 @@ def find_answer(query):
141
  ratings = closest_question['ratings']
142
  discount = closest_question['discount']
143
 
144
- # Ensure 'Timestamp' column exists before appending
145
  if 'Timestamp' not in closest_question.index:
146
  closest_question['Timestamp'] = datetime.now()
147
 
148
- # Save the query and response to CSV
149
  save_query_to_csv(query, product_name, price, features, ratings, discount)
150
 
151
- # Return specific info based on query
152
  if "price" in query.lower():
153
  return f"The price of {product_name} is {price}"
154
  elif "features" in query.lower():
@@ -167,55 +150,37 @@ def save_query_to_csv(query, product_name, price, features, ratings, discount):
167
  'features': features,
168
  'ratings': ratings,
169
  'discount': discount,
170
- 'Timestamp': datetime.now() # Ensure the timestamp is correct
171
  }
172
  new_entry_df = pd.DataFrame([new_entry])
173
-
174
- # Append to CSV (ensure header is only added for the first entry)
175
  new_entry_df.to_csv(output_csv_path, mode='a', header=not os.path.exists(output_csv_path), index=False)
176
 
177
  # Function for sentiment analysis using TextBlob with emojis
178
def analyze_sentiment_with_emoji(text):
    """Classify *text* as Positive, Negative or Neutral using TextBlob polarity.

    Args:
        text: The text to analyse. Non-string values (for example NaN from
            an empty CSV cell) and blank strings are treated as neutral.

    Returns:
        A ``(sentiment, sentiment_score, emoji)`` tuple, where ``sentiment``
        is ``"Positive"``, ``"Negative"`` or ``"Neutral"`` and
        ``sentiment_score`` is the TextBlob polarity value.
    """
    # This function is applied to a whole CSV column by the dashboard, where
    # empty cells arrive as float NaN. TextBlob only accepts strings, so answer
    # neutrally here instead of letting one bad cell abort the page.
    if not isinstance(text, str) or not text.strip():
        return "Neutral", 0.0, "😐"

    sentiment_score = TextBlob(text).sentiment.polarity

    if sentiment_score > 0:
        return "Positive", sentiment_score, "😊"
    if sentiment_score < 0:
        return "Negative", sentiment_score, "😞"
    return "Neutral", sentiment_score, "😐"
197
 
198
- # Function to provide product recommendations (only product names) based on the query
199
  def recommend_products(query):
200
  if dataset is None:
201
  return "Dataset not loaded properly."
202
 
203
- # Ensure all product names are strings and handle missing data
204
  dataset['product'] = dataset['product'].fillna('Unknown').astype(str)
205
-
206
- # Compute the embedding of the query
207
  query_embedding = embedding_model.encode([query])
208
-
209
- # Compute embeddings for all the dataset product names
210
  dataset_embeddings = embedding_model.encode(dataset['product'].tolist())
211
-
212
- # Find the closest match using cosine similarity
213
  similarities = cosine_similarity(query_embedding, dataset_embeddings)
214
-
215
- # Get the indices of the top 3 recommendations
216
- top_indices = np.argsort(similarities[0])[-3:][::-1] # Get top 3 recommendations
217
-
218
- # Return at least 3 recommendations
219
  recommendations = []
220
  for idx in top_indices:
221
  product = dataset.iloc[idx]
@@ -225,9 +190,8 @@ def recommend_products(query):
225
  'features': product['features'],
226
  'ratings': product['ratings'],
227
  'discount': product['discount']
228
- }) # Append product details
229
-
230
- # If there are less than 3 recommendations, pad with default responses
231
  while len(recommendations) < 3:
232
  recommendations.append({
233
  'product': 'No recommendation available',
@@ -236,24 +200,21 @@ def recommend_products(query):
236
  'ratings': 'N/A',
237
  'discount': 'N/A'
238
  })
239
-
240
  return recommendations
241
 
242
  # Function to handle the entire continuous interaction loop
243
  def continuous_interaction():
244
  st.title("Speech Recognition with Product Queries")
245
  if st.button("Start Speech Recognition"):
246
- while True: # Loop for continuous listening
247
  user_input = listen_to_speech()
248
  if user_input:
249
- # Check if the user is greeting
250
  if is_greeting(user_input):
251
  respond_to_greeting()
252
- continue # Skip the rest of the code and just greet
253
- # Extract product name if mentioned
254
  product_name = extract_product_name(user_input)
255
  if product_name:
256
- # If the user asks for a product like "iPhone price", respond with product details
257
  st.write(f"Let me check the details for {product_name}:")
258
  product_details = dataset[dataset['product'].str.lower() == product_name.lower()]
259
  if not product_details.empty:
@@ -266,53 +227,43 @@ def continuous_interaction():
266
  else:
267
  st.write("Sorry, I couldn't find the product you're asking for.")
268
  else:
269
- # If no specific product is mentioned, perform normal question answering
270
  answer = find_answer(user_input)
271
  st.write(f"Answer: {answer}")
272
-
273
- # Sentiment Analysis with Emoji
274
  sentiment, sentiment_score, emoji = analyze_sentiment_with_emoji(user_input)
275
  st.write(f"Sentiment: {sentiment} (Score: {sentiment_score}) {emoji}")
276
 
277
- # Product Recommendations based on query
278
  st.write("Here are some product recommendations based on your query: ")
279
  recommendations = recommend_products(user_input)
280
- if recommendations:
281
- for idx, rec in enumerate(recommendations, 1):
282
- st.write(f"**Recommendation {idx}:**")
283
- st.write(f"**Product**: {rec['product']}")
284
- st.write(f"**Price**: {rec['price']}")
285
- st.write(f"**Features**: {rec['features']}")
286
- st.write(f"**Ratings**: {rec['ratings']}")
287
- st.write(f"**Discount**: {rec['discount']}%")
288
- st.write("---") # Separator between recommendations
289
-
290
- # Handle objections if any
291
- st.write("Do you like the recommendation or should I try again?")
292
 
293
  # Dashboard function with time filtering
294
  def display_dashboard():
295
  st.title("Product Dashboard")
296
  st.write("Welcome to the product query dashboard!")
297
 
298
- # Sidebar time filter
299
  time_filter = st.sidebar.selectbox(
300
  "Select time period",
301
  ["All Time", "Today", "One Week"]
302
  )
303
 
304
- query_results_df = pd.read_csv(output_csv_path, on_bad_lines='skip') # Load query results from 'context.csv'
305
 
306
- # Check if 'Timestamp' column exists
307
  if 'Timestamp' not in query_results_df.columns:
308
- query_results_df['Timestamp'] = pd.to_datetime('now') # Add current timestamp if column is missing
309
 
310
- # Filter data based on time selection
311
  query_results_df = filter_data_by_date(query_results_df, time_filter)
312
 
313
  st.subheader(f"Recent Queries Summary ({time_filter})")
314
- st.write(query_results_df.tail(10)) # Show the last 10 queries
315
-
316
  sentiment_counts = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[0]).value_counts()
317
  st.subheader(f"Sentiment Analysis Distribution ({time_filter})")
318
  st.write(sentiment_counts)
@@ -325,9 +276,6 @@ def display_dashboard():
325
  )
326
  st.plotly_chart(sentiment_fig)
327
 
328
- # Ensure 'Timestamp' is properly converted to datetime
329
- query_results_df['Timestamp'] = pd.to_datetime(query_results_df['Timestamp'], errors='coerce')
330
-
331
  query_results_df['sentiment_score'] = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[1])
332
 
333
  sentiment_time_fig = px.line(
@@ -350,7 +298,6 @@ def display_dashboard():
350
  )
351
  st.plotly_chart(product_popularity_fig)
352
 
353
- # Most recommended products
354
  recommended_products = query_results_df['product'].value_counts()
355
  st.subheader(f"Most Recommended Products ({time_filter})")
356
  st.write(recommended_products)
@@ -363,11 +310,11 @@ def display_dashboard():
363
  )
364
  st.plotly_chart(recommended_products_fig)
365
 
366
- # Main code logic for running the application
367
if __name__ == "__main__":
    # Let the user pick the page from the sidebar, then render it.
    choice = st.sidebar.selectbox("Select Mode", ["Dashboard", "Speech Recognition"])

    # Anything other than "Dashboard" falls through to the speech page.
    render_page = display_dashboard if choice == "Dashboard" else continuous_interaction
    render_page()
 
1
+ import os
2
  import pyaudio
3
  import pandas as pd
4
  from sentence_transformers import SentenceTransformer
 
16
 
17
  # Set up paths
18
  csv_file_path = "context.csv" # Path to your CSV file
19
+ output_csv_path = "contents.csv" # Path to save query results
20
 
21
  # Google Sheets setup
22
  SCOPE = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
 
38
  # Function to safely load the CSV dataset
39
  def load_csv_safely(file_path):
40
  try:
 
41
  df = pd.read_csv(file_path, on_bad_lines='skip') # Skips malformed lines
 
42
  required_columns = ['question', 'product', 'price', 'features', 'ratings', 'discount']
43
  for column in required_columns:
44
  if column not in df.columns:
45
  raise Exception(f"CSV does not contain the required column: '{column}'. Please check your CSV.")
46
 
 
47
  if 'Timestamp' not in df.columns:
48
  df['Timestamp'] = pd.NaT # Set it to NaT (Not a Time) initially
49
 
 
60
 
61
  # Function to filter data by date
62
def filter_data_by_date(data, date_filter):
    """Return the rows of *data* whose ``Timestamp`` lies in the chosen period.

    Args:
        data: DataFrame with a ``Timestamp`` column. Values may be strings
            (as produced by ``pd.read_csv``) or datetimes.
        date_filter: ``"Today"`` keeps rows from local midnight onwards,
            ``"One Week"`` keeps rows from the last seven days, and any
            other value keeps every row.

    Returns:
        A new DataFrame; the caller's frame is left untouched. Its
        ``Timestamp`` column is converted to datetimes.
    """
    # Convert on a copy: assigning into the argument would silently rewrite the
    # caller's column (and can trigger SettingWithCopyWarning on sliced frames).
    filtered = data.copy()
    # Unparseable values become NaT, which compares False against any cutoff
    # and is therefore dropped by the period filters below.
    filtered['Timestamp'] = pd.to_datetime(filtered['Timestamp'], errors='coerce')

    now = datetime.now()
    if date_filter == "Today":
        start_date = now.replace(hour=0, minute=0, second=0, microsecond=0)
    elif date_filter == "One Week":
        start_date = now - timedelta(weeks=1)
    else:
        # No recognised period selected: return everything.
        return filtered

    return filtered[filtered['Timestamp'] >= start_date]
72
 
73
  # Function to recognize speech using SpeechRecognition and PyAudio in chunks
74
  def listen_to_speech():
75
  recognizer = sr.Recognizer()
 
 
76
  with sr.Microphone() as source:
77
  recognizer.adjust_for_ambient_noise(source)
78
+ st.write("Listening...")
79
 
80
  try:
81
+ audio = recognizer.listen(source, timeout=5, phrase_time_limit=10)
82
+ st.write("Recognizing...")
 
 
 
83
  text = recognizer.recognize_google(audio)
84
  st.write(f"Recognized: {text}")
85
+ return text
86
  except sr.UnknownValueError:
87
+ st.error("Sorry, I could not understand the audio.")
88
  return None
89
  except sr.RequestError:
90
+ st.error("Could not request results from Google Speech Recognition service.")
91
  return None
92
  except Exception as e:
93
  st.error(f"An error occurred: {e}")
 
104
 
105
  # Function to extract the product name from the query
106
def extract_product_name(query):
    """Return the first product from ``dataset`` whose name occurs in *query*.

    Matching is a case-insensitive substring test against the ``product``
    column of the module-level ``dataset``.

    Args:
        query: The user's utterance as text.

    Returns:
        The product name exactly as stored in ``dataset`` (so callers can
        look the row up again), or ``None`` when nothing matches, the query
        is empty, or the dataset is not loaded.
    """
    # Mirror the guard used by the other dataset-backed helpers in this file.
    if dataset is None or not query:
        return None

    lowered_query = query.lower()  # loop-invariant, so computed once
    # Missing names are dropped rather than replaced by a placeholder: a
    # placeholder such as 'Unknown' would match any query containing that word.
    for product in dataset['product'].dropna().astype(str):
        name = product.strip()
        # A blank name is a substring of every query; never treat it as a hit.
        if name and name.lower() in lowered_query:
            return product
    return None
111
 
112
+ # Function to find the best matching answer using embeddings
113
  def find_answer(query):
114
  if dataset is None:
115
  return "Dataset not loaded properly."
116
+
 
117
  query_embedding = embedding_model.encode([query])
 
 
118
  dataset_embeddings = embedding_model.encode(dataset['question'].tolist())
119
+
 
120
  similarities = cosine_similarity(query_embedding, dataset_embeddings)
 
 
121
  closest_idx = np.argmax(similarities)
122
+
 
123
  closest_question = dataset.iloc[closest_idx]
124
  product_name = closest_question['product']
125
  price = closest_question['price']
 
127
  ratings = closest_question['ratings']
128
  discount = closest_question['discount']
129
 
 
130
  if 'Timestamp' not in closest_question.index:
131
  closest_question['Timestamp'] = datetime.now()
132
 
 
133
  save_query_to_csv(query, product_name, price, features, ratings, discount)
134
 
 
135
  if "price" in query.lower():
136
  return f"The price of {product_name} is {price}"
137
  elif "features" in query.lower():
 
150
  'features': features,
151
  'ratings': ratings,
152
  'discount': discount,
153
+ 'Timestamp': datetime.now()
154
  }
155
  new_entry_df = pd.DataFrame([new_entry])
 
 
156
  new_entry_df.to_csv(output_csv_path, mode='a', header=not os.path.exists(output_csv_path), index=False)
157
 
158
  # Function for sentiment analysis using TextBlob with emojis
159
def analyze_sentiment_with_emoji(text):
    """Classify *text* as Positive, Negative or Neutral using TextBlob polarity.

    Args:
        text: The text to analyse. Non-string values (for example NaN from
            an empty CSV cell) and blank strings are treated as neutral.

    Returns:
        A ``(sentiment, sentiment_score, emoji)`` tuple, where ``sentiment``
        is ``"Positive"``, ``"Negative"`` or ``"Neutral"`` and
        ``sentiment_score`` is the TextBlob polarity value.
    """
    # This function is applied to a whole CSV column by the dashboard, where
    # empty cells arrive as float NaN. TextBlob only accepts strings, so answer
    # neutrally here instead of letting one bad cell abort the page.
    if not isinstance(text, str) or not text.strip():
        return "Neutral", 0.0, "😐"

    sentiment_score = TextBlob(text).sentiment.polarity

    if sentiment_score > 0:
        return "Positive", sentiment_score, "😊"
    if sentiment_score < 0:
        return "Negative", sentiment_score, "😞"
    return "Neutral", sentiment_score, "😐"
172
 
173
+ # Function to provide product recommendations based on the query
174
  def recommend_products(query):
175
  if dataset is None:
176
  return "Dataset not loaded properly."
177
 
 
178
  dataset['product'] = dataset['product'].fillna('Unknown').astype(str)
 
 
179
  query_embedding = embedding_model.encode([query])
 
 
180
  dataset_embeddings = embedding_model.encode(dataset['product'].tolist())
 
 
181
  similarities = cosine_similarity(query_embedding, dataset_embeddings)
182
+ top_indices = np.argsort(similarities[0])[-3:][::-1]
183
+
 
 
 
184
  recommendations = []
185
  for idx in top_indices:
186
  product = dataset.iloc[idx]
 
190
  'features': product['features'],
191
  'ratings': product['ratings'],
192
  'discount': product['discount']
193
+ })
194
+
 
195
  while len(recommendations) < 3:
196
  recommendations.append({
197
  'product': 'No recommendation available',
 
200
  'ratings': 'N/A',
201
  'discount': 'N/A'
202
  })
203
+
204
  return recommendations
205
 
206
  # Function to handle the entire continuous interaction loop
207
  def continuous_interaction():
208
  st.title("Speech Recognition with Product Queries")
209
  if st.button("Start Speech Recognition"):
210
+ while True:
211
  user_input = listen_to_speech()
212
  if user_input:
 
213
  if is_greeting(user_input):
214
  respond_to_greeting()
215
+ continue
 
216
  product_name = extract_product_name(user_input)
217
  if product_name:
 
218
  st.write(f"Let me check the details for {product_name}:")
219
  product_details = dataset[dataset['product'].str.lower() == product_name.lower()]
220
  if not product_details.empty:
 
227
  else:
228
  st.write("Sorry, I couldn't find the product you're asking for.")
229
  else:
 
230
  answer = find_answer(user_input)
231
  st.write(f"Answer: {answer}")
232
+
 
233
  sentiment, sentiment_score, emoji = analyze_sentiment_with_emoji(user_input)
234
  st.write(f"Sentiment: {sentiment} (Score: {sentiment_score}) {emoji}")
235
 
 
236
  st.write("Here are some product recommendations based on your query: ")
237
  recommendations = recommend_products(user_input)
238
+ for idx, rec in enumerate(recommendations, 1):
239
+ st.write(f"Recommendation {idx}:")
240
+ st.write(f"Product: {rec['product']}")
241
+ st.write(f"Price: {rec['price']}")
242
+ st.write(f"Features: {rec['features']}")
243
+ st.write(f"Ratings: {rec['ratings']}")
244
+ st.write(f"Discount: {rec['discount']}%")
245
+ st.write("---")
 
 
 
 
246
 
247
  # Dashboard function with time filtering
248
  def display_dashboard():
249
  st.title("Product Dashboard")
250
  st.write("Welcome to the product query dashboard!")
251
 
 
252
  time_filter = st.sidebar.selectbox(
253
  "Select time period",
254
  ["All Time", "Today", "One Week"]
255
  )
256
 
257
+ query_results_df = pd.read_csv(output_csv_path, on_bad_lines='skip')
258
 
 
259
  if 'Timestamp' not in query_results_df.columns:
260
+ query_results_df['Timestamp'] = pd.to_datetime('now')
261
 
 
262
  query_results_df = filter_data_by_date(query_results_df, time_filter)
263
 
264
  st.subheader(f"Recent Queries Summary ({time_filter})")
265
+ st.write(query_results_df.tail(10))
266
+
267
  sentiment_counts = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[0]).value_counts()
268
  st.subheader(f"Sentiment Analysis Distribution ({time_filter})")
269
  st.write(sentiment_counts)
 
276
  )
277
  st.plotly_chart(sentiment_fig)
278
 
 
 
 
279
  query_results_df['sentiment_score'] = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[1])
280
 
281
  sentiment_time_fig = px.line(
 
298
  )
299
  st.plotly_chart(product_popularity_fig)
300
 
 
301
  recommended_products = query_results_df['product'].value_counts()
302
  st.subheader(f"Most Recommended Products ({time_filter})")
303
  st.write(recommended_products)
 
310
  )
311
  st.plotly_chart(recommended_products_fig)
312
 
313
+ # Main code to run the app
314
if __name__ == '__main__':
    # Each sidebar option maps to the function that renders that page.
    pages = {
        "Speech Recognition": continuous_interaction,
        "Dashboard": display_dashboard,
    }
    mode = st.sidebar.radio("Select Mode", ("Speech Recognition", "Dashboard"))

    render_page = pages.get(mode)
    if render_page is not None:
        render_page()