Files changed (1) hide show
  1. app.py +137 -150
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import os
2
  import pyaudio
3
  import pandas as pd
4
  from sentence_transformers import SentenceTransformer
@@ -10,36 +10,46 @@ from textblob import TextBlob
10
  import streamlit as st
11
  import seaborn as sns
12
  import plotly.express as px
 
13
  from datetime import datetime, timedelta
14
  import gspread
15
  from google.oauth2.service_account import Credentials
 
 
 
 
 
16
 
17
  # Set up paths for CSV files and Google Sheets credentials
18
- csv_file_path = "context.csv" # Path to CSV file with product info
19
- output_csv_path = "contents.csv" # Path to save query results
20
 
21
- # Google Sheets setup
22
  SCOPE = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
23
- CREDS_PATH = "modern-cycling-444916-g6-82c207d3eb47.json" # Google credentials path
 
 
 
 
24
 
25
  # Initialize Google Sheets connection
26
  def initialize_google_sheets():
27
  credentials = Credentials.from_service_account_file(CREDS_PATH, scopes=SCOPE)
28
  try:
29
  client = gspread.authorize(credentials)
30
- sheet = client.open("infosys").sheet1 # Accessing the Google Sheet "SalesStores"
31
  return sheet
32
  except gspread.exceptions.APIError as e:
33
  st.error(f"Google Sheets API error: {e}")
34
  return None
35
 
36
- sheet = initialize_google_sheets() # Initialize Google Sheets connection
37
 
38
  # Function to safely load the CSV dataset
39
  def load_csv_safely(file_path):
40
  try:
41
- df = pd.read_csv(file_path, on_bad_lines='skip') # Handles malformed lines in CSV
42
- required_columns = ['question', 'product', 'price', 'features', 'ratings', 'discount']
43
  for column in required_columns:
44
  if column not in df.columns:
45
  raise Exception(f"CSV does not contain the required column: '{column}'. Please check your CSV.")
@@ -55,44 +65,28 @@ def load_csv_safely(file_path):
55
  st.error(f"An error occurred: {e}")
56
  return None
57
 
58
- dataset = load_csv_safely(csv_file_path) # Load dataset safely
59
 
60
- embedding_model = SentenceTransformer('all-MiniLM-L6-v2') # Pre-trained sentence transformer model for embeddings
61
 
62
- # Function to filter data by date
63
- def filter_data_by_date(data, date_filter):
64
- data['Timestamp'] = pd.to_datetime(data['Timestamp'], errors='coerce')
65
- if date_filter == "Today":
66
- start_date = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
67
- data = data[data['Timestamp'] >= start_date]
68
- elif date_filter == "One Week":
69
- start_date = datetime.now() - timedelta(weeks=1)
70
- data = data[data['Timestamp'] >= start_date]
71
 
72
- return data
73
-
74
- # Function for speech recognition
75
- def listen_to_speech():
76
- recognizer = sr.Recognizer()
77
- with sr.Microphone() as source:
78
- recognizer.adjust_for_ambient_noise(source)
79
- st.write("Listening...")
80
-
81
- try:
82
- audio = recognizer.listen(source, timeout=5, phrase_time_limit=10)
83
- st.write("Recognizing...")
84
- text = recognizer.recognize_google(audio)
85
- st.write(f"Recognized: {text}")
86
- return text
87
- except sr.UnknownValueError:
88
- st.error("Sorry, I could not understand the audio.")
89
- return None
90
- except sr.RequestError:
91
- st.error("Could not request results from Google Speech Recognition service.")
92
- return None
93
- except Exception as e:
94
- st.error(f"An error occurred: {e}")
95
- return None
96
 
97
  # Function to check if the text is a greeting
98
  def is_greeting(text):
@@ -110,39 +104,42 @@ def extract_product_name(query):
110
  return product
111
  return None
112
 
113
- # Function to search for relevant product details based on query (not relying on product name explicitly)
 
 
 
 
 
 
 
 
114
  def find_answer(query):
 
 
 
115
  if dataset is None:
116
  return "Dataset not loaded properly."
117
 
118
- # Create embeddings for the query and all possible columns (product, features, question)
119
  query_embedding = embedding_model.encode([query])
120
-
121
- # Generate embeddings for all questions, products, and features to find relevance
122
- combined_columns = dataset['question'].fillna('') + " " + dataset['product'].fillna('') + " " + dataset['features'].fillna('')
123
  combined_embeddings = embedding_model.encode(combined_columns.tolist())
124
 
125
- # Calculate cosine similarity between the query embedding and each product's combined embeddings
126
  similarities = cosine_similarity(query_embedding, combined_embeddings)
127
 
128
- # Set a threshold for similarity to determine if the query matches any product
129
- similarity_threshold = 0.5 # You can adjust this threshold based on how strict you want the match
 
130
 
131
- closest_idx = np.argmax(similarities) # Index of the closest match
132
- highest_similarity = similarities[0][closest_idx] # Highest similarity score
133
-
134
- # If no match is found above the threshold, return "No matching product found"
135
  if highest_similarity < similarity_threshold:
136
  return "Sorry, no product found for your query."
137
 
138
- # Get the details for the closest match
139
  closest_question = dataset.iloc[closest_idx]
140
  product_name = closest_question['product']
141
  price = closest_question['price']
142
  features = closest_question['features']
143
  ratings = closest_question['ratings']
144
  discount = closest_question['discount']
145
-
146
  if 'Timestamp' not in closest_question.index:
147
  closest_question['Timestamp'] = datetime.now()
148
 
@@ -166,12 +163,13 @@ def save_query_to_csv(query, product_name, price, features, ratings, discount):
166
  'features': features,
167
  'ratings': ratings,
168
  'discount': discount,
169
- 'Timestamp': datetime.now()
 
170
  }
171
  new_entry_df = pd.DataFrame([new_entry])
172
  new_entry_df.to_csv(output_csv_path, mode='a', header=not os.path.exists(output_csv_path), index=False)
173
 
174
- # Function for sentiment analysis with emojis
175
  def analyze_sentiment_with_emoji(text):
176
  blob = TextBlob(text)
177
  sentiment_score = blob.sentiment.polarity
@@ -186,114 +184,59 @@ def analyze_sentiment_with_emoji(text):
186
  emoji = "😐"
187
  return sentiment, sentiment_score, emoji
188
 
189
- # Function to provide product recommendations based on the query
190
- def recommend_products(query):
191
- if dataset is None:
192
- return "Dataset not loaded properly."
193
-
194
- dataset['product'] = dataset['product'].fillna('Unknown').astype(str)
195
- query_embedding = embedding_model.encode([query])
196
- dataset_embeddings = embedding_model.encode(dataset['product'].tolist())
197
- similarities = cosine_similarity(query_embedding, dataset_embeddings)
198
- top_indices = np.argsort(similarities[0])[-3:][::-1]
199
-
200
- recommendations = []
201
- for idx in top_indices:
202
- product = dataset.iloc[idx]
203
- recommendations.append({
204
- 'product': product['product'],
205
- 'price': product['price'],
206
- 'features': product['features'],
207
- 'ratings': product['ratings'],
208
- 'discount': product['discount']
209
- })
210
 
211
- while len(recommendations) < 3:
212
- recommendations.append({
213
- 'product': 'No recommendation available',
214
- 'price': 'N/A',
215
- 'features': 'N/A',
216
- 'ratings': 'N/A',
217
- 'discount': 'N/A'
218
- })
219
 
220
- return recommendations
221
-
222
- # Function to handle continuous interaction loop
223
- def continuous_interaction():
224
- st.title("Speech Recognition with Product Queries")
225
- if st.button("Start Speech Recognition"):
226
- while True:
227
- user_input = listen_to_speech()
228
- if user_input:
229
- if is_greeting(user_input):
230
- respond_to_greeting()
231
- continue
232
- product_name = extract_product_name(user_input)
233
- if product_name:
234
- st.write(f"Let me check the details for {product_name}:")
235
- product_details = dataset[dataset['product'].str.lower() == product_name.lower()]
236
- if not product_details.empty:
237
- product_info = product_details.iloc[0]
238
- st.write(f"Product: {product_info['product']}")
239
- st.write(f"Price: {product_info['price']}")
240
- st.write(f"Features: {product_info['features']}")
241
- st.write(f"Ratings: {product_info['ratings']}")
242
- st.write(f"Discount: {product_info['discount']}%")
243
- else:
244
- st.write("Sorry, I couldn't find the product you're asking for.")
245
- else:
246
- answer = find_answer(user_input)
247
- st.write(f"Answer: {answer}")
248
-
249
- sentiment, sentiment_score, emoji = analyze_sentiment_with_emoji(user_input)
250
- st.write(f"Sentiment: {sentiment} (Score: {sentiment_score}) {emoji}")
251
-
252
- st.write("Here are some product recommendations based on your query: ")
253
- recommendations = recommend_products(user_input)
254
- for idx, rec in enumerate(recommendations, 1):
255
- st.write(f"Recommendation {idx}:")
256
- st.write(f"Product: {rec['product']}")
257
- st.write(f"Price: {rec['price']}")
258
- st.write(f"Features: {rec['features']}")
259
- st.write(f"Ratings: {rec['ratings']}")
260
- st.write(f"Discount: {rec['discount']}%")
261
- st.write("---")
262
 
263
  # Dashboard for visualizations
264
  def display_dashboard():
265
  st.title("Product Dashboard")
266
  st.write("Welcome to the product query dashboard!")
267
-
 
 
 
 
 
 
268
  time_filter = st.sidebar.selectbox(
269
  "Select time period",
270
  ["All Time", "Today", "One Week"]
271
  )
272
-
273
  query_results_df = pd.read_csv(output_csv_path, on_bad_lines='skip')
274
-
275
  if 'Timestamp' not in query_results_df.columns:
276
  query_results_df['Timestamp'] = pd.to_datetime('now')
277
-
 
 
 
278
  query_results_df = filter_data_by_date(query_results_df, time_filter)
279
-
280
  st.subheader(f"Recent Queries Summary ({time_filter})")
281
  st.write(query_results_df.tail(10))
282
-
283
  sentiment_counts = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[0]).value_counts()
284
  st.subheader(f"Sentiment Analysis Distribution ({time_filter})")
285
  st.write(sentiment_counts)
286
-
287
- sentiment_fig = px.pie(
288
- sentiment_counts,
289
- names=sentiment_counts.index,
290
- values=sentiment_counts.values,
291
- title=f"Sentiment Distribution of Queries ({time_filter})"
292
- )
293
  st.plotly_chart(sentiment_fig)
294
 
295
  query_results_df['sentiment_score'] = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[1])
296
-
297
  sentiment_time_fig = px.line(
298
  query_results_df,
299
  x='Timestamp',
@@ -301,7 +244,7 @@ def display_dashboard():
301
  title=f"Sentiment Score Over Time ({time_filter})"
302
  )
303
  st.plotly_chart(sentiment_time_fig)
304
-
305
  product_counts = query_results_df['product'].value_counts()
306
  st.subheader(f"Product Popularity ({time_filter})")
307
  st.write(product_counts)
@@ -326,11 +269,55 @@ def display_dashboard():
326
  )
327
  st.plotly_chart(recommended_products_fig)
328
 
329
- # Main code to run the app
330
- if __name__ == '__main__':
331
- mode = st.sidebar.radio("Select Mode", ("Speech Recognition", "Dashboard"))
 
 
 
 
 
 
 
 
 
 
 
 
 
332
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  if mode == "Speech Recognition":
334
- continuous_interaction()
 
335
  elif mode == "Dashboard":
336
  display_dashboard()
 
1
+ import os
2
  import pyaudio
3
  import pandas as pd
4
  from sentence_transformers import SentenceTransformer
 
10
  import streamlit as st
11
  import seaborn as sns
12
  import plotly.express as px
13
+ import requests
14
  from datetime import datetime, timedelta
15
  import gspread
16
  from google.oauth2.service_account import Credentials
17
+ from dotenv import load_dotenv # For loading environment variables
18
+ import random # For generating random customer IDs
19
+
20
+ # Load environment variables from a .env file
21
+ load_dotenv()
22
 
23
# Set up paths for CSV files and Google Sheets credentials.
# Every path can be overridden through an environment variable (or the .env
# file loaded by load_dotenv() above); the defaults are the original
# developer-machine locations so existing setups keep working.
csv_file_path = os.getenv(
    "PRODUCT_CSV_PATH",
    r"C:\Users\Muthuraja\OneDrive\Attachments\Desktop\second\database1.csv",
)
output_csv_path = os.getenv(
    "QUERY_LOG_CSV_PATH",
    r"C:\Users\Muthuraja\OneDrive\Attachments\Desktop\second\Book4.csv",
)

# Scopes requested when authorizing the Google service account.
SCOPE = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
# Path to the Google service-account JSON key file.
CREDS_PATH = os.getenv(
    "GOOGLE_CREDS_PATH",
    r"C:\Users\Muthuraja\Downloads\modern-cycling-444916-g6-82c207d3eb47.json",
)

# Groq API settings.
# SECURITY: the API key is a secret and must never be committed to source
# control. It is read from the GROQ_API_KEY environment variable (e.g. set in
# .env). The key that used to be hard-coded here has been published and should
# be revoked. When the variable is unset the key is an empty string and Groq
# requests will be rejected by the server.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
34
 
35
# Initialize Google Sheets connection
def initialize_google_sheets():
    """Open the first worksheet of the "CRM_Interactions" spreadsheet.

    Authorizes with the service-account key file at ``CREDS_PATH`` using the
    scopes in ``SCOPE``.

    Returns:
        The spreadsheet's ``sheet1`` worksheet, or ``None`` when the
        credentials cannot be loaded, the spreadsheet cannot be found, or the
        Sheets API reports an error. Each failure is shown via ``st.error``.
    """
    # Loading the key file can fail before any API call is made (missing file,
    # unreadable or malformed JSON); report it instead of crashing at import.
    try:
        credentials = Credentials.from_service_account_file(CREDS_PATH, scopes=SCOPE)
    except (OSError, ValueError) as e:
        st.error(f"Could not load Google credentials: {e}")
        return None

    try:
        client = gspread.authorize(credentials)
        return client.open("CRM_Interactions").sheet1
    except gspread.exceptions.APIError as e:
        st.error(f"Google Sheets API error: {e}")
        return None
    except gspread.exceptions.SpreadsheetNotFound:
        st.error('Google Sheets error: spreadsheet "CRM_Interactions" was not found '
                 'or is not shared with the service account.')
        return None
45
 
46
# Module-level worksheet handle; None when initialize_google_sheets() reported an API error.
+ sheet = initialize_google_sheets()
47
 
48
  # Function to safely load the CSV dataset
49
  def load_csv_safely(file_path):
50
  try:
51
+ df = pd.read_csv(file_path, on_bad_lines='skip')
52
+ required_columns = ['question', 'product', 'price', 'features', 'ratings', 'discount', 'customer_id']
53
  for column in required_columns:
54
  if column not in df.columns:
55
  raise Exception(f"CSV does not contain the required column: '{column}'. Please check your CSV.")
 
65
  st.error(f"An error occurred: {e}")
66
  return None
67
 
68
# Product dataset loaded once at import; None when load_csv_safely() hit an error.
+ dataset = load_csv_safely(csv_file_path)
69
 
70
# Sentence-embedding model shared by the query-matching code (used via .encode()).
+ embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
71
 
72
# Function to send a request to the Groq API
def send_groq_request(query, model="llama-3.1-8b-instant", timeout=30):
    """Send a single user message to the Groq chat-completions endpoint.

    Args:
        query: The user's text, sent as one ``user`` message.
        model: Groq model identifier to use. NOTE(review): the default is an
            assumption; confirm it against Groq's current model list.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The decoded JSON response body, or ``None`` when the request fails
        (the failure is shown via ``st.error``).
    """
    headers = {
        'Authorization': f'Bearer {GROQ_API_KEY}',
        'Content-Type': 'application/json'
    }

    # The endpoint is OpenAI-compatible: it expects a model name and a list of
    # chat messages, not a bare {'query': ...} object.
    payload = {
        'model': model,
        'messages': [{'role': 'user', 'content': query}],
    }

    try:
        # A timeout keeps a stalled connection from hanging the app forever.
        response = requests.post(GROQ_API_URL, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError for 4xx/5xx responses
        return response.json()
    except requests.exceptions.RequestException as e:
        st.error(f"Error communicating with Groq API: {e}")
        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
90
 
91
  # Function to check if the text is a greeting
92
  def is_greeting(text):
 
104
  return product
105
  return None
106
 
107
# Function to handle "more products" requests
def handle_more_products_request(query):
    """Answer a query that asks for "more products".

    Args:
        query: The user's query text; matched case-insensitively.

    Returns:
        ``None`` when the query does not contain "more products"; otherwise a
        message listing the first five dataset rows (product, price, features,
        ratings, discount), or "Dataset not loaded properly." when the dataset
        is unavailable.
    """
    if "more products" not in query.lower():
        return None

    # find_answer() calls this before its own dataset check, so guard here to
    # avoid subscripting None when the CSV failed to load.
    if dataset is None:
        return "Dataset not loaded properly."

    # Select more products from the dataset. Filtering logic can be added here.
    more_products = dataset[['product', 'price', 'features', 'ratings', 'discount']].head(5)
    return f"Here are some more products you might like:\n{more_products}"
114
+
115
+ # Function to find the best answer to a query
116
  def find_answer(query):
117
+ if "more products" in query.lower():
118
+ return handle_more_products_request(query)
119
+
120
  if dataset is None:
121
  return "Dataset not loaded properly."
122
 
 
123
  query_embedding = embedding_model.encode([query])
124
+ combined_columns = dataset['question'].fillna('') + " " + dataset['product'].fillna('') + " " + dataset['features'].fillna('')
 
 
125
  combined_embeddings = embedding_model.encode(combined_columns.tolist())
126
 
 
127
  similarities = cosine_similarity(query_embedding, combined_embeddings)
128
 
129
+ similarity_threshold = 0.5
130
+ closest_idx = np.argmax(similarities)
131
+ highest_similarity = similarities[0][closest_idx]
132
 
 
 
 
 
133
  if highest_similarity < similarity_threshold:
134
  return "Sorry, no product found for your query."
135
 
 
136
  closest_question = dataset.iloc[closest_idx]
137
  product_name = closest_question['product']
138
  price = closest_question['price']
139
  features = closest_question['features']
140
  ratings = closest_question['ratings']
141
  discount = closest_question['discount']
142
+
143
  if 'Timestamp' not in closest_question.index:
144
  closest_question['Timestamp'] = datetime.now()
145
 
 
163
  'features': features,
164
  'ratings': ratings,
165
  'discount': discount,
166
+ 'Timestamp': datetime.now(),
167
+ 'customer_id': random.randint(1000, 9999) # Generate a random customer ID between 1000 and 9999
168
  }
169
  new_entry_df = pd.DataFrame([new_entry])
170
  new_entry_df.to_csv(output_csv_path, mode='a', header=not os.path.exists(output_csv_path), index=False)
171
 
172
+ # Function to perform sentiment analysis with TextBlob
173
  def analyze_sentiment_with_emoji(text):
174
  blob = TextBlob(text)
175
  sentiment_score = blob.sentiment.polarity
 
184
  emoji = "😐"
185
  return sentiment, sentiment_score, emoji
186
 
187
# Pie chart of sentiment counts with percentage labels
def display_sentiment_pie_chart(sentiment_counts):
    """Build a donut-style pie chart from sentiment label counts.

    Args:
        sentiment_counts: Object exposing ``.index`` (labels) and ``.values``
            (counts); the dashboard passes the result of ``value_counts()``.

    Returns:
        The Plotly figure, with each slice labelled by name and percentage.
    """
    labels = sentiment_counts.index
    counts = sentiment_counts.values
    slice_offsets = [0.1, 0.1, 0.1]  # pull distance applied to the slices

    chart = px.pie(
        sentiment_counts,
        names=labels,
        values=counts,
        title="Sentiment Distribution",
        hole=0.3,  # non-zero hole renders the pie as a donut
    )
    # Show "percent + label" text on the slices.
    chart.update_traces(textinfo='percent+label', pull=slice_offsets)
    return chart
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
201
 
202
  # Dashboard for visualizations
203
  def display_dashboard():
204
  st.title("Product Dashboard")
205
  st.write("Welcome to the product query dashboard!")
206
+
207
+ customer_ids = dataset['customer_id'].unique()
208
+ selected_customer_id = st.sidebar.selectbox(
209
+ "Select Customer ID",
210
+ ["All Customers"] + customer_ids.tolist()
211
+ )
212
+
213
  time_filter = st.sidebar.selectbox(
214
  "Select time period",
215
  ["All Time", "Today", "One Week"]
216
  )
217
+
218
  query_results_df = pd.read_csv(output_csv_path, on_bad_lines='skip')
219
+
220
  if 'Timestamp' not in query_results_df.columns:
221
  query_results_df['Timestamp'] = pd.to_datetime('now')
222
+
223
+ if selected_customer_id != "All Customers":
224
+ query_results_df = query_results_df[query_results_df['customer_id'] == selected_customer_id]
225
+
226
  query_results_df = filter_data_by_date(query_results_df, time_filter)
227
+
228
  st.subheader(f"Recent Queries Summary ({time_filter})")
229
  st.write(query_results_df.tail(10))
230
+
231
  sentiment_counts = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[0]).value_counts()
232
  st.subheader(f"Sentiment Analysis Distribution ({time_filter})")
233
  st.write(sentiment_counts)
234
+
235
+ sentiment_fig = display_sentiment_pie_chart(sentiment_counts)
 
 
 
 
 
236
  st.plotly_chart(sentiment_fig)
237
 
238
  query_results_df['sentiment_score'] = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[1])
239
+
240
  sentiment_time_fig = px.line(
241
  query_results_df,
242
  x='Timestamp',
 
244
  title=f"Sentiment Score Over Time ({time_filter})"
245
  )
246
  st.plotly_chart(sentiment_time_fig)
247
+
248
  product_counts = query_results_df['product'].value_counts()
249
  st.subheader(f"Product Popularity ({time_filter})")
250
  st.write(product_counts)
 
269
  )
270
  st.plotly_chart(recommended_products_fig)
271
 
272
# Function to filter data by date
def filter_data_by_date(query_results_df, time_filter):
    """Return the rows of ``query_results_df`` that fall in the chosen period.

    Args:
        query_results_df: DataFrame with a ``Timestamp`` column (strings or
            datetimes). The caller's frame is not modified.
        time_filter: "Today" keeps rows dated today, "One Week" keeps rows
            newer than seven days ago; any other value (e.g. "All Time")
            keeps every row.

    Returns:
        A new DataFrame whose ``Timestamp`` column is always datetime-typed.
        Values that cannot be parsed become ``NaT`` and are excluded by the
        "Today" and "One Week" filters.
    """
    # Work on a copy: the input may be a filtered slice of another frame, and
    # assigning into it would mutate the caller's data.
    filtered = query_results_df.copy()
    # errors='coerce' keeps one malformed row in the log from crashing the dashboard.
    filtered['Timestamp'] = pd.to_datetime(filtered['Timestamp'], errors='coerce')
    stamps = filtered['Timestamp']

    if time_filter == "Today":
        today = pd.Timestamp(datetime.now().date())
        # Compare on the calendar day while leaving the column as datetimes.
        filtered = filtered[stamps.dt.normalize() == today]
    elif time_filter == "One Week":
        one_week_ago = datetime.now() - timedelta(weeks=1)
        filtered = filtered[stamps > one_week_ago]
    return filtered
283
+
284
# Function for continuous speech interaction
def continuous_interaction(listen_timeout=5, phrase_time_limit=10):
    """Listen on the microphone and answer spoken queries until the user goes silent.

    Each recognized phrase is either treated as a greeting or passed to
    ``find_answer``; the answer and the query's sentiment are written to the
    page. Recognition failures are reported and the loop keeps listening.

    Args:
        listen_timeout: Seconds to wait for speech to start. When nothing is
            heard in that time the session ends, so the Streamlit script run
            can finish instead of blocking forever.
        phrase_time_limit: Maximum length, in seconds, of a single phrase.
    """
    recognizer = sr.Recognizer()
    microphone = sr.Microphone()

    st.write("Listening for your query...")

    while True:
        with microphone as source:
            recognizer.adjust_for_ambient_noise(source)
            try:
                audio = recognizer.listen(
                    source,
                    timeout=listen_timeout,
                    phrase_time_limit=phrase_time_limit,
                )
            except sr.WaitTimeoutError:
                # Silence is the only exit condition this loop has.
                st.write("No speech detected. Stopped listening.")
                return

        try:
            query = recognizer.recognize_google(audio)
            st.write(f"Your query: {query}")

            if is_greeting(query):
                respond_to_greeting()
            else:
                answer = find_answer(query)
                sentiment, score, emoji = analyze_sentiment_with_emoji(query)
                st.write(f"Answer: {answer}")
                st.write(f"Sentiment: {sentiment} {emoji}")
                st.write(f"Sentiment Score: {score}")

        except sr.UnknownValueError:
            st.write("Sorry, I couldn't understand that.")
        except sr.RequestError:
            st.write("Sorry, there was an error with the speech recognition service.")
313
+
314
# Entry point: pick a mode in the sidebar and render the matching view
if __name__ == "__main__":
    st.sidebar.title("Product Query Interface")
    available_modes = ["Speech Recognition", "Dashboard"]
    mode = st.sidebar.selectbox("Select Mode", available_modes)

    if mode == "Dashboard":
        display_dashboard()
    elif mode == "Speech Recognition" and st.button('Start Listening'):
        # The button is only rendered in speech mode; clicking it starts listening.
        continuous_interaction()