Files changed (1) hide show
  1. app.py +177 -134
app.py CHANGED
@@ -1,41 +1,39 @@
1
  import os
2
  import pyaudio
3
  import pandas as pd
 
 
4
  from sentence_transformers import SentenceTransformer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
- import numpy as np
7
  import time
8
  import speech_recognition as sr
9
  from textblob import TextBlob
10
  import streamlit as st
11
  import seaborn as sns
12
  import plotly.express as px
13
- import requests
14
  from datetime import datetime, timedelta
15
  import gspread
16
- from google.oauth2.service_account import Credentials # For loading environment variables
17
- import random # For generating random customer IDs
 
 
 
18
 
19
- # Load environment variables from a .en
20
 
21
  # Set up paths for CSV files and Google Sheets credentials
22
- csv_file_path = "database1.csv"
23
- output_csv_path = "Book4.csv"
24
 
25
- # Load Google Sheets credentials from environment variable
26
  SCOPE = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
27
- CREDS_PATH = "modern-cycling-444916-g6-82c207d3eb47.json" # Path to your Google credentials JSON file
28
-
29
- # Use the provided Groq API key (you can also store this in .env)
30
- GROQ_API_KEY = "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"
31
- GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
32
 
33
  # Initialize Google Sheets connection
34
  def initialize_google_sheets():
35
  credentials = Credentials.from_service_account_file(CREDS_PATH, scopes=SCOPE)
36
  try:
37
  client = gspread.authorize(credentials)
38
- sheet = client.open("CRM_Interactions").sheet1 # Using CRM_Interactions as the sheet name
39
  return sheet
40
  except gspread.exceptions.APIError as e:
41
  st.error(f"Google Sheets API error: {e}")
@@ -46,14 +44,14 @@ sheet = initialize_google_sheets()
46
  # Function to safely load the CSV dataset
47
  def load_csv_safely(file_path):
48
  try:
49
- df = pd.read_csv(file_path, on_bad_lines='skip')
50
- required_columns = ['question', 'product', 'price', 'features', 'ratings', 'discount', 'customer_id']
51
  for column in required_columns:
52
  if column not in df.columns:
53
- raise Exception(f"CSV does not contain the required column: '{column}'. Please check your CSV.")
54
 
55
  if 'Timestamp' not in df.columns:
56
- df['Timestamp'] = pd.NaT # Initialize Timestamp column if it doesn't exist
57
 
58
  return df
59
  except pd.errors.ParserError as e:
@@ -67,24 +65,66 @@ dataset = load_csv_safely(csv_file_path)
67
 
68
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
69
 
70
- # Function to send a request to the Groq API
71
- def send_groq_request(query):
 
 
 
 
 
 
 
 
 
 
 
 
72
  headers = {
73
- 'Authorization': f'Bearer {GROQ_API_KEY}',
74
- 'Content-Type': 'application/json'
75
  }
76
-
77
  payload = {
78
- 'query': query
79
- }
80
-
 
 
81
  try:
82
  response = requests.post(GROQ_API_URL, headers=headers, json=payload)
83
- response.raise_for_status() # Will raise an HTTPError for bad responses (4xx or 5xx)
84
- return response.json() # Return the response in JSON format
 
 
 
 
 
85
  except requests.exceptions.RequestException as e:
86
- st.error(f"Error communicating with Groq API: {e}")
87
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  # Function to check if the text is a greeting
90
  def is_greeting(text):
@@ -102,31 +142,21 @@ def extract_product_name(query):
102
  return product
103
  return None
104
 
105
- # Function to handle "more products" requests
106
- def handle_more_products_request(query):
107
- if "more products" in query.lower():
108
- # Select more products from the dataset. You can add filtering logic here.
109
- more_products = dataset[['product', 'price', 'features', 'ratings', 'discount']].head(5)
110
- return f"Here are some more products you might like:\n{more_products}"
111
- return None
112
-
113
- # Function to find the best answer to a query
114
  def find_answer(query):
115
- if "more products" in query.lower():
116
- return handle_more_products_request(query)
117
-
118
  if dataset is None:
119
  return "Dataset not loaded properly."
120
 
121
  query_embedding = embedding_model.encode([query])
122
- combined_columns = dataset['question'].fillna('') + " " + dataset['product'].fillna('') + " " + dataset['features'].fillna('')
 
123
  combined_embeddings = embedding_model.encode(combined_columns.tolist())
124
 
125
  similarities = cosine_similarity(query_embedding, combined_embeddings)
126
 
127
- similarity_threshold = 0.5
128
- closest_idx = np.argmax(similarities)
129
- highest_similarity = similarities[0][closest_idx]
130
 
131
  if highest_similarity < similarity_threshold:
132
  return "Sorry, no product found for your query."
@@ -137,7 +167,7 @@ def find_answer(query):
137
  features = closest_question['features']
138
  ratings = closest_question['ratings']
139
  discount = closest_question['discount']
140
-
141
  if 'Timestamp' not in closest_question.index:
142
  closest_question['Timestamp'] = datetime.now()
143
 
@@ -161,13 +191,12 @@ def save_query_to_csv(query, product_name, price, features, ratings, discount):
161
  'features': features,
162
  'ratings': ratings,
163
  'discount': discount,
164
- 'Timestamp': datetime.now(),
165
- 'customer_id': random.randint(1000, 9999) # Generate a random customer ID between 1000 and 9999
166
  }
167
  new_entry_df = pd.DataFrame([new_entry])
168
  new_entry_df.to_csv(output_csv_path, mode='a', header=not os.path.exists(output_csv_path), index=False)
169
 
170
- # Function to perform sentiment analysis with TextBlob
171
  def analyze_sentiment_with_emoji(text):
172
  blob = TextBlob(text)
173
  sentiment_score = blob.sentiment.polarity
@@ -182,59 +211,117 @@ def analyze_sentiment_with_emoji(text):
182
  emoji = "😐"
183
  return sentiment, sentiment_score, emoji
184
 
185
- # Updated pie chart function with percentages
186
- def display_sentiment_pie_chart(sentiment_counts):
187
- sentiment_fig = px.pie(
188
- sentiment_counts,
189
- names=sentiment_counts.index,
190
- values=sentiment_counts.values,
191
- title="Sentiment Distribution",
192
- hole=0.3 # For a donut chart (optional)
193
- )
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
- # Add percentage labels inside the slices
196
- sentiment_fig.update_traces(textinfo='percent+label', pull=[0.1, 0.1, 0.1])
 
 
 
 
 
 
197
 
198
- return sentiment_fig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
  # Dashboard for visualizations
201
  def display_dashboard():
202
  st.title("Product Dashboard")
203
  st.write("Welcome to the product query dashboard!")
204
-
205
- customer_ids = dataset['customer_id'].unique()
206
- selected_customer_id = st.sidebar.selectbox(
207
- "Select Customer ID",
208
- ["All Customers"] + customer_ids.tolist()
209
- )
210
-
211
- time_filter = st.sidebar.selectbox(
212
- "Select time period",
213
- ["All Time", "Today", "One Week"]
214
- )
215
-
216
  query_results_df = pd.read_csv(output_csv_path, on_bad_lines='skip')
217
-
218
  if 'Timestamp' not in query_results_df.columns:
219
  query_results_df['Timestamp'] = pd.to_datetime('now')
220
-
221
- if selected_customer_id != "All Customers":
222
- query_results_df = query_results_df[query_results_df['customer_id'] == selected_customer_id]
223
-
224
  query_results_df = filter_data_by_date(query_results_df, time_filter)
225
-
226
  st.subheader(f"Recent Queries Summary ({time_filter})")
227
  st.write(query_results_df.tail(10))
228
-
229
  sentiment_counts = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[0]).value_counts()
230
  st.subheader(f"Sentiment Analysis Distribution ({time_filter})")
231
  st.write(sentiment_counts)
232
-
233
- sentiment_fig = display_sentiment_pie_chart(sentiment_counts)
 
 
 
 
 
234
  st.plotly_chart(sentiment_fig)
235
 
236
  query_results_df['sentiment_score'] = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[1])
237
-
238
  sentiment_time_fig = px.line(
239
  query_results_df,
240
  x='Timestamp',
@@ -242,7 +329,7 @@ def display_dashboard():
242
  title=f"Sentiment Score Over Time ({time_filter})"
243
  )
244
  st.plotly_chart(sentiment_time_fig)
245
-
246
  product_counts = query_results_df['product'].value_counts()
247
  st.subheader(f"Product Popularity ({time_filter})")
248
  st.write(product_counts)
@@ -267,55 +354,11 @@ def display_dashboard():
267
  )
268
  st.plotly_chart(recommended_products_fig)
269
 
270
- # Function to filter data by date
271
- def filter_data_by_date(query_results_df, time_filter):
272
- if time_filter == "Today":
273
- today = datetime.now().date()
274
- query_results_df['Timestamp'] = pd.to_datetime(query_results_df['Timestamp']).dt.date
275
- query_results_df = query_results_df[query_results_df['Timestamp'] == today]
276
- elif time_filter == "One Week":
277
- one_week_ago = datetime.now() - timedelta(weeks=1)
278
- query_results_df['Timestamp'] = pd.to_datetime(query_results_df['Timestamp'])
279
- query_results_df = query_results_df[query_results_df['Timestamp'] > one_week_ago]
280
- return query_results_df
281
-
282
- # Function for continuous speech interaction
283
- def continuous_interaction():
284
- recognizer = sr.Recognizer()
285
- microphone = sr.Microphone()
286
 
287
- st.write("Listening for your query...")
288
-
289
- while True:
290
- with microphone as source:
291
- recognizer.adjust_for_ambient_noise(source)
292
- audio = recognizer.listen(source)
293
-
294
- try:
295
- query = recognizer.recognize_google(audio)
296
- st.write(f"Your query: {query}")
297
-
298
- if is_greeting(query):
299
- respond_to_greeting()
300
- else:
301
- answer = find_answer(query)
302
- sentiment, score, emoji = analyze_sentiment_with_emoji(query)
303
- st.write(f"Answer: {answer}")
304
- st.write(f"Sentiment: {sentiment} {emoji}")
305
- st.write(f"Sentiment Score: {score}")
306
-
307
- except sr.UnknownValueError:
308
- st.write("Sorry, I couldn't understand that.")
309
- except sr.RequestError:
310
- st.write("Sorry, there was an error with the speech recognition service.")
311
-
312
- # Main function to run the interface
313
- if __name__ == "__main__":
314
- st.sidebar.title("Product Query Interface")
315
- mode = st.sidebar.selectbox("Select Mode", ["Speech Recognition", "Dashboard"])
316
-
317
  if mode == "Speech Recognition":
318
- if st.button('Start Listening'):
319
- continuous_interaction() # Start the speech recognition when button is clicked
320
  elif mode == "Dashboard":
321
- display_dashboard()
 
1
  import os
2
  import pyaudio
3
  import pandas as pd
4
+ import numpy as np
5
+ import requests
6
  from sentence_transformers import SentenceTransformer
7
  from sklearn.metrics.pairwise import cosine_similarity
 
8
  import time
9
  import speech_recognition as sr
10
  from textblob import TextBlob
11
  import streamlit as st
12
  import seaborn as sns
13
  import plotly.express as px
 
14
  from datetime import datetime, timedelta
15
  import gspread
16
+ from google.oauth2.service_account import Credentials
17
+
18
# Groq API setup.
# SECURITY: the API key must come from the environment, never from source
# control. The previously hard-coded key was committed to this file and
# should be rotated immediately.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"

# Paths for the CSV dataset (input) and the query-log CSV (output).
# NOTE(review): absolute machine-specific paths — consider making these
# configurable via environment variables as well.
csv_file_path = r"E:\second\context.csv"
output_csv_path = r"E:\second\contents (2).csv"

# Google Sheets setup: OAuth scopes and the service-account credentials file.
SCOPE = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
CREDS_PATH = r"C:\Users\Muthuraja\Downloads\modern-cycling-444916-g6-82c207d3eb47.json"
 
 
 
 
30
 
31
  # Initialize Google Sheets connection
32
  def initialize_google_sheets():
33
  credentials = Credentials.from_service_account_file(CREDS_PATH, scopes=SCOPE)
34
  try:
35
  client = gspread.authorize(credentials)
36
+ sheet = client.open("infosys").sheet1
37
  return sheet
38
  except gspread.exceptions.APIError as e:
39
  st.error(f"Google Sheets API error: {e}")
 
44
  # Function to safely load the CSV dataset
45
  def load_csv_safely(file_path):
46
  try:
47
+ df = pd.read_csv(file_path, encoding='latin1', on_bad_lines='skip')
48
+ required_columns = ['question', 'product', 'price', 'features', 'ratings', 'discount']
49
  for column in required_columns:
50
  if column not in df.columns:
51
+ raise Exception(f"CSV does not contain the required column: '{column}'")
52
 
53
  if 'Timestamp' not in df.columns:
54
+ df['Timestamp'] = pd.NaT
55
 
56
  return df
57
  except pd.errors.ParserError as e:
 
65
 
66
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
67
 
68
# Function to filter data by date
def filter_data_by_date(data, date_filter):
    """Return the rows of *data* whose 'Timestamp' falls inside *date_filter*.

    date_filter is one of "Today", "One Week", or anything else (no filtering,
    e.g. "All Time").  Works on a copy so the caller's DataFrame is never
    mutated (the original assigned the parsed column back onto the argument,
    risking SettingWithCopy warnings and surprising callers).  Unparseable
    timestamps become NaT and are dropped by the comparisons.
    """
    data = data.copy()
    data['Timestamp'] = pd.to_datetime(data['Timestamp'], errors='coerce')
    if date_filter == "Today":
        # Midnight today, local time.
        start_date = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        data = data[data['Timestamp'] >= start_date]
    elif date_filter == "One Week":
        start_date = datetime.now() - timedelta(weeks=1)
        data = data[data['Timestamp'] >= start_date]

    return data
79
+
80
# Function to get a response from Groq API
def get_groq_response(query):
    """Send *query* to the Groq chat-completions endpoint and return the reply text.

    Returns the assistant message content on success; otherwise returns a
    human-readable error string and surfaces the problem via st.error.
    """
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": "llama3-8b-8192",  # Update to the correct model ID used by Groq
        "messages": [{"role": "user", "content": query}]
    }

    try:
        # A timeout is essential: without one, requests.post can block forever
        # and freeze the Streamlit app if the API hangs.  A raised Timeout is a
        # RequestException, so the existing handler below covers it.
        response = requests.post(GROQ_API_URL, headers=headers, json=payload, timeout=30)
        response.raise_for_status()

        data = response.json()
        if 'choices' in data and len(data['choices']) > 0:
            return data['choices'][0]['message']['content']
        else:
            return "No response from Groq API."
    except requests.exceptions.RequestException as e:
        st.error(f"Error making request to Groq API: {e}")
        return "Error in API request."
105
+
106
# Function for speech recognition
def listen_to_speech():
    """Capture one utterance from the microphone and return it as text.

    Returns the recognized string, or None when the audio could not be
    understood, the Google service request failed, or any other error
    occurred (each failure is reported via st.error).
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        # Calibrate the energy threshold against ambient background noise first.
        recognizer.adjust_for_ambient_noise(source)
        st.write("Listening...")

        try:
            # Wait up to 5 s for speech to start; cap the phrase at 10 s.
            audio = recognizer.listen(source, timeout=5, phrase_time_limit=10)
            st.write("Recognizing...")
            # Google Web Speech recognizer — requires network access.
            text = recognizer.recognize_google(audio)
            st.write(f"Recognized: {text}")
            return text
        except sr.UnknownValueError:
            st.error("Sorry, I could not understand the audio.")
            return None
        except sr.RequestError:
            st.error("Could not request results from Google Speech Recognition service.")
            return None
        except Exception as e:
            # Catch-all (covers e.g. the listen() timeout) so one bad capture
            # doesn't kill the surrounding interaction loop.
            st.error(f"An error occurred: {e}")
            return None
128
 
129
  # Function to check if the text is a greeting
130
  def is_greeting(text):
 
142
  return product
143
  return None
144
 
145
+ # Function to search for relevant product details based on query
 
 
 
 
 
 
 
 
146
  def find_answer(query):
 
 
 
147
  if dataset is None:
148
  return "Dataset not loaded properly."
149
 
150
  query_embedding = embedding_model.encode([query])
151
+
152
+ combined_columns = dataset['question'].fillna('') + " " + dataset['product'].fillna('') + " " + dataset['features'].fillna('')
153
  combined_embeddings = embedding_model.encode(combined_columns.tolist())
154
 
155
  similarities = cosine_similarity(query_embedding, combined_embeddings)
156
 
157
+ similarity_threshold = 0.5
158
+ closest_idx = np.argmax(similarities)
159
+ highest_similarity = similarities[0][closest_idx]
160
 
161
  if highest_similarity < similarity_threshold:
162
  return "Sorry, no product found for your query."
 
167
  features = closest_question['features']
168
  ratings = closest_question['ratings']
169
  discount = closest_question['discount']
170
+
171
  if 'Timestamp' not in closest_question.index:
172
  closest_question['Timestamp'] = datetime.now()
173
 
 
191
  'features': features,
192
  'ratings': ratings,
193
  'discount': discount,
194
+ 'Timestamp': datetime.now()
 
195
  }
196
  new_entry_df = pd.DataFrame([new_entry])
197
  new_entry_df.to_csv(output_csv_path, mode='a', header=not os.path.exists(output_csv_path), index=False)
198
 
199
+ # Function for sentiment analysis with emojis
200
  def analyze_sentiment_with_emoji(text):
201
  blob = TextBlob(text)
202
  sentiment_score = blob.sentiment.polarity
 
211
  emoji = "😐"
212
  return sentiment, sentiment_score, emoji
213
 
214
# Function to provide product recommendations based on the query
def recommend_products(query):
    """Return the three products most semantically similar to *query*.

    Each recommendation is a dict with product/price/features/ratings/discount
    keys.  Pads with 'No recommendation available' placeholder entries when
    the dataset holds fewer than three rows.  Returns an error string if the
    dataset failed to load.
    """
    if dataset is None:
        return "Dataset not loaded properly."

    # Work on a local, cleaned copy of the product names instead of mutating
    # the shared module-level DataFrame as a hidden side effect of every call.
    products = dataset['product'].fillna('Unknown').astype(str)

    query_embedding = embedding_model.encode([query])
    dataset_embeddings = embedding_model.encode(products.tolist())
    similarities = cosine_similarity(query_embedding, dataset_embeddings)

    # Indices of the (up to) three highest-similarity rows, best first.
    top_indices = np.argsort(similarities[0])[-3:][::-1]

    recommendations = []
    for idx in top_indices:
        row = dataset.iloc[idx]
        recommendations.append({
            'product': products.iloc[idx],
            'price': row['price'],
            'features': row['features'],
            'ratings': row['ratings'],
            'discount': row['discount']
        })

    # Guarantee exactly three entries even for tiny datasets.
    while len(recommendations) < 3:
        recommendations.append({
            'product': 'No recommendation available',
            'price': 'N/A',
            'features': 'N/A',
            'ratings': 'N/A',
            'discount': 'N/A'
        })

    return recommendations
246
+
247
# Function to handle continuous interaction loop
def continuous_interaction():
    """Run the voice-driven product-query loop inside the Streamlit app.

    On button press, repeatedly: capture speech, short-circuit greetings,
    fetch a Groq response, show exact product details (or fall back to the
    semantic dataset answer), report sentiment, and list recommendations.
    """
    st.title("Speech Recognition with Product Queries")
    if st.button("Start Speech Recognition"):
        # NOTE(review): infinite loop — only ends when Streamlit stops or
        # reruns the script.
        while True:
            user_input = listen_to_speech()
            if user_input:
                if is_greeting(user_input):
                    respond_to_greeting()  # presumably defined elsewhere in the app — TODO confirm
                    continue

                # Use Groq API for a response to the query
                groq_response = get_groq_response(user_input)
                st.write(f"Groq Response: {groq_response}")

                # Process product name and provide details
                product_name = extract_product_name(user_input)
                if product_name:
                    st.write(f"Let me check the details for {product_name}:")
                    # Case-insensitive exact match against the product column.
                    product_details = dataset[dataset['product'].str.lower() == product_name.lower()]
                    if not product_details.empty:
                        product_info = product_details.iloc[0]
                        st.write(f"Product: {product_info['product']}")
                        st.write(f"Price: {product_info['price']}")
                        st.write(f"Features: {product_info['features']}")
                        st.write(f"Ratings: {product_info['ratings']}")
                        st.write(f"Discount: {product_info['discount']}%")
                    else:
                        st.write("Sorry, I couldn't find the product you're asking for.")
                else:
                    # No explicit product name: fall back to semantic search.
                    answer = find_answer(user_input)
                    st.write(f"Answer: {answer}")

                sentiment, sentiment_score, emoji = analyze_sentiment_with_emoji(user_input)
                st.write(f"Sentiment: {sentiment} (Score: {sentiment_score}) {emoji}")

                st.write("Here are some product recommendations based on your query: ")
                recommendations = recommend_products(user_input)
                for idx, rec in enumerate(recommendations, 1):
                    st.write(f"Recommendation {idx}:")
                    st.write(f"Product: {rec['product']}")
                    st.write(f"Price: {rec['price']}")
                    st.write(f"Features: {rec['features']}")
                    st.write(f"Ratings: {rec['ratings']}")
                    st.write(f"Discount: {rec['discount']}%")
                    st.write("---")
293
 
294
  # Dashboard for visualizations
295
  def display_dashboard():
296
  st.title("Product Dashboard")
297
  st.write("Welcome to the product query dashboard!")
298
+
299
+ time_filter = st.sidebar.selectbox("Select time period", ["All Time", "Today", "One Week"])
300
+
 
 
 
 
 
 
 
 
 
301
  query_results_df = pd.read_csv(output_csv_path, on_bad_lines='skip')
302
+
303
  if 'Timestamp' not in query_results_df.columns:
304
  query_results_df['Timestamp'] = pd.to_datetime('now')
305
+
 
 
 
306
  query_results_df = filter_data_by_date(query_results_df, time_filter)
307
+
308
  st.subheader(f"Recent Queries Summary ({time_filter})")
309
  st.write(query_results_df.tail(10))
310
+
311
  sentiment_counts = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[0]).value_counts()
312
  st.subheader(f"Sentiment Analysis Distribution ({time_filter})")
313
  st.write(sentiment_counts)
314
+
315
+ sentiment_fig = px.pie(
316
+ sentiment_counts,
317
+ names=sentiment_counts.index,
318
+ values=sentiment_counts.values,
319
+ title=f"Sentiment Distribution of Queries ({time_filter})"
320
+ )
321
  st.plotly_chart(sentiment_fig)
322
 
323
  query_results_df['sentiment_score'] = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[1])
324
+
325
  sentiment_time_fig = px.line(
326
  query_results_df,
327
  x='Timestamp',
 
329
  title=f"Sentiment Score Over Time ({time_filter})"
330
  )
331
  st.plotly_chart(sentiment_time_fig)
332
+
333
  product_counts = query_results_df['product'].value_counts()
334
  st.subheader(f"Product Popularity ({time_filter})")
335
  st.write(product_counts)
 
354
  )
355
  st.plotly_chart(recommended_products_fig)
356
 
357
# Main code to run the app: the sidebar radio selects exactly one view.
if __name__ == '__main__':
    mode = st.sidebar.radio("Select Mode", ("Speech Recognition", "Dashboard"))

    if mode == "Speech Recognition":
        # Voice-driven query loop (blocks inside its own while-loop).
        continuous_interaction()
    elif mode == "Dashboard":
        # Analytics view built from the logged query CSV.
        display_dashboard()