Files changed (1) hide show
  1. app.py +177 -134
app.py CHANGED
@@ -1,41 +1,39 @@
1
  import os
2
  import pyaudio
3
  import pandas as pd
 
 
4
  from sentence_transformers import SentenceTransformer
5
  from sklearn.metrics.pairwise import cosine_similarity
6
- import numpy as np
7
  import time
8
  import speech_recognition as sr
9
  from textblob import TextBlob
10
  import streamlit as st
11
  import seaborn as sns
12
  import plotly.express as px
13
- import requests
14
  from datetime import datetime, timedelta
15
  import gspread
16
- from google.oauth2.service_account import Credentials # For loading environment variables
17
- import random # For generating random customer IDs
 
 
 
18
 
19
- # Load environment variables from a .en
20
 
21
  # Set up paths for CSV files and Google Sheets credentials
22
- csv_file_path = "database1.csv"
23
- output_csv_path = "Book4.csv"
24
 
25
- # Load Google Sheets credentials from environment variable
26
  SCOPE = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
27
- CREDS_PATH = "modern-cycling-444916-g6-82c207d3eb47.json" # Path to your Google credentials JSON file
28
-
29
- # Use the provided Groq API key (you can also store this in .env)
30
- GROQ_API_KEY = "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"
31
- GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
32
 
33
  # Initialize Google Sheets connection
34
  def initialize_google_sheets():
35
  credentials = Credentials.from_service_account_file(CREDS_PATH, scopes=SCOPE)
36
  try:
37
  client = gspread.authorize(credentials)
38
- sheet = client.open("CRM_Interactions").sheet1 # Using CRM_Interactions as the sheet name
39
  return sheet
40
  except gspread.exceptions.APIError as e:
41
  st.error(f"Google Sheets API error: {e}")
@@ -46,14 +44,14 @@ sheet = initialize_google_sheets()
46
  # Function to safely load the CSV dataset
47
  def load_csv_safely(file_path):
48
  try:
49
- df = pd.read_csv(file_path, on_bad_lines='skip')
50
- required_columns = ['question', 'product', 'price', 'features', 'ratings', 'discount', 'customer_id']
51
  for column in required_columns:
52
  if column not in df.columns:
53
- raise Exception(f"CSV does not contain the required column: '{column}'. Please check your CSV.")
54
 
55
  if 'Timestamp' not in df.columns:
56
- df['Timestamp'] = pd.NaT # Initialize Timestamp column if it doesn't exist
57
 
58
  return df
59
  except pd.errors.ParserError as e:
@@ -67,24 +65,66 @@ dataset = load_csv_safely(csv_file_path)
67
 
68
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
69
 
70
- # Function to send a request to the Groq API
71
- def send_groq_request(query):
 
 
 
 
 
 
 
 
 
 
 
 
72
  headers = {
73
- 'Authorization': f'Bearer {GROQ_API_KEY}',
74
- 'Content-Type': 'application/json'
75
  }
76
-
77
  payload = {
78
- 'query': query
79
- }
80
-
 
 
81
  try:
82
  response = requests.post(GROQ_API_URL, headers=headers, json=payload)
83
- response.raise_for_status() # Will raise an HTTPError for bad responses (4xx or 5xx)
84
- return response.json() # Return the response in JSON format
 
 
 
 
 
85
  except requests.exceptions.RequestException as e:
86
- st.error(f"Error communicating with Groq API: {e}")
87
- return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
  # Function to check if the text is a greeting
90
  def is_greeting(text):
@@ -102,31 +142,21 @@ def extract_product_name(query):
102
  return product
103
  return None
104
 
105
- # Function to handle "more products" requests
106
- def handle_more_products_request(query):
107
- if "more products" in query.lower():
108
- # Select more products from the dataset. You can add filtering logic here.
109
- more_products = dataset[['product', 'price', 'features', 'ratings', 'discount']].head(5)
110
- return f"Here are some more products you might like:\n{more_products}"
111
- return None
112
-
113
- # Function to find the best answer to a query
114
  def find_answer(query):
115
- if "more products" in query.lower():
116
- return handle_more_products_request(query)
117
-
118
  if dataset is None:
119
  return "Dataset not loaded properly."
120
 
121
  query_embedding = embedding_model.encode([query])
122
- combined_columns = dataset['question'].fillna('') + " " + dataset['product'].fillna('') + " " + dataset['features'].fillna('')
 
123
  combined_embeddings = embedding_model.encode(combined_columns.tolist())
124
 
125
  similarities = cosine_similarity(query_embedding, combined_embeddings)
126
 
127
- similarity_threshold = 0.5
128
- closest_idx = np.argmax(similarities)
129
- highest_similarity = similarities[0][closest_idx]
130
 
131
  if highest_similarity < similarity_threshold:
132
  return "Sorry, no product found for your query."
@@ -137,7 +167,7 @@ def find_answer(query):
137
  features = closest_question['features']
138
  ratings = closest_question['ratings']
139
  discount = closest_question['discount']
140
-
141
  if 'Timestamp' not in closest_question.index:
142
  closest_question['Timestamp'] = datetime.now()
143
 
@@ -161,13 +191,12 @@ def save_query_to_csv(query, product_name, price, features, ratings, discount):
161
  'features': features,
162
  'ratings': ratings,
163
  'discount': discount,
164
- 'Timestamp': datetime.now(),
165
- 'customer_id': random.randint(1000, 9999) # Generate a random customer ID between 1000 and 9999
166
  }
167
  new_entry_df = pd.DataFrame([new_entry])
168
  new_entry_df.to_csv(output_csv_path, mode='a', header=not os.path.exists(output_csv_path), index=False)
169
 
170
- # Function to perform sentiment analysis with TextBlob
171
  def analyze_sentiment_with_emoji(text):
172
  blob = TextBlob(text)
173
  sentiment_score = blob.sentiment.polarity
@@ -182,59 +211,117 @@ def analyze_sentiment_with_emoji(text):
182
  emoji = "😐"
183
  return sentiment, sentiment_score, emoji
184
 
185
- # Updated pie chart function with percentages
186
- def display_sentiment_pie_chart(sentiment_counts):
187
- sentiment_fig = px.pie(
188
- sentiment_counts,
189
- names=sentiment_counts.index,
190
- values=sentiment_counts.values,
191
- title="Sentiment Distribution",
192
- hole=0.3 # For a donut chart (optional)
193
- )
 
 
 
 
 
 
 
 
 
 
 
 
194
 
195
- # Add percentage labels inside the slices
196
- sentiment_fig.update_traces(textinfo='percent+label', pull=[0.1, 0.1, 0.1])
 
 
 
 
 
 
197
 
198
- return sentiment_fig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
  # Dashboard for visualizations
201
  def display_dashboard():
202
  st.title("Product Dashboard")
203
  st.write("Welcome to the product query dashboard!")
204
-
205
- customer_ids = dataset['customer_id'].unique()
206
- selected_customer_id = st.sidebar.selectbox(
207
- "Select Customer ID",
208
- ["All Customers"] + customer_ids.tolist()
209
- )
210
-
211
- time_filter = st.sidebar.selectbox(
212
- "Select time period",
213
- ["All Time", "Today", "One Week"]
214
- )
215
-
216
  query_results_df = pd.read_csv(output_csv_path, on_bad_lines='skip')
217
-
218
  if 'Timestamp' not in query_results_df.columns:
219
  query_results_df['Timestamp'] = pd.to_datetime('now')
220
-
221
- if selected_customer_id != "All Customers":
222
- query_results_df = query_results_df[query_results_df['customer_id'] == selected_customer_id]
223
-
224
  query_results_df = filter_data_by_date(query_results_df, time_filter)
225
-
226
  st.subheader(f"Recent Queries Summary ({time_filter})")
227
  st.write(query_results_df.tail(10))
228
-
229
  sentiment_counts = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[0]).value_counts()
230
  st.subheader(f"Sentiment Analysis Distribution ({time_filter})")
231
  st.write(sentiment_counts)
232
-
233
- sentiment_fig = display_sentiment_pie_chart(sentiment_counts)
 
 
 
 
 
234
  st.plotly_chart(sentiment_fig)
235
 
236
  query_results_df['sentiment_score'] = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[1])
237
-
238
  sentiment_time_fig = px.line(
239
  query_results_df,
240
  x='Timestamp',
@@ -242,7 +329,7 @@ def display_dashboard():
242
  title=f"Sentiment Score Over Time ({time_filter})"
243
  )
244
  st.plotly_chart(sentiment_time_fig)
245
-
246
  product_counts = query_results_df['product'].value_counts()
247
  st.subheader(f"Product Popularity ({time_filter})")
248
  st.write(product_counts)
@@ -267,55 +354,11 @@ def display_dashboard():
267
  )
268
  st.plotly_chart(recommended_products_fig)
269
 
270
- # Function to filter data by date
271
- def filter_data_by_date(query_results_df, time_filter):
272
- if time_filter == "Today":
273
- today = datetime.now().date()
274
- query_results_df['Timestamp'] = pd.to_datetime(query_results_df['Timestamp']).dt.date
275
- query_results_df = query_results_df[query_results_df['Timestamp'] == today]
276
- elif time_filter == "One Week":
277
- one_week_ago = datetime.now() - timedelta(weeks=1)
278
- query_results_df['Timestamp'] = pd.to_datetime(query_results_df['Timestamp'])
279
- query_results_df = query_results_df[query_results_df['Timestamp'] > one_week_ago]
280
- return query_results_df
281
-
282
- # Function for continuous speech interaction
283
- def continuous_interaction():
284
- recognizer = sr.Recognizer()
285
- microphone = sr.Microphone()
286
 
287
- st.write("Listening for your query...")
288
-
289
- while True:
290
- with microphone as source:
291
- recognizer.adjust_for_ambient_noise(source)
292
- audio = recognizer.listen(source)
293
-
294
- try:
295
- query = recognizer.recognize_google(audio)
296
- st.write(f"Your query: {query}")
297
-
298
- if is_greeting(query):
299
- respond_to_greeting()
300
- else:
301
- answer = find_answer(query)
302
- sentiment, score, emoji = analyze_sentiment_with_emoji(query)
303
- st.write(f"Answer: {answer}")
304
- st.write(f"Sentiment: {sentiment} {emoji}")
305
- st.write(f"Sentiment Score: {score}")
306
-
307
- except sr.UnknownValueError:
308
- st.write("Sorry, I couldn't understand that.")
309
- except sr.RequestError:
310
- st.write("Sorry, there was an error with the speech recognition service.")
311
-
312
- # Main function to run the interface
313
- if __name__ == "__main__":
314
- st.sidebar.title("Product Query Interface")
315
- mode = st.sidebar.selectbox("Select Mode", ["Speech Recognition", "Dashboard"])
316
-
317
  if mode == "Speech Recognition":
318
- if st.button('Start Listening'):
319
- continuous_interaction() # Start the speech recognition when button is clicked
320
  elif mode == "Dashboard":
321
- display_dashboard()
 
1
  import os
2
  import pyaudio
3
  import pandas as pd
4
+ import numpy as np
5
+ import requests
6
  from sentence_transformers import SentenceTransformer
7
  from sklearn.metrics.pairwise import cosine_similarity
 
8
  import time
9
  import speech_recognition as sr
10
  from textblob import TextBlob
11
  import streamlit as st
12
  import seaborn as sns
13
  import plotly.express as px
 
14
  from datetime import datetime, timedelta
15
  import gspread
16
+ from google.oauth2.service_account import Credentials
17
+
18
# Groq API setup.
# SECURITY: the API key must come from the environment, never from source
# control. The previously hard-coded key was committed to this file and
# should be rotated immediately.
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
GROQ_API_URL = "https://api.groq.com/openai/v1/chat/completions"

# Paths for the CSV dataset (input) and the query-log CSV (output).
# NOTE(review): absolute machine-specific paths — consider making these
# configurable via environment variables as well.
csv_file_path = r"E:\second\context.csv"
output_csv_path = r"E:\second\contents (2).csv"

# Google Sheets setup: OAuth scopes and the service-account credentials file.
SCOPE = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
CREDS_PATH = r"C:\Users\Muthuraja\Downloads\modern-cycling-444916-g6-82c207d3eb47.json"
 
 
 
 
30
 
31
  # Initialize Google Sheets connection
32
  def initialize_google_sheets():
33
  credentials = Credentials.from_service_account_file(CREDS_PATH, scopes=SCOPE)
34
  try:
35
  client = gspread.authorize(credentials)
36
+ sheet = client.open("infosys").sheet1
37
  return sheet
38
  except gspread.exceptions.APIError as e:
39
  st.error(f"Google Sheets API error: {e}")
 
44
  # Function to safely load the CSV dataset
45
  def load_csv_safely(file_path):
46
  try:
47
+ df = pd.read_csv(file_path, encoding='latin1', on_bad_lines='skip')
48
+ required_columns = ['question', 'product', 'price', 'features', 'ratings', 'discount']
49
  for column in required_columns:
50
  if column not in df.columns:
51
+ raise Exception(f"CSV does not contain the required column: '{column}'")
52
 
53
  if 'Timestamp' not in df.columns:
54
+ df['Timestamp'] = pd.NaT
55
 
56
  return df
57
  except pd.errors.ParserError as e:
 
65
 
66
  embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
67
 
68
# Function to filter data by date
def filter_data_by_date(data, date_filter):
    """Return the rows of *data* whose 'Timestamp' falls inside *date_filter*.

    date_filter is one of "Today", "One Week", or anything else (no filtering,
    e.g. "All Time").  Works on a copy so the caller's DataFrame is never
    mutated (the original assigned the parsed column back onto the argument,
    risking SettingWithCopy warnings and surprising callers).  Unparseable
    timestamps become NaT and are dropped by the comparisons.
    """
    data = data.copy()
    data['Timestamp'] = pd.to_datetime(data['Timestamp'], errors='coerce')
    if date_filter == "Today":
        # Midnight today, local time.
        start_date = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
        data = data[data['Timestamp'] >= start_date]
    elif date_filter == "One Week":
        start_date = datetime.now() - timedelta(weeks=1)
        data = data[data['Timestamp'] >= start_date]

    return data
79
+
80
# Function to get a response from Groq API
def get_groq_response(query):
    """Send *query* to the Groq chat-completions endpoint and return the reply text.

    Returns the assistant message content on success; otherwise returns a
    human-readable error string and surfaces the problem via st.error.
    """
    headers = {
        "Authorization": f"Bearer {GROQ_API_KEY}",
        "Content-Type": "application/json"
    }

    payload = {
        "model": "llama3-8b-8192",  # Update to the correct model ID used by Groq
        "messages": [{"role": "user", "content": query}]
    }

    try:
        # A timeout is essential: without one, requests.post can block forever
        # and freeze the Streamlit app if the API hangs.  A raised Timeout is a
        # RequestException, so the existing handler below covers it.
        response = requests.post(GROQ_API_URL, headers=headers, json=payload, timeout=30)
        response.raise_for_status()

        data = response.json()
        if 'choices' in data and len(data['choices']) > 0:
            return data['choices'][0]['message']['content']
        else:
            return "No response from Groq API."
    except requests.exceptions.RequestException as e:
        st.error(f"Error making request to Groq API: {e}")
        return "Error in API request."
105
+
106
# Function for speech recognition
def listen_to_speech():
    """Capture one utterance from the microphone and return it as text.

    Returns the recognized string, or None when the audio could not be
    understood, the Google service request failed, or any other error
    occurred (each failure is reported via st.error).
    """
    recognizer = sr.Recognizer()
    with sr.Microphone() as source:
        # Calibrate the energy threshold against ambient background noise first.
        recognizer.adjust_for_ambient_noise(source)
        st.write("Listening...")

        try:
            # Wait up to 5 s for speech to start; cap the phrase at 10 s.
            audio = recognizer.listen(source, timeout=5, phrase_time_limit=10)
            st.write("Recognizing...")
            # Google Web Speech recognizer — requires network access.
            text = recognizer.recognize_google(audio)
            st.write(f"Recognized: {text}")
            return text
        except sr.UnknownValueError:
            st.error("Sorry, I could not understand the audio.")
            return None
        except sr.RequestError:
            st.error("Could not request results from Google Speech Recognition service.")
            return None
        except Exception as e:
            # Catch-all (covers e.g. the listen() timeout) so one bad capture
            # doesn't kill the surrounding interaction loop.
            st.error(f"An error occurred: {e}")
            return None
128
 
129
  # Function to check if the text is a greeting
130
  def is_greeting(text):
 
142
  return product
143
  return None
144
 
145
+ # Function to search for relevant product details based on query
 
 
 
 
 
 
 
 
146
  def find_answer(query):
 
 
 
147
  if dataset is None:
148
  return "Dataset not loaded properly."
149
 
150
  query_embedding = embedding_model.encode([query])
151
+
152
+ combined_columns = dataset['question'].fillna('') + " " + dataset['product'].fillna('') + " " + dataset['features'].fillna('')
153
  combined_embeddings = embedding_model.encode(combined_columns.tolist())
154
 
155
  similarities = cosine_similarity(query_embedding, combined_embeddings)
156
 
157
+ similarity_threshold = 0.5
158
+ closest_idx = np.argmax(similarities)
159
+ highest_similarity = similarities[0][closest_idx]
160
 
161
  if highest_similarity < similarity_threshold:
162
  return "Sorry, no product found for your query."
 
167
  features = closest_question['features']
168
  ratings = closest_question['ratings']
169
  discount = closest_question['discount']
170
+
171
  if 'Timestamp' not in closest_question.index:
172
  closest_question['Timestamp'] = datetime.now()
173
 
 
191
  'features': features,
192
  'ratings': ratings,
193
  'discount': discount,
194
+ 'Timestamp': datetime.now()
 
195
  }
196
  new_entry_df = pd.DataFrame([new_entry])
197
  new_entry_df.to_csv(output_csv_path, mode='a', header=not os.path.exists(output_csv_path), index=False)
198
 
199
+ # Function for sentiment analysis with emojis
200
  def analyze_sentiment_with_emoji(text):
201
  blob = TextBlob(text)
202
  sentiment_score = blob.sentiment.polarity
 
211
  emoji = "😐"
212
  return sentiment, sentiment_score, emoji
213
 
214
# Function to provide product recommendations based on the query
def recommend_products(query):
    """Return the three products most semantically similar to *query*.

    Each recommendation is a dict with product/price/features/ratings/discount
    keys.  Pads with 'No recommendation available' placeholder entries when
    the dataset holds fewer than three rows.  Returns an error string if the
    dataset failed to load.
    """
    if dataset is None:
        return "Dataset not loaded properly."

    # Work on a local, cleaned copy of the product names instead of mutating
    # the shared module-level DataFrame as a hidden side effect of every call.
    products = dataset['product'].fillna('Unknown').astype(str)

    query_embedding = embedding_model.encode([query])
    dataset_embeddings = embedding_model.encode(products.tolist())
    similarities = cosine_similarity(query_embedding, dataset_embeddings)

    # Indices of the (up to) three highest-similarity rows, best first.
    top_indices = np.argsort(similarities[0])[-3:][::-1]

    recommendations = []
    for idx in top_indices:
        row = dataset.iloc[idx]
        recommendations.append({
            'product': products.iloc[idx],
            'price': row['price'],
            'features': row['features'],
            'ratings': row['ratings'],
            'discount': row['discount']
        })

    # Guarantee exactly three entries even for tiny datasets.
    while len(recommendations) < 3:
        recommendations.append({
            'product': 'No recommendation available',
            'price': 'N/A',
            'features': 'N/A',
            'ratings': 'N/A',
            'discount': 'N/A'
        })

    return recommendations
246
+
247
# Function to handle continuous interaction loop
def continuous_interaction():
    """Run the voice-driven product-query loop inside the Streamlit app.

    On button press, repeatedly: capture speech, short-circuit greetings,
    fetch a Groq response, show exact product details (or fall back to the
    semantic dataset answer), report sentiment, and list recommendations.
    """
    st.title("Speech Recognition with Product Queries")
    if st.button("Start Speech Recognition"):
        # NOTE(review): infinite loop — only ends when Streamlit stops or
        # reruns the script.
        while True:
            user_input = listen_to_speech()
            if user_input:
                if is_greeting(user_input):
                    respond_to_greeting()  # presumably defined elsewhere in the app — TODO confirm
                    continue

                # Use Groq API for a response to the query
                groq_response = get_groq_response(user_input)
                st.write(f"Groq Response: {groq_response}")

                # Process product name and provide details
                product_name = extract_product_name(user_input)
                if product_name:
                    st.write(f"Let me check the details for {product_name}:")
                    # Case-insensitive exact match against the product column.
                    product_details = dataset[dataset['product'].str.lower() == product_name.lower()]
                    if not product_details.empty:
                        product_info = product_details.iloc[0]
                        st.write(f"Product: {product_info['product']}")
                        st.write(f"Price: {product_info['price']}")
                        st.write(f"Features: {product_info['features']}")
                        st.write(f"Ratings: {product_info['ratings']}")
                        st.write(f"Discount: {product_info['discount']}%")
                    else:
                        st.write("Sorry, I couldn't find the product you're asking for.")
                else:
                    # No explicit product name: fall back to semantic search.
                    answer = find_answer(user_input)
                    st.write(f"Answer: {answer}")

                sentiment, sentiment_score, emoji = analyze_sentiment_with_emoji(user_input)
                st.write(f"Sentiment: {sentiment} (Score: {sentiment_score}) {emoji}")

                st.write("Here are some product recommendations based on your query: ")
                recommendations = recommend_products(user_input)
                for idx, rec in enumerate(recommendations, 1):
                    st.write(f"Recommendation {idx}:")
                    st.write(f"Product: {rec['product']}")
                    st.write(f"Price: {rec['price']}")
                    st.write(f"Features: {rec['features']}")
                    st.write(f"Ratings: {rec['ratings']}")
                    st.write(f"Discount: {rec['discount']}%")
                    st.write("---")
293
 
294
  # Dashboard for visualizations
295
  def display_dashboard():
296
  st.title("Product Dashboard")
297
  st.write("Welcome to the product query dashboard!")
298
+
299
+ time_filter = st.sidebar.selectbox("Select time period", ["All Time", "Today", "One Week"])
300
+
 
 
 
 
 
 
 
 
 
301
  query_results_df = pd.read_csv(output_csv_path, on_bad_lines='skip')
302
+
303
  if 'Timestamp' not in query_results_df.columns:
304
  query_results_df['Timestamp'] = pd.to_datetime('now')
305
+
 
 
 
306
  query_results_df = filter_data_by_date(query_results_df, time_filter)
307
+
308
  st.subheader(f"Recent Queries Summary ({time_filter})")
309
  st.write(query_results_df.tail(10))
310
+
311
  sentiment_counts = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[0]).value_counts()
312
  st.subheader(f"Sentiment Analysis Distribution ({time_filter})")
313
  st.write(sentiment_counts)
314
+
315
+ sentiment_fig = px.pie(
316
+ sentiment_counts,
317
+ names=sentiment_counts.index,
318
+ values=sentiment_counts.values,
319
+ title=f"Sentiment Distribution of Queries ({time_filter})"
320
+ )
321
  st.plotly_chart(sentiment_fig)
322
 
323
  query_results_df['sentiment_score'] = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[1])
324
+
325
  sentiment_time_fig = px.line(
326
  query_results_df,
327
  x='Timestamp',
 
329
  title=f"Sentiment Score Over Time ({time_filter})"
330
  )
331
  st.plotly_chart(sentiment_time_fig)
332
+
333
  product_counts = query_results_df['product'].value_counts()
334
  st.subheader(f"Product Popularity ({time_filter})")
335
  st.write(product_counts)
 
354
  )
355
  st.plotly_chart(recommended_products_fig)
356
 
357
# Main code to run the app: the sidebar radio selects exactly one view.
if __name__ == '__main__':
    mode = st.sidebar.radio("Select Mode", ("Speech Recognition", "Dashboard"))

    if mode == "Speech Recognition":
        # Voice-driven query loop (blocks inside its own while-loop).
        continuous_interaction()
    elif mode == "Dashboard":
        # Analytics view built from the logged query CSV.
        display_dashboard()