Spaces:

Muthuraja18
/

query_analysis

Sleeping

App Files Files Community

Update app.py

#26

by Muthuraja18 - opened Feb 6, 2025

base: refs/heads/main

←

from: refs/pr/26

Discussion Files changed

+137

-150

Files changed (1) hide show

app.py +137 -150

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import os
 import pyaudio
 import pandas as pd
 from sentence_transformers import SentenceTransformer
@@ -10,36 +10,46 @@ from textblob import TextBlob
 import streamlit as st
 import seaborn as sns
 import plotly.express as px
 from datetime import datetime, timedelta
 import gspread
 from google.oauth2.service_account import Credentials
 # Set up paths for CSV files and Google Sheets credentials
-csv_file_path = "context.csv"  # Path to CSV file with product info
-output_csv_path = "contents.csv"  # Path to save query results
-# Google Sheets setup
 SCOPE = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
-CREDS_PATH = "modern-cycling-444916-g6-82c207d3eb47.json"  # Google credentials path
 # Initialize Google Sheets connection
 def initialize_google_sheets():
     credentials = Credentials.from_service_account_file(CREDS_PATH, scopes=SCOPE)
     try:
         client = gspread.authorize(credentials)
-        sheet = client.open("infosys").sheet1  # Accessing the Google Sheet "SalesStores"
         return sheet
     except gspread.exceptions.APIError as e:
         st.error(f"Google Sheets API error: {e}")
         return None
-sheet = initialize_google_sheets()  # Initialize Google Sheets connection
 # Function to safely load the CSV dataset
 def load_csv_safely(file_path):
     try:
-        df = pd.read_csv(file_path, on_bad_lines='skip')  # Handles malformed lines in CSV
-        required_columns = ['question', 'product', 'price', 'features', 'ratings', 'discount']
         for column in required_columns:
             if column not in df.columns:
                 raise Exception(f"CSV does not contain the required column: '{column}'. Please check your CSV.")
@@ -55,44 +65,28 @@ def load_csv_safely(file_path):
         st.error(f"An error occurred: {e}")
         return None
-dataset = load_csv_safely(csv_file_path)  # Load dataset safely
-embedding_model = SentenceTransformer('all-MiniLM-L6-v2')  # Pre-trained sentence transformer model for embeddings
-# Function to filter data by date
-def filter_data_by_date(data, date_filter):
-    data['Timestamp'] = pd.to_datetime(data['Timestamp'], errors='coerce')
-    if date_filter == "Today":
-        start_date = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
-        data = data[data['Timestamp'] >= start_date]
-    elif date_filter == "One Week":
-        start_date = datetime.now() - timedelta(weeks=1)
-        data = data[data['Timestamp'] >= start_date]
-    return data
-# Function for speech recognition
-def listen_to_speech():
-    recognizer = sr.Recognizer()
-    with sr.Microphone() as source:
-        recognizer.adjust_for_ambient_noise(source)
-        st.write("Listening...")
-        try:
-            audio = recognizer.listen(source, timeout=5, phrase_time_limit=10)
-            st.write("Recognizing...")
-            text = recognizer.recognize_google(audio)
-            st.write(f"Recognized: {text}")
-            return text
-        except sr.UnknownValueError:
-            st.error("Sorry, I could not understand the audio.")
-            return None
-        except sr.RequestError:
-            st.error("Could not request results from Google Speech Recognition service.")
-            return None
-        except Exception as e:
-            st.error(f"An error occurred: {e}")
-            return None
 # Function to check if the text is a greeting
 def is_greeting(text):
@@ -110,39 +104,42 @@ def extract_product_name(query):
             return product
     return None
-# Function to search for relevant product details based on query (not relying on product name explicitly)
 def find_answer(query):
     if dataset is None:
         return "Dataset not loaded properly."
-    # Create embeddings for the query and all possible columns (product, features, question)
     query_embedding = embedding_model.encode([query])
-    # Generate embeddings for all questions, products, and features to find relevance
-    combined_columns = dataset['question'].fillna('') + " " + dataset['product'].fillna('') + " " + dataset['features'].fillna('')
     combined_embeddings = embedding_model.encode(combined_columns.tolist())
-    # Calculate cosine similarity between the query embedding and each product's combined embeddings
     similarities = cosine_similarity(query_embedding, combined_embeddings)
-    # Set a threshold for similarity to determine if the query matches any product
-    similarity_threshold = 0.5  # You can adjust this threshold based on how strict you want the match
-    closest_idx = np.argmax(similarities)  # Index of the closest match
-    highest_similarity = similarities[0][closest_idx]  # Highest similarity score
-    # If no match is found above the threshold, return "No matching product found"
     if highest_similarity < similarity_threshold:
         return "Sorry, no product found for your query."
-    # Get the details for the closest match
     closest_question = dataset.iloc[closest_idx]
     product_name = closest_question['product']
     price = closest_question['price']
     features = closest_question['features']
     ratings = closest_question['ratings']
     discount = closest_question['discount']
     if 'Timestamp' not in closest_question.index:
         closest_question['Timestamp'] = datetime.now()
@@ -166,12 +163,13 @@ def save_query_to_csv(query, product_name, price, features, ratings, discount):
         'features': features,
         'ratings': ratings,
         'discount': discount,
-        'Timestamp': datetime.now()
     }
     new_entry_df = pd.DataFrame([new_entry])
     new_entry_df.to_csv(output_csv_path, mode='a', header=not os.path.exists(output_csv_path), index=False)
-# Function for sentiment analysis with emojis
 def analyze_sentiment_with_emoji(text):
     blob = TextBlob(text)
     sentiment_score = blob.sentiment.polarity
@@ -186,114 +184,59 @@ def analyze_sentiment_with_emoji(text):
         emoji = "😐"
     return sentiment, sentiment_score, emoji
-# Function to provide product recommendations based on the query
-def recommend_products(query):
-    if dataset is None:
-        return "Dataset not loaded properly."
-    dataset['product'] = dataset['product'].fillna('Unknown').astype(str)
-    query_embedding = embedding_model.encode([query])
-    dataset_embeddings = embedding_model.encode(dataset['product'].tolist())
-    similarities = cosine_similarity(query_embedding, dataset_embeddings)
-    top_indices = np.argsort(similarities[0])[-3:][::-1]
-    recommendations = []
-    for idx in top_indices:
-        product = dataset.iloc[idx]
-        recommendations.append({
-            'product': product['product'],
-            'price': product['price'],
-            'features': product['features'],
-            'ratings': product['ratings'],
-            'discount': product['discount']
-        })
-    while len(recommendations) < 3:
-        recommendations.append({
-            'product': 'No recommendation available',
-            'price': 'N/A',
-            'features': 'N/A',
-            'ratings': 'N/A',
-            'discount': 'N/A'
-        })
-    return recommendations
-# Function to handle continuous interaction loop
-def continuous_interaction():
-    st.title("Speech Recognition with Product Queries")
-    if st.button("Start Speech Recognition"):
-        while True:
-            user_input = listen_to_speech()
-            if user_input:
-                if is_greeting(user_input):
-                    respond_to_greeting()
-                    continue
-                product_name = extract_product_name(user_input)
-                if product_name:
-                    st.write(f"Let me check the details for {product_name}:")
-                    product_details = dataset[dataset['product'].str.lower() == product_name.lower()]
-                    if not product_details.empty:
-                        product_info = product_details.iloc[0]
-                        st.write(f"Product: {product_info['product']}")
-                        st.write(f"Price: {product_info['price']}")
-                        st.write(f"Features: {product_info['features']}")
-                        st.write(f"Ratings: {product_info['ratings']}")
-                        st.write(f"Discount: {product_info['discount']}%")
-                    else:
-                        st.write("Sorry, I couldn't find the product you're asking for.")
-                else:
-                    answer = find_answer(user_input)
-                    st.write(f"Answer: {answer}")
-                sentiment, sentiment_score, emoji = analyze_sentiment_with_emoji(user_input)
-                st.write(f"Sentiment: {sentiment} (Score: {sentiment_score}) {emoji}")
-                st.write("Here are some product recommendations based on your query: ")
-                recommendations = recommend_products(user_input)
-                for idx, rec in enumerate(recommendations, 1):
-                    st.write(f"Recommendation {idx}:")
-                    st.write(f"Product: {rec['product']}")
-                    st.write(f"Price: {rec['price']}")
-                    st.write(f"Features: {rec['features']}")
-                    st.write(f"Ratings: {rec['ratings']}")
-                    st.write(f"Discount: {rec['discount']}%")
-                    st.write("---")
 # Dashboard for visualizations
 def display_dashboard():
     st.title("Product Dashboard")
     st.write("Welcome to the product query dashboard!")
     time_filter = st.sidebar.selectbox(
         "Select time period",
         ["All Time", "Today", "One Week"]
     )
     query_results_df = pd.read_csv(output_csv_path, on_bad_lines='skip')
     if 'Timestamp' not in query_results_df.columns:
         query_results_df['Timestamp'] = pd.to_datetime('now')
     query_results_df = filter_data_by_date(query_results_df, time_filter)
     st.subheader(f"Recent Queries Summary ({time_filter})")
     st.write(query_results_df.tail(10))
     sentiment_counts = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[0]).value_counts()
     st.subheader(f"Sentiment Analysis Distribution ({time_filter})")
     st.write(sentiment_counts)
-    sentiment_fig = px.pie(
-        sentiment_counts,
-        names=sentiment_counts.index,
-        values=sentiment_counts.values,
-        title=f"Sentiment Distribution of Queries ({time_filter})"
-    )
     st.plotly_chart(sentiment_fig)
     query_results_df['sentiment_score'] = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[1])
     sentiment_time_fig = px.line(
         query_results_df,
         x='Timestamp',
@@ -301,7 +244,7 @@ def display_dashboard():
         title=f"Sentiment Score Over Time ({time_filter})"
     )
     st.plotly_chart(sentiment_time_fig)
     product_counts = query_results_df['product'].value_counts()
     st.subheader(f"Product Popularity ({time_filter})")
     st.write(product_counts)
@@ -326,11 +269,55 @@ def display_dashboard():
     )
     st.plotly_chart(recommended_products_fig)
-# Main code to run the app
-if __name__ == '__main__':
-    mode = st.sidebar.radio("Select Mode", ("Speech Recognition", "Dashboard"))
     if mode == "Speech Recognition":
-        continuous_interaction()
     elif mode == "Dashboard":
         display_dashboard()

+import os
 import pyaudio
 import pandas as pd
 from sentence_transformers import SentenceTransformer
 import streamlit as st
 import seaborn as sns
 import plotly.express as px
+import requests
 from datetime import datetime, timedelta
 import gspread
 from google.oauth2.service_account import Credentials
+from dotenv import load_dotenv  # For loading environment variables
+import random  # For generating random customer IDs
+# Load environment variables from a .env file
+load_dotenv()
 # Set up paths for CSV files and Google Sheets credentials
# Data locations. Each one can be overridden through the environment (or the
# .env file loaded above) so the app is not tied to a single developer machine;
# the defaults are the paths that used to be hard-coded here.
csv_file_path = os.getenv(
    "CSV_FILE_PATH",
    r"C:\Users\Muthuraja\OneDrive\Attachments\Desktop\second\database1.csv",
)
output_csv_path = os.getenv(
    "OUTPUT_CSV_PATH",
    r"C:\Users\Muthuraja\OneDrive\Attachments\Desktop\second\Book4.csv",
)

# Google Sheets access: OAuth scopes and the service-account key file.
SCOPE = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
CREDS_PATH = os.getenv(
    "GOOGLE_CREDS_PATH",
    r"C:\Users\Muthuraja\Downloads\modern-cycling-444916-g6-82c207d3eb47.json",
)

# Groq API settings. The key is a secret, so it is read from the environment
# and is None when GROQ_API_KEY is not set. It must never be committed:
# NOTE(review): an earlier revision embedded a literal key on this line; treat
# that key as compromised and rotate it.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
GROQ_API_URL = 'https://api.groq.com/openai/v1/chat/completions'
 # Initialize Google Sheets connection
 def initialize_google_sheets():
     """Open the first worksheet of the "CRM_Interactions" spreadsheet.

     Credentials are loaded from the service-account file at ``CREDS_PATH``
     with the scopes in ``SCOPE``.

     Returns:
         The worksheet object, or ``None`` when the credentials cannot be
         loaded or gspread reports an error (the problem is shown through
         ``st.error``).
     """
     try:
         # Loading the key file is inside the try block so that a missing or
         # malformed file is reported in the UI instead of aborting the script.
         credentials = Credentials.from_service_account_file(CREDS_PATH, scopes=SCOPE)
         client = gspread.authorize(credentials)
         return client.open("CRM_Interactions").sheet1  # Using CRM_Interactions as the sheet name
     except (OSError, ValueError) as e:
         st.error(f"Could not load Google credentials from '{CREDS_PATH}': {e}")
     except gspread.exceptions.GSpreadException as e:
         # GSpreadException is the base class, so this also covers APIError
         # and SpreadsheetNotFound.
         st.error(f"Google Sheets API error: {e}")
     return None
+sheet = initialize_google_sheets()
 # Function to safely load the CSV dataset
 def load_csv_safely(file_path):
     try:
+        df = pd.read_csv(file_path, on_bad_lines='skip')
+        required_columns = ['question', 'product', 'price', 'features', 'ratings', 'discount', 'customer_id']
         for column in required_columns:
             if column not in df.columns:
                 raise Exception(f"CSV does not contain the required column: '{column}'. Please check your CSV.")
         st.error(f"An error occurred: {e}")
         return None
+dataset = load_csv_safely(csv_file_path)
+embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
# Function to send a request to the Groq API
def send_groq_request(query, model="llama-3.1-8b-instant", timeout=30):
    """Send ``query`` to the Groq chat-completions endpoint as one user message.

    Args:
        query: Text of the user message.
        model: Model identifier placed in the request body.
            NOTE(review): the default is an assumption; confirm it against
            Groq's current model list.
        timeout: Seconds to wait for the HTTP call before giving up.

    Returns:
        The decoded JSON response body, or ``None`` when the request fails
        (the failure is shown through ``st.error``).
    """
    headers = {
        'Authorization': f'Bearer {GROQ_API_KEY}',
        'Content-Type': 'application/json'
    }
    # GROQ_API_URL is an OpenAI-compatible chat-completions route: the body
    # must carry a model name and a list of chat messages, not a bare
    # "query" field.
    payload = {
        'model': model,
        'messages': [{'role': 'user', 'content': query}],
    }
    try:
        # Without a timeout a stalled connection would block the app forever.
        response = requests.post(GROQ_API_URL, headers=headers, json=payload, timeout=timeout)
        response.raise_for_status()  # Raises an HTTPError for 4xx/5xx responses
        return response.json()
    except requests.exceptions.RequestException as e:
        st.error(f"Error communicating with Groq API: {e}")
        return None
 # Function to check if the text is a greeting
 def is_greeting(text):
             return product
     return None
# Function to handle "more products" requests
def handle_more_products_request(query):
    """Answer a query that asks for "more products".

    Args:
        query: The user's query text; matched case-insensitively.

    Returns:
        ``None`` when the query does not contain "more products". Otherwise a
        message listing the first five rows of the module-level ``dataset``
        (product, price, features, ratings, discount), or
        "Dataset not loaded properly." when ``dataset`` is ``None``.
    """
    if "more products" not in query.lower():
        return None
    # find_answer delegates here before its own dataset check, so the guard
    # has to live in this function as well.
    if dataset is None:
        return "Dataset not loaded properly."
    # Select more products from the dataset. You can add filtering logic here.
    more_products = dataset[['product', 'price', 'features', 'ratings', 'discount']].head(5)
    return f"Here are some more products you might like:\n{more_products}"
+# Function to find the best answer to a query
 def find_answer(query):
+    if "more products" in query.lower():
+        return handle_more_products_request(query)
     if dataset is None:
         return "Dataset not loaded properly."
     query_embedding = embedding_model.encode([query])
+    combined_columns = dataset['question'].fillna('') + " " + dataset['product'].fillna('') + " " + dataset['features'].fillna('')
     combined_embeddings = embedding_model.encode(combined_columns.tolist())
     similarities = cosine_similarity(query_embedding, combined_embeddings)
+    similarity_threshold = 0.5
+    closest_idx = np.argmax(similarities)
+    highest_similarity = similarities[0][closest_idx]
     if highest_similarity < similarity_threshold:
         return "Sorry, no product found for your query."
     closest_question = dataset.iloc[closest_idx]
     product_name = closest_question['product']
     price = closest_question['price']
     features = closest_question['features']
     ratings = closest_question['ratings']
     discount = closest_question['discount']
     if 'Timestamp' not in closest_question.index:
         closest_question['Timestamp'] = datetime.now()
         'features': features,
         'ratings': ratings,
         'discount': discount,
+        'Timestamp': datetime.now(),
+        'customer_id': random.randint(1000, 9999)  # Generate a random customer ID between 1000 and 9999
     }
     new_entry_df = pd.DataFrame([new_entry])
     new_entry_df.to_csv(output_csv_path, mode='a', header=not os.path.exists(output_csv_path), index=False)
+# Function to perform sentiment analysis with TextBlob
 def analyze_sentiment_with_emoji(text):
     blob = TextBlob(text)
     sentiment_score = blob.sentiment.polarity
         emoji = "😐"
     return sentiment, sentiment_score, emoji
# Updated pie chart function with percentages
def display_sentiment_pie_chart(sentiment_counts):
    """Build the donut-style sentiment chart.

    Args:
        sentiment_counts: Counts per sentiment label; its ``index`` supplies
            the slice names and its ``values`` the slice sizes (the dashboard
            passes the result of ``value_counts()``).

    Returns:
        The Plotly figure, with percentage and label text on each slice.
    """
    slice_labels = sentiment_counts.index
    slice_sizes = sentiment_counts.values

    chart = px.pie(
        sentiment_counts,
        names=slice_labels,
        values=slice_sizes,
        title="Sentiment Distribution",
        hole=0.3,  # A non-zero hole renders the pie as a donut
    )

    # Label every slice with its share and name, and pull the first three
    # slices slightly away from the centre.
    chart.update_traces(textinfo='percent+label', pull=[0.1, 0.1, 0.1])
    return chart
 # Dashboard for visualizations
 def display_dashboard():
     st.title("Product Dashboard")
     st.write("Welcome to the product query dashboard!")
+    customer_ids = dataset['customer_id'].unique()
+    selected_customer_id = st.sidebar.selectbox(
+        "Select Customer ID",
+        ["All Customers"] + customer_ids.tolist()
+    )
     time_filter = st.sidebar.selectbox(
         "Select time period",
         ["All Time", "Today", "One Week"]
     )
     query_results_df = pd.read_csv(output_csv_path, on_bad_lines='skip')
     if 'Timestamp' not in query_results_df.columns:
         query_results_df['Timestamp'] = pd.to_datetime('now')
+    if selected_customer_id != "All Customers":
+        query_results_df = query_results_df[query_results_df['customer_id'] == selected_customer_id]
     query_results_df = filter_data_by_date(query_results_df, time_filter)
     st.subheader(f"Recent Queries Summary ({time_filter})")
     st.write(query_results_df.tail(10))
     sentiment_counts = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[0]).value_counts()
     st.subheader(f"Sentiment Analysis Distribution ({time_filter})")
     st.write(sentiment_counts)
+    sentiment_fig = display_sentiment_pie_chart(sentiment_counts)
     st.plotly_chart(sentiment_fig)
     query_results_df['sentiment_score'] = query_results_df['question'].apply(lambda x: analyze_sentiment_with_emoji(x)[1])
     sentiment_time_fig = px.line(
         query_results_df,
         x='Timestamp',
         title=f"Sentiment Score Over Time ({time_filter})"
     )
     st.plotly_chart(sentiment_time_fig)
     product_counts = query_results_df['product'].value_counts()
     st.subheader(f"Product Popularity ({time_filter})")
     st.write(product_counts)
     )
     st.plotly_chart(recommended_products_fig)
# Function to filter data by date
def filter_data_by_date(query_results_df, time_filter):
    """Restrict query results to the selected time period.

    Args:
        query_results_df: DataFrame with a 'Timestamp' column.
        time_filter: "Today" keeps rows dated today, "One Week" keeps rows
            newer than seven days ago; any other value applies no filtering.

    Returns:
        For "Today" and "One Week", a new DataFrame containing the matching
        rows with 'Timestamp' parsed to datetimes. For any other value the
        input is returned unchanged. The input frame is never modified.
    """
    if time_filter not in ("Today", "One Week"):
        return query_results_df

    # Work on a copy: the caller may pass a filtered slice, and writing the
    # parsed column back into it would mutate (or warn about) the original.
    filtered = query_results_df.copy()
    # Unparseable timestamps become NaT and simply never match a period,
    # instead of raising and taking the whole dashboard down.
    filtered['Timestamp'] = pd.to_datetime(filtered['Timestamp'], errors='coerce')

    now = datetime.now()
    if time_filter == "Today":
        # Compare on the calendar day but keep the full datetime in the
        # column so time-based charts still have a time axis.
        start_of_today = pd.Timestamp(now.date())
        keep = filtered['Timestamp'].dt.normalize() == start_of_today
    else:
        one_week_ago = now - timedelta(weeks=1)
        keep = filtered['Timestamp'] > one_week_ago
    return filtered[keep]
# Function for continuous speech interaction
def continuous_interaction():
    """Listen on the microphone in a loop and answer each spoken query.

    Each recognised utterance is echoed to the page. Greetings are handled by
    ``respond_to_greeting``; anything else is answered with ``find_answer``
    and shown together with its sentiment label, emoji and score.

    The loop has no exit condition of its own. If the microphone cannot be
    opened, an error is shown and the function returns immediately.
    """
    recognizer = sr.Recognizer()
    try:
        microphone = sr.Microphone()
        # Calibrate for background noise once up front rather than before
        # every utterance, which added a blocking pause to each turn.
        with microphone as source:
            recognizer.adjust_for_ambient_noise(source)
    except OSError as e:
        # Raised when no audio input device is available on the host.
        st.error(f"Microphone is not available: {e}")
        return

    st.write("Listening for your query...")
    while True:
        try:
            with microphone as source:
                # Bound both the wait for speech and the length of a phrase so
                # one call cannot block or record indefinitely.
                audio = recognizer.listen(source, timeout=5, phrase_time_limit=10)
            query = recognizer.recognize_google(audio)
        except sr.WaitTimeoutError:
            # Nobody spoke within the timeout; keep waiting.
            continue
        except sr.UnknownValueError:
            st.write("Sorry, I couldn't understand that.")
            continue
        except sr.RequestError:
            st.write("Sorry, there was an error with the speech recognition service.")
            continue

        st.write(f"Your query: {query}")
        if is_greeting(query):
            respond_to_greeting()
        else:
            answer = find_answer(query)
            sentiment, score, emoji = analyze_sentiment_with_emoji(query)
            st.write(f"Answer: {answer}")
            st.write(f"Sentiment: {sentiment} {emoji}")
            st.write(f"Sentiment Score: {score}")
# Entry point: draw the sidebar and dispatch to the selected mode
if __name__ == "__main__":
    st.sidebar.title("Product Query Interface")
    mode = st.sidebar.selectbox("Select Mode", ["Speech Recognition", "Dashboard"])

    if mode == "Dashboard":
        display_dashboard()
    elif mode == "Speech Recognition" and st.button('Start Listening'):
        # The button is only rendered in speech mode; listening starts once
        # it has been clicked.
        continuous_interaction()