Pull request #20 ("Update app.py"), opened by Muthuraja18 — Space status: Running. File changed: app.py
|
@@ -10,42 +10,43 @@ from textblob import TextBlob
|
|
| 10 |
import streamlit as st
|
| 11 |
import seaborn as sns
|
| 12 |
import plotly.express as px
|
|
|
|
| 13 |
from datetime import datetime, timedelta
|
| 14 |
import gspread
|
| 15 |
from google.oauth2.service_account import Credentials
|
| 16 |
|
| 17 |
-
# Set up paths
|
| 18 |
-
csv_file_path = "context.csv" # Path to
|
| 19 |
-
output_csv_path = "contents.csv" # Path to save query results
|
| 20 |
|
| 21 |
# Google Sheets setup
|
| 22 |
SCOPE = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
|
| 23 |
-
CREDS_PATH = "modern-cycling-444916-g6-82c207d3eb47.json" #
|
| 24 |
|
| 25 |
# Initialize Google Sheets connection
def initialize_google_sheets():
    """Authenticate with a service account and open the 'infosys' sheet.

    Returns:
        The first worksheet of the "infosys" spreadsheet on success, or
        None when credential loading or the Sheets API call fails (the
        error is surfaced in the Streamlit UI instead of crashing).
    """
    try:
        # Loading the service-account file can itself fail (missing or
        # malformed file), so it belongs inside the try block too —
        # previously this line ran before the try and crashed the app.
        credentials = Credentials.from_service_account_file(CREDS_PATH, scopes=SCOPE)
        client = gspread.authorize(credentials)
        sheet = client.open("infosys").sheet1  # First worksheet of the "infosys" spreadsheet
        return sheet
    except (OSError, ValueError) as e:
        st.error(f"Could not load Google credentials: {e}")
        return None
    except gspread.exceptions.APIError as e:
        st.error(f"Google Sheets API error: {e}")
        return None
|
| 35 |
|
| 36 |
-
sheet = initialize_google_sheets()
|
| 37 |
|
| 38 |
# Function to safely load the CSV dataset
|
| 39 |
def load_csv_safely(file_path):
|
| 40 |
try:
|
| 41 |
-
df = pd.read_csv(file_path, on_bad_lines='skip') #
|
| 42 |
required_columns = ['question', 'product', 'price', 'features', 'ratings', 'discount']
|
| 43 |
for column in required_columns:
|
| 44 |
if column not in df.columns:
|
| 45 |
raise Exception(f"CSV does not contain the required column: '{column}'. Please check your CSV.")
|
| 46 |
|
| 47 |
if 'Timestamp' not in df.columns:
|
| 48 |
-
df['Timestamp'] = pd.NaT #
|
| 49 |
|
| 50 |
return df
|
| 51 |
except pd.errors.ParserError as e:
|
|
@@ -55,8 +56,9 @@ def load_csv_safely(file_path):
|
|
| 55 |
st.error(f"An error occurred: {e}")
|
| 56 |
return None
|
| 57 |
|
| 58 |
-
dataset = load_csv_safely(csv_file_path) # Load
|
| 59 |
-
|
|
|
|
| 60 |
|
| 61 |
# Function to filter data by date
|
| 62 |
def filter_data_by_date(data, date_filter):
|
|
@@ -70,13 +72,13 @@ def filter_data_by_date(data, date_filter):
|
|
| 70 |
|
| 71 |
return data
|
| 72 |
|
| 73 |
-
# Function
|
| 74 |
def listen_to_speech():
|
| 75 |
recognizer = sr.Recognizer()
|
| 76 |
with sr.Microphone() as source:
|
| 77 |
recognizer.adjust_for_ambient_noise(source)
|
| 78 |
st.write("Listening...")
|
| 79 |
-
|
| 80 |
try:
|
| 81 |
audio = recognizer.listen(source, timeout=5, phrase_time_limit=10)
|
| 82 |
st.write("Recognizing...")
|
|
@@ -109,17 +111,32 @@ def extract_product_name(query):
|
|
| 109 |
return product
|
| 110 |
return None
|
| 111 |
|
| 112 |
-
# Function to
|
| 113 |
def find_answer(query):
|
| 114 |
if dataset is None:
|
| 115 |
return "Dataset not loaded properly."
|
| 116 |
|
|
|
|
| 117 |
query_embedding = embedding_model.encode([query])
|
| 118 |
-
dataset_embeddings = embedding_model.encode(dataset['question'].tolist())
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
closest_question = dataset.iloc[closest_idx]
|
| 124 |
product_name = closest_question['product']
|
| 125 |
price = closest_question['price']
|
|
@@ -155,7 +172,7 @@ def save_query_to_csv(query, product_name, price, features, ratings, discount):
|
|
| 155 |
new_entry_df = pd.DataFrame([new_entry])
|
| 156 |
new_entry_df.to_csv(output_csv_path, mode='a', header=not os.path.exists(output_csv_path), index=False)
|
| 157 |
|
| 158 |
-
# Function for sentiment analysis
|
| 159 |
def analyze_sentiment_with_emoji(text):
|
| 160 |
blob = TextBlob(text)
|
| 161 |
sentiment_score = blob.sentiment.polarity
|
|
@@ -203,7 +220,16 @@ def recommend_products(query):
|
|
| 203 |
|
| 204 |
return recommendations
|
| 205 |
|
| 206 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 207 |
def continuous_interaction():
|
| 208 |
st.title("Speech Recognition with Product Queries")
|
| 209 |
if st.button("Start Speech Recognition"):
|
|
@@ -244,7 +270,7 @@ def continuous_interaction():
|
|
| 244 |
st.write(f"Discount: {rec['discount']}%")
|
| 245 |
st.write("---")
|
| 246 |
|
| 247 |
-
# Dashboard
|
| 248 |
def display_dashboard():
|
| 249 |
st.title("Product Dashboard")
|
| 250 |
st.write("Welcome to the product query dashboard!")
|
|
@@ -312,9 +338,13 @@ def display_dashboard():
|
|
| 312 |
|
| 313 |
# Main code to run the app
if __name__ == '__main__':
    # Sidebar toggle between the two Streamlit views.
    mode = st.sidebar.radio("Select Mode", ("Speech Recognition", "Dashboard"))

    # Dispatch to the selected view; any other value falls through silently.
    if mode == "Speech Recognition":
        continuous_interaction()
    elif mode == "Dashboard":
        display_dashboard()
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
import streamlit as st
|
| 11 |
import seaborn as sns
|
| 12 |
import plotly.express as px
|
| 13 |
+
import gradio as gr
|
| 14 |
from datetime import datetime, timedelta
|
| 15 |
import gspread
|
| 16 |
from google.oauth2.service_account import Credentials
|
| 17 |
|
| 18 |
+
# Set up paths for CSV files and Google Sheets credentials
# NOTE(review): these are machine-specific absolute Windows paths, so the
# app can only run on this one machine — consider relative paths or
# environment variables before deploying to a Space.
csv_file_path = r"C:\Users\Muthuraja\OneDrive\Attachments\Desktop\second\context.csv"  # Path to CSV file with product info
output_csv_path = r"C:\Users\Muthuraja\OneDrive\Attachments\Desktop\second\contents.csv"  # Path to save query results

# Google Sheets setup
SCOPE = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]
CREDS_PATH = r"C:\Users\Muthuraja\Downloads\modern-cycling-444916-g6-82c207d3eb47.json"  # Google credentials path
|
| 25 |
|
| 26 |
# Initialize Google Sheets connection
def initialize_google_sheets():
    """Authenticate with a service account and open the 'infosys' sheet.

    Returns:
        The first worksheet of the "infosys" spreadsheet on success, or
        None when credential loading or the Sheets API call fails (the
        error is surfaced in the Streamlit UI instead of crashing).
    """
    try:
        # Loading the service-account file can itself fail (missing or
        # malformed file), so it belongs inside the try block too —
        # previously this line ran before the try and crashed the app.
        credentials = Credentials.from_service_account_file(CREDS_PATH, scopes=SCOPE)
        client = gspread.authorize(credentials)
        sheet = client.open("infosys").sheet1  # Accessing the Google Sheet "infosys"
        return sheet
    except (OSError, ValueError) as e:
        st.error(f"Could not load Google credentials: {e}")
        return None
    except gspread.exceptions.APIError as e:
        st.error(f"Google Sheets API error: {e}")
        return None
|
| 36 |
|
| 37 |
+
sheet = initialize_google_sheets() # Initialize Google Sheets connection
|
| 38 |
|
| 39 |
# Function to safely load the CSV dataset
|
| 40 |
def load_csv_safely(file_path):
|
| 41 |
try:
|
| 42 |
+
df = pd.read_csv(file_path, on_bad_lines='skip') # Handles malformed lines in CSV
|
| 43 |
required_columns = ['question', 'product', 'price', 'features', 'ratings', 'discount']
|
| 44 |
for column in required_columns:
|
| 45 |
if column not in df.columns:
|
| 46 |
raise Exception(f"CSV does not contain the required column: '{column}'. Please check your CSV.")
|
| 47 |
|
| 48 |
if 'Timestamp' not in df.columns:
|
| 49 |
+
df['Timestamp'] = pd.NaT # Initialize Timestamp column if it doesn't exist
|
| 50 |
|
| 51 |
return df
|
| 52 |
except pd.errors.ParserError as e:
|
|
|
|
| 56 |
st.error(f"An error occurred: {e}")
|
| 57 |
return None
|
| 58 |
|
| 59 |
dataset = load_csv_safely(csv_file_path)  # Load dataset safely (None when loading/validation fails)

embedding_model = SentenceTransformer('all-MiniLM-L6-v2')  # Pre-trained sentence transformer model for embeddings
|
| 62 |
|
| 63 |
# Function to filter data by date
|
| 64 |
def filter_data_by_date(data, date_filter):
|
|
|
|
| 72 |
|
| 73 |
return data
|
| 74 |
|
| 75 |
+
# Function for speech recognition
|
| 76 |
def listen_to_speech():
|
| 77 |
recognizer = sr.Recognizer()
|
| 78 |
with sr.Microphone() as source:
|
| 79 |
recognizer.adjust_for_ambient_noise(source)
|
| 80 |
st.write("Listening...")
|
| 81 |
+
|
| 82 |
try:
|
| 83 |
audio = recognizer.listen(source, timeout=5, phrase_time_limit=10)
|
| 84 |
st.write("Recognizing...")
|
|
|
|
| 111 |
return product
|
| 112 |
return None
|
| 113 |
|
| 114 |
+
# Function to search for relevant product details based on query (not relying on product name explicitly)
|
| 115 |
def find_answer(query):
|
| 116 |
if dataset is None:
|
| 117 |
return "Dataset not loaded properly."
|
| 118 |
|
| 119 |
+
# Create embeddings for the query and all possible columns (product, features, question)
|
| 120 |
query_embedding = embedding_model.encode([query])
|
|
|
|
| 121 |
|
| 122 |
+
# Generate embeddings for all questions, products, and features to find relevance
|
| 123 |
+
combined_columns = dataset['question'].fillna('') + " " + dataset['product'].fillna('') + " " + dataset['features'].fillna('')
|
| 124 |
+
combined_embeddings = embedding_model.encode(combined_columns.tolist())
|
| 125 |
+
|
| 126 |
+
# Calculate cosine similarity between the query embedding and each product's combined embeddings
|
| 127 |
+
similarities = cosine_similarity(query_embedding, combined_embeddings)
|
| 128 |
+
|
| 129 |
+
# Set a threshold for similarity to determine if the query matches any product
|
| 130 |
+
similarity_threshold = 0.5 # You can adjust this threshold based on how strict you want the match
|
| 131 |
|
| 132 |
+
closest_idx = np.argmax(similarities) # Index of the closest match
|
| 133 |
+
highest_similarity = similarities[0][closest_idx] # Highest similarity score
|
| 134 |
+
|
| 135 |
+
# If no match is found above the threshold, return "No matching product found"
|
| 136 |
+
if highest_similarity < similarity_threshold:
|
| 137 |
+
return "Sorry, no product found for your query."
|
| 138 |
+
|
| 139 |
+
# Get the details for the closest match
|
| 140 |
closest_question = dataset.iloc[closest_idx]
|
| 141 |
product_name = closest_question['product']
|
| 142 |
price = closest_question['price']
|
|
|
|
| 172 |
new_entry_df = pd.DataFrame([new_entry])
|
| 173 |
new_entry_df.to_csv(output_csv_path, mode='a', header=not os.path.exists(output_csv_path), index=False)
|
| 174 |
|
| 175 |
+
# Function for sentiment analysis with emojis
|
| 176 |
def analyze_sentiment_with_emoji(text):
|
| 177 |
blob = TextBlob(text)
|
| 178 |
sentiment_score = blob.sentiment.polarity
|
|
|
|
| 220 |
|
| 221 |
return recommendations
|
| 222 |
|
| 223 |
+
# Gradio Interface for speech/text input: routes a single query through the
# answer, sentiment, and recommendation pipelines.
def gradio_interface(query):
    """Handle one Gradio query end to end.

    Args:
        query: Free-text user query.

    Returns:
        A 4-tuple (answer, sentiment label, sentiment emoji, recommendations)
        matching the interface's text/text/text/json outputs.
    """
    answer = find_answer(query)
    # Only the label and emoji are surfaced; the numeric polarity score is
    # intentionally unused here (previously bound to an unused local).
    sentiment, _score, emoji = analyze_sentiment_with_emoji(query)
    recommendations = recommend_products(query)
    return answer, sentiment, emoji, recommendations
|
| 231 |
+
|
| 232 |
+
# Function to handle continuous interaction loop (Streamlit version)
|
| 233 |
def continuous_interaction():
|
| 234 |
st.title("Speech Recognition with Product Queries")
|
| 235 |
if st.button("Start Speech Recognition"):
|
|
|
|
| 270 |
st.write(f"Discount: {rec['discount']}%")
|
| 271 |
st.write("---")
|
| 272 |
|
| 273 |
+
# Dashboard for visualizations (Streamlit)
|
| 274 |
def display_dashboard():
|
| 275 |
st.title("Product Dashboard")
|
| 276 |
st.write("Welcome to the product query dashboard!")
|
|
|
|
| 338 |
|
| 339 |
# Main code to run the app
if __name__ == '__main__':
    # Select mode between the two Streamlit views.
    mode = st.sidebar.radio("Select Mode", ("Speech Recognition", "Dashboard"))

    if mode == "Speech Recognition":
        continuous_interaction()
    elif mode == "Dashboard":
        display_dashboard()

    # Gradio Interface for queries.
    # NOTE(review): launch() blocks the script and runs on every Streamlit
    # rerun, regardless of the selected mode — confirm mixing Streamlit and
    # Gradio in one process like this is intended.
    gr.Interface(fn=gradio_interface, inputs="text", outputs=["text", "text", "text", "json"]).launch()
|