"""Electronics query agent.

Classifies user queries with DistilBERT, filters a local electronics CSV
dataset by keywords, and composes a templated reply.

Module import has side effects: it requires the HUGGINGFACE_TOKEN
environment variable and network access (Hugging Face login + model
download).
"""
import os
import random

import pandas as pd
import torch
from huggingface_hub import login
from transformers import DistilBertModel, DistilBertTokenizer

# Fail fast if the required credential is missing.
HUGGINGFACE_TOKEN = os.getenv('HUGGINGFACE_TOKEN')
if not HUGGINGFACE_TOKEN:
    raise EnvironmentError("HUGGINGFACE_TOKEN environment variable is not set.")

# Log in to Hugging Face
login(token=HUGGINGFACE_TOKEN)

# Load model and tokenizer.
# `token=` replaces the deprecated `use_auth_token=` keyword argument.
model_name = "distilbert-base-uncased"
tokenizer = DistilBertTokenizer.from_pretrained(model_name, token=HUGGINGFACE_TOKEN)
model = DistilBertModel.from_pretrained(model_name, token=HUGGINGFACE_TOKEN)


def predict_class(query, system_message):
    """Predict an integer class id for `query` given `system_message`.

    NOTE(review): DistilBertModel has no classification head; taking
    softmax/argmax over the 768-dim [CLS] embedding yields an essentially
    arbitrary index in [0, hidden_size), which almost never lands in the
    small response map used by electronics_agent_response. A fine-tuned
    DistilBertForSequenceClassification should replace this — the original
    behavior is kept as-is pending that design decision.
    """
    # truncation=True guards against inputs exceeding the model's max length.
    inputs = tokenizer(system_message + " " + query, return_tensors='pt',
                       truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.last_hidden_state[:, 0, :]  # [CLS] token's embedding
    probabilities = torch.nn.functional.softmax(logits, dim=-1)
    return torch.argmax(probabilities).item()


def load_electronics_dataset():
    """Load 'electronics.csv' into a DataFrame.

    Raises:
        Exception: on a missing, empty, or otherwise unreadable file.
            The original error is chained via `from` for debugging; the
            generic Exception type is kept so existing callers' handlers
            still match.
    """
    try:
        return pd.read_csv('electronics.csv')
    except FileNotFoundError as e:
        raise Exception("Electronics dataset file not found.") from e
    except pd.errors.EmptyDataError as e:
        raise Exception("Electronics dataset file is empty.") from e
    except Exception as e:
        raise Exception(f"Error loading electronics dataset: {e}") from e


def generate_electronics_response(row):
    """Render one product row into a randomly chosen response template.

    `row` is expected to expose 'ProductName', 'Category', 'Price' and
    'Description' keys (e.g. a DataFrame row).
    """
    templates = [
        lambda r: (f"The {r['ProductName']} is a great choice if you're looking for a {r['Category']}. "
                   f"Priced at ${r['Price']}, customers appreciate its {r['Description']}. "
                   f"Would you like to know more about the {r['ProductName']}?"),
        lambda r: (f"Check out the {r['ProductName']}! It's available for ${r['Price']} and is known for its {r['Description']}. "
                   f"What do you think about this {r['Category']}?"),
    ]
    return random.choice(templates)(row)


def extract_electronics_filters(query):
    """Derive simple column filters from keywords in the user query."""
    filters = {}
    query_lower = query.lower()
    # "best ... rating" means: keep only the top-rated rows.
    if 'best' in query_lower and 'rating' in query_lower:
        filters['Rating'] = 'max'
    if 'phones' in query_lower:
        filters['Category'] = 'phone'
    elif 'laptops' in query_lower:
        filters['Category'] = 'laptop'
    return filters


def apply_electronics_filters(df, filters):
    """Apply filters produced by extract_electronics_filters to `df`.

    Returns a (possibly empty) filtered DataFrame; `df` itself is not
    mutated.
    """
    for key, value in filters.items():
        if key == 'Rating' and value == 'max':
            df = df[df['Rating'] == df['Rating'].max()]
        elif key in df.columns and isinstance(value, str):
            # Case-insensitive substring match; NaN cells never match.
            df = df[df[key].str.contains(value, case=False, na=False)]
    return df


def query_electronics(user_query, n_results=5):
    """Return up to `n_results` products matching `user_query`, best-rated first."""
    electronics_df = load_electronics_dataset()
    filtered_df = apply_electronics_filters(
        electronics_df, extract_electronics_filters(user_query))
    if 'Rating' in filtered_df.columns:
        filtered_df = filtered_df.sort_values(by='Rating', ascending=False)
    return filtered_df.head(n_results)


def electronics_agent_response(user_query):
    """Build the agent's full reply for `user_query`.

    The reply is a class-based preamble (see predict_class) followed by
    one templated blurb per matching product, or an apology when no
    products match.
    """
    system_message = """You are an expert electronics agent. Your responsibilities include:
    1. Handling all electronics-related queries.
    2. Providing information about various electronics products.
    3. Assisting with product features, specifications, and prices.
    4. Offering recommendations based on user preferences.
    5. Transferring to other departments when necessary."""

    predicted_class = predict_class(user_query, system_message)
    responses = {
        0: "Information about electronics.",
        1: "Details about product specifications.",
        2: "Assistance with electronics recommendations.",
    }
    # predict_class rarely returns 0-2 (see its NOTE), so the fallback
    # message is the common case in practice.
    response = responses.get(predicted_class, "I am not sure how to help with that.")

    electronics_results = query_electronics(user_query)
    if electronics_results.empty:
        response += "\nSorry, I couldn't find any electronics matching your query."
    else:
        for _, row in electronics_results.iterrows():
            response += "\n" + generate_electronics_response(row)
    return response