# d-commerce / rag_system.py — product RAG pipeline (embeddings + Gemini)
import google.generativeai as genai
from sentence_transformers import SentenceTransformer
from typing import List, Tuple, Dict, Any
import asyncio
from database import db
from config import settings
class ProductRAGPipeline:
    """Retrieval-augmented generation pipeline for a fashion e-commerce
    shopping assistant.

    Flow: embed the user query locally (SentenceTransformer), retrieve
    matching product documents from the vector store (with category-keyword
    fallbacks), then ask Gemini to answer using those products as context.
    """

    def __init__(self):
        # Local embedding model; encode() is CPU-bound, so get_embeddings()
        # offloads it to an executor to keep the event loop responsive.
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
        # Configure the Gemini SDK once and keep a reusable model handle.
        genai.configure(api_key=settings.GEMINI_API_KEY)
        self.gemini_model = genai.GenerativeModel('gemini-2.5-flash')
        # Persona block prepended to every generation prompt.
        self.personality_traits = """
You are a friendly, knowledgeable shopping assistant for a fashion e-commerce store. Your personality traits:
- Warm, approachable, and enthusiastic about fashion
- Helpful and patient with customer queries
- Knowledgeable about products, styles, and fashion trends
- Casual but professional tone, like a friendly store assistant
- Use emojis occasionally to express emotion (but don't overdo it)
- Ask follow-up questions to better understand customer needs
- Be concise but thorough in product recommendations
- Always mention key product features, price, and benefits
- If you suggest multiple products, compare them briefly
"""

    async def get_embeddings(self, texts: List[str]) -> List[List[float]]:
        """Encode *texts* into embedding vectors without blocking the loop.

        Returns one embedding (list of floats) per input text.
        """
        # get_running_loop() is the correct call inside a coroutine;
        # get_event_loop() here is deprecated since Python 3.10.
        loop = asyncio.get_running_loop()
        embeddings = await loop.run_in_executor(
            None, self.embedding_model.encode, texts
        )
        return embeddings.tolist()

    async def retrieve_relevant_products(self, query: str, limit: int = 3) -> List[Dict]:
        """Retrieve up to *limit* relevant product documents.

        Strategy: vector similarity search first; if it returns nothing,
        fall back to a category-keyword search; on any error, fall back to
        a generic 'tops' search so the caller always receives a list.
        """
        try:
            # Try vector search first.
            query_embedding = await self.get_embeddings([query])
            print(f"🔍 Performing vector search with embedding dim: {len(query_embedding[0])}")
            relevant_docs = await db.similarity_search(query_embedding[0], limit=limit)
            print(f"✅ Vector search returned {len(relevant_docs)} results")
            if not relevant_docs:
                print("🔄 No results from vector search, trying category-based search")
                # Fallback to category-based search using keywords in the query.
                category_keywords = self._extract_category_from_query(query)
                if category_keywords:
                    relevant_docs = await db.search_by_category(category_keywords[0], limit=limit)
                    print(f"✅ Category search returned {len(relevant_docs)} results")
            return relevant_docs
        except Exception as e:
            print(f"❌ Error in vector search: {e}")
            # Final fallback to a generic product search so callers still
            # get something to show.
            return await db.search_by_category("tops", limit=limit)

    def _extract_category_from_query(self, query: str) -> List[str]:
        """Map keywords in *query* to store categories.

        Returns every matching category (in mapping order); defaults to
        ['tops'] when nothing matches so downstream search always has a key.
        """
        query_lower = query.lower()
        categories = []
        category_mapping = {
            'tops': ['top', 'shirt', 'blouse', 't-shirt', 'tshirt', 'crop top', 'spaghetti'],
            'bottoms': ['pant', 'jeans', 'trouser', 'leggings', 'skirt', 'short'],
            'dresses': ['dress', 'gown', 'frock'],
            'outerwear': ['jacket', 'sweater', 'hoodie', 'cardigan', 'coat'],
            'accessories': ['bag', 'jewelry', 'scarf', 'hat', 'belt']
        }
        for category, keywords in category_mapping.items():
            # Substring match is intentional ('pant' also hits 'pants').
            if any(keyword in query_lower for keyword in keywords):
                categories.append(category)
        return categories if categories else ['tops']  # Default to tops

    def create_product_prompt(self, query: str, products: List[Dict]) -> str:
        """Build the Gemini prompt: persona + product context + user query
        + answering instructions.

        Each product dict is expected to carry a 'content' text field
        (as returned by the retrieval layer).
        """
        if products:
            context = "AVAILABLE PRODUCTS:\n"
            for i, product in enumerate(products, 1):
                context += f"{i}. {product['content']}\n"
        else:
            context = "No specific product information available at the moment."
        prompt = f"""
{self.personality_traits}
{context}
USER QUESTION: {query}
INSTRUCTIONS:
1. Answer based primarily on the provided product information
2. If suggesting products, mention:
- Key features and benefits
- Price (if available)
- Why it might suit the user's needs
3. Be conversational and helpful
4. If the exact answer isn't in the products, use your general knowledge but be honest about limitations
5. Keep responses concise but complete (2-4 sentences usually)
6. Always maintain a friendly, shopping assistant tone
7. If multiple products are relevant, compare them briefly
SHOPPING ASSISTANT RESPONSE:
"""
        return prompt

    async def generate_response(self, query: str) -> Tuple[str, List[Dict]]:
        """Answer *query* via the RAG pipeline.

        Returns (response_text, retrieved_products); on any failure returns
        a friendly apology and an empty product list instead of raising.
        """
        try:
            # Retrieve relevant products
            relevant_products = await self.retrieve_relevant_products(query)
            print(f"📦 Retrieved {len(relevant_products)} relevant products")
            # Create context-aware prompt
            prompt = self.create_product_prompt(query, relevant_products)
            # generate_content() is a blocking network call; run it in the
            # default executor so it doesn't stall the event loop.
            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(
                None, self.gemini_model.generate_content, prompt
            )
            response_text = response.text.strip()
            print(f"🤖 Generated response: {response_text[:100]}...")
            return response_text, relevant_products
        except Exception as e:
            print(f"❌ Error generating response: {e}")
            fallback_msg = "I apologize, but I'm having trouble accessing our product information right now. Please try again in a moment or contact our customer service for immediate assistance. 😊"
            return fallback_msg, []

    def generate_followup_questions(self, query: str, products: List[Dict]) -> List[str]:
        """Suggest up to five follow-up questions, prioritizing ones
        tailored to the user's query.

        Bug fix: contextual questions used to be appended AFTER the five
        base questions and then sliced off (with a comment claiming "top 5"
        while the code returned 2), so tailored questions never surfaced.
        They are now placed first, and the result is capped at five.
        *products* is accepted for future context-aware use.
        """
        base_questions = [
            "Tell me more about this product",
            "What are the alternatives in different colors?",
            "Do you have similar items in different price ranges?",
            "What's the sizing like for these products?",
            "Are any of these currently on sale?"
        ]
        # Context-aware questions, collected separately so they can be
        # ranked ahead of the generic ones.
        contextual: List[str] = []
        query_lower = query.lower()
        if any(word in query_lower for word in ['price', 'cost', 'expensive', 'cheap']):
            contextual.extend([
                "What's the price range for similar items?",
                "Are there any ongoing discounts?"
            ])
        if any(word in query_lower for word in ['color', 'colour', 'pattern']):
            contextual.extend([
                "What other colors are available?",
                "Do you have this in solid colors vs patterns?"
            ])
        # Contextual questions first so they survive the cut; cap at 5.
        return (contextual + base_questions)[:5]
# Module-level singleton shared by importers. NOTE: constructing it has
# side effects — it loads the SentenceTransformer weights and configures
# the Gemini client — so importing this module is not free.
rag_pipeline = ProductRAGPipeline()