Spaces:

arka7
/

E-commerce_AgenticRAG

Sleeping

App Files Files Community

E-commerce_AgenticRAG / app.py

arka7

Update app.py

5e91f60 verified 4 months ago

raw

history blame contribute delete

43.9 kB

	import os
	import json
	import requests
	import asyncio
	import uuid
	import re
	import warnings
	from typing import TypedDict, Annotated, List, Dict, Any, Optional, Tuple
	from datetime import datetime
	from pathlib import Path
	from enum import Enum
	import tempfile

	# Suppress torch FutureWarning on HF Spaces.
	# NOTE: `module` is a regular expression that the warnings machinery matches
	# against the *start* of the emitting module's name, so it needs the `.*`
	# wildcards to cover both `torch` itself and `torch.<submodule>`.
	warnings.filterwarnings('ignore', category=FutureWarning, module=r'.*torch.*')

	# Third-party imports
	import gradio as gr
	from dotenv import load_dotenv
	from bs4 import BeautifulSoup
	import faiss
	import pickle
	import numpy as np
	from sentence_transformers import SentenceTransformer
	import tiktoken

	from langchain_core.messages import BaseMessage, HumanMessage, AIMessage, SystemMessage, ToolMessage
	from langchain_mistralai.chat_models import ChatMistralAI
	from langchain_core.tools import tool
	from langchain_community.tools.tavily_search import TavilySearchResults
	from langchain_text_splitters import RecursiveCharacterTextSplitter

	# --- 1. SETUP & CONFIGURATION ---

	load_dotenv()

	# --- API KEY ROTATION ---

	class APIKeyRotator:
	    """Round-robin holder for the API keys of one external service.

	    Keys are read from the environment as ``<SERVICE>_API_KEY``,
	    ``<SERVICE>_API_KEY_2``, ``<SERVICE>_API_KEY_3``, ... stopping at the
	    first variable that is unset or blank. The loaded keys are shuffled once
	    so that separate processes do not all start on the same key.

	    Attributes:
	        service_name: Environment-variable prefix, e.g. ``"MISTRAL"``.
	        keys: The loaded keys, in (shuffled) rotation order.
	        current_index: Position in ``keys`` of the key currently in use.
	    """

	    def __init__(self, service_name: str):
	        """Load every key configured for ``service_name``.

	        Raises:
	            ValueError: If no key is configured for the service.
	        """
	        self.service_name = service_name
	        self.keys = self._load_keys()
	        self.current_index = 0

	    def _load_keys(self) -> List[str]:
	        """Load all API keys for the service (e.g., MISTRAL_API_KEY, MISTRAL_API_KEY_2, etc.)

	        Surrounding whitespace is stripped from each value: secrets pasted
	        with a trailing newline would otherwise be sent verbatim and fail
	        authentication. A whitespace-only value is treated as unset.
	        """
	        import random  # local import: `random` is not imported at file level

	        keys = []
	        suffix = 1
	        while True:
	            if suffix == 1:
	                variable = f"{self.service_name}_API_KEY"
	            else:
	                variable = f"{self.service_name}_API_KEY_{suffix}"

	            key = (os.getenv(variable) or "").strip()
	            if not key:
	                break

	            keys.append(key)
	            suffix += 1

	        if not keys:
	            raise ValueError(f"❌ No API keys found for {self.service_name}")

	        print(f"✅ Loaded {len(keys)} API key(s) for {self.service_name}")
	        random.shuffle(keys)  # Randomize order
	        return keys

	    def get_key(self) -> str:
	        """Get current API key without rotation"""
	        if not self.keys:
	            raise ValueError(f"❌ No API keys available for {self.service_name}")
	        return self.keys[self.current_index]

	    def rotate(self) -> str:
	        """Advance to the next key (wrapping around) and return it."""
	        if not self.keys:
	            raise ValueError(f"❌ No API keys available for {self.service_name}")
	        self.current_index = (self.current_index + 1) % len(self.keys)
	        print(f"🔄 Rotated {self.service_name} API key to index {self.current_index + 1}/{len(self.keys)}")
	        return self.get_key()

	# Initialize key rotators (each raises ValueError at import time when its
	# service has no key configured)
	mistral_rotator = APIKeyRotator("MISTRAL")
	tavily_rotator = APIKeyRotator("TAVILY")

	# Snapshot the currently selected keys and publish them through the
	# environment so libraries that read the env vars see the same values
	MISTRAL_API_KEY = mistral_rotator.get_key()
	TAVILY_API_KEY = tavily_rotator.get_key()
	os.environ["MISTRAL_API_KEY"] = MISTRAL_API_KEY
	os.environ["TAVILY_API_KEY"] = TAVILY_API_KEY

	print("[OK] API Key rotation initialized")

	# Vector DB Configuration: a scratch directory under the system temp dir
	# (the name says CHROMA, but the vector store used in this file is FAISS)
	CHROMA_DB_PATH = Path(tempfile.gettempdir()) / "ecommerce_rag"
	CHROMA_DB_PATH.mkdir(exist_ok=True, parents=True)

	print(f"[VECTOR_STORE] Location: {CHROMA_DB_PATH.absolute()}")
	print("[OK] API Keys configured")

	# Global FAISS state (module level, shared by every request in the process)
	faiss_index: Optional[Any] = None  # created lazily by faiss_add_chunks
	faiss_chunks_metadata: Dict[str, Dict] = {}  # chunk id -> stored chunk metadata
	faiss_chunk_id_to_index: Dict[str, int] = {}  # chunk id -> row position in the index
	faiss_index_counter: int = 0  # next row position to hand out

	# Resilient LLM wrapper with key rotation
	def create_llm_with_rotation(model: str, temperature: float = 0.2, timeout: float = 120.0, max_retries: int = 5):
	    """Build a Mistral chat client that switches API key when a call fails.

	    Args:
	        model: Mistral model name passed to ``ChatMistralAI``.
	        temperature: Sampling temperature passed to ``ChatMistralAI``.
	        timeout: Request timeout passed to ``ChatMistralAI``.
	        max_retries: Client-level retry count passed to ``ChatMistralAI``.

	    Returns:
	        A ``ResilientLLM`` wrapper exposing ``invoke`` and ``bind_tools``.
	    """

	    class ResilientLLM:
	        """Thin wrapper that rebuilds the client with the next key on error."""

	        def __init__(self, model, temperature, timeout, max_retries):
	            self.model = model
	            self.temperature = temperature
	            self.timeout = timeout
	            self.max_retries = max_retries
	            self.llm = self._create_llm()

	        def _create_llm(self):
	            """Create a client bound to the rotator's current key."""
	            return ChatMistralAI(
	                api_key=mistral_rotator.get_key(),
	                model=self.model,
	                temperature=self.temperature,
	                timeout=self.timeout,
	                max_retries=self.max_retries,
	            )

	        def invoke(self, messages, **kwargs):
	            """Invoke with automatic key rotation on failure.

	            Each configured key gets one attempt; the exception raised by
	            the final attempt is propagated to the caller.
	            """
	            attempts_allowed = len(mistral_rotator.keys)
	            final_attempt = attempts_allowed - 1
	            for attempt in range(attempts_allowed):
	                try:
	                    return self.llm.invoke(messages, **kwargs)
	                except Exception as e:
	                    if attempt >= final_attempt:
	                        raise
	                    print(f"⚠️ Attempt {attempt + 1} failed: {str(e)[:80]}")
	                    # rotate() moves the rotator's current key, which
	                    # _create_llm() then picks up via get_key()
	                    mistral_rotator.rotate()
	                    self.llm = self._create_llm()
	                    print(f"🔄 Retrying with rotated key...")

	        def bind_tools(self, tools):
	            """Bind tools to the current underlying client and return the result."""
	            return self.llm.bind_tools(tools)

	    return ResilientLLM(model, temperature, timeout, max_retries)

	# Initialize LLMs with resilience and key rotation
	llm_small = create_llm_with_rotation(
	    "mistral-small-latest",
	    temperature=0.2,
	    timeout=60.0,
	    max_retries=5,
	)
	llm_large = create_llm_with_rotation(
	    "mistral-large-latest",
	    temperature=0.2,
	    timeout=120.0,
	    max_retries=5,
	)

	# Initialize embedding model (loaded once at import and kept on the CPU)
	print("[LOADING] Embedding model...")

	embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
	embedding_model.to("cpu")
	print("[OK] Embedding model initialized (CPU mode)")

	# Initialize tokenizer used by count_tokens
	tokenizer = tiktoken.get_encoding("cl100k_base")

	# --- TRUSTED E-COMMERCE WEBSITES (REMOVED - Now accepting all sources) ---
	# No domain restrictions - Tavily will find the best sources dynamically

	# --- 2. STATE DEFINITIONS ---

	class WorkflowStage(Enum):
	    """Stages of the RAG pipeline; the string values are stored in state["stage"]."""

	    IDLE = "idle"            # freshly created state, nothing run yet
	    SEARCH = "search"
	    CHUNKING = "chunking"    # set by the search agent when it finishes
	    INDEXING = "indexing"    # set by the chunking agent when it finishes
	    RETRIEVAL = "retrieval"  # set by the indexing agent, or when search is skipped
	    SYNTHESIS = "synthesis"  # set by the retrieval agent when it finishes
	    COMPLETE = "complete"    # set by the synthesis agent when it finishes
	    ERROR = "error"          # set by any agent whose outer try block failed

	class EcommerceAgentState(TypedDict):
	    """Unified state for tool-based agentic RAG pipeline"""

	    query: str  # the user's current question
	    stage: str  # a WorkflowStage value
	    search_results: List[Dict]  # de-duplicated search hits (dicts with a "url" key)
	    cleaned_documents: List[Dict]  # scraped pages: url / content / source / scraped_at
	    parent_chunks: List[Dict]
	    child_chunks: List[Dict]
	    retrieved_children: List[Dict]  # id / document / metadata / distance
	    retrieved_parents: List[Dict]
	    parent_child_mapping: Dict[str, List[str]]  # parent chunk id -> child chunk ids
	    final_answer: str
	    citations: List[Dict]  # source / section / url
	    confidence: float
	    error_message: Optional[str]  # set when an agent fails, otherwise None
	    progress_log: List[str]  # human-readable log lines shown in the chat reply
	    # Chat history in "messages" format: the chat handler appends dicts of the
	    # form {"role": "user" | "assistant", "content": str} and the agents read
	    # them with msg.get("role") / msg.get("content").
	    chat_messages: List[Dict[str, str]]

	def create_initial_state(query: str) -> EcommerceAgentState:
	    """Return a fresh pipeline state for ``query``.

	    Every collection field starts as a new empty container, the stage is
	    IDLE, confidence is 0.0 and no error is recorded.
	    """
	    return EcommerceAgentState(
	        query=query,
	        stage=WorkflowStage.IDLE.value,
	        search_results=[],
	        cleaned_documents=[],
	        parent_chunks=[],
	        child_chunks=[],
	        retrieved_children=[],
	        retrieved_parents=[],
	        parent_child_mapping={},
	        final_answer="",
	        citations=[],
	        confidence=0.0,
	        error_message=None,
	        progress_log=[],
	        chat_messages=[],
	    )

	# --- CONVERSATION CONTEXT ---
	# Global conversation memory to track context between queries
	conversation_history = []  # dicts of {"query": ..., "answer": ...}, oldest first
	previous_answers = []  # the answers alone, kept in step with conversation_history


	def add_to_conversation_history(query: str, answer: str):
	    """Record one query/answer exchange in the module-level history.

	    Both lists are appended to together; once the history holds more than
	    five exchanges the oldest entry is dropped from each list.
	    """
	    global conversation_history, previous_answers
	    exchange = {"query": query, "answer": answer}
	    conversation_history.append(exchange)
	    previous_answers.append(answer)
	    # Keep only last 5 exchanges to avoid context bloat
	    if len(conversation_history) > 5:
	        del conversation_history[0]
	        del previous_answers[0]

	def get_conversation_context() -> str:
	    """Format the last three recorded exchanges as a prompt snippet.

	    Returns an empty string when nothing has been recorded. Queries are cut
	    to 100 characters and answers to 150; "..." is appended to both
	    regardless of whether anything was actually cut.
	    """
	    if not conversation_history:
	        return ""

	    recent_exchanges = conversation_history[-3:]  # Last 3 exchanges
	    entries = [
	        f"{position}. Q: {exchange['query'][:100]}...\n A: {exchange['answer'][:150]}...\n"
	        for position, exchange in enumerate(recent_exchanges, 1)
	    ]
	    return "\nPrevious conversation:\n" + "".join(entries)

	# --- 3. UTILITY FUNCTIONS ---

	def extract_domain(url: str) -> str:
	    """Return the network location of ``url`` without a leading ``www.``.

	    Args:
	        url: An absolute URL such as ``https://www.example.com/path``.

	    Returns:
	        The netloc part of the URL (including any port) with a leading
	        ``"www."`` removed; ``""`` when the URL has no netloc; ``"unknown"``
	        when the value cannot be parsed as a URL.
	    """
	    from urllib.parse import urlparse  # local import: not imported at file level

	    try:
	        netloc = urlparse(url).netloc
	        # Strip only a *leading* "www." - a blanket replace would also
	        # mangle hosts that merely contain that substring.
	        if netloc.startswith("www."):
	            netloc = netloc[len("www."):]
	        return netloc
	    except (ValueError, TypeError, AttributeError):
	        # ValueError: malformed URL (e.g. unbalanced IPv6 bracket);
	        # TypeError / AttributeError: the argument was not a string.
	        return "unknown"

	def is_trusted_source(url: str) -> bool:
	    """Report whether ``url`` may be used as a source.

	    Domain restrictions were removed, so every URL is accepted and the
	    argument is not inspected.
	    """
	    accept_all = True  # Accept all domains from Tavily results
	    return accept_all

	def count_tokens(text: str) -> int:
	    """Count the tokens in ``text``.

	    Uses the module-level tokenizer; if encoding fails for any reason the
	    count falls back to the number of whitespace-separated words.
	    """
	    try:
	        return len(tokenizer.encode(text))
	    except Exception:
	        # Deliberate best-effort fallback. `Exception` (rather than a bare
	        # `except`) lets KeyboardInterrupt / SystemExit propagate.
	        return len(text.split())

	def generate_uuid() -> str:
	    """Return a short random ID: the first 8 hex characters of a UUID4."""
	    # The canonical string form begins with 8 hex digits before its first
	    # dash, so slicing the dash-free hex form gives the same characters.
	    return uuid.uuid4().hex[:8]

	def faiss_add_chunks(embeddings: np.ndarray, chunk_ids: List[str], metadatas: List[Dict]) -> None:
	    """Add chunk embeddings and their metadata to the global FAISS index.

	    The vectors are copied to float32 and L2-normalised before being added
	    to an inner-product index, which is created on first use with the
	    dimension of the first batch.

	    Args:
	        embeddings: 2-D array-like, one row per chunk.
	        chunk_ids: One id per embedding row.
	        metadatas: One metadata dict per embedding row; stored under its id.

	    Raises:
	        ValueError: If ``embeddings`` is not 2-D or the three inputs do not
	            have the same length. Nothing is added in that case, so the id
	            bookkeeping cannot drift away from the index rows.
	    """
	    global faiss_index, faiss_chunks_metadata, faiss_chunk_id_to_index, faiss_index_counter

	    embeddings = np.array(embeddings, dtype=np.float32)

	    # Nothing to add: return before touching (or creating) the index.
	    if embeddings.size == 0 and not chunk_ids:
	        return

	    if embeddings.ndim != 2 or not (len(embeddings) == len(chunk_ids) == len(metadatas)):
	        raise ValueError(
	            f"Mismatched FAISS input: embeddings shape {embeddings.shape}, "
	            f"{len(chunk_ids)} ids, {len(metadatas)} metadatas"
	        )

	    faiss.normalize_L2(embeddings)

	    if faiss_index is None:
	        dimension = embeddings.shape[1]
	        faiss_index = faiss.IndexFlatIP(dimension)

	    # Take row numbers from the index itself so the id -> row mapping always
	    # matches what FAISS will report back from a search.
	    first_row = faiss_index.ntotal
	    faiss_index.add(embeddings)

	    for offset, (chunk_id, metadata) in enumerate(zip(chunk_ids, metadatas)):
	        faiss_chunks_metadata[chunk_id] = metadata
	        faiss_chunk_id_to_index[chunk_id] = first_row + offset

	    faiss_index_counter = faiss_index.ntotal

	def faiss_search(query_embedding: np.ndarray, k: int = 5) -> Dict:
	    """Search the global FAISS index for the chunks closest to a query vector.

	    Args:
	        query_embedding: 1-D query vector; it is copied to float32 and
	            L2-normalised before searching.
	        k: Maximum number of hits; capped at the number of indexed vectors.

	    Returns:
	        A dict with keys ``ids``, ``documents``, ``metadatas`` and
	        ``distances``, each holding a single inner list (one entry per hit).
	        ``distances`` carries the scores returned by the index as Python
	        floats. All inner lists are empty when the index is missing or empty
	        or ``k`` is not positive.
	    """
	    global faiss_index, faiss_chunks_metadata, faiss_chunk_id_to_index

	    if faiss_index is None or faiss_index.ntotal == 0 or k <= 0:
	        return {"ids": [[]], "documents": [[]], "metadatas": [[]], "distances": [[]]}

	    query_embedding = np.array([query_embedding], dtype=np.float32)
	    faiss.normalize_L2(query_embedding)

	    distances, indices = faiss_index.search(query_embedding, min(k, faiss_index.ntotal))

	    # Invert the id -> row mapping once instead of scanning it for every hit.
	    # setdefault keeps the first id seen for a row, matching a linear scan.
	    row_to_chunk_id = {}
	    for chunk_id, row in faiss_chunk_id_to_index.items():
	        row_to_chunk_id.setdefault(row, chunk_id)

	    result_ids = []
	    result_docs = []
	    result_metadatas = []
	    result_distances = []

	    for dist, idx in zip(distances[0], indices[0]):
	        chunk_id = row_to_chunk_id.get(int(idx))
	        if chunk_id is None:
	            # Row with no registered chunk id - skip it
	            continue
	        metadata = faiss_chunks_metadata[chunk_id]
	        result_ids.append(chunk_id)
	        result_docs.append(metadata.get("document", ""))
	        result_metadatas.append({key: value for key, value in metadata.items() if key != "document"})
	        # Convert numpy float32 to Python float for JSON serialization
	        result_distances.append(float(dist))

	    return {
	        "ids": [result_ids],
	        "documents": [result_docs],
	        "metadatas": [result_metadatas],
	        "distances": [result_distances],
	    }

	# --- 4. TOOL DEFINITIONS ---

	@tool
	def search_ecommerce_products(query: str) -> str:
	    """Search for e-commerce products using Tavily Search - searches broadly, curator filters later"""
	    # The docstring above doubles as the tool description, so it is kept verbatim.
	    try:
	        print(f"🔍 Searching Tavily for: '{query}' (will curate results after)")

	        tavily = TavilySearchResults(
	            max_results=15,  # Increased to get more diverse results
	            api_key=TAVILY_API_KEY,
	            search_depth="advanced",
	        )
	        hits = tavily.invoke(query)
	        print(f" ✅ Found {len(hits)} raw results (will filter to trusted sources)")
	        # Results are handed back as a JSON string
	        return json.dumps(hits, indent=2)
	    except Exception as e:
	        # Failures are reported as a plain string instead of raising
	        return f"Error in search: {str(e)}"

	@tool
	def scrape_product_content(url: str) -> str:
	    """Scrape and clean content from a URL using requests + BeautifulSoup"""
	    # The docstring above doubles as the tool description, so it is kept verbatim.
	    #
	    # Returns the cleaned page text (at most 8000 characters) on success,
	    # "Insufficient content" when 200 characters or fewer remain, or a
	    # string starting with "Error" when the request or parsing fails.
	    headers = {
	        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
	        # "*/*" is the catch-all media range; a bare "/" is not a valid one
	        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
	        'Accept-Language': 'en-US,en;q=0.5',
	    }
	    try:
	        print(f"📥 Scraping: {extract_domain(url)}")

	        # The context manager closes the session (and its sockets) even when
	        # the request raises - avoids file descriptor issues on HF Spaces.
	        with requests.Session() as session:
	            response = session.get(url, timeout=15, headers=headers)
	            response.raise_for_status()

	        # Parse the raw bytes; BeautifulSoup works out the encoding itself
	        soup = BeautifulSoup(response.content, 'html.parser')

	        # Remove unwanted elements
	        for element in soup(["script", "style", "meta", "link", "nav", "footer", "header", "noscript", "iframe"]):
	            element.decompose()

	        # Extract text with newline separator to preserve structure
	        text = soup.get_text(separator='\n', strip=True)

	        # Clean up whitespace: break lines on runs of two spaces (splitting
	        # on a single space would put every word on its own line) and drop
	        # the empty fragments.
	        lines = (line.strip() for line in text.splitlines())
	        phrases = (phrase.strip() for line in lines for phrase in line.split("  "))
	        text = '\n'.join(phrase for phrase in phrases if phrase)

	        # Limit content
	        text = text[:8000]

	        if len(text) > 200:
	            print(f" ✅ Extracted {len(text)} characters")
	            return text
	        return "Insufficient content"
	    except requests.Timeout:
	        print(f" [ERROR] Timeout after 15s")
	        return "Error: Request timeout"
	    except requests.RequestException as e:
	        print(f" [ERROR] Request error: {str(e)}")
	        return f"Error: {str(e)}"
	    except Exception as e:
	        print(f" [ERROR] Unexpected error: {str(e)}")
	        return f"Error scraping: {str(e)}"

	@tool
	def curator_filter_sites(urls_json: str) -> str:
	    """Curator Agent - Process and deduplicate URLs (no domain filtering)

	    Returns list of unique URLs from search results
	    """
	    # The docstring above doubles as the tool description, so it is kept verbatim.
	    try:
	        # Accept a JSON string or an already-decoded value
	        urls = json.loads(urls_json) if isinstance(urls_json, str) else urls_json
	        if isinstance(urls, dict):
	            # A single result object: use its "url" when it has one
	            urls = [urls.get("url", "")] if urls.get("url") else []
	        elif not isinstance(urls, list):
	            urls = [str(urls)]

	        print(f"🎯 Curator Agent: Processing {len(urls)} URLs...")

	        unique_entries = []
	        already_seen = set()

	        # Entries may be bare URL strings or result dicts carrying a "url" key
	        candidates = (entry.get("url", "") if isinstance(entry, dict) else entry for entry in urls)
	        for candidate in candidates:
	            if not candidate or candidate in already_seen:
	                continue

	            already_seen.add(candidate)
	            unique_entries.append({
	                "url": candidate,
	                "domain": extract_domain(candidate),
	            })
	            print(f" ✅ Added: {extract_domain(candidate)}")

	        summary = {
	            "total_input": len(urls),
	            "curated_count": len(unique_entries),
	            "urls": unique_entries,
	        }

	        print(f" 📊 Result: {len(unique_entries)} unique URLs")
	        return json.dumps(summary, indent=2)
	    except Exception as e:
	        return f"Error in curator filter: {str(e)}"

	@tool
	def chunk_content(content: str, url: str) -> str:
	    """Split content into parent and child chunks"""
	    # The docstring above doubles as the tool description, so it is kept verbatim.
	    #
	    # Output is a JSON string with one parent (first 4000 characters of the
	    # content) and up to five 500-character children that point back to it.
	    try:
	        parent_id = generate_uuid()
	        parent_chunk = {
	            "id": parent_id,
	            "document": content[:4000],
	            "source": url,
	            "chunk_type": "parent",
	            "created_at": datetime.now().isoformat(),
	        }

	        # Create child chunks
	        pieces = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50).split_text(content)
	        child_chunks = [
	            {
	                "id": generate_uuid(),
	                "document": piece,
	                "source": url,
	                "chunk_type": "child",
	                "parent_id": parent_id,
	                "sequence": sequence,
	                "created_at": datetime.now().isoformat(),
	            }
	            for sequence, piece in enumerate(pieces[:5])  # Limit to 5 children
	        ]

	        return json.dumps(
	            {
	                "parent": parent_chunk,
	                "children": child_chunks,
	                "count": len(child_chunks),
	            },
	            indent=2,
	        )
	    except Exception as e:
	        return f"Error chunking: {str(e)}"

	@tool
	def generate_embeddings_and_index(chunks_json: str) -> str:
	    """Generate embeddings and add to FAISS index"""
	    # The docstring above doubles as the tool description, so it is kept verbatim.
	    #
	    # Input: JSON object with a "parent" chunk and a "children" list.
	    # Output: JSON string with "success" plus either the counts or an "error".
	    try:
	        payload = json.loads(chunks_json)
	        parent = payload.get("parent", {})
	        children = payload.get("children", [])

	        # Only include valid chunks (not empty)
	        indexable = [parent] if parent and parent.get("document") else []
	        indexable += [child for child in children if child.get("document")]

	        if not indexable:
	            print("⚠️ No valid chunks to index")
	            return json.dumps({"success": True, "chunks_indexed": 0, "total_index_size": 0})

	        # Extract texts
	        texts = [chunk.get("document", "") for chunk in indexable]
	        chunk_ids = [chunk.get("id", "") for chunk in indexable]

	        print(f"⏳ Generating embeddings for {len(indexable)} chunks...")

	        # Generate embeddings with minimal batching - optimized for CPU
	        batch_size = 2  # Ultra-minimal for CPU (was 4)
	        total_batches = (len(texts) - 1) // batch_size + 1
	        vectors = []

	        for batch_number, start in enumerate(range(0, len(texts), batch_size), 1):
	            batch_texts = texts[start:start + batch_size]
	            print(f" Processing batch {batch_number}/{total_batches}...")
	            try:
	                # Encode without progress bar to avoid hang
	                batch_vectors = embedding_model.encode(batch_texts, convert_to_numpy=True, show_progress_bar=False)
	                vectors.extend(batch_vectors)
	                print(f" ✅ Encoded {len(batch_texts)} texts")
	            except Exception as be:
	                print(f" ⚠️ Batch error: {str(be)}, using random embeddings as fallback")
	                # Fallback: use random embeddings (384 matches the width of
	                # the vectors used elsewhere in this file).
	                # NOTE(review): these random rows still end up in the index
	                # and can be returned by searches.
	                vectors.extend(np.random.randn(len(batch_texts), 384).astype(np.float32))

	        # Add to FAISS
	        embeddings = np.array(vectors, dtype=np.float32)
	        print(f" Adding {len(embeddings)} embeddings to FAISS...")
	        faiss_add_chunks(embeddings, chunk_ids, indexable)

	        print(f"✅ Successfully indexed {len(indexable)} chunks")
	        return json.dumps({
	            "success": True,
	            "chunks_indexed": len(indexable),
	            "total_index_size": len(faiss_chunk_id_to_index),
	        })
	    except Exception as e:
	        print(f"❌ Indexing error: {str(e)}")
	        return json.dumps({"success": False, "error": str(e)})

	@tool
	def retrieve_similar_chunks(query_text: str, k: int = 5) -> str:
	    """Retrieve similar chunks from FAISS"""
	    # The docstring above doubles as the tool description, so it is kept verbatim.
	    #
	    # Output is the faiss_search() result serialised as JSON; on any error
	    # an empty result of the same layout is returned instead.
	    no_hits = {"ids": [[]], "documents": [[]], "metadatas": [[]], "distances": [[]]}
	    try:
	        print(f"🔎 Retrieving similar chunks for: '{query_text[:50]}...'")

	        try:
	            # Try to encode query with timeout fallback
	            query_vector = embedding_model.encode(query_text, convert_to_numpy=True, show_progress_bar=False)
	            print(f" ✅ Query encoded")
	        except Exception as e:
	            print(f" ⚠️ Encoding timeout/error: {str(e)[:50]}, using random embedding")
	            # Fallback: use random embedding
	            query_vector = np.random.randn(384).astype(np.float32)

	        hits = faiss_search(query_vector, k=k)

	        if hits["ids"][0]:
	            print(f" ✅ Found {len(hits['ids'][0])} similar chunks")
	        else:
	            print(f" ⚠️ No chunks found in FAISS (index size: {faiss_index.ntotal if faiss_index else 0})")

	        return json.dumps(hits, indent=2)
	    except Exception as e:
	        print(f" ❌ Retrieval error: {str(e)}")
	        return json.dumps(no_hits)

	def generate_answer(query: str, context_chunks: str, chat_history: str = "") -> str:
	    """Ask the large LLM for an answer, retrying up to three times.

	    Args:
	        query: The user's current question.
	        context_chunks: Retrieved context, already formatted as text.
	        chat_history: Optional formatted transcript of earlier messages.

	    Returns:
	        The model's reply text, or a string starting with
	        "Error generating answer:" once every attempt has failed. Failed
	        attempts are followed by a 5s / 10s pause before the next try.
	    """
	    import time  # local import: `time` is not imported at file level

	    max_attempts = 3

	    for attempt in range(1, max_attempts + 1):
	        try:
	            print(f"💡 Generating answer for: '{query[:50]}...' (Attempt {attempt}/{max_attempts})")
	            system_msg = SystemMessage(content="""You are an e-commerce assistant.
	Answer the user's query based on the provided context AND previous conversation.
	Be concise, accurate, and cite sources when possible. Remember that default region is INDIA and current year in 2026.
	If answering a follow-up question, use insights from previous messages to provide better recommendations.""")

	            # Build user message with chat history context
	            user_content = f"""
	{chat_history}

	Current Query: {query}

	Context chunks:
	{context_chunks}

	Please provide a helpful answer with confidence level (0-100%)."""

	            reply = llm_large.invoke([system_msg, HumanMessage(content=user_content)])
	            print(f"✅ Answer generated successfully")
	            return reply.content
	        except Exception as e:
	            error_msg = str(e)
	            print(f"⚠️ Attempt {attempt} failed: {error_msg[:80]}")

	            if attempt >= max_attempts:
	                print(f"❌ Failed after {max_attempts} attempts")
	                return f"Error generating answer: {error_msg[:100]}"

	            wait_time = 5 * attempt  # 5s, 10s, 15s
	            print(f" ⏳ Retrying in {wait_time}s...")
	            time.sleep(wait_time)

	# --- 5. AGENT FUNCTIONS WITH TOOLS ---


	def run_decision_agent(query: str, chat_history: List = None, progress_callback=None) -> Tuple[bool, str]:
	    """
	    Decision Agent - Decides if web search is needed

	    Args:
	        query: The user's current question.
	        chat_history: Message dicts with "role" and "content" keys; the
	            last entry is expected to be the current query.
	        progress_callback: Accepted for signature parity with the other
	            agents; not used here.

	    Returns: (needs_search: bool, reasoning: str)
	        A search is always requested for the first question and whenever
	        the decision call itself fails.
	    """
	    print(f"\n{'=' * 70}\n🤔 DECISION AGENT\n{'=' * 70}")

	    if chat_history is None:
	        chat_history = []

	    # Check if we have previous answers
	    if not chat_history or len(chat_history) <= 1:
	        print("✅ First question - will perform web search")
	        return True, "First query - need fresh data"

	    # Build conversation context from chat history
	    context = "Previous Conversation:\n"
	    for msg in chat_history[-6:]:  # Last 3 exchanges
	        role = msg.get("role", "").upper()
	        content = msg.get("content", "")[:200]  # First 200 chars
	        context += f"{role}: {content}\n"

	    # Decision prompt
	    system_msg = SystemMessage(content="""You are a decision agent for e-commerce queries.

	Analyze if the current query can be answered from PREVIOUS conversation context or if it needs NEW web search.

	Rules:
	1. If query asks about SPECIFIC products/prices/comparisons from before → NO search needed, use previous context
	2. If query asks for DIFFERENT products or NEW information → YES search needed
	3. If query is a follow-up question about something already discussed → NO search needed
	4. If unclear or asking for LATEST data → YES search needed

	Respond with ONLY: "SEARCH" or "CONTEXT"
	""")

	    user_msg = HumanMessage(content=f"""{context}

	Current query: {query}

	Decision: (respond with ONLY "SEARCH" or "CONTEXT")""")

	    try:
	        response = llm_small.invoke([system_msg, user_msg])
	        decision = response.content.strip().upper()

	        # Any reply containing "SEARCH" triggers a search
	        needs_search = "SEARCH" in decision
	        reasoning = f"Decision: {'Will search web' if needs_search else 'Will use previous context'}"
	        print(f"✅ {reasoning}")

	        return needs_search, reasoning
	    except Exception as e:
	        # Fail open: when unsure, fetch fresh data
	        print(f"⚠️ Decision error: {str(e)[:50]}, defaulting to search")
	        return True, f"Error in decision (default to search): {str(e)[:30]}"

	def run_search_agent(state: EcommerceAgentState, progress_callback=None) -> EcommerceAgentState:
	    """Search Agent - Find products using Tavily.

	    Asks the small LLM for query variations, runs up to three Tavily
	    searches and stores at most 12 URL-unique results in
	    ``state["search_results"]``.

	    Args:
	        state: Pipeline state; ``query`` is read, ``search_results``,
	            ``stage`` and ``progress_log`` are updated in place.
	        progress_callback: Optional callable taking a status message.

	    Returns:
	        The same state object, with stage CHUNKING on success or ERROR
	        (plus ``error_message``) when the agent as a whole failed.
	    """
	    try:
	        print(f"\n{'=' * 70}\n🔍 SEARCH AGENT\n{'=' * 70}")
	        if progress_callback:
	            progress_callback("🔍 Searching for products...")

	        query = state["query"]

	        # Step 1: Generate search queries (with timeout fallback)
	        system_msg = SystemMessage(content="""Generate 3-4 search variations for an e-commerce query.
	Focus on product names, prices, availability. Remember that the default region is INDIA and current year is 2026.
	Return ONLY a JSON array of strings.""")

	        user_msg = HumanMessage(content=f"Query: {query}\n\nReturn search variations as JSON array:")

	        default_variations = [f"{query} price", f"{query} buy online"]
	        search_queries = [query]
	        try:
	            response = llm_small.invoke([system_msg, user_msg])
	        except Exception as e:
	            print(f"⚠️ LLM timeout/error, using default queries: {str(e)[:50]}")
	            search_queries.extend(default_variations)
	        else:
	            try:
	                json_match = re.search(r'\[.*\]', response.content, re.DOTALL)
	                if json_match:
	                    variations = json.loads(json_match.group())
	                    # The model may return non-string or blank items; keep
	                    # only usable query strings.
	                    search_queries.extend(
	                        item for item in variations if isinstance(item, str) and item.strip()
	                    )
	            except (ValueError, TypeError, AttributeError):
	                # Unparseable reply (json.JSONDecodeError is a ValueError)
	                search_queries.extend(default_variations)

	        # Drop repeats (e.g. the model echoing the original query) while
	        # keeping order, so no search call is spent on a duplicate.
	        search_queries = list(dict.fromkeys(search_queries))[:5]
	        state["progress_log"].append(f"✅ Generated {len(search_queries)} search queries")
	        print(f"Search queries: {search_queries}")

	        # Step 2: Broad Tavily search (will filter with Curator Agent after)
	        print("📥 Searching broadly with Tavily...")

	        tavily_search = TavilySearchResults(
	            max_results=15,  # More results for better diversity
	            api_key=TAVILY_API_KEY,
	            search_depth="advanced"
	        )
	        all_results = []

	        for sq in search_queries[:3]:  # Limit to 3 to avoid too many API calls
	            try:
	                print(f" 🔍 Query: '{sq[:40]}...'")
	                results = tavily_search.invoke(sq)
	                all_results.extend(results)
	                print(f" ✅ Found {len(results)} results")
	            except Exception as e:
	                print(f" ⚠️ Error: {str(e)[:50]}")
	                state["progress_log"].append(f"⚠️ Search error: {str(e)[:50]}")

	        # Step 3: Deduplicate results (no domain filtering)
	        print(f"🎯 Processing {len(all_results)} results...")
	        seen_urls = set()
	        seen_domains = {}
	        unique_results = []

	        for result in all_results:
	            # Ensure result is a dictionary
	            if not isinstance(result, dict):
	                try:
	                    result = json.loads(result) if isinstance(result, str) else {"url": str(result)}
	                except ValueError:
	                    continue
	                if not isinstance(result, dict):
	                    # Parsed to a list / string / number: nothing to read a URL from
	                    continue

	            url = result.get("url", "")
	            if url and url not in seen_urls:
	                seen_urls.add(url)
	                domain = extract_domain(url)
	                unique_results.append(result)
	                seen_domains[domain] = seen_domains.get(domain, 0) + 1
	                print(f" ✅ Added: {domain}")

	        print(f"\n📊 Results: {len(unique_results)} URLs from {len(seen_domains)} domains")
	        print(f" Domains found: {list(seen_domains.keys())}")

	        unique_results = unique_results[:12]  # Keep top 12 for diversity
	        state["search_results"] = unique_results
	        state["stage"] = WorkflowStage.CHUNKING.value
	        state["progress_log"].append(f"✅ Search complete: {len(unique_results)} URLs found")
	        print(f"✅ Search complete: {len(unique_results)} URLs")

	        return state
	    except Exception as e:
	        print(f"❌ Search agent error: {str(e)}")
	        state["error_message"] = f"Search agent failed: {str(e)[:100]}"
	        state["stage"] = WorkflowStage.ERROR.value
	        return state

	def run_chunking_agent(state: EcommerceAgentState, progress_callback=None) -> EcommerceAgentState:
	    """Chunking Agent - Scrape and chunk documents.

	    Scrapes every URL in ``state["search_results"]``, keeps the pages that
	    produced usable text and splits each one into a parent chunk plus child
	    chunks.

	    Args:
	        state: Pipeline state; ``cleaned_documents``, ``parent_chunks``,
	            ``child_chunks``, ``parent_child_mapping``, ``stage`` and
	            ``progress_log`` are updated in place.
	        progress_callback: Optional callable taking a status message.

	    Returns:
	        The same state object, with stage INDEXING on success or ERROR.
	    """
	    try:
	        print(f"\n{'=' * 70}\n📄 CHUNKING AGENT\n{'=' * 70}")
	        if progress_callback:
	            progress_callback("📄 Chunking documents...")

	        # Scrape URLs
	        cleaned_documents = []
	        for i, result in enumerate(state.get("search_results", []), 1):
	            url = result.get("url", "")
	            if not url:
	                continue

	            domain = extract_domain(url)
	            print(f"📥 Scraping [{i}]: {domain}")

	            try:
	                content = scrape_product_content.invoke({"url": url})
	                # The scraper reports failures as strings that START with
	                # "Error"; testing for the word anywhere would also throw
	                # away real pages that merely mention it.
	                if content and not content.startswith("Error") and len(content) > 200:
	                    cleaned_documents.append({
	                        "url": url,
	                        "content": content,
	                        "source": domain,
	                        "scraped_at": datetime.now().isoformat()
	                    })
	                    print(f" ✅ Extracted {len(content)} chars")
	                    state["progress_log"].append(f"✅ Scraped: {domain}")
	            except Exception as e:
	                print(f" ⚠️ Error: {str(e)[:50]}")
	                state["progress_log"].append(f"⚠️ Scrape error: {domain}")

	        # Chunk documents
	        parent_chunks = []
	        child_chunks = []
	        parent_child_mapping = {}

	        for doc in cleaned_documents:
	            try:
	                chunks_result = chunk_content.invoke({
	                    "content": doc["content"],
	                    "url": doc["url"]
	                })
	                # A failed chunking call returns a non-JSON error string,
	                # which raises here and is reported by the handler below
	                chunks_data = json.loads(chunks_result)

	                parent = chunks_data.get("parent", {})
	                children = chunks_data.get("children", [])

	                if parent:
	                    parent_chunks.append(parent)
	                    parent_child_mapping[parent["id"]] = [c["id"] for c in children]

	                child_chunks.extend(children)
	            except Exception as e:
	                print(f" ❌ Chunking error: {str(e)[:50]}")

	        state["cleaned_documents"] = cleaned_documents
	        state["parent_chunks"] = parent_chunks
	        state["child_chunks"] = child_chunks
	        state["parent_child_mapping"] = parent_child_mapping
	        state["stage"] = WorkflowStage.INDEXING.value
	        state["progress_log"].append(f"✅ Chunking: {len(parent_chunks)} parents, {len(child_chunks)} children")
	        print(f"✅ Chunking complete: {len(parent_chunks)} parents, {len(child_chunks)} children")

	        return state
	    except Exception as e:
	        state["error_message"] = f"Chunking agent failed: {str(e)}"
	        state["stage"] = WorkflowStage.ERROR.value
	        return state

	def run_indexing_agent(state: EcommerceAgentState, progress_callback=None) -> EcommerceAgentState:
	    """Indexing Agent - Index chunks in FAISS.

	    Sends every parent and child chunk in the state to the embedding /
	    indexing tool in a single call.

	    Args:
	        state: Pipeline state; ``parent_chunks`` and ``child_chunks`` are
	            read, ``stage`` and ``progress_log`` are updated in place.
	        progress_callback: Optional callable taking a status message.

	    Returns:
	        The same state object, with stage RETRIEVAL (also when there was
	        nothing to index or the tool reported a failure, so retrieval can
	        still run against earlier data) or ERROR when the agent itself raised.
	    """
	    try:
	        print(f"\n{'=' * 70}\n🗂️ INDEXING AGENT\n{'=' * 70}")
	        if progress_callback:
	            progress_callback("🗂️ Indexing chunks...")

	        parents = state.get("parent_chunks", [])
	        children = state.get("child_chunks", [])

	        if not parents and not children:
	            state["progress_log"].append("⚠️ No chunks to index")
	            state["stage"] = WorkflowStage.RETRIEVAL.value
	            return state

	        # The tool accepts one "parent" plus a "children" list and indexes
	        # every entry that has a document, so the remaining parents travel
	        # in the list. Passing only the first parent would leave every other
	        # page's parent chunk out of the index.
	        chunks_json = json.dumps({
	            "parent": parents[0] if parents else {},
	            "children": list(parents[1:]) + list(children)
	        })

	        result = generate_embeddings_and_index.invoke({"chunks_json": chunks_json})
	        index_result = json.loads(result)

	        state["stage"] = WorkflowStage.RETRIEVAL.value
	        if index_result.get("success", False):
	            state["progress_log"].append(f"✅ Indexed {index_result.get('chunks_indexed', 0)} chunks")
	            print(f"✅ Indexing complete")
	        else:
	            # Surface the tool's failure instead of logging it as a success
	            reason = str(index_result.get("error", "unknown error"))[:100]
	            state["progress_log"].append(f"⚠️ Indexing failed: {reason}")
	            print(f"⚠️ Indexing failed: {reason}")

	        return state
	    except Exception as e:
	        state["error_message"] = f"Indexing agent failed: {str(e)}"
	        state["stage"] = WorkflowStage.ERROR.value
	        return state

	def run_retrieval_agent(state: EcommerceAgentState, progress_callback=None) -> EcommerceAgentState:
	    """Retrieval Agent - Find similar chunks.

	    Looks up the five indexed chunks most similar to ``state["query"]`` and
	    stores them in ``state["retrieved_children"]`` as dicts with ``id``,
	    ``document``, ``metadata`` and ``distance``.

	    Args:
	        state: Pipeline state, updated in place.
	        progress_callback: Optional callable taking a status message.

	    Returns:
	        The same state object, with stage SYNTHESIS (also when the index is
	        empty) or ERROR when retrieval raised.
	    """
	    try:
	        print(f"\n{'=' * 70}\n🔎 RETRIEVAL AGENT\n{'=' * 70}")
	        if progress_callback:
	            progress_callback("🔎 Retrieving similar content...")

	        query = state["query"]

	        # Check if FAISS has any chunks (explicit None test: the index is a
	        # library object whose truthiness is not something to rely on)
	        if faiss_index is None or faiss_index.ntotal == 0:
	            print("⚠️ FAISS index is empty - no chunks to retrieve")
	            state["retrieved_children"] = []
	            state["progress_log"].append("⚠️ FAISS index empty")
	        else:
	            # Retrieve chunks
	            result_json = retrieve_similar_chunks.invoke({"query_text": query, "k": 5})

	            # The tool normally returns a JSON string; accept a dict as well
	            if isinstance(result_json, dict):
	                results = result_json
	            else:
	                results = json.loads(result_json)

	            retrieved_children = []
	            for doc_id, doc_text, metadata, dist in zip(
	                results.get("ids", [[]])[0],
	                results.get("documents", [[]])[0],
	                results.get("metadatas", [[]])[0],
	                results.get("distances", [[]])[0]
	            ):
	                retrieved_children.append({
	                    "id": doc_id,
	                    "document": doc_text,
	                    "metadata": metadata,
	                    "distance": float(dist)
	                })

	            state["retrieved_children"] = retrieved_children
	            state["progress_log"].append(f"✅ Retrieved {len(retrieved_children)} chunks")
	            print(f"✅ Retrieval complete: {len(retrieved_children)} chunks")

	        state["stage"] = WorkflowStage.SYNTHESIS.value
	        return state
	    except Exception as e:
	        print(f"❌ Retrieval error: {str(e)}")
	        state["error_message"] = f"Retrieval agent failed: {str(e)}"
	        state["stage"] = WorkflowStage.ERROR.value
	        return state

	def run_synthesis_agent(state: EcommerceAgentState, progress_callback=None) -> EcommerceAgentState:
	    """Synthesis Agent - Generate answer with robust error handling.

	    Builds the prompt context from up to five retrieved chunks and the
	    earlier chat messages, asks ``generate_answer`` for a reply (retrying
	    up to three times when it returns an "Error..." string) and records the
	    answer, up to three citations and a heuristic confidence.

	    Args:
	        state: Pipeline state; ``final_answer``, ``citations``,
	            ``confidence``, ``stage`` and ``progress_log`` are updated in place.
	        progress_callback: Optional callable taking a status message.

	    Returns:
	        The same state object, with stage COMPLETE, or ERROR when the agent
	        itself raised.
	    """
	    import time  # local import: `time` is not imported at file level

	    try:
	        print(f"\n{'=' * 70}\n💡 SYNTHESIS AGENT\n{'=' * 70}")
	        if progress_callback:
	            progress_callback("💡 Generating answer...")

	        query = state["query"]
	        retrieved = state.get("retrieved_children", [])
	        chat_messages = state.get("chat_messages", [])

	        # Prepare context - handle empty case
	        if retrieved:
	            context = "\n\n".join([
	                f"[{i+1}] {chunk.get('metadata', {}).get('source', 'Unknown')}\n{chunk.get('document', '')}"
	                for i, chunk in enumerate(retrieved[:5])
	            ])
	            # Heuristic: 0.1 per retrieved chunk, capped at 0.9
	            confidence = min(0.9, len(retrieved) / 10.0)
	        else:
	            context = "[No relevant information found in knowledge base]"
	            confidence = 0.1

	        # Build chat history context
	        chat_context = ""
	        if chat_messages and len(chat_messages) > 1:
	            chat_context = "Previous Conversation:\n"
	            for msg in chat_messages[:-1]:  # All except current query
	                role = msg.get("role", "").upper()
	                content = msg.get("content", "")
	                chat_context += f"{role}: {content[:300]}\n\n"  # First 300 chars of each message

	        # Generate answer with retry logic built-in
	        answer = None
	        max_retries = 3
	        for retry_attempt in range(max_retries):
	            try:
	                print(f" ⏳ Generating (attempt {retry_attempt + 1}/{max_retries})...")
	                answer = generate_answer(query, context, chat_context)

	                # Check if answer is an error
	                if answer and not answer.startswith("Error"):
	                    print(f" ✅ Answer generated successfully")
	                    break
	                elif answer and answer.startswith("Error"):
	                    print(f" ⚠️ {answer[:80]}")
	                    if retry_attempt < max_retries - 1:
	                        wait = 5 * (retry_attempt + 1)
	                        print(f" ⏳ Waiting {wait}s before retry...")
	                        time.sleep(wait)
	                    else:
	                        print(f" ❌ Max retries reached")
	            except Exception as gen_err:
	                print(f" ⚠️ Generation error: {str(gen_err)[:80]}")
	                if retry_attempt < max_retries - 1:
	                    wait = 5 * (retry_attempt + 1)
	                    print(f" ⏳ Waiting {wait}s before retry...")
	                    time.sleep(wait)
	                else:
	                    answer = f"Error generating answer: {str(gen_err)[:100]}"

	        if not answer:
	            answer = f"Error generating answer: Unable to generate response after {max_retries} attempts"

	        # Extract citations from retrieved chunks
	        citations = [
	            {
	                "source": chunk.get("metadata", {}).get("source", "Unknown"),
	                "section": "Content",
	                "url": chunk.get("metadata", {}).get("source", "")
	            }
	            for chunk in retrieved[:3]
	        ]

	        state["final_answer"] = answer
	        state["citations"] = citations
	        state["confidence"] = confidence
	        state["stage"] = WorkflowStage.COMPLETE.value
	        state["progress_log"].append(f"✅ Synthesis complete: Confidence={confidence:.2f}")
	        print(f"✅ Synthesis complete")

	        return state
	    except Exception as e:
	        print(f"❌ Synthesis error: {str(e)}")
	        state["error_message"] = f"Synthesis agent failed: {str(e)}"
	        state["stage"] = WorkflowStage.ERROR.value
	        return state

	# --- 6. WORKFLOW ORCHESTRATION ---

	def run_complete_workflow(query: str, chat_history: List = None, progress=gr.Progress()) -> EcommerceAgentState:
	    """Run the whole pipeline for one query and return the final state.

	    The decision agent chooses between a fresh search / chunk / index pass
	    and reusing what is already indexed; retrieval and synthesis always
	    follow. The state is returned early as soon as any agent before
	    synthesis leaves it in the ERROR stage.

	    Args:
	        query: The user's question.
	        chat_history: Message dicts for the conversation so far.
	        progress: Gradio progress tracker used to report each step.
	    """
	    history = [] if chat_history is None else chat_history

	    state = create_initial_state(query)
	    state["chat_messages"] = history  # Add chat history to state

	    def update_progress(msg: str):
	        progress(0.2, desc=msg)

	    failed = WorkflowStage.ERROR.value

	    # Step 1: Decision Agent - Determine if search is needed
	    needs_search, decision_reason = run_decision_agent(query, history, update_progress)
	    state["progress_log"].append(f"🤔 {decision_reason}")

	    # Step 2: Queue the search agents only when a search is needed
	    pipeline = []
	    if needs_search:
	        pipeline.extend([
	            (0.25, "🔍 Searching...", run_search_agent),
	            (0.35, "📄 Chunking...", run_chunking_agent),
	            (0.5, "🗂️ Indexing...", run_indexing_agent),
	        ])
	    else:
	        # Skip search/chunking/indexing, go directly to retrieval
	        print("📚 Reusing indexed data from previous query...")
	        state["progress_log"].append("📚 Using existing indexed knowledge")
	        state["stage"] = WorkflowStage.RETRIEVAL.value

	    # Step 3: Retrieval (always run)
	    pipeline.append((0.65, "🔎 Retrieving...", run_retrieval_agent))

	    for fraction, label, agent in pipeline:
	        progress(fraction, desc=label)
	        state = agent(state, update_progress)
	        if state["stage"] == failed:
	            return state

	    # Synthesis (always run); its outcome is not checked for ERROR
	    progress(0.85, desc="💡 Generating answer...")
	    state = run_synthesis_agent(state, update_progress)
	    progress(1.0, desc="✅ Complete")

	    # Add to conversation history
	    add_to_conversation_history(query, state["final_answer"])

	    return state

	# --- 7. GRADIO INTERFACE ---#

	def chat_interface(message: str, chat_history: List) -> Tuple[str, List]:
	    """Handle one chat turn for the Gradio UI.

	    Appends the user message to ``chat_history`` (in place), runs the
	    workflow, then appends the formatted assistant reply.

	    Returns:
	        ``("", chat_history)`` - the empty string clears the input box.
	        Blank messages are ignored and the history is returned unchanged.
	    """
	    if not (message and message.strip() != ""):
	        return "", chat_history

	    # Add user message to history (new format: dict with role and content)
	    chat_history.append({"role": "user", "content": message})

	    # Run workflow with full chat history
	    state = run_complete_workflow(message, chat_history=chat_history)

	    # Build assistant response piece by piece, then join once
	    answer_header = f"""🤖 Answer
	{state['final_answer']}

	Confidence: {state['confidence']*100:.0f}%

	---

	📚 Sources:
	"""
	    sections = [answer_header]
	    sections.extend(f"- {cite['source']}\n" for cite in state["citations"][:3])

	    sections.append(f"\n---\n\n📋 Process Log:\n")
	    sections.extend(f"✓ {log}\n" for log in state["progress_log"])

	    if state["error_message"]:
	        sections.append(f"\n⚠️ Error: {state['error_message']}")

	    assistant_response = "".join(sections)

	    # Update last message with assistant response (new format)
	    chat_history.append({"role": "assistant", "content": assistant_response})

	    return "", chat_history

	# Build Gradio interface: a chat pane above a question box and send button
	with gr.Blocks(title="🛍️ E-commerce Chat RAG") as demo:
	    gr.Markdown("""
	# 🛍️ E-commerce Chat RAG System


	Ask any question about e-commerce products and prices!

	Agents: 🔍 Search → 📄 Chunk → 🗂️ Index → 🔎 Retrieve → 💡 Synthesize
	""")

	    chatbot = gr.Chatbot(label="💬 Chat", height=600)

	    with gr.Row():
	        msg = gr.Textbox(
	            label="Your Question",
	            placeholder="e.g., What is the current price of iPhone 14?",
	            lines=2,
	            scale=4,
	        )
	        submit_btn = gr.Button("Send 📤", variant="primary", scale=1)

	    # Chat submission: the button click and the Enter key in the textbox
	    # both run the same handler with the same inputs and outputs
	    for register_event in (submit_btn.click, msg.submit):
	        register_event(
	            fn=chat_interface,
	            inputs=[msg, chatbot],
	            outputs=[msg, chatbot],
	            queue=True,
	        )

	if __name__ == "__main__":
	    try:
	        # For HF Spaces: suppress asyncio event loop cleanup warnings
	        demo.launch(share=False, show_error=True)
	    finally:
	        # Clean up event loop on shutdown. This is best-effort: any failure
	        # while stopping or closing the loop is deliberately ignored.
	        try:
	            event_loop = asyncio.get_event_loop()
	            if event_loop.is_running():
	                event_loop.stop()
	            event_loop.close()
	        except Exception:
	            pass