Spaces:

hamxaameer
/

OutfitOrbit-Chatbot-Assistant

Running

App Files Files Community

hamxaameer committed 6 days ago

Commit

a32c4ca

verified ·

1 Parent(s): c993f47

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -247

app.py CHANGED Viewed

@@ -28,8 +28,8 @@ logger = logging.getLogger(__name__)
 CONFIG = {
     "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
-    "llm_model": None,  # Will be set during initialization
-    "vector_store_path": ".",  # Root directory (files are in root on HF Spaces)
     "top_k": 15,
     "temperature": 0.75,
     "max_tokens": 350,
@@ -40,34 +40,24 @@ CONFIG = {
 # ============================================================================
 def initialize_llm():
-    """Initialize free local LLM with transformers pipeline"""
     logger.info("🔄 Initializing FREE local language model...")
-    # Use FLAN-T5-Large - reliable, fast, and proven to work
     model_name = "google/flan-t5-large"
     try:
         logger.info(f"   Loading {model_name}...")
         device = 0 if torch.cuda.is_available() else -1
-        # T5 configuration
-        task = "text2text-generation"
-        model_type = "t5"
-        # Optimized for speed and quality
-        model_kwargs = {
-            "low_cpu_mem_usage": True,
-        }
         llm_client = pipeline(
-            task,
             model=model_name,
             device=device,
             model_kwargs=model_kwargs
         )
         CONFIG["llm_model"] = model_name
-        CONFIG["model_type"] = model_type
         logger.info(f"✅ LLM initialized: {model_name}")
         logger.info(f"   Device: {'GPU' if device == 0 else 'CPU'}")
         return llm_client
@@ -77,7 +67,6 @@ def initialize_llm():
         raise Exception(f"Failed to initialize LLM: {str(e)}")
 def initialize_embeddings():
-    """Initialize sentence transformer embeddings"""
     logger.info("🔄 Initializing embeddings model...")
     embeddings = HuggingFaceEmbeddings(
@@ -90,28 +79,22 @@ def initialize_embeddings():
     return embeddings
 def load_vector_store(embeddings):
-    """Load FAISS vector store with Pydantic monkey-patch"""
     logger.info("🔄 Loading FAISS vector store...")
     vector_store_path = CONFIG["vector_store_path"]
-    # Check for required FAISS files
     index_file = os.path.join(vector_store_path, "index.faiss")
     pkl_file = os.path.join(vector_store_path, "index.pkl")
     if not os.path.exists(index_file):
-        logger.error(f"❌ index.faiss not found at {index_file}")
         raise FileNotFoundError(f"FAISS index file not found: {index_file}")
     if not os.path.exists(pkl_file):
-        logger.error(f"❌ index.pkl not found at {pkl_file}")
         raise FileNotFoundError(f"FAISS metadata file not found: {pkl_file}")
     logger.info(f"✅ Found index.faiss ({os.path.getsize(index_file)/1024/1024:.2f} MB)")
     logger.info(f"✅ Found index.pkl ({os.path.getsize(pkl_file)/1024:.2f} KB)")
     try:
-        # Try standard loading first
         vectorstore = FAISS.load_local(
             vector_store_path,
             embeddings,
@@ -120,33 +103,25 @@ def load_vector_store(embeddings):
         logger.info(f"✅ FAISS vector store loaded successfully")
         return vectorstore
-    except (KeyError, AttributeError, Exception) as e:
         logger.warning(f"⚠️ Pydantic compatibility issue: {str(e)[:100]}")
         logger.info("🔄 Applying Pydantic monkey-patch and retrying...")
-        # STEP 1: Monkey-patch Pydantic to handle missing __fields_set__
         try:
             import pydantic.v1.main as pydantic_main
-            # Save original __setstate__
             original_setstate = pydantic_main.BaseModel.__setstate__
             def patched_setstate(self, state):
-                """Patched __setstate__ that handles missing __fields_set__"""
-                # Add missing __fields_set__ if not present
                 if '__fields_set__' not in state:
                     state['__fields_set__'] = set(state.get('__dict__', {}).keys())
-                # Call original
                 return original_setstate(self, state)
-            # Apply patch
             pydantic_main.BaseModel.__setstate__ = patched_setstate
             logger.info("   ✅ Pydantic monkey-patch applied")
         except Exception as patch_error:
             logger.warning(f"   ⚠️ Pydantic patch failed: {patch_error}")
-        # STEP 2: Try loading again with patch
         try:
             vectorstore = FAISS.load_local(
                 vector_store_path,
@@ -158,44 +133,30 @@ def load_vector_store(embeddings):
         except Exception as e2:
             logger.error(f"   ✗ Still failed after patch: {str(e2)[:100]}")
-            # STEP 3: Last resort - manual reconstruction
             logger.info("🔄 Using manual reconstruction (last resort)...")
             import faiss
-            import pickle
             from langchain_community.docstore.in_memory import InMemoryDocstore
-            # Load FAISS index
             index = faiss.read_index(index_file)
             logger.info(f"   ✅ FAISS index loaded")
-            # Load pickle with raw binary parsing
             with open(pkl_file, "rb") as f:
-                import io
-                import struct
-                # Read raw bytes
                 raw_bytes = f.read()
                 logger.info(f"   Read {len(raw_bytes)} bytes from pickle")
-                # Try to extract text content directly (bypass Pydantic completely)
-                # This is a fallback that extracts document strings
-                import re
-                # Find all text patterns that look like documents
                 text_pattern = rb'([A-Za-z0-9\s\.\,\;\:\!\?\-\'\"\(\)]{50,})'
                 matches = re.findall(text_pattern, raw_bytes)
                 if len(matches) > 100:
                     logger.info(f"   Found {len(matches)} potential document fragments")
-                    # Create documents from extracted text
                     documents = []
-                    for idx, match in enumerate(matches[:5000]):  # Use first 5000 quality matches
                         try:
                             content = match.decode('utf-8', errors='ignore').strip()
-                            if len(content) >= 100:  # Only high-quality, substantial content
                                 doc = Document(
                                     page_content=content,
                                     metadata={"source": "reconstructed", "id": idx}
@@ -210,7 +171,6 @@ def load_vector_store(embeddings):
                     logger.info(f"   ✅ Extracted {len(documents)} high-quality documents")
                     logger.info(f"   🔄 Rebuilding FAISS index from scratch...")
-                    # Create NEW FAISS index from documents (ignore old corrupted index)
                     vectorstore = FAISS.from_documents(
                         documents=documents,
                         embedding=embeddings
@@ -230,20 +190,15 @@ def retrieve_knowledge_langchain(
     vectorstore,
     top_k: int = 15
 ) -> Tuple[List[Document], float]:
-    """
-    Retrieve relevant documents using LangChain FAISS with query expansion
-    """
     logger.info(f"🔍 Retrieving knowledge for: '{query}'")
-    # Create query variants for better coverage
     query_variants = [
-        query,  # Original
-        f"fashion advice clothing outfit style for {query}",  # Semantic expansion
     ]
     all_docs = []
-    # Retrieve for each variant
     for variant in query_variants:
         try:
             docs_and_scores = vectorstore.similarity_search_with_score(variant, k=top_k)
@@ -257,23 +212,18 @@ def retrieve_knowledge_langchain(
         except Exception as e:
             logger.error(f"Retrieval error for variant '{variant}': {e}")
-    # Deduplicate by content
     unique_docs = {}
     for doc in all_docs:
         content_key = doc.page_content[:100]
         if content_key not in unique_docs:
             unique_docs[content_key] = doc
         else:
-            # Keep document with higher similarity
             if doc.metadata.get('similarity', 0) > unique_docs[content_key].metadata.get('similarity', 0):
                 unique_docs[content_key] = doc
     final_docs = list(unique_docs.values())
-    # Sort by similarity
     final_docs.sort(key=lambda x: x.metadata.get('similarity', 0), reverse=True)
-    # Calculate confidence
     if final_docs:
         avg_similarity = sum(d.metadata.get('similarity', 0) for d in final_docs) / len(final_docs)
         confidence = min(avg_similarity, 1.0)
@@ -290,64 +240,50 @@ def generate_llm_answer(
     llm_client,
     attempt: int = 1
 ) -> Optional[str]:
-    """
-    Generate answer using local LLM with retrieved context
-    """
     if not llm_client:
         logger.error("  → LLM client not initialized")
         return None
-    # Build focused context with relevance filtering
     query_lower = query.lower()
     query_words = set(query_lower.split())
-    # ANTI-HALLUCINATION: Filter for fashion-relevant documents only
-    fashion_terms = {'wear', 'outfit', 'style', 'fashion', 'clothing', 'color', 'dress', 'fabric'}
     scored_docs = []
     for doc in retrieved_docs[:20]:
         content = doc.page_content.lower()
         doc_words = set(content.split())
-        # Check if document contains fashion terms
-        has_fashion = any(term in content for term in fashion_terms)
-        if not has_fashion:
-            continue  # Skip non-fashion documents
         overlap = len(query_words.intersection(doc_words))
-        # Boost for verified/curated
         if doc.metadata.get('verified', False):
             overlap += 10
-        # Boost for longer content
         if len(doc.page_content) > 200:
             overlap += 3
         scored_docs.append((doc, overlap))
-    # If no fashion-relevant docs found, return None
-    if not scored_docs:
-        logger.warning("  ⚠️ No fashion-relevant documents found")
-        return None
-    # Sort and take top 8
-    # Optimized parameters for 2-attempt strategy
     if attempt == 1:
         temperature = 0.75
         max_tokens = 350
         top_p = 0.92
         repetition_penalty = 1.15
-    else:  # attempt == 2
         temperature = 0.85
         max_tokens = 450
         top_p = 0.94
         repetition_penalty = 1.2
-        temperature = 0.75
-        max_new_tokens = 300
-        top_p = 0.92
-        repetition_penalty = 1.25
-    # T5 format - simple and effective for good answers
     user_prompt = f"""Answer this fashion question with detailed, specific advice using the context provided.
 Question: {query}
@@ -356,73 +292,60 @@ Fashion Context:
 {context_text[:1500]}
 Provide a complete, detailed answer (150-250 words):"""
-        repetition_penalty = 1.35
-    # Create COMPACT T5 prompt to stay under 512 tokens (critical!)
-    model_type = CONFIG.get("model_type", "t5")
-    # T5 format - with explicit constraints to prevent hallucination
-    user_prompt = f"""You are a fashion expert. Answer ONLY about fashion, clothing, and style.
-Question: {query}
-Fashion Knowledge:
-{context_text[:600]}
-Rules:
-- Answer ONLY using the fashion knowledge provided
-- Focus on clothing, outfits, colors, fabrics, and styling
-- DO NOT mention: politics, history, wars, empires, architecture
-- If unsure, say "I don't have enough information"
-Fashion Answer:"""
     try:
-        logger.info(f"  → Calling {CONFIG['llm_model']} (temp={temperature}, tokens={max_new_tokens})...")
-        # T5 optimized for SPEED on CPU - use greedy decoding (num_beams=1)
         output = llm_client(
             user_prompt,
-            max_new_tokens=max_new_tokens,
-            min_new_tokens=80,  # Lower minimum for faster completion
-            temperature=temperature,
-            top_p=top_p,
             do_sample=True,
-            num_beams=1,  # Greedy decoding for 4x faster speed on CPU
-            repetition_penalty=repetition_penalty,
-            early_stopping=True,
-            no_repeat_ngram_size=3,
-            truncation=True  # CRITICAL: Truncate input if too long
         )
-        # Extract generated text
         response = output[0]['generated_text'].strip()
         if not response:
             logger.warning(f"  ✗ Empty response (attempt {attempt})")
             return None
-        # Validation - accept responses with meaningful content
-        if len(response) < 80:
-            logger.warning(f"  ✗ Response too short: {len(response)} chars (need 80+)")
             return None
-        # Check for apologies/refusals
         apology_phrases = ["i cannot", "i can't", "i'm sorry", "i apologize", "i don't have"]
         if any(phrase in response.lower()[:100] for phrase in apology_phrases):
             logger.warning(f"  ✗ Apology detected")
             return None
-        # Log response length and word count
-        word_count = len(response.split())
-        logger.info(f"  ✅ Generated answer ({len(response)} chars, {word_count} words)")
         return response
     except Exception as e:
     if not retrieved_docs:
         return "I couldn't find relevant information to answer your question."
-    # Step 2: Try LLM generation (2 fast attempts for efficiency)
     llm_answer = None
     for attempt in range(1, 3):
         logger.info(f"\n  🤖 LLM Generation Attempt {attempt}/2")
@@ -434,33 +357,6 @@ Fashion Answer:"""
         else:
             logger.warning(f"  → Attempt {attempt}/2 failed, retrying...")
-    # Step 3: If all attempts fail, return error
-    if not llm_answer:
-        logger.error(f"  ✗ All 2 LLM attempts failed")
-        return "I apologize, but I'm having trouble generating a response. Please try rephrasing your question or ask something else."
-    return llm_answeronfidence = retrieve_knowledge_langchain(
-        query,
-        vectorstore,
-        top_k=CONFIG["top_k"]
-    )
-def fashion_chatbot(message: str, history: List[List[str]]):
-    """
-    Chatbot function for Gradio interface with streaming
-    """
-    try:
-        if not message or not message.strip():
-            yield "Please ask a fashion-related question!"
-            return
-        # Show searching indicator
-        yield "🔍 Searching fashion knowledge..."d successfully")
-            break
-        else:
-            logger.warning(f"  → Attempt {attempt}/2 failed, retrying...")
-    # Step 3: If all attempts fail, return error
     if not llm_answer:
         logger.error(f"  ✗ All 2 LLM attempts failed")
         return "I apologize, but I'm having trouble generating a response. Please try rephrasing your question or ask something else."
@@ -471,54 +367,14 @@ def fashion_chatbot(message: str, history: List[List[str]]):
 # GRADIO INTERFACE
 # ============================================================================
-        # Generate answer with 2 fast attempts
-        llm_answer = None
-        for attempt in range(1, 3):
-            logger.info(f"\n  🤖 LLM Generation Attempt {attempt}/2")
-            llm_answer = generate_llm_answer(message.strip(), retrieved_docs, llm_client, attempt)
-            if llm_answer:
-                break
-        # If LLM fails, show error
-        if not llm_answer:
-            logger.error(f"  ✗ All LLM attempts failed")
-            yield "I apologize, but I'm having trouble generating a response. Please try rephrasing your question."
-            return', 'match', 'look', 'shirt', 'pants', 'shoes', 'accessory',
-            'wardrobe', 'fit', 'fabric', 'pattern', 'casual', 'formal', 'seasonal',
-            'wedding', 'meeting', 'interview', 'date', 'party', 'jeans', 'suit',
-            'skirt', 'jacket', 'coat', 'sweater', 'blouse', 'tie', 'scarf', 'boots',
-            'hat', 'bag', 'purse', 'jewelry', 'necklace', 'bracelet', 'watch'
-        ]
-        # Reject obviously non-fashion questions FIRST (higher priority)
-        non_fashion_indicators = [
-            'crisis', 'collapse', 'empire', 'war', 'politics', 'economy',
-            'architecture', 'building', 'nebula', 'space', 'republic',
-            'soviet', 'ottoman', 'history', 'government', 'president', 'designed',
-            'architect', 'eastern', 'western', 'communist', 'russia', 'political',
-            'military', 'sapphire crisis', 'who designed', 'what caused'
-        ]
-        has_non_fashion = any(indicator in query_lower for indicator in non_fashion_indicators)
-        # STRICT CHECK: If non-fashion detected, reject immediately
-        if has_non_fashion:
-            logger.info(f"❌ Non-fashion query rejected: {message.strip()}")
-            yield "I'm a fashion advisor and can only answer questions about clothing, style, and fashion. Please ask me about outfits, styling, colors, or wardrobe advice!"
-            return
-        # Check if query contains fashion keywords
-        is_fashion_query = any(keyword in query_lower for keyword in fashion_keywords)
-        if not is_fashion_query:
-            yield "I'm a fashion advisor and can only answer questions about clothing, style, and fashion. Please ask me about outfits, styling, colors, or wardrobe advice!"
             return
-        # Show searching indicator (only for valid fashion queries)
         yield "🔍 Searching fashion knowledge..."
-        # Retrieve documents (only after validation passes)
         retrieved_docs, confidence = retrieve_knowledge_langchain(
             message.strip(),
             vectorstore,
@@ -529,54 +385,21 @@ def fashion_chatbot(message: str, history: List[List[str]]):
             yield "I couldn't find relevant information to answer your question."
             return
-        # ANTI-HALLUCINATION: Check retrieval quality
-        if confidence < 0.35:
-            yield "I don't have enough reliable information about this specific topic. Could you rephrase or ask about common fashion topics like outfit recommendations, color matching, or styling advice?"
-            return
-        # Show generating indicator
         yield f"💭 Generating answer ({len(retrieved_docs)} sources found)..."
-        # Generate answer with 2 quick attempts
         llm_answer = None
         for attempt in range(1, 3):
             logger.info(f"\n  🤖 LLM Generation Attempt {attempt}/2")
             llm_answer = generate_llm_answer(message.strip(), retrieved_docs, llm_client, attempt)
             if llm_answer:
-                # ANTI-HALLUCINATION: Validate answer relevance
-                answer_lower = llm_answer.lower()
-                # Check for hallucination indicators
-                hallucination_markers = [
-                    'empire', 'ottoman', 'soviet', 'russia', 'collapse', 'crisis',
-                    'republic', 'communist', 'nebula', 'architecture', 'political',
-                    'government', 'war', 'military', 'economic'
-                ]
-                has_hallucination = any(marker in answer_lower for marker in hallucination_markers)
-                # Check if answer contains fashion terms
-                fashion_terms = [
-                    'wear', 'outfit', 'style', 'clothing', 'fabric', 'color',
-                    'match', 'fit', 'look', 'fashion', 'dress', 'suit'
-                ]
-                has_fashion_content = any(term in answer_lower for term in fashion_terms)
-                if has_hallucination or not has_fashion_content:
-                    logger.warning(f"  ⚠️ Hallucination detected in attempt {attempt}, retrying...")
-                    llm_answer = None
-                    continue
-                else:
-                    break
-        # If LLM fails, show error
         if not llm_answer:
-            logger.error(f"  ✗ All LLM attempts failed or produced hallucinations")
-            yield "I apologize, but I'm having trouble generating a reliable fashion answer. Please ask about specific fashion topics like outfit recommendations, color coordination, or styling tips."
             return
-        # Stream the answer word by word for natural flow
         import time
         words = llm_answer.split()
         displayed_text = ""
@@ -584,10 +407,9 @@ def fashion_chatbot(message: str, history: List[List[str]]):
         for i, word in enumerate(words):
             displayed_text += word + " "
-            # Yield every 3 words for smooth streaming
             if i % 3 == 0 or i == len(words) - 1:
                 yield displayed_text.strip()
-                time.sleep(0.05)  # Small delay for natural flow
     except Exception as e:
         logger.error(f"Error in chatbot: {e}")
@@ -597,32 +419,23 @@ def fashion_chatbot(message: str, history: List[List[str]]):
 # INITIALIZE AND LAUNCH
 # ============================================================================
-# Global variables
 llm_client = None
 embeddings = None
 vectorstore = None
 def startup():
-    """Initialize all models and load vector store"""
     global llm_client, embeddings, vectorstore
     logger.info("🚀 Starting Fashion Advisor RAG...")
-    # Initialize embeddings
     embeddings = initialize_embeddings()
-    # Load vector store
     vectorstore = load_vector_store(embeddings)
-    # Initialize LLM
     llm_client = initialize_llm()
     logger.info("✅ All components initialized successfully!")
-# Initialize on startup
 startup()
-# Create Gradio interface - simple version compatible with all Gradio versions
 demo = gr.ChatInterface(
     fn=fashion_chatbot,
     title="👗 Fashion Advisor - RAG System",
@@ -647,6 +460,5 @@ I can help with:
     ],
 )
-# Launch
 if __name__ == "__main__":
     demo.launch()

 CONFIG = {
     "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
+    "llm_model": None,
+    "vector_store_path": ".",
     "top_k": 15,
     "temperature": 0.75,
     "max_tokens": 350,
 # ============================================================================
 def initialize_llm():
     logger.info("🔄 Initializing FREE local language model...")
     model_name = "google/flan-t5-large"
     try:
         logger.info(f"   Loading {model_name}...")
         device = 0 if torch.cuda.is_available() else -1
+        model_kwargs = {"low_cpu_mem_usage": True}
         llm_client = pipeline(
+            "text2text-generation",
             model=model_name,
             device=device,
             model_kwargs=model_kwargs
         )
         CONFIG["llm_model"] = model_name
+        CONFIG["model_type"] = "t5"
         logger.info(f"✅ LLM initialized: {model_name}")
         logger.info(f"   Device: {'GPU' if device == 0 else 'CPU'}")
         return llm_client
         raise Exception(f"Failed to initialize LLM: {str(e)}")
 def initialize_embeddings():
     logger.info("🔄 Initializing embeddings model...")
     embeddings = HuggingFaceEmbeddings(
     return embeddings
 def load_vector_store(embeddings):
     logger.info("🔄 Loading FAISS vector store...")
     vector_store_path = CONFIG["vector_store_path"]
     index_file = os.path.join(vector_store_path, "index.faiss")
     pkl_file = os.path.join(vector_store_path, "index.pkl")
     if not os.path.exists(index_file):
         raise FileNotFoundError(f"FAISS index file not found: {index_file}")
     if not os.path.exists(pkl_file):
         raise FileNotFoundError(f"FAISS metadata file not found: {pkl_file}")
     logger.info(f"✅ Found index.faiss ({os.path.getsize(index_file)/1024/1024:.2f} MB)")
     logger.info(f"✅ Found index.pkl ({os.path.getsize(pkl_file)/1024:.2f} KB)")
     try:
         vectorstore = FAISS.load_local(
             vector_store_path,
             embeddings,
         logger.info(f"✅ FAISS vector store loaded successfully")
         return vectorstore
+    except Exception as e:
         logger.warning(f"⚠️ Pydantic compatibility issue: {str(e)[:100]}")
         logger.info("🔄 Applying Pydantic monkey-patch and retrying...")
         try:
             import pydantic.v1.main as pydantic_main
             original_setstate = pydantic_main.BaseModel.__setstate__
             def patched_setstate(self, state):
                 """Restore pickled state, supplying '__fields_set__' when it is absent.

                 If ``state`` has no '__fields_set__' key, one is derived from the
                 keys of ``state['__dict__']`` (an empty set when that entry is
                 missing too) before delegating to the original ``__setstate__``.
                 Note that ``state`` is modified in place.
                 """
                 # NOTE(review): presumably targets pickles written under a different
                 # Pydantic version — confirm against the saved index.pkl.
                 if '__fields_set__' not in state:
                     state['__fields_set__'] = set(state.get('__dict__', {}).keys())
                 return original_setstate(self, state)
             pydantic_main.BaseModel.__setstate__ = patched_setstate
             logger.info("   ✅ Pydantic monkey-patch applied")
         except Exception as patch_error:
             logger.warning(f"   ⚠️ Pydantic patch failed: {patch_error}")
         try:
             vectorstore = FAISS.load_local(
                 vector_store_path,
         except Exception as e2:
             logger.error(f"   ✗ Still failed after patch: {str(e2)[:100]}")
             logger.info("🔄 Using manual reconstruction (last resort)...")
             import faiss
             from langchain_community.docstore.in_memory import InMemoryDocstore
             index = faiss.read_index(index_file)
             logger.info(f"   ✅ FAISS index loaded")
             with open(pkl_file, "rb") as f:
+                import re
                 raw_bytes = f.read()
                 logger.info(f"   Read {len(raw_bytes)} bytes from pickle")
                 text_pattern = rb'([A-Za-z0-9\s\.\,\;\:\!\?\-\'\"\(\)]{50,})'
                 matches = re.findall(text_pattern, raw_bytes)
                 if len(matches) > 100:
                     logger.info(f"   Found {len(matches)} potential document fragments")
                     documents = []
+                    for idx, match in enumerate(matches[:5000]):
                         try:
                             content = match.decode('utf-8', errors='ignore').strip()
+                            if len(content) >= 100:
                                 doc = Document(
                                     page_content=content,
                                     metadata={"source": "reconstructed", "id": idx}
                     logger.info(f"   ✅ Extracted {len(documents)} high-quality documents")
                     logger.info(f"   🔄 Rebuilding FAISS index from scratch...")
                     vectorstore = FAISS.from_documents(
                         documents=documents,
                         embedding=embeddings
     vectorstore,
     top_k: int = 15
 ) -> Tuple[List[Document], float]:
     logger.info(f"🔍 Retrieving knowledge for: '{query}'")
     query_variants = [
+        query,
+        f"fashion advice clothing outfit style for {query}",
     ]
     all_docs = []
     for variant in query_variants:
         try:
             docs_and_scores = vectorstore.similarity_search_with_score(variant, k=top_k)
         except Exception as e:
             logger.error(f"Retrieval error for variant '{variant}': {e}")
     unique_docs = {}
     for doc in all_docs:
         content_key = doc.page_content[:100]
         if content_key not in unique_docs:
             unique_docs[content_key] = doc
         else:
             if doc.metadata.get('similarity', 0) > unique_docs[content_key].metadata.get('similarity', 0):
                 unique_docs[content_key] = doc
     final_docs = list(unique_docs.values())
     final_docs.sort(key=lambda x: x.metadata.get('similarity', 0), reverse=True)
     if final_docs:
         avg_similarity = sum(d.metadata.get('similarity', 0) for d in final_docs) / len(final_docs)
         confidence = min(avg_similarity, 1.0)
     llm_client,
     attempt: int = 1
 ) -> Optional[str]:
     if not llm_client:
         logger.error("  → LLM client not initialized")
         return None
     query_lower = query.lower()
     query_words = set(query_lower.split())
     scored_docs = []
     for doc in retrieved_docs[:20]:
         content = doc.page_content.lower()
         doc_words = set(content.split())
         overlap = len(query_words.intersection(doc_words))
         if doc.metadata.get('verified', False):
             overlap += 10
         if len(doc.page_content) > 200:
             overlap += 3
         scored_docs.append((doc, overlap))
+    scored_docs.sort(key=lambda x: x[1], reverse=True)
+    top_docs = [doc[0] for doc in scored_docs[:8]]
+    context_parts = []
+    for doc in top_docs:
+        content = doc.page_content.strip()
+        if len(content) > 400:
+            content = content[:400] + "..."
+        context_parts.append(content)
+    context_text = "\n\n".join(context_parts)
     if attempt == 1:
         temperature = 0.75
         max_tokens = 350
         top_p = 0.92
         repetition_penalty = 1.15
+    else:
         temperature = 0.85
         max_tokens = 450
         top_p = 0.94
         repetition_penalty = 1.2
     user_prompt = f"""Answer this fashion question with detailed, specific advice using the context provided.
 Question: {query}
 {context_text[:1500]}
 Provide a complete, detailed answer (150-250 words):"""
     try:
+        logger.info(f"  → Calling {CONFIG['llm_model']} (temp={temperature}, tokens={max_tokens})...")
         output = llm_client(
             user_prompt,
+            max_length=300,
+            temperature=0.75,
+            top_p=0.92,
             do_sample=True,
+            num_beams=2,
+            early_stopping=True
         )
         response = output[0]['generated_text'].strip()
         if not response:
             logger.warning(f"  ✗ Empty response (attempt {attempt})")
             return None
+        if len(response) < 20:
+            logger.warning(f"  ✗ Response too short: {len(response)} chars")
             return None
         apology_phrases = ["i cannot", "i can't", "i'm sorry", "i apologize", "i don't have"]
         if any(phrase in response.lower()[:100] for phrase in apology_phrases):
             logger.warning(f"  ✗ Apology detected")
             return None
+        logger.info(f"  ✅ Generated answer ({len(response)} chars)")
         return response
     except Exception as e:
+        logger.error(f"  ✗ Generation error: {e}")
+        return None
+def generate_answer_langchain(
+    query: str,
+    vectorstore,
+    llm_client
+) -> str:
+    logger.info(f"\n{'='*80}")
+    logger.info(f"Processing query: '{query}'")
+    logger.info(f"{'='*80}")
+    retrieved_docs, confidence = retrieve_knowledge_langchain(
+        query,
+        vectorstore,
+        top_k=CONFIG["top_k"]
+    )
     if not retrieved_docs:
         return "I couldn't find relevant information to answer your question."
     llm_answer = None
     for attempt in range(1, 3):
         logger.info(f"\n  🤖 LLM Generation Attempt {attempt}/2")
         else:
             logger.warning(f"  → Attempt {attempt}/2 failed, retrying...")
     if not llm_answer:
         logger.error(f"  ✗ All 2 LLM attempts failed")
         return "I apologize, but I'm having trouble generating a response. Please try rephrasing your question or ask something else."
 # GRADIO INTERFACE
 # ============================================================================
+def fashion_chatbot(message: str, history: List[List[str]]):
+    try:
+        if not message or not message.strip():
+            yield "Please ask a fashion-related question!"
             return
         yield "🔍 Searching fashion knowledge..."
         retrieved_docs, confidence = retrieve_knowledge_langchain(
             message.strip(),
             vectorstore,
             yield "I couldn't find relevant information to answer your question."
             return
         yield f"💭 Generating answer ({len(retrieved_docs)} sources found)..."
         llm_answer = None
         for attempt in range(1, 3):
             logger.info(f"\n  🤖 LLM Generation Attempt {attempt}/2")
             llm_answer = generate_llm_answer(message.strip(), retrieved_docs, llm_client, attempt)
             if llm_answer:
+                break
         if not llm_answer:
+            logger.error(f"  ✗ All LLM attempts failed")
+            yield "I apologize, but I'm having trouble generating a response. Please try rephrasing your question."
             return
         import time
         words = llm_answer.split()
         displayed_text = ""
         for i, word in enumerate(words):
             displayed_text += word + " "
             if i % 3 == 0 or i == len(words) - 1:
                 yield displayed_text.strip()
+                time.sleep(0.05)
     except Exception as e:
         logger.error(f"Error in chatbot: {e}")
 # INITIALIZE AND LAUNCH
 # ============================================================================
 llm_client = None
 embeddings = None
 vectorstore = None
 def startup():
     """Initialize the embeddings, vector store and LLM into module globals.

     Rebinds the module-level ``embeddings``, ``vectorstore`` and
     ``llm_client`` names. Nothing is caught here, so any exception raised
     by one of the initializers propagates to the caller.
     """
     global llm_client, embeddings, vectorstore
     logger.info("🚀 Starting Fashion Advisor RAG...")
     # Embeddings come first: load_vector_store() takes them as its argument.
     embeddings = initialize_embeddings()
     vectorstore = load_vector_store(embeddings)
     llm_client = initialize_llm()
     logger.info("✅ All components initialized successfully!")
 startup()
 demo = gr.ChatInterface(
     fn=fashion_chatbot,
     title="👗 Fashion Advisor - RAG System",
     ],
 )
 if __name__ == "__main__":
     demo.launch()