Update app.py
app.py CHANGED
@@ -43,47 +43,38 @@ def initialize_llm():
     """Initialize free local LLM with transformers pipeline"""
     logger.info("Initializing FREE local language model...")
 
-    …
-    ]
+    # Use FLAN-T5-Large - reliable, fast, and proven to work
+    model_name = "google/flan-t5-large"
 
-    …
-            device=device,
-            max_new_tokens=300,
-            truncation=True,
-            model_kwargs=model_kwargs
-        )
-
-        CONFIG["llm_model"] = model_name
-        CONFIG["model_type"] = model_type
-        logger.info(f"FREE LLM initialized: {model_name}")
-        logger.info(f"   Device: {'GPU' if device == 0 else 'CPU'}")
-        return llm_client
+    try:
+        logger.info(f"   Loading {model_name}...")
+        device = 0 if torch.cuda.is_available() else -1
+
+        # T5 configuration
+        task = "text2text-generation"
+        model_type = "t5"
+
+        # Optimized for speed and quality
+        model_kwargs = {
+            "low_cpu_mem_usage": True,
+        }
+
+        llm_client = pipeline(
+            task,
+            model=model_name,
+            device=device,
+            model_kwargs=model_kwargs
+        )
 
-    …
+        CONFIG["llm_model"] = model_name
+        CONFIG["model_type"] = model_type
+        logger.info(f"LLM initialized: {model_name}")
+        logger.info(f"   Device: {'GPU' if device == 0 else 'CPU'}")
+        return llm_client
+
+    except Exception as e:
+        logger.error(f"Failed to load model: {str(e)}")
+        raise Exception(f"Failed to initialize LLM: {str(e)}")
 
 def initialize_embeddings():
     """Initialize sentence transformer embeddings"""
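For reference, the rewritten initialize_llm() builds a stock transformers text2text-generation pipeline. A minimal, self-contained sketch of the same call pattern follows; the model name and model_kwargs are taken from the diff, while the prompt and the max_new_tokens value are illustrative only.

import torch
from transformers import pipeline

# Same pattern as the new initialize_llm(); only the model name and
# model_kwargs come from the commit, the rest is illustrative.
device = 0 if torch.cuda.is_available() else -1
generator = pipeline(
    "text2text-generation",
    model="google/flan-t5-large",
    device=device,
    model_kwargs={"low_cpu_mem_usage": True},
)
result = generator("Question: What colors pair well with navy?", max_new_tokens=64)
print(result[0]["generated_text"])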
@@ -363,32 +354,31 @@ def generate_llm_answer(
     top_p = 0.97
     repetition_penalty = 1.25
 
-    # Create …
-    model_type = CONFIG.get("model_type", "…
+    # Create optimized T5 prompt
+    model_type = CONFIG.get("model_type", "t5")
 
-    # …
-    user_prompt = f"""…
-
-Fashion Knowledge:
-{context_text[:1200]}
+    # T5 format - simple and effective for good answers
+    user_prompt = f"""Answer this fashion question with detailed, specific advice using the context provided.
 
 Question: {query}
 
-Output: """
+Fashion Context:
+{context_text[:1500]}
+
+Provide a complete, detailed answer (150-250 words):"""
 
     try:
         logger.info(f"   Calling {CONFIG['llm_model']} (temp={temperature}, tokens={max_tokens})...")
 
-        # …
+        # T5 optimized parameters for quality and speed
         output = llm_client(
             user_prompt,
-            …
-            temperature=0.…
-            top_p=0.…
-            repetition_penalty=1.15,
+            max_length=300,    # Good length for detailed answers
+            temperature=0.75,  # Balanced creativity
+            top_p=0.92,
             do_sample=True,
-            …
+            num_beams=2,       # Light beam search for quality
+            early_stopping=True
         )
 
         # Extract generated text
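Two details of the new generation call deserve a note. The removed call passed max_new_tokens=300, while the new one passes max_length=300; for an encoder-decoder model like FLAN-T5 the two are nearly equivalent, because max_length only caps the decoder output, but they are not interchangeable for decoder-only models, where max_length also counts the prompt tokens. Also, do_sample=True combined with num_beams=2 selects transformers' beam-search multinomial sampling mode, and early_stopping=True then ends the search once enough finished beam candidates exist. A sketch of the call with the committed values (assuming llm_client is the pipeline built in initialize_llm(); the prompt is illustrative):

# Assumes llm_client is the text2text-generation pipeline from initialize_llm().
output = llm_client(
    "Answer this fashion question: what is a capsule wardrobe?",
    max_length=300,       # caps decoder output; T5 does not count the prompt
    temperature=0.75,
    top_p=0.92,
    do_sample=True,       # sample within each beam
    num_beams=2,          # beam-search multinomial sampling
    early_stopping=True,
)
answer = output[0]["generated_text"]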
@@ -417,34 +407,7 @@ Output: """
     return None
 
 def synthesize_direct_answer(
-    …
-    retrieved_docs: List[Document]
-) -> str:
-    """
-    Enhanced fallback: Combine multiple documents intelligently
-    """
-    logger.info("   Using enhanced fallback synthesis")
-
-    if not retrieved_docs:
-        return "I don't have enough information to answer that question accurately. Please try rephrasing your question."
-
-    # Combine top 3 most relevant documents
-    top_docs = retrieved_docs[:3]
-    combined_content = []
-
-    for i, doc in enumerate(top_docs, 1):
-        content = doc.page_content.strip()
-        if len(content) > 200:
-            content = content[:200]
-        combined_content.append(f"{content}")
-
-    answer = " ".join(combined_content)
-
-    # Add context-aware prefix
-    answer = f"Based on fashion guidelines: {answer}"
-
-    return answer
-
+# Removed synthetic fallback - only use LLM
 def generate_answer_langchain(
     query: str,
     vectorstore,
@@ -507,12 +470,12 @@ def fashion_chatbot(message: str, history: List[List[str]]):
         message.strip(),
         vectorstore,
         top_k=CONFIG["top_k"]
-    …
+    # Step 3: If all attempts fail, return error
+    if not llm_answer:
+        logger.error(f"   All 4 LLM attempts failed")
+        return "I apologize, but I'm having trouble generating a response. Please try rephrasing your question or ask something else."
+
+    return llm_answer
 
     # Show generating indicator
     yield f"Generating answer ({len(retrieved_docs)} sources found)..."
 
@@ -552,12 +515,11 @@ def fashion_chatbot(message: str, history: List[List[str]]):
 # ============================================================================
 # INITIALIZE AND LAUNCH
 # ============================================================================
-    …
+    # If LLM fails, show error
+    if not llm_answer:
+        logger.error(f"   All LLM attempts failed")
+        yield "I apologize, but I'm having trouble generating a response. Please try rephrasing your question."
+        return
 def startup():
     """Initialize all models and load vector store"""
     global llm_client, embeddings, vectorstore
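The two guards added in the last two hunks differ in one important way: the first exits with return "I apologize, ...", the second with yield followed by a bare return. If the guard runs inside a generator function such as fashion_chatbot (which yields its status updates), only the yield form actually reaches the caller; a value returned from a generator is attached to StopIteration and is not part of the iterated output. A small sketch with hypothetical handlers (llm_answer is forced to None to simulate a failed LLM call):

from typing import Iterator

def handler_with_return() -> Iterator[str]:
    llm_answer = None
    if not llm_answer:
        return "sorry"  # stored on StopIteration; the caller never sees it
    yield llm_answer

def handler_with_yield() -> Iterator[str]:
    llm_answer = None
    if not llm_answer:
        yield "sorry"   # delivered to the caller
        return          # then ends the generator cleanly
    yield llm_answer

print(list(handler_with_return()))  # []
print(list(handler_with_yield()))   # ['sorry']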
|