Update app.py
app.py CHANGED

@@ -322,71 +322,68 @@ def generate_llm_answer(
     scored_docs.sort(key=lambda x: x[1], reverse=True)
     top_docs = [doc[0] for doc in scored_docs[:8]]
 
-    # Build context
+    # Build context - keep it SHORT to stay under 512 tokens
     context_parts = []
-    for doc in top_docs:
+    for doc in top_docs[:5]:  # Only use top 5 docs
         content = doc.page_content.strip()
-
-
+        # Keep each doc snippet under 150 chars
+        if len(content) > 150:
+            content = content[:150] + "..."
         context_parts.append(content)
 
     context_text = "\n\n".join(context_parts)
 
-    # Progressive parameters
+    # Progressive parameters - balanced for T5's capabilities
     if attempt == 1:
-        temperature = 0.
-        max_new_tokens =
-        top_p = 0.
-        repetition_penalty = 1.
+        temperature = 0.7
+        max_new_tokens = 350  # Realistic length for T5
+        top_p = 0.9
+        repetition_penalty = 1.2
     elif attempt == 2:
+        temperature = 0.75
+        max_new_tokens = 400
+        top_p = 0.92
+        repetition_penalty = 1.25
+    elif attempt == 3:
+        temperature = 0.8
+        max_new_tokens = 450
+        top_p = 0.94
+        repetition_penalty = 1.3
+    else:
         temperature = 0.85
         max_new_tokens = 500
-        top_p = 0.94
-        repetition_penalty = 1.18
-    elif attempt == 3:
-        temperature = 0.9
-        max_new_tokens = 550
         top_p = 0.95
-        repetition_penalty = 1.
-    else:
-        temperature = 0.95
-        max_new_tokens = 600
-        top_p = 0.96
-        repetition_penalty = 1.22
+        repetition_penalty = 1.35
 
-    # Create
+    # Create COMPACT T5 prompt to stay under 512 tokens (critical!)
     model_type = CONFIG.get("model_type", "t5")
 
-    # T5 format -
-
-
-Question: {query}
-
-Fashion Knowledge Base:
-{context_text[:2000]}
+    # T5 format - simple and effective to minimize tokens
+    # Keep prompt minimal to leave room for generation
+    user_prompt = f"""Fashion Question: {query}
 
-
-
-- Practical styling tips and combinations
-- Why these suggestions work
-- Additional helpful considerations
+Relevant Fashion Tips:
+{context_text[:600]}
 
-Answer:"""
+Provide detailed fashion advice (200-400 words):"""
 
     try:
         logger.info(f" β Calling {CONFIG['llm_model']} (temp={temperature}, tokens={max_new_tokens})...")
 
-        # T5 optimized parameters
+        # T5 optimized parameters - CRITICAL: truncate input to stay under 512 tokens
         output = llm_client(
             user_prompt,
-            max_new_tokens=max_new_tokens,
+            max_new_tokens=max_new_tokens,
+            min_new_tokens=100,  # Ensure minimum length generation
             temperature=temperature,
             top_p=top_p,
             do_sample=True,
-            num_beams=
+            num_beams=4,  # Higher beams for better quality
             repetition_penalty=repetition_penalty,
+            length_penalty=1.2,  # Encourage longer responses
             early_stopping=True,
-            no_repeat_ngram_size=3
+            no_repeat_ngram_size=3,
+            truncation=True  # CRITICAL: Truncate input if too long
        )
 
         # Extract generated text

@@ -396,9 +393,9 @@ Answer:"""
             logger.warning(f" β Empty response (attempt {attempt})")
             return None
 
-        # Validation - accept
-        if len(response) <
-            logger.warning(f" β Response too short: {len(response)} chars (need
+        # Validation - accept responses with meaningful content
+        if len(response) < 100:
+            logger.warning(f" β Response too short: {len(response)} chars (need 100+)")
             return None
 
         # Check for apologies/refusals
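A note on the 512-token ceiling these comments keep citing: T5-family encoders (e.g. flan-t5) accept at most 512 input tokens, and with `truncation=True` anything past that is silently dropped. If you want to confirm the compact prompt actually fits rather than relying on truncation, a sketch like the following works; the checkpoint name and the `fits_t5_budget` helper are illustrative assumptions, not code from app.py.

```python
from transformers import AutoTokenizer

# Hypothetical checkpoint purely for illustration; the app reads its
# model name from CONFIG["llm_model"].
MODEL_NAME = "google/flan-t5-base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

def fits_t5_budget(prompt: str, limit: int = 512) -> bool:
    """Return True if the prompt encodes to at most `limit` tokens."""
    return len(tokenizer.encode(prompt)) <= limit

# ~600 chars of context plus the short template should land well under
# 512 tokens (English text runs roughly 4 characters per token).
prompt = (
    "Fashion Question: What goes with a navy blazer?\n\n"
    "Relevant Fashion Tips:\nNavy pairs well with grey, white, and tan.\n\n"
    "Provide detailed fashion advice (200-400 words):"
)
print(fits_t5_budget(prompt))  # True
```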
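One subtlety in the new generation kwargs: in Hugging Face transformers, `do_sample=True` combined with `num_beams=4` puts generation into beam-search multinomial sampling rather than plain sampling, and `early_stopping=True` only takes effect in that beam regime. A minimal standalone reproduction of the same kwarg combination (the small checkpoint is an assumption for the demo):

```python
from transformers import pipeline

# Assumed small checkpoint so the demo is self-contained.
generator = pipeline("text2text-generation", model="google/flan-t5-small")

out = generator(
    "Suggest an outfit for a rainy autumn day.",
    max_new_tokens=64,
    do_sample=True,        # sampling...
    num_beams=4,           # ...plus beams -> beam-sample decoding
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.2,
    no_repeat_ngram_size=3,
    early_stopping=True,   # only meaningful when num_beams > 1
    truncation=True,       # clip over-long inputs at the encoder limit
)
print(out[0]["generated_text"])
```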
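The `attempt` argument implies a retry loop upstream of this function. That caller isn't part of the diff, so the following is a hypothetical sketch of the shape it presumably takes; `answer_query`, the keyword signature, and the four-attempt cap are invented here to match the four parameter tiers.

```python
def answer_query(query: str, max_attempts: int = 4) -> str:
    """Hypothetical caller: retry with progressively looser sampling.

    Assumes generate_llm_answer from app.py is in scope and returns
    None on failure; each failed attempt bumps `attempt`, which selects
    the next temperature/top_p/repetition_penalty tier.
    """
    for attempt in range(1, max_attempts + 1):
        answer = generate_llm_answer(query, attempt=attempt)
        if answer:
            return answer
    return "Sorry, I couldn't come up with styling advice for that one."
```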