hamxaameer committed on
Commit
c993f47
·
verified ·
1 Parent(s): d73508e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -66
app.py CHANGED
@@ -332,40 +332,30 @@ def generate_llm_answer(
332
  return None
333
 
334
  # Sort and take top 8
335
- scored_docs.sort(key=lambda x: x[1], reverse=True)
336
- top_docs = [doc[0] for doc in scored_docs[:8]]
337
-
338
- # Build context - keep it SHORT to stay under 512 tokens
339
- context_parts = []
340
- for doc in top_docs[:5]: # Only use top 5 docs
341
- content = doc.page_content.strip()
342
- # Keep each doc snippet under 150 chars
343
- if len(content) > 150:
344
- content = content[:150] + "..."
345
- context_parts.append(content)
346
-
347
- context_text = "\n\n".join(context_parts)
348
-
349
- # Progressive parameters - optimized for SPEED (shorter = faster)
350
  if attempt == 1:
351
- temperature = 0.7
352
- max_new_tokens = 250 # Faster generation
353
- top_p = 0.9
 
 
 
 
 
354
  repetition_penalty = 1.2
355
- elif attempt == 2:
356
  temperature = 0.75
357
  max_new_tokens = 300
358
  top_p = 0.92
359
  repetition_penalty = 1.25
360
- elif attempt == 3:
361
- temperature = 0.8
362
- max_new_tokens = 350
363
- top_p = 0.94
364
- repetition_penalty = 1.3
365
- else:
366
- temperature = 0.85
367
- max_new_tokens = 400
368
- top_p = 0.95
369
  repetition_penalty = 1.35
370
 
371
  # Create COMPACT T5 prompt to stay under 512 tokens (critical!)
@@ -429,36 +419,10 @@ Fashion Answer:"""
429
  return response
430
 
431
  except Exception as e:
432
- logger.error(f" βœ— Generation error: {e}")
433
- return None
434
-
435
- # ============================================================================
436
- # MAIN RAG FUNCTION
437
- # ============================================================================
438
-
439
- def generate_answer_langchain(
440
- query: str,
441
- vectorstore,
442
- llm_client
443
- ) -> str:
444
- """
445
- Main RAG pipeline: Retrieve → Generate (no fallback)
446
- """
447
- logger.info(f"\n{'='*80}")
448
- logger.info(f"Processing query: '{query}'")
449
- logger.info(f"{'='*80}")
450
-
451
- # Step 1: Retrieve documents
452
- retrieved_docs, confidence = retrieve_knowledge_langchain(
453
- query,
454
- vectorstore,
455
- top_k=CONFIG["top_k"]
456
- )
457
-
458
  if not retrieved_docs:
459
  return "I couldn't find relevant information to answer your question."
460
 
461
- # Step 2: Try LLM generation (2 attempts for speed)
462
  llm_answer = None
463
  for attempt in range(1, 3):
464
  logger.info(f"\n πŸ€– LLM Generation Attempt {attempt}/2")
@@ -475,12 +439,12 @@ def generate_answer_langchain(
475
  logger.error(f" βœ— All 2 LLM attempts failed")
476
  return "I apologize, but I'm having trouble generating a response. Please try rephrasing your question or ask something else."
477
 
478
- return llm_answer
479
-
480
- # ============================================================================
481
- # GRADIO INTERFACE
482
- # ============================================================================
483
-
484
  def fashion_chatbot(message: str, history: List[List[str]]):
485
  """
486
  Chatbot function for Gradio interface with streaming
@@ -490,11 +454,37 @@ def fashion_chatbot(message: str, history: List[List[str]]):
490
  yield "Please ask a fashion-related question!"
491
  return
492
 
493
- # ANTI-HALLUCINATION: Validate if question is fashion-related
494
- query_lower = message.strip().lower()
495
- fashion_keywords = [
496
- 'wear', 'outfit', 'dress', 'style', 'fashion', 'clothing', 'clothes',
497
- 'color', 'match', 'look', 'shirt', 'pants', 'shoes', 'accessory',
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
498
  'wardrobe', 'fit', 'fabric', 'pattern', 'casual', 'formal', 'seasonal',
499
  'wedding', 'meeting', 'interview', 'date', 'party', 'jeans', 'suit',
500
  'skirt', 'jacket', 'coat', 'sweater', 'blouse', 'tie', 'scarf', 'boots',
 
332
  return None
333
 
334
  # Sort and take top 8
335
+ # Optimized parameters for 2-attempt strategy
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  if attempt == 1:
337
+ temperature = 0.75
338
+ max_tokens = 350
339
+ top_p = 0.92
340
+ repetition_penalty = 1.15
341
+ else: # attempt == 2
342
+ temperature = 0.85
343
+ max_tokens = 450
344
+ top_p = 0.94
345
  repetition_penalty = 1.2
 
346
  temperature = 0.75
347
  max_new_tokens = 300
348
  top_p = 0.92
349
  repetition_penalty = 1.25
350
+ # T5 format - simple and effective for good answers
351
+ user_prompt = f"""Answer this fashion question with detailed, specific advice using the context provided.
352
+
353
+ Question: {query}
354
+
355
+ Fashion Context:
356
+ {context_text[:1500]}
357
+
358
+ Provide a complete, detailed answer (150-250 words):"""
359
  repetition_penalty = 1.35
360
 
361
  # Create COMPACT T5 prompt to stay under 512 tokens (critical!)
 
419
  return response
420
 
421
  except Exception as e:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
422
  if not retrieved_docs:
423
  return "I couldn't find relevant information to answer your question."
424
 
425
+ # Step 2: Try LLM generation (2 fast attempts for efficiency)
426
  llm_answer = None
427
  for attempt in range(1, 3):
428
  logger.info(f"\n πŸ€– LLM Generation Attempt {attempt}/2")
 
439
  logger.error(f" βœ— All 2 LLM attempts failed")
440
  return "I apologize, but I'm having trouble generating a response. Please try rephrasing your question or ask something else."
441
 
442
+ return llm_answeronfidence = retrieve_knowledge_langchain(
443
+ query,
444
+ vectorstore,
445
+ top_k=CONFIG["top_k"]
446
+ )
447
+
448
  def fashion_chatbot(message: str, history: List[List[str]]):
449
  """
450
  Chatbot function for Gradio interface with streaming
 
454
  yield "Please ask a fashion-related question!"
455
  return
456
 
457
+ # Show searching indicator
458
+ yield "πŸ” Searching fashion knowledge..."d successfully")
459
+ break
460
+ else:
461
+ logger.warning(f" β†’ Attempt {attempt}/2 failed, retrying...")
462
+
463
+ # Step 3: If all attempts fail, return error
464
+ if not llm_answer:
465
+ logger.error(f" βœ— All 2 LLM attempts failed")
466
+ return "I apologize, but I'm having trouble generating a response. Please try rephrasing your question or ask something else."
467
+
468
+ return llm_answer
469
+
470
+ # ============================================================================
471
+ # GRADIO INTERFACE
472
+ # ============================================================================
473
+
474
+ # Generate answer with 2 fast attempts
475
+ llm_answer = None
476
+ for attempt in range(1, 3):
477
+ logger.info(f"\n πŸ€– LLM Generation Attempt {attempt}/2")
478
+ llm_answer = generate_llm_answer(message.strip(), retrieved_docs, llm_client, attempt)
479
+
480
+ if llm_answer:
481
+ break
482
+
483
+ # If LLM fails, show error
484
+ if not llm_answer:
485
+ logger.error(f" βœ— All LLM attempts failed")
486
+ yield "I apologize, but I'm having trouble generating a response. Please try rephrasing your question."
487
+ return', 'match', 'look', 'shirt', 'pants', 'shoes', 'accessory',
488
  'wardrobe', 'fit', 'fabric', 'pattern', 'casual', 'formal', 'seasonal',
489
  'wedding', 'meeting', 'interview', 'date', 'party', 'jeans', 'suit',
490
  'skirt', 'jacket', 'coat', 'sweater', 'blouse', 'tie', 'scarf', 'boots',