Update app.py
Browse files
app.py
CHANGED
|
@@ -297,15 +297,23 @@ def generate_llm_answer(
|
|
| 297 |
logger.error(" β LLM client not initialized")
|
| 298 |
return None
|
| 299 |
|
| 300 |
-
# Build focused context
|
| 301 |
query_lower = query.lower()
|
| 302 |
query_words = set(query_lower.split())
|
| 303 |
|
| 304 |
-
#
|
|
|
|
|
|
|
| 305 |
scored_docs = []
|
| 306 |
for doc in retrieved_docs[:20]:
|
| 307 |
content = doc.page_content.lower()
|
| 308 |
doc_words = set(content.split())
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 309 |
overlap = len(query_words.intersection(doc_words))
|
| 310 |
|
| 311 |
# Boost for verified/curated
|
|
@@ -318,6 +326,11 @@ def generate_llm_answer(
|
|
| 318 |
|
| 319 |
scored_docs.append((doc, overlap))
|
| 320 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 321 |
# Sort and take top 8
|
| 322 |
scored_docs.sort(key=lambda x: x[1], reverse=True)
|
| 323 |
top_docs = [doc[0] for doc in scored_docs[:8]]
|
|
@@ -358,14 +371,21 @@ def generate_llm_answer(
|
|
| 358 |
# Create COMPACT T5 prompt to stay under 512 tokens (critical!)
|
| 359 |
model_type = CONFIG.get("model_type", "t5")
|
| 360 |
|
| 361 |
-
# T5 format -
|
| 362 |
-
|
| 363 |
-
|
|
|
|
| 364 |
|
| 365 |
-
|
| 366 |
{context_text[:600]}
|
| 367 |
|
| 368 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 369 |
|
| 370 |
try:
|
| 371 |
logger.info(f" β Calling {CONFIG['llm_model']} (temp={temperature}, tokens={max_new_tokens})...")
|
|
@@ -452,7 +472,7 @@ def generate_answer_langchain(
|
|
| 452 |
|
| 453 |
# Step 3: If all attempts fail, return error
|
| 454 |
if not llm_answer:
|
| 455 |
-
logger.error(f" β All
|
| 456 |
return "I apologize, but I'm having trouble generating a response. Please try rephrasing your question or ask something else."
|
| 457 |
|
| 458 |
return llm_answer
|
|
@@ -470,10 +490,45 @@ def fashion_chatbot(message: str, history: List[List[str]]):
|
|
| 470 |
yield "Please ask a fashion-related question!"
|
| 471 |
return
|
| 472 |
|
| 473 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 474 |
yield "🔍 Searching fashion knowledge..."
|
| 475 |
|
| 476 |
-
# Retrieve documents
|
| 477 |
retrieved_docs, confidence = retrieve_knowledge_langchain(
|
| 478 |
message.strip(),
|
| 479 |
vectorstore,
|
|
@@ -484,6 +539,11 @@ def fashion_chatbot(message: str, history: List[List[str]]):
|
|
| 484 |
yield "I couldn't find relevant information to answer your question."
|
| 485 |
return
|
| 486 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 487 |
# Show generating indicator
|
| 488 |
yield f"π Generating answer ({len(retrieved_docs)} sources found)..."
|
| 489 |
|
|
@@ -494,12 +554,36 @@ def fashion_chatbot(message: str, history: List[List[str]]):
|
|
| 494 |
llm_answer = generate_llm_answer(message.strip(), retrieved_docs, llm_client, attempt)
|
| 495 |
|
| 496 |
if llm_answer:
|
| 497 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 498 |
|
| 499 |
# If LLM fails, show error
|
| 500 |
if not llm_answer:
|
| 501 |
-
logger.error(f" β All LLM attempts failed")
|
| 502 |
-
yield "I apologize, but I'm having trouble generating a
|
| 503 |
return
|
| 504 |
|
| 505 |
# Stream the answer word by word for natural flow
|
|
|
|
| 297 |
logger.error(" β LLM client not initialized")
|
| 298 |
return None
|
| 299 |
|
| 300 |
+
# Build focused context with relevance filtering
|
| 301 |
query_lower = query.lower()
|
| 302 |
query_words = set(query_lower.split())
|
| 303 |
|
| 304 |
+
# ANTI-HALLUCINATION: Filter for fashion-relevant documents only
|
| 305 |
+
fashion_terms = {'wear', 'outfit', 'style', 'fashion', 'clothing', 'color', 'dress', 'fabric'}
|
| 306 |
+
|
| 307 |
scored_docs = []
|
| 308 |
for doc in retrieved_docs[:20]:
|
| 309 |
content = doc.page_content.lower()
|
| 310 |
doc_words = set(content.split())
|
| 311 |
+
|
| 312 |
+
# Check if document contains fashion terms
|
| 313 |
+
has_fashion = any(term in content for term in fashion_terms)
|
| 314 |
+
if not has_fashion:
|
| 315 |
+
continue # Skip non-fashion documents
|
| 316 |
+
|
| 317 |
overlap = len(query_words.intersection(doc_words))
|
| 318 |
|
| 319 |
# Boost for verified/curated
|
|
|
|
| 326 |
|
| 327 |
scored_docs.append((doc, overlap))
|
| 328 |
|
| 329 |
+
# If no fashion-relevant docs found, return None
|
| 330 |
+
if not scored_docs:
|
| 331 |
+
logger.warning(" ⚠️ No fashion-relevant documents found")
|
| 332 |
+
return None
|
| 333 |
+
|
| 334 |
# Sort and take top 8
|
| 335 |
scored_docs.sort(key=lambda x: x[1], reverse=True)
|
| 336 |
top_docs = [doc[0] for doc in scored_docs[:8]]
|
|
|
|
| 371 |
# Create COMPACT T5 prompt to stay under 512 tokens (critical!)
|
| 372 |
model_type = CONFIG.get("model_type", "t5")
|
| 373 |
|
| 374 |
+
# T5 format - with explicit constraints to prevent hallucination
|
| 375 |
+
user_prompt = f"""You are a fashion expert. Answer ONLY about fashion, clothing, and style.
|
| 376 |
+
|
| 377 |
+
Question: {query}
|
| 378 |
|
| 379 |
+
Fashion Knowledge:
|
| 380 |
{context_text[:600]}
|
| 381 |
|
| 382 |
+
Rules:
|
| 383 |
+
- Answer ONLY using the fashion knowledge provided
|
| 384 |
+
- Focus on clothing, outfits, colors, fabrics, and styling
|
| 385 |
+
- DO NOT mention: politics, history, wars, empires, architecture
|
| 386 |
+
- If unsure, say "I don't have enough information"
|
| 387 |
+
|
| 388 |
+
Fashion Answer:"""
|
| 389 |
|
| 390 |
try:
|
| 391 |
logger.info(f" β Calling {CONFIG['llm_model']} (temp={temperature}, tokens={max_new_tokens})...")
|
|
|
|
| 472 |
|
| 473 |
# Step 3: If all attempts fail, return error
|
| 474 |
if not llm_answer:
|
| 475 |
+
logger.error(f" β All 2 LLM attempts failed")
|
| 476 |
return "I apologize, but I'm having trouble generating a response. Please try rephrasing your question or ask something else."
|
| 477 |
|
| 478 |
return llm_answer
|
|
|
|
| 490 |
yield "Please ask a fashion-related question!"
|
| 491 |
return
|
| 492 |
|
| 493 |
+
# ANTI-HALLUCINATION: Validate if question is fashion-related
|
| 494 |
+
query_lower = message.strip().lower()
|
| 495 |
+
fashion_keywords = [
|
| 496 |
+
'wear', 'outfit', 'dress', 'style', 'fashion', 'clothing', 'clothes',
|
| 497 |
+
'color', 'match', 'look', 'shirt', 'pants', 'shoes', 'accessory',
|
| 498 |
+
'wardrobe', 'fit', 'fabric', 'pattern', 'casual', 'formal', 'seasonal',
|
| 499 |
+
'wedding', 'meeting', 'interview', 'date', 'party', 'jeans', 'suit',
|
| 500 |
+
'skirt', 'jacket', 'coat', 'sweater', 'blouse', 'tie', 'scarf', 'boots',
|
| 501 |
+
'hat', 'bag', 'purse', 'jewelry', 'necklace', 'bracelet', 'watch'
|
| 502 |
+
]
|
| 503 |
+
|
| 504 |
+
# Reject obviously non-fashion questions FIRST (higher priority)
|
| 505 |
+
non_fashion_indicators = [
|
| 506 |
+
'crisis', 'collapse', 'empire', 'war', 'politics', 'economy',
|
| 507 |
+
'architecture', 'building', 'nebula', 'space', 'republic',
|
| 508 |
+
'soviet', 'ottoman', 'history', 'government', 'president', 'designed',
|
| 509 |
+
'architect', 'eastern', 'western', 'communist', 'russia', 'political',
|
| 510 |
+
'military', 'sapphire crisis', 'who designed', 'what caused'
|
| 511 |
+
]
|
| 512 |
+
|
| 513 |
+
has_non_fashion = any(indicator in query_lower for indicator in non_fashion_indicators)
|
| 514 |
+
|
| 515 |
+
# STRICT CHECK: If non-fashion detected, reject immediately
|
| 516 |
+
if has_non_fashion:
|
| 517 |
+
logger.info(f"β Non-fashion query rejected: {message.strip()}")
|
| 518 |
+
yield "I'm a fashion advisor and can only answer questions about clothing, style, and fashion. Please ask me about outfits, styling, colors, or wardrobe advice!"
|
| 519 |
+
return
|
| 520 |
+
|
| 521 |
+
# Check if query contains fashion keywords
|
| 522 |
+
is_fashion_query = any(keyword in query_lower for keyword in fashion_keywords)
|
| 523 |
+
|
| 524 |
+
if not is_fashion_query:
|
| 525 |
+
yield "I'm a fashion advisor and can only answer questions about clothing, style, and fashion. Please ask me about outfits, styling, colors, or wardrobe advice!"
|
| 526 |
+
return
|
| 527 |
+
|
| 528 |
+
# Show searching indicator (only for valid fashion queries)
|
| 529 |
yield "🔍 Searching fashion knowledge..."
|
| 530 |
|
| 531 |
+
# Retrieve documents (only after validation passes)
|
| 532 |
retrieved_docs, confidence = retrieve_knowledge_langchain(
|
| 533 |
message.strip(),
|
| 534 |
vectorstore,
|
|
|
|
| 539 |
yield "I couldn't find relevant information to answer your question."
|
| 540 |
return
|
| 541 |
|
| 542 |
+
# ANTI-HALLUCINATION: Check retrieval quality
|
| 543 |
+
if confidence < 0.35:
|
| 544 |
+
yield "I don't have enough reliable information about this specific topic. Could you rephrase or ask about common fashion topics like outfit recommendations, color matching, or styling advice?"
|
| 545 |
+
return
|
| 546 |
+
|
| 547 |
# Show generating indicator
|
| 548 |
yield f"π Generating answer ({len(retrieved_docs)} sources found)..."
|
| 549 |
|
|
|
|
| 554 |
llm_answer = generate_llm_answer(message.strip(), retrieved_docs, llm_client, attempt)
|
| 555 |
|
| 556 |
if llm_answer:
|
| 557 |
+
# ANTI-HALLUCINATION: Validate answer relevance
|
| 558 |
+
answer_lower = llm_answer.lower()
|
| 559 |
+
|
| 560 |
+
# Check for hallucination indicators
|
| 561 |
+
hallucination_markers = [
|
| 562 |
+
'empire', 'ottoman', 'soviet', 'russia', 'collapse', 'crisis',
|
| 563 |
+
'republic', 'communist', 'nebula', 'architecture', 'political',
|
| 564 |
+
'government', 'war', 'military', 'economic'
|
| 565 |
+
]
|
| 566 |
+
|
| 567 |
+
has_hallucination = any(marker in answer_lower for marker in hallucination_markers)
|
| 568 |
+
|
| 569 |
+
# Check if answer contains fashion terms
|
| 570 |
+
fashion_terms = [
|
| 571 |
+
'wear', 'outfit', 'style', 'clothing', 'fabric', 'color',
|
| 572 |
+
'match', 'fit', 'look', 'fashion', 'dress', 'suit'
|
| 573 |
+
]
|
| 574 |
+
has_fashion_content = any(term in answer_lower for term in fashion_terms)
|
| 575 |
+
|
| 576 |
+
if has_hallucination or not has_fashion_content:
|
| 577 |
+
logger.warning(f" ⚠️ Hallucination detected in attempt {attempt}, retrying...")
|
| 578 |
+
llm_answer = None
|
| 579 |
+
continue
|
| 580 |
+
else:
|
| 581 |
+
break
|
| 582 |
|
| 583 |
# If LLM fails, show error
|
| 584 |
if not llm_answer:
|
| 585 |
+
logger.error(f" β All LLM attempts failed or produced hallucinations")
|
| 586 |
+
yield "I apologize, but I'm having trouble generating a reliable fashion answer. Please ask about specific fashion topics like outfit recommendations, color coordination, or styling tips."
|
| 587 |
return
|
| 588 |
|
| 589 |
# Stream the answer word by word for natural flow
|