hamxaameer committed on
Commit
a32c4ca
·
verified ·
1 Parent(s): c993f47

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -247
app.py CHANGED
@@ -28,8 +28,8 @@ logger = logging.getLogger(__name__)
28
 
29
  CONFIG = {
30
  "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
31
- "llm_model": None, # Will be set during initialization
32
- "vector_store_path": ".", # Root directory (files are in root on HF Spaces)
33
  "top_k": 15,
34
  "temperature": 0.75,
35
  "max_tokens": 350,
@@ -40,34 +40,24 @@ CONFIG = {
40
  # ============================================================================
41
 
42
  def initialize_llm():
43
- """Initialize free local LLM with transformers pipeline"""
44
  logger.info("πŸ”„ Initializing FREE local language model...")
45
-
46
- # Use FLAN-T5-Large - reliable, fast, and proven to work
47
  model_name = "google/flan-t5-large"
48
 
49
  try:
50
  logger.info(f" Loading {model_name}...")
51
  device = 0 if torch.cuda.is_available() else -1
52
 
53
- # T5 configuration
54
- task = "text2text-generation"
55
- model_type = "t5"
56
-
57
- # Optimized for speed and quality
58
- model_kwargs = {
59
- "low_cpu_mem_usage": True,
60
- }
61
 
62
  llm_client = pipeline(
63
- task,
64
  model=model_name,
65
  device=device,
66
  model_kwargs=model_kwargs
67
  )
68
 
69
  CONFIG["llm_model"] = model_name
70
- CONFIG["model_type"] = model_type
71
  logger.info(f"βœ… LLM initialized: {model_name}")
72
  logger.info(f" Device: {'GPU' if device == 0 else 'CPU'}")
73
  return llm_client
@@ -77,7 +67,6 @@ def initialize_llm():
77
  raise Exception(f"Failed to initialize LLM: {str(e)}")
78
 
79
  def initialize_embeddings():
80
- """Initialize sentence transformer embeddings"""
81
  logger.info("πŸ”„ Initializing embeddings model...")
82
 
83
  embeddings = HuggingFaceEmbeddings(
@@ -90,28 +79,22 @@ def initialize_embeddings():
90
  return embeddings
91
 
92
  def load_vector_store(embeddings):
93
- """Load FAISS vector store with Pydantic monkey-patch"""
94
  logger.info("πŸ”„ Loading FAISS vector store...")
95
 
96
  vector_store_path = CONFIG["vector_store_path"]
97
-
98
- # Check for required FAISS files
99
  index_file = os.path.join(vector_store_path, "index.faiss")
100
  pkl_file = os.path.join(vector_store_path, "index.pkl")
101
 
102
  if not os.path.exists(index_file):
103
- logger.error(f"❌ index.faiss not found at {index_file}")
104
  raise FileNotFoundError(f"FAISS index file not found: {index_file}")
105
 
106
  if not os.path.exists(pkl_file):
107
- logger.error(f"❌ index.pkl not found at {pkl_file}")
108
  raise FileNotFoundError(f"FAISS metadata file not found: {pkl_file}")
109
 
110
  logger.info(f"βœ… Found index.faiss ({os.path.getsize(index_file)/1024/1024:.2f} MB)")
111
  logger.info(f"βœ… Found index.pkl ({os.path.getsize(pkl_file)/1024:.2f} KB)")
112
 
113
  try:
114
- # Try standard loading first
115
  vectorstore = FAISS.load_local(
116
  vector_store_path,
117
  embeddings,
@@ -120,33 +103,25 @@ def load_vector_store(embeddings):
120
  logger.info(f"βœ… FAISS vector store loaded successfully")
121
  return vectorstore
122
 
123
- except (KeyError, AttributeError, Exception) as e:
124
  logger.warning(f"⚠️ Pydantic compatibility issue: {str(e)[:100]}")
125
  logger.info("πŸ”„ Applying Pydantic monkey-patch and retrying...")
126
 
127
- # STEP 1: Monkey-patch Pydantic to handle missing __fields_set__
128
  try:
129
  import pydantic.v1.main as pydantic_main
130
-
131
- # Save original __setstate__
132
  original_setstate = pydantic_main.BaseModel.__setstate__
133
 
134
  def patched_setstate(self, state):
135
- """Patched __setstate__ that handles missing __fields_set__"""
136
- # Add missing __fields_set__ if not present
137
  if '__fields_set__' not in state:
138
  state['__fields_set__'] = set(state.get('__dict__', {}).keys())
139
- # Call original
140
  return original_setstate(self, state)
141
 
142
- # Apply patch
143
  pydantic_main.BaseModel.__setstate__ = patched_setstate
144
  logger.info(" βœ… Pydantic monkey-patch applied")
145
 
146
  except Exception as patch_error:
147
  logger.warning(f" ⚠️ Pydantic patch failed: {patch_error}")
148
 
149
- # STEP 2: Try loading again with patch
150
  try:
151
  vectorstore = FAISS.load_local(
152
  vector_store_path,
@@ -158,44 +133,30 @@ def load_vector_store(embeddings):
158
 
159
  except Exception as e2:
160
  logger.error(f" βœ— Still failed after patch: {str(e2)[:100]}")
161
-
162
- # STEP 3: Last resort - manual reconstruction
163
  logger.info("πŸ”„ Using manual reconstruction (last resort)...")
164
 
165
  import faiss
166
- import pickle
167
  from langchain_community.docstore.in_memory import InMemoryDocstore
168
 
169
- # Load FAISS index
170
  index = faiss.read_index(index_file)
171
  logger.info(f" βœ… FAISS index loaded")
172
 
173
- # Load pickle with raw binary parsing
174
  with open(pkl_file, "rb") as f:
175
- import io
176
- import struct
177
-
178
- # Read raw bytes
179
  raw_bytes = f.read()
180
  logger.info(f" Read {len(raw_bytes)} bytes from pickle")
181
 
182
- # Try to extract text content directly (bypass Pydantic completely)
183
- # This is a fallback that extracts document strings
184
- import re
185
-
186
- # Find all text patterns that look like documents
187
  text_pattern = rb'([A-Za-z0-9\s\.\,\;\:\!\?\-\'\"\(\)]{50,})'
188
  matches = re.findall(text_pattern, raw_bytes)
189
 
190
  if len(matches) > 100:
191
  logger.info(f" Found {len(matches)} potential document fragments")
192
 
193
- # Create documents from extracted text
194
  documents = []
195
- for idx, match in enumerate(matches[:5000]): # Use first 5000 quality matches
196
  try:
197
  content = match.decode('utf-8', errors='ignore').strip()
198
- if len(content) >= 100: # Only high-quality, substantial content
199
  doc = Document(
200
  page_content=content,
201
  metadata={"source": "reconstructed", "id": idx}
@@ -210,7 +171,6 @@ def load_vector_store(embeddings):
210
  logger.info(f" βœ… Extracted {len(documents)} high-quality documents")
211
  logger.info(f" πŸ”„ Rebuilding FAISS index from scratch...")
212
 
213
- # Create NEW FAISS index from documents (ignore old corrupted index)
214
  vectorstore = FAISS.from_documents(
215
  documents=documents,
216
  embedding=embeddings
@@ -230,20 +190,15 @@ def retrieve_knowledge_langchain(
230
  vectorstore,
231
  top_k: int = 15
232
  ) -> Tuple[List[Document], float]:
233
- """
234
- Retrieve relevant documents using LangChain FAISS with query expansion
235
- """
236
  logger.info(f"πŸ” Retrieving knowledge for: '{query}'")
237
 
238
- # Create query variants for better coverage
239
  query_variants = [
240
- query, # Original
241
- f"fashion advice clothing outfit style for {query}", # Semantic expansion
242
  ]
243
 
244
  all_docs = []
245
 
246
- # Retrieve for each variant
247
  for variant in query_variants:
248
  try:
249
  docs_and_scores = vectorstore.similarity_search_with_score(variant, k=top_k)
@@ -257,23 +212,18 @@ def retrieve_knowledge_langchain(
257
  except Exception as e:
258
  logger.error(f"Retrieval error for variant '{variant}': {e}")
259
 
260
- # Deduplicate by content
261
  unique_docs = {}
262
  for doc in all_docs:
263
  content_key = doc.page_content[:100]
264
  if content_key not in unique_docs:
265
  unique_docs[content_key] = doc
266
  else:
267
- # Keep document with higher similarity
268
  if doc.metadata.get('similarity', 0) > unique_docs[content_key].metadata.get('similarity', 0):
269
  unique_docs[content_key] = doc
270
 
271
  final_docs = list(unique_docs.values())
272
-
273
- # Sort by similarity
274
  final_docs.sort(key=lambda x: x.metadata.get('similarity', 0), reverse=True)
275
 
276
- # Calculate confidence
277
  if final_docs:
278
  avg_similarity = sum(d.metadata.get('similarity', 0) for d in final_docs) / len(final_docs)
279
  confidence = min(avg_similarity, 1.0)
@@ -290,64 +240,50 @@ def generate_llm_answer(
290
  llm_client,
291
  attempt: int = 1
292
  ) -> Optional[str]:
293
- """
294
- Generate answer using local LLM with retrieved context
295
- """
296
  if not llm_client:
297
  logger.error(" β†’ LLM client not initialized")
298
  return None
299
 
300
- # Build focused context with relevance filtering
301
  query_lower = query.lower()
302
  query_words = set(query_lower.split())
303
 
304
- # ANTI-HALLUCINATION: Filter for fashion-relevant documents only
305
- fashion_terms = {'wear', 'outfit', 'style', 'fashion', 'clothing', 'color', 'dress', 'fabric'}
306
-
307
  scored_docs = []
308
  for doc in retrieved_docs[:20]:
309
  content = doc.page_content.lower()
310
  doc_words = set(content.split())
311
-
312
- # Check if document contains fashion terms
313
- has_fashion = any(term in content for term in fashion_terms)
314
- if not has_fashion:
315
- continue # Skip non-fashion documents
316
-
317
  overlap = len(query_words.intersection(doc_words))
318
 
319
- # Boost for verified/curated
320
  if doc.metadata.get('verified', False):
321
  overlap += 10
322
 
323
- # Boost for longer content
324
  if len(doc.page_content) > 200:
325
  overlap += 3
326
 
327
  scored_docs.append((doc, overlap))
328
 
329
- # If no fashion-relevant docs found, return None
330
- if not scored_docs:
331
- logger.warning(" ⚠️ No fashion-relevant documents found")
332
- return None
 
 
 
 
 
 
 
333
 
334
- # Sort and take top 8
335
- # Optimized parameters for 2-attempt strategy
336
  if attempt == 1:
337
  temperature = 0.75
338
  max_tokens = 350
339
  top_p = 0.92
340
  repetition_penalty = 1.15
341
- else: # attempt == 2
342
  temperature = 0.85
343
  max_tokens = 450
344
  top_p = 0.94
345
  repetition_penalty = 1.2
346
- temperature = 0.75
347
- max_new_tokens = 300
348
- top_p = 0.92
349
- repetition_penalty = 1.25
350
- # T5 format - simple and effective for good answers
351
  user_prompt = f"""Answer this fashion question with detailed, specific advice using the context provided.
352
 
353
  Question: {query}
@@ -356,73 +292,60 @@ Fashion Context:
356
  {context_text[:1500]}
357
 
358
  Provide a complete, detailed answer (150-250 words):"""
359
- repetition_penalty = 1.35
360
-
361
- # Create COMPACT T5 prompt to stay under 512 tokens (critical!)
362
- model_type = CONFIG.get("model_type", "t5")
363
-
364
- # T5 format - with explicit constraints to prevent hallucination
365
- user_prompt = f"""You are a fashion expert. Answer ONLY about fashion, clothing, and style.
366
-
367
- Question: {query}
368
-
369
- Fashion Knowledge:
370
- {context_text[:600]}
371
-
372
- Rules:
373
- - Answer ONLY using the fashion knowledge provided
374
- - Focus on clothing, outfits, colors, fabrics, and styling
375
- - DO NOT mention: politics, history, wars, empires, architecture
376
- - If unsure, say "I don't have enough information"
377
-
378
- Fashion Answer:"""
379
 
380
  try:
381
- logger.info(f" β†’ Calling {CONFIG['llm_model']} (temp={temperature}, tokens={max_new_tokens})...")
382
 
383
- # T5 optimized for SPEED on CPU - use greedy decoding (num_beams=1)
384
  output = llm_client(
385
  user_prompt,
386
- max_new_tokens=max_new_tokens,
387
- min_new_tokens=80, # Lower minimum for faster completion
388
- temperature=temperature,
389
- top_p=top_p,
390
  do_sample=True,
391
- num_beams=1, # Greedy decoding for 4x faster speed on CPU
392
- repetition_penalty=repetition_penalty,
393
- early_stopping=True,
394
- no_repeat_ngram_size=3,
395
- truncation=True # CRITICAL: Truncate input if too long
396
  )
397
 
398
- # Extract generated text
399
  response = output[0]['generated_text'].strip()
400
 
401
  if not response:
402
  logger.warning(f" βœ— Empty response (attempt {attempt})")
403
  return None
404
 
405
- # Validation - accept responses with meaningful content
406
- if len(response) < 80:
407
- logger.warning(f" βœ— Response too short: {len(response)} chars (need 80+)")
408
  return None
409
 
410
- # Check for apologies/refusals
411
  apology_phrases = ["i cannot", "i can't", "i'm sorry", "i apologize", "i don't have"]
412
  if any(phrase in response.lower()[:100] for phrase in apology_phrases):
413
  logger.warning(f" βœ— Apology detected")
414
  return None
415
 
416
- # Log response length and word count
417
- word_count = len(response.split())
418
- logger.info(f" βœ… Generated answer ({len(response)} chars, {word_count} words)")
419
  return response
420
 
421
  except Exception as e:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
422
  if not retrieved_docs:
423
  return "I couldn't find relevant information to answer your question."
424
 
425
- # Step 2: Try LLM generation (2 fast attempts for efficiency)
426
  llm_answer = None
427
  for attempt in range(1, 3):
428
  logger.info(f"\n πŸ€– LLM Generation Attempt {attempt}/2")
@@ -434,33 +357,6 @@ Fashion Answer:"""
434
  else:
435
  logger.warning(f" β†’ Attempt {attempt}/2 failed, retrying...")
436
 
437
- # Step 3: If all attempts fail, return error
438
- if not llm_answer:
439
- logger.error(f" βœ— All 2 LLM attempts failed")
440
- return "I apologize, but I'm having trouble generating a response. Please try rephrasing your question or ask something else."
441
-
442
- return llm_answeronfidence = retrieve_knowledge_langchain(
443
- query,
444
- vectorstore,
445
- top_k=CONFIG["top_k"]
446
- )
447
-
448
- def fashion_chatbot(message: str, history: List[List[str]]):
449
- """
450
- Chatbot function for Gradio interface with streaming
451
- """
452
- try:
453
- if not message or not message.strip():
454
- yield "Please ask a fashion-related question!"
455
- return
456
-
457
- # Show searching indicator
458
- yield "πŸ” Searching fashion knowledge..."d successfully")
459
- break
460
- else:
461
- logger.warning(f" β†’ Attempt {attempt}/2 failed, retrying...")
462
-
463
- # Step 3: If all attempts fail, return error
464
  if not llm_answer:
465
  logger.error(f" βœ— All 2 LLM attempts failed")
466
  return "I apologize, but I'm having trouble generating a response. Please try rephrasing your question or ask something else."
@@ -471,54 +367,14 @@ def fashion_chatbot(message: str, history: List[List[str]]):
471
  # GRADIO INTERFACE
472
  # ============================================================================
473
 
474
- # Generate answer with 2 fast attempts
475
- llm_answer = None
476
- for attempt in range(1, 3):
477
- logger.info(f"\n πŸ€– LLM Generation Attempt {attempt}/2")
478
- llm_answer = generate_llm_answer(message.strip(), retrieved_docs, llm_client, attempt)
479
-
480
- if llm_answer:
481
- break
482
-
483
- # If LLM fails, show error
484
- if not llm_answer:
485
- logger.error(f" βœ— All LLM attempts failed")
486
- yield "I apologize, but I'm having trouble generating a response. Please try rephrasing your question."
487
- return', 'match', 'look', 'shirt', 'pants', 'shoes', 'accessory',
488
- 'wardrobe', 'fit', 'fabric', 'pattern', 'casual', 'formal', 'seasonal',
489
- 'wedding', 'meeting', 'interview', 'date', 'party', 'jeans', 'suit',
490
- 'skirt', 'jacket', 'coat', 'sweater', 'blouse', 'tie', 'scarf', 'boots',
491
- 'hat', 'bag', 'purse', 'jewelry', 'necklace', 'bracelet', 'watch'
492
- ]
493
-
494
- # Reject obviously non-fashion questions FIRST (higher priority)
495
- non_fashion_indicators = [
496
- 'crisis', 'collapse', 'empire', 'war', 'politics', 'economy',
497
- 'architecture', 'building', 'nebula', 'space', 'republic',
498
- 'soviet', 'ottoman', 'history', 'government', 'president', 'designed',
499
- 'architect', 'eastern', 'western', 'communist', 'russia', 'political',
500
- 'military', 'sapphire crisis', 'who designed', 'what caused'
501
- ]
502
-
503
- has_non_fashion = any(indicator in query_lower for indicator in non_fashion_indicators)
504
-
505
- # STRICT CHECK: If non-fashion detected, reject immediately
506
- if has_non_fashion:
507
- logger.info(f"❌ Non-fashion query rejected: {message.strip()}")
508
- yield "I'm a fashion advisor and can only answer questions about clothing, style, and fashion. Please ask me about outfits, styling, colors, or wardrobe advice!"
509
- return
510
-
511
- # Check if query contains fashion keywords
512
- is_fashion_query = any(keyword in query_lower for keyword in fashion_keywords)
513
-
514
- if not is_fashion_query:
515
- yield "I'm a fashion advisor and can only answer questions about clothing, style, and fashion. Please ask me about outfits, styling, colors, or wardrobe advice!"
516
  return
517
 
518
- # Show searching indicator (only for valid fashion queries)
519
  yield "πŸ” Searching fashion knowledge..."
520
 
521
- # Retrieve documents (only after validation passes)
522
  retrieved_docs, confidence = retrieve_knowledge_langchain(
523
  message.strip(),
524
  vectorstore,
@@ -529,54 +385,21 @@ def fashion_chatbot(message: str, history: List[List[str]]):
529
  yield "I couldn't find relevant information to answer your question."
530
  return
531
 
532
- # ANTI-HALLUCINATION: Check retrieval quality
533
- if confidence < 0.35:
534
- yield "I don't have enough reliable information about this specific topic. Could you rephrase or ask about common fashion topics like outfit recommendations, color matching, or styling advice?"
535
- return
536
-
537
- # Show generating indicator
538
  yield f"πŸ’­ Generating answer ({len(retrieved_docs)} sources found)..."
539
 
540
- # Generate answer with 2 quick attempts
541
  llm_answer = None
542
  for attempt in range(1, 3):
543
  logger.info(f"\n πŸ€– LLM Generation Attempt {attempt}/2")
544
  llm_answer = generate_llm_answer(message.strip(), retrieved_docs, llm_client, attempt)
545
 
546
  if llm_answer:
547
- # ANTI-HALLUCINATION: Validate answer relevance
548
- answer_lower = llm_answer.lower()
549
-
550
- # Check for hallucination indicators
551
- hallucination_markers = [
552
- 'empire', 'ottoman', 'soviet', 'russia', 'collapse', 'crisis',
553
- 'republic', 'communist', 'nebula', 'architecture', 'political',
554
- 'government', 'war', 'military', 'economic'
555
- ]
556
-
557
- has_hallucination = any(marker in answer_lower for marker in hallucination_markers)
558
-
559
- # Check if answer contains fashion terms
560
- fashion_terms = [
561
- 'wear', 'outfit', 'style', 'clothing', 'fabric', 'color',
562
- 'match', 'fit', 'look', 'fashion', 'dress', 'suit'
563
- ]
564
- has_fashion_content = any(term in answer_lower for term in fashion_terms)
565
-
566
- if has_hallucination or not has_fashion_content:
567
- logger.warning(f" ⚠️ Hallucination detected in attempt {attempt}, retrying...")
568
- llm_answer = None
569
- continue
570
- else:
571
- break
572
 
573
- # If LLM fails, show error
574
  if not llm_answer:
575
- logger.error(f" βœ— All LLM attempts failed or produced hallucinations")
576
- yield "I apologize, but I'm having trouble generating a reliable fashion answer. Please ask about specific fashion topics like outfit recommendations, color coordination, or styling tips."
577
  return
578
 
579
- # Stream the answer word by word for natural flow
580
  import time
581
  words = llm_answer.split()
582
  displayed_text = ""
@@ -584,10 +407,9 @@ def fashion_chatbot(message: str, history: List[List[str]]):
584
  for i, word in enumerate(words):
585
  displayed_text += word + " "
586
 
587
- # Yield every 3 words for smooth streaming
588
  if i % 3 == 0 or i == len(words) - 1:
589
  yield displayed_text.strip()
590
- time.sleep(0.05) # Small delay for natural flow
591
 
592
  except Exception as e:
593
  logger.error(f"Error in chatbot: {e}")
@@ -597,32 +419,23 @@ def fashion_chatbot(message: str, history: List[List[str]]):
597
  # INITIALIZE AND LAUNCH
598
  # ============================================================================
599
 
600
- # Global variables
601
  llm_client = None
602
  embeddings = None
603
  vectorstore = None
604
 
605
  def startup():
606
- """Initialize all models and load vector store"""
607
  global llm_client, embeddings, vectorstore
608
 
609
  logger.info("πŸš€ Starting Fashion Advisor RAG...")
610
 
611
- # Initialize embeddings
612
  embeddings = initialize_embeddings()
613
-
614
- # Load vector store
615
  vectorstore = load_vector_store(embeddings)
616
-
617
- # Initialize LLM
618
  llm_client = initialize_llm()
619
 
620
  logger.info("βœ… All components initialized successfully!")
621
 
622
- # Initialize on startup
623
  startup()
624
 
625
- # Create Gradio interface - simple version compatible with all Gradio versions
626
  demo = gr.ChatInterface(
627
  fn=fashion_chatbot,
628
  title="πŸ‘— Fashion Advisor - RAG System",
@@ -647,6 +460,5 @@ I can help with:
647
  ],
648
  )
649
 
650
- # Launch
651
  if __name__ == "__main__":
652
  demo.launch()
 
28
 
29
  CONFIG = {
30
  "embedding_model": "sentence-transformers/all-MiniLM-L6-v2",
31
+ "llm_model": None,
32
+ "vector_store_path": ".",
33
  "top_k": 15,
34
  "temperature": 0.75,
35
  "max_tokens": 350,
 
40
  # ============================================================================
41
 
42
  def initialize_llm():
 
43
  logger.info("πŸ”„ Initializing FREE local language model...")
 
 
44
  model_name = "google/flan-t5-large"
45
 
46
  try:
47
  logger.info(f" Loading {model_name}...")
48
  device = 0 if torch.cuda.is_available() else -1
49
 
50
+ model_kwargs = {"low_cpu_mem_usage": True}
 
 
 
 
 
 
 
51
 
52
  llm_client = pipeline(
53
+ "text2text-generation",
54
  model=model_name,
55
  device=device,
56
  model_kwargs=model_kwargs
57
  )
58
 
59
  CONFIG["llm_model"] = model_name
60
+ CONFIG["model_type"] = "t5"
61
  logger.info(f"βœ… LLM initialized: {model_name}")
62
  logger.info(f" Device: {'GPU' if device == 0 else 'CPU'}")
63
  return llm_client
 
67
  raise Exception(f"Failed to initialize LLM: {str(e)}")
68
 
69
  def initialize_embeddings():
 
70
  logger.info("πŸ”„ Initializing embeddings model...")
71
 
72
  embeddings = HuggingFaceEmbeddings(
 
79
  return embeddings
80
 
81
  def load_vector_store(embeddings):
 
82
  logger.info("πŸ”„ Loading FAISS vector store...")
83
 
84
  vector_store_path = CONFIG["vector_store_path"]
 
 
85
  index_file = os.path.join(vector_store_path, "index.faiss")
86
  pkl_file = os.path.join(vector_store_path, "index.pkl")
87
 
88
  if not os.path.exists(index_file):
 
89
  raise FileNotFoundError(f"FAISS index file not found: {index_file}")
90
 
91
  if not os.path.exists(pkl_file):
 
92
  raise FileNotFoundError(f"FAISS metadata file not found: {pkl_file}")
93
 
94
  logger.info(f"βœ… Found index.faiss ({os.path.getsize(index_file)/1024/1024:.2f} MB)")
95
  logger.info(f"βœ… Found index.pkl ({os.path.getsize(pkl_file)/1024:.2f} KB)")
96
 
97
  try:
 
98
  vectorstore = FAISS.load_local(
99
  vector_store_path,
100
  embeddings,
 
103
  logger.info(f"βœ… FAISS vector store loaded successfully")
104
  return vectorstore
105
 
106
+ except Exception as e:
107
  logger.warning(f"⚠️ Pydantic compatibility issue: {str(e)[:100]}")
108
  logger.info("πŸ”„ Applying Pydantic monkey-patch and retrying...")
109
 
 
110
  try:
111
  import pydantic.v1.main as pydantic_main
 
 
112
  original_setstate = pydantic_main.BaseModel.__setstate__
113
 
114
  def patched_setstate(self, state):
 
 
115
  if '__fields_set__' not in state:
116
  state['__fields_set__'] = set(state.get('__dict__', {}).keys())
 
117
  return original_setstate(self, state)
118
 
 
119
  pydantic_main.BaseModel.__setstate__ = patched_setstate
120
  logger.info(" βœ… Pydantic monkey-patch applied")
121
 
122
  except Exception as patch_error:
123
  logger.warning(f" ⚠️ Pydantic patch failed: {patch_error}")
124
 
 
125
  try:
126
  vectorstore = FAISS.load_local(
127
  vector_store_path,
 
133
 
134
  except Exception as e2:
135
  logger.error(f" βœ— Still failed after patch: {str(e2)[:100]}")
 
 
136
  logger.info("πŸ”„ Using manual reconstruction (last resort)...")
137
 
138
  import faiss
 
139
  from langchain_community.docstore.in_memory import InMemoryDocstore
140
 
 
141
  index = faiss.read_index(index_file)
142
  logger.info(f" βœ… FAISS index loaded")
143
 
 
144
  with open(pkl_file, "rb") as f:
145
+ import re
 
 
 
146
  raw_bytes = f.read()
147
  logger.info(f" Read {len(raw_bytes)} bytes from pickle")
148
 
 
 
 
 
 
149
  text_pattern = rb'([A-Za-z0-9\s\.\,\;\:\!\?\-\'\"\(\)]{50,})'
150
  matches = re.findall(text_pattern, raw_bytes)
151
 
152
  if len(matches) > 100:
153
  logger.info(f" Found {len(matches)} potential document fragments")
154
 
 
155
  documents = []
156
+ for idx, match in enumerate(matches[:5000]):
157
  try:
158
  content = match.decode('utf-8', errors='ignore').strip()
159
+ if len(content) >= 100:
160
  doc = Document(
161
  page_content=content,
162
  metadata={"source": "reconstructed", "id": idx}
 
171
  logger.info(f" βœ… Extracted {len(documents)} high-quality documents")
172
  logger.info(f" πŸ”„ Rebuilding FAISS index from scratch...")
173
 
 
174
  vectorstore = FAISS.from_documents(
175
  documents=documents,
176
  embedding=embeddings
 
190
  vectorstore,
191
  top_k: int = 15
192
  ) -> Tuple[List[Document], float]:
 
 
 
193
  logger.info(f"πŸ” Retrieving knowledge for: '{query}'")
194
 
 
195
  query_variants = [
196
+ query,
197
+ f"fashion advice clothing outfit style for {query}",
198
  ]
199
 
200
  all_docs = []
201
 
 
202
  for variant in query_variants:
203
  try:
204
  docs_and_scores = vectorstore.similarity_search_with_score(variant, k=top_k)
 
212
  except Exception as e:
213
  logger.error(f"Retrieval error for variant '{variant}': {e}")
214
 
 
215
  unique_docs = {}
216
  for doc in all_docs:
217
  content_key = doc.page_content[:100]
218
  if content_key not in unique_docs:
219
  unique_docs[content_key] = doc
220
  else:
 
221
  if doc.metadata.get('similarity', 0) > unique_docs[content_key].metadata.get('similarity', 0):
222
  unique_docs[content_key] = doc
223
 
224
  final_docs = list(unique_docs.values())
 
 
225
  final_docs.sort(key=lambda x: x.metadata.get('similarity', 0), reverse=True)
226
 
 
227
  if final_docs:
228
  avg_similarity = sum(d.metadata.get('similarity', 0) for d in final_docs) / len(final_docs)
229
  confidence = min(avg_similarity, 1.0)
 
240
  llm_client,
241
  attempt: int = 1
242
  ) -> Optional[str]:
 
 
 
243
  if not llm_client:
244
  logger.error(" β†’ LLM client not initialized")
245
  return None
246
 
 
247
  query_lower = query.lower()
248
  query_words = set(query_lower.split())
249
 
 
 
 
250
  scored_docs = []
251
  for doc in retrieved_docs[:20]:
252
  content = doc.page_content.lower()
253
  doc_words = set(content.split())
 
 
 
 
 
 
254
  overlap = len(query_words.intersection(doc_words))
255
 
 
256
  if doc.metadata.get('verified', False):
257
  overlap += 10
258
 
 
259
  if len(doc.page_content) > 200:
260
  overlap += 3
261
 
262
  scored_docs.append((doc, overlap))
263
 
264
+ scored_docs.sort(key=lambda x: x[1], reverse=True)
265
+ top_docs = [doc[0] for doc in scored_docs[:8]]
266
+
267
+ context_parts = []
268
+ for doc in top_docs:
269
+ content = doc.page_content.strip()
270
+ if len(content) > 400:
271
+ content = content[:400] + "..."
272
+ context_parts.append(content)
273
+
274
+ context_text = "\n\n".join(context_parts)
275
 
 
 
276
  if attempt == 1:
277
  temperature = 0.75
278
  max_tokens = 350
279
  top_p = 0.92
280
  repetition_penalty = 1.15
281
+ else:
282
  temperature = 0.85
283
  max_tokens = 450
284
  top_p = 0.94
285
  repetition_penalty = 1.2
286
+
 
 
 
 
287
  user_prompt = f"""Answer this fashion question with detailed, specific advice using the context provided.
288
 
289
  Question: {query}
 
292
  {context_text[:1500]}
293
 
294
  Provide a complete, detailed answer (150-250 words):"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
 
296
  try:
297
+ logger.info(f" β†’ Calling {CONFIG['llm_model']} (temp={temperature}, tokens={max_tokens})...")
298
 
 
299
  output = llm_client(
300
  user_prompt,
301
+ max_length=300,
302
+ temperature=0.75,
303
+ top_p=0.92,
 
304
  do_sample=True,
305
+ num_beams=2,
306
+ early_stopping=True
 
 
 
307
  )
308
 
 
309
  response = output[0]['generated_text'].strip()
310
 
311
  if not response:
312
  logger.warning(f" βœ— Empty response (attempt {attempt})")
313
  return None
314
 
315
+ if len(response) < 20:
316
+ logger.warning(f" βœ— Response too short: {len(response)} chars")
 
317
  return None
318
 
 
319
  apology_phrases = ["i cannot", "i can't", "i'm sorry", "i apologize", "i don't have"]
320
  if any(phrase in response.lower()[:100] for phrase in apology_phrases):
321
  logger.warning(f" βœ— Apology detected")
322
  return None
323
 
324
+ logger.info(f" βœ… Generated answer ({len(response)} chars)")
 
 
325
  return response
326
 
327
  except Exception as e:
328
+ logger.error(f" βœ— Generation error: {e}")
329
+ return None
330
+
331
+ def generate_answer_langchain(
332
+ query: str,
333
+ vectorstore,
334
+ llm_client
335
+ ) -> str:
336
+ logger.info(f"\n{'='*80}")
337
+ logger.info(f"Processing query: '{query}'")
338
+ logger.info(f"{'='*80}")
339
+
340
+ retrieved_docs, confidence = retrieve_knowledge_langchain(
341
+ query,
342
+ vectorstore,
343
+ top_k=CONFIG["top_k"]
344
+ )
345
+
346
  if not retrieved_docs:
347
  return "I couldn't find relevant information to answer your question."
348
 
 
349
  llm_answer = None
350
  for attempt in range(1, 3):
351
  logger.info(f"\n πŸ€– LLM Generation Attempt {attempt}/2")
 
357
  else:
358
  logger.warning(f" β†’ Attempt {attempt}/2 failed, retrying...")
359
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
  if not llm_answer:
361
  logger.error(f" βœ— All 2 LLM attempts failed")
362
  return "I apologize, but I'm having trouble generating a response. Please try rephrasing your question or ask something else."
 
367
  # GRADIO INTERFACE
368
  # ============================================================================
369
 
370
+ def fashion_chatbot(message: str, history: List[List[str]]):
371
+ try:
372
+ if not message or not message.strip():
373
+ yield "Please ask a fashion-related question!"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
374
  return
375
 
 
376
  yield "πŸ” Searching fashion knowledge..."
377
 
 
378
  retrieved_docs, confidence = retrieve_knowledge_langchain(
379
  message.strip(),
380
  vectorstore,
 
385
  yield "I couldn't find relevant information to answer your question."
386
  return
387
 
 
 
 
 
 
 
388
  yield f"πŸ’­ Generating answer ({len(retrieved_docs)} sources found)..."
389
 
 
390
  llm_answer = None
391
  for attempt in range(1, 3):
392
  logger.info(f"\n πŸ€– LLM Generation Attempt {attempt}/2")
393
  llm_answer = generate_llm_answer(message.strip(), retrieved_docs, llm_client, attempt)
394
 
395
  if llm_answer:
396
+ break
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
397
 
 
398
  if not llm_answer:
399
+ logger.error(f" βœ— All LLM attempts failed")
400
+ yield "I apologize, but I'm having trouble generating a response. Please try rephrasing your question."
401
  return
402
 
 
403
  import time
404
  words = llm_answer.split()
405
  displayed_text = ""
 
407
  for i, word in enumerate(words):
408
  displayed_text += word + " "
409
 
 
410
  if i % 3 == 0 or i == len(words) - 1:
411
  yield displayed_text.strip()
412
+ time.sleep(0.05)
413
 
414
  except Exception as e:
415
  logger.error(f"Error in chatbot: {e}")
 
419
  # INITIALIZE AND LAUNCH
420
  # ============================================================================
421
 
 
422
  llm_client = None
423
  embeddings = None
424
  vectorstore = None
425
 
426
  def startup():
 
427
  global llm_client, embeddings, vectorstore
428
 
429
  logger.info("πŸš€ Starting Fashion Advisor RAG...")
430
 
 
431
  embeddings = initialize_embeddings()
 
 
432
  vectorstore = load_vector_store(embeddings)
 
 
433
  llm_client = initialize_llm()
434
 
435
  logger.info("βœ… All components initialized successfully!")
436
 
 
437
  startup()
438
 
 
439
  demo = gr.ChatInterface(
440
  fn=fashion_chatbot,
441
  title="πŸ‘— Fashion Advisor - RAG System",
 
460
  ],
461
  )
462
 
 
463
  if __name__ == "__main__":
464
  demo.launch()