hamxaameer committed
Commit 068ed2b · verified
1 Parent(s): df481d0

Update app.py

Files changed (1)
  1. app.py +38 -41
app.py CHANGED
@@ -322,71 +322,68 @@ def generate_llm_answer(
     scored_docs.sort(key=lambda x: x[1], reverse=True)
     top_docs = [doc[0] for doc in scored_docs[:8]]
 
-    # Build context
+    # Build context - keep it SHORT to stay under 512 tokens
     context_parts = []
-    for doc in top_docs:
+    for doc in top_docs[:5]:  # Only use top 5 docs
         content = doc.page_content.strip()
-        if len(content) > 400:
-            content = content[:400] + "..."
+        # Keep each doc snippet under 150 chars
+        if len(content) > 150:
+            content = content[:150] + "..."
         context_parts.append(content)
 
     context_text = "\n\n".join(context_parts)
 
-    # Progressive parameters based on attempt - optimized for longer, natural responses
+    # Progressive parameters - balanced for T5's capabilities
     if attempt == 1:
-        temperature = 0.8
-        max_new_tokens = 450  # Longer responses
-        top_p = 0.92
-        repetition_penalty = 1.15
+        temperature = 0.7
+        max_new_tokens = 350  # Realistic length for T5
+        top_p = 0.9
+        repetition_penalty = 1.2
     elif attempt == 2:
+        temperature = 0.75
+        max_new_tokens = 400
+        top_p = 0.92
+        repetition_penalty = 1.25
+    elif attempt == 3:
+        temperature = 0.8
+        max_new_tokens = 450
+        top_p = 0.94
+        repetition_penalty = 1.3
+    else:
         temperature = 0.85
         max_new_tokens = 500
-        top_p = 0.94
-        repetition_penalty = 1.18
-    elif attempt == 3:
-        temperature = 0.9
-        max_new_tokens = 550
         top_p = 0.95
-        repetition_penalty = 1.2
-    else:
-        temperature = 0.95
-        max_new_tokens = 600
-        top_p = 0.96
-        repetition_penalty = 1.22
+        repetition_penalty = 1.35
 
-    # Create optimized T5 prompt for detailed, natural responses
+    # Create COMPACT T5 prompt to stay under 512 tokens (critical!)
     model_type = CONFIG.get("model_type", "t5")
 
-    # T5 format - encouraging detailed, conversational responses
-    user_prompt = f"""You are a professional fashion advisor. Answer this question with comprehensive, detailed advice using the context provided. Be specific, natural, and conversational.
-
-Question: {query}
-
-Fashion Knowledge Base:
-{context_text[:2000]}
+    # T5 format - simple and effective to minimize tokens
+    # Keep prompt minimal to leave room for generation
+    user_prompt = f"""Fashion Question: {query}
 
-Provide a thorough, well-structured answer (300-500 words) that covers:
-- Main recommendations with specific details
-- Practical styling tips and combinations
-- Why these suggestions work
-- Additional helpful considerations
+Relevant Fashion Tips:
+{context_text[:600]}
 
-Answer:"""
+Provide detailed fashion advice (200-400 words):"""
 
     try:
         logger.info(f" → Calling {CONFIG['llm_model']} (temp={temperature}, tokens={max_new_tokens})...")
 
-        # T5 optimized parameters for detailed, natural responses
+        # T5 optimized parameters - CRITICAL: truncate input to stay under 512 tokens
        output = llm_client(
             user_prompt,
-            max_new_tokens=max_new_tokens,  # Use max_new_tokens instead of max_length
+            max_new_tokens=max_new_tokens,
+            min_new_tokens=100,  # Ensure minimum length generation
             temperature=temperature,
             top_p=top_p,
             do_sample=True,
-            num_beams=3,  # More beams for better quality
+            num_beams=4,  # Higher beams for better quality
             repetition_penalty=repetition_penalty,
+            length_penalty=1.2,  # Encourage longer responses
             early_stopping=True,
-            no_repeat_ngram_size=3  # Prevent repetitive phrases
+            no_repeat_ngram_size=3,
+            truncation=True  # CRITICAL: Truncate input if too long
         )
 
         # Extract generated text
@@ -396,9 +393,9 @@ Answer:"""
             logger.warning(f" ✗ Empty response (attempt {attempt})")
             return None
 
-        # Validation - accept longer responses (aim for 200+ chars minimum)
-        if len(response) < 50:
-            logger.warning(f" ✗ Response too short: {len(response)} chars (need 50+)")
+        # Validation - accept responses with meaningful content
+        if len(response) < 100:
+            logger.warning(f" ✗ Response too short: {len(response)} chars (need 100+)")
             return None
 
         # Check for apologies/refusals
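
Note on the 512-token budget: the character caps in this commit (150 chars per snippet, context sliced to 600 chars) are only a rough proxy for T5's 512-token encoder limit. Counting tokens with the model's own tokenizer makes the budget explicit. A minimal sketch, assuming llm_client wraps a Flan-T5-style checkpoint loaded through transformers; the checkpoint name and the 400/112 budget split are illustrative, not values from app.py:

from transformers import AutoTokenizer

# Illustrative checkpoint - in app.py this would come from CONFIG["llm_model"].
tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")

MAX_INPUT_TOKENS = 512   # T5 encoder limit the commit works around
CONTEXT_BUDGET = 400     # leaves ~112 tokens for the question and instructions

def trim_context(snippets: list[str]) -> str:
    """Greedily keep whole snippets while the joined context fits the budget."""
    kept: list[str] = []
    for snippet in snippets:
        candidate = "\n\n".join(kept + [snippet])
        if len(tokenizer(candidate)["input_ids"]) > CONTEXT_BUDGET:
            break
        kept.append(snippet)
    return "\n\n".join(kept)

def fits(prompt: str) -> bool:
    """True if the final prompt encodes to at most MAX_INPUT_TOKENS tokens."""
    return len(tokenizer(prompt)["input_ids"]) <= MAX_INPUT_TOKENS

Since truncation=True drops everything past the limit and the instruction line sits at the end of the prompt, it is the first thing lost on overflow; a pre-check like fits() catches that before generation.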
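
On the decoding kwargs: in transformers, do_sample=True together with num_beams=4 selects beam-sample decoding, and length_penalty and early_stopping only influence beam scoring, so the combination in this commit is internally consistent. The same settings can be bundled in a GenerationConfig for reuse across retry attempts; the values below are copied from the attempt == 1 branch of the diff:

from transformers import GenerationConfig

# Mirrors the attempt == 1 parameters in the diff above.
gen_cfg = GenerationConfig(
    max_new_tokens=350,
    min_new_tokens=100,     # floor on generated length
    do_sample=True,
    num_beams=4,            # with do_sample=True this is beam-sample decoding
    temperature=0.7,
    top_p=0.9,
    repetition_penalty=1.2,
    length_penalty=1.2,     # consulted only by beam scoring
    early_stopping=True,    # beam-search stopping criterion
    no_repeat_ngram_size=3,
)

model.generate accepts this object directly via its generation_config argument.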