sofzcc committed on
Commit
4ee6d34
·
verified ·
1 Parent(s): ff5c1a5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -16
app.py CHANGED
@@ -409,7 +409,7 @@ class RAGIndex:
409
  return answer
410
 
411
  def answer(self, question: str) -> str:
412
- """Answer a question using RAG with a generative seq2seq model (Flan-T5, BART, etc.)."""
413
  if not self.initialized:
414
  return "❌ Assistant not properly initialized. Please check the logs."
415
 
@@ -451,24 +451,45 @@ class RAGIndex:
451
  # 2) Combine contexts into a single evidence block
452
  combined_context = "\n\n".join(context_texts)
453
 
454
- # Keep context at a reasonable size for the model
455
  max_context_chars = 3000
456
  if len(combined_context) > max_context_chars:
457
  combined_context = combined_context[:max_context_chars]
458
 
459
- # 3) Generate grounded answer from context
460
- try:
461
- answer_text = self._generate_from_context(
462
- question=question,
463
- context=combined_context,
464
- max_new_tokens=180,
465
- ).strip()
466
- except Exception as e:
467
- print(f"Generation error: {e}")
468
- return (
469
- "There was an error while generating the answer. "
470
- "Please try again with a shorter question or different wording."
471
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
472
 
473
  if not answer_text:
474
  answer_text = NO_ANSWER_MSG
@@ -481,7 +502,6 @@ class RAGIndex:
481
  )
482
 
483
 
484
-
485
  # Initialize RAG system
486
  print("=" * 50)
487
  rag_index = RAGIndex()
 
409
  return answer
410
 
411
  def answer(self, question: str) -> str:
412
+ """Answer a question using RAG with a simple extractive approach."""
413
  if not self.initialized:
414
  return "❌ Assistant not properly initialized. Please check the logs."
415
 
 
451
  # 2) Combine contexts into a single evidence block
452
  combined_context = "\n\n".join(context_texts)
453
 
454
+ # Keep context at a reasonable size
455
  max_context_chars = 3000
456
  if len(combined_context) > max_context_chars:
457
  combined_context = combined_context[:max_context_chars]
458
 
459
+ # 3) Sentence-level relevance scoring
460
+ # We pick the sentences that best match the question terms
461
+ raw_sentences = re.split(r'(?<=[.!?])\s+', combined_context)
462
+ question_words = {
463
+ w.lower()
464
+ for w in re.findall(r"\w+", question)
465
+ if len(w) > 3 # ignore very short/common words
466
+ }
467
+
468
+ scored_sentences = []
469
+ for s in raw_sentences:
470
+ s_clean = s.strip()
471
+ if len(s_clean) < 20:
472
+ continue
473
+ words = {w.lower() for w in re.findall(r"\w+", s_clean)}
474
+ overlap = question_words & words
475
+ score = len(overlap)
476
+ scored_sentences.append((score, s_clean))
477
+
478
+ if scored_sentences:
479
+ # Sort by overlap score (descending)
480
+ scored_sentences.sort(key=lambda x: x[0], reverse=True)
481
+
482
+ # Take up to 3 of the highest-scoring sentences that have some overlap
483
+ top = [s for score, s in scored_sentences if score > 0][:3]
484
+
485
+ # If none have overlap (e.g., very vague question), just take the top 2 sentences overall
486
+ if not top:
487
+ top = [s for _, s in scored_sentences[:2]]
488
+
489
+ answer_text = " ".join(top)
490
+ else:
491
+ # Fallback: just take a slice of the combined context
492
+ answer_text = combined_context[:400].strip()
493
 
494
  if not answer_text:
495
  answer_text = NO_ANSWER_MSG
 
502
  )
503
 
504
 
 
505
  # Initialize RAG system
506
  print("=" * 50)
507
  rag_index = RAGIndex()