Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -409,7 +409,7 @@ class RAGIndex:
|
|
| 409 |
return answer
|
| 410 |
|
| 411 |
def answer(self, question: str) -> str:
|
| 412 |
-
"""Answer a question using RAG with a
|
| 413 |
if not self.initialized:
|
| 414 |
return "❌ Assistant not properly initialized. Please check the logs."
|
| 415 |
|
|
@@ -451,24 +451,45 @@ class RAGIndex:
|
|
| 451 |
# 2) Combine contexts into a single evidence block
|
| 452 |
combined_context = "\n\n".join(context_texts)
|
| 453 |
|
| 454 |
-
# Keep context at a reasonable size
|
| 455 |
max_context_chars = 3000
|
| 456 |
if len(combined_context) > max_context_chars:
|
| 457 |
combined_context = combined_context[:max_context_chars]
|
| 458 |
|
| 459 |
-
# 3)
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
-
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 472 |
|
| 473 |
if not answer_text:
|
| 474 |
answer_text = NO_ANSWER_MSG
|
|
@@ -481,7 +502,6 @@ class RAGIndex:
|
|
| 481 |
)
|
| 482 |
|
| 483 |
|
| 484 |
-
|
| 485 |
# Initialize RAG system
|
| 486 |
print("=" * 50)
|
| 487 |
rag_index = RAGIndex()
|
|
|
|
| 409 |
return answer
|
| 410 |
|
| 411 |
def answer(self, question: str) -> str:
|
| 412 |
+
"""Answer a question using RAG with a simple extractive approach."""
|
| 413 |
if not self.initialized:
|
| 414 |
return "❌ Assistant not properly initialized. Please check the logs."
|
| 415 |
|
|
|
|
| 451 |
# 2) Combine contexts into a single evidence block
|
| 452 |
combined_context = "\n\n".join(context_texts)
|
| 453 |
|
| 454 |
+
# Keep context at a reasonable size
|
| 455 |
max_context_chars = 3000
|
| 456 |
if len(combined_context) > max_context_chars:
|
| 457 |
combined_context = combined_context[:max_context_chars]
|
| 458 |
|
| 459 |
+
# 3) Sentence-level relevance scoring
|
| 460 |
+
# We pick the sentences that best match the question terms
|
| 461 |
+
raw_sentences = re.split(r'(?<=[.!?])\s+', combined_context)
|
| 462 |
+
question_words = {
|
| 463 |
+
w.lower()
|
| 464 |
+
for w in re.findall(r"\w+", question)
|
| 465 |
+
if len(w) > 3 # ignore very short/common words
|
| 466 |
+
}
|
| 467 |
+
|
| 468 |
+
scored_sentences = []
|
| 469 |
+
for s in raw_sentences:
|
| 470 |
+
s_clean = s.strip()
|
| 471 |
+
if len(s_clean) < 20:
|
| 472 |
+
continue
|
| 473 |
+
words = {w.lower() for w in re.findall(r"\w+", s_clean)}
|
| 474 |
+
overlap = question_words & words
|
| 475 |
+
score = len(overlap)
|
| 476 |
+
scored_sentences.append((score, s_clean))
|
| 477 |
+
|
| 478 |
+
if scored_sentences:
|
| 479 |
+
# Sort by overlap score (descending)
|
| 480 |
+
scored_sentences.sort(key=lambda x: x[0], reverse=True)
|
| 481 |
+
|
| 482 |
+
# Take up to 3 of the highest-scoring sentences that have at least one overlapping question term
|
| 483 |
+
top = [s for score, s in scored_sentences if score > 0][:3]
|
| 484 |
+
|
| 485 |
+
# If none have overlap (e.g., very vague question), every score is 0, so just take the first 2 candidate sentences in context order
|
| 486 |
+
if not top:
|
| 487 |
+
top = [s for _, s in scored_sentences[:2]]
|
| 488 |
+
|
| 489 |
+
answer_text = " ".join(top)
|
| 490 |
+
else:
|
| 491 |
+
# Fallback: just take a slice of the combined context
|
| 492 |
+
answer_text = combined_context[:400].strip()
|
| 493 |
|
| 494 |
if not answer_text:
|
| 495 |
answer_text = NO_ANSWER_MSG
|
|
|
|
| 502 |
)
|
| 503 |
|
| 504 |
|
|
|
|
| 505 |
# Initialize RAG system
|
| 506 |
print("=" * 50)
|
| 507 |
rag_index = RAGIndex()
|