Spaces:
Sleeping
Sleeping
Update rag_retrieval.py
Browse files - rag_retrieval.py +24 -2
rag_retrieval.py
CHANGED
|
@@ -67,8 +67,11 @@ def retrieve_chunks(rewrite: str, session: dict, search_outside_thread: bool):
|
|
| 67 |
|
| 68 |
def build_answer(user_text: str, rewrite: str, retrieved):
|
| 69 |
"""
|
| 70 |
-
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
| 72 |
"""
|
| 73 |
if not retrieved:
|
| 74 |
return (
|
|
@@ -76,6 +79,25 @@ def build_answer(user_text: str, rewrite: str, retrieved):
|
|
| 76 |
[]
|
| 77 |
)
|
| 78 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
lines = [f"**Question:** {user_text}", "", "**Relevant information:**"]
|
| 80 |
citations = []
|
| 81 |
|
|
|
|
| 67 |
|
| 68 |
def build_answer(user_text: str, rewrite: str, retrieved):
|
| 69 |
"""
|
| 70 |
+
Answer builder with a simple 'no clear answer' heuristic.
|
| 71 |
+
|
| 72 |
+
- If scores are very low OR none of the retrieved snippets share
|
| 73 |
+
meaningful words with the question, we return a graceful fallback.
|
| 74 |
+
- Otherwise, we list relevant snippets with citations.
|
| 75 |
"""
|
| 76 |
if not retrieved:
|
| 77 |
return (
|
|
|
|
| 79 |
[]
|
| 80 |
)
|
| 81 |
|
| 82 |
+
# ---- Heuristic: check scores + keyword overlap ----
|
| 83 |
+
question_tokens = {t.lower() for t in user_text.split() if len(t) > 3}
|
| 84 |
+
|
| 85 |
+
def snippet_has_overlap(snippet: str) -> bool:
|
| 86 |
+
words = {w.lower().strip(".,!?;:()[]") for w in snippet.split()}
|
| 87 |
+
return len(question_tokens & words) > 0
|
| 88 |
+
|
| 89 |
+
best_score = max(r["score_combined"] for r in retrieved)
|
| 90 |
+
any_overlap = any(snippet_has_overlap(r["text"]) for r in retrieved)
|
| 91 |
+
|
| 92 |
+
if best_score < 0.2 or not any_overlap:
|
| 93 |
+
# Fallback: nothing strongly relevant in this thread
|
| 94 |
+
return (
|
| 95 |
+
"Within this thread, I don’t see any email that clearly answers this question. "
|
| 96 |
+
"You may need to search outside this thread or check other conversations.",
|
| 97 |
+
[]
|
| 98 |
+
)
|
| 99 |
+
|
| 100 |
+
# ---- Normal snippet-based answer ----
|
| 101 |
lines = [f"**Question:** {user_text}", "", "**Relevant information:**"]
|
| 102 |
citations = []
|
| 103 |
|